├── .dockerignore
├── .flake8
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── pull_request_template.md
└── workflows
│ ├── cicd.yml
│ ├── pr.yml
│ ├── publish-charts-dev.yml
│ ├── publish-charts.yml
│ ├── publish-func-package-dev.yml
│ └── publish-func-package.yml
├── .gitignore
├── .isort.cfg
├── .readthedocs.yaml
├── CODE_OF_CONDUCT.md
├── Dockerfile.dev
├── Dockerfile.stacapi
├── Dockerfile.stacbrowser
├── Dockerfile.task_base
├── LICENSE
├── README.md
├── RELEASING.md
├── SECURITY.md
├── SUPPORT.md
├── cluster
├── README.md
├── argo-values.yaml
├── dev-values.yaml
├── kind-config.yaml
└── pctasks-dev
│ ├── Chart.yaml
│ ├── templates
│ ├── NOTES.txt
│ ├── _helpers.tpl
│ ├── argo-ingress.yaml
│ ├── nginx-configmap.yaml
│ ├── pctasks-ingress.yaml
│ ├── role.yaml
│ ├── rolebinding.yaml
│ ├── secret.yaml
│ └── serviceaccount.yaml
│ └── values.yaml
├── conftest.py
├── datasets
├── alos-dem
│ └── collection
│ │ └── config.json
├── alos-fnf-mosaic
│ └── collection
│ │ └── config.json
├── alos-palsar-mosaic
│ └── collection
│ │ └── config.json
├── aster
│ ├── Dockerfile
│ ├── README.md
│ ├── aster.py
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ ├── scripts
│ │ ├── compare_items.py
│ │ └── print_partition_paths.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── data-files
│ │ │ └── aster-l1t-subset.parquet
│ │ └── test_tasks.py
│ ├── update-geometries-ingest.yaml
│ └── update-geometries.yaml
├── chesapeake_lulc
│ ├── chesapeake_lulc.py
│ ├── collection
│ │ ├── chesapeake-lc-13
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── chesapeake-lc-7
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── chesapeake-lu
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ └── tests
│ │ └── test_dataset.py
├── chloris-biomass
│ └── collection
│ │ └── config.json
├── cil-gdpcir
│ ├── collection
│ │ ├── cil-gdpcir-cc-by-sa
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── cil-gdpcir-cc-by
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── cil-gdpcir-cc0
│ │ │ ├── description.md
│ │ │ └── template.json
│ └── dataset.yaml
├── conus404
│ ├── README.md
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ └── dataset.yaml
├── cop-dem
│ └── collection
│ │ ├── cop-dem-glo-30
│ │ └── config.json
│ │ └── cop-dem-glo-90
│ │ └── config.json
├── deltaresfloods
│ ├── README.md
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ └── tests
│ │ ├── test_dataset.py
│ │ └── test_tasks.py
├── drcog-lulc
│ └── collection
│ │ └── config.json
├── eclipse
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ └── dataset.yaml
├── ecmwf-forecast
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── ecmwf_forecast.py
│ ├── requirements.txt
│ ├── streaming.yaml
│ └── test_ecmwf_forecast.py
├── era5-pds
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ └── dataset.yaml
├── esa-cci-lc
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── esa-cci-lc-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── esa-cci-lc
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── esa_cci_lc.py
│ └── requirements.txt
├── esa-worldcover
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── esa_worldcover.py
│ ├── requirements.txt
│ └── workflows
│ │ ├── esa-worldcover-process-items-2020.yaml
│ │ └── esa-worldcover-process-items-2021.yaml
├── fws-nwi
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── fws_nwi.py
│ ├── requirements.txt
│ └── scripts
│ │ └── create_collection.py
├── gbif
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── gbif.py
│ └── requirements.txt
├── goes
│ ├── goes-cmi
│ │ ├── Dockerfile
│ │ ├── README.md
│ │ ├── collection
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── config.json
│ │ ├── dataset.yaml
│ │ ├── goes_cmi
│ │ │ ├── __init__.py
│ │ │ ├── goes_cmi.py
│ │ │ ├── goes_errors.py
│ │ │ └── goes_paths.py
│ │ ├── requirements.txt
│ │ └── streaming.yaml
│ └── goes-glm
│ │ ├── Dockerfile
│ │ ├── README.md
│ │ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ │ ├── dataset.yaml
│ │ ├── goes_glm.py
│ │ ├── requirements.txt
│ │ ├── streaming.yaml
│ │ ├── tests
│ │ ├── dataset-dev-cluster.yaml
│ │ └── test_dataset.py
│ │ └── workflows
│ │ └── goes-glm-update.yaml
├── hgb
│ └── collection
│ │ └── config.json
├── hls2
│ ├── README.md
│ ├── collection
│ │ ├── hls2-l30
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── hls2-s30
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── hls2.py
│ └── test_hls2.py
├── hrea
│ └── collection
│ │ └── config.json
├── io-biodiversity
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── io_biodiversity.py
│ └── requirements.txt
├── io-land-cover
│ ├── collection
│ │ ├── io-lulc-9-class
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── io-lulc-annual-v02
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── io-lulc
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── io_lulc.py
│ └── tests
│ │ └── test_dataset.py
├── jrc-gsw
│ └── collection
│ │ └── config.json
├── landsat
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── landsat-c2-l1
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── landsat-c2-l2
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── landsat.py
│ ├── requirements.txt
│ └── streaming.yaml
├── mobi
│ └── collection
│ │ └── config.json
├── modis
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── modis-09A1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-09Q1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-10A1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-10A2-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-11A1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-11A2-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-13A1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-13Q1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-14A1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-14A2-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-15A2H-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-15A3H-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-16A3GF-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-17A2H-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-17A2HGF-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-17A3HGF-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-21A2-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── modis-43A4-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── modis-64A1-061
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── misc.py
│ ├── modis.py
│ ├── requirements.txt
│ └── tests
│ │ └── test_platform_field.py
├── ms-buildings
│ ├── Dockerfile
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── ms_buildings.py
│ └── requirements.txt
├── mtbs
│ └── collection
│ │ └── config.json
├── naip
│ ├── Dockerfile
│ ├── Explore.ipynb
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── naip.py
│ ├── requirements.txt
│ └── test_naip.py
├── nasa-nex-gddp-cmip6
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ └── collection.json
│ ├── dataset.yaml
│ ├── nasa_nex_gddp_cmip6.py
│ └── requirements.txt
├── nasadem
│ └── collection
│ │ └── config.json
├── noaa-c-cap
│ └── collection
│ │ └── config.json
├── noaa-cdr
│ ├── Dockerfile
│ ├── README.md
│ ├── collections
│ │ ├── ocean-heat-content-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── ocean-heat-content
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sea-ice-concentration
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sea-surface-temperature-optimum-interpolation
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sea-surface-temperature-whoi-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── sea-surface-temperature-whoi
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── hang.py
│ ├── noaa_cdr.py
│ ├── requirements.txt
│ ├── scripts
│ │ └── create_collections.py
│ └── update.yaml
├── noaa-climate-normals
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── noaa-climate-normals-gridded
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── noaa-climate-normals-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── noaa-climate-normals-tabular
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── noaa_climate_normals.py
│ ├── requirements.txt
│ └── workflows
│ │ ├── noaa-climate-normals-tabular_process-items_annualseasonal_1981-2010.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_annualseasonal_1991-2020.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_annualseasonal_2006-2020.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_daily_1981-2010.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_daily_1991-2020.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_daily_2006-2020.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_hourly_1981-2010.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_hourly_1991-2020.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_hourly_2006-2020.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_monthly_1981-2010.yaml
│ │ ├── noaa-climate-normals-tabular_process-items_monthly_1991-2020.yaml
│ │ └── noaa-climate-normals-tabular_process-items_monthly_2006-2020.yaml
├── noaa-mrms-qpe
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── noaa-mrms-qpe-1h-pass1
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── noaa-mrms-qpe-1h-pass2
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── noaa-mrms-qpe-24h-pass2
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── noaa_mrms_qpe.py
│ ├── requirements.txt
│ ├── tests
│ │ ├── dataset-dev-cluster.yaml
│ │ └── test_dataset.py
│ └── workflows
│ │ ├── update-noaa-mrms-qpe-1h-pass1.yaml
│ │ ├── update-noaa-mrms-qpe-1h-pass2.yaml
│ │ └── update-noaa-mrms-qpe-24h-pass2.yaml
├── noaa_nclimgrid
│ ├── collection
│ │ ├── noaa-nclimgrid-daily-prelim
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── noaa-nclimgrid-daily-scaled
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── noaa-nclimgrid-monthly
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── noaa_nclimgrid.py
│ ├── requirements.txt
│ └── tests
│ │ ├── conftest.py
│ │ └── test_dataset.py
├── nrcan-landcover
│ └── collection
│ │ └── config.json
├── sentinel-1-grd
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ ├── s1grd.py
│ ├── test-data
│ │ └── sentinel-1-grd-item-raw.json
│ └── test_s1grd.py
├── sentinel-1-rtc
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ └── s1rtc.py
├── sentinel-2
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ └── sentinel2.py
├── sentinel-3
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── sentinel-3-olci-lfr-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-olci-wfr-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-slstr-frp-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-slstr-lst-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-slstr-wst-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-sral-lan-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-sral-wat-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-synergy-aod-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-synergy-syn-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-synergy-v10-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── sentinel-3-synergy-vg1-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── sentinel-3-synergy-vgp-l2-netcdf
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ ├── sentinel_3.py
│ └── tests
│ │ └── test_sentinel_3.py
├── sentinel-5p
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ ├── sentinel_5p.py
│ └── test_sentinel_5p.py
├── stac-geoparquet
│ ├── Dockerfile
│ ├── README.md
│ ├── pc_stac_geoparquet.py
│ ├── requirements.txt
│ ├── workflow.yaml
│ └── workflow_test.yaml
├── terraclimate
│ ├── collection
│ │ ├── description.md
│ │ └── template.json
│ └── dataset.yaml
├── usda-cdl
│ ├── Dockerfile
│ ├── README.md
│ ├── collection
│ │ ├── config.json
│ │ ├── description.md
│ │ └── template.json
│ ├── dataset.yaml
│ ├── requirements.txt
│ ├── scripts
│ │ └── create_collection.py
│ ├── tile.yaml
│ └── usda_cdl.py
├── usgs-gap
│ └── collection
│ │ └── config.json
├── usgs-lcmap
│ ├── Dockerfile
│ ├── collection
│ │ ├── usgs-lcmap-conus-v13
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ └── usgs-lcmap-hawaii-v10
│ │ │ ├── config.json
│ │ │ ├── description.md
│ │ │ └── template.json
│ ├── dataset.yaml
│ ├── fix_items
│ │ ├── README.md
│ │ ├── fix_items.py
│ │ ├── fix_items.yaml
│ │ └── tests
│ │ │ ├── __init__.py
│ │ │ ├── data-files
│ │ │ └── items.ndjson
│ │ │ └── test_tasks.py
│ ├── requirements.txt
│ └── usgs_lcmap.py
└── usgs-lidar
│ ├── README.md
│ ├── collection
│ ├── 3dep-lidar-classification
│ │ └── config.json
│ ├── 3dep-lidar-dsm
│ │ └── config.json
│ ├── 3dep-lidar-dtm-native
│ │ └── config.json
│ ├── 3dep-lidar-dtm
│ │ └── config.json
│ ├── 3dep-lidar-hag
│ │ └── config.json
│ ├── 3dep-lidar-intensity
│ │ └── config.json
│ ├── 3dep-lidar-pointsourceid
│ │ └── config.json
│ ├── 3dep-lidar-returns
│ │ └── config.json
│ └── 3dep-seamless
│ │ └── config.json
│ ├── concatenate_csvs.py
│ ├── lidar_audit.py
│ ├── requirements.txt
│ └── workflow.yaml
├── deployment
├── .gitignore
├── Dockerfile
├── bin
│ ├── azlogin
│ ├── deploy
│ ├── get_tfvars
│ ├── jinja
│ ├── kv_add_ip
│ ├── kv_rmv_ip
│ ├── lib
│ ├── nginx-values.yaml
│ ├── publish_pkgs
│ ├── setup_storage.py
│ ├── update_pkg_index
│ └── write_tfvars
├── cosmosdb
│ ├── README.md
│ └── scripts
│ │ ├── stored_procs
│ │ └── workflow-runs
│ │ │ └── bulkput-workflowruns.js
│ │ └── triggers
│ │ ├── workflow-runs
│ │ └── post-all-workflowruns.js
│ │ └── workflows
│ │ └── post-all-workflows.js
├── docker-compose.yml
├── helm
│ ├── argo-values.yaml
│ ├── deploy-values.template.yaml
│ ├── pc-tasks-ingress
│ │ ├── Chart.yaml
│ │ ├── templates
│ │ │ ├── NOTES.txt
│ │ │ ├── _helpers.tpl
│ │ │ ├── ingress.yaml
│ │ │ ├── nginx-configmap.yaml
│ │ │ ├── secret-provider.yaml
│ │ │ └── secret.yaml
│ │ └── values.yaml
│ ├── published
│ │ └── pctasks-server
│ │ │ ├── Chart.yaml
│ │ │ ├── templates
│ │ │ ├── NOTES.txt
│ │ │ ├── _helpers.tpl
│ │ │ ├── deployment.yaml
│ │ │ ├── service.yaml
│ │ │ └── serviceaccount.yaml
│ │ │ └── values.yaml
│ └── vendored
│ │ ├── argo-workflows-0.41.8.tgz
│ │ ├── ingress-nginx-4.8.3.tgz
│ │ └── keda-2.14.2.tgz
├── manual
│ ├── backend-app.manifest.json
│ └── frontend-app.manifest.json
├── requirements.txt
└── terraform
│ ├── batch_pool
│ ├── main.tf
│ ├── providers.tf
│ └── variables.tf
│ ├── resources
│ ├── acr.tf
│ ├── aks.tf
│ ├── apim.tf
│ ├── app_insights.tf
│ ├── batch.tf
│ ├── cosmosdb.tf
│ ├── function.tf
│ ├── ip.tf
│ ├── keyvault.tf
│ ├── output.tf
│ ├── providers.tf
│ ├── rg.tf
│ ├── storage_account.tf
│ ├── values.tfvars.template
│ ├── variables.tf
│ └── vnet.tf
│ └── staging
│ ├── backend.tf
│ ├── env.sh
│ ├── main.tf
│ ├── output.tf
│ ├── pools.tf
│ └── variables.tf
├── dev-secrets.template.yaml
├── dev
├── nginx
│ ├── Dockerfile
│ ├── README.md
│ └── etc
│ │ └── nginx
│ │ ├── conf.d
│ │ └── default.conf
│ │ └── nginx.conf
└── stacapi.py
├── docker-compose.aux.yml
├── docker-compose.console.yml
├── docker-compose.cosmosdb.yml
├── docker-compose.tasks.yml
├── docker-compose.yml
├── docs
├── Makefile
├── _static
│ ├── .gitignore
│ └── low-latency-workflows.svg
├── conf.py
├── development
│ ├── deploying.md
│ ├── faq.md
│ ├── index.md
│ └── setup.md
├── getting_started
│ ├── creating_a_dataset.md
│ ├── dev_workflows.md
│ ├── index.md
│ └── telemetry.md
├── index.md
├── make.bat
├── reference
│ ├── api.md
│ └── index.md
└── user_guide
│ ├── chunking.md
│ ├── index.md
│ ├── runtime.md
│ ├── settings.md
│ ├── storage.md
│ ├── streaming.md
│ ├── templating.md
│ └── workflows.md
├── examples
├── list-logs.yaml
├── run-etl.yaml
├── streaming-workflow.yaml
├── test_workflow.yaml
├── test_workflow_naip.yaml
└── workflow.yaml
├── ingest-collection.yaml
├── mypy.ini
├── pctasks
├── .dockerignore
├── cli
│ ├── README.md
│ ├── pctasks
│ │ └── cli
│ │ │ ├── __init__.py
│ │ │ ├── cli.py
│ │ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ └── test_cli.py
├── client
│ ├── README.md
│ ├── pctasks
│ │ └── client
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ ├── constants.py
│ │ │ ├── context.py
│ │ │ ├── errors.py
│ │ │ ├── profile
│ │ │ ├── __init__.py
│ │ │ ├── cli.py
│ │ │ └── commands.py
│ │ │ ├── py.typed
│ │ │ ├── runs
│ │ │ ├── __init__.py
│ │ │ ├── _get.py
│ │ │ ├── _list.py
│ │ │ ├── _status.py
│ │ │ ├── cli.py
│ │ │ ├── get.py
│ │ │ ├── list.py
│ │ │ ├── options.py
│ │ │ ├── status.py
│ │ │ └── utils.py
│ │ │ ├── settings.py
│ │ │ ├── storage
│ │ │ ├── __init__.py
│ │ │ ├── cli.py
│ │ │ └── commands.py
│ │ │ ├── utils.py
│ │ │ ├── version.py
│ │ │ └── workflow
│ │ │ ├── __init__.py
│ │ │ ├── cli.py
│ │ │ ├── commands.py
│ │ │ ├── options.py
│ │ │ └── template.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── data-files
│ │ ├── mycode.py
│ │ └── test_collection.json
│ │ ├── records
│ │ ├── __init__.py
│ │ └── test_records.py
│ │ ├── test_client.py
│ │ ├── test_storage.py
│ │ └── test_template.py
├── core
│ ├── README.md
│ ├── pctasks
│ │ └── core
│ │ │ ├── __init__.py
│ │ │ ├── _compat.py
│ │ │ ├── activity.py
│ │ │ ├── cli.py
│ │ │ ├── constants.py
│ │ │ ├── context.py
│ │ │ ├── cosmos
│ │ │ ├── __init__.py
│ │ │ ├── container.py
│ │ │ ├── containers
│ │ │ │ ├── __init__.py
│ │ │ │ ├── items.py
│ │ │ │ ├── process_item_errors.py
│ │ │ │ ├── records.py
│ │ │ │ ├── storage_events.py
│ │ │ │ ├── workflow_runs.py
│ │ │ │ └── workflows.py
│ │ │ ├── database.py
│ │ │ ├── page.py
│ │ │ └── settings.py
│ │ │ ├── importer.py
│ │ │ ├── logging.py
│ │ │ ├── message_handler.py
│ │ │ ├── models
│ │ │ ├── __init__.py
│ │ │ ├── activity.py
│ │ │ ├── base.py
│ │ │ ├── config.py
│ │ │ ├── event.py
│ │ │ ├── item.py
│ │ │ ├── record.py
│ │ │ ├── registration.py
│ │ │ ├── response.py
│ │ │ ├── run.py
│ │ │ ├── task.py
│ │ │ ├── tokens.py
│ │ │ ├── utils.py
│ │ │ └── workflow.py
│ │ │ ├── py.typed
│ │ │ ├── queues.py
│ │ │ ├── settings.py
│ │ │ ├── storage
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── blob.py
│ │ │ ├── errors.py
│ │ │ ├── local.py
│ │ │ └── path_filter.py
│ │ │ ├── tables
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── config.py
│ │ │ ├── registration.py
│ │ │ ├── task.py
│ │ │ └── utils.py
│ │ │ ├── tokens.py
│ │ │ ├── utils
│ │ │ ├── __init__.py
│ │ │ ├── backoff.py
│ │ │ ├── stac.py
│ │ │ ├── summary.py
│ │ │ └── template.py
│ │ │ ├── version.py
│ │ │ └── yaml.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── cosmos
│ │ ├── __init__.py
│ │ ├── containers
│ │ │ ├── __init__.py
│ │ │ ├── test_workflow_runs.py
│ │ │ └── test_workflows.py
│ │ └── test_container.py
│ │ ├── data-files
│ │ ├── example_module
│ │ │ ├── __init__.py
│ │ │ ├── a.py
│ │ │ └── b.py
│ │ ├── items
│ │ │ ├── io-lulc-item.json
│ │ │ ├── naip
│ │ │ │ ├── naip1.json
│ │ │ │ └── naip2.json
│ │ │ ├── s1-grd.json
│ │ │ └── s1-rtc
│ │ │ │ └── 2019
│ │ │ │ └── 12
│ │ │ │ └── 15
│ │ │ │ └── IW
│ │ │ │ ├── DH
│ │ │ │ └── S1A_IW_GRDH_1SDH_20191215T034818_20191215T034847_030353_0378EA_rtc.json
│ │ │ │ └── DV
│ │ │ │ └── S1A_IW_GRDH_1SDV_20191215T003835_20191215T003904_030352_0378DC_rtc.json
│ │ ├── planet-nicfi-analytic.json
│ │ └── simple-assets
│ │ │ ├── a
│ │ │ ├── asset-a-1.json
│ │ │ └── asset-a-2.json
│ │ │ └── b
│ │ │ ├── asset-b-1.json
│ │ │ └── asset-b-2.json
│ │ ├── models
│ │ ├── __init__.py
│ │ ├── test_item.py
│ │ ├── test_storage_event.py
│ │ ├── test_task.py
│ │ └── test_workflow.py
│ │ ├── storage
│ │ ├── __init__.py
│ │ ├── test_blob.py
│ │ ├── test_importer.py
│ │ └── test_local.py
│ │ ├── tables
│ │ ├── __init__.py
│ │ ├── test_config.py
│ │ └── test_record.py
│ │ ├── test_activity.py
│ │ ├── test_messages.py
│ │ ├── test_yaml.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── test_backoff.py
│ │ ├── test_summary.py
│ │ ├── test_template.py
│ │ └── test_utils.py
├── dataset
│ ├── README.md
│ ├── pctasks
│ │ └── dataset
│ │ │ ├── __init__.py
│ │ │ ├── _cli.py
│ │ │ ├── chunks
│ │ │ ├── __init__.py
│ │ │ ├── chunkset.py
│ │ │ ├── constants.py
│ │ │ ├── models.py
│ │ │ └── task.py
│ │ │ ├── cli.py
│ │ │ ├── collection.py
│ │ │ ├── constants.py
│ │ │ ├── items
│ │ │ ├── __init__.py
│ │ │ ├── constants.py
│ │ │ ├── models.py
│ │ │ └── task.py
│ │ │ ├── models.py
│ │ │ ├── py.typed
│ │ │ ├── splits
│ │ │ ├── __init__.py
│ │ │ ├── cli.py
│ │ │ ├── constants.py
│ │ │ ├── models.py
│ │ │ └── task.py
│ │ │ ├── streaming.py
│ │ │ ├── template.py
│ │ │ ├── utils.py
│ │ │ ├── validate.py
│ │ │ ├── version.py
│ │ │ └── workflow.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── chunks
│ │ ├── __init__.py
│ │ └── test_task.py
│ │ ├── data-files
│ │ ├── datasets
│ │ │ ├── mycode.py
│ │ │ ├── naip.yaml
│ │ │ └── test-dataset.yaml
│ │ ├── simple-assets
│ │ │ ├── a
│ │ │ │ ├── asset-a-1.json
│ │ │ │ └── asset-a-2.json
│ │ │ └── b
│ │ │ │ ├── asset-b-1.json
│ │ │ │ └── asset-b-2.json
│ │ ├── storage-event.json
│ │ └── test-assets
│ │ │ ├── one.txt
│ │ │ ├── three.txt
│ │ │ └── two.txt
│ │ ├── items
│ │ ├── __init__.py
│ │ └── test_task.py
│ │ ├── test_dataset.py
│ │ ├── test_streaming_create_items.py
│ │ └── test_validate_collection.py
├── dev
│ ├── README.md
│ ├── pctasks
│ │ └── dev
│ │ │ ├── __init__.py
│ │ │ ├── azurite.py
│ │ │ ├── blob.py
│ │ │ ├── cli.py
│ │ │ ├── config.py
│ │ │ ├── constants.py
│ │ │ ├── cosmosdb.py
│ │ │ ├── db.py
│ │ │ ├── env.py
│ │ │ ├── k8s.py
│ │ │ ├── local_dev_endpoints.py
│ │ │ ├── logs.py
│ │ │ ├── mocks.py
│ │ │ ├── py.typed
│ │ │ ├── queues.py
│ │ │ ├── secrets.py
│ │ │ ├── settings.py
│ │ │ ├── tables.py
│ │ │ ├── task.py
│ │ │ ├── test_utils.py
│ │ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ └── test_temp_queue.py
├── ingest
│ ├── README.md
│ ├── pctasks
│ │ └── ingest
│ │ │ ├── __init__.py
│ │ │ ├── _cli.py
│ │ │ ├── cli.py
│ │ │ ├── constants.py
│ │ │ ├── models.py
│ │ │ ├── py.typed
│ │ │ ├── settings.py
│ │ │ ├── utils.py
│ │ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── data-files
│ │ ├── goes-collection-workflow.yaml
│ │ └── test_collection.json
│ │ ├── test_collection.py
│ │ └── test_settings.py
├── ingest_task
│ ├── Dockerfile
│ ├── README.md
│ ├── pctasks
│ │ └── ingest_task
│ │ │ ├── __init__.py
│ │ │ ├── collection.py
│ │ │ ├── items.py
│ │ │ ├── pgstac.py
│ │ │ ├── py.typed
│ │ │ ├── streaming.py
│ │ │ ├── task.py
│ │ │ ├── utils.py
│ │ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── data-files
│ │ ├── era5-pds
│ │ │ ├── description.md
│ │ │ └── template.json
│ │ ├── items
│ │ │ ├── item1.json
│ │ │ └── items.ndjson
│ │ └── test_collection.json
│ │ ├── items_document.json
│ │ ├── test_collection.py
│ │ ├── test_items.py
│ │ └── test_streaming_ingest.py
├── notify
│ ├── README.md
│ ├── pctasks
│ │ └── notify
│ │ │ ├── __init__.py
│ │ │ ├── activities.py
│ │ │ ├── models.py
│ │ │ ├── py.typed
│ │ │ ├── settings.py
│ │ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ └── test_process.py
├── router
│ ├── README.md
│ ├── pctasks
│ │ └── router
│ │ │ ├── __init__.py
│ │ │ ├── handlers
│ │ │ ├── __init__.py
│ │ │ ├── eventgrid.py
│ │ │ └── forward.py
│ │ │ ├── message_handler.py
│ │ │ ├── py.typed
│ │ │ ├── settings.py
│ │ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_process.py
│ │ └── test_settings.py
├── run
│ ├── Dockerfile
│ ├── README.md
│ ├── pctasks
│ │ └── run
│ │ │ ├── __init__.py
│ │ │ ├── _cli.py
│ │ │ ├── argo
│ │ │ ├── __init__.py
│ │ │ └── client.py
│ │ │ ├── batch
│ │ │ ├── __init__.py
│ │ │ ├── client.py
│ │ │ ├── model.py
│ │ │ ├── task.py
│ │ │ └── utils.py
│ │ │ ├── cli.py
│ │ │ ├── constants.py
│ │ │ ├── dag.py
│ │ │ ├── errors.py
│ │ │ ├── models.py
│ │ │ ├── py.typed
│ │ │ ├── secrets
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── keyvault.py
│ │ │ └── local.py
│ │ │ ├── settings.py
│ │ │ ├── task
│ │ │ ├── __init__.py
│ │ │ ├── argo.py
│ │ │ ├── base.py
│ │ │ ├── batch.py
│ │ │ ├── local.py
│ │ │ └── prepare.py
│ │ │ ├── template.py
│ │ │ ├── utils.py
│ │ │ ├── version.py
│ │ │ └── workflow
│ │ │ ├── __init__.py
│ │ │ ├── argo.py
│ │ │ ├── base.py
│ │ │ ├── executor
│ │ │ ├── __init__.py
│ │ │ ├── models.py
│ │ │ ├── remote.py
│ │ │ ├── simple.py
│ │ │ └── streaming.py
│ │ │ ├── kubernetes.py
│ │ │ └── local.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── batch
│ │ └── test_utils.py
│ │ ├── data-files
│ │ └── workflows
│ │ │ └── test_remote1.yaml
│ │ ├── secrets
│ │ ├── __init__.py
│ │ └── test_base.py
│ │ ├── test_dag.py
│ │ ├── test_messages.py
│ │ ├── test_settings.py
│ │ ├── test_template.py
│ │ └── workflow
│ │ ├── __init__.py
│ │ ├── test_kubernetes.py
│ │ └── test_remote.py
├── server
│ ├── Dockerfile
│ ├── README.md
│ ├── pctasks
│ │ └── server
│ │ │ ├── __init__.py
│ │ │ ├── constants.py
│ │ │ ├── dependencies.py
│ │ │ ├── logging.py
│ │ │ ├── main.py
│ │ │ ├── middleware.py
│ │ │ ├── py.typed
│ │ │ ├── request.py
│ │ │ ├── routes
│ │ │ ├── __init__.py
│ │ │ ├── code.py
│ │ │ ├── runs.py
│ │ │ └── workflows.py
│ │ │ ├── settings.py
│ │ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── routes
│ │ └── test_run.py
│ │ └── test_request.py
└── task
│ ├── README.md
│ ├── pctasks
│ └── task
│ │ ├── __init__.py
│ │ ├── _cli.py
│ │ ├── cli.py
│ │ ├── common
│ │ ├── __init__.py
│ │ ├── list_files.py
│ │ ├── list_prefixes.py
│ │ ├── summarize.py
│ │ └── write.py
│ │ ├── constants.py
│ │ ├── context.py
│ │ ├── py.typed
│ │ ├── run.py
│ │ ├── settings.py
│ │ ├── streaming.py
│ │ ├── task.py
│ │ ├── utils.py
│ │ └── version.py
│ ├── pyproject.toml
│ ├── requirements.txt
│ └── tests
│ ├── __init__.py
│ ├── common
│ ├── __init__.py
│ ├── test_list_files.py
│ ├── test_list_prefixes.py
│ ├── test_summarize.py
│ └── test_write.py
│ ├── data-files
│ ├── items
│ │ └── s1-rtc
│ │ │ └── 2019
│ │ │ └── 12
│ │ │ └── 15
│ │ │ └── IW
│ │ │ ├── DH
│ │ │ ├── S1A_IW_GRDH_1SDH_20191215T034818_20191215T034847_030353_0378EA_rtc.json
│ │ │ ├── S1A_IW_GRDH_1SDH_20191215T105713_20191215T105738_030358_037912_rtc.json
│ │ │ ├── S1A_IW_GRDH_1SDH_20191215T105738_20191215T105803_030358_037912_rtc.json
│ │ │ └── S1A_IW_GRDH_1SDH_20191215T105803_20191215T105828_030358_037912_rtc.json
│ │ │ └── DV
│ │ │ ├── S1A_IW_GRDH_1SDV_20191215T003835_20191215T003904_030352_0378DC_rtc.json
│ │ │ ├── S1A_IW_GRDH_1SDV_20191215T004249_20191215T004314_030352_0378DC_rtc.json
│ │ │ ├── S1A_IW_GRDH_1SDV_20191215T004314_20191215T004339_030352_0378DC_rtc.json
│ │ │ └── S1A_IW_GRDH_1SDV_20191215T004339_20191215T004404_030352_0378DC_rtc.json
│ └── test-files
│ │ ├── a
│ │ ├── three.txt
│ │ └── two.txt
│ │ ├── b
│ │ ├── c
│ │ │ ├── five.txt
│ │ │ └── six.txt
│ │ └── four.txt
│ │ └── one.txt
│ ├── test_cli.py
│ └── test_utils.py
├── pctasks_frontend
├── .dockerignore
├── .env.example
├── .gitignore
├── .prettierrc
├── .storybook
│ ├── main.js
│ └── preview.js
├── README.md
├── package-lock.json
├── package.json
├── public
│ ├── index.html
│ └── robots.txt
├── src
│ ├── App.test.tsx
│ ├── App.tsx
│ ├── components
│ │ ├── auth
│ │ │ ├── AuthPage
│ │ │ │ └── AuthPage.index.tsx
│ │ │ ├── hooks
│ │ │ │ ├── useApiClient.ts
│ │ │ │ └── useMsalToken.ts
│ │ │ ├── index.ts
│ │ │ └── login
│ │ │ │ ├── PcPersona.tsx
│ │ │ │ ├── SignInButton.tsx
│ │ │ │ ├── UserHeaderControl.tsx
│ │ │ │ └── index.ts
│ │ ├── common
│ │ │ ├── RunItem
│ │ │ │ └── RunItem.index.tsx
│ │ │ ├── RunTimes
│ │ │ │ ├── RunDuration.tsx
│ │ │ │ ├── RunStarted.tsx
│ │ │ │ ├── RunTimeBlock.tsx
│ │ │ │ └── RunTimeEntry.tsx
│ │ │ ├── StatusIcon
│ │ │ │ ├── StatusIcon.index.tsx
│ │ │ │ ├── __tests__
│ │ │ │ │ └── StatusIcon.stories.tsx
│ │ │ │ └── index.ts
│ │ │ ├── TextOutput
│ │ │ │ └── TextOutput.index.tsx
│ │ │ └── hooks
│ │ │ │ ├── index.ts
│ │ │ │ ├── useExpandButton.tsx
│ │ │ │ └── usePageTitle.tsx
│ │ ├── jobs
│ │ │ ├── JobRunItem
│ │ │ │ ├── JobRunItem.index.tsx
│ │ │ │ └── __tests__
│ │ │ │ │ ├── JobRunItem.stories.tsx
│ │ │ │ │ ├── data.ts
│ │ │ │ │ ├── workflow-job-definitions.json
│ │ │ │ │ └── workflow-job-runs.json
│ │ │ ├── JobRunList
│ │ │ │ └── JobRunList.index.tsx
│ │ │ ├── JobRunWithSubJobs
│ │ │ │ ├── JobRunWithSubJobs.index.tsx
│ │ │ │ └── __tests__
│ │ │ │ │ ├── data.ts
│ │ │ │ │ ├── jobWithSubJobs.json
│ │ │ │ │ └── subJobRuns.json
│ │ │ ├── JobRunWithTasks
│ │ │ │ ├── JobRunWithTasks.index.tsx
│ │ │ │ └── __tests__
│ │ │ │ │ ├── JobRunWithTasks.stories.tsx
│ │ │ │ │ └── data.ts
│ │ │ ├── JobStatusFilter
│ │ │ │ └── JobStatusFilter.index.tsx
│ │ │ ├── ParentJobRunItem
│ │ │ │ └── ParentJobRunItem.index.tsx
│ │ │ ├── hooks
│ │ │ │ └── useSubJobFilter.tsx
│ │ │ └── index.ts
│ │ ├── layout
│ │ │ ├── Header.tsx
│ │ │ └── index.ts
│ │ ├── tasks
│ │ │ ├── TaskRunItem
│ │ │ │ ├── TaskRunItem.index.tsx
│ │ │ │ ├── __tests__
│ │ │ │ │ ├── TaskRunItem.stories.tsx
│ │ │ │ │ └── data.ts
│ │ │ │ └── index.ts
│ │ │ ├── TaskRunList
│ │ │ │ ├── TaskRunList.index.tsx
│ │ │ │ ├── __tests__
│ │ │ │ │ ├── TaskRunList.stories.tsx
│ │ │ │ │ └── data.ts
│ │ │ │ └── index.ts
│ │ │ └── index.ts
│ │ └── workflows
│ │ │ ├── WorkflowRunHeader
│ │ │ ├── WorkflowRunHeader.index.tsx
│ │ │ └── __tests__
│ │ │ │ └── workflow-detail.json
│ │ │ ├── WorkflowRunItem
│ │ │ ├── WorkflowRunItem.index.tsx
│ │ │ ├── WorkflowRunItemErrors.tsx
│ │ │ └── __tests__
│ │ │ │ ├── WorkflowRunItem.stories.tsx
│ │ │ │ └── data.ts
│ │ │ ├── WorkflowRunList
│ │ │ ├── WorkflowRunList.index.tsx
│ │ │ └── __tests__
│ │ │ │ ├── WorkflowRunList.stories.tsx
│ │ │ │ ├── data.ts
│ │ │ │ └── workflow-runs.json
│ │ │ └── index.ts
│ ├── global.d.ts
│ ├── helpers
│ │ ├── api.ts
│ │ ├── auth.ts
│ │ ├── constants.ts
│ │ ├── job-create-splits.json
│ │ ├── jobs.ts
│ │ ├── logs-create-splits.json
│ │ ├── task-create-splits.json
│ │ ├── tasks.ts
│ │ ├── time.ts
│ │ ├── utils.ts
│ │ └── workflows.ts
│ ├── index.css
│ ├── index.tsx
│ ├── pages
│ │ ├── Home
│ │ │ └── Home.index.tsx
│ │ ├── WorkflowDetail
│ │ │ └── WorkflowDetail.index.tsx
│ │ ├── Workflows
│ │ │ └── Workflows.index.tsx
│ │ └── index.ts
│ ├── react-app-env.d.ts
│ ├── reportWebVitals.ts
│ ├── setupTests.ts
│ ├── state
│ │ └── SelectionProvider.tsx
│ ├── styles
│ │ └── global.ts
│ └── types
│ │ ├── enums.ts
│ │ └── index.ts
└── tsconfig.json
├── pctasks_funcs
├── .dockerignore
├── .funcignore
├── .gitignore
├── Dockerfile
├── PublishItemsCF
│ ├── __init__.py
│ └── function.json
├── StorageEventsCF
│ ├── __init__.py
│ └── function.json
├── StorageEventsQueue
│ ├── __init__.py
│ └── function.json
├── WorkflowRunsCF
│ ├── __init__.py
│ └── function.json
├── WorkflowsCF
│ ├── __init__.py
│ └── function.json
├── host.json
├── pctasks_funcs_base
│ └── __init__.py
├── requirements-deploy.txt
├── requirements.txt
├── start.sh
└── tests
│ ├── items_document.json
│ ├── stac_item_record.json
│ ├── storage_event.json
│ ├── test_publish_items.py
│ └── test_storage_events.py
├── pytest.ini
├── requirements-dev.txt
├── requirements-task-base.txt
├── scripts
├── bin
│ ├── format
│ ├── pctasks-pip-compile
│ ├── test
│ └── test-integration
├── build
├── ciauthenticate
├── cideploy
├── cipublish-pkgs
├── citest-integration
├── cluster
├── console
├── env
├── format
├── generate-requirements
├── install
├── publish
├── server
├── setup
├── test
├── test-integration
├── update
└── validate-collections
├── tests
├── __init__.py
├── constants.py
├── data-files
│ ├── assets
│ │ ├── a
│ │ │ └── asset-a-1.json
│ │ ├── b
│ │ │ ├── b_1
│ │ │ │ └── asset-b_1-1.json
│ │ │ └── b_2
│ │ │ │ └── asset-b_2-1.json
│ │ └── c
│ │ │ ├── c_1
│ │ │ ├── c_1_1
│ │ │ │ ├── asset-c_1_1-1.json
│ │ │ │ └── asset-c_1_1-2.json
│ │ │ └── c_1_2
│ │ │ │ └── asset-c_1-2.json
│ │ │ └── c_2
│ │ │ └── c_2_1
│ │ │ ├── asset-c_2_1-1.json
│ │ │ └── asset-c_2_1-2.json
│ ├── collection.json
│ ├── collection_template
│ │ ├── description.md
│ │ └── template.json
│ ├── modis
│ │ ├── collection.json
│ │ └── items.ndjson
│ └── simple-assets
│ │ ├── a
│ │ ├── asset-a-1.json
│ │ └── asset-a-2.json
│ │ └── b
│ │ ├── asset-b-1.json
│ │ └── asset-b-2.json
├── dataset
│ ├── __init__.py
│ ├── collection.py
│ ├── dataset.yaml
│ ├── streaming-create-items.yaml
│ ├── streaming-ingest.yaml
│ └── test_dataset.py
├── ingest
│ ├── test_collection_ingest.py
│ └── test_modis_ingest.py
├── tasks.py
├── test_foreach.py
├── test_invalid_image.py
└── workflows
│ ├── ingest-collection.yaml
│ └── test-foreach.yaml
├── workflow.yaml
└── workflows
└── streaming-ingest.yaml
/.dockerignore:
--------------------------------------------------------------------------------
1 | **/.envrc
2 | **/.direnv
3 | **/__pycache__
4 | **/.mypy_cache
5 | **/.pytest_cache
6 | **/.terraform
7 | **/node_modules
8 | **/.terraform
9 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | extend-ignore = E203, W503, E731, E722
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for the Microsoft Planetary Computer
4 | title: ''
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen. Ex. I would like to use stac to do [...]
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/.github/workflows/publish-charts.yml:
--------------------------------------------------------------------------------
1 | name: Publish charts (release)
2 |
3 | on:
4 | push:
5 | tags: ["*"]
6 | workflow_dispatch:
7 |
8 | defaults:
9 | run:
10 | shell: bash
11 |
12 | jobs:
13 | build:
14 | permissions:
15 | contents: write
16 | runs-on: ubuntu-20.04
17 |
18 | steps:
19 | - uses: actions/checkout@v2
20 |
21 | - name: Get tag
22 | id: previoustag
23 | uses: "WyriHaximus/github-action-get-previous-tag@v1"
24 |
25 | - name: Publish Helm charts
26 | uses: stefanprodan/helm-gh-pages@master
27 | with:
28 | token: ${{ secrets.GITHUB_TOKEN }}
29 | charts_dir: "deployment/helm/published"
30 | linting: "off"
31 | helm_version: 3.5.4
32 | chart_version: ${{steps.previoustag.outputs.tag}}
--------------------------------------------------------------------------------
/.github/workflows/publish-func-package-dev.yml:
--------------------------------------------------------------------------------
1 | name: Publish function package (dev)
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | workflow_dispatch:
7 |
8 | defaults:
9 | run:
10 | shell: bash
11 |
12 | jobs:
13 | publish:
14 | runs-on: ubuntu-20.04
15 | permissions:
16 | contents: write
17 |
18 | steps:
19 | - uses: actions/checkout@v2
20 | with:
21 | fetch-depth: 0
22 |
23 | - name: "Get Previous tag"
24 | id: previoustag
25 | uses: "WyriHaximus/github-action-get-previous-tag@v1"
26 | with:
27 | fallback: 2022.2.0
28 |
29 | - name: "Get next minor version"
30 | id: semvers
31 | uses: "WyriHaximus/github-action-next-semvers@v1"
32 | with:
33 | version: ${{ steps.previoustag.outputs.tag }}
34 |
35 | - name: "Publish package"
36 | env:
37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
38 | run: ./scripts/cipublish-pkgs -t "${{ steps.semvers.outputs.minor }}-dev"
39 |
--------------------------------------------------------------------------------
/.github/workflows/publish-func-package.yml:
--------------------------------------------------------------------------------
1 | name: Publish function package (release)
2 |
3 | on:
4 | push:
5 | tags: ["*"]
6 | workflow_dispatch:
7 |
8 | defaults:
9 | run:
10 | shell: bash
11 |
12 | jobs:
13 | publish:
14 | runs-on: ubuntu-20.04
15 | permissions:
16 | contents: write
17 |
18 | steps:
19 | - uses: actions/checkout@v2
20 | with:
21 | fetch-depth: 0
22 |
23 | - name: "Get tag"
24 | id: previoustag
25 | uses: "WyriHaximus/github-action-get-previous-tag@v1"
26 | with:
27 | fallback: 2022.2.0
28 |
29 | - name: "Publish package"
30 | env:
31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
32 | run: ./scripts/cipublish-pkgs -t "${{ steps.previoustag.outputs.tag }}"
33 |
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | profile = black
3 | multi_line_output = 3
4 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 |
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 |
5 | Resources:
6 |
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 |
--------------------------------------------------------------------------------
/Dockerfile.stacapi:
--------------------------------------------------------------------------------
1 | FROM python:3.8-slim
2 |
3 |
4 | ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
5 |
6 | ENV PATH=$PATH:/install/bin
7 |
8 | RUN pip install stac_fastapi.api==2.4.1 stac_fastapi.pgstac==2.4.1 uvicorn==0.17.6
9 |
10 | RUN mkdir -p /opt/src
11 | COPY dev/stacapi.py /opt/src/stacapi.py
12 | WORKDIR /opt/src
13 |
--------------------------------------------------------------------------------
/Dockerfile.stacbrowser:
--------------------------------------------------------------------------------
1 | FROM mcr.microsoft.com/cbl-mariner/base/nodejs:16
2 |
3 | RUN tdnf install -y git
4 |
5 | RUN mkdir -p /opt/src
6 | WORKDIR /opt/src
7 | RUN git clone https://github.com/radiantearth/stac-browser
8 | WORKDIR /opt/src/stac-browser
9 | RUN git checkout v3.0.0-beta.1
10 | RUN npm install
11 | RUN npm install http-server -g
12 | RUN npm run build -- --catalogUrl="http://localhost:8513/"
13 | WORKDIR /opt/src/stac-browser/dist
14 |
15 | CMD ["http-server", "-p", "8080", "."]
16 |
--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
1 | # Support
2 |
3 | ## How to file issues and get help
4 |
5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing
6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or
7 | feature request as a new Issue.
8 |
9 | For help and questions about using this project, please use the [Planetary Computer Discussions](https://github.com/microsoft/PlanetaryComputer/discussions) page.
10 |
11 | ## Microsoft Support Policy
12 |
13 | Support for this project is limited to the resources listed above.
14 |
--------------------------------------------------------------------------------
/cluster/README.md:
--------------------------------------------------------------------------------
1 | # Cluster configuration
2 |
3 | Configuration for the Kubernetes cluster for local development
--------------------------------------------------------------------------------
/cluster/argo-values.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | baseHref: /argo/
3 | secure: false
4 | extraArgs:
5 | - --auth-mode=server
--------------------------------------------------------------------------------
/cluster/pctasks-dev/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: pctasks-dev-ingress
3 | description: A Helm chart for setting up the development cluster
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | Application information:
2 | {{ include "pcdev.selectorLabels" . }}
3 | Ingress host: {{ .Values.pcdev.pctasks_ingress.host }}
4 | Service Fullname: {{ include "pcdev.fullname" . }}
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/nginx-configmap.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | annotations:
5 | labels:
6 | app: pc-apis-ingress
7 | name: nginx-configuration
8 | namespace: {{ .Values.namespace }}
9 | data:
10 | use-forwarded-headers: "true"
11 | enable-real-ip: "true"
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/role.yaml:
--------------------------------------------------------------------------------
1 | kind: Role
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | metadata:
4 | name: pctasks
5 | rules:
6 | - verbs:
7 | - get
8 | - list
9 | - watch
10 | - update
11 | - create
12 | - patch
13 | - delete
14 | apiGroups:
15 | - argoproj.io
16 | resources:
17 | - workflows
18 |
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/rolebinding.yaml:
--------------------------------------------------------------------------------
1 | kind: RoleBinding
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | metadata:
4 | name: argo-workflows-manual-rolebinding
5 | namespace: pc
6 | subjects:
7 | - kind: ServiceAccount
8 | name: pctasks-sa
9 | namespace: {{ .Release.Namespace }}
10 | roleRef:
11 | kind: Role
12 | name: argo-workflows-manual-role
13 | apiGroup: rbac.authorization.k8s.io
14 |
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/secret.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: pctasks-sa-token
5 | namespace: pc
6 | annotations:
7 | kubernetes.io/service-account.name: pctasks-sa
8 | type: kubernetes.io/service-account-token
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 | name: pctasks-sa
5 | namespace: pc
--------------------------------------------------------------------------------
/cluster/pctasks-dev/values.yaml:
--------------------------------------------------------------------------------
1 | environment: "staging"
2 | namespace: "default"
3 |
4 | pcingress:
5 | services:
6 | pctasks:
7 | path: ""
8 | name: ""
9 | port: ""
10 |
11 | nameOverride: ""
12 | fullnameOverride: ""
13 |
--------------------------------------------------------------------------------
/datasets/aster/README.md:
--------------------------------------------------------------------------------
1 | # planetary-computer-tasks dataset: aster
2 |
3 | For now, this dataset exists only to update existing ASTER items with new geometries, using [stactools's footprint capabilities](https://stactools.readthedocs.io/en/stable/footprint.html).
4 | See [update-geometries.yaml](./update-geometries.yaml) for the workflow.
5 |
6 | ## Running
7 |
8 | To run a test and watch it go:
9 |
10 | ```shell
11 | pctasks workflow upsert-and-submit datasets/aster/update-geometries.yaml | tee /dev/stderr | xargs pctasks runs status -w
12 | ```
13 |
14 | ### Building the Docker image
15 |
16 | The update-geometries workflow uses a large number of workers, and if they all hit PyPI at the same time, they can get rate limited.
17 | To avoid that problem, we use a custom image in the workflow.
18 | To build and push a custom docker image to our container registry:
19 |
20 | ```shell
21 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-aster:latest -f datasets/aster/Dockerfile .
22 | ```
23 |
--------------------------------------------------------------------------------
/datasets/aster/collection/description.md:
--------------------------------------------------------------------------------
1 | The [ASTER](https://terra.nasa.gov/about/terra-instruments/aster) instrument, launched on-board NASA's [Terra](https://terra.nasa.gov/) satellite in 1999, provides multispectral images of the Earth at 15m-90m resolution. ASTER images provide information about land surface temperature, color, elevation, and mineral composition.\n\nThis dataset represents ASTER [L1T](https://lpdaac.usgs.gov/products/ast_l1tv003/) data from 2000-2006. L1T images have been terrain-corrected and rotated to a north-up UTM projection. Images are in [cloud-optimized GeoTIFF](https://www.cogeo.org/) format.
2 |
--------------------------------------------------------------------------------
/datasets/aster/dataset.yaml:
--------------------------------------------------------------------------------
1 | # TODO actually implement this -- this is currently a placeholder just to upload the collection
2 | id: aster
3 | image: ${{ args.registry }}/pctasks-aster:latest
4 |
5 | args:
6 | - registry
7 |
8 | code:
9 | src: ${{ local.path(./aster.py) }}
10 | requirements: ${{ local.path(./requirements.txt) }}
11 |
12 | collections:
13 | - id: aster-l1t
14 | template: ${{ local.path(./collection) }}
15 | class: aster:AsterL1tCollection
16 | asset_storage:
17 | - uri: blob://astersa/aster/
18 | token: ${{ pc.get_token(astersa, aster) }}
19 | chunks:
20 | options:
21 | name_starts_with: images/L1T
22 | ends_with: .xml
23 | chunk_length: 1000
24 | chunk_storage:
25 | uri: blob://astersa/aster-etl-data/chunks/
26 |
--------------------------------------------------------------------------------
/datasets/aster/requirements.txt:
--------------------------------------------------------------------------------
1 | adlfs==2022.7.0
2 | geopandas==0.12.1
3 | stactools-aster==0.2.1
4 | git+https://github.com/TomAugspurger/stac-geoparquet@09f3bce33c4e2ab8a796b21fd02df55c1b7754f9
5 | orjson==3.*
6 |
--------------------------------------------------------------------------------
/datasets/aster/scripts/print_partition_paths.py:
--------------------------------------------------------------------------------
1 | from importlib.metadata import files
2 | import adlfs
3 | import planetary_computer
4 | from pystac import Collection
5 |
6 | collection = Collection.from_file(
7 | "https://planetarycomputer.microsoft.com/api/stac/v1/collections/aster-l1t"
8 | )
9 | asset = planetary_computer.sign(collection.assets["geoparquet-items"])
10 | filesystem = adlfs.AzureBlobFileSystem(**asset.extra_fields["table:storage_options"])
11 | for path in filesystem.ls("items/aster-l1t.parquet"):
12 | print(path)
13 |
--------------------------------------------------------------------------------
/datasets/aster/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/aster/tests/__init__.py
--------------------------------------------------------------------------------
/datasets/aster/tests/data-files/aster-l1t-subset.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/aster/tests/data-files/aster-l1t-subset.parquet
--------------------------------------------------------------------------------
/datasets/aster/tests/test_tasks.py:
--------------------------------------------------------------------------------
1 | # Note you will have to run this with `python -m pytest` from the datasets/aster directory
2 |
3 | from pathlib import Path
4 |
5 | import aster
6 | import orjson
7 |
8 |
9 | def test_update_geometries_from_dataframe() -> None:
10 | path = Path(__file__).parent / "data-files" / "aster-l1t-subset.parquet"
11 | item_collection = aster.read_item_collection(path)
12 | item = aster.sign_and_update(item_collection.items[0], 0.001)
13 | _ = orjson.dumps(aster.fix_dict(item.to_dict(include_self_link=False))).decode(
14 | "utf-8"
15 | )
16 |
--------------------------------------------------------------------------------
/datasets/aster/update-geometries-ingest.yaml:
--------------------------------------------------------------------------------
1 | name: Ingest NDJsons from blob://astersa/aster-etl-data/items/update-geometries
2 | jobs:
3 | ingest-items:
4 | id: ingest-items
5 | tasks:
6 | - id: ingest-ndjson
7 | image_key: ingest
8 | task: pctasks.ingest_task.task:ingest_task
9 | args:
10 | content:
11 | type: Ndjson
12 | ndjson_folder:
13 | uri: blob://astersa/aster-etl-data/items/update-geometries
14 | extensions:
15 | - .ndjson
16 | matches: \d+.ndjson
17 | options:
18 | insert_group_size: 5000
19 | insert_only: false
20 | schema_version: 1.0.0
21 | schema_version: 1.0.0
22 | id: aster-update-geometries-ingest
23 | dataset: microsoft/aster-l1t
24 |
25 |
--------------------------------------------------------------------------------
/datasets/chesapeake_lulc/chesapeake_lulc.py:
--------------------------------------------------------------------------------
1 | from typing import List, Union
2 |
3 | import pystac
4 | from stactools.chesapeake_lulc.stac import create_item
5 |
6 | from pctasks.core.models.task import WaitTaskResult
7 | from pctasks.core.storage import StorageFactory
8 | from pctasks.dataset.collection import Collection
9 |
10 |
11 | class ChesapeakeCollection(Collection):
12 | @classmethod
13 | def create_item(
14 | cls, asset_uri: str, storage_factory: StorageFactory
15 | ) -> Union[List[pystac.Item], WaitTaskResult]:
16 | storage, asset_path = storage_factory.get_storage_for_file(asset_uri)
17 | href = storage.get_url(asset_path)
18 | item = create_item(href, read_href_modifier=storage.sign)
19 | return [item]
20 |
--------------------------------------------------------------------------------
/datasets/chesapeake_lulc/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools.chesapeake-lulc @ git+https://github.com/stactools-packages/chesapeake-lulc.git@698b13066cb5ffeb55f972d79d21ce04ec30874e
--------------------------------------------------------------------------------
/datasets/conus404/README.md:
--------------------------------------------------------------------------------
1 | # CONUS404
2 |
3 | ## First-time publishing
4 | First you need to validate the STAC collection with `pctasks dataset validate-collection [path-to-template.json]`, and fix any validation errors.
5 |
6 | Then submit the collection ingestion with `pctasks dataset ingest-collection -d datasets/conus404/dataset.yaml -s -a registry pccomponents`
7 |
8 | Get the workflow ID and then watch it with: `pctasks runs status $WORKFLOW_ID --watch`.
9 | It must succeed.
10 |
11 | Verify that it was successful with `curl "https://planetarycomputer.microsoft.com/api/stac/v1/collections/conus404"`
12 |
13 | ## Updating
14 | Simply add `-u` to the command.
15 | `pctasks dataset ingest-collection -d datasets/conus404/dataset.yaml -u -s -a registry pccomponents`
--------------------------------------------------------------------------------
/datasets/conus404/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: conus404
2 | image: ${{ args.registry }}/pctasks-task-base:latest
3 |
4 | args:
5 | - registry
6 |
7 | collections:
8 | - id: conus404
9 | template: ${{ local.path(./collection) }}
10 | class: pctasks.dataset.collection:PremadeItemCollection
11 | asset_storage: []
12 | chunk_storage:
13 | uri: "blob://cpdataeuwest/cpdata-etl-data/chunks"
14 |
15 |
--------------------------------------------------------------------------------
/datasets/deltaresfloods/README.md:
--------------------------------------------------------------------------------
1 | # deltaresfloods PC Tasks
2 |
3 | PCTasks code for ingesting deltaresfloods data into the Planetary Computer.
4 | These STAC items JSONs are stored as blobs in the `deltaresfloodssa` storage account under the `floods-stac` container.
5 | You can mount them locally and make modifications before re-ingesting them into the STAC database.
6 |
7 | ## Item Updates
8 | After fixing items in the `deltaresfloodssa/floods-stac` container, you may run to reingest them:
9 |
10 | ```bash
11 | pctasks dataset process-items xarray-access-fix \
12 | -a since "2024-11-17T00:00:00Z" \
13 | --dataset datasets/deltaresfloods/dataset.yaml \
14 | --is-update-workflow --upsert --submit
15 | ```
16 |
17 | Set `since` to a date strictly before you modified the STAC items in storage.
18 | For example, if you modified items on December 4, set `since` to any date before December 4.
19 | `since` must be a full ISO 8601 datetime.
--------------------------------------------------------------------------------
/datasets/deltaresfloods/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: deltaresfloods
2 | image: pccomponents.azurecr.io/pctasks-task-base:latest
3 | target_environment: staging
4 |
5 | collections:
6 | - id: deltares-floods
7 | template: ${{ local.path(./collection) }}
8 | class: pctasks.dataset.collection:PremadeItemCollection
9 | asset_storage:
10 | # the STAC items
11 | - uri: blob://deltaresfloodssa/floods-stac
12 | token: ${{ pc.get_token(deltaresfloodssa, floods-stac)}}
13 | chunks:
14 | options:
15 | chunk_length: 3000
16 | extensions:
17 | - ".json"
18 | chunk_storage:
19 | uri: blob://deltaresfloodssa/floods-etl-data/chunks
20 |
--------------------------------------------------------------------------------
/datasets/deltaresfloods/tests/test_dataset.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 |
4 | from pctasks.cli.cli import setup_logging, setup_logging_for_module
5 | from pctasks.dev.test_utils import run_process_items_workflow
6 |
7 | HERE = Path(__file__).parent
8 | DATASET_PATH = HERE / ".." / "dataset.yaml"
9 |
10 |
11 | def test_dataset():
12 | run_process_items_workflow(
13 | DATASET_PATH, image="localhost:5001/pctasks-task-base:latest"
14 | )
15 |
16 |
17 | if __name__ == "__main__":
18 | setup_logging(logging.DEBUG)
19 | setup_logging_for_module("__main__", logging.DEBUG)
20 | test_dataset()
21 | print("All tests passed")
22 | exit(0)
23 |
--------------------------------------------------------------------------------
/datasets/eclipse/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: eclipse
2 | image: ${{ args.registry }}/pctasks-task-base:latest
3 |
4 | args:
5 | - registry
6 |
7 | collections:
8 | - id: eclipse
9 | template: ${{ local.path(./collection) }}
10 | class: pctasks.dataset.collection:PremadeItemCollection
11 | asset_storage:
12 | # the STAC items
13 | - uri: blob://ai4edataeuwest/eclipse-stac
14 | token: ${{ pc.get_token(ai4edataeuwest, eclipse-stac)}}
15 | chunks:
16 | options:
17 | chunk_length: 3000
18 | extensions:
19 | - ".json"
20 | chunk_storage:
21 | uri: blob://ai4edataeuwest/eclipse-etl-data/chunks
22 |
--------------------------------------------------------------------------------
/datasets/ecmwf-forecast/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: ecmwf_forecast
2 | image: ${{ args.registry }}/pctasks-ecmwf-forecast:2024.6.13.0
3 |
4 | args:
5 | - registry
6 |
7 | code:
8 | src: ${{ local.path(./ecmwf_forecast.py) }}
9 |
10 | environment:
11 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }}
12 |
13 | collections:
14 | - id: ecmwf-forecast
15 | template: ${{ local.path(./collection/) }}
16 | class: ecmwf_forecast:EcmwfCollection
17 | asset_storage:
18 | - uri: blob://ai4edataeuwest/ecmwf/
19 | chunks:
20 | options:
21 | # currently excluding "aifs", in favor of "ifs"
22 | # Could put that in a different collection, or modify
23 | # the stactools package.
24 | matches: /ifs/(0p25|0p4-beta)/(enfo|oper|waef|wave)(?!-opendata)
25 | match_full_path: true
26 | extensions: [.grib2]
27 | chunk_storage:
28 | uri: blob://ai4edataeuwest/ecmwf-etl-data/pctasks/
29 |
--------------------------------------------------------------------------------
/datasets/ecmwf-forecast/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/ecmwf-forecast@0.2.0
2 |
--------------------------------------------------------------------------------
/datasets/ecmwf-forecast/test_ecmwf_forecast.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from ecmwf_forecast import EcmwfCollection
3 | from pctasks.core.storage import StorageFactory
4 |
5 |
6 | @pytest.mark.parametrize(
7 | "href",
8 | [
9 | "blob://ai4edataeuwest/ecmwf/20240314/00z/ifs/0p4-beta/enfo/20240314000000-0h-enfo-ef.grib2",
10 | "blob://ai4edataeuwest/ecmwf/20240314/00z/ifs/0p25/waef/20240314000000-0h-waef-ef.grib2",
11 | ],
12 | )
13 | def test_ecmwf(href: str) -> None:
14 | storage_factory = StorageFactory()
15 | (item,) = EcmwfCollection.create_item(href, storage_factory)
16 | assert "ecmwf:resolution" in item.properties
17 | if "/0p4-beta/" in href:
18 | assert item.properties["ecmwf:resolution"] == "0.40"
19 | if "/0p25/" in href:
20 | assert item.properties["ecmwf:resolution"] == "0.25"
21 |
--------------------------------------------------------------------------------
/datasets/era5-pds/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: era5_pds
2 | image: ${{ args.registry }}/pctasks-task-base:latest
3 |
4 | args:
5 | - registry
6 |
7 | collections:
8 |   - id: era5-pds
9 | template: ${{ local.path(./collection) }}
10 | class: pctasks.dataset.collection:PremadeItemCollection
11 | asset_storage:
12 | # the STAC items
13 | - uri: blob://cpdataeuwest/era5-stac
14 | token: ${{ pc.get_token(cpdataeuwest, era5-stac)}}
15 | chunks:
16 | options:
17 | chunk_length: 3000
18 | extensions:
19 | - ".json"
20 | chunk_storage:
21 | uri: blob://cpdataeuwest/era5-etl-data/chunks
22 |
--------------------------------------------------------------------------------
/datasets/esa-cci-lc/README.md:
--------------------------------------------------------------------------------
1 | # ESA CCI
2 |
3 | ## Docker container
4 |
5 | ```shell
6 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-esa-cci-lc:latest -f datasets/esa-cci-lc/Dockerfile .
7 | ```
8 |
--------------------------------------------------------------------------------
/datasets/esa-cci-lc/collection/esa-cci-lc-netcdf/description.md:
--------------------------------------------------------------------------------
1 | The ESA Climate Change Initiative (CCI) [Land Cover dataset](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview) provides consistent global annual land cover maps at 300m spatial resolution from 1992 to 2020. The land cover classes are defined using the United Nations Food and Agriculture Organization's (UN FAO) [Land Cover Classification System](https://www.fao.org/land-water/land/land-governance/land-resources-planning-toolbox/category/details/en/c/1036361/) (LCCS). In addition to the land cover maps, four quality flags are produced to document the reliability of the classification and change detection.
2 |
3 | The data in this Collection are the original NetCDF files accessed from the [Copernicus Climate Data Store](https://cds.climate.copernicus.eu/#!/home). We recommend users use the [`esa-cci-lc` Collection](https://planetarycomputer.microsoft.com/dataset/esa-cci-lc), which provides the data as Cloud Optimized GeoTIFFs.
--------------------------------------------------------------------------------
/datasets/esa-cci-lc/collection/esa-cci-lc/description.md:
--------------------------------------------------------------------------------
1 | The ESA Climate Change Initiative (CCI) [Land Cover dataset](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview) provides consistent global annual land cover maps at 300m spatial resolution from 1992 to 2020. The land cover classes are defined using the United Nations Food and Agriculture Organization's (UN FAO) [Land Cover Classification System](https://www.fao.org/land-water/land/land-governance/land-resources-planning-toolbox/category/details/en/c/1036361/) (LCCS). In addition to the land cover maps, four quality flags are produced to document the reliability of the classification and change detection.
2 |
3 | The data in this Collection have been converted from the [original NetCDF data](https://planetarycomputer.microsoft.com/dataset/esa-cci-lc-netcdf) to a set of tiled [Cloud Optimized GeoTIFFs](https://www.cogeo.org/) (COGs).
4 |
--------------------------------------------------------------------------------
/datasets/esa-cci-lc/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/esa-cci-lc.git@c468c0a3e9149e530a72d74d51bf288f927fd41a
--------------------------------------------------------------------------------
/datasets/esa-worldcover/README.md:
--------------------------------------------------------------------------------
1 | # planetary-computer-tasks dataset: esa-worldcover
2 |
3 | ## Building the Docker image
4 |
5 | To build and push a custom docker image to our container registry:
6 |
7 | ```shell
8 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-esa-worldcover:latest -f datasets/esa-worldcover/Dockerfile .
9 | ```
10 |
--------------------------------------------------------------------------------
/datasets/esa-worldcover/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: esa-worldcover
2 | image: ${{ args.registry }}/pctasks-esa-worldcover:latest
3 |
4 | args:
5 | - registry
6 |
7 | code:
8 | src: ${{ local.path(./esa_worldcover.py) }}
9 |
10 | collections:
11 | - id: esa-worldcover
12 | template: ${{ local.path(./collection) }}
13 | class: esa_worldcover:ESAWorldCoverCollection
14 | asset_storage:
15 | - uri: blob://ai4edataeuwest/esa-worldcover/
16 | chunks:
17 | options:
18 | extensions: [.tif]
19 | # The 'name_starts_with' filter will run a single year only. This
20 | # is helpful since this is an annual product -> next year we can
21 | # update this field and create only the items needed.
22 | name_starts_with: v100/2020/map
23 | chunk_length: 200
24 | # limit: 40
25 | chunk_storage:
26 | uri: blob://ai4edataeuwest/esa-worldcover-etl-data/pctasks/
27 |
--------------------------------------------------------------------------------
/datasets/esa-worldcover/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/esa-worldcover.git@164fcfea77954c87eb73d465d8da4bee00e1840c
--------------------------------------------------------------------------------
/datasets/fws-nwi/README.md:
--------------------------------------------------------------------------------
1 | # planetary-computer-tasks dataset: fws-nwi
2 |
3 | ## Building the Docker image
4 |
5 | To build and push a custom docker image to our container registry:
6 |
7 | ```shell
8 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-fws-nwi:latest -f datasets/fws-nwi/Dockerfile .
9 | ```
10 |
--------------------------------------------------------------------------------
/datasets/fws-nwi/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: fws-nwi
2 | image: ${{ args.registry }}/pctasks-fws-nwi:latest
3 | args:
4 | - registry
5 | code:
6 | src: ${{ local.path(./fws_nwi.py) }}
7 | # requirements: ${{ local.path(./requirements.txt) }}
8 |
9 | task_config:
10 | fws-nwi:
11 | create-items:
12 | tags:
13 | batch_pool_id: high_memory_pool
14 |
15 | collections:
16 | - id: fws-nwi
17 | template: ${{ local.path(./collection) }}
18 | class: fws_nwi:FwsNwiCollection
19 | asset_storage:
20 | - uri: blob://landcoverdata/fws-nwi-onboarding/
21 | chunks:
22 | options:
23 | extensions: [.zip]
24 | chunk_length: 1
25 | chunk_storage:
26 | uri: blob://landcoverdata/fws-nwi-etl-data/
27 |
--------------------------------------------------------------------------------
/datasets/fws-nwi/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-fws-nwi == 0.2.0
2 |
--------------------------------------------------------------------------------
/datasets/gbif/README.md:
--------------------------------------------------------------------------------
1 | # planetary-computer-tasks dataset: gbif
2 |
3 | Global Biodiversity Information Facility
4 |
5 | ## Building the Docker image
6 |
7 | To build and push a custom docker image to our container registry:
8 |
9 | ```shell
10 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-gbif:latest -t pctasks-gbif:{date}.{count} -f datasets/gbif/Dockerfile .
11 | ```
12 |
13 | ## Update workflow
14 |
15 | The update workflow was registered with
16 |
17 | ```shell
18 | pctasks dataset process-items gbif-update --is-update-workflow --dataset datasets/gbif/dataset.yaml -u
19 | ```
--------------------------------------------------------------------------------
/datasets/gbif/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: gbif
2 | image: ${{ args.registry }}/pctasks-gbif:20230607.1
3 |
4 | args:
5 | - registry
6 |
7 | code:
8 | src: ${{ local.path(./gbif.py) }}
9 | requirements: ${{ local.path(./requirements.txt) }}
10 |
11 | environment:
12 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }}
13 |
14 | collections:
15 | - id: gbif
16 | template: ${{ local.path(./collection/gbif) }}
17 | token: ${{ pc.get_token(ai4edataeuwest, gbif) }}
18 | class: gbif:GBIFCollection
19 | asset_storage:
20 | - uri: blob://ai4edataeuwest/gbif/
21 | chunks:
22 | options:
23 | list_folders: true
24 | min_depth: 2
25 | max_depth: 2
26 | chunk_storage:
27 | uri: blob://ai4edataeuwest/gbif-etl-data/pctasks-chunks
--------------------------------------------------------------------------------
/datasets/gbif/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/TomAugspurger/gbif
2 | git+https://github.com/stac-utils/stac-table@99a30be419baa2445ab6d0756629eea079c80972
3 | dask
4 | dask-geopandas
5 | pytest
6 | adlfs
7 |
--------------------------------------------------------------------------------
/datasets/goes/goes-cmi/goes_cmi/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/goes/goes-cmi/goes_cmi/__init__.py
--------------------------------------------------------------------------------
/datasets/goes/goes-cmi/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-goes==0.1.8
2 | pystac==1.10.1
--------------------------------------------------------------------------------
/datasets/goes/goes-glm/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-goes-glm==0.2.4
--------------------------------------------------------------------------------
/datasets/goes/goes-glm/tests/test_dataset.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 |
4 | from pctasks.cli.cli import setup_logging, setup_logging_for_module
5 | from pctasks.dev.test_utils import run_process_items_workflow
6 |
7 | HERE = Path(__file__).parent
8 | DATASET_PATH = HERE / "dataset-dev-cluster.yaml"
9 |
10 |
11 | def test_goes_glm():
12 | run_process_items_workflow(
13 | DATASET_PATH,
14 | collection_id="goes-glm",
15 | args={
16 | "registry": "localhost:5001",
17 | },
18 | splits_limit=1,
19 | chunks_limit=2,
20 | timeout_seconds=600
21 | )
22 |
23 |
24 | if __name__ == "__main__":
25 | setup_logging(logging.DEBUG)
26 | setup_logging_for_module("__main__", logging.DEBUG)
27 | test_goes_glm()
28 | print("Test passed")
29 | exit(0)
30 |
--------------------------------------------------------------------------------
/datasets/io-biodiversity/README.md:
--------------------------------------------------------------------------------
1 | # planetary-computer-tasks dataset: io-biodiversity
2 |
3 | ## Building the Docker image
4 |
5 | To build and push a custom docker image to our container registry:
6 |
7 | ```shell
8 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-io-biodiversity:latest -f datasets/io-biodiversity/Dockerfile .
9 | ```
10 |
--------------------------------------------------------------------------------
/datasets/io-biodiversity/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: io_biodiversity
2 | image: ${{ args.registry }}/pctasks-io-biodiversity:latest
3 |
4 | args:
5 | - registry
6 |
7 | code:
8 | src: ${{ local.path(./io_biodiversity.py) }}
9 |
10 | collections:
11 | - id: io-biodiversity
12 | template: ${{ local.path(./collection) }}
13 | class: io_biodiversity:IOBiodiversityIntactness
14 | asset_storage:
15 | - uri: blob://pcdata01euw/impact/bii-v1
16 | token: ${{ pc.get_token(pcdata01euw, impact) }}
17 | chunks:
18 | options:
19 | ends_with: ".tif"
20 | chunk_length: 500 # 1224 blobs per year; 4 years of data
21 |
22 | chunk_storage:
23 | uri: blob://pcdata01euw/impact-etl-data/pctasks/
24 |
--------------------------------------------------------------------------------
/datasets/io-biodiversity/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stac-utils/stactools.git@c2bdf32331a9782373d7152472093cd4bc4298a9
--------------------------------------------------------------------------------
/datasets/io-land-cover/collection/io-lulc/description.md:
--------------------------------------------------------------------------------
1 | __Note__: _A new version of this item is available for your use. This mature version of the map remains available for use in existing applications. This item will be retired in December 2024. There is 2020 data available in the newer [9-class dataset](https://planetarycomputer.microsoft.com/dataset/io-lulc-9-class)._
2 |
3 | Global estimates of 10-class land use/land cover (LULC) for 2020, derived from ESA Sentinel-2 imagery at 10m resolution. This dataset was generated by [Impact Observatory](http://impactobservatory.com/), who used billions of human-labeled pixels (curated by the National Geographic Society) to train a deep learning model for land classification. The global map was produced by applying this model to the relevant yearly Sentinel-2 scenes on the Planetary Computer.
4 |
5 | This dataset is also available on the [ArcGIS Living Atlas of the World](https://livingatlas.arcgis.com/landcover/).
6 |
--------------------------------------------------------------------------------
/datasets/landsat/collection/landsat-c2-l1/description.md:
--------------------------------------------------------------------------------
1 | Landsat Collection 2 Level-1 data, consisting of quantized and calibrated scaled Digital Numbers (DN) representing the multispectral image data. These [Level-1](https://www.usgs.gov/landsat-missions/landsat-collection-2-level-1-data) data can be [rescaled](https://www.usgs.gov/landsat-missions/using-usgs-landsat-level-1-data-product) to top of atmosphere (TOA) reflectance and/or radiance. Thermal band data can be rescaled to TOA brightness temperature.
2 |
3 | This dataset represents the global archive of Level-1 data from [Landsat Collection 2](https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2) acquired by the [Multispectral Scanner System](https://landsat.gsfc.nasa.gov/multispectral-scanner-system/) onboard Landsat 1 through Landsat 5 from July 7, 1972 to January 7, 2013. Images are stored in [cloud-optimized GeoTIFF](https://www.cogeo.org/) format.
4 |
--------------------------------------------------------------------------------
/datasets/landsat/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-landsat==0.2.4
2 | pystac-client>=0.4.0
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-09A1-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) 09A1 Version 6.1 product provides an estimate of the surface spectral reflectance of MODIS Bands 1 through 7 corrected for atmospheric conditions such as gasses, aerosols, and Rayleigh scattering. Along with the seven 500 meter (m) reflectance bands are two quality layers and four observation bands. For each pixel, a value is selected from all the acquisitions within the 8-day composite period. The criteria for the pixel choice include cloud and solar zenith. When several acquisitions meet the criteria the pixel with the minimum channel 3 (blue) value is used.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-09Q1-061/description.md:
--------------------------------------------------------------------------------
1 | The 09Q1 Version 6.1 product provides an estimate of the surface spectral reflectance of Moderate Resolution Imaging Spectroradiometer (MODIS) Bands 1 and 2, corrected for atmospheric conditions such as gasses, aerosols, and Rayleigh scattering. Provided along with the 250 meter (m) surface reflectance bands are two quality layers. For each pixel, a value is selected from all the acquisitions within the 8-day composite period. The criteria for the pixel choice include cloud and solar zenith. When several acquisitions meet the criteria the pixel with the minimum channel 3 (blue) value is used.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-10A1-061/description.md:
--------------------------------------------------------------------------------
1 | This global Level-3 (L3) data set provides a daily composite of snow cover and albedo derived from the 'MODIS Snow Cover 5-Min L2 Swath 500m' data set. Each data granule is a 10° x 10° tile projected to a 500 m sinusoidal grid.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-10A2-061/description.md:
--------------------------------------------------------------------------------
1 | This global Level-3 (L3) data set provides the maximum snow cover extent observed over an eight-day period within 10° x 10° MODIS sinusoidal grid tiles. Tiles are generated by compositing 500 m observations from the 'MODIS Snow Cover Daily L3 Global 500m Grid' data set. A bit flag index is used to track the eight-day snow/no-snow chronology for each 500 m cell.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-11A1-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Land Surface Temperature/Emissivity Daily Version 6.1 product provides daily per-pixel Land Surface Temperature and Emissivity (LST&E) with 1 kilometer (km) spatial resolution in a 1,200 by 1,200 km grid. The pixel temperature value is derived from the MOD11_L2 swath product. Above 30 degrees latitude, some pixels may have multiple observations where the criteria for clear-sky are met. When this occurs, the pixel value is a result of the average of all qualifying observations. Provided along with the daytime and nighttime surface temperature bands are associated quality control assessments, observation times, view zenith angles, and clear-sky coverages along with bands 31 and 32 emissivities from land cover types.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-11A2-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Land Surface Temperature/Emissivity 8-Day Version 6.1 product provides an average 8-day per-pixel Land Surface Temperature and Emissivity (LST&E) with a 1 kilometer (km) spatial resolution in a 1,200 by 1,200 km grid. Each pixel value in the MOD11A2 is a simple average of all the corresponding MOD11A1 LST pixels collected within that 8-day period. The 8-day compositing period was chosen because twice that period is the exact ground track repeat period of the Terra and Aqua platforms. Provided along with the daytime and nighttime surface temperature bands are associated quality control assessments, observation times, view zenith angles, and clear-sky coverages along with bands 31 and 32 emissivities from land cover types.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-13A1-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Vegetation Indices 16-Day Version 6.1 product provides Vegetation Index (VI) values at a per pixel basis at 500 meter (m) spatial resolution. There are two primary vegetation layers. The first is the Normalized Difference Vegetation Index (NDVI), which is referred to as the continuity index to the existing National Oceanic and Atmospheric Administration-Advanced Very High Resolution Radiometer (NOAA-AVHRR) derived NDVI. The second vegetation layer is the Enhanced Vegetation Index (EVI), which has improved sensitivity over high biomass regions. The algorithm for this product chooses the best available pixel value from all the acquisitions from the 16 day period. The criteria used is low clouds, low view angle, and the highest NDVI/EVI value. Provided along with the vegetation layers and two quality assurance (QA) layers are reflectance bands 1 (red), 2 (near-infrared), 3 (blue), and 7 (mid-infrared), as well as four observation layers.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-13Q1-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Vegetation Indices Version 6.1 data are generated every 16 days at 250 meter (m) spatial resolution as a Level 3 product. The MOD13Q1 product provides two primary vegetation layers. The first is the Normalized Difference Vegetation Index (NDVI) which is referred to as the continuity index to the existing National Oceanic and Atmospheric Administration-Advanced Very High Resolution Radiometer (NOAA-AVHRR) derived NDVI. The second vegetation layer is the Enhanced Vegetation Index (EVI), which has improved sensitivity over high biomass regions. The algorithm chooses the best available pixel value from all the acquisitions from the 16 day period. The criteria used is low clouds, low view angle, and the highest NDVI/EVI value. Along with the vegetation layers and the two quality layers, the HDF file will have MODIS reflectance bands 1 (red), 2 (near-infrared), 3 (blue), and 7 (mid-infrared), as well as four observation layers.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-14A1-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Thermal Anomalies and Fire Daily Version 6.1 data are generated every eight days at 1 kilometer (km) spatial resolution as a Level 3 product. MOD14A1 contains eight consecutive days of fire data conveniently packaged into a single file. The Science Dataset (SDS) layers include the fire mask, pixel quality indicators, maximum fire radiative power (MaxFRP), and the position of the fire pixel within the scan. Each layer consists of daily per pixel information for each of the eight days of data acquisition.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-14A2-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Thermal Anomalies and Fire 8-Day Version 6.1 data are generated at 1 kilometer (km) spatial resolution as a Level 3 product. The MOD14A2 gridded composite contains the maximum value of the individual fire pixel classes detected during the eight days of acquisition. The Science Dataset (SDS) layers include the fire mask and pixel quality indicators.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-15A2H-061/description.md:
--------------------------------------------------------------------------------
1 | The Version 6.1 Moderate Resolution Imaging Spectroradiometer (MODIS) Level 4, Combined Fraction of Photosynthetically Active Radiation (FPAR), and Leaf Area Index (LAI) product is an 8-day composite dataset with 500 meter pixel size. The algorithm chooses the best pixel available from within the 8-day period. LAI is defined as the one-sided green leaf area per unit ground area in broadleaf canopies and as one-half the total needle surface area per unit ground area in coniferous canopies. FPAR is defined as the fraction of incident photosynthetically active radiation (400-700 nm) absorbed by the green elements of a vegetation canopy.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-15A3H-061/description.md:
--------------------------------------------------------------------------------
1 | The MCD15A3H Version 6.1 Moderate Resolution Imaging Spectroradiometer (MODIS) Level 4, Combined Fraction of Photosynthetically Active Radiation (FPAR), and Leaf Area Index (LAI) product is a 4-day composite data set with 500 meter pixel size. The algorithm chooses the best pixel available from all the acquisitions of both MODIS sensors located on NASA's Terra and Aqua satellites from within the 4-day period. LAI is defined as the one-sided green leaf area per unit ground area in broadleaf canopies and as one-half the total needle surface area per unit ground area in coniferous canopies. FPAR is defined as the fraction of incident photosynthetically active radiation (400-700 nm) absorbed by the green elements of a vegetation canopy.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-17A2H-061/description.md:
--------------------------------------------------------------------------------
1 | The Version 6.1 Gross Primary Productivity (GPP) product is a cumulative 8-day composite of values with 500 meter (m) pixel size based on the radiation use efficiency concept that can be potentially used as inputs to data models to calculate terrestrial energy, carbon, water cycle processes, and biogeochemistry of vegetation. The Moderate Resolution Imaging Spectroradiometer (MODIS) data product includes information about GPP and Net Photosynthesis (PSN). The PSN band values are the GPP less the Maintenance Respiration (MR). The data product also contains a PSN Quality Control (QC) layer. The quality layer contains quality information for both the GPP and the PSN.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-17A3HGF-061/description.md:
--------------------------------------------------------------------------------
1 | The Version 6.1 product provides information about annual Net Primary Production (NPP) at 500 meter (m) pixel resolution. Annual Moderate Resolution Imaging Spectroradiometer (MODIS) NPP is derived from the sum of all 8-day Net Photosynthesis (PSN) products (MOD17A2H) from the given year. The PSN value is the difference of the Gross Primary Productivity (GPP) and the Maintenance Respiration (MR). The product will be generated at the end of each year when the entire yearly 8-day MOD15A2H is available. Hence, the gap-filled product is the improved MOD17, which has cleaned the poor-quality inputs from 8-day Leaf Area Index and Fraction of Photosynthetically Active Radiation (LAI/FPAR) based on the Quality Control (QC) label for every pixel. If any LAI/FPAR pixel did not meet the quality screening criteria, its value is determined through linear interpolation. However, users cannot get this product in near-real time because it will be generated only at the end of a given year.
--------------------------------------------------------------------------------
/datasets/modis/collection/modis-43A4-061/description.md:
--------------------------------------------------------------------------------
1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) MCD43A4 Version 6.1 Nadir Bidirectional Reflectance Distribution Function (BRDF)-Adjusted Reflectance (NBAR) dataset is produced daily using 16 days of Terra and Aqua MODIS data at 500 meter (m) resolution. The view angle effects are removed from the directional reflectances, resulting in a stable and consistent NBAR product. Data are temporally weighted to the ninth day which is reflected in the Julian date in the file name. Users are urged to use the band specific quality flags to isolate the highest quality full inversion results for their own science applications as described in the User Guide. The MCD43A4 provides NBAR and simplified mandatory quality layers for MODIS bands 1 through 7. Essential quality information provided in the corresponding MCD43A2 data file should be consulted when using this product.
--------------------------------------------------------------------------------
/datasets/modis/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/modis@419101223609805f9ac9d2a38401448a36331460
2 |
--------------------------------------------------------------------------------
/datasets/ms-buildings/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/msbuildings.git@e7731afde6a1a767827c2c98b12cb414c08add6c
2 | adlfs
--------------------------------------------------------------------------------
/datasets/naip/collection/description.md:
--------------------------------------------------------------------------------
1 | The [National Agriculture Imagery Program](https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/) (NAIP)
2 | provides U.S.-wide, high-resolution aerial imagery, with four spectral bands (R, G, B, IR).
3 | NAIP is administered by the [Aerial Field Photography Office](https://www.fsa.usda.gov/programs-and-services/aerial-photography/) (AFPO)
4 | within the [US Department of Agriculture](https://www.usda.gov/) (USDA).
5 | Data are captured at least once every three years for each state.
6 | This dataset represents NAIP data from 2010-present, in [cloud-optimized GeoTIFF](https://www.cogeo.org/) format.
7 | You can visualize the coverage of current and past collections [here](https://naip-usdaonline.hub.arcgis.com/).
8 |
--------------------------------------------------------------------------------
/datasets/naip/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: naip
2 | image: ${{ args.registry }}/pctasks-naip:latest
3 |
4 | args:
5 | - registry
6 | - year
7 |
8 | code:
9 | src: ${{ local.path(./naip.py) }}
10 |
11 | environment:
12 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }}
13 |
14 | collections:
15 | - id: naip
16 | template: ${{ local.path(./collection) }}
17 | class: naip:NAIPCollection
18 | asset_storage:
19 | - uri: blob://naipeuwest/naip/
20 | chunks:
21 | options:
22 | extensions: [.tif]
23 | name_starts_with: v002/
24 | chunk_length: 3000
25 | matches: ".*?_(\\d{3})_${{args.year}}"
26 | chunk_storage:
27 | uri: blob://naipeuwest/naip-etl-data/pctasks/
28 |
--------------------------------------------------------------------------------
/datasets/naip/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-naip==0.5.0
2 |
--------------------------------------------------------------------------------
/datasets/nasa-nex-gddp-cmip6/README.md:
--------------------------------------------------------------------------------
1 | # planetary-computer-tasks dataset: nasa-nex-gddp-cmip6
2 |
3 | NASA NEX GDDP CMIP6 Dataset
4 |
5 | ## Building the Docker image
6 |
7 | To build and push a custom docker image to our container registry:
8 |
9 | ```shell
10 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-nasa-nex-gddp-cmip6:latest -t pctasks-nasa-nex-gddp-cmip6:{date}.{count} -f datasets/nasa-nex-gddp-cmip6/Dockerfile .
11 | ```
12 |
13 | ## Static update
14 |
15 | This collection is not regularly updated.
16 |
17 | ```console
18 | $ pctasks dataset process-items \
19 | -d datasets/nasa-nex-gddp-cmip6/dataset.yaml \
20 | nasa-nex-gddp-cmip-test \
21 | --arg registry pccomponentstest.azurecr.io \
22 | --upsert --submit
23 | ```
24 |
25 | **Notes:**
26 |
27 | - Currently uses chunk size of one, because the item creation was timing out with chunksize of 100. However, haven't investigated middle ground.
28 | - Runs in about 10 hours.
--------------------------------------------------------------------------------
/datasets/nasa-nex-gddp-cmip6/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: nasa_nex_cddp_cmip6
2 | image: ${{ args.registry }}/pctasks-nasa-nex-gddp-cmip6:latest
3 |
4 | args:
5 | - registry
6 |
7 | code:
8 | src: ${{ local.path(./nasa_nex_gddp_cmip6.py) }}
9 |
10 | environment:
11 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }}
12 |
13 | collections:
14 | - id: nasa-nex-gddp-cmip6
15 | template: ${{ local.path(./collection/) }}
16 | class: nasa_nex_gddp_cmip6:Cmip6Collection
17 | asset_storage:
18 | - uri: blob://nasagddp/nex-gddp-cmip6/NEX/GDDP-CMIP6/
19 | chunks:
20 | options:
21 | extensions: [.nc]
22 | matches: (rsds)
23 | chunk_length: 1
24 |
25 | chunk_storage:
26 | uri: blob://nasagddp/nex-gddp-cmip6-etl-data/pctasks/
27 |
28 |
--------------------------------------------------------------------------------
/datasets/nasa-nex-gddp-cmip6/requirements.txt:
--------------------------------------------------------------------------------
1 | adlfs
2 | h5netcdf
3 | h5py
4 | kerchunk
5 | netcdf4
6 | scipy
7 | xarray
8 | xstac
--------------------------------------------------------------------------------
/datasets/noaa-cdr/README.md:
--------------------------------------------------------------------------------
1 | # NOAA Climate Data Records (CDR)
2 |
3 | ### Dynamic updates
4 |
5 | `noaa-cdr-sea-surface-temperature-optimum-interpolation` is updated daily.
6 |
7 | ```console
8 | $ pctasks dataset process-items '${{ args.since }}' \
9 | -d datasets/noaa-cdr/update.yaml \
10 | -c noaa-cdr-sea-surface-temperature-optimum-interpolation \
11 | --workflow-id=noaa-cdr-sea-surface-temperature-optimum-interpolation-update \
12 | --is-update-workflow \
13 | --upsert
14 | ```
--------------------------------------------------------------------------------
/datasets/noaa-cdr/collections/ocean-heat-content-netcdf/description.md:
--------------------------------------------------------------------------------
1 | The Ocean Heat Content Climate Data Record (CDR) is a set of ocean heat content anomaly (OHCA) time-series for 1955-present on 3-monthly, yearly, and pentadal (five-yearly) scales. This CDR quantifies ocean heat content change over time, which is an essential metric for understanding climate change and the Earth's energy budget. It provides time-series for multiple depth ranges in the global ocean and each of the major basins (Atlantic, Pacific, and Indian) divided by hemisphere (Northern, Southern).
2 |
3 | This is a NetCDF-only collection, for Cloud-Optimized GeoTIFFs use collection `noaa-cdr-ocean-heat-content`.
4 | The NetCDF files are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination).
5 |
--------------------------------------------------------------------------------
/datasets/noaa-cdr/collections/ocean-heat-content/description.md:
--------------------------------------------------------------------------------
1 | The Ocean Heat Content Climate Data Record (CDR) is a set of ocean heat content anomaly (OHCA) time-series for 1955-present on 3-monthly, yearly, and pentadal (five-yearly) scales. This CDR quantifies ocean heat content change over time, which is an essential metric for understanding climate change and the Earth's energy budget. It provides time-series for multiple depth ranges in the global ocean and each of the major basins (Atlantic, Pacific, and Indian) divided by hemisphere (Northern, Southern).
2 |
3 | These Cloud Optimized GeoTIFFs (COGs) were created from NetCDF files which are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination).
4 | For the NetCDF files, see collection `noaa-cdr-ocean-heat-content-netcdf`.
5 |
--------------------------------------------------------------------------------
/datasets/noaa-cdr/collections/sea-ice-concentration/description.md:
--------------------------------------------------------------------------------
1 | The Sea Ice Concentration Climate Data Record (CDR) provides a consistent daily and monthly time series of sea ice concentrations for both the north and south Polar Regions on a 25 km x 25 km grid. These data can be used to estimate how much of the ocean surface is covered by ice, and monitor changes in sea ice concentration. The CDR combines concentration estimates using two algorithms developed at the NASA Goddard Space Flight Center (GSFC). Gridded brightness temperatures acquired from a number of Defense Meteorological Satellite Program (DMSP) passive microwave radiometers provide the necessary input to produce the dataset.
2 |
3 | These Cloud Optimized GeoTIFFs (COGs) were created from NetCDF files which are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination).
4 | For the NetCDF files, see collection `noaa-cdr-sea-ice-concentration-netcdf`.
5 |
--------------------------------------------------------------------------------
/datasets/noaa-cdr/collections/sea-surface-temperature-whoi-netcdf/description.md:
--------------------------------------------------------------------------------
1 | The Sea Surface Temperature-Woods Hole Oceanographic Institution (WHOI) Climate Data Record (CDR) is one of three CDRs which combine to form the NOAA Ocean Surface Bundle (OSB) CDR. The resultant sea surface temperature (SST) data are produced through modeling the diurnal variability in combination with AVHRR SST observations. The final record is output to a 3-hourly 0.25° resolution grid over the global ice-free oceans from January 1988—present.
2 |
3 | This is a NetCDF-only collection, for Cloud-Optimized GeoTIFFs use collection `noaa-cdr-sea-surface-temperature-whoi`.
4 | The NetCDF files are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination).
5 |
--------------------------------------------------------------------------------
/datasets/noaa-cdr/collections/sea-surface-temperature-whoi/description.md:
--------------------------------------------------------------------------------
1 | The Sea Surface Temperature-Woods Hole Oceanographic Institution (WHOI) Climate Data Record (CDR) is one of three CDRs which combine to form the NOAA Ocean Surface Bundle (OSB) CDR. The resultant sea surface temperature (SST) data are produced through modeling the diurnal variability in combination with AVHRR SST observations. The final record is output to a 3-hourly 0.25° resolution grid over the global ice-free oceans from January 1988—present.
2 |
3 | These Cloud Optimized GeoTIFFs (COGs) were created from NetCDF files which are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination).
4 | For the NetCDF files, see collection `noaa-cdr-sea-surface-temperature-whoi-netcdf`.
5 |
--------------------------------------------------------------------------------
/datasets/noaa-cdr/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-noaa-cdr @ git+https://github.com/stactools-packages/noaa-cdr@db4ebdc633a2cb1f27874b039edcbe761b81b214
2 |
--------------------------------------------------------------------------------
/datasets/noaa-climate-normals/README.md:
--------------------------------------------------------------------------------
1 | # NOAA Climate Normals
2 |
3 | ### Building the Docker image
4 |
5 | ```shell
6 | az acr build -r $REGISTRY --subscription $SUBSCRIPTION -t pctasks-noaa-climate-normals:latest -f datasets/noaa-climate-normals/Dockerfile .
7 | ```
8 |
--------------------------------------------------------------------------------
/datasets/noaa-climate-normals/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/noaa-climate-normals.git@2d574925ac928d4705f3f9e85f5fbb4794d0593f
--------------------------------------------------------------------------------
/datasets/noaa-mrms-qpe/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG registry
2 | FROM ${registry}/pctasks-task-base:latest
3 |
4 | COPY datasets/noaa-mrms-qpe /opt/src/datasets/noaa-mrms-qpe
5 | RUN python3 -m pip install -r /opt/src/datasets/noaa-mrms-qpe/requirements.txt
6 |
--------------------------------------------------------------------------------
/datasets/noaa-mrms-qpe/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-noaa-mrms-qpe == 0.3.1
--------------------------------------------------------------------------------
/datasets/noaa-mrms-qpe/tests/test_dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 |
5 | from pctasks.dev.test_utils import run_process_items_workflow
6 |
7 | HERE = Path(__file__).parent
8 | DATASET_PATH = HERE / "dataset-dev-cluster.yaml"
9 |
10 |
@pytest.mark.parametrize(
    "collection",
    [
        "noaa-mrms-qpe-1h-pass1",
        "noaa-mrms-qpe-1h-pass2",
        "noaa-mrms-qpe-24h-pass2",
    ],
)
def test_collection(collection):
    """Run the process-items workflow for one MRMS QPE collection on the dev cluster."""
    # Keep the run small/fast: one split, two chunks, 10-minute cap.
    workflow_args = {"registry": "localhost:5001"}
    run_process_items_workflow(
        DATASET_PATH,
        collection,
        args=workflow_args,
        splits_limit=1,
        chunks_limit=2,
        timeout_seconds=600,
    )
26 |
--------------------------------------------------------------------------------
/datasets/noaa_nclimgrid/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/noaa-nclimgrid.git@137e512eaf11df824e2a232e62ffca7a4d9dddef
--------------------------------------------------------------------------------
/datasets/noaa_nclimgrid/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
def pytest_addoption(parser):
    """Register the ``--run-slow`` command-line flag with pytest.

    The flag is off by default; tests marked ``slow`` are skipped unless
    it is supplied (see ``pytest_collection_modifyitems``).
    """
    parser.addoption("--run-slow", action="store_true", default=False, help="Run slow tests")
11 |
12 |
def pytest_configure(config) -> None:
    """Declare the ``slow`` marker so pytest does not warn about an unknown marker."""
    marker_definition = "slow: mark test as slow to run"
    config.addinivalue_line("markers", marker_definition)
15 |
16 |
def pytest_collection_modifyitems(config, items):
    """Mark every ``slow`` test as skipped unless ``--run-slow`` was given."""
    # With the flag present, leave the collected items untouched.
    if config.getoption("--run-slow"):
        return
    skip_marker = pytest.mark.skip(reason="Only run when --run-slow is given")
    for test_item in items:
        if "slow" in test_item.keywords:
            test_item.add_marker(skip_marker)
23 |
--------------------------------------------------------------------------------
/datasets/sentinel-1-grd/README.md:
--------------------------------------------------------------------------------
1 | # planetary-computer-tasks dataset: sentinel-1-grd
2 |
3 | ## Chunking for dynamic ingest
4 |
5 | - Requires an extra `--arg year-prefix {year}` argument when running `pctasks dataset create-chunks` or `pctasks dataset process-items` commands.
6 | - Asset chunkfile creation takes about 5 minutes.
7 | - Item creation takes about 5 minutes for ~1 day of data.
8 |
9 | ## Docker image
10 |
11 | To build and push a custom docker image to our container registry:
12 |
13 | ```shell
14 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-sentinel-1-grd:latest -t pctasks-sentinel-1-grd:{date}.{count} -f datasets/sentinel-1-grd/Dockerfile .
15 | ```
16 |
17 | ## Dynamic updates
18 |
19 | The dynamic update workflow was registered with
20 |
21 | ```shell
22 | $ pctasks dataset process-items sentinel-1-grd-update --is-update-workflow -d datasets/sentinel-1-grd/dataset.yaml --upsert
23 | ```
--------------------------------------------------------------------------------
/datasets/sentinel-1-grd/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-sentinel1==0.5.0
--------------------------------------------------------------------------------
/datasets/sentinel-1-rtc/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools==0.3.*
--------------------------------------------------------------------------------
/datasets/sentinel-2/README.md:
--------------------------------------------------------------------------------
1 | # Sentinel-2
2 |
3 | ## Chunk creation for dynamic ingest
4 |
5 | - Using the same chunking split level and options as ETL
6 | - Listing the `manifest.safe` files
7 | - Generates about 1000 tasks
8 | - 5-6 hour run-time with a `--since` option and run on the `pctasksteststaging` batch account
9 | - No faster set of chunking options found.
10 |
11 | ## Docker container
12 |
13 | ```shell
14 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-sentinel-2:latest -t pctasks-sentinel-2:{date}.{count} -f datasets/sentinel-2/Dockerfile .
15 | ```
16 |
17 | ## Update Workflow
18 |
19 | Created with
20 |
21 | ```
22 | pctasks dataset process-items --is-update-workflow sentinel-2-l2a-update -d datasets/sentinel-2/dataset.yaml -u
23 | ```
--------------------------------------------------------------------------------
/datasets/sentinel-2/collection/description.md:
--------------------------------------------------------------------------------
1 | The [Sentinel-2](https://sentinel.esa.int/web/sentinel/missions/sentinel-2) program provides global imagery in thirteen spectral bands at 10m-60m resolution and a revisit time of approximately five days. This dataset represents the global Sentinel-2 archive, from 2016 to the present, processed to L2A (bottom-of-atmosphere) using [Sen2Cor](https://step.esa.int/main/snap-supported-plugins/sen2cor/) and converted to [cloud-optimized GeoTIFF](https://www.cogeo.org/) format.
--------------------------------------------------------------------------------
/datasets/sentinel-2/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools_sentinel2==0.2.1
2 |
--------------------------------------------------------------------------------
/datasets/sentinel-3/collection/sentinel-3-sral-lan-l2-netcdf/description.md:
--------------------------------------------------------------------------------
1 | This Collection provides Sentinel-3 [SRAL Level-2 Land Altimetry](https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-3-altimetry/level-2-algorithms-products) products, which contain data on land radar altimetry measurements. Each product contains three NetCDF files:
2 |
3 | - A reduced data file containing a subset of the 1 Hz Ku-band parameters.
4 | - A standard data file containing the standard 1 Hz and 20 Hz Ku- and C-band parameters.
5 | - An enhanced data file containing the standard 1 Hz and 20 Hz Ku- and C-band parameters along with the waveforms and parameters necessary to reprocess the data.
6 |
7 | More information about the product and data processing can be found in the [User Guide](https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-3-altimetry/overview) and [Technical Guide](https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-3-altimetry).
8 |
9 | This Collection contains Level-2 data in NetCDF files from March 2016 to present.
10 |
--------------------------------------------------------------------------------
/datasets/sentinel-3/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/sentinel3.git@36375cc63c053087380664ff931ceed5ad3b5f83
2 |
--------------------------------------------------------------------------------
/datasets/sentinel-5p/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/sentinel5p.git@a4d1e06fb438823027bbc7a34899c0fc297a1e1b
--------------------------------------------------------------------------------
/datasets/stac-geoparquet/requirements.txt:
--------------------------------------------------------------------------------
1 | stac-geoparquet[pgstac,pc]==0.2.1
2 | psycopg[binary,pool]==3.1.8
3 | azure-data-tables==12.4.2
4 | pypgstac==0.7.4
--------------------------------------------------------------------------------
/datasets/stac-geoparquet/workflow_test.yaml:
--------------------------------------------------------------------------------
1 | name: stac-geoparquet
2 | dataset: microsoft/stac-geoparquet
3 | id: stac-geoparquet
4 |
5 | jobs:
6 | stac:
7 | tasks:
8 | - id: create
9 | image: pccomponentstest.azurecr.io/pctasks-stac-geoparquet:2023.7.10.1
10 | code:
11 | src: ${{ local.path(pc_stac_geoparquet.py) }}
12 | task: pc_stac_geoparquet:StacGeoparquetTask
13 | args:
14 | table_account_url: "https://pctapisstagingsa.table.core.windows.net"
15 | table_name: "collectionconfig"
16 | storage_options_account_name: "pcstacitems"
17 | collections: "io-lulc-annual-v02"
18 | environment:
19 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }}
20 | STAC_GEOPARQUET_CONNECTION_INFO: ${{secrets.pgstac-connection-string}}
21 |
--------------------------------------------------------------------------------
/datasets/terraclimate/collection/description.md:
--------------------------------------------------------------------------------
1 | [TerraClimate](http://www.climatologylab.org/terraclimate.html) is a dataset of monthly climate and climatic water balance for global terrestrial surfaces from 1958 to the present. These data provide important inputs for ecological and hydrological studies at global scales that require high spatial resolution and time-varying data. All data have monthly temporal resolution and a ~4-km (1/24th degree) spatial resolution. This dataset is provided in [Zarr](https://zarr.readthedocs.io/) format.
2 |
--------------------------------------------------------------------------------
/datasets/terraclimate/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: terraclimate
2 | image: ${{ args.registry }}/pctasks-task-base:latest
3 |
4 | args:
5 | - registry
6 |
7 | collections:
8 | - id: terraclimate
9 | template: ${{ local.path(./collection) }}
10 | class: pctasks.dataset.collection:PremadeItemCollection
11 | asset_storage: []
12 | chunk_storage:
13 | uri: "blob://cpdataeuwest/cpdata-etl-data/chunks"
14 |
--------------------------------------------------------------------------------
/datasets/usda-cdl/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/usda-cdl/README.md
--------------------------------------------------------------------------------
/datasets/usda-cdl/dataset.yaml:
--------------------------------------------------------------------------------
1 | id: usda-cdl
2 | image: ${{ args.registry }}/pctasks-usda-cdl:latest
3 | args:
4 | - registry
5 | code:
6 | src: ${{ local.path(./usda_cdl.py) }}
7 | # requirements: ${{ local.path(./requirements.txt) }}
8 | collections:
9 | - id: usda-cdl
10 | template: ${{ local.path(./collection) }}
11 | class: usda_cdl:UsdaCdlCollection
12 | asset_storage:
13 | - uri: blob://landcoverdata/usda-cdl/tiles
14 | chunks:
15 | options:
16 | list_folders: true
17 | max_depth: 1
18 | chunk_length: 2
19 | chunk_storage:
20 | uri: blob://landcoverdata/usda-cdl-etl-data/chunks
21 |
--------------------------------------------------------------------------------
/datasets/usda-cdl/requirements.txt:
--------------------------------------------------------------------------------
1 | stactools-usda-cdl == 0.1.3
2 |
--------------------------------------------------------------------------------
/datasets/usda-cdl/tile.yaml:
--------------------------------------------------------------------------------
1 | name: Tile USDA CDL assets
2 | id: usda-cdl-tile
3 | dataset: usda-cdl
4 | args:
5 | - registry
6 | jobs:
7 | list-files:
8 | tasks:
9 | - id: list-files
10 | image: ${{ args.registry }}/pctasks-usda-cdl:latest
11 | task: pctasks.task.common.list_files:task
12 | args:
13 | src_uri: blob://landcoverdata/usda-cdl-onboarding
14 | extensions:
15 | - .zip
16 | tile:
17 | foreach:
18 | items: ${{ jobs.list-files.tasks.list-files.output.uris }}
19 | tasks:
20 | - id: tile
21 | image: ${{ args.registry }}/pctasks-usda-cdl:latest
22 | code:
23 | src: ${{ local.path(./usda_cdl.py) }}
24 | # requirements: ${{ local.path(./requirements.txt) }}
25 | task: usda_cdl:tile_task
26 | args:
27 | src_uri: ${{ item }}
28 | dst_uri: blob://landcoverdata/usda-cdl/tiles
29 |
--------------------------------------------------------------------------------
/datasets/usgs-lcmap/fix_items/README.md:
--------------------------------------------------------------------------------
1 | # Fix LCMAP Item Asset class lists
2 |
 3 | Downloads the Item ndjsons from blob storage, removes the land cover change classes from the primary (lcpri) and secondary (lcsec) land cover Assets in each Item, and uploads ndjsons containing the corrected Items back to blob storage.
4 |
5 | ## Running
6 |
7 | Edit the `incorrect_chunkset_uri` and `corrected_chunkset_uri` arguments to operate on either CONUS or Hawaii data. Then run:
8 |
9 | ```shell
10 | pctasks workflow upsert-and-submit datasets/usgs-lcmap/fix_items/fix_items.yaml
11 | ```
12 |
--------------------------------------------------------------------------------
/datasets/usgs-lcmap/fix_items/fix_items.yaml:
--------------------------------------------------------------------------------
1 | name: Fix USGS LCMAP Item classes
2 | id: usgs-lcmap-fix-items
3 | dataset: usgs-lcmap
4 | args:
5 | - registry
6 | jobs:
7 | fix-items:
8 | id: fix-items
9 | tasks:
10 | - id: fix-items
11 | image: ${{ args.registry }}/pctasks-task-base:latest
12 | code:
13 | src: ${{ local.path(./fix_items.py) }}
14 | task: fix_items:fix_items_task
15 | args:
16 | incorrect_chunkset_uri: blob://landcoverdata/lcmap-etl-data/lcmap-conus-v13/2023-01-12-full-2/items
17 | corrected_chunkset_uri: blob://landcoverdata/lcmap-etl-data/lcmap-conus-v13/2023-01-12-full-2/fixed-items
18 |
--------------------------------------------------------------------------------
/datasets/usgs-lcmap/fix_items/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/usgs-lcmap/fix_items/tests/__init__.py
--------------------------------------------------------------------------------
/datasets/usgs-lcmap/fix_items/tests/test_tasks.py:
--------------------------------------------------------------------------------
1 | # run this with `python -m pytest` from the datasets/usgs-lcmap/fix_items directory
2 |
3 | from pathlib import Path
4 | import json
5 |
6 | from fix_items import remove_classes
7 |
8 |
def test_fix_ndjson_classes() -> None:
    """remove_classes should leave only the 9 base classes on lcpri/lcsec assets."""
    ndjson_path = Path(__file__).parent / "data-files" / "items.ndjson"
    with open(ndjson_path, "r") as ndjson_file:
        raw_item = json.loads(ndjson_file.readline())
    fixed_item = remove_classes(raw_item)
    # Both land cover assets should be reduced to the 9 base classes.
    for asset_key in ("lcpri", "lcsec"):
        assert len(fixed_item["assets"][asset_key]["classification:classes"]) == 9
    # No change classes (values >= 9, names containing "_to_") should remain.
    for klass in fixed_item["assets"]["lcsec"]["classification:classes"]:
        assert klass["value"] < 9
        assert "_to_" not in klass["name"]
19 |
--------------------------------------------------------------------------------
/datasets/usgs-lcmap/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/stactools-packages/usgs-lcmap.git@73d936d1ed4b756b25b6782164778451de38af73
--------------------------------------------------------------------------------
/datasets/usgs-lidar/requirements.txt:
--------------------------------------------------------------------------------
1 | geopandas
2 | pyarrow
3 | adlfs
4 | azure.identity
5 | rasterio
6 | planetary-computer
7 | pyproj
8 | shapely
9 | opencensus.ext.azure
--------------------------------------------------------------------------------
/deployment/.gitignore:
--------------------------------------------------------------------------------
1 | tf-output.json
2 | conf
3 |
--------------------------------------------------------------------------------
/deployment/bin/azlogin:
--------------------------------------------------------------------------------
#!/bin/bash
# Log in to the Azure CLI as a service principal. Credentials come from the
# AZURE_CLIENT_ID / AZURE_CLIENT_SECRET / AZURE_TENANT_ID environment variables.

set -e

# Echo each command when running in CI, for easier log debugging.
if [[ "${CI}" ]]; then
    set -x
fi

function usage() {
    echo -n \
        "Usage: $(basename "$0")
Login to Azure CLI
"
}

# This script accepts no parameters; any argument falls through to usage.
# NOTE(review): usage() ignores the message argument passed to it, and the
# loop does not exit on an unknown parameter (it double-shifts and keeps
# going) -- confirm whether an `exit 1` was intended here.
while [[ "$#" -gt 0 ]]; do case $1 in
    *)
        usage "Unknown parameter passed: $1"
        shift
        shift
        ;;
    esac done


# Only perform the login when executed directly, not when sourced.
if [ "${BASH_SOURCE[0]}" = "${0}" ]; then

    az login --service-principal \
        --username ${AZURE_CLIENT_ID} \
        --password ${AZURE_CLIENT_SECRET} \
        --tenant ${AZURE_TENANT_ID}

fi
33 |
--------------------------------------------------------------------------------
/deployment/bin/nginx-values.yaml:
--------------------------------------------------------------------------------
1 | controller:
2 | podLabels:
3 | azure.workload.identity/use: "true"
4 | extraVolumes:
5 | - name: secrets-store-inline
6 | csi:
7 | driver: secrets-store.csi.k8s.io
8 | readOnly: true
9 | volumeAttributes:
10 | secretProviderClass: "keyvault"
11 | extraVolumeMounts:
12 | - name: secrets-store-inline
13 | mountPath: "/mnt/secrets-store"
14 | readOnly: true
15 | extraArgs:
16 | default-ssl-certificate: pc/planetarycomputer-test-certificate
--------------------------------------------------------------------------------
/deployment/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | deploy:
3 | container_name: pc-etl-deploy
4 | image: pc-etl-deploy
5 | build:
6 | context: ..
7 | dockerfile: deployment/Dockerfile
8 | environment:
9 | # For Terraform
10 | - ARM_SUBSCRIPTION_ID=${ARM_SUBSCRIPTION_ID:-a84a690d-585b-4c7c-80d9-851a48af5a50}
11 | - ARM_TENANT_ID=${ARM_TENANT_ID:-72f988bf-86f1-41af-91ab-2d7cd011db47}
12 | - ARM_CLIENT_ID
13 | - ARM_USE_OIDC
14 | - ARM_OIDC_TOKEN
15 | - ACTIONS_ID_TOKEN_REQUEST_URL
16 | - ACTIONS_ID_TOKEN_REQUEST_TOKEN
17 | - ARM_OIDC_REQUEST_TOKEN
18 | - ARM_OIDC_REQUEST_URL
19 |
20 | # Used in function deployment injected by GH Actions
21 | - GITHUB_TOKEN
22 | - GITHUB_REPOSITORY
23 | - GITHUB_ACTOR
24 | working_dir: /opt/src/deployment
25 | volumes:
26 | - ../deployment:/opt/src/deployment
27 | - ../pctasks:/opt/src/pctasks:ro
28 | - ../pctasks_funcs:/opt/src/pctasks_funcs:ro
29 | - ~/.azure:/root/.azure
30 |
--------------------------------------------------------------------------------
/deployment/helm/argo-values.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | baseHref: /argo/
3 | secure: false
4 | serviceAccount:
5 | name: pctasks-sa
6 | image:
7 | registry: pccomponentstest.azurecr.io
8 | repository: argoproj/argocli
9 | tag: v3.5.8
10 | controller:
11 | image:
12 | registry: pccomponentstest.azurecr.io
13 | repository: argoproj/workflow-controller
14 | tag: v3.5.8
15 | executor:
16 | image:
17 | registry: pccomponentstest.azurecr.io
18 | repository: argoproj/argoexec
19 | tag: v3.5.8
20 |
--------------------------------------------------------------------------------
/deployment/helm/pc-tasks-ingress/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: pc-tasks-ingress
3 | description: A Helm chart for the ingress for Planetary Computer Tasks test environment
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
7 |
--------------------------------------------------------------------------------
/deployment/helm/pc-tasks-ingress/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | Application information:
2 | {{ include "pcingress.selectorLabels" . }}
3 | Ingress host: {{ .Values.pcingress.ingress.host }}
4 | Service Fullname: {{ include "pcingress.fullname" . }}
5 | KeyVault secret provider created: {{ .Values.secretProvider.create }}
--------------------------------------------------------------------------------
/deployment/helm/pc-tasks-ingress/templates/nginx-configmap.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | annotations:
5 | labels:
6 | app: pc-tasks-ingress
7 | name: nginx-configuration
8 | namespace: {{ .Values.namespace }}
9 | data:
10 | use-forwarded-headers: "true"
11 | enable-real-ip: "true"
--------------------------------------------------------------------------------
/deployment/helm/pc-tasks-ingress/templates/secret.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: pctasks-sa-token
5 | annotations:
6 | # Service account created by Argo, name
7 | # set in argo-values.yaml
8 | kubernetes.io/service-account.name: pctasks-sa
9 | type: kubernetes.io/service-account-token
--------------------------------------------------------------------------------
/deployment/helm/pc-tasks-ingress/values.yaml:
--------------------------------------------------------------------------------
1 | environment: "staging"
2 | namespace: "default"
3 |
4 | stac:
5 | enabled: true
6 |
7 | tiler:
8 | enabled: true
9 |
10 | pcingress:
11 | services:
12 | stac:
13 | path: ""
14 | name: ""
15 | port: ""
16 | tiler:
17 | path: ""
18 | name: ""
19 | port: ""
20 |
21 | cert:
22 | secretName: ""
23 |
24 | ingress:
25 | enabled: false
26 | tlsHost: ""
27 | hosts: []
28 | annotations: {}
29 |
30 | secretProvider:
31 | create: true
32 | providerName: "keyvault"
33 | namespace: ""
34 | userAssignedIdentityID: ""
35 | tenantId: ""
36 | keyvaultName: ""
37 | keyvaultCertificateName: ""
38 | kubernetesCertificateSecretName: ""
39 |
40 | nameOverride: ""
41 | fullnameOverride: ""
42 |
--------------------------------------------------------------------------------
/deployment/helm/published/pctasks-server/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: pctasks-server
3 | description: A Helm chart for the Planetary Computer Tasks server
4 | type: application
5 | version: 0.1.0
6 | appVersion: 0.1.0
--------------------------------------------------------------------------------
/deployment/helm/published/pctasks-server/templates/NOTES.txt:
--------------------------------------------------------------------------------
1 | Application information:
2 | {{ include "pctasks.selectorLabels" . }}
3 | Ingress host: {{ .Values.pctasks.server.ingress.host }}
4 | Service Fullname: {{ include "pctasks.fullname" . }}
--------------------------------------------------------------------------------
/deployment/helm/published/pctasks-server/templates/service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | name: {{ include "pctasks.fullname" . }}
5 | labels:
6 | {{- include "pctasks.labels" . | nindent 4 }}
7 | {{- with .Values.pctasks.server.service.annotations }}
8 | annotations:
9 | {{- toYaml . | nindent 4 }}
10 | {{- end }}
11 | spec:
12 | type: {{ .Values.pctasks.server.service.type }}
13 | ports:
14 | - port: {{ .Values.pctasks.server.service.port }}
15 | selector:
16 | {{- include "pctasks.selectorLabels" . | nindent 4 }}
17 |
--------------------------------------------------------------------------------
/deployment/helm/published/pctasks-server/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
1 | {{- if .Values.pctasks.server.serviceAccount.create -}}
2 | apiVersion: v1
3 | kind: ServiceAccount
4 | metadata:
5 | name: {{ include "pctasks.serviceAccountName" . }}
6 | labels:
7 | {{- include "pctasks.labels" . | nindent 4 }}
8 | {{- with .Values.pctasks.server.serviceAccount.annotations }}
9 | annotations:
10 | {{- toYaml . | nindent 4 }}
11 | {{- end }}
12 |
13 | ---
14 | apiVersion: rbac.authorization.k8s.io/v1
15 | kind: RoleBinding
16 | metadata:
17 | name: pctasks-server-argoworkflows-workflow-rolebinding
18 | subjects:
19 | - kind: ServiceAccount
20 | name: {{ include "pctasks.serviceAccountName" . }}
21 | roleRef:
22 | kind: Role
23 | name: argo-workflows-workflow
24 | apiGroup: rbac.authorization.k8s.io
25 |
26 | {{- end }}
27 |
--------------------------------------------------------------------------------
/deployment/helm/vendored/argo-workflows-0.41.8.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/deployment/helm/vendored/argo-workflows-0.41.8.tgz
--------------------------------------------------------------------------------
/deployment/helm/vendored/ingress-nginx-4.8.3.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/deployment/helm/vendored/ingress-nginx-4.8.3.tgz
--------------------------------------------------------------------------------
/deployment/helm/vendored/keda-2.14.2.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/deployment/helm/vendored/keda-2.14.2.tgz
--------------------------------------------------------------------------------
/deployment/requirements.txt:
--------------------------------------------------------------------------------
1 | pypgstac==0.7.10
2 | Jinja2==3.1.2
3 |
--------------------------------------------------------------------------------
/deployment/terraform/batch_pool/providers.tf:
--------------------------------------------------------------------------------
1 | provider azurerm {
2 | features {}
3 | skip_provider_registration = true
4 | use_oidc = true
5 | }
6 |
7 | terraform {
8 | required_version = ">= 0.13"
9 |
10 | required_providers {
11 | azurerm = {
12 | source = "hashicorp/azurerm"
13 | version = "3.110.0"
14 | }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/deployment/terraform/batch_pool/variables.tf:
--------------------------------------------------------------------------------
1 | variable "name" {
2 | type = string
3 | }
4 |
5 | variable "resource_group_name" {
6 | type = string
7 | }
8 |
9 | variable "account_name" {
10 | type = string
11 | }
12 |
13 | variable "display_name" {
14 | type = string
15 | }
16 |
17 | variable "vm_size" {
18 | type = string
19 | }
20 |
21 | variable "max_tasks_per_node" {
22 | type = number
23 | }
24 |
25 | variable "subnet_id" {
26 | type = string
27 | }
28 |
29 | variable "min_dedicated" {
30 | type = number
31 | }
32 |
33 | variable "max_dedicated" {
34 | type = number
35 | }
36 |
37 | variable "min_low_priority" {
38 | type = number
39 | }
40 |
41 | variable "max_low_priority" {
42 | type = number
43 | }
44 |
45 | variable "max_increase_per_scale" {
46 | type = number
47 | }
48 |
49 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
50 | # ACR
51 |
52 | variable "acr_name" {
53 | type = string
54 | }
55 |
56 | variable "user_assigned_identity_id" {
57 | type = string
58 | }
--------------------------------------------------------------------------------
/deployment/terraform/resources/acr.tf:
--------------------------------------------------------------------------------
1 | data "azurerm_container_registry" "task_acr" {
2 | name = var.task_acr_name
3 | resource_group_name = var.task_acr_resource_group
4 | }
5 |
6 | data "azurerm_container_registry" "component_acr" {
7 | name = var.component_acr_name
8 | resource_group_name = var.component_acr_resource_group
9 | }
10 |
11 | # Role assignments
12 |
13 | # Note: role to the batch account task acr service principal
14 | # should have AcrPull access to the task acr.
15 |
16 | # add the role to the identity the kubernetes cluster was assigned
17 | resource "azurerm_role_assignment" "attach_acr" {
18 | scope = data.azurerm_container_registry.component_acr.id
19 | role_definition_name = "AcrPull"
20 | principal_id = azurerm_kubernetes_cluster.pctasks.kubelet_identity[0].object_id
21 | }
--------------------------------------------------------------------------------
/deployment/terraform/resources/app_insights.tf:
--------------------------------------------------------------------------------
1 | resource "azurerm_log_analytics_workspace" "pctasks" {
2 | name = "log-${local.prefix}"
3 | location = azurerm_resource_group.pctasks.location
4 | resource_group_name = azurerm_resource_group.pctasks.name
5 | sku = "PerGB2018"
6 | retention_in_days = 30
7 | }
8 |
9 | resource "azurerm_application_insights" "pctasks" {
10 | name = "appi-${local.prefix}"
11 | location = azurerm_resource_group.pctasks.location
12 | resource_group_name = azurerm_resource_group.pctasks.name
13 | workspace_id = azurerm_log_analytics_workspace.pctasks.id
14 | application_type = "web"
15 | }
16 |
--------------------------------------------------------------------------------
/deployment/terraform/resources/providers.tf:
--------------------------------------------------------------------------------
1 | provider "azurerm" {
2 | features {}
3 | skip_provider_registration = true
4 | use_oidc = true
5 |
6 | # This could be used instead of temporarily enabling shared key access once
7 | # this issue is resolved.
8 | # https://github.com/hashicorp/terraform-provider-azurerm/issues/15083
9 | # storage_use_azuread = true
10 | }
11 |
12 | terraform {
13 | required_version = ">= 0.13"
14 |
15 | required_providers {
16 | azurerm = {
17 | source = "hashicorp/azurerm"
18 | version = "3.110.0"
19 | }
20 | }
21 | }
22 |
23 | data "azurerm_client_config" "current" {
24 | }
25 |
26 |
27 | # Terraform stuff to include
28 | # 1. This provider
29 | # 2. Cosmos DB containers
30 | # 3. The AKS Node Pool
31 | # 4. The Kubernetes namespace, secrets
32 |
--------------------------------------------------------------------------------
/deployment/terraform/resources/rg.tf:
--------------------------------------------------------------------------------
1 | resource "azurerm_resource_group" "pctasks" {
2 | name = "rg-${local.full_prefix}"
3 | location = var.region
4 |
5 | tags = {
6 | "ringValue" = "r0"
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/deployment/terraform/staging/backend.tf:
--------------------------------------------------------------------------------
1 | terraform {
2 | backend "azurerm" {
3 | resource_group_name = "pc-test-manual-resources"
4 | storage_account_name = "pctesttfstate"
5 | container_name = "pctasks"
6 | key = "staging.terraform.tfstate"
7 | use_oidc = true
8 | use_azuread_auth = true
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/deployment/terraform/staging/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export DEPLOY_SECRETS_KV=pc-test-deploy-secrets
4 | export DEPLOY_SECRETS_KV_SECRET=pctasks-test-tfvars-staging
5 | export DEPLOY_SECRETS_KV_RG_NAME=pc-test-manual-resources
6 |
7 | export PCTASKS_TASK_KV=kv-pctaskstest-staging
8 | export PCTASKS_TASK_KV_RESOURCE_GROUP_NAME=rg-pctaskstest-staging-westeurope
9 |
--------------------------------------------------------------------------------
/deployment/terraform/staging/output.tf:
--------------------------------------------------------------------------------
1 | output "resources" {
2 | value = module.resources
3 | sensitive = true
4 | }
5 |
--------------------------------------------------------------------------------
/dev-secrets.template.yaml:
--------------------------------------------------------------------------------
1 | # Secrets can be specified as key:value
2 | example: value
--------------------------------------------------------------------------------
/dev/nginx/Dockerfile:
--------------------------------------------------------------------------------
# Dev-only nginx reverse proxy image for the local environment.
# NOTE(review): nginx:1.10 is long past end-of-life -- consider a supported tag.
FROM nginx:1.10

COPY etc/nginx/nginx.conf /etc/nginx/nginx.conf
COPY etc/nginx/conf.d/default.conf /etc/nginx/conf.d/default.conf
--------------------------------------------------------------------------------
/dev/nginx/README.md:
--------------------------------------------------------------------------------
1 | # nginx
2 |
3 | Sets up the NGINX server for the dev environment. We use nginx in the dev environment as a reverse proxy to simulate the production environment.
--------------------------------------------------------------------------------
/dev/nginx/etc/nginx/conf.d/default.conf:
--------------------------------------------------------------------------------
1 | upstream pctasks-server-upstream {
2 | server server:8511;
3 | }
4 |
5 | server {
6 | listen 80;
7 | server_name localhost;
8 |
9 | location /tasks {
10 | proxy_set_header Host $http_host;
11 | proxy_set_header X-Forwarded-For $remote_addr;
12 | proxy_pass_request_headers on;
13 | proxy_buffers 8 8k;
14 | proxy_buffer_size "16k";
15 |
16 | proxy_pass http://pctasks-server-upstream;
17 | proxy_redirect http://pctasks-server-upstream/ /tasks;
18 | rewrite ^/tasks/?(.*)$ /$1 break;
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/dev/nginx/etc/nginx/nginx.conf:
--------------------------------------------------------------------------------
1 | user nginx;
2 |
3 | error_log /var/log/nginx/error.log warn;
4 | pid /var/run/nginx.pid;
5 |
6 | events {
7 |
8 | }
9 |
10 | http {
11 | default_type application/octet-stream;
12 |
13 | include /etc/nginx/conf.d/*.conf;
14 | }
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | livehtml:
12 | sphinx-autobuild --host 0.0.0.0 ${SOURCEDIR} $(BUILDDIR)/html -d _build/doctrees
13 |
14 | # Put it first so that "make" without argument is like "make help".
15 | help:
16 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
17 |
18 | .PHONY: help Makefile
19 |
20 | # Catch-all target: route all unknown targets to Sphinx using the new
21 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
22 | %: Makefile
23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
24 |
--------------------------------------------------------------------------------
/docs/_static/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/docs/_static/.gitignore
--------------------------------------------------------------------------------
/docs/development/index.md:
--------------------------------------------------------------------------------
1 |
2 | # Development
3 |
4 | ```{toctree}
5 | ---
6 | maxdepth: 2
7 | ---
8 | setup
9 | deploying
10 | faq
11 | ```
12 |
--------------------------------------------------------------------------------
/docs/getting_started/index.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | ```{toctree}
4 | ---
5 | maxdepth: 2
6 | ---
7 | dev_workflows
8 | creating_a_dataset
9 | ```
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/reference/index.md:
--------------------------------------------------------------------------------
1 | # References
2 |
3 | ```{toctree}
4 | ---
5 | maxdepth: 2
6 | ---
7 | api
8 | ```
--------------------------------------------------------------------------------
/docs/user_guide/index.md:
--------------------------------------------------------------------------------
1 | # User Guide
2 |
3 | ```{toctree}
4 | ---
5 | maxdepth: 2
6 | ---
7 | settings
8 | workflows
9 | templating
10 | storage
11 | runtime
12 | chunking
13 | streaming
14 | ```
--------------------------------------------------------------------------------
/examples/list-logs.yaml:
--------------------------------------------------------------------------------
1 | id: list-logs
2 | name: List log files in Azurite
3 | dataset: microsoft/test
4 |
5 | jobs:
6 | list-logs-job:
7 | name: List logs job
8 | tasks:
9 | - id: list-logs-task
10 | image: localhost:5001/pctasks-task-base:latest
11 | task: pctasks.task.common.list_files:ListFilesTask
12 | args:
13 | src_uri: blob://devstoreaccount1/tasklogs/
14 |
--------------------------------------------------------------------------------
/ingest-collection.yaml:
--------------------------------------------------------------------------------
1 | name: Ingest Collection Test Workflow
2 | dataset: microsoft/test-collection
3 | target_environment: staging
4 |
5 | jobs:
6 | ingest:
7 | name: Ingest Collection
8 | tasks:
9 | - id: ingest-collection
10 | image_key: ingest
11 | task: pctasks.ingest_task.task:ingest_task
12 | environment:
13 | DB_CONNECTION_STRING: "${{ secrets.pgstac-connection-string }}"
14 | args:
15 | content:
16 | type: Collections
17 | collections:
18 | - ${{ local.file(tests/data-files/collection.json) }}
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | ignore_missing_imports = True
3 | disallow_untyped_defs = True
4 | namespace_packages = True
5 | explicit_package_bases = True
6 |
7 | [mypy-azure.storage.blob.*]
8 | ignore_errors = True
9 |
--------------------------------------------------------------------------------
/pctasks/.dockerignore:
--------------------------------------------------------------------------------
1 | **/.envrc
2 | **/.direnv
3 | **/__pycache__
4 | **/.mypy_cache
5 | **/.pytest_cache
6 | **/.terraform
7 | **/node_modules
8 | **/.terraform
--------------------------------------------------------------------------------
/pctasks/cli/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: CLI
2 |
3 | This is the command-line interface of the PCTasks framework.
4 | It provides the `pctasks` command-line entry point used to
5 | invoke PCTasks functionality.
6 |
7 |
--------------------------------------------------------------------------------
/pctasks/cli/pctasks/cli/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.cli.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/cli/pctasks/cli/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/cli/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/cli/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/cli/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from click.testing import CliRunner
2 |
3 | from pctasks.cli.cli import pctasks_cmd
4 | from pctasks.cli.version import __version__
5 |
6 |
def test_cli_version():
    """The --version flag prints exactly 'pctasks, version X.Y.Z'."""
    result = CliRunner().invoke(pctasks_cmd, ["--version"])
    expected = f"pctasks, version {__version__}\n"
    assert result.output == expected
11 |
12 |
def test_direct_invoke():
    """Calling the command object directly returns exit code 0."""
    exit_code = pctasks_cmd.main(["--version"], standalone_mode=False)
    assert exit_code == 0
16 |
--------------------------------------------------------------------------------
/pctasks/client/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Client
2 |
3 | This project provides functionality for interacting with the PCTasks API, like submitting workflows and querying logs.
4 |
5 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/__init__.py:
--------------------------------------------------------------------------------
1 | # isort:skip_file
2 |
3 | from pctasks.client.version import __version__
4 |
5 | __all__ = ["__version__", "PCTasksClient"]
6 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/constants.py:
--------------------------------------------------------------------------------
1 | NOT_FOUND_EXIT_CODE = 148
2 | FILE_EXISTS_EXIT_CODE = 149
3 | UNEXPECTED_ERROR_EXIT_CODE = 150
4 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/context.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Optional
3 |
4 | from pctasks.core.context import PCTasksCommandContext
5 |
6 |
@dataclass
class ClientCommandContext(PCTasksCommandContext):
    """Context for client CLI commands; extends PCTasksCommandContext with
    output-formatting options."""

    pretty_print: bool = False
    """Whether to pretty print the output, e.g. syntax highlight YAML."""

    # PCTasksCommandContext added here to avoid mypy issues

    profile: Optional[str] = None
    """Settings profile. Determines which settings file is read."""

    settings_file: Optional[str] = None
    """Full path to the settings file. If present, overrides the profile."""
18 | """Full path to the settings file. If present, overrides the profile."""
19 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/errors.py:
--------------------------------------------------------------------------------
class PCTasksError(Exception):
    """Base class for all PCTasks errors."""

    pass


class NotFoundError(PCTasksError):
    """Raised when a record is not found."""

    pass


class WorkflowRunNotFoundError(NotFoundError):
    """Raised when a workflow run is not found."""

    pass


class WorkflowNotFoundError(NotFoundError):
    """Raised when a workflow is not found."""

    pass


class WorkflowExistsError(PCTasksError):
    """Raised when a workflow exists when it is not expected.

    Bug fix: previously subclassed NotFoundError, which inverted the
    semantics -- this error signals presence, not absence, so callers
    catching NotFoundError should not catch it.
    """

    pass


class JobPartitionRunNotFoundError(NotFoundError):
    """Raised when a job partition run is not found."""

    pass


class ConfirmationError(PCTasksError):
    """Raised when a required user confirmation fails or is declined."""

    pass


class NoWorkflowIDError(PCTasksError):
    """Raised when a workflow ID is required but not provided."""

    pass
43 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/profile/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/profile/__init__.py
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/py.typed
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/runs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/runs/__init__.py
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/runs/options.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable
2 |
3 | import click
4 |
5 |
def opt_page(fn: Callable[..., Any]) -> Callable[..., Any]:
    """Decorator adding the ``-p/--page`` flag for paging output."""
    page_option = click.option(
        "-p", "--page", is_flag=True, help="Page output."
    )  # type: ignore[var-annotated]
    page_option(fn)
    return fn
12 |
13 |
def opt_all(fn: Callable[..., Any]) -> Callable[..., Any]:
    """Decorator adding the ``-a/--all`` flag to print all output."""
    all_option = click.option(
        "-a", "--all", is_flag=True, help="Print all output, even if large."
    )  # type: ignore[var-annotated]
    all_option(fn)
    return fn
20 |
21 |
def opt_status(fn: Callable[..., Any]) -> Callable[..., Any]:
    """Decorator adding the ``-s/--status`` option to filter by status."""
    status_option = click.option(
        "-s", "--status", help="Filter by status."
    )  # type: ignore[var-annotated]
    status_option(fn)
    return fn
26 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/storage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/storage/__init__.py
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/utils.py:
--------------------------------------------------------------------------------
def status_emoji(status: str) -> str:
    """Return an emoji representing the given run/task status.

    Matching is case-insensitive. Unrecognized statuses (e.g. pending)
    fall back to a clock emoji.
    """
    # Normalize once instead of lowercasing for each comparison, and use a
    # single lookup table rather than an inconsistent if/if/else chain.
    emoji_for_status = {
        "completed": "✅",
        "failed": "❌",
        "running": "🏃",
        "cancelled": "🚫",
        "skipped": "⏭️",
    }
    return emoji_for_status.get(status.lower(), "🕖")
14 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/workflow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/workflow/__init__.py
--------------------------------------------------------------------------------
/pctasks/client/pctasks/client/workflow/options.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable
2 |
3 | import click
4 |
5 |
def opt_args(fn: Callable[..., Any]) -> Callable[..., Any]:
    """Decorator adding the repeatable ``-a/--arg`` (name, value) option."""
    arg_option = click.option(
        "-a", "--arg", multiple=True, help="Argument value to use.", type=(str, str)
    )  # type: ignore[var-annotated]
    arg_option(fn)
    return fn
12 |
--------------------------------------------------------------------------------
/pctasks/client/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/client/tests/data-files/mycode.py:
--------------------------------------------------------------------------------
1 | from pctasks.dev.mocks import MockTask
2 |
3 |
class MyMockTask(MockTask):
    # NOTE(review): appears to be a minimal fixture subclass used by tests
    # that upload user code (lives under tests/data-files) -- confirm usage.
    pass
6 |
--------------------------------------------------------------------------------
/pctasks/client/tests/records/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/tests/records/__init__.py
--------------------------------------------------------------------------------
/pctasks/client/tests/records/test_records.py:
--------------------------------------------------------------------------------
1 | def test_records(): ...
2 |
--------------------------------------------------------------------------------
/pctasks/client/tests/test_storage.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from tempfile import TemporaryDirectory
3 |
4 | from pctasks.dev.blob import temp_azurite_blob_storage
5 | from pctasks.dev.test_utils import run_pctasks
6 |
7 | TEST_DATA_DIR = Path(__file__).parent / "data-files"
8 |
9 |
def test_get():
    # Seed Azurite blob storage with the data-files directory, then run the
    # `pctasks storage get` command and verify the file lands in the temp dir.
    with temp_azurite_blob_storage(test_files=TEST_DATA_DIR):
        local_path = TEST_DATA_DIR / "test_collection.json"
        with TemporaryDirectory() as tmp_dir:
            # NOTE(review): passes the local filesystem path as the source
            # URI rather than a blob URI -- confirm this is intended.
            run_pctasks(["storage", "get", str(local_path), "-o", tmp_dir])
            assert (Path(tmp_dir) / "test_collection.json").exists()
16 |
17 |
def test_put():
    # Upload a local file with `pctasks storage put` into a fresh Azurite
    # container and verify it exists remotely afterward.
    with temp_azurite_blob_storage() as storage:
        local_path = TEST_DATA_DIR / "test_collection.json"
        # Trailing slash marks the destination as a directory prefix.
        remote_uri = storage.get_uri() + "/"
        run_pctasks(["storage", "put", str(local_path), remote_uri])

        assert storage.file_exists("test_collection.json")
25 |
--------------------------------------------------------------------------------
/pctasks/client/tests/test_template.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import yaml
4 |
5 | from pctasks.client.workflow.template import LocalTemplater
6 | from pctasks.ingest.models import IngestCollectionsInput
7 |
8 | HERE = Path(__file__).parent
9 | TEST_COLLECTION = HERE / "data-files" / "test_collection.json"
10 |
11 |
def test_local_file_template():
    # Build a Collections-ingest YAML document that references a local file
    # via the ${{ local.file(...) }} template expression.
    yaml_str = (
        """
type: Collections
collections:
  - ${{ local.file("""
        + str(TEST_COLLECTION)
        + """) }}
"""
    )

    # Templating should replace the local.file(...) expression with the
    # parsed contents of the referenced JSON collection.
    yaml_dict = yaml.safe_load(yaml_str)
    templated_dict = LocalTemplater().template_dict(yaml_dict)

    data = IngestCollectionsInput.model_validate(templated_dict)

    assert data.collections
    assert data.collections[0]["id"] == "test-collection"
30 |
--------------------------------------------------------------------------------
/pctasks/core/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Core
2 |
3 | This is the base library of the PCTasks framework.
4 | It provides core functionality and base messages as
5 | Pydantic models.
6 |
7 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.core.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/_compat.py:
--------------------------------------------------------------------------------
import sys

# TypeAlias was added to `typing` in Python 3.10; on older interpreters it
# is provided by the typing_extensions backport instead.
if sys.version_info >= (3, 10):
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias


# Re-export so other modules can import TypeAlias from pctasks.core._compat
# without repeating the version check.
__all__ = [
    "TypeAlias",
]
12 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/context.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Optional
3 |
4 |
@dataclass
class PCTasksCommandContext:
    """Context used in the pctasks CLI."""

    # Both fields are optional; per the field docs below, settings_file
    # takes precedence over profile when both are set.
    profile: Optional[str] = None
    """Settings profile. Determines which settings file is read."""

    settings_file: Optional[str] = None
    """Full path to the settings file. If present, overrides the profile."""
14 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/cosmos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/cosmos/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/cosmos/containers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/cosmos/containers/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/cosmos/page.py:
--------------------------------------------------------------------------------
from typing import Iterable, Iterator, Optional, TypeVar

T = TypeVar("T")


class Page(Iterable[T]):
    """One page of results plus an optional continuation token."""

    def __init__(self, items: Iterable[T], continuation_token: Optional[str]):
        # Hold the raw iterable; iteration defers to it directly.
        self._items = items
        self._continuation_token = continuation_token

    def __iter__(self) -> Iterator[T]:
        yield from self._items

    @property
    def continuation_token(self) -> Optional[str]:
        """Token for fetching the next page, or None if this is the last."""
        return self._continuation_token
17 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/models/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/models/activity.py:
--------------------------------------------------------------------------------
1 | from typing import Generic, TypeVar
2 |
3 | from pydantic import BaseModel
4 |
5 | from pctasks.core.models.base import PCBaseModel, RunRecordId
6 |
7 | T = TypeVar("T", bound=BaseModel)
8 |
9 |
class ActivityMessage(PCBaseModel, Generic[T]):
    """Envelope pairing a run record ID with a typed payload message."""

    # Identifies the run this activity message belongs to.
    run_record_id: RunRecordId
    # The wrapped payload; any pydantic BaseModel subtype.
    msg: T
13 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/models/tokens.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional
2 |
3 | from pctasks.core.models.base import PCBaseModel
4 |
5 |
class ContainerTokens(PCBaseModel):
    """Access tokens for a single blob container."""

    # Token applying to the whole container; may be None.
    token: Optional[str] = None
    # Per-blob tokens -- presumably keyed by blob name; TODO confirm.
    blobs: Optional[Dict[str, str]] = None


class StorageAccountTokens(PCBaseModel):
    """Access tokens for a storage account."""

    # Token applying to the whole account; may be None.
    token: Optional[str] = None
    # Per-container tokens -- presumably keyed by container name; TODO confirm.
    containers: Optional[Dict[str, ContainerTokens]] = None
14 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/models/utils.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 | from dateutil.tz import tzutc
4 |
5 |
def tzutc_now() -> datetime:
    """Consistent timezone-aware UTC timestamp for record models that are
    serialized for API responses.

    Returns:
        The current time as an aware ``datetime`` with a UTC tzinfo.
    """
    # Use the stdlib UTC tzinfo (same zero offset as dateutil's tzutc())
    # so this code path does not require the third-party dateutil package.
    from datetime import timezone

    return datetime.now(timezone.utc)
10 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/py.typed
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/storage/errors.py:
--------------------------------------------------------------------------------
class FileNotFoundError(Exception):
    # NOTE(review): this shadows Python's builtin FileNotFoundError and does
    # not inherit from it, so `except FileNotFoundError` catches different
    # things depending on which name is in scope -- consider renaming
    # (e.g. StorageFileNotFoundError). Left as-is to avoid breaking callers.
    pass
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/tables/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/tables/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/tables/utils.py:
--------------------------------------------------------------------------------
import unicodedata

# Characters never allowed in Azure Table Storage partition/row keys.
PROHIBITED_TABLE_KEY_CHARS = ["/", "\\", "#", "?"]


def is_valid_table_key(table_key: str) -> bool:
    """Return True if ``table_key`` is a valid table partition/row key.

    A key is invalid if it contains any prohibited character
    ('/', '\\\\', '#', '?') or any Unicode control character
    (category "C", e.g. U+0000-U+001F or U+007F-U+009F).
    """
    for char in table_key:
        if char in PROHIBITED_TABLE_KEY_CHARS:
            return False
        # Bug fix: the control-character check previously inspected the
        # prohibited-characters list instead of the key's own characters,
        # so control characters were never rejected.
        if unicodedata.category(char)[0] == "C":
            return False
    return True
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/utils/stac.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Any, Dict, List, Union
3 |
4 | import pystac
5 | from stac_validator.validate import StacValidate
6 |
7 |
class STACValidationError(Exception):
    """Raised when STAC validation fails.

    Attributes:
        detail: The validator's message payload describing each failure.
    """

    def __init__(self, message: str, detail: List[Dict[str, Any]]):
        super().__init__(message)
        # Bug fix: previously `detail = detail` assigned a local variable,
        # discarding the failure details instead of storing them.
        self.detail = detail
12 |
13 |
def validate_stac(object: Union[Dict[str, Any], pystac.STACObject]) -> None:
    """Validate a STAC dict or pystac object, raising on failure.

    Raises:
        STACValidationError: if validation reports any failure; the
            validator's message payload is attached as the error detail.
    """
    stac_content = object if isinstance(object, dict) else object.to_dict()
    checker = StacValidate(extensions=True)
    checker.stac_content = stac_content
    checker.run()
    if checker.valid:
        return
    raise STACValidationError(
        f"Invalid STAC:\n{json.dumps(checker.message, indent=2)}",
        checker.message,
    )
23 |
--------------------------------------------------------------------------------
/pctasks/core/pctasks/core/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/core/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/tests/cosmos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/cosmos/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/tests/cosmos/containers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/cosmos/containers/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/tests/data-files/example_module/__init__.py:
--------------------------------------------------------------------------------
1 | from .a import A
2 | from .b import B
3 |
4 | __all__ = ["A", "B"]
5 |
--------------------------------------------------------------------------------
/pctasks/core/tests/data-files/example_module/a.py:
--------------------------------------------------------------------------------
1 | class A:
2 | def a(self):
3 | return "a"
4 |
--------------------------------------------------------------------------------
/pctasks/core/tests/data-files/example_module/b.py:
--------------------------------------------------------------------------------
1 | from .a import A
2 |
3 |
4 | class B(A):
5 | def b(self):
6 | return "b"
7 |
--------------------------------------------------------------------------------
/pctasks/core/tests/data-files/simple-assets/a/asset-a-1.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-a-1.json"}
--------------------------------------------------------------------------------
/pctasks/core/tests/data-files/simple-assets/a/asset-a-2.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-a-2.json"}
--------------------------------------------------------------------------------
/pctasks/core/tests/data-files/simple-assets/b/asset-b-1.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-b-1.json"}
--------------------------------------------------------------------------------
/pctasks/core/tests/data-files/simple-assets/b/asset-b-2.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-b-2.json"}
--------------------------------------------------------------------------------
/pctasks/core/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/models/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/tests/storage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/storage/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/tests/tables/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/tables/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/tests/test_messages.py:
--------------------------------------------------------------------------------
1 | from pctasks.core.models.task import TaskDefinition
2 |
3 |
def test_submit_message_deserialize_serialize():
    """A TaskDefinition should round-trip through dict serialization."""
    payload = {
        "id": "test-task",
        "image": "test",
        "task": "foo.bar:task",
        "args": {},
    }

    original = TaskDefinition(**payload)
    round_tripped = TaskDefinition(**original.dict(exclude_none=True))

    assert original == round_tripped
17 |
--------------------------------------------------------------------------------
/pctasks/core/tests/test_yaml.py:
--------------------------------------------------------------------------------
1 | from pctasks.core.models.workflow import WorkflowDefinition
2 | from pctasks.core.yaml import YamlValidationError
3 |
4 |
def test_error_handling():
    """Workflow YAML missing required fields should raise YamlValidationError
    whose message names the missing field."""
    try:
        _ = WorkflowDefinition.from_yaml(
            """
name: A workflow* *with* *asterisks

jobs:
  name: A job
  test-job:
    tasks:
    - id: test-task
      image-key: ingest-prod
      task: tests.test_submit.MockTask
      args:
        hello: world
"""
        )
    except YamlValidationError as e:
        error_text = str(e)
        assert "dataset: Field required" in error_text
    else:
        # Bug fix: previously the test passed silently when no exception
        # was raised at all, so a regression in validation went undetected.
        raise AssertionError("Expected YamlValidationError was not raised")
25 |
--------------------------------------------------------------------------------
/pctasks/core/tests/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/utils/__init__.py
--------------------------------------------------------------------------------
/pctasks/core/tests/utils/test_backoff.py:
--------------------------------------------------------------------------------
1 | import azure.core.exceptions
2 | import pytest
3 | import requests.exceptions
4 |
5 | from pctasks.core.utils.backoff import with_backoff
6 |
7 |
@pytest.mark.parametrize(
    "kind",
    [
        TimeoutError,
        requests.exceptions.ConnectionError,
        azure.core.exceptions.IncompleteReadError,
    ],
)
def test_retry_timeout_errors(kind):
    """with_backoff retries retryable errors and returns the final result."""
    call_count = 0

    def flaky():
        nonlocal call_count
        call_count += 1
        # Fail on the first two attempts, succeed on the third.
        if call_count <= 2:
            raise kind()
        return True

    result = with_backoff(flaky)

    assert call_count == 3
    assert result is True
35 |
--------------------------------------------------------------------------------
/pctasks/dataset/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Dataset
2 |
3 | This component of the PCTasks framework lets users create datasets by defining
4 | configuration and specific tasks around STAC Items.
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.dataset.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/chunks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/chunks/__init__.py
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/chunks/constants.py:
--------------------------------------------------------------------------------
1 | CREATE_CHUNKS_TASK_PATH = "pctasks.dataset.chunks.task:create_chunks_task"
2 | LIST_CHUNKS_TASK_PATH = "pctasks.dataset.chunks.task:list_chunks_task"
3 |
4 | ASSET_CHUNKS_PREFIX = "assets"
5 | ITEM_CHUNKS_PREFIX = "items"
6 |
7 | ALL_CHUNK_PREFIX = "all"
8 | SUCCESS_CHUNK_PREFIX = "success"
9 | FAILURE_CHUNK_PREFIX = "failed"
10 |
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/constants.py:
--------------------------------------------------------------------------------
# Default number of asset entries per chunk file.
DEFAULT_CHUNK_LENGTH = 30000

CREATE_CHUNKS_TASK_ID = "create-chunks"
LIST_CHUNKS_TASK_ID = "list-chunks"
CREATE_ITEMS_TASK_ID = "create-items"

# Folder name under which chunk files are stored.
CHUNK_FOLDER = "chunks"

# NOTE(review): this value uses an underscore while
# pctasks/dataset/items/constants.py defines
# PROCESS_ITEMS_JOB_ID = "process-items" (hyphen) — confirm which spelling
# consumers expect before unifying.
PROCESS_ITEMS_JOB_ID = "process_items"

# Default file name for a dataset definition.
DEFAULT_DATASET_YAML_PATH = "dataset.yaml"
12 |
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/items/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/items/__init__.py
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/items/constants.py:
--------------------------------------------------------------------------------
1 | PROCESS_ITEMS_JOB_ID = "process-items"
2 | CREATE_ITEMS_TASK_ID = "create-items"
3 |
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/py.typed
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/splits/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/splits/__init__.py
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/splits/constants.py:
--------------------------------------------------------------------------------
1 | CREATE_SPLITS_TASK_ID = "create-splits"
2 | CREATE_SPLITS_TASK_PATH = "pctasks.dataset.splits.task:create_splits_task"
3 |
--------------------------------------------------------------------------------
/pctasks/dataset/pctasks/dataset/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/dataset/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/dataset/tests/chunks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/tests/chunks/__init__.py
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/datasets/naip.yaml:
--------------------------------------------------------------------------------
1 | name: naip
2 | image: pc-tasks-naip:latest
3 | collections:
4 | - id: naip
5 | class: naip.dataset:Naip
6 | asset_storage:
7 | - storage_account: naipeuwest
8 | container: naip
9 | sas_token: ${{ pc.get_token(naipeuwest, naip) }}
10 | chunks:
11 | length: 3000
12 | ext: "*.tif"
13 | splits:
14 | - depth: 2
15 | name_starts_with: v002
16 | chunk_storage:
17 | uri: blob://naipeuwest/naip-etl-data/chunks/assets
18 | item_storage:
19 | uri: blob://naipeuwest/naip-etl-data/chunks/items
20 |
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/datasets/test-dataset.yaml:
--------------------------------------------------------------------------------
1 | id: dataset-test
2 | image: mock:latest
3 | code:
4 | src: ${{ local.path(mycode.py) }}
5 |
6 | args:
7 | - test_prefix
8 | - sas_token
9 |
10 | task_config:
11 | test-dataset:
12 | create-items:
13 | tags:
14 | batch_pool_id: high_memory_pool
15 | ingest-collection:
16 | tags:
17 | batch_pool_id: ingest_pool
18 |
19 | collections:
20 | - id: test-dataset
21 | class: mycode:TestCollection
22 | asset_storage:
23 | - uri: blob://devstoreaccount1/test-data/${{ args.test_prefix }}/assets
24 | token: ${{ args.sas_token }}
25 | chunks:
26 | options:
27 | chunk_length: 2
28 | extensions:
29 | - .json
30 | splits:
31 | - depth: 1
32 | chunk_storage:
33 | uri: blob://devstoreaccount1/test-data/${{ args.test_prefix }}/chunks
34 |
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/simple-assets/a/asset-a-1.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-a-1.json"}
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/simple-assets/a/asset-a-2.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-a-2.json"}
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/simple-assets/b/asset-b-1.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-b-1.json"}
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/simple-assets/b/asset-b-2.json:
--------------------------------------------------------------------------------
1 | {"name": "asset-b-2.json"}
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/test-assets/one.txt:
--------------------------------------------------------------------------------
1 | one
2 |
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/test-assets/three.txt:
--------------------------------------------------------------------------------
1 | three
2 |
--------------------------------------------------------------------------------
/pctasks/dataset/tests/data-files/test-assets/two.txt:
--------------------------------------------------------------------------------
1 | two
2 |
--------------------------------------------------------------------------------
/pctasks/dataset/tests/items/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/tests/items/__init__.py
--------------------------------------------------------------------------------
/pctasks/dev/pctasks/dev/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.dev.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/dev/pctasks/dev/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dev/pctasks/dev/py.typed
--------------------------------------------------------------------------------
/pctasks/dev/pctasks/dev/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/dev/tests/test_temp_queue.py:
--------------------------------------------------------------------------------
1 | from pctasks.dev.queues import TempQueue
2 |
3 |
4 | def test_temp_queue_name() -> None:
5 | name = "test-temp-queue-name"
6 | with TempQueue(name=name) as queue_client:
7 | assert queue_client.queue_name == name
8 |
9 |
10 | def test_temp_queue_suffix() -> None:
11 | suffix = "test-temp-queue-suffix"
12 | name = f"test-queue-{suffix}"
13 | with TempQueue(suffix=suffix) as queue_client:
14 | assert queue_client.queue_name == name
15 |
16 |
17 | def test_temp_queue_ignores_existing_resource() -> None:
18 | name = "test-temp-queue-name"
19 | with TempQueue(name=name):
20 | with TempQueue(name=name):
21 | # No exception
22 | pass
23 |
--------------------------------------------------------------------------------
/pctasks/ingest/pctasks/ingest/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.ingest.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/ingest/pctasks/ingest/constants.py:
--------------------------------------------------------------------------------
# Default number of items grouped together per insert.
DEFAULT_INSERT_GROUP_SIZE = 5000

# Import path of the ingest task entry point.
INGEST_TASK = "pctasks.ingest_task.task:ingest_task"
INGEST_TASK_ID = "ingest-items"
ITEM_TASK_ID = "ingest-item"
COLLECTION_TASK_ID = "ingest-collection"
NDJSON_TASK_ID = "ingest-ndjson"

# Message type discriminators for ingest payloads.
NDJSON_MESSAGE_TYPE = "Ndjson"
COLLECTIONS_MESSAGE_TYPE = "Collections"

# Environment variable holding the database connection string
# (used with a pgstac database; see ingest_task/tests/conftest.py).
DB_CONNECTION_STRING_ENV_VAR = "DB_CONNECTION_STRING"
--------------------------------------------------------------------------------
/pctasks/ingest/pctasks/ingest/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest/pctasks/ingest/py.typed
--------------------------------------------------------------------------------
/pctasks/ingest/pctasks/ingest/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/ingest/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/ingest/tests/test_settings.py:
--------------------------------------------------------------------------------
1 | from pctasks.core.yaml import model_from_yaml
2 | from pctasks.ingest.settings import SECTION_NAME, IngestSettings
3 |
4 |
5 | def test_image_keys():
6 | yaml = """
7 | submit:
8 | account_name: pctrxetlrobrxetlsa
9 | queue_name: inbox
10 | image_keys:
11 | - key: ingest
12 | image: pctasks-ingest:lastest
13 | environment:
14 | - DB_CONNECTION_STR= ${ secrets.DB_CONNECTION_STR }
15 |
16 | ingest:
17 | image_keys:
18 | default: ingest
19 | targets:
20 | prod: ingest-prod
21 | staging: ingest-staging
22 | """
23 |
24 | settings = model_from_yaml(IngestSettings, yaml, section=SECTION_NAME)
25 | assert settings.image_keys.default == "ingest"
26 | assert settings.image_keys.targets
27 | assert settings.image_keys.targets["prod"] == "ingest-prod"
28 |
--------------------------------------------------------------------------------
/pctasks/ingest_task/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9-slim
2 |
3 | # Setup timezone info
4 | ENV TZ=UTC
5 |
6 | ENV LC_ALL=C.UTF-8
7 | ENV LANG=C.UTF-8
8 | ENV PIP_NO_CACHE_DIR=1
9 | RUN pip install "setuptools>=65.5.1"
10 |
11 | RUN python -m pip install --upgrade pip
12 |
13 | #
14 | # Copy and install packages
15 | #
16 |
17 | COPY core /opt/src/core
18 | RUN cd /opt/src/core && \
19 | pip install .
20 |
21 | COPY cli /opt/src/cli
22 | RUN cd /opt/src/cli && \
23 | pip install .
24 |
25 | COPY task /opt/src/task
26 | RUN cd /opt/src/task && \
27 | pip install .
28 |
29 | COPY client /opt/src/client
30 | RUN cd /opt/src/client && \
31 | pip install .
32 |
33 | COPY ingest /opt/src/ingest
34 | RUN cd /opt/src/ingest && \
35 | pip install .
36 |
37 | COPY ingest_task /opt/src/ingest_task
38 | RUN cd /opt/src/ingest_task && \
39 | pip install .
40 |
41 | WORKDIR /opt/src
42 |
--------------------------------------------------------------------------------
/pctasks/ingest_task/pctasks/ingest_task/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.ingest_task.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/ingest_task/pctasks/ingest_task/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest_task/pctasks/ingest_task/py.typed
--------------------------------------------------------------------------------
/pctasks/ingest_task/pctasks/ingest_task/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/ingest_task/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest_task/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/ingest_task/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | from contextlib import contextmanager
3 | from typing import Generator
4 |
5 | from pctasks.core.utils import environment
6 | from pctasks.dev.db import ConnStrInfo, temp_pgstac_db
7 | from pctasks.ingest.constants import DB_CONNECTION_STRING_ENV_VAR
8 |
9 |
@contextmanager
def ingest_test_environment() -> Generator[ConnStrInfo, None, None]:
    """Provision a temporary pgstac database for ingest tests.

    While the context is active, the ingest DB connection-string environment
    variable points at the temporary database's local connection string.

    Raises:
        ValueError: if SECRETS_DB_CONNECTION_STRING is not set.
    """
    db_secret = os.getenv("SECRETS_DB_CONNECTION_STRING")
    if not db_secret:
        raise ValueError("SECRETS_DB_CONNECTION_STRING must be set")

    with temp_pgstac_db(db_secret) as conn_str_info:
        env_vars = {DB_CONNECTION_STRING_ENV_VAR: conn_str_info.local}
        with environment(**env_vars):
            yield conn_str_info
20 |
--------------------------------------------------------------------------------
/pctasks/notify/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Notify
2 |
This component of the PCTasks framework provides notification functionality.
4 |
--------------------------------------------------------------------------------
/pctasks/notify/pctasks/notify/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.notify.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/notify/pctasks/notify/models.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 |
3 | from pctasks.core.models.base import PCBaseModel
4 | from pctasks.core.models.event import CloudEvent, NotificationMessage
5 | from pctasks.core.models.registration import (
6 | EventGridChannelInfo,
7 | STACItemEventRegistration,
8 | )
9 |
10 |
class NotifyFetchMessage(PCBaseModel):
    """Request to fetch the registrations matching a notification."""

    notification: NotificationMessage
    # Optional target-environment filter — presumably restricts which
    # registrations match (e.g. staging vs production); confirm with callers.
    target_environment: Optional[str] = None
14 |
15 |
class NotifyFetchResult(PCBaseModel):
    """Registrations returned for a NotifyFetchMessage request."""

    registrations: List[STACItemEventRegistration]
18 |
19 |
class NotifyWebhookMessage(PCBaseModel):
    """Instruction to deliver a CloudEvent to a webhook endpoint."""

    # Destination URL for the webhook delivery.
    endpoint: str
    event: CloudEvent
23 |
24 |
class NotifyEventGridChannelMessage(PCBaseModel):
    """Instruction to deliver a CloudEvent to an Event Grid channel."""

    channel_info: EventGridChannelInfo
    event: CloudEvent
28 |
29 |
class NotifyResult(PCBaseModel):
    """Outcome of a notification operation; defaults to success."""

    success: bool = True
32 |
--------------------------------------------------------------------------------
/pctasks/notify/pctasks/notify/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/notify/pctasks/notify/py.typed
--------------------------------------------------------------------------------
/pctasks/notify/pctasks/notify/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/notify/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/notify/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/notify/tests/test_process.py:
--------------------------------------------------------------------------------
1 | def test_notify(): ...
2 |
--------------------------------------------------------------------------------
/pctasks/router/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Router
2 |
This component of the PCTasks framework provides message routing for tasks.
4 |
--------------------------------------------------------------------------------
/pctasks/router/pctasks/router/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.router.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/router/pctasks/router/handlers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/router/pctasks/router/handlers/__init__.py
--------------------------------------------------------------------------------
/pctasks/router/pctasks/router/handlers/forward.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Dict
2 |
3 | import orjson
4 |
5 | from pctasks.core.message_handler import MessageHandler
6 | from pctasks.core.queues import QueueService
7 | from pctasks.router.settings import RouterSettings
8 |
9 |
class ForwardingMessageHandler(MessageHandler):
    """Forwards incoming messages unchanged onto a settings-selected queue.

    The destination queue name is resolved from RouterSettings at handle
    time via the callable supplied at construction, so one handler class
    can serve multiple target queues.
    """

    def __init__(self, get_queue_name: Callable[[RouterSettings], str]) -> None:
        # Callable mapping current settings to the destination queue name.
        self.get_queue_name = get_queue_name

    def handle(self, message: Dict[str, Any]) -> None:
        # Settings are fetched per message rather than cached at
        # construction, so configuration changes are picked up.
        settings = RouterSettings.get()
        with QueueService.from_connection_string(
            connection_string=settings.queues_connection_string,
            queue_name=self.get_queue_name(settings),
        ) as queue:
            # orjson.dumps produces bytes; OPT_SERIALIZE_NUMPY allows numpy
            # values inside the message payload to be serialized.
            queue.send_message(orjson.dumps(message, option=orjson.OPT_SERIALIZE_NUMPY))
21 |
--------------------------------------------------------------------------------
/pctasks/router/pctasks/router/message_handler.py:
--------------------------------------------------------------------------------
1 | from pctasks.core.constants import (
2 | EVENTGRID_MESSAGE_TYPE,
3 | NOTIFICATION_MESSAGE_TYPE,
4 | WORKFLOW_SUBMIT_MESSAGE_TYPE,
5 | )
6 | from pctasks.core.message_handler import TypeMessageHandlers
7 | from pctasks.router.handlers.eventgrid import EventGridMessageHandler
8 | from pctasks.router.handlers.forward import ForwardingMessageHandler
9 |
10 | HANDLERS = TypeMessageHandlers(
11 | {
12 | EVENTGRID_MESSAGE_TYPE: EventGridMessageHandler(),
13 | WORKFLOW_SUBMIT_MESSAGE_TYPE: ForwardingMessageHandler(
14 | lambda settings: settings.workflow_queue_name
15 | ),
16 | NOTIFICATION_MESSAGE_TYPE: ForwardingMessageHandler(
17 | lambda settings: settings.notification_queue_name
18 | ),
19 | }
20 | )
21 |
--------------------------------------------------------------------------------
/pctasks/router/pctasks/router/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/router/pctasks/router/py.typed
--------------------------------------------------------------------------------
/pctasks/router/pctasks/router/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/router/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/router/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/router/tests/test_process.py:
--------------------------------------------------------------------------------
1 | def test_router(): ...
2 |
--------------------------------------------------------------------------------
/pctasks/router/tests/test_settings.py:
--------------------------------------------------------------------------------
1 | from pctasks.router.settings import RouterSettings
2 |
3 |
4 | def test_settings():
5 | _ = RouterSettings.get()
6 |
--------------------------------------------------------------------------------
/pctasks/run/Dockerfile:
--------------------------------------------------------------------------------
FROM mcr.microsoft.com/azurelinux/base/python:3.12

# ca-certificates enables TLS verification; azure-cli is installed
# presumably for Azure credential access at runtime — TODO confirm.
RUN tdnf install ca-certificates azure-cli -y \
    && tdnf clean all
ENV PIP_NO_CACHE_DIR=1
# Upgrade setuptools past 65.5.1 (likely a security pin — TODO confirm).
RUN pip install "setuptools>=65.5.1"

WORKDIR /opt/src

# Each pctasks package is copied and installed in its own layer so a change
# to one package does not invalidate the build cache for the others.
COPY core /opt/src/core
RUN cd /opt/src/core && \
    pip install .

COPY cli /opt/src/cli
RUN cd /opt/src/cli && \
    pip install .

COPY task /opt/src/task
RUN cd /opt/src/task && \
    pip install .

COPY client /opt/src/client
RUN cd /opt/src/client && \
    pip install .

COPY run /opt/src/run
RUN cd /opt/src/run && \
    pip install .

# Bind address and port for the service entrypoint.
ENV APP_HOST=0.0.0.0
ENV APP_PORT=81
32 |
--------------------------------------------------------------------------------
/pctasks/run/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Run
2 |
This component of the PCTasks framework is responsible for running
workflows and tasks that are submitted to the system. The `pctasks.run` library
5 | contains functionality used to transform workflows into
6 | Azure Batch jobs and tasks.
7 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.run.version import __version__
2 |
3 | __all__ = ["__version__"]
4 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/argo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/argo/__init__.py
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/batch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/batch/__init__.py
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/batch/utils.py:
--------------------------------------------------------------------------------
import re
from datetime import datetime, timezone
3 |
4 |
def make_unique_job_id(job_id: str) -> str:
    """Append a UTC timestamp to ``job_id`` and sanitize it for Batch.

    The timestamp (yyMMdd-HHmmss) makes repeated submissions of the
    same job id distinct while keeping the id readable.
    """
    # datetime.utcnow() is deprecated as of Python 3.12; use an
    # aware UTC datetime instead (strftime output is identical).
    timestamp = datetime.now(timezone.utc).strftime("%y%m%d-%H%M%S")
    return make_valid_batch_id(f"{job_id}-{timestamp}")


def make_valid_batch_id(id: str) -> str:
    """Returns a job id or task id that is valid to Batch

    Note from Azure Batch SDK:

    Task ids can only contain any
    combination of alphanumeric characters along with dash (-)
    and underscore (_).
    The name must be from 1 through 64 characters long
    """
    # Replace every disallowed character with a dash.
    id = re.sub("[^a-zA-Z0-9_-]", "-", id)
    if len(id) > 64:
        # Keep the head and the tail of the id so both the original prefix
        # and the (often unique) suffix survive: 32 + 1 + 31 = 64 chars.
        id = id[:32] + "-" + id[-31:]
    # Drop separator dashes that sanitization may leave at either end.
    return id.strip("-")
25 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/dag.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import networkx as nx
4 |
5 | from pctasks.core.models.workflow import JobDefinition
6 |
7 |
def sort_jobs(jobs: List[JobDefinition]) -> List[JobDefinition]:
    """Return ``jobs`` topologically sorted by their dependencies.

    Builds a directed graph with an edge ``dep -> job`` for each declared
    dependency and orders the jobs so every job appears after all of the
    jobs it depends on.

    Raises:
        networkx.NetworkXUnfeasible: if the dependency graph has a cycle.
    """
    graph = nx.DiGraph()
    for job in jobs:
        graph.add_node(job.get_id())
        for dep in job.get_dependencies() or []:
            graph.add_edge(dep, job.get_id())

    # Map each id to its topological position once (O(n)) instead of the
    # O(n^2) list.index lookup inside the sort key.
    position = {job_id: i for i, job_id in enumerate(nx.topological_sort(graph))}
    return sorted(jobs, key=lambda job: position[job.get_id()])
18 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/errors.py:
--------------------------------------------------------------------------------
class WorkflowFailedError(Exception):
    """Raised when a workflow run fails."""

    pass


class TaskFailedError(Exception):
    """Raised when an individual task fails."""

    pass


class TaskPreparationError(Exception):
    """Raised when a task cannot be prepared for execution."""

    pass


class WorkflowRunRecordError(Exception):
    """Raised when there are unexpected results or behaviors from run records"""

    pass
17 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/py.typed
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/secrets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/secrets/__init__.py
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/task/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pctasks.run.settings import RunSettings, TaskRunnerType
4 | from pctasks.run.task.argo import ArgoTaskRunner
5 | from pctasks.run.task.base import TaskRunner
6 | from pctasks.run.task.batch import BatchTaskRunner
7 | from pctasks.run.task.local import LocalTaskRunner
8 |
9 |
def get_task_runner(settings: Optional[RunSettings] = None) -> TaskRunner:
    """Return the TaskRunner implementation selected by the settings.

    Loads RunSettings.get() when no settings are supplied. Raises
    ValueError for an unrecognized task_runner_type.
    """
    settings = settings or RunSettings.get()
    runner_type = settings.task_runner_type

    if runner_type == TaskRunnerType.BATCH:
        return BatchTaskRunner(settings)
    if runner_type == TaskRunnerType.ARGO:
        return ArgoTaskRunner(settings)
    if runner_type == TaskRunnerType.LOCAL:
        # Endpoint URL presence is checked during settings validation.
        assert settings.local_dev_endpoints_url
        return LocalTaskRunner(settings.local_dev_endpoints_url)
    raise ValueError(f"Unknown task runner type: {settings.task_runner_type}")
22 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/workflow/__init__.py:
--------------------------------------------------------------------------------
1 | from pctasks.core.cosmos.settings import CosmosDBSettings
2 | from pctasks.run.settings import RunSettings, WorkflowRunnerType
3 | from pctasks.run.workflow.argo import ArgoWorkflowRunner
4 | from pctasks.run.workflow.base import WorkflowRunner
5 | from pctasks.run.workflow.local import LocalWorkflowRunner
6 |
7 |
def get_workflow_runner() -> WorkflowRunner:
    """Return the WorkflowRunner implementation selected by the settings.

    Reads RunSettings and CosmosDBSettings and dispatches on
    workflow_runner_type. Raises ValueError for an unknown type.
    """
    run_settings = RunSettings.get()
    cosmosdb_settings = CosmosDBSettings.get()
    runner_type = run_settings.workflow_runner_type

    if runner_type == WorkflowRunnerType.ARGO:
        return ArgoWorkflowRunner(run_settings, cosmosdb_settings)
    if runner_type == WorkflowRunnerType.LOCAL:
        # Endpoint URL presence is checked during settings validation.
        assert run_settings.local_dev_endpoints_url
        return LocalWorkflowRunner(run_settings, cosmosdb_settings)
    raise ValueError(
        f"Unknown workflow runner type: {run_settings.workflow_runner_type}"
    )
21 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/workflow/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from pctasks.core.cosmos.settings import CosmosDBSettings
4 | from pctasks.core.models.workflow import WorkflowSubmitMessage, WorkflowSubmitResult
5 | from pctasks.run.settings import RunSettings, WorkflowExecutorConfig
6 |
7 |
class WorkflowRunner(ABC):
    """Abstract base for components that submit workflows for execution.

    Concrete implementations (e.g. local or Argo runners) decide how the
    submitted workflow is actually executed.
    """

    def __init__(self, run_settings: RunSettings, cosmosdb_settings: CosmosDBSettings):
        # Stored for subclasses and for get_executor_config below.
        self.run_settings = run_settings
        self.cosmosdb_settings = cosmosdb_settings

    def get_executor_config(self) -> WorkflowExecutorConfig:
        """Bundle this runner's settings into a WorkflowExecutorConfig."""
        return WorkflowExecutorConfig(
            run_settings=self.run_settings, cosmosdb_settings=self.cosmosdb_settings
        )

    @abstractmethod
    def submit_workflow(
        self, submit_msg: WorkflowSubmitMessage
    ) -> WorkflowSubmitResult:
        """Submit a workflow for execution and return the submit result."""
        pass
23 |
--------------------------------------------------------------------------------
/pctasks/run/pctasks/run/workflow/executor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/workflow/executor/__init__.py
--------------------------------------------------------------------------------
/pctasks/run/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/run/tests/batch/test_utils.py:
--------------------------------------------------------------------------------
1 | from pctasks.run.batch.utils import make_valid_batch_id
2 |
3 |
def test_make_valid_job_id():
    # Characters outside [a-zA-Z0-9_-] are replaced with dashes.
    sanitized = make_valid_batch_id("some-job/job_ok/!this/is/not/valid")
    assert sanitized == "some-job-job_ok--this-is-not-valid"

    # Overlong ids are truncated to the 64-character Batch limit.
    overlong_id = "test-chars" * 7
    assert len(make_valid_batch_id(overlong_id)) == 64
11 |
--------------------------------------------------------------------------------
/pctasks/run/tests/secrets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/tests/secrets/__init__.py
--------------------------------------------------------------------------------
/pctasks/run/tests/secrets/test_base.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 | from pctasks.run.secrets.base import SecretsProvider
4 |
5 |
class MockSecretsProvider(SecretsProvider):
    """In-memory SecretsProvider backed by a plain dict, for tests."""

    def __init__(self, secrets: Dict[str, str]) -> None:
        self.secrets = secrets

    def get_secret(self, name: str) -> str:
        """Return the secret value for ``name``.

        Raises:
            ValueError: if the secret was not provided to the mock.
        """
        # Check for None explicitly so a legitimately empty-string secret
        # round-trips instead of being reported as missing.
        result = self.secrets.get(name)
        if result is None:
            raise ValueError(f"Secret {name} requested but not provided")
        return result
15 |
16 |
def test_parse_secret():
    # A "${{ secrets.* }}" reference in an env value is replaced by the
    # value the provider returns for that secret name.
    secrets_provider = MockSecretsProvider({"foo": "bar"})
    substituted = secrets_provider.substitute_secrets({"foo": "${{ secrets.foo }}"})
    assert substituted["foo"] == "bar"
22 |
--------------------------------------------------------------------------------
/pctasks/run/tests/test_settings.py:
--------------------------------------------------------------------------------
1 | from pctasks.run.settings import RunSettings
2 |
3 |
def test_settings():
    # Smoke test: run settings should load from config/environment without raising.
    _ = RunSettings.get()
6 |
--------------------------------------------------------------------------------
/pctasks/run/tests/workflow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/tests/workflow/__init__.py
--------------------------------------------------------------------------------
/pctasks/server/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Server
2 |
3 | Server component of PCTasks
--------------------------------------------------------------------------------
/pctasks/server/pctasks/server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/pctasks/server/__init__.py
--------------------------------------------------------------------------------
/pctasks/server/pctasks/server/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/pctasks/server/py.typed
--------------------------------------------------------------------------------
/pctasks/server/pctasks/server/routes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/pctasks/server/routes/__init__.py
--------------------------------------------------------------------------------
/pctasks/server/pctasks/server/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/server/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/server/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from starlette.testclient import TestClient
3 |
4 | from pctasks.server.main import app
5 |
6 |
@pytest.fixture(scope="function")
def client() -> TestClient:
    """Build a fresh TestClient for the server app once per test function."""
    test_client = TestClient(app)
    return test_client
10 |
--------------------------------------------------------------------------------
/pctasks/task/README.md:
--------------------------------------------------------------------------------
1 | # Planetary Computer Tasks: Task
2 |
3 | The pctasks.task library supplies functionality for creating executable tasks in the PCTasks system.
4 |
5 |
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/__init__.py:
--------------------------------------------------------------------------------
1 | """pctasks.task
2 |
3 | isort:skip_file
4 | """
5 |
6 | from pctasks.task.version import __version__
7 |
8 | __all__ = ["__version__"]
9 |
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/pctasks/task/common/__init__.py
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/constants.py:
--------------------------------------------------------------------------------
# Environment Variables

# Names of the environment variables holding the task I/O credentials
# (presumably an Azure AD tenant/client/secret triple — TODO confirm
# against the code that reads them).
TASKIO_TENANT_ID_ENV_VAR = "TASKIO_TENANT_ID"
TASKIO_CLIENT_ID_ENV_VAR = "TASKIO_CLIENT_ID"
TASKIO_CLIENT_SECRET_ENV_VAR = "TASKIO_CLIENT_SECRET"
6 |
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/context.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 | from pctasks.core.models.task import TaskRunConfig
4 | from pctasks.core.storage import StorageFactory
5 | from pctasks.core.tokens import Tokens
6 |
7 |
@dataclass
class TaskContext:
    """Context that is passed into Task run methods.

    This class is used to supply the Task with the necessary
    framework components to run.
    """

    storage_factory: StorageFactory
    """A StorageFactory instance configured with workflow tokens"""

    run_id: str
    """The run ID of the workflow currently being executed."""

    @classmethod
    def from_task_run_config(cls, task_config: TaskRunConfig) -> "TaskContext":
        """Build a TaskContext from a TaskRunConfig.

        Wraps the config's tokens in a Tokens-backed StorageFactory and
        carries over the config's run_id.
        """
        return cls(
            storage_factory=StorageFactory(Tokens(task_config.tokens)),
            run_id=task_config.run_id,
        )
28 |
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/pctasks/task/py.typed
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/settings.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pctasks.core.settings import PCTasksSettings
4 |
5 |
class TaskSettings(PCTasksSettings):
    """Settings for task execution, read from the "task" config section."""

    @classmethod
    def section_name(cls) -> str:
        # Section key used to locate these settings in the PCTasks config.
        return "task"

    code_dir: Optional[str] = None
    """The directory which downloaded code and requirements are stored.

    If provided, this directory will be used as the target for pip installs,
    and code source will be downloaded to this directory.
    If None, will use sys.path and pip install will not use a target directory.
    """
18 |
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/utils.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | from typing import Optional, TypeVar
3 |
4 | from pctasks.core.models.base import PCBaseModel
5 | from pctasks.task.task import Task
6 |
7 | T = TypeVar("T", bound=PCBaseModel)
8 | U = TypeVar("U", bound=PCBaseModel)
9 |
10 |
def get_task_path(task: Task[T, U], name: str, module: Optional[str] = None) -> str:
    """Convenience method for getting the path to a task.

    Detects the module name when none is given. Requires the user supply
    the importable variable name, including any containing instances or
    classes. Raises ValueError if the task's module cannot be determined.
    """
    if not module:
        task_module = inspect.getmodule(task)
        if task_module is None:
            raise ValueError(f"Could not find module for task {task}")
        module = task_module.__name__
    return f"{module}:{name}"
23 |
--------------------------------------------------------------------------------
/pctasks/task/pctasks/task/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.1.5"
2 |
--------------------------------------------------------------------------------
/pctasks/task/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/tests/__init__.py
--------------------------------------------------------------------------------
/pctasks/task/tests/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/tests/common/__init__.py
--------------------------------------------------------------------------------
/pctasks/task/tests/data-files/test-files/a/three.txt:
--------------------------------------------------------------------------------
1 | three
2 |
--------------------------------------------------------------------------------
/pctasks/task/tests/data-files/test-files/a/two.txt:
--------------------------------------------------------------------------------
1 | two
2 |
--------------------------------------------------------------------------------
/pctasks/task/tests/data-files/test-files/b/c/five.txt:
--------------------------------------------------------------------------------
1 | five
2 |
--------------------------------------------------------------------------------
/pctasks/task/tests/data-files/test-files/b/c/six.txt:
--------------------------------------------------------------------------------
1 | six
2 |
--------------------------------------------------------------------------------
/pctasks/task/tests/data-files/test-files/b/four.txt:
--------------------------------------------------------------------------------
1 | four
2 |
--------------------------------------------------------------------------------
/pctasks/task/tests/data-files/test-files/one.txt:
--------------------------------------------------------------------------------
1 | one
2 |
--------------------------------------------------------------------------------
/pctasks/task/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from pctasks.dataset.chunks.task import create_chunks_task
2 | from pctasks.task.utils import get_task_path
3 |
4 |
class TestTaskHolder:
    # Holds a task as a class attribute so tests can exercise get_task_path
    # with a class-qualified name.
    task = create_chunks_task
7 |
8 |
def test_get_object_path_in_package():
    # The module portion of the path is auto-detected from the task object.
    detected_path = get_task_path(create_chunks_task, "create_chunks_task")
    assert detected_path == "pctasks.dataset.chunks.task:create_chunks_task"
12 |
13 |
def test_get_task_path_in_class():
    # For a task held on a class, the module is passed explicitly and the
    # supplied name carries the holder-class qualification.
    class_qualified_path = get_task_path(
        TestTaskHolder.task, "TestTaskHolder.task", module=TestTaskHolder.__module__
    )
    assert class_qualified_path == "tests.test_utils:TestTaskHolder.task"
19 |
--------------------------------------------------------------------------------
/pctasks_frontend/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 |
--------------------------------------------------------------------------------
/pctasks_frontend/.env.example:
--------------------------------------------------------------------------------
1 | REACT_APP_IS_DEV=true
2 | REACT_APP_API_ROOT=http://localhost:8511
3 |
4 | # Not needed if IS_DEV is true, otherwise get these values from the portal for
5 | # the environment you're targeting
6 | REACT_APP_AUTH_TENANT_ID=
7 | REACT_APP_AUTH_CLIENT_ID=
8 | REACT_APP_AUTH_BACKEND_APP_ID=
9 |
--------------------------------------------------------------------------------
/pctasks_frontend/.gitignore:
--------------------------------------------------------------------------------
1 | # dependencies
2 | /node_modules
3 | /.pnp
4 | .pnp.js
5 |
6 | # testing
7 | /coverage
8 |
9 | # production
10 | /build
11 |
12 | # misc
13 | .DS_Store
14 | .env
15 | .env.local
16 | .env.development.local
17 | .env.test.local
18 | .env.production.local
19 |
20 | npm-debug.log*
21 | yarn-debug.log*
22 | yarn-error.log*
23 |
--------------------------------------------------------------------------------
/pctasks_frontend/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "arrowParens": "avoid",
3 | "htmlWhitespaceSensitivity": "css",
4 | "insertPragma": false,
5 | "jsxSingleQuote": false,
6 | "printWidth": 85,
7 | "proseWrap": "always",
8 | "requirePragma": false,
9 | "semi": true,
10 | "tabWidth": 2,
11 | "trailingComma": "es5",
12 | "useTabs": false
13 | }
14 |
--------------------------------------------------------------------------------
/pctasks_frontend/.storybook/main.js:
--------------------------------------------------------------------------------
module.exports = {
  // Discover stories anywhere under src/: MDX docs pages and CSF files.
  stories: ["../src/**/*.stories.mdx", "../src/**/*.stories.@(js|jsx|ts|tsx)"],
  addons: [
    "@storybook/addon-links",
    "@storybook/addon-essentials",
    "@storybook/addon-interactions",
    "@storybook/preset-create-react-app",
    "storybook-addon-react-router-v6",
  ],
  framework: "@storybook/react",
  core: {
    // Build with the webpack 5 builder.
    builder: "@storybook/builder-webpack5",
  },
};
15 |
--------------------------------------------------------------------------------
/pctasks_frontend/.storybook/preview.js:
--------------------------------------------------------------------------------
1 | import { ThemeProvider } from "@fluentui/react";
2 |
export const parameters = {
  // Treat any prop matching /^on[A-Z].*/ as an action logged in the panel.
  actions: { argTypesRegex: "^on[A-Z].*" },
  controls: {
    matchers: {
      // Infer color and date controls from prop names.
      color: /(background|color)$/i,
      date: /Date$/,
    },
  },
};
12 |
13 | export const decorators = [
14 | (Story) => {
15 | return (
16 |