├── .dockerignore ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── cicd.yml │ ├── pr.yml │ ├── publish-charts-dev.yml │ ├── publish-charts.yml │ ├── publish-func-package-dev.yml │ └── publish-func-package.yml ├── .gitignore ├── .isort.cfg ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── Dockerfile.dev ├── Dockerfile.stacapi ├── Dockerfile.stacbrowser ├── Dockerfile.task_base ├── LICENSE ├── README.md ├── RELEASING.md ├── SECURITY.md ├── SUPPORT.md ├── cluster ├── README.md ├── argo-values.yaml ├── dev-values.yaml ├── kind-config.yaml └── pctasks-dev │ ├── Chart.yaml │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── argo-ingress.yaml │ ├── nginx-configmap.yaml │ ├── pctasks-ingress.yaml │ ├── role.yaml │ ├── rolebinding.yaml │ ├── secret.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── conftest.py ├── datasets ├── alos-dem │ └── collection │ │ └── config.json ├── alos-fnf-mosaic │ └── collection │ │ └── config.json ├── alos-palsar-mosaic │ └── collection │ │ └── config.json ├── aster │ ├── Dockerfile │ ├── README.md │ ├── aster.py │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ ├── scripts │ │ ├── compare_items.py │ │ └── print_partition_paths.py │ ├── tests │ │ ├── __init__.py │ │ ├── data-files │ │ │ └── aster-l1t-subset.parquet │ │ └── test_tasks.py │ ├── update-geometries-ingest.yaml │ └── update-geometries.yaml ├── chesapeake_lulc │ ├── chesapeake_lulc.py │ ├── collection │ │ ├── chesapeake-lc-13 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── chesapeake-lc-7 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ └── chesapeake-lu │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ └── tests │ │ └── test_dataset.py ├── chloris-biomass │ └── collection │ │ └── config.json ├── cil-gdpcir │ ├── collection │ │ ├── cil-gdpcir-cc-by-sa │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── cil-gdpcir-cc-by │ │ │ ├── description.md │ │ │ └── template.json │ │ └── cil-gdpcir-cc0 │ │ │ ├── description.md │ │ │ └── template.json │ └── dataset.yaml ├── conus404 │ ├── README.md │ ├── collection │ │ ├── description.md │ │ └── template.json │ └── dataset.yaml ├── cop-dem │ └── collection │ │ ├── cop-dem-glo-30 │ │ └── config.json │ │ └── cop-dem-glo-90 │ │ └── config.json ├── deltaresfloods │ ├── README.md │ ├── collection │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ └── tests │ │ ├── test_dataset.py │ │ └── test_tasks.py ├── drcog-lulc │ └── collection │ │ └── config.json ├── eclipse │ ├── collection │ │ ├── description.md │ │ └── template.json │ └── dataset.yaml ├── ecmwf-forecast │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── ecmwf_forecast.py │ ├── requirements.txt │ ├── streaming.yaml │ └── test_ecmwf_forecast.py ├── era5-pds │ ├── collection │ │ ├── description.md │ │ └── template.json │ └── dataset.yaml ├── esa-cci-lc │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── esa-cci-lc-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ └── esa-cci-lc │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── esa_cci_lc.py │ └── requirements.txt ├── esa-worldcover │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── 
template.json │ ├── dataset.yaml │ ├── esa_worldcover.py │ ├── requirements.txt │ └── workflows │ │ ├── esa-worldcover-process-items-2020.yaml │ │ └── esa-worldcover-process-items-2021.yaml ├── fws-nwi │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── fws_nwi.py │ ├── requirements.txt │ └── scripts │ │ └── create_collection.py ├── gbif │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── gbif.py │ └── requirements.txt ├── goes │ ├── goes-cmi │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── collection │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── config.json │ │ ├── dataset.yaml │ │ ├── goes_cmi │ │ │ ├── __init__.py │ │ │ ├── goes_cmi.py │ │ │ ├── goes_errors.py │ │ │ └── goes_paths.py │ │ ├── requirements.txt │ │ └── streaming.yaml │ └── goes-glm │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── collection │ │ ├── description.md │ │ └── template.json │ │ ├── dataset.yaml │ │ ├── goes_glm.py │ │ ├── requirements.txt │ │ ├── streaming.yaml │ │ ├── tests │ │ ├── dataset-dev-cluster.yaml │ │ └── test_dataset.py │ │ └── workflows │ │ └── goes-glm-update.yaml ├── hgb │ └── collection │ │ └── config.json ├── hls2 │ ├── README.md │ ├── collection │ │ ├── hls2-l30 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ └── hls2-s30 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── hls2.py │ └── test_hls2.py ├── hrea │ └── collection │ │ └── config.json ├── io-biodiversity │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── io_biodiversity.py │ └── requirements.txt ├── io-land-cover │ ├── collection │ │ ├── io-lulc-9-class │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── io-lulc-annual-v02 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ └── io-lulc │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── io_lulc.py │ └── tests │ │ └── test_dataset.py ├── jrc-gsw │ └── collection │ │ └── config.json ├── landsat │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── landsat-c2-l1 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ └── landsat-c2-l2 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── landsat.py │ ├── requirements.txt │ └── streaming.yaml ├── mobi │ └── collection │ │ └── config.json ├── modis │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── modis-09A1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-09Q1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-10A1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-10A2-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-11A1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-11A2-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-13A1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-13Q1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-14A1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-14A2-061 │ │ │ ├── config.json │ │ │ ├── 
description.md │ │ │ └── template.json │ │ ├── modis-15A2H-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-15A3H-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-16A3GF-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-17A2H-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-17A2HGF-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-17A3HGF-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-21A2-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── modis-43A4-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ └── modis-64A1-061 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── misc.py │ ├── modis.py │ ├── requirements.txt │ └── tests │ │ └── test_platform_field.py ├── ms-buildings │ ├── Dockerfile │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── ms_buildings.py │ └── requirements.txt ├── mtbs │ └── collection │ │ └── config.json ├── naip │ ├── Dockerfile │ ├── Explore.ipynb │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── naip.py │ ├── requirements.txt │ └── test_naip.py ├── nasa-nex-gddp-cmip6 │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ └── collection.json │ ├── dataset.yaml │ ├── nasa_nex_gddp_cmip6.py │ └── requirements.txt ├── nasadem │ └── collection │ │ └── config.json ├── noaa-c-cap │ └── collection │ │ └── config.json ├── noaa-cdr │ ├── Dockerfile │ ├── README.md │ ├── collections │ │ ├── ocean-heat-content-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── ocean-heat-content │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sea-ice-concentration │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sea-surface-temperature-optimum-interpolation │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sea-surface-temperature-whoi-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ └── sea-surface-temperature-whoi │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── hang.py │ ├── noaa_cdr.py │ ├── requirements.txt │ ├── scripts │ │ └── create_collections.py │ └── update.yaml ├── noaa-climate-normals │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── noaa-climate-normals-gridded │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── noaa-climate-normals-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ └── noaa-climate-normals-tabular │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── noaa_climate_normals.py │ ├── requirements.txt │ └── workflows │ │ ├── noaa-climate-normals-tabular_process-items_annualseasonal_1981-2010.yaml │ │ ├── noaa-climate-normals-tabular_process-items_annualseasonal_1991-2020.yaml │ │ ├── noaa-climate-normals-tabular_process-items_annualseasonal_2006-2020.yaml │ │ ├── noaa-climate-normals-tabular_process-items_daily_1981-2010.yaml │ │ ├── noaa-climate-normals-tabular_process-items_daily_1991-2020.yaml │ │ ├── noaa-climate-normals-tabular_process-items_daily_2006-2020.yaml │ │ ├── noaa-climate-normals-tabular_process-items_hourly_1981-2010.yaml │ │ ├── 
noaa-climate-normals-tabular_process-items_hourly_1991-2020.yaml │ │ ├── noaa-climate-normals-tabular_process-items_hourly_2006-2020.yaml │ │ ├── noaa-climate-normals-tabular_process-items_monthly_1981-2010.yaml │ │ ├── noaa-climate-normals-tabular_process-items_monthly_1991-2020.yaml │ │ └── noaa-climate-normals-tabular_process-items_monthly_2006-2020.yaml ├── noaa-mrms-qpe │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── noaa-mrms-qpe-1h-pass1 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── noaa-mrms-qpe-1h-pass2 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ └── noaa-mrms-qpe-24h-pass2 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── noaa_mrms_qpe.py │ ├── requirements.txt │ ├── tests │ │ ├── dataset-dev-cluster.yaml │ │ └── test_dataset.py │ └── workflows │ │ ├── update-noaa-mrms-qpe-1h-pass1.yaml │ │ ├── update-noaa-mrms-qpe-1h-pass2.yaml │ │ └── update-noaa-mrms-qpe-24h-pass2.yaml ├── noaa_nclimgrid │ ├── collection │ │ ├── noaa-nclimgrid-daily-prelim │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── noaa-nclimgrid-daily-scaled │ │ │ ├── description.md │ │ │ └── template.json │ │ └── noaa-nclimgrid-monthly │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── noaa_nclimgrid.py │ ├── requirements.txt │ └── tests │ │ ├── conftest.py │ │ └── test_dataset.py ├── nrcan-landcover │ └── collection │ │ └── config.json ├── sentinel-1-grd │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ ├── s1grd.py │ ├── test-data │ │ └── sentinel-1-grd-item-raw.json │ └── test_s1grd.py ├── sentinel-1-rtc │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ └── s1rtc.py ├── sentinel-2 │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ └── sentinel2.py ├── sentinel-3 │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── sentinel-3-olci-lfr-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-olci-wfr-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-slstr-frp-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-slstr-lst-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-slstr-wst-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-sral-lan-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-sral-wat-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-synergy-aod-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-synergy-syn-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-synergy-v10-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── sentinel-3-synergy-vg1-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ │ └── sentinel-3-synergy-vgp-l2-netcdf │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ ├── sentinel_3.py │ └── tests │ │ └── test_sentinel_3.py ├── sentinel-5p │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ ├── sentinel_5p.py │ 
└── test_sentinel_5p.py ├── stac-geoparquet │ ├── Dockerfile │ ├── README.md │ ├── pc_stac_geoparquet.py │ ├── requirements.txt │ ├── workflow.yaml │ └── workflow_test.yaml ├── terraclimate │ ├── collection │ │ ├── description.md │ │ └── template.json │ └── dataset.yaml ├── usda-cdl │ ├── Dockerfile │ ├── README.md │ ├── collection │ │ ├── config.json │ │ ├── description.md │ │ └── template.json │ ├── dataset.yaml │ ├── requirements.txt │ ├── scripts │ │ └── create_collection.py │ ├── tile.yaml │ └── usda_cdl.py ├── usgs-gap │ └── collection │ │ └── config.json ├── usgs-lcmap │ ├── Dockerfile │ ├── collection │ │ ├── usgs-lcmap-conus-v13 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ │ └── usgs-lcmap-hawaii-v10 │ │ │ ├── config.json │ │ │ ├── description.md │ │ │ └── template.json │ ├── dataset.yaml │ ├── fix_items │ │ ├── README.md │ │ ├── fix_items.py │ │ ├── fix_items.yaml │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── data-files │ │ │ └── items.ndjson │ │ │ └── test_tasks.py │ ├── requirements.txt │ └── usgs_lcmap.py └── usgs-lidar │ ├── README.md │ ├── collection │ ├── 3dep-lidar-classification │ │ └── config.json │ ├── 3dep-lidar-dsm │ │ └── config.json │ ├── 3dep-lidar-dtm-native │ │ └── config.json │ ├── 3dep-lidar-dtm │ │ └── config.json │ ├── 3dep-lidar-hag │ │ └── config.json │ ├── 3dep-lidar-intensity │ │ └── config.json │ ├── 3dep-lidar-pointsourceid │ │ └── config.json │ ├── 3dep-lidar-returns │ │ └── config.json │ └── 3dep-seamless │ │ └── config.json │ ├── concatenate_csvs.py │ ├── lidar_audit.py │ ├── requirements.txt │ └── workflow.yaml ├── deployment ├── .gitignore ├── Dockerfile ├── bin │ ├── azlogin │ ├── deploy │ ├── get_tfvars │ ├── jinja │ ├── kv_add_ip │ ├── kv_rmv_ip │ ├── lib │ ├── nginx-values.yaml │ ├── publish_pkgs │ ├── setup_storage.py │ ├── update_pkg_index │ └── write_tfvars ├── cosmosdb │ ├── README.md │ └── scripts │ │ ├── stored_procs │ │ └── workflow-runs │ │ │ └── bulkput-workflowruns.js │ │ └── triggers │ │ ├── workflow-runs │ │ └── post-all-workflowruns.js │ │ └── workflows │ │ └── post-all-workflows.js ├── docker-compose.yml ├── helm │ ├── argo-values.yaml │ ├── deploy-values.template.yaml │ ├── pc-tasks-ingress │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── NOTES.txt │ │ │ ├── _helpers.tpl │ │ │ ├── ingress.yaml │ │ │ ├── nginx-configmap.yaml │ │ │ ├── secret-provider.yaml │ │ │ └── secret.yaml │ │ └── values.yaml │ ├── published │ │ └── pctasks-server │ │ │ ├── Chart.yaml │ │ │ ├── templates │ │ │ ├── NOTES.txt │ │ │ ├── _helpers.tpl │ │ │ ├── deployment.yaml │ │ │ ├── service.yaml │ │ │ └── serviceaccount.yaml │ │ │ └── values.yaml │ └── vendored │ │ ├── argo-workflows-0.41.8.tgz │ │ ├── ingress-nginx-4.8.3.tgz │ │ └── keda-2.14.2.tgz ├── manual │ ├── backend-app.manifest.json │ └── frontend-app.manifest.json ├── requirements.txt └── terraform │ ├── batch_pool │ ├── main.tf │ ├── providers.tf │ └── variables.tf │ ├── resources │ ├── acr.tf │ ├── aks.tf │ ├── apim.tf │ ├── app_insights.tf │ ├── batch.tf │ ├── cosmosdb.tf │ ├── function.tf │ ├── ip.tf │ ├── keyvault.tf │ ├── output.tf │ ├── providers.tf │ ├── rg.tf │ ├── storage_account.tf │ ├── values.tfvars.template │ ├── variables.tf │ └── vnet.tf │ └── staging │ ├── backend.tf │ ├── env.sh │ ├── main.tf │ ├── output.tf │ ├── pools.tf │ └── variables.tf ├── dev-secrets.template.yaml ├── dev ├── nginx │ ├── Dockerfile │ ├── README.md │ └── etc │ │ └── nginx │ │ ├── conf.d │ │ └── default.conf │ │ └── nginx.conf └── stacapi.py ├── docker-compose.aux.yml ├── 
docker-compose.console.yml ├── docker-compose.cosmosdb.yml ├── docker-compose.tasks.yml ├── docker-compose.yml ├── docs ├── Makefile ├── _static │ ├── .gitignore │ └── low-latency-workflows.svg ├── conf.py ├── development │ ├── deploying.md │ ├── faq.md │ ├── index.md │ └── setup.md ├── getting_started │ ├── creating_a_dataset.md │ ├── dev_workflows.md │ ├── index.md │ └── telemetry.md ├── index.md ├── make.bat ├── reference │ ├── api.md │ └── index.md └── user_guide │ ├── chunking.md │ ├── index.md │ ├── runtime.md │ ├── settings.md │ ├── storage.md │ ├── streaming.md │ ├── templating.md │ └── workflows.md ├── examples ├── list-logs.yaml ├── run-etl.yaml ├── streaming-workflow.yaml ├── test_workflow.yaml ├── test_workflow_naip.yaml └── workflow.yaml ├── ingest-collection.yaml ├── mypy.ini ├── pctasks ├── .dockerignore ├── cli │ ├── README.md │ ├── pctasks │ │ └── cli │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ └── test_cli.py ├── client │ ├── README.md │ ├── pctasks │ │ └── client │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ ├── constants.py │ │ │ ├── context.py │ │ │ ├── errors.py │ │ │ ├── profile │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ └── commands.py │ │ │ ├── py.typed │ │ │ ├── runs │ │ │ ├── __init__.py │ │ │ ├── _get.py │ │ │ ├── _list.py │ │ │ ├── _status.py │ │ │ ├── cli.py │ │ │ ├── get.py │ │ │ ├── list.py │ │ │ ├── options.py │ │ │ ├── status.py │ │ │ └── utils.py │ │ │ ├── settings.py │ │ │ ├── storage │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ └── commands.py │ │ │ ├── utils.py │ │ │ ├── version.py │ │ │ └── workflow │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── commands.py │ │ │ ├── options.py │ │ │ └── template.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── data-files │ │ ├── mycode.py │ │ └── test_collection.json │ │ ├── records │ │ ├── __init__.py │ │ └── test_records.py │ │ ├── test_client.py │ │ ├── test_storage.py │ │ └── test_template.py ├── core │ ├── README.md │ ├── pctasks │ │ └── core │ │ │ ├── __init__.py │ │ │ ├── _compat.py │ │ │ ├── activity.py │ │ │ ├── cli.py │ │ │ ├── constants.py │ │ │ ├── context.py │ │ │ ├── cosmos │ │ │ ├── __init__.py │ │ │ ├── container.py │ │ │ ├── containers │ │ │ │ ├── __init__.py │ │ │ │ ├── items.py │ │ │ │ ├── process_item_errors.py │ │ │ │ ├── records.py │ │ │ │ ├── storage_events.py │ │ │ │ ├── workflow_runs.py │ │ │ │ └── workflows.py │ │ │ ├── database.py │ │ │ ├── page.py │ │ │ └── settings.py │ │ │ ├── importer.py │ │ │ ├── logging.py │ │ │ ├── message_handler.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── activity.py │ │ │ ├── base.py │ │ │ ├── config.py │ │ │ ├── event.py │ │ │ ├── item.py │ │ │ ├── record.py │ │ │ ├── registration.py │ │ │ ├── response.py │ │ │ ├── run.py │ │ │ ├── task.py │ │ │ ├── tokens.py │ │ │ ├── utils.py │ │ │ └── workflow.py │ │ │ ├── py.typed │ │ │ ├── queues.py │ │ │ ├── settings.py │ │ │ ├── storage │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── blob.py │ │ │ ├── errors.py │ │ │ ├── local.py │ │ │ └── path_filter.py │ │ │ ├── tables │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── config.py │ │ │ ├── registration.py │ │ │ ├── task.py │ │ │ └── utils.py │ │ │ ├── tokens.py │ │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── backoff.py │ │ │ ├── stac.py │ │ │ ├── summary.py │ │ │ └── template.py │ │ │ ├── version.py │ │ │ └── yaml.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── cosmos │ │ ├── __init__.py 
│ │ ├── containers │ │ │ ├── __init__.py │ │ │ ├── test_workflow_runs.py │ │ │ └── test_workflows.py │ │ └── test_container.py │ │ ├── data-files │ │ ├── example_module │ │ │ ├── __init__.py │ │ │ ├── a.py │ │ │ └── b.py │ │ ├── items │ │ │ ├── io-lulc-item.json │ │ │ ├── naip │ │ │ │ ├── naip1.json │ │ │ │ └── naip2.json │ │ │ ├── s1-grd.json │ │ │ └── s1-rtc │ │ │ │ └── 2019 │ │ │ │ └── 12 │ │ │ │ └── 15 │ │ │ │ └── IW │ │ │ │ ├── DH │ │ │ │ └── S1A_IW_GRDH_1SDH_20191215T034818_20191215T034847_030353_0378EA_rtc.json │ │ │ │ └── DV │ │ │ │ └── S1A_IW_GRDH_1SDV_20191215T003835_20191215T003904_030352_0378DC_rtc.json │ │ ├── planet-nicfi-analytic.json │ │ └── simple-assets │ │ │ ├── a │ │ │ ├── asset-a-1.json │ │ │ └── asset-a-2.json │ │ │ └── b │ │ │ ├── asset-b-1.json │ │ │ └── asset-b-2.json │ │ ├── models │ │ ├── __init__.py │ │ ├── test_item.py │ │ ├── test_storage_event.py │ │ ├── test_task.py │ │ └── test_workflow.py │ │ ├── storage │ │ ├── __init__.py │ │ ├── test_blob.py │ │ ├── test_importer.py │ │ └── test_local.py │ │ ├── tables │ │ ├── __init__.py │ │ ├── test_config.py │ │ └── test_record.py │ │ ├── test_activity.py │ │ ├── test_messages.py │ │ ├── test_yaml.py │ │ └── utils │ │ ├── __init__.py │ │ ├── test_backoff.py │ │ ├── test_summary.py │ │ ├── test_template.py │ │ └── test_utils.py ├── dataset │ ├── README.md │ ├── pctasks │ │ └── dataset │ │ │ ├── __init__.py │ │ │ ├── _cli.py │ │ │ ├── chunks │ │ │ ├── __init__.py │ │ │ ├── chunkset.py │ │ │ ├── constants.py │ │ │ ├── models.py │ │ │ └── task.py │ │ │ ├── cli.py │ │ │ ├── collection.py │ │ │ ├── constants.py │ │ │ ├── items │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ ├── models.py │ │ │ └── task.py │ │ │ ├── models.py │ │ │ ├── py.typed │ │ │ ├── splits │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── constants.py │ │ │ ├── models.py │ │ │ └── task.py │ │ │ ├── streaming.py │ │ │ ├── template.py │ │ │ ├── utils.py │ │ │ ├── validate.py │ │ │ ├── version.py │ │ │ └── workflow.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── chunks │ │ ├── __init__.py │ │ └── test_task.py │ │ ├── data-files │ │ ├── datasets │ │ │ ├── mycode.py │ │ │ ├── naip.yaml │ │ │ └── test-dataset.yaml │ │ ├── simple-assets │ │ │ ├── a │ │ │ │ ├── asset-a-1.json │ │ │ │ └── asset-a-2.json │ │ │ └── b │ │ │ │ ├── asset-b-1.json │ │ │ │ └── asset-b-2.json │ │ ├── storage-event.json │ │ └── test-assets │ │ │ ├── one.txt │ │ │ ├── three.txt │ │ │ └── two.txt │ │ ├── items │ │ ├── __init__.py │ │ └── test_task.py │ │ ├── test_dataset.py │ │ ├── test_streaming_create_items.py │ │ └── test_validate_collection.py ├── dev │ ├── README.md │ ├── pctasks │ │ └── dev │ │ │ ├── __init__.py │ │ │ ├── azurite.py │ │ │ ├── blob.py │ │ │ ├── cli.py │ │ │ ├── config.py │ │ │ ├── constants.py │ │ │ ├── cosmosdb.py │ │ │ ├── db.py │ │ │ ├── env.py │ │ │ ├── k8s.py │ │ │ ├── local_dev_endpoints.py │ │ │ ├── logs.py │ │ │ ├── mocks.py │ │ │ ├── py.typed │ │ │ ├── queues.py │ │ │ ├── secrets.py │ │ │ ├── settings.py │ │ │ ├── tables.py │ │ │ ├── task.py │ │ │ ├── test_utils.py │ │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ └── test_temp_queue.py ├── ingest │ ├── README.md │ ├── pctasks │ │ └── ingest │ │ │ ├── __init__.py │ │ │ ├── _cli.py │ │ │ ├── cli.py │ │ │ ├── constants.py │ │ │ ├── models.py │ │ │ ├── py.typed │ │ │ ├── settings.py │ │ │ ├── utils.py │ │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── data-files │ │ ├── 
goes-collection-workflow.yaml │ │ └── test_collection.json │ │ ├── test_collection.py │ │ └── test_settings.py ├── ingest_task │ ├── Dockerfile │ ├── README.md │ ├── pctasks │ │ └── ingest_task │ │ │ ├── __init__.py │ │ │ ├── collection.py │ │ │ ├── items.py │ │ │ ├── pgstac.py │ │ │ ├── py.typed │ │ │ ├── streaming.py │ │ │ ├── task.py │ │ │ ├── utils.py │ │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── data-files │ │ ├── era5-pds │ │ │ ├── description.md │ │ │ └── template.json │ │ ├── items │ │ │ ├── item1.json │ │ │ └── items.ndjson │ │ └── test_collection.json │ │ ├── items_document.json │ │ ├── test_collection.py │ │ ├── test_items.py │ │ └── test_streaming_ingest.py ├── notify │ ├── README.md │ ├── pctasks │ │ └── notify │ │ │ ├── __init__.py │ │ │ ├── activities.py │ │ │ ├── models.py │ │ │ ├── py.typed │ │ │ ├── settings.py │ │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ └── test_process.py ├── router │ ├── README.md │ ├── pctasks │ │ └── router │ │ │ ├── __init__.py │ │ │ ├── handlers │ │ │ ├── __init__.py │ │ │ ├── eventgrid.py │ │ │ └── forward.py │ │ │ ├── message_handler.py │ │ │ ├── py.typed │ │ │ ├── settings.py │ │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── test_process.py │ │ └── test_settings.py ├── run │ ├── Dockerfile │ ├── README.md │ ├── pctasks │ │ └── run │ │ │ ├── __init__.py │ │ │ ├── _cli.py │ │ │ ├── argo │ │ │ ├── __init__.py │ │ │ └── client.py │ │ │ ├── batch │ │ │ ├── __init__.py │ │ │ ├── client.py │ │ │ ├── model.py │ │ │ ├── task.py │ │ │ └── utils.py │ │ │ ├── cli.py │ │ │ ├── constants.py │ │ │ ├── dag.py │ │ │ ├── errors.py │ │ │ ├── models.py │ │ │ ├── py.typed │ │ │ ├── secrets │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── keyvault.py │ │ │ └── local.py │ │ │ ├── settings.py │ │ │ ├── task │ │ │ ├── __init__.py │ │ │ ├── argo.py │ │ │ ├── base.py │ │ │ ├── batch.py │ │ │ ├── local.py │ │ │ └── prepare.py │ │ │ ├── template.py │ │ │ ├── utils.py │ │ │ ├── version.py │ │ │ └── workflow │ │ │ ├── __init__.py │ │ │ ├── argo.py │ │ │ ├── base.py │ │ │ ├── executor │ │ │ ├── __init__.py │ │ │ ├── models.py │ │ │ ├── remote.py │ │ │ ├── simple.py │ │ │ └── streaming.py │ │ │ ├── kubernetes.py │ │ │ └── local.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── batch │ │ └── test_utils.py │ │ ├── data-files │ │ └── workflows │ │ │ └── test_remote1.yaml │ │ ├── secrets │ │ ├── __init__.py │ │ └── test_base.py │ │ ├── test_dag.py │ │ ├── test_messages.py │ │ ├── test_settings.py │ │ ├── test_template.py │ │ └── workflow │ │ ├── __init__.py │ │ ├── test_kubernetes.py │ │ └── test_remote.py ├── server │ ├── Dockerfile │ ├── README.md │ ├── pctasks │ │ └── server │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ ├── dependencies.py │ │ │ ├── logging.py │ │ │ ├── main.py │ │ │ ├── middleware.py │ │ │ ├── py.typed │ │ │ ├── request.py │ │ │ ├── routes │ │ │ ├── __init__.py │ │ │ ├── code.py │ │ │ ├── runs.py │ │ │ └── workflows.py │ │ │ ├── settings.py │ │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── routes │ │ └── test_run.py │ │ └── test_request.py └── task │ ├── README.md │ ├── pctasks │ └── task │ │ ├── __init__.py │ │ ├── _cli.py │ │ ├── cli.py │ │ ├── common │ │ ├── __init__.py │ │ ├── list_files.py │ │ ├── list_prefixes.py │ │ ├── summarize.py │ │ └── write.py │ 
│ ├── constants.py │ │ ├── context.py │ │ ├── py.typed │ │ ├── run.py │ │ ├── settings.py │ │ ├── streaming.py │ │ ├── task.py │ │ ├── utils.py │ │ └── version.py │ ├── pyproject.toml │ ├── requirements.txt │ └── tests │ ├── __init__.py │ ├── common │ ├── __init__.py │ ├── test_list_files.py │ ├── test_list_prefixes.py │ ├── test_summarize.py │ └── test_write.py │ ├── data-files │ ├── items │ │ └── s1-rtc │ │ │ └── 2019 │ │ │ └── 12 │ │ │ └── 15 │ │ │ └── IW │ │ │ ├── DH │ │ │ ├── S1A_IW_GRDH_1SDH_20191215T034818_20191215T034847_030353_0378EA_rtc.json │ │ │ ├── S1A_IW_GRDH_1SDH_20191215T105713_20191215T105738_030358_037912_rtc.json │ │ │ ├── S1A_IW_GRDH_1SDH_20191215T105738_20191215T105803_030358_037912_rtc.json │ │ │ └── S1A_IW_GRDH_1SDH_20191215T105803_20191215T105828_030358_037912_rtc.json │ │ │ └── DV │ │ │ ├── S1A_IW_GRDH_1SDV_20191215T003835_20191215T003904_030352_0378DC_rtc.json │ │ │ ├── S1A_IW_GRDH_1SDV_20191215T004249_20191215T004314_030352_0378DC_rtc.json │ │ │ ├── S1A_IW_GRDH_1SDV_20191215T004314_20191215T004339_030352_0378DC_rtc.json │ │ │ └── S1A_IW_GRDH_1SDV_20191215T004339_20191215T004404_030352_0378DC_rtc.json │ └── test-files │ │ ├── a │ │ ├── three.txt │ │ └── two.txt │ │ ├── b │ │ ├── c │ │ │ ├── five.txt │ │ │ └── six.txt │ │ └── four.txt │ │ └── one.txt │ ├── test_cli.py │ └── test_utils.py ├── pctasks_frontend ├── .dockerignore ├── .env.example ├── .gitignore ├── .prettierrc ├── .storybook │ ├── main.js │ └── preview.js ├── README.md ├── package-lock.json ├── package.json ├── public │ ├── index.html │ └── robots.txt ├── src │ ├── App.test.tsx │ ├── App.tsx │ ├── components │ │ ├── auth │ │ │ ├── AuthPage │ │ │ │ └── AuthPage.index.tsx │ │ │ ├── hooks │ │ │ │ ├── useApiClient.ts │ │ │ │ └── useMsalToken.ts │ │ │ ├── index.ts │ │ │ └── login │ │ │ │ ├── PcPersona.tsx │ │ │ │ ├── SignInButton.tsx │ │ │ │ ├── UserHeaderControl.tsx │ │ │ │ └── index.ts │ │ ├── common │ │ │ ├── RunItem │ │ │ │ └── RunItem.index.tsx │ │ │ ├── RunTimes │ │ │ │ ├── RunDuration.tsx │ │ │ │ ├── RunStarted.tsx │ │ │ │ ├── RunTimeBlock.tsx │ │ │ │ └── RunTimeEntry.tsx │ │ │ ├── StatusIcon │ │ │ │ ├── StatusIcon.index.tsx │ │ │ │ ├── __tests__ │ │ │ │ │ └── StatusIcon.stories.tsx │ │ │ │ └── index.ts │ │ │ ├── TextOutput │ │ │ │ └── TextOutput.index.tsx │ │ │ └── hooks │ │ │ │ ├── index.ts │ │ │ │ ├── useExpandButton.tsx │ │ │ │ └── usePageTitle.tsx │ │ ├── jobs │ │ │ ├── JobRunItem │ │ │ │ ├── JobRunItem.index.tsx │ │ │ │ └── __tests__ │ │ │ │ │ ├── JobRunItem.stories.tsx │ │ │ │ │ ├── data.ts │ │ │ │ │ ├── workflow-job-definitions.json │ │ │ │ │ └── workflow-job-runs.json │ │ │ ├── JobRunList │ │ │ │ └── JobRunList.index.tsx │ │ │ ├── JobRunWithSubJobs │ │ │ │ ├── JobRunWithSubJobs.index.tsx │ │ │ │ └── __tests__ │ │ │ │ │ ├── data.ts │ │ │ │ │ ├── jobWithSubJobs.json │ │ │ │ │ └── subJobRuns.json │ │ │ ├── JobRunWithTasks │ │ │ │ ├── JobRunWithTasks.index.tsx │ │ │ │ └── __tests__ │ │ │ │ │ ├── JobRunWithTasks.stories.tsx │ │ │ │ │ └── data.ts │ │ │ ├── JobStatusFilter │ │ │ │ └── JobStatusFilter.index.tsx │ │ │ ├── ParentJobRunItem │ │ │ │ └── ParentJobRunItem.index.tsx │ │ │ ├── hooks │ │ │ │ └── useSubJobFilter.tsx │ │ │ └── index.ts │ │ ├── layout │ │ │ ├── Header.tsx │ │ │ └── index.ts │ │ ├── tasks │ │ │ ├── TaskRunItem │ │ │ │ ├── TaskRunItem.index.tsx │ │ │ │ ├── __tests__ │ │ │ │ │ ├── TaskRunItem.stories.tsx │ │ │ │ │ └── data.ts │ │ │ │ └── index.ts │ │ │ ├── TaskRunList │ │ │ │ ├── TaskRunList.index.tsx │ │ │ │ ├── __tests__ │ │ │ │ │ ├── TaskRunList.stories.tsx │ │ │ │ │ └── 
data.ts │ │ │ │ └── index.ts │ │ │ └── index.ts │ │ └── workflows │ │ │ ├── WorkflowRunHeader │ │ │ ├── WorkflowRunHeader.index.tsx │ │ │ └── __tests__ │ │ │ │ └── workflow-detail.json │ │ │ ├── WorkflowRunItem │ │ │ ├── WorkflowRunItem.index.tsx │ │ │ ├── WorkflowRunItemErrors.tsx │ │ │ └── __tests__ │ │ │ │ ├── WorkflowRunItem.stories.tsx │ │ │ │ └── data.ts │ │ │ ├── WorkflowRunList │ │ │ ├── WorkflowRunList.index.tsx │ │ │ └── __tests__ │ │ │ │ ├── WorkflowRunList.stories.tsx │ │ │ │ ├── data.ts │ │ │ │ └── workflow-runs.json │ │ │ └── index.ts │ ├── global.d.ts │ ├── helpers │ │ ├── api.ts │ │ ├── auth.ts │ │ ├── constants.ts │ │ ├── job-create-splits.json │ │ ├── jobs.ts │ │ ├── logs-create-splits.json │ │ ├── task-create-splits.json │ │ ├── tasks.ts │ │ ├── time.ts │ │ ├── utils.ts │ │ └── workflows.ts │ ├── index.css │ ├── index.tsx │ ├── pages │ │ ├── Home │ │ │ └── Home.index.tsx │ │ ├── WorkflowDetail │ │ │ └── WorkflowDetail.index.tsx │ │ ├── Workflows │ │ │ └── Workflows.index.tsx │ │ └── index.ts │ ├── react-app-env.d.ts │ ├── reportWebVitals.ts │ ├── setupTests.ts │ ├── state │ │ └── SelectionProvider.tsx │ ├── styles │ │ └── global.ts │ └── types │ │ ├── enums.ts │ │ └── index.ts └── tsconfig.json ├── pctasks_funcs ├── .dockerignore ├── .funcignore ├── .gitignore ├── Dockerfile ├── PublishItemsCF │ ├── __init__.py │ └── function.json ├── StorageEventsCF │ ├── __init__.py │ └── function.json ├── StorageEventsQueue │ ├── __init__.py │ └── function.json ├── WorkflowRunsCF │ ├── __init__.py │ └── function.json ├── WorkflowsCF │ ├── __init__.py │ └── function.json ├── host.json ├── pctasks_funcs_base │ └── __init__.py ├── requirements-deploy.txt ├── requirements.txt ├── start.sh └── tests │ ├── items_document.json │ ├── stac_item_record.json │ ├── storage_event.json │ ├── test_publish_items.py │ └── test_storage_events.py ├── pytest.ini ├── requirements-dev.txt ├── requirements-task-base.txt ├── scripts ├── bin │ ├── format │ ├── pctasks-pip-compile │ ├── test │ └── test-integration ├── build ├── ciauthenticate ├── cideploy ├── cipublish-pkgs ├── citest-integration ├── cluster ├── console ├── env ├── format ├── generate-requirements ├── install ├── publish ├── server ├── setup ├── test ├── test-integration ├── update └── validate-collections ├── tests ├── __init__.py ├── constants.py ├── data-files │ ├── assets │ │ ├── a │ │ │ └── asset-a-1.json │ │ ├── b │ │ │ ├── b_1 │ │ │ │ └── asset-b_1-1.json │ │ │ └── b_2 │ │ │ │ └── asset-b_2-1.json │ │ └── c │ │ │ ├── c_1 │ │ │ ├── c_1_1 │ │ │ │ ├── asset-c_1_1-1.json │ │ │ │ └── asset-c_1_1-2.json │ │ │ └── c_1_2 │ │ │ │ └── asset-c_1-2.json │ │ │ └── c_2 │ │ │ └── c_2_1 │ │ │ ├── asset-c_2_1-1.json │ │ │ └── asset-c_2_1-2.json │ ├── collection.json │ ├── collection_template │ │ ├── description.md │ │ └── template.json │ ├── modis │ │ ├── collection.json │ │ └── items.ndjson │ └── simple-assets │ │ ├── a │ │ ├── asset-a-1.json │ │ └── asset-a-2.json │ │ └── b │ │ ├── asset-b-1.json │ │ └── asset-b-2.json ├── dataset │ ├── __init__.py │ ├── collection.py │ ├── dataset.yaml │ ├── streaming-create-items.yaml │ ├── streaming-ingest.yaml │ └── test_dataset.py ├── ingest │ ├── test_collection_ingest.py │ └── test_modis_ingest.py ├── tasks.py ├── test_foreach.py ├── test_invalid_image.py └── workflows │ ├── ingest-collection.yaml │ └── test-foreach.yaml ├── workflow.yaml └── workflows └── streaming-ingest.yaml

/.dockerignore:
--------------------------------------------------------------------------------
**/.envrc
**/.direnv
**/__pycache__
**/.mypy_cache
**/.pytest_cache
**/.terraform
**/node_modules
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
[flake8]
max-line-length = 88
extend-ignore = E203, W503, E731, E722
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
---
name: Feature request
about: Suggest an idea for the Microsoft Planetary Computer
title: ''
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen. Ex. I would like to use stac to do [...]

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/.github/workflows/publish-charts.yml:
--------------------------------------------------------------------------------
name: Publish charts (release)

on:
  push:
    tags: ["*"]
  workflow_dispatch:

defaults:
  run:
    shell: bash

jobs:
  build:
    permissions:
      contents: write
    runs-on: ubuntu-20.04

    steps:
      - uses: actions/checkout@v2

      - name: Get tag
        id: previoustag
        uses: "WyriHaximus/github-action-get-previous-tag@v1"

      - name: Publish Helm charts
        uses: stefanprodan/helm-gh-pages@master
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: "deployment/helm/published"
          linting: "off"
          helm_version: 3.5.4
          chart_version: ${{steps.previoustag.outputs.tag}}
--------------------------------------------------------------------------------
/.github/workflows/publish-func-package-dev.yml:
--------------------------------------------------------------------------------
name: Publish function package (dev)

on:
  push:
    branches: [main]
  workflow_dispatch:

defaults:
  run:
    shell: bash

jobs:
  publish:
    runs-on: ubuntu-20.04
    permissions:
      contents: write

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: "Get Previous tag"
        id: previoustag
        uses: "WyriHaximus/github-action-get-previous-tag@v1"
        with:
          fallback: 2022.2.0

      - name: "Get next minor version"
        id: semvers
        uses: "WyriHaximus/github-action-next-semvers@v1"
        with:
          version: ${{ steps.previoustag.outputs.tag }}

      - name: "Publish package"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: ./scripts/cipublish-pkgs -t "${{ steps.semvers.outputs.minor }}-dev"
--------------------------------------------------------------------------------
/.github/workflows/publish-func-package.yml:
--------------------------------------------------------------------------------
name: Publish function package (release)

on:
  push:
    tags: ["*"]
  workflow_dispatch:
defaults:
  run:
    shell: bash

jobs:
  publish:
    runs-on: ubuntu-20.04
    permissions:
      contents: write

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: "Get tag"
        id: previoustag
        uses: "WyriHaximus/github-action-get-previous-tag@v1"
        with:
          fallback: 2022.2.0

      - name: "Publish package"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: ./scripts/cipublish-pkgs -t "${{ steps.previoustag.outputs.tag }}"
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
[settings]
profile = black
multi_line_output = 3
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
--------------------------------------------------------------------------------
/Dockerfile.stacapi:
--------------------------------------------------------------------------------
FROM python:3.8-slim

ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt

ENV PATH=$PATH:/install/bin

RUN pip install stac_fastapi.api==2.4.1 stac_fastapi.pgstac==2.4.1 uvicorn==0.17.6

RUN mkdir -p /opt/src
COPY dev/stacapi.py /opt/src/stacapi.py
WORKDIR /opt/src
--------------------------------------------------------------------------------
/Dockerfile.stacbrowser:
--------------------------------------------------------------------------------
FROM mcr.microsoft.com/cbl-mariner/base/nodejs:16

RUN tdnf install -y git

RUN mkdir -p /opt/src
WORKDIR /opt/src
RUN git clone https://github.com/radiantearth/stac-browser
WORKDIR /opt/src/stac-browser
RUN git checkout v3.0.0-beta.1
RUN npm install
RUN npm install http-server -g
RUN npm run build -- --catalogUrl="http://localhost:8513/"
WORKDIR /opt/src/stac-browser/dist

CMD ["http-server", "-p", "8080", "."]
--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
# Support

## How to file issues and get help

This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.

For help and questions about using this project, please use the [Planetary Computer Discussions](https://github.com/microsoft/PlanetaryComputer/discussions) page.

## Microsoft Support Policy

Support for this project is limited to the resources listed above.
--------------------------------------------------------------------------------
/cluster/README.md:
--------------------------------------------------------------------------------
# Cluster configuration

Configuration for the Kubernetes cluster for local development
--------------------------------------------------------------------------------
/cluster/argo-values.yaml:
--------------------------------------------------------------------------------
server:
  baseHref: /argo/
  secure: false
  extraArgs:
    - --auth-mode=server
--------------------------------------------------------------------------------
/cluster/pctasks-dev/Chart.yaml:
--------------------------------------------------------------------------------
apiVersion: v2
name: pctasks-dev-ingress
description: A Helm chart for setting up the development cluster
type: application
version: 0.1.0
appVersion: 0.1.0
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/NOTES.txt:
--------------------------------------------------------------------------------
Application information:
{{ include "pcdev.selectorLabels" . }}
Ingress host: {{ .Values.pcdev.pctasks_ingress.host }}
Service Fullname: {{ include "pcdev.fullname" . }}
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/nginx-configmap.yaml:
--------------------------------------------------------------------------------
apiVersion: v1
kind: ConfigMap
metadata:
  annotations:
  labels:
    app: pc-apis-ingress
  name: nginx-configuration
  namespace: {{ .Values.namespace }}
data:
  use-forwarded-headers: "true"
  enable-real-ip: "true"
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/role.yaml:
--------------------------------------------------------------------------------
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pctasks
rules:
  - verbs:
      - get
      - list
      - watch
      - update
      - create
      - patch
      - delete
    apiGroups:
      - argoproj.io
    resources:
      - workflows
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/rolebinding.yaml:
--------------------------------------------------------------------------------
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: argo-workflows-manual-rolebinding
  namespace: pc
subjects:
  - kind: ServiceAccount
    name: pctasks-sa
    namespace: {{ .Release.Namespace }}
roleRef:
  kind: Role
  name: argo-workflows-manual-role
  apiGroup: rbac.authorization.k8s.io
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/secret.yaml:
--------------------------------------------------------------------------------
apiVersion: v1
kind: Secret
metadata:
  name: pctasks-sa-token
  namespace: pc
  annotations:
    kubernetes.io/service-account.name: pctasks-sa
type: kubernetes.io/service-account-token
--------------------------------------------------------------------------------
/cluster/pctasks-dev/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pctasks-sa
  namespace: pc
--------------------------------------------------------------------------------
/cluster/pctasks-dev/values.yaml:
--------------------------------------------------------------------------------
environment: "staging"
namespace: "default"

pcingress:
  services:
    pctasks:
      path: ""
      name: ""
      port: ""

nameOverride: ""
fullnameOverride: ""
--------------------------------------------------------------------------------
/datasets/aster/README.md:
--------------------------------------------------------------------------------
# planetary-computer-tasks dataset: aster

For now, this dataset exists only to update existing ASTER items with new geometries, using [stactools's footprint capabilities](https://stactools.readthedocs.io/en/stable/footprint.html).
See [update-geometries.yaml](./update-geometries.yaml) for the workflow.

## Running

To run a test and watch it go:

```shell
pctasks workflow upsert-and-submit datasets/aster/update-geometries.yaml | tee /dev/stderr | xargs pctasks runs status -w
```

### Building the Docker image

The update-geometries workflow runs many workers, and if they all hit PyPI at the same time they can get rate limited.
To avoid that problem, we use a custom image in the workflow.
To build and push a custom Docker image to our container registry:

```shell
az acr build -r {the registry} --subscription {the subscription} -t pctasks-aster:latest -f datasets/aster/Dockerfile .
```
--------------------------------------------------------------------------------
/datasets/aster/collection/description.md:
--------------------------------------------------------------------------------
The [ASTER](https://terra.nasa.gov/about/terra-instruments/aster) instrument, launched on-board NASA's [Terra](https://terra.nasa.gov/) satellite in 1999, provides multispectral images of the Earth at 15m-90m resolution. ASTER images provide information about land surface temperature, color, elevation, and mineral composition.

This dataset represents ASTER [L1T](https://lpdaac.usgs.gov/products/ast_l1tv003/) data from 2000-2006. L1T images have been terrain-corrected and rotated to a north-up UTM projection. Images are in [cloud-optimized GeoTIFF](https://www.cogeo.org/) format.
--------------------------------------------------------------------------------
/datasets/aster/dataset.yaml:
--------------------------------------------------------------------------------
# TODO actually implement this -- this is currently a placeholder just to upload the collection
id: aster
image: ${{ args.registry }}/pctasks-aster:latest

args:
  - registry

code:
  src: ${{ local.path(./aster.py) }}
  requirements: ${{ local.path(./requirements.txt) }}

collections:
  - id: aster-l1t
    template: ${{ local.path(./collection) }}
    class: aster:AsterL1tCollection
    asset_storage:
      - uri: blob://astersa/aster/
        token: ${{ pc.get_token(astersa, aster) }}
        chunks:
          options:
            name_starts_with: images/L1T
            ends_with: .xml
            chunk_length: 1000
    chunk_storage:
      uri: blob://astersa/aster-etl-data/chunks/
--------------------------------------------------------------------------------
/datasets/aster/requirements.txt:
--------------------------------------------------------------------------------
adlfs==2022.7.0
geopandas==0.12.1
stactools-aster==0.2.1
git+https://github.com/TomAugspurger/stac-geoparquet@09f3bce33c4e2ab8a796b21fd02df55c1b7754f9
orjson==3.*
--------------------------------------------------------------------------------
/datasets/aster/scripts/print_partition_paths.py:
--------------------------------------------------------------------------------
import adlfs
import planetary_computer
from pystac import Collection

collection = Collection.from_file(
    "https://planetarycomputer.microsoft.com/api/stac/v1/collections/aster-l1t"
)
asset = planetary_computer.sign(collection.assets["geoparquet-items"])
filesystem = adlfs.AzureBlobFileSystem(**asset.extra_fields["table:storage_options"])
for path in filesystem.ls("items/aster-l1t.parquet"):
    print(path)
--------------------------------------------------------------------------------
/datasets/aster/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/aster/tests/__init__.py
--------------------------------------------------------------------------------
/datasets/aster/tests/data-files/aster-l1t-subset.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/aster/tests/data-files/aster-l1t-subset.parquet
--------------------------------------------------------------------------------
/datasets/aster/tests/test_tasks.py:
--------------------------------------------------------------------------------
# Note you will have to run this with `python -m pytest` from the datasets/aster directory

from pathlib import Path

import aster
import orjson


def test_update_geometries_from_dataframe() -> None:
    path = Path(__file__).parent / "data-files" / "aster-l1t-subset.parquet"
    item_collection = aster.read_item_collection(path)
    item = aster.sign_and_update(item_collection.items[0], 0.001)
    _ = orjson.dumps(aster.fix_dict(item.to_dict(include_self_link=False))).decode(
        "utf-8"
    )
--------------------------------------------------------------------------------
/datasets/aster/update-geometries-ingest.yaml:
--------------------------------------------------------------------------------
name: Ingest NDJsons from blob://astersa/aster-etl-data/items/update-geometries
jobs:
  ingest-items:
    id: ingest-items
    tasks:
      - id: ingest-ndjson
        image_key: ingest
        task: pctasks.ingest_task.task:ingest_task
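        # A reading of the args below (illustrative, based on the pctasks
        # Ndjson ingest input shape, not additional configuration): every
        # blob under ndjson_folder whose name matches \d+.ndjson is ingested;
        # items are written in batches of insert_group_size, and
        # insert_only: false allows existing items with the same ID to be
        # updated (upsert) rather than insert-only loading.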
        args:
          content:
            type: Ndjson
            ndjson_folder:
              uri: blob://astersa/aster-etl-data/items/update-geometries
              extensions:
                - .ndjson
              matches: \d+.ndjson
          options:
            insert_group_size: 5000
            insert_only: false
          schema_version: 1.0.0
        schema_version: 1.0.0
id: aster-update-geometries-ingest
dataset: microsoft/aster-l1t
--------------------------------------------------------------------------------
/datasets/chesapeake_lulc/chesapeake_lulc.py:
--------------------------------------------------------------------------------
from typing import List, Union

import pystac
from stactools.chesapeake_lulc.stac import create_item

from pctasks.core.models.task import WaitTaskResult
from pctasks.core.storage import StorageFactory
from pctasks.dataset.collection import Collection


class ChesapeakeCollection(Collection):
    @classmethod
    def create_item(
        cls, asset_uri: str, storage_factory: StorageFactory
    ) -> Union[List[pystac.Item], WaitTaskResult]:
        storage, asset_path = storage_factory.get_storage_for_file(asset_uri)
        href = storage.get_url(asset_path)
        item = create_item(href, read_href_modifier=storage.sign)
        return [item]
--------------------------------------------------------------------------------
/datasets/chesapeake_lulc/requirements.txt:
--------------------------------------------------------------------------------
stactools.chesapeake-lulc @ git+https://github.com/stactools-packages/chesapeake-lulc.git@698b13066cb5ffeb55f972d79d21ce04ec30874e
--------------------------------------------------------------------------------
/datasets/conus404/README.md:
--------------------------------------------------------------------------------
# CONUS404

## First-time publishing

First, validate the STAC collection with `pctasks dataset validate-collection [path-to-template.json]` and fix any validation errors.

Then submit the collection ingestion with `pctasks dataset ingest-collection -d datasets/conus404/dataset.yaml -s -a registry pccomponents`.

Get the workflow ID and then watch it with `pctasks runs status $WORKFLOW_ID --watch`.
It must succeed.

Verify that it was successful with `curl "https://planetarycomputer.microsoft.com/api/stac/v1/collections/conus404"`.

## Updating

Simply add `-u` to the command:
`pctasks dataset ingest-collection -d datasets/conus404/dataset.yaml -u -s -a registry pccomponents`
--------------------------------------------------------------------------------
/datasets/conus404/dataset.yaml:
--------------------------------------------------------------------------------
id: conus404
image: ${{ args.registry }}/pctasks-task-base:latest

args:
  - registry

collections:
  - id: conus404
    template: ${{ local.path(./collection) }}
    class: pctasks.dataset.collection:PremadeItemCollection
    asset_storage: []
    chunk_storage:
      uri: "blob://cpdataeuwest/cpdata-etl-data/chunks"
--------------------------------------------------------------------------------
/datasets/deltaresfloods/README.md:
--------------------------------------------------------------------------------
# deltaresfloods PC Tasks

PCTasks code for ingesting deltaresfloods data into the Planetary Computer.
These STAC item JSONs are stored as blobs in the `deltaresfloodssa` storage account under the `floods-stac` container.
You can mount them locally and make modifications before re-ingesting them into the STAC database.

## Item Updates

After fixing items in the `deltaresfloodssa/floods-stac` container, you may run the following to reingest them:

```bash
pctasks dataset process-items xarray-access-fix \
    -a since "2024-11-17T00:00:00Z" \
    --dataset datasets/deltaresfloods/dataset.yaml \
    --is-update-workflow --upsert --submit
```

Set `since` to a date strictly before you modified the STAC items in storage.
For example, if you modified items on December 4, set `since` to any date before December 4.
`since` must be a full ISO 8601 datetime.
--------------------------------------------------------------------------------
/datasets/deltaresfloods/dataset.yaml:
--------------------------------------------------------------------------------
id: deltaresfloods
image: pccomponents.azurecr.io/pctasks-task-base:latest
target_environment: staging

collections:
  - id: deltares-floods
    template: ${{ local.path(./collection) }}
    class: pctasks.dataset.collection:PremadeItemCollection
    asset_storage:
      # the STAC items
      - uri: blob://deltaresfloodssa/floods-stac
        token: ${{ pc.get_token(deltaresfloodssa, floods-stac)}}
        chunks:
          options:
            chunk_length: 3000
            extensions:
              - ".json"
    chunk_storage:
      uri: blob://deltaresfloodssa/floods-etl-data/chunks
--------------------------------------------------------------------------------
/datasets/deltaresfloods/tests/test_dataset.py:
--------------------------------------------------------------------------------
import logging
from pathlib import Path

from pctasks.cli.cli import setup_logging, setup_logging_for_module
from pctasks.dev.test_utils import run_process_items_workflow

HERE = Path(__file__).parent
DATASET_PATH = HERE / ".." / "dataset.yaml"

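# run_process_items_workflow (from pctasks.dev.test_utils) runs this dataset's
# full process-items workflow end-to-end; the localhost:5001 image reference
# below assumes the task base image was built and pushed to the local
# development registry beforehand.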
/ "dataset.yaml" 9 | 10 | 11 | def test_dataset(): 12 | run_process_items_workflow( 13 | DATASET_PATH, image="localhost:5001/pctasks-task-base:latest" 14 | ) 15 | 16 | 17 | if __name__ == "__main__": 18 | setup_logging(logging.DEBUG) 19 | setup_logging_for_module("__main__", logging.DEBUG) 20 | test_dataset() 21 | print("All tests passed") 22 | exit(0) 23 | -------------------------------------------------------------------------------- /datasets/eclipse/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: eclipse 2 | image: ${{ args.registry }}/pctasks-task-base:latest 3 | 4 | args: 5 | - registry 6 | 7 | collections: 8 | - id: eclipse 9 | template: ${{ local.path(./collection) }} 10 | class: pctasks.dataset.collection:PremadeItemCollection 11 | asset_storage: 12 | # the STAC items 13 | - uri: blob://ai4edataeuwest/eclipse-stac 14 | token: ${{ pc.get_token(ai4edataeuwest, eclipse-stac)}} 15 | chunks: 16 | options: 17 | chunk_length: 3000 18 | extensions: 19 | - ".json" 20 | chunk_storage: 21 | uri: blob://ai4edataeuwest/eclipse-etl-data/chunks 22 | -------------------------------------------------------------------------------- /datasets/ecmwf-forecast/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: ecmwf_forecast 2 | image: ${{ args.registry }}/pctasks-ecmwf-forecast:2024.6.13.0 3 | 4 | args: 5 | - registry 6 | 7 | code: 8 | src: ${{ local.path(./ecmwf_forecast.py) }} 9 | 10 | environment: 11 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }} 12 | 13 | collections: 14 | - id: ecmwf-forecast 15 | template: ${{ local.path(./collection/) }} 16 | class: ecmwf_forecast:EcmwfCollection 17 | asset_storage: 18 | - uri: blob://ai4edataeuwest/ecmwf/ 19 | chunks: 20 | options: 21 | # currently excluding "aifs", in favor of "ifs" 22 | # Could put that in a different collection, or modify 23 | # the stactools package. 
24 | matches: /ifs/(0p25|0p4-beta)/(enfo|oper|waef|wave)(?!-opendata) 25 | match_full_path: true 26 | extensions: [.grib2] 27 | chunk_storage: 28 | uri: blob://ai4edataeuwest/ecmwf-etl-data/pctasks/ 29 | -------------------------------------------------------------------------------- /datasets/ecmwf-forecast/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/ecmwf-forecast@0.2.0 2 | -------------------------------------------------------------------------------- /datasets/ecmwf-forecast/test_ecmwf_forecast.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ecmwf_forecast import EcmwfCollection 3 | from pctasks.core.storage import StorageFactory 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "href", 8 | [ 9 | "blob://ai4edataeuwest/ecmwf/20240314/00z/ifs/0p4-beta/enfo/20240314000000-0h-enfo-ef.grib2", 10 | "blob://ai4edataeuwest/ecmwf/20240314/00z/ifs/0p25/waef/20240314000000-0h-waef-ef.grib2", 11 | ], 12 | ) 13 | def test_ecmwf(href: str) -> None: 14 | storage_factory = StorageFactory() 15 | (item,) = EcmwfCollection.create_item(href, storage_factory) 16 | assert "ecmwf:resolution" in item.properties 17 | if "/0p4-beta/" in href: 18 | assert item.properties["ecmwf:resolution"] == "0.40" 19 | if "/0p25/" in href: 20 | assert item.properties["ecmwf:resolution"] == "0.25" 21 | -------------------------------------------------------------------------------- /datasets/era5-pds/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: era5-pds 2 | image: ${{ args.registry }}/pctasks-task-base:latest 3 | 4 | args: 5 | - registry 6 | 7 | collections: 8 | - id: era5-pds 9 | template: ${{ local.path(./collection) }} 10 | class: pctasks.dataset.collection:PremadeItemCollection 11 | asset_storage: 12 | # the STAC items 13 | - uri: blob://cpdataeuwest/era5-stac 14 | token: ${{ pc.get_token(cpdataeuwest, era5-stac)}} 15 | chunks: 16 | options: 17 | chunk_length: 3000 18 | extensions: 19 | - ".json" 20 | chunk_storage: 21 | uri: blob://cpdataeuwest/era5-etl-data/chunks 22 | -------------------------------------------------------------------------------- /datasets/esa-cci-lc/README.md: -------------------------------------------------------------------------------- 1 | # ESA CCI 2 | 3 | ## Docker container 4 | 5 | ```shell 6 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-esa-cci-lc:latest -f datasets/esa-cci-lc/Dockerfile . 7 | ``` 8 | -------------------------------------------------------------------------------- /datasets/esa-cci-lc/collection/esa-cci-lc-netcdf/description.md: -------------------------------------------------------------------------------- 1 | The ESA Climate Change Initiative (CCI) [Land Cover dataset](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview) provides consistent global annual land cover maps at 300m spatial resolution from 1992 to 2020. The land cover classes are defined using the United Nations Food and Agriculture Organization's (UN FAO) [Land Cover Classification System](https://www.fao.org/land-water/land/land-governance/land-resources-planning-toolbox/category/details/en/c/1036361/) (LCCS). In addition to the land cover maps, four quality flags are produced to document the reliability of the classification and change detection.
2 | 3 | The data in this Collection are the original NetCDF files accessed from the [Copernicus Climate Data Store](https://cds.climate.copernicus.eu/#!/home). We recommend using the [`esa-cci-lc` Collection](https://planetarycomputer.microsoft.com/dataset/esa-cci-lc), which provides the data as Cloud Optimized GeoTIFFs. -------------------------------------------------------------------------------- /datasets/esa-cci-lc/collection/esa-cci-lc/description.md: -------------------------------------------------------------------------------- 1 | The ESA Climate Change Initiative (CCI) [Land Cover dataset](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover?tab=overview) provides consistent global annual land cover maps at 300m spatial resolution from 1992 to 2020. The land cover classes are defined using the United Nations Food and Agriculture Organization's (UN FAO) [Land Cover Classification System](https://www.fao.org/land-water/land/land-governance/land-resources-planning-toolbox/category/details/en/c/1036361/) (LCCS). In addition to the land cover maps, four quality flags are produced to document the reliability of the classification and change detection. 2 | 3 | The data in this Collection have been converted from the [original NetCDF data](https://planetarycomputer.microsoft.com/dataset/esa-cci-lc-netcdf) to a set of tiled [Cloud Optimized GeoTIFFs](https://www.cogeo.org/) (COGs). 4 | -------------------------------------------------------------------------------- /datasets/esa-cci-lc/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/esa-cci-lc.git@c468c0a3e9149e530a72d74d51bf288f927fd41a -------------------------------------------------------------------------------- /datasets/esa-worldcover/README.md: -------------------------------------------------------------------------------- 1 | # planetary-computer-tasks dataset: esa-worldcover 2 | 3 | ## Building the Docker image 4 | 5 | To build and push a custom docker image to our container registry: 6 | 7 | ```shell 8 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-esa-worldcover:latest -f datasets/esa-worldcover/Dockerfile . 9 | ``` 10 | -------------------------------------------------------------------------------- /datasets/esa-worldcover/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: esa-worldcover 2 | image: ${{ args.registry }}/pctasks-esa-worldcover:latest 3 | 4 | args: 5 | - registry 6 | 7 | code: 8 | src: ${{ local.path(./esa_worldcover.py) }} 9 | 10 | collections: 11 | - id: esa-worldcover 12 | template: ${{ local.path(./collection) }} 13 | class: esa_worldcover:ESAWorldCoverCollection 14 | asset_storage: 15 | - uri: blob://ai4edataeuwest/esa-worldcover/ 16 | chunks: 17 | options: 18 | extensions: [.tif] 19 | # The 'name_starts_with' filter will run a single year only. This 20 | # is helpful since this is an annual product; next year we can 21 | # update this field and create only the items needed.
22 | name_starts_with: v100/2020/map 23 | chunk_length: 200 24 | # limit: 40 25 | chunk_storage: 26 | uri: blob://ai4edataeuwest/esa-worldcover-etl-data/pctasks/ 27 | -------------------------------------------------------------------------------- /datasets/esa-worldcover/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/esa-worldcover.git@164fcfea77954c87eb73d465d8da4bee00e1840c -------------------------------------------------------------------------------- /datasets/fws-nwi/README.md: -------------------------------------------------------------------------------- 1 | # planetary-computer-tasks dataset: fws-nwi 2 | 3 | ## Building the Docker image 4 | 5 | To build and push a custom docker image to our container registry: 6 | 7 | ```shell 8 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-fws-nwi:latest -f datasets/fws-nwi/Dockerfile . 9 | ``` 10 | -------------------------------------------------------------------------------- /datasets/fws-nwi/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: fws-nwi 2 | image: ${{ args.registry }}/pctasks-fws-nwi:latest 3 | args: 4 | - registry 5 | code: 6 | src: ${{ local.path(./fws_nwi.py) }} 7 | # requirements: ${{ local.path(./requirements.txt) }} 8 | 9 | task_config: 10 | fws-nwi: 11 | create-items: 12 | tags: 13 | batch_pool_id: high_memory_pool 14 | 15 | collections: 16 | - id: fws-nwi 17 | template: ${{ local.path(./collection) }} 18 | class: fws_nwi:FwsNwiCollection 19 | asset_storage: 20 | - uri: blob://landcoverdata/fws-nwi-onboarding/ 21 | chunks: 22 | options: 23 | extensions: [.zip] 24 | chunk_length: 1 25 | chunk_storage: 26 | uri: blob://landcoverdata/fws-nwi-etl-data/ 27 | -------------------------------------------------------------------------------- /datasets/fws-nwi/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-fws-nwi == 0.2.0 2 | -------------------------------------------------------------------------------- /datasets/gbif/README.md: -------------------------------------------------------------------------------- 1 | # planetary-computer-tasks dataset: gbif 2 | 3 | Global Biodiversity Information Facility 4 | 5 | ## Building the Docker image 6 | 7 | To build and push a custom docker image to our container registry: 8 | 9 | ```shell 10 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-gbif:latest -t pctasks-gbif:{date}.{count} -f datasets/gbif/Dockerfile . 
11 | ``` 12 | 13 | ## Update workflow 14 | 15 | The update workflow was registered with 16 | 17 | ```shell 18 | pctasks dataset process-items gbif-update --is-update-workflow --dataset datasets/gbif/dataset.yaml -u 19 | ``` -------------------------------------------------------------------------------- /datasets/gbif/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: gbif 2 | image: ${{ args.registry }}/pctasks-gbif:20230607.1 3 | 4 | args: 5 | - registry 6 | 7 | code: 8 | src: ${{ local.path(./gbif.py) }} 9 | requirements: ${{ local.path(./requirements.txt) }} 10 | 11 | environment: 12 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }} 13 | 14 | collections: 15 | - id: gbif 16 | template: ${{ local.path(./collection/gbif) }} 17 | token: ${{ pc.get_token(ai4edataeuwest, gbif) }} 18 | class: gbif:GBIFCollection 19 | asset_storage: 20 | - uri: blob://ai4edataeuwest/gbif/ 21 | chunks: 22 | options: 23 | list_folders: true 24 | min_depth: 2 25 | max_depth: 2 26 | chunk_storage: 27 | uri: blob://ai4edataeuwest/gbif-etl-data/pctasks-chunks -------------------------------------------------------------------------------- /datasets/gbif/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/TomAugspurger/gbif 2 | git+https://github.com/stac-utils/stac-table@99a30be419baa2445ab6d0756629eea079c80972 3 | dask 4 | dask-geopandas 5 | pytest 6 | adlfs 7 | -------------------------------------------------------------------------------- /datasets/goes/goes-cmi/goes_cmi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/goes/goes-cmi/goes_cmi/__init__.py -------------------------------------------------------------------------------- /datasets/goes/goes-cmi/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-goes==0.1.8 2 | pystac==1.10.1 -------------------------------------------------------------------------------- /datasets/goes/goes-glm/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-goes-glm==0.2.4 -------------------------------------------------------------------------------- /datasets/goes/goes-glm/tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | from pctasks.cli.cli import setup_logging, setup_logging_for_module 5 | from pctasks.dev.test_utils import run_process_items_workflow 6 | 7 | HERE = Path(__file__).parent 8 | DATASET_PATH = HERE / "dataset-dev-cluster.yaml" 9 | 10 | 11 | def test_goes_glm(): 12 | run_process_items_workflow( 13 | DATASET_PATH, 14 | collection_id="goes-glm", 15 | args={ 16 | "registry": "localhost:5001", 17 | }, 18 | splits_limit=1, 19 | chunks_limit=2, 20 | timeout_seconds=600 21 | ) 22 | 23 | 24 | if __name__ == "__main__": 25 | setup_logging(logging.DEBUG) 26 | setup_logging_for_module("__main__", logging.DEBUG) 27 | test_goes_glm() 28 | print("Test passed") 29 | exit(0) 30 | -------------------------------------------------------------------------------- /datasets/io-biodiversity/README.md: -------------------------------------------------------------------------------- 1 | # planetary-computer-tasks dataset: 
io-biodiversity 2 | 3 | ## Building the Docker image 4 | 5 | To build and push a custom docker image to our container registry: 6 | 7 | ```shell 8 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-io-biodiversity:latest -f datasets/io-biodiversity/Dockerfile . 9 | ``` 10 | -------------------------------------------------------------------------------- /datasets/io-biodiversity/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: io_biodiversity 2 | image: ${{ args.registry }}/pctasks-io-biodiversity:latest 3 | 4 | args: 5 | - registry 6 | 7 | code: 8 | src: ${{ local.path(./io_biodiversity.py) }} 9 | 10 | collections: 11 | - id: io-biodiversity 12 | template: ${{ local.path(./collection) }} 13 | class: io_biodiversity:IOBiodiversityIntactness 14 | asset_storage: 15 | - uri: blob://pcdata01euw/impact/bii-v1 16 | token: ${{ pc.get_token(pcdata01euw, impact) }} 17 | chunks: 18 | options: 19 | ends_with: ".tif" 20 | chunk_length: 500 # 1224 blobs per year; 4 years of data 21 | 22 | chunk_storage: 23 | uri: blob://pcdata01euw/impact-etl-data/pctasks/ 24 | -------------------------------------------------------------------------------- /datasets/io-biodiversity/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stac-utils/stactools.git@c2bdf32331a9782373d7152472093cd4bc4298a9 -------------------------------------------------------------------------------- /datasets/io-land-cover/collection/io-lulc/description.md: -------------------------------------------------------------------------------- 1 | __Note__: _A new version of this item is available for your use. This mature version of the map remains available for use in existing applications. This item will be retired in December 2024. There is 2020 data available in the newer [9-class dataset](https://planetarycomputer.microsoft.com/dataset/io-lulc-9-class)._ 2 | 3 | Global estimates of 10-class land use/land cover (LULC) for 2020, derived from ESA Sentinel-2 imagery at 10m resolution. This dataset was generated by [Impact Observatory](http://impactobservatory.com/), who used billions of human-labeled pixels (curated by the National Geographic Society) to train a deep learning model for land classification. The global map was produced by applying this model to the relevant yearly Sentinel-2 scenes on the Planetary Computer. 4 | 5 | This dataset is also available on the [ArcGIS Living Atlas of the World](https://livingatlas.arcgis.com/landcover/). 6 | -------------------------------------------------------------------------------- /datasets/landsat/collection/landsat-c2-l1/description.md: -------------------------------------------------------------------------------- 1 | Landsat Collection 2 Level-1 data, consisting of quantized and calibrated scaled Digital Numbers (DN) representing the multispectral image data. These [Level-1](https://www.usgs.gov/landsat-missions/landsat-collection-2-level-1-data) data can be [rescaled](https://www.usgs.gov/landsat-missions/using-usgs-landsat-level-1-data-product) to top of atmosphere (TOA) reflectance and/or radiance. Thermal band data can be rescaled to TOA brightness temperature. 
2 | 3 | This dataset represents the global archive of Level-1 data from [Landsat Collection 2](https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2) acquired by the [Multispectral Scanner System](https://landsat.gsfc.nasa.gov/multispectral-scanner-system/) onboard Landsat 1 through Landsat 5 from July 7, 1972 to January 7, 2013. Images are stored in [cloud-optimized GeoTIFF](https://www.cogeo.org/) format. 4 | -------------------------------------------------------------------------------- /datasets/landsat/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-landsat==0.2.4 2 | pystac-client>=0.4.0 -------------------------------------------------------------------------------- /datasets/modis/collection/modis-09A1-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) 09A1 Version 6.1 product provides an estimate of the surface spectral reflectance of MODIS Bands 1 through 7 corrected for atmospheric conditions such as gases, aerosols, and Rayleigh scattering. Along with the seven 500 meter (m) reflectance bands are two quality layers and four observation bands. For each pixel, a value is selected from all the acquisitions within the 8-day composite period. The criteria for the pixel choice include cloud and solar zenith. When several acquisitions meet the criteria, the pixel with the minimum channel 3 (blue) value is used. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-09Q1-061/description.md: -------------------------------------------------------------------------------- 1 | The 09Q1 Version 6.1 product provides an estimate of the surface spectral reflectance of Moderate Resolution Imaging Spectroradiometer (MODIS) Bands 1 and 2, corrected for atmospheric conditions such as gases, aerosols, and Rayleigh scattering. Provided along with the 250 meter (m) surface reflectance bands are two quality layers. For each pixel, a value is selected from all the acquisitions within the 8-day composite period. The criteria for the pixel choice include cloud and solar zenith. When several acquisitions meet the criteria, the pixel with the minimum channel 3 (blue) value is used. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-10A1-061/description.md: -------------------------------------------------------------------------------- 1 | This global Level-3 (L3) data set provides a daily composite of snow cover and albedo derived from the 'MODIS Snow Cover 5-Min L2 Swath 500m' data set. Each data granule is a 10° x 10° tile projected to a 500 m sinusoidal grid. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-10A2-061/description.md: -------------------------------------------------------------------------------- 1 | This global Level-3 (L3) data set provides the maximum snow cover extent observed over an eight-day period within 10° x 10° MODIS sinusoidal grid tiles. Tiles are generated by compositing 500 m observations from the 'MODIS Snow Cover Daily L3 Global 500m Grid' data set. A bit flag index is used to track the eight-day snow/no-snow chronology for each 500 m cell.
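The bit-flag chronology lends itself to simple bitwise decoding. A minimal sketch, assuming day 1 of the composite period maps to the least-significant bit (an illustrative assumption; the product user guide defines the authoritative layout):

```python
from typing import List


def snow_days(flag: int) -> List[int]:
    """Return the 1-based days (1-8) of the composite period with snow,
    assuming day 1 is the least-significant bit of the flag."""
    return [day for day in range(1, 9) if flag & (1 << (day - 1))]


print(snow_days(0b00010110))  # -> [2, 3, 5]
```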
-------------------------------------------------------------------------------- /datasets/modis/collection/modis-11A1-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Land Surface Temperature/Emissivity Daily Version 6.1 product provides daily per-pixel Land Surface Temperature and Emissivity (LST&E) with 1 kilometer (km) spatial resolution in a 1,200 by 1,200 km grid. The pixel temperature value is derived from the MOD11_L2 swath product. Above 30 degrees latitude, some pixels may have multiple observations where the criteria for clear-sky are met. When this occurs, the pixel value is a result of the average of all qualifying observations. Provided along with the daytime and nighttime surface temperature bands are associated quality control assessments, observation times, view zenith angles, and clear-sky coverages along with bands 31 and 32 emissivities from land cover types. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-11A2-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Land Surface Temperature/Emissivity 8-Day Version 6.1 product provides an average 8-day per-pixel Land Surface Temperature and Emissivity (LST&E) with a 1 kilometer (km) spatial resolution in a 1,200 by 1,200 km grid. Each pixel value in the MOD11A2 is a simple average of all the corresponding MOD11A1 LST pixels collected within that 8-day period. The 8-day compositing period was chosen because twice that period is the exact ground track repeat period of the Terra and Aqua platforms. Provided along with the daytime and nighttime surface temperature bands are associated quality control assessments, observation times, view zenith angles, and clear-sky coverages along with bands 31 and 32 emissivities from land cover types. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-13A1-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Vegetation Indices 16-Day Version 6.1 product provides Vegetation Index (VI) values on a per-pixel basis at 500 meter (m) spatial resolution. There are two primary vegetation layers. The first is the Normalized Difference Vegetation Index (NDVI), which is referred to as the continuity index to the existing National Oceanic and Atmospheric Administration-Advanced Very High Resolution Radiometer (NOAA-AVHRR) derived NDVI. The second vegetation layer is the Enhanced Vegetation Index (EVI), which has improved sensitivity over high biomass regions. The algorithm for this product chooses the best available pixel value from all the acquisitions from the 16-day period. The criteria used are low clouds, low view angle, and the highest NDVI/EVI value. Provided along with the vegetation layers and two quality assurance (QA) layers are reflectance bands 1 (red), 2 (near-infrared), 3 (blue), and 7 (mid-infrared), as well as four observation layers.
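Both indices are simple band arithmetic over surface reflectance. A hedged sketch of the standard formulations (the EVI coefficients shown are the conventional MODIS constants; the array names are illustrative):

```python
import numpy as np


def ndvi(nir: np.ndarray, red: np.ndarray) -> np.ndarray:
    """Normalized Difference Vegetation Index."""
    return (nir - red) / (nir + red)


def evi(nir: np.ndarray, red: np.ndarray, blue: np.ndarray) -> np.ndarray:
    """Enhanced Vegetation Index, conventional MODIS coefficients."""
    return 2.5 * (nir - red) / (nir + 6.0 * red - 7.5 * blue + 1.0)
```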
-------------------------------------------------------------------------------- /datasets/modis/collection/modis-13Q1-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Vegetation Indices Version 6.1 data are generated every 16 days at 250 meter (m) spatial resolution as a Level 3 product. The MOD13Q1 product provides two primary vegetation layers. The first is the Normalized Difference Vegetation Index (NDVI), which is referred to as the continuity index to the existing National Oceanic and Atmospheric Administration-Advanced Very High Resolution Radiometer (NOAA-AVHRR) derived NDVI. The second vegetation layer is the Enhanced Vegetation Index (EVI), which has improved sensitivity over high biomass regions. The algorithm chooses the best available pixel value from all the acquisitions from the 16-day period. The criteria used are low clouds, low view angle, and the highest NDVI/EVI value. Along with the vegetation layers and the two quality layers, the HDF file will have MODIS reflectance bands 1 (red), 2 (near-infrared), 3 (blue), and 7 (mid-infrared), as well as four observation layers. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-14A1-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Thermal Anomalies and Fire Daily Version 6.1 data are generated every eight days at 1 kilometer (km) spatial resolution as a Level 3 product. MOD14A1 contains eight consecutive days of fire data conveniently packaged into a single file. The Science Dataset (SDS) layers include the fire mask, pixel quality indicators, maximum fire radiative power (MaxFRP), and the position of the fire pixel within the scan. Each layer consists of daily per-pixel information for each of the eight days of data acquisition. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-14A2-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) Thermal Anomalies and Fire 8-Day Version 6.1 data are generated at 1 kilometer (km) spatial resolution as a Level 3 product. The MOD14A2 gridded composite contains the maximum value of the individual fire pixel classes detected during the eight days of acquisition. The Science Dataset (SDS) layers include the fire mask and pixel quality indicators. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-15A2H-061/description.md: -------------------------------------------------------------------------------- 1 | The Version 6.1 Moderate Resolution Imaging Spectroradiometer (MODIS) Level 4, Combined Fraction of Photosynthetically Active Radiation (FPAR), and Leaf Area Index (LAI) product is an 8-day composite dataset with 500 meter pixel size. The algorithm chooses the best pixel available from within the 8-day period. LAI is defined as the one-sided green leaf area per unit ground area in broadleaf canopies and as one-half the total needle surface area per unit ground area in coniferous canopies. FPAR is defined as the fraction of incident photosynthetically active radiation (400-700 nm) absorbed by the green elements of a vegetation canopy.
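The "choose the best pixel within the period" step common to these composites can be illustrated as a per-pixel selection over a time axis. This is not the operational algorithm, just a minimal sketch of the pattern (the array shapes and the lowest-score-is-best convention are assumptions):

```python
import numpy as np


def best_pixel_composite(values: np.ndarray, quality: np.ndarray) -> np.ndarray:
    """For each cell, keep the observation with the best (lowest) quality
    score across the time axis (axis 0) of a (days, rows, cols) stack."""
    best = np.argmin(quality, axis=0)   # per-pixel index of the best day
    rows, cols = np.indices(best.shape)
    return values[best, rows, cols]
```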
-------------------------------------------------------------------------------- /datasets/modis/collection/modis-15A3H-061/description.md: -------------------------------------------------------------------------------- 1 | The MCD15A3H Version 6.1 Moderate Resolution Imaging Spectroradiometer (MODIS) Level 4, Combined Fraction of Photosynthetically Active Radiation (FPAR), and Leaf Area Index (LAI) product is a 4-day composite data set with 500 meter pixel size. The algorithm chooses the best pixel available from all the acquisitions of both MODIS sensors located on NASA's Terra and Aqua satellites from within the 4-day period. LAI is defined as the one-sided green leaf area per unit ground area in broadleaf canopies and as one-half the total needle surface area per unit ground area in coniferous canopies. FPAR is defined as the fraction of incident photosynthetically active radiation (400-700 nm) absorbed by the green elements of a vegetation canopy. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-17A2H-061/description.md: -------------------------------------------------------------------------------- 1 | The Version 6.1 Gross Primary Productivity (GPP) product is a cumulative 8-day composite of values with 500 meter (m) pixel size based on the radiation use efficiency concept that can be potentially used as inputs to data models to calculate terrestrial energy, carbon, water cycle processes, and biogeochemistry of vegetation. The Moderate Resolution Imaging Spectroradiometer (MODIS) data product includes information about GPP and Net Photosynthesis (PSN). The PSN band values are the GPP less the Maintenance Respiration (MR). The data product also contains a PSN Quality Control (QC) layer. The quality layer contains quality information for both the GPP and the PSN. -------------------------------------------------------------------------------- /datasets/modis/collection/modis-17A3HGF-061/description.md: -------------------------------------------------------------------------------- 1 | The Version 6.1 product provides information about annual Net Primary Production (NPP) at 500 meter (m) pixel resolution. Annual Moderate Resolution Imaging Spectroradiometer (MODIS) NPP is derived from the sum of all 8-day Net Photosynthesis (PSN) products (MOD17A2H) from the given year. The PSN value is the difference of the Gross Primary Productivity (GPP) and the Maintenance Respiration (MR). The product will be generated at the end of each year when the entire yearly 8-day MOD15A2H is available. Hence, the gap-filled product is the improved MOD17, which has cleaned the poor-quality inputs from 8-day Leaf Area Index and Fraction of Photosynthetically Active Radiation (LAI/FPAR) based on the Quality Control (QC) label for every pixel. If any LAI/FPAR pixel did not meet the quality screening criteria, its value is determined through linear interpolation. However, users cannot get this product in near-real time because it will be generated only at the end of a given year.
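The arithmetic relating the annual and 8-day products is straightforward: PSN = GPP - MR for each composite, and annual NPP is the sum over the year's composites. A small sketch with synthetic arrays (the names and the 46-composite year length are illustrative assumptions):

```python
import numpy as np


def annual_npp(gpp_8day: np.ndarray, mr_8day: np.ndarray) -> np.ndarray:
    """Annual NPP as the sum of PSN = GPP - MR over a year's composites."""
    psn = gpp_8day - mr_8day    # net photosynthesis per 8-day composite
    return psn.sum(axis=0)      # sum over the time axis


gpp = np.full((46, 2, 2), 10.0)   # ~46 composites per year, 2x2 grid
mr = np.full((46, 2, 2), 4.0)
print(annual_npp(gpp, mr))        # 276.0 everywhere
```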
-------------------------------------------------------------------------------- /datasets/modis/collection/modis-43A4-061/description.md: -------------------------------------------------------------------------------- 1 | The Moderate Resolution Imaging Spectroradiometer (MODIS) MCD43A4 Version 6.1 Nadir Bidirectional Reflectance Distribution Function (BRDF)-Adjusted Reflectance (NBAR) dataset is produced daily using 16 days of Terra and Aqua MODIS data at 500 meter (m) resolution. The view angle effects are removed from the directional reflectances, resulting in a stable and consistent NBAR product. Data are temporally weighted to the ninth day which is reflected in the Julian date in the file name. Users are urged to use the band specific quality flags to isolate the highest quality full inversion results for their own science applications as described in the User Guide. The MCD43A4 provides NBAR and simplified mandatory quality layers for MODIS bands 1 through 7. Essential quality information provided in the corresponding MCD43A2 data file should be consulted when using this product. -------------------------------------------------------------------------------- /datasets/modis/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/modis@419101223609805f9ac9d2a38401448a36331460 2 | -------------------------------------------------------------------------------- /datasets/ms-buildings/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/msbuildings.git@e7731afde6a1a767827c2c98b12cb414c08add6c 2 | adlfs -------------------------------------------------------------------------------- /datasets/naip/collection/description.md: -------------------------------------------------------------------------------- 1 | The [National Agriculture Imagery Program](https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/) (NAIP) 2 | provides U.S.-wide, high-resolution aerial imagery, with four spectral bands (R, G, B, IR). 3 | NAIP is administered by the [Aerial Field Photography Office](https://www.fsa.usda.gov/programs-and-services/aerial-photography/) (AFPO) 4 | within the [US Department of Agriculture](https://www.usda.gov/) (USDA). 5 | Data are captured at least once every three years for each state. 6 | This dataset represents NAIP data from 2010-present, in [cloud-optimized GeoTIFF](https://www.cogeo.org/) format. 7 | You can visualize the coverage of current and past collections [here](https://naip-usdaonline.hub.arcgis.com/). 
8 | -------------------------------------------------------------------------------- /datasets/naip/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: naip 2 | image: ${{ args.registry }}/pctasks-naip:latest 3 | 4 | args: 5 | - registry 6 | - year 7 | 8 | code: 9 | src: ${{ local.path(./naip.py) }} 10 | 11 | environment: 12 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }} 13 | 14 | collections: 15 | - id: naip 16 | template: ${{ local.path(./collection) }} 17 | class: naip:NAIPCollection 18 | asset_storage: 19 | - uri: blob://naipeuwest/naip/ 20 | chunks: 21 | options: 22 | extensions: [.tif] 23 | name_starts_with: v002/ 24 | chunk_length: 3000 25 | matches: ".*?_(\\d{3})_${{args.year}}" 26 | chunk_storage: 27 | uri: blob://naipeuwest/naip-etl-data/pctasks/ 28 | -------------------------------------------------------------------------------- /datasets/naip/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-naip==0.5.0 2 | -------------------------------------------------------------------------------- /datasets/nasa-nex-gddp-cmip6/README.md: -------------------------------------------------------------------------------- 1 | # planetary-computer-tasks dataset: nasa-nex-gddp-cmip6 2 | 3 | NASA NEX GDDP CMIP6 Dataset 4 | 5 | ## Building the Docker image 6 | 7 | To build and push a custom docker image to our container registry: 8 | 9 | ```shell 10 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-nasa-nex-gddp-cmip6:latest -t pctasks-nasa-nex-gddp-cmip6:{date}.{count} -f datasets/nasa-nex-gddp-cmip6/Dockerfile . 11 | ``` 12 | 13 | ## Static update 14 | 15 | This collection is not regularly updated. 16 | 17 | ```console 18 | $ pctasks dataset process-items \ 19 | -d datasets/nasa-nex-gddp-cmip6/dataset.yaml \ 20 | nasa-nex-gddp-cmip-test \ 21 | --arg registry pccomponentstest.azurecr.io \ 22 | --upsert --submit 23 | ``` 24 | 25 | **Notes:** 26 | 27 | - Currently uses a chunk size of one because item creation was timing out with a chunk size of 100; a middle ground hasn't been investigated. 28 | - Runs in about 10 hours.
-------------------------------------------------------------------------------- /datasets/nasa-nex-gddp-cmip6/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: nasa_nex_gddp_cmip6 2 | image: ${{ args.registry }}/pctasks-nasa-nex 3 | 4 | args: 5 | - registry 6 | 7 | code: 8 | src: ${{ local.path(./nasa_nex_gddp_cmip6.py) }} 9 | 10 | environment: 11 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }} 12 | 13 | collections: 14 | - id: nasa-nex-gddp-cmip6 15 | template: ${{ local.path(./collection/) }} 16 | class: nasa_nex_gddp_cmip6:Cmip6Collection 17 | asset_storage: 18 | - uri: blob://nasagddp/nex-gddp-cmip6/NEX/GDDP-CMIP6/ 19 | chunks: 20 | options: 21 | extensions: [.nc] 22 | matches: (rsds) 23 | chunk_length: 1 24 | 25 | chunk_storage: 26 | uri: blob://nasagddp/nex-gddp-cmip6-etl-data/pctasks/ 27 | 28 | -------------------------------------------------------------------------------- /datasets/nasa-nex-gddp-cmip6/requirements.txt: -------------------------------------------------------------------------------- 1 | adlfs 2 | h5netcdf 3 | h5py 4 | kerchunk 5 | netcdf4 6 | scipy 7 | xarray 8 | xstac -------------------------------------------------------------------------------- /datasets/noaa-cdr/README.md: -------------------------------------------------------------------------------- 1 | # NOAA Climate Data Records (CDR) 2 | 3 | ### Dynamic updates 4 | 5 | `noaa-cdr-sea-surface-temperature-optimum-interpolation` is updated daily. 6 | 7 | ```console 8 | $ pctasks dataset process-items '${{ args.since }}' \ 9 | -d datasets/noaa-cdr/update.yaml \ 10 | -c noaa-cdr-sea-surface-temperature-optimum-interpolation \ 11 | --workflow-id=noaa-cdr-sea-surface-temperature-optimum-interpolation-update \ 12 | --is-update-workflow \ 13 | --upsert 14 | ``` -------------------------------------------------------------------------------- /datasets/noaa-cdr/collections/ocean-heat-content-netcdf/description.md: -------------------------------------------------------------------------------- 1 | The Ocean Heat Content Climate Data Record (CDR) is a set of ocean heat content anomaly (OHCA) time-series for 1955-present on 3-monthly, yearly, and pentadal (five-yearly) scales. This CDR quantifies ocean heat content change over time, which is an essential metric for understanding climate change and the Earth's energy budget. It provides time-series for multiple depth ranges in the global ocean and each of the major basins (Atlantic, Pacific, and Indian) divided by hemisphere (Northern, Southern). 2 | 3 | This is a NetCDF-only collection; for Cloud-Optimized GeoTIFFs, use collection `noaa-cdr-ocean-heat-content`. 4 | The NetCDF files are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination). 5 | -------------------------------------------------------------------------------- /datasets/noaa-cdr/collections/ocean-heat-content/description.md: -------------------------------------------------------------------------------- 1 | The Ocean Heat Content Climate Data Record (CDR) is a set of ocean heat content anomaly (OHCA) time-series for 1955-present on 3-monthly, yearly, and pentadal (five-yearly) scales. This CDR quantifies ocean heat content change over time, which is an essential metric for understanding climate change and the Earth's energy budget.
It provides time-series for multiple depth ranges in the global ocean and each of the major basins (Atlantic, Pacific, and Indian) divided by hemisphere (Northern, Southern). 2 | 3 | These Cloud Optimized GeoTIFFs (COGs) were created from NetCDF files which are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination). 4 | For the NetCDF files, see collection `noaa-cdr-ocean-heat-content-netcdf`. 5 | -------------------------------------------------------------------------------- /datasets/noaa-cdr/collections/sea-ice-concentration/description.md: -------------------------------------------------------------------------------- 1 | The Sea Ice Concentration Climate Data Record (CDR) provides a consistent daily and monthly time series of sea ice concentrations for both the north and south Polar Regions on a 25 km x 25 km grid. These data can be used to estimate how much of the ocean surface is covered by ice, and monitor changes in sea ice concentration. The CDR combines concentration estimates using two algorithms developed at the NASA Goddard Space Flight Center (GSFC). Gridded brightness temperatures acquired from a number of Defense Meteorological Satellite Program (DMSP) passive microwave radiometers provide the necessary input to produce the dataset. 2 | 3 | These Cloud Optimized GeoTIFFs (COGs) were created from NetCDF files which are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination). 4 | For the NetCDF files, see collection `noaa-cdr-sea-ice-concentration-netcdf`. 5 | -------------------------------------------------------------------------------- /datasets/noaa-cdr/collections/sea-surface-temperature-whoi-netcdf/description.md: -------------------------------------------------------------------------------- 1 | The Sea Surface Temperature-Woods Hole Oceanographic Institution (WHOI) Climate Data Record (CDR) is one of three CDRs which combine to form the NOAA Ocean Surface Bundle (OSB) CDR. The resultant sea surface temperature (SST) data are produced through modeling the diurnal variability in combination with AVHRR SST observations. The final record is output to a 3-hourly 0.25° resolution grid over the global ice-free oceans from January 1988—present. 2 | 3 | This is a NetCDF-only collection; for Cloud-Optimized GeoTIFFs, use collection `noaa-cdr-sea-surface-temperature-whoi`. 4 | The NetCDF files are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination). 5 | -------------------------------------------------------------------------------- /datasets/noaa-cdr/collections/sea-surface-temperature-whoi/description.md: -------------------------------------------------------------------------------- 1 | The Sea Surface Temperature-Woods Hole Oceanographic Institution (WHOI) Climate Data Record (CDR) is one of three CDRs which combine to form the NOAA Ocean Surface Bundle (OSB) CDR. The resultant sea surface temperature (SST) data are produced through modeling the diurnal variability in combination with AVHRR SST observations. The final record is output to a 3-hourly 0.25° resolution grid over the global ice-free oceans from January 1988—present.
2 | 3 | These Cloud Optimized GeoTIFFs (COGs) were created from NetCDF files which are delivered to Azure as part of the [NOAA Open Data Dissemination (NODD) Program](https://www.noaa.gov/information-technology/open-data-dissemination). 4 | For the NetCDF files, see collection `noaa-cdr-sea-surface-temperature-whoi-netcdf`. 5 | -------------------------------------------------------------------------------- /datasets/noaa-cdr/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-noaa-cdr @ git+https://github.com/stactools-packages/noaa-cdr@db4ebdc633a2cb1f27874b039edcbe761b81b214 2 | -------------------------------------------------------------------------------- /datasets/noaa-climate-normals/README.md: -------------------------------------------------------------------------------- 1 | # NOAA Climate Normals 2 | 3 | ### Building the Docker image 4 | 5 | ```shell 6 | az acr build -r $REGISTRY --subscription $SUBSCRIPTION -t pctasks-noaa-climate-normals:latest -f datasets/noaa-climate-normals/Dockerfile . 7 | ``` 8 | -------------------------------------------------------------------------------- /datasets/noaa-climate-normals/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/noaa-climate-normals.git@2d574925ac928d4705f3f9e85f5fbb4794d0593f -------------------------------------------------------------------------------- /datasets/noaa-mrms-qpe/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG registry 2 | FROM ${registry}/pctasks-task-base:latest 3 | 4 | COPY datasets/noaa-mrms-qpe /opt/src/datasets/noaa-mrms-qpe 5 | RUN python3 -m pip install -r /opt/src/datasets/noaa-mrms-qpe/requirements.txt 6 | -------------------------------------------------------------------------------- /datasets/noaa-mrms-qpe/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-noaa-mrms-qpe == 0.3.1 -------------------------------------------------------------------------------- /datasets/noaa-mrms-qpe/tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from pctasks.dev.test_utils import run_process_items_workflow 6 | 7 | HERE = Path(__file__).parent 8 | DATASET_PATH = HERE / "dataset-dev-cluster.yaml" 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "collection", 13 | ["noaa-mrms-qpe-1h-pass1", "noaa-mrms-qpe-1h-pass2", "noaa-mrms-qpe-24h-pass2"], 14 | ) 15 | def test_collection(collection): 16 | run_process_items_workflow( 17 | DATASET_PATH, 18 | collection, 19 | args={ 20 | "registry": "localhost:5001", 21 | }, 22 | splits_limit=1, 23 | chunks_limit=2, 24 | timeout_seconds=600, 25 | ) 26 | -------------------------------------------------------------------------------- /datasets/noaa_nclimgrid/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/noaa-nclimgrid.git@137e512eaf11df824e2a232e62ffca7a4d9dddef -------------------------------------------------------------------------------- /datasets/noaa_nclimgrid/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def pytest_addoption(parser): 5 | parser.addoption( 6 | "--run-slow", 7 | action="store_true", 8 | default=False, 9 | 
help="Run slow tests", 10 | ) 11 | 12 | 13 | def pytest_configure(config) -> None: 14 | config.addinivalue_line("markers", "slow: mark test as slow to run") 15 | 16 | 17 | def pytest_collection_modifyitems(config, items): 18 | if not config.getoption("--run-slow"): 19 | skip_slow = pytest.mark.skip(reason="Only run when --run-slow is given") 20 | for item in items: 21 | if "slow" in item.keywords: 22 | item.add_marker(skip_slow) 23 | -------------------------------------------------------------------------------- /datasets/sentinel-1-grd/README.md: -------------------------------------------------------------------------------- 1 | # planetary-computer-tasks dataset: sentinel-1-grd 2 | 3 | ## Chunking for dynamic ingest 4 | 5 | - Requires an extra `--arg year-prefix {year}` argument when running `pctasks dataset create-chunks` or `pctasks dataset process-items` commands. 6 | - Asset chunkfile creation takes about 5 minutes. 7 | - Item creation takes about 5 minutes for ~1 day of data. 8 | 9 | ## Docker image 10 | 11 | To build and push a custom docker image to our container registry: 12 | 13 | ```shell 14 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-sentinel-1-grd:latest -t pctasks-sentinel-1-grd:{date}.{count} -f datasets/sentinel-1-grd/Dockerfile . 15 | ``` 16 | 17 | ## Dynamic updates 18 | 19 | The dynamic update workflow was registered with 20 | 21 | ```shell 22 | $ pctasks dataset process-items sentinel-1-grd-update --is-update-workflow -d datasets/sentinel-1-grd/dataset.yaml --upsert 23 | ``` -------------------------------------------------------------------------------- /datasets/sentinel-1-grd/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-sentinel1==0.5.0 -------------------------------------------------------------------------------- /datasets/sentinel-1-rtc/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools==0.3.* -------------------------------------------------------------------------------- /datasets/sentinel-2/README.md: -------------------------------------------------------------------------------- 1 | # Sentinel-2 2 | 3 | ## Chunk creation for dynamic ingest 4 | 5 | - Using the same chunking split level and options as ETL 6 | - Listing the `manifest.safe` files 7 | - Generates about 1000 tasks 8 | - 5-6 hour run-time with a `--since` option and run on the `pctasksteststaging` batch account 9 | - No faster set of chunking options found. 10 | 11 | ## Docker container 12 | 13 | ```shell 14 | az acr build -r {the registry} --subscription {the subscription} -t pctasks-sentinel-2:latest -t pctasks-sentinel-2:{date}.{count} -f datasets/sentinel-2/Dockerfile . 15 | ``` 16 | 17 | ## Update Workflow 18 | 19 | Created with 20 | 21 | ``` 22 | pctasks dataset process-items --is-update-workflow sentinel-2-l2a-update -d datasets/sentinel-2/dataset.yaml -u 23 | ``` -------------------------------------------------------------------------------- /datasets/sentinel-2/collection/description.md: -------------------------------------------------------------------------------- 1 | The [Sentinel-2](https://sentinel.esa.int/web/sentinel/missions/sentinel-2) program provides global imagery in thirteen spectral bands at 10m-60m resolution and a revisit time of approximately five days. 
This dataset represents the global Sentinel-2 archive, from 2016 to the present, processed to L2A (bottom-of-atmosphere) using [Sen2Cor](https://step.esa.int/main/snap-supported-plugins/sen2cor/) and converted to [cloud-optimized GeoTIFF](https://www.cogeo.org/) format. -------------------------------------------------------------------------------- /datasets/sentinel-2/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools_sentinel2==0.2.1 2 | -------------------------------------------------------------------------------- /datasets/sentinel-3/collection/sentinel-3-sral-lan-l2-netcdf/description.md: -------------------------------------------------------------------------------- 1 | This Collection provides Sentinel-3 [SRAL Level-2 Land Altimetry](https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-3-altimetry/level-2-algorithms-products) products, which contain data on land radar altimetry measurements. Each product contains three NetCDF files: 2 | 3 | - A reduced data file containing a subset of the 1 Hz Ku-band parameters. 4 | - A standard data file containing the standard 1 Hz and 20 Hz Ku- and C-band parameters. 5 | - An enhanced data file containing the standard 1 Hz and 20 Hz Ku- and C-band parameters along with the waveforms and parameters necessary to reprocess the data. 6 | 7 | More information about the product and data processing can be found in the [User Guide](https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-3-altimetry/overview) and [Technical Guide](https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-3-altimetry). 8 | 9 | This Collection contains Level-2 data in NetCDF files from March 2016 to present. 10 | -------------------------------------------------------------------------------- /datasets/sentinel-3/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/sentinel3.git@36375cc63c053087380664ff931ceed5ad3b5f83 2 | -------------------------------------------------------------------------------- /datasets/sentinel-5p/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/sentinel5p.git@a4d1e06fb438823027bbc7a34899c0fc297a1e1b -------------------------------------------------------------------------------- /datasets/stac-geoparquet/requirements.txt: -------------------------------------------------------------------------------- 1 | stac-geoparquet[pgstac,pc]==0.2.1 2 | psycopg[binary,pool]==3.1.8 3 | azure-data-tables==12.4.2 4 | pypgstac==0.7.4 -------------------------------------------------------------------------------- /datasets/stac-geoparquet/workflow_test.yaml: -------------------------------------------------------------------------------- 1 | name: stac-geoparquet 2 | dataset: microsoft/stac-geoparquet 3 | id: stac-geoparquet 4 | 5 | jobs: 6 | stac: 7 | tasks: 8 | - id: create 9 | image: pccomponentstest.azurecr.io/pctasks-stac-geoparquet:2023.7.10.1 10 | code: 11 | src: ${{ local.path(pc_stac_geoparquet.py) }} 12 | task: pc_stac_geoparquet:StacGeoparquetTask 13 | args: 14 | table_account_url: "https://pctapisstagingsa.table.core.windows.net" 15 | table_name: "collectionconfig" 16 | storage_options_account_name: "pcstacitems" 17 | collections: "io-lulc-annual-v02" 18 | environment: 19 | APPLICATIONINSIGHTS_CONNECTION_STRING: ${{ secrets.task-application-insights-connection-string }} 20 
| STAC_GEOPARQUET_CONNECTION_INFO: ${{secrets.pgstac-connection-string}} 21 | -------------------------------------------------------------------------------- /datasets/terraclimate/collection/description.md: -------------------------------------------------------------------------------- 1 | [TerraClimate](http://www.climatologylab.org/terraclimate.html) is a dataset of monthly climate and climatic water balance for global terrestrial surfaces from 1958 to the present. These data provide important inputs for ecological and hydrological studies at global scales that require high spatial resolution and time-varying data. All data have monthly temporal resolution and a ~4-km (1/24th degree) spatial resolution. This dataset is provided in [Zarr](https://zarr.readthedocs.io/) format. 2 | -------------------------------------------------------------------------------- /datasets/terraclimate/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: terraclimate 2 | image: ${{ args.registry }}/pctasks-task-base:latest 3 | 4 | args: 5 | - registry 6 | 7 | collections: 8 | - id: terraclimate 9 | template: ${{ local.path(./collection) }} 10 | class: pctasks.dataset.collection:PremadeItemCollection 11 | asset_storage: [] 12 | chunk_storage: 13 | uri: "blob://cpdataeuwest/cpdata-etl-data/chunks" 14 | -------------------------------------------------------------------------------- /datasets/usda-cdl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/usda-cdl/README.md -------------------------------------------------------------------------------- /datasets/usda-cdl/dataset.yaml: -------------------------------------------------------------------------------- 1 | id: usda-cdl 2 | image: ${{ args.registry }}/pctasks-usda-cdl:latest 3 | args: 4 | - registry 5 | code: 6 | src: ${{ local.path(./usda_cdl.py) }} 7 | # requirements: ${{ local.path(./requirements.txt) }} 8 | collections: 9 | - id: usda-cdl 10 | template: ${{ local.path(./collection) }} 11 | class: usda_cdl:UsdaCdlCollection 12 | asset_storage: 13 | - uri: blob://landcoverdata/usda-cdl/tiles 14 | chunks: 15 | options: 16 | list_folders: true 17 | max_depth: 1 18 | chunk_length: 2 19 | chunk_storage: 20 | uri: blob://landcoverdata/usda-cdl-etl-data/chunks 21 | -------------------------------------------------------------------------------- /datasets/usda-cdl/requirements.txt: -------------------------------------------------------------------------------- 1 | stactools-usda-cdl == 0.1.3 2 | -------------------------------------------------------------------------------- /datasets/usda-cdl/tile.yaml: -------------------------------------------------------------------------------- 1 | name: Tile USDA CDL assets 2 | id: usda-cdl-tile 3 | dataset: usda-cdl 4 | args: 5 | - registry 6 | jobs: 7 | list-files: 8 | tasks: 9 | - id: list-files 10 | image: ${{ args.registry }}/pctasks-usda-cdl:latest 11 | task: pctasks.task.common.list_files:task 12 | args: 13 | src_uri: blob://landcoverdata/usda-cdl-onboarding 14 | extensions: 15 | - .zip 16 | tile: 17 | foreach: 18 | items: ${{ jobs.list-files.tasks.list-files.output.uris }} 19 | tasks: 20 | - id: tile 21 | image: ${{ args.registry }}/pctasks-usda-cdl:latest 22 | code: 23 | src: ${{ local.path(./usda_cdl.py) }} 24 | # requirements: ${{ local.path(./requirements.txt) }} 25 | task: usda_cdl:tile_task 26 | 
args: 27 | src_uri: ${{ item }} 28 | dst_uri: blob://landcoverdata/usda-cdl/tiles 29 | -------------------------------------------------------------------------------- /datasets/usgs-lcmap/fix_items/README.md: -------------------------------------------------------------------------------- 1 | # Fix LCMAP Item Asset class lists 2 | 3 | Downloads the Item ndjsons from blob storage, removes the land cover change classes from the primary (lcpri) and secondary (lcsec) land cover Assets in each Item, and uploads ndjsons containing the corrected Items back to blob storage. 4 | 5 | ## Running 6 | 7 | Edit the `incorrect_chunkset_uri` and `corrected_chunkset_uri` arguments to operate on either CONUS or Hawaii data. Then run: 8 | 9 | ```shell 10 | pctasks workflow upsert-and-submit datasets/usgs-lcmap/fix_items/fix_items.yaml 11 | ``` 12 | -------------------------------------------------------------------------------- /datasets/usgs-lcmap/fix_items/fix_items.yaml: -------------------------------------------------------------------------------- 1 | name: Fix USGS LCMAP Item classes 2 | id: usgs-lcmap-fix-items 3 | dataset: usgs-lcmap 4 | args: 5 | - registry 6 | jobs: 7 | fix-items: 8 | id: fix-items 9 | tasks: 10 | - id: fix-items 11 | image: ${{ args.registry }}/pctasks-task-base:latest 12 | code: 13 | src: ${{ local.path(./fix_items.py) }} 14 | task: fix_items:fix_items_task 15 | args: 16 | incorrect_chunkset_uri: blob://landcoverdata/lcmap-etl-data/lcmap-conus-v13/2023-01-12-full-2/items 17 | corrected_chunkset_uri: blob://landcoverdata/lcmap-etl-data/lcmap-conus-v13/2023-01-12-full-2/fixed-items 18 | -------------------------------------------------------------------------------- /datasets/usgs-lcmap/fix_items/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/datasets/usgs-lcmap/fix_items/tests/__init__.py -------------------------------------------------------------------------------- /datasets/usgs-lcmap/fix_items/tests/test_tasks.py: -------------------------------------------------------------------------------- 1 | # run this with `python -m pytest` from the datasets/usgs-lcmap/fix_items directory 2 | 3 | from pathlib import Path 4 | import json 5 | 6 | from fix_items import remove_classes 7 | 8 | 9 | def test_fix_ndjson_classes() -> None: 10 | path = Path(__file__).parent / "data-files" / "items.ndjson" 11 | with open(path, "r") as fobj: 12 | item = json.loads(fobj.readline()) 13 | corrected_item = remove_classes(item) 14 | assert len(corrected_item["assets"]["lcpri"]["classification:classes"]) == 9 15 | assert len(corrected_item["assets"]["lcsec"]["classification:classes"]) == 9 16 | for c in corrected_item["assets"]["lcsec"]["classification:classes"]: 17 | assert c["value"] < 9 18 | assert "_to_" not in c["name"] 19 | -------------------------------------------------------------------------------- /datasets/usgs-lcmap/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/stactools-packages/usgs-lcmap.git@73d936d1ed4b756b25b6782164778451de38af73 -------------------------------------------------------------------------------- /datasets/usgs-lidar/requirements.txt: -------------------------------------------------------------------------------- 1 | geopandas 2 | pyarrow 3 | adlfs 4 | azure.identity 5 | rasterio 6 | planetary-computer 7 | pyproj 8 | shapely
9 | opencensus.ext.azure -------------------------------------------------------------------------------- /deployment/.gitignore: -------------------------------------------------------------------------------- 1 | tf-output.json 2 | conf 3 | -------------------------------------------------------------------------------- /deployment/bin/azlogin: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "${CI}" ]]; then 6 | set -x 7 | fi 8 | 9 | function usage() { 10 | echo -n \ 11 | "Usage: $(basename "$0") 12 | Login to Azure CLI 13 | " 14 | } 15 | 16 | while [[ "$#" -gt 0 ]]; do case $1 in 17 | *) 18 | echo "Unknown parameter passed: $1" >&2 19 | usage 20 | exit 1 21 | ;; 22 | esac done 23 | 24 | 25 | if [ "${BASH_SOURCE[0]}" = "${0}" ]; then 26 | 27 | az login --service-principal \ 28 | --username "${AZURE_CLIENT_ID}" \ 29 | --password "${AZURE_CLIENT_SECRET}" \ 30 | --tenant "${AZURE_TENANT_ID}" 31 | 32 | fi 33 | -------------------------------------------------------------------------------- /deployment/bin/nginx-values.yaml: -------------------------------------------------------------------------------- 1 | controller: 2 | podLabels: 3 | azure.workload.identity/use: "true" 4 | extraVolumes: 5 | - name: secrets-store-inline 6 | csi: 7 | driver: secrets-store.csi.k8s.io 8 | readOnly: true 9 | volumeAttributes: 10 | secretProviderClass: "keyvault" 11 | extraVolumeMounts: 12 | - name: secrets-store-inline 13 | mountPath: "/mnt/secrets-store" 14 | readOnly: true 15 | extraArgs: 16 | default-ssl-certificate: pc/planetarycomputer-test-certificate -------------------------------------------------------------------------------- /deployment/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | deploy: 3 | container_name: pc-etl-deploy 4 | image: pc-etl-deploy 5 | build: 6 | context: ..
7 | dockerfile: deployment/Dockerfile 8 | environment: 9 | # For Terraform 10 | - ARM_SUBSCRIPTION_ID=${ARM_SUBSCRIPTION_ID:-a84a690d-585b-4c7c-80d9-851a48af5a50} 11 | - ARM_TENANT_ID=${ARM_TENANT_ID:-72f988bf-86f1-41af-91ab-2d7cd011db47} 12 | - ARM_CLIENT_ID 13 | - ARM_USE_OIDC 14 | - ARM_OIDC_TOKEN 15 | - ACTIONS_ID_TOKEN_REQUEST_URL 16 | - ACTIONS_ID_TOKEN_REQUEST_TOKEN 17 | - ARM_OIDC_REQUEST_TOKEN 18 | - ARM_OIDC_REQUEST_URL 19 | 20 | # Used in function deployment injected by GH Actions 21 | - GITHUB_TOKEN 22 | - GITHUB_REPOSITORY 23 | - GITHUB_ACTOR 24 | working_dir: /opt/src/deployment 25 | volumes: 26 | - ../deployment:/opt/src/deployment 27 | - ../pctasks:/opt/src/pctasks:ro 28 | - ../pctasks_funcs:/opt/src/pctasks_funcs:ro 29 | - ~/.azure:/root/.azure 30 | -------------------------------------------------------------------------------- /deployment/helm/argo-values.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | baseHref: /argo/ 3 | secure: false 4 | serviceAccount: 5 | name: pctasks-sa 6 | image: 7 | registry: pccomponentstest.azurecr.io 8 | repository: argoproj/argocli 9 | tag: v3.5.8 10 | controller: 11 | image: 12 | registry: pccomponentstest.azurecr.io 13 | repository: argoproj/workflow-controller 14 | tag: v3.5.8 15 | executor: 16 | image: 17 | registry: pccomponentstest.azurecr.io 18 | repository: argoproj/argoexec 19 | tag: v3.5.8 20 | -------------------------------------------------------------------------------- /deployment/helm/pc-tasks-ingress/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: pc-tasks-ingress 3 | description: A Helm chart for the ingress for Planetary Computer Tasks test environment 4 | type: application 5 | version: 0.1.0 6 | appVersion: 0.1.0 7 | -------------------------------------------------------------------------------- /deployment/helm/pc-tasks-ingress/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Application information: 2 | {{ include "pcingress.selectorLabels" . }} 3 | Ingress host: {{ .Values.pcingress.ingress.host }} 4 | Service Fullname: {{ include "pcingress.fullname" . 
}} 5 | KeyVault secret provider created: {{ .Values.secretProvider.create }} -------------------------------------------------------------------------------- /deployment/helm/pc-tasks-ingress/templates/nginx-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | annotations: 5 | labels: 6 | app: pc-tasks-ingress 7 | name: nginx-configuration 8 | namespace: {{ .Values.namespace }} 9 | data: 10 | use-forwarded-headers: "true" 11 | enable-real-ip: "true" -------------------------------------------------------------------------------- /deployment/helm/pc-tasks-ingress/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: pctasks-sa-token 5 | annotations: 6 | # Service account created by Argo, name 7 | # set in argo-values.yaml 8 | kubernetes.io/service-account.name: pctasks-sa 9 | type: kubernetes.io/service-account-token -------------------------------------------------------------------------------- /deployment/helm/pc-tasks-ingress/values.yaml: -------------------------------------------------------------------------------- 1 | environment: "staging" 2 | namespace: "default" 3 | 4 | stac: 5 | enabled: true 6 | 7 | tiler: 8 | enabled: true 9 | 10 | pcingress: 11 | services: 12 | stac: 13 | path: "" 14 | name: "" 15 | port: "" 16 | tiler: 17 | path: "" 18 | name: "" 19 | port: "" 20 | 21 | cert: 22 | secretName: "" 23 | 24 | ingress: 25 | enabled: false 26 | tlsHost: "" 27 | hosts: [] 28 | annotations: {} 29 | 30 | secretProvider: 31 | create: true 32 | providerName: "keyvault" 33 | namespace: "" 34 | userAssignedIdentityID: "" 35 | tenantId: "" 36 | keyvaultName: "" 37 | keyvaultCertificateName: "" 38 | kubernetesCertificateSecretName: "" 39 | 40 | nameOverride: "" 41 | fullnameOverride: "" 42 | -------------------------------------------------------------------------------- /deployment/helm/published/pctasks-server/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: pctasks-server 3 | description: A Helm chart for the Planetary Computer Tasks server 4 | type: application 5 | version: 0.1.0 6 | appVersion: 0.1.0 -------------------------------------------------------------------------------- /deployment/helm/published/pctasks-server/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Application information: 2 | {{ include "pctasks.selectorLabels" . }} 3 | Ingress host: {{ .Values.pctasks.server.ingress.host }} 4 | Service Fullname: {{ include "pctasks.fullname" . }} -------------------------------------------------------------------------------- /deployment/helm/published/pctasks-server/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "pctasks.fullname" . }} 5 | labels: 6 | {{- include "pctasks.labels" . | nindent 4 }} 7 | {{- with .Values.pctasks.server.service.annotations }} 8 | annotations: 9 | {{- toYaml . | nindent 4 }} 10 | {{- end }} 11 | spec: 12 | type: {{ .Values.pctasks.server.service.type }} 13 | ports: 14 | - port: {{ .Values.pctasks.server.service.port }} 15 | selector: 16 | {{- include "pctasks.selectorLabels" . 
| nindent 4 }} 17 | -------------------------------------------------------------------------------- /deployment/helm/published/pctasks-server/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.pctasks.server.serviceAccount.create -}} 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: {{ include "pctasks.serviceAccountName" . }} 6 | labels: 7 | {{- include "pctasks.labels" . | nindent 4 }} 8 | {{- with .Values.pctasks.server.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | 13 | --- 14 | apiVersion: rbac.authorization.k8s.io/v1 15 | kind: RoleBinding 16 | metadata: 17 | name: pctasks-server-argoworkflows-workflow-rolebinding 18 | subjects: 19 | - kind: ServiceAccount 20 | name: {{ include "pctasks.serviceAccountName" . }} 21 | roleRef: 22 | kind: Role 23 | name: argo-workflows-workflow 24 | apiGroup: rbac.authorization.k8s.io 25 | 26 | {{- end }} 27 | -------------------------------------------------------------------------------- /deployment/helm/vendored/argo-workflows-0.41.8.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/deployment/helm/vendored/argo-workflows-0.41.8.tgz -------------------------------------------------------------------------------- /deployment/helm/vendored/ingress-nginx-4.8.3.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/deployment/helm/vendored/ingress-nginx-4.8.3.tgz -------------------------------------------------------------------------------- /deployment/helm/vendored/keda-2.14.2.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/deployment/helm/vendored/keda-2.14.2.tgz -------------------------------------------------------------------------------- /deployment/requirements.txt: -------------------------------------------------------------------------------- 1 | pypgstac==0.7.10 2 | Jinja2==3.1.2 3 | -------------------------------------------------------------------------------- /deployment/terraform/batch_pool/providers.tf: -------------------------------------------------------------------------------- 1 | provider azurerm { 2 | features {} 3 | skip_provider_registration = true 4 | use_oidc = true 5 | } 6 | 7 | terraform { 8 | required_version = ">= 0.13" 9 | 10 | required_providers { 11 | azurerm = { 12 | source = "hashicorp/azurerm" 13 | version = "3.110.0" 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /deployment/terraform/batch_pool/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | } 4 | 5 | variable "resource_group_name" { 6 | type = string 7 | } 8 | 9 | variable "account_name" { 10 | type = string 11 | } 12 | 13 | variable "display_name" { 14 | type = string 15 | } 16 | 17 | variable "vm_size" { 18 | type = string 19 | } 20 | 21 | variable "max_tasks_per_node" { 22 | type = number 23 | } 24 | 25 | variable "subnet_id" { 26 | type = string 27 | } 28 | 29 | variable "min_dedicated" { 30 | type = number 31 | } 32 | 33 | 
variable "max_dedicated" { 34 | type = number 35 | } 36 | 37 | variable "min_low_priority" { 38 | type = number 39 | } 40 | 41 | variable "max_low_priority" { 42 | type = number 43 | } 44 | 45 | variable "max_increase_per_scale" { 46 | type = number 47 | } 48 | 49 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | # ACR 51 | 52 | variable "acr_name" { 53 | type = string 54 | } 55 | 56 | variable "user_assigned_identity_id" { 57 | type = string 58 | } -------------------------------------------------------------------------------- /deployment/terraform/resources/acr.tf: -------------------------------------------------------------------------------- 1 | data "azurerm_container_registry" "task_acr" { 2 | name = var.task_acr_name 3 | resource_group_name = var.task_acr_resource_group 4 | } 5 | 6 | data "azurerm_container_registry" "component_acr" { 7 | name = var.component_acr_name 8 | resource_group_name = var.component_acr_resource_group 9 | } 10 | 11 | # Role assignments 12 | 13 | # Note: role to the batch account task acr service principal 14 | # should have AcrPull access to the task acr. 15 | 16 | # add the role to the identity the kubernetes cluster was assigned 17 | resource "azurerm_role_assignment" "attach_acr" { 18 | scope = data.azurerm_container_registry.component_acr.id 19 | role_definition_name = "AcrPull" 20 | principal_id = azurerm_kubernetes_cluster.pctasks.kubelet_identity[0].object_id 21 | } -------------------------------------------------------------------------------- /deployment/terraform/resources/app_insights.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_log_analytics_workspace" "pctasks" { 2 | name = "log-${local.prefix}" 3 | location = azurerm_resource_group.pctasks.location 4 | resource_group_name = azurerm_resource_group.pctasks.name 5 | sku = "PerGB2018" 6 | retention_in_days = 30 7 | } 8 | 9 | resource "azurerm_application_insights" "pctasks" { 10 | name = "appi-${local.prefix}" 11 | location = azurerm_resource_group.pctasks.location 12 | resource_group_name = azurerm_resource_group.pctasks.name 13 | workspace_id = azurerm_log_analytics_workspace.pctasks.id 14 | application_type = "web" 15 | } 16 | -------------------------------------------------------------------------------- /deployment/terraform/resources/providers.tf: -------------------------------------------------------------------------------- 1 | provider "azurerm" { 2 | features {} 3 | skip_provider_registration = true 4 | use_oidc = true 5 | 6 | # This could be used instead of temporarily enabling shared key access once 7 | # this issue is resolved. 8 | # https://github.com/hashicorp/terraform-provider-azurerm/issues/15083 9 | # storage_use_azuread = true 10 | } 11 | 12 | terraform { 13 | required_version = ">= 0.13" 14 | 15 | required_providers { 16 | azurerm = { 17 | source = "hashicorp/azurerm" 18 | version = "3.110.0" 19 | } 20 | } 21 | } 22 | 23 | data "azurerm_client_config" "current" { 24 | } 25 | 26 | 27 | # Terraform stuff to include 28 | # 1. This provider 29 | # 2. Cosmos DB containers 30 | # 3. The AKS Node Pool 31 | # 4. 
The Kubernetes namespace, secrets 32 | -------------------------------------------------------------------------------- /deployment/terraform/resources/rg.tf: -------------------------------------------------------------------------------- 1 | resource "azurerm_resource_group" "pctasks" { 2 | name = "rg-${local.full_prefix}" 3 | location = var.region 4 | 5 | tags = { 6 | "ringValue" = "r0" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /deployment/terraform/staging/backend.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | backend "azurerm" { 3 | resource_group_name = "pc-test-manual-resources" 4 | storage_account_name = "pctesttfstate" 5 | container_name = "pctasks" 6 | key = "staging.terraform.tfstate" 7 | use_oidc = true 8 | use_azuread_auth = true 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /deployment/terraform/staging/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export DEPLOY_SECRETS_KV=pc-test-deploy-secrets 4 | export DEPLOY_SECRETS_KV_SECRET=pctasks-test-tfvars-staging 5 | export DEPLOY_SECRETS_KV_RG_NAME=pc-test-manual-resources 6 | 7 | export PCTASKS_TASK_KV=kv-pctaskstest-staging 8 | export PCTASKS_TASK_KV_RESOURCE_GROUP_NAME=rg-pctaskstest-staging-westeurope 9 | -------------------------------------------------------------------------------- /deployment/terraform/staging/output.tf: -------------------------------------------------------------------------------- 1 | output "resources" { 2 | value = module.resources 3 | sensitive = true 4 | } 5 | -------------------------------------------------------------------------------- /dev-secrets.template.yaml: -------------------------------------------------------------------------------- 1 | # Secrets can be specified as key:value 2 | example: value -------------------------------------------------------------------------------- /dev/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:1.10 2 | 3 | COPY etc/nginx/nginx.conf /etc/nginx/nginx.conf 4 | COPY etc/nginx/conf.d/default.conf /etc/nginx/conf.d/default.conf -------------------------------------------------------------------------------- /dev/nginx/README.md: -------------------------------------------------------------------------------- 1 | # nginx 2 | 3 | Sets up the NGINX server for the dev environment, where nginx acts as a reverse proxy to simulate the production environment.
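4 | 5 | As a quick check of the routing (a sketch — this assumes the proxy's port 80 is published to the host; the actual port mapping lives in the dev compose setup), a request to the `/tasks` path is rewritten and forwarded to the pctasks server upstream defined in `etc/nginx/conf.d/default.conf`: 6 | 7 | ```shell 8 | # nginx strips the /tasks prefix and proxies the request to server:8511 9 | curl http://localhost/tasks/ 10 | ``` 11 |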
-------------------------------------------------------------------------------- /dev/nginx/etc/nginx/conf.d/default.conf: -------------------------------------------------------------------------------- 1 | upstream pctasks-server-upstream { 2 | server server:8511; 3 | } 4 | 5 | server { 6 | listen 80; 7 | server_name localhost; 8 | 9 | location /tasks { 10 | proxy_set_header Host $http_host; 11 | proxy_set_header X-Forwarded-For $remote_addr; 12 | proxy_pass_request_headers on; 13 | proxy_buffers 8 8k; 14 | proxy_buffer_size "16k"; 15 | 16 | proxy_pass http://pctasks-server-upstream; 17 | proxy_redirect http://pctasks-server-upstream/ /tasks; 18 | rewrite ^/tasks/?(.*)$ /$1 break; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /dev/nginx/etc/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | user nginx; 2 | 3 | error_log /var/log/nginx/error.log warn; 4 | pid /var/run/nginx.pid; 5 | 6 | events { 7 | 8 | } 9 | 10 | http { 11 | default_type application/octet-stream; 12 | 13 | include /etc/nginx/conf.d/*.conf; 14 | } -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | livehtml: 12 | sphinx-autobuild --host 0.0.0.0 ${SOURCEDIR} $(BUILDDIR)/html -d _build/doctrees 13 | 14 | # Note: "livehtml" is defined first above, so a bare "make" runs the live-reload server; use "make help" to list the standard Sphinx targets. 15 | help: 16 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 17 | 18 | .PHONY: help livehtml Makefile 19 | 20 | # Catch-all target: route all unknown targets to Sphinx using the new 21 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 22 | %: Makefile 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | -------------------------------------------------------------------------------- /docs/_static/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/docs/_static/.gitignore -------------------------------------------------------------------------------- /docs/development/index.md: -------------------------------------------------------------------------------- 1 | 2 | # Development 3 | 4 | ```{toctree} 5 | --- 6 | maxdepth: 2 7 | --- 8 | setup 9 | deploying 10 | faq 11 | ```` 12 | -------------------------------------------------------------------------------- /docs/getting_started/index.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | ```{toctree} 4 | --- 5 | maxdepth: 2 6 | --- 7 | dev_workflows 8 | creating_a_dataset 9 | ```` -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/reference/index.md: -------------------------------------------------------------------------------- 1 | # References 2 | 3 | ```{toctree} 4 | --- 5 | maxdepth: 2 6 | --- 7 | api 8 | ```` -------------------------------------------------------------------------------- /docs/user_guide/index.md: -------------------------------------------------------------------------------- 1 | # User Guide 2 | 3 | ```{toctree} 4 | --- 5 | maxdepth: 2 6 | --- 7 | settings 8 | workflows 9 | templating 10 | storage 11 | runtime 12 | chunking 13 | streaming 14 | ```` -------------------------------------------------------------------------------- /examples/list-logs.yaml: -------------------------------------------------------------------------------- 1 | id: list-logs 2 | name: List log files in Azurite 3 | dataset: microsoft/test 4 | 5 | jobs: 6 | list-logs-job: 7 | name: List logs job 8 | tasks: 9 | - id: list-logs-task 10 | image: localhost:5001/pctasks-task-base:latest 11 | task: pctasks.task.common.list_files:ListFilesTask 12 | args: 13 | src_uri: blob://devstoreaccount1/tasklogs/ 14 | -------------------------------------------------------------------------------- /ingest-collection.yaml: -------------------------------------------------------------------------------- 1 | name: Ingest Collection Test Workflow 2 | dataset: microsoft/test-collection 3 | target_environment: staging 4 | 5 | jobs: 6 | ingest: 7 | name: Ingest Collection 8 | tasks: 9 | - id: ingest-collection 10 | image_key: ingest 11 | task: pctasks.ingest_task.task:ingest_task 12 | environment: 13 | DB_CONNECTION_STRING: "${{ secrets.pgstac-connection-string }}" 14 | args: 15 | content: 16 | type: Collections 17 | collections: 18 | - ${{ local.file(tests/data-files/collection.json) }} -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | disallow_untyped_defs = True 4 | namespace_packages = True 5 | explicit_package_bases = True 6 | 7 | [mypy-azure.storage.blob.*] 8 | ignore_errors = True 9 | -------------------------------------------------------------------------------- /pctasks/.dockerignore: -------------------------------------------------------------------------------- 1 | **/.envrc 2 | **/.direnv 3 | **/__pycache__ 4 | **/.mypy_cache 5 | **/.pytest_cache 6 | **/.terraform 7 | **/node_modules -------------------------------------------------------------------------------- /pctasks/cli/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: CLI 2 | 3 | This package provides the `pctasks` command line interface for the PCTasks framework.
4 | 5 | -------------------------------------------------------------------------------- /pctasks/cli/pctasks/cli/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.cli.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/cli/pctasks/cli/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/cli/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/cli/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/cli/tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | 3 | from pctasks.cli.cli import pctasks_cmd 4 | from pctasks.cli.version import __version__ 5 | 6 | 7 | def test_cli_version(): 8 | runner = CliRunner() 9 | result = runner.invoke(pctasks_cmd, ["--version"]) 10 | assert result.output == f"pctasks, version {__version__}\n" 11 | 12 | 13 | def test_direct_invoke(): 14 | result = pctasks_cmd.main(["--version"], standalone_mode=False) 15 | assert result == 0 16 | -------------------------------------------------------------------------------- /pctasks/client/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: Client 2 | 3 | This project provides functionality for interacting with the PCTasks API, like submitting workflows and querying logs. 4 | 5 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/__init__.py: -------------------------------------------------------------------------------- 1 | # isort:skip_file 2 | 3 | from pctasks.client.version import __version__ 4 | from pctasks.client.client import PCTasksClient 5 | 6 | __all__ = ["__version__", "PCTasksClient"] 7 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/constants.py: -------------------------------------------------------------------------------- 1 | NOT_FOUND_EXIT_CODE = 148 2 | FILE_EXISTS_EXIT_CODE = 149 3 | UNEXPECTED_ERROR_EXIT_CODE = 150 4 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/context.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | from pctasks.core.context import PCTasksCommandContext 5 | 6 | 7 | @dataclass 8 | class ClientCommandContext(PCTasksCommandContext): 9 | pretty_print: bool = False 10 | """Whether to pretty print the output, e.g. syntax highlight YAML.""" 11 | 12 | # Fields from PCTasksCommandContext are redeclared here to avoid mypy issues 13 | 14 | profile: Optional[str] = None 15 | """Settings profile. Determines which settings file is read.""" 16 | 17 | settings_file: Optional[str] = None 18 | """Full path to the settings file.
If present, overrides the profile.""" 19 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/errors.py: -------------------------------------------------------------------------------- 1 | class PCTasksError(Exception): 2 | """Base class for all PCTasks errors.""" 3 | 4 | pass 5 | 6 | 7 | class NotFoundError(PCTasksError): 8 | """Raised when a record is not found.""" 9 | 10 | pass 11 | 12 | 13 | class WorkflowRunNotFoundError(NotFoundError): 14 | """Raised when a workflow run is not found.""" 15 | 16 | pass 17 | 18 | 19 | class WorkflowNotFoundError(NotFoundError): 20 | """Raised when a workflow is not found.""" 21 | 22 | pass 23 | 24 | 25 | class WorkflowExistsError(NotFoundError): 26 | """Raised when a workflow already exists but was expected not to.""" 27 | 28 | pass 29 | 30 | 31 | class JobPartitionRunNotFoundError(NotFoundError): 32 | """Raised when a job partition run is not found.""" 33 | 34 | pass 35 | 36 | 37 | class ConfirmationError(Exception): 38 | pass 39 | 40 | 41 | class NoWorkflowIDError(Exception): 42 | pass 43 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/profile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/profile/__init__.py -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/py.typed -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/runs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/runs/__init__.py -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/runs/options.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable 2 | 3 | import click 4 | 5 | 6 | def opt_page(fn: Callable[..., Any]) -> Callable[..., Any]: 7 | _opt = click.option( 8 | "-p", "--page", is_flag=True, help="Page output." 9 | ) # type: ignore[var-annotated] 10 | _opt(fn) 11 | return fn 12 | 13 | 14 | def opt_all(fn: Callable[..., Any]) -> Callable[..., Any]: 15 | _opt = click.option( 16 | "-a", "--all", is_flag=True, help="Print all output, even if large."
17 | ) # type: ignore[var-annotated] 18 | _opt(fn) 19 | return fn 20 | 21 | 22 | def opt_status(fn: Callable[..., Any]) -> Callable[..., Any]: 23 | _opt = click.option("-s", "--status", help="Filter by status.") # type: ignore[var-annotated] # noqa: E501 24 | _opt(fn) 25 | return fn 26 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/storage/__init__.py -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/utils.py: -------------------------------------------------------------------------------- 1 | def status_emoji(status: str) -> str: 2 | if status.lower() == "completed": 3 | return "✅" 4 | if status.lower() == "failed": 5 | return "❌" 6 | if status.lower() == "running": 7 | return "🏃" 8 | if status.lower() == "cancelled": 9 | return "🚫" 10 | if status.lower() == "skipped": 11 | return "⏭️" 12 | else: 13 | return "🕖" 14 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/workflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/pctasks/client/workflow/__init__.py -------------------------------------------------------------------------------- /pctasks/client/pctasks/client/workflow/options.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable 2 | 3 | import click 4 | 5 | 6 | def opt_args(fn: Callable[..., Any]) -> Callable[..., Any]: 7 | _opt = click.option( 8 | "-a", "--arg", multiple=True, help="Argument value to use.", type=(str, str) 9 | ) # type: ignore[var-annotated] 10 | _opt(fn) 11 | return fn 12 | -------------------------------------------------------------------------------- /pctasks/client/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/client/tests/data-files/mycode.py: -------------------------------------------------------------------------------- 1 | from pctasks.dev.mocks import MockTask 2 | 3 | 4 | class MyMockTask(MockTask): 5 | pass 6 | -------------------------------------------------------------------------------- /pctasks/client/tests/records/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/client/tests/records/__init__.py -------------------------------------------------------------------------------- /pctasks/client/tests/records/test_records.py: -------------------------------------------------------------------------------- 1 | def 
test_records(): ... 2 | -------------------------------------------------------------------------------- /pctasks/client/tests/test_storage.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from tempfile import TemporaryDirectory 3 | 4 | from pctasks.dev.blob import temp_azurite_blob_storage 5 | from pctasks.dev.test_utils import run_pctasks 6 | 7 | TEST_DATA_DIR = Path(__file__).parent / "data-files" 8 | 9 | 10 | def test_get(): 11 | with temp_azurite_blob_storage(test_files=TEST_DATA_DIR): 12 | local_path = TEST_DATA_DIR / "test_collection.json" 13 | with TemporaryDirectory() as tmp_dir: 14 | run_pctasks(["storage", "get", str(local_path), "-o", tmp_dir]) 15 | assert (Path(tmp_dir) / "test_collection.json").exists() 16 | 17 | 18 | def test_put(): 19 | with temp_azurite_blob_storage() as storage: 20 | local_path = TEST_DATA_DIR / "test_collection.json" 21 | remote_uri = storage.get_uri() + "/" 22 | run_pctasks(["storage", "put", str(local_path), remote_uri]) 23 | 24 | assert storage.file_exists("test_collection.json") 25 | -------------------------------------------------------------------------------- /pctasks/client/tests/test_template.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | 5 | from pctasks.client.workflow.template import LocalTemplater 6 | from pctasks.ingest.models import IngestCollectionsInput 7 | 8 | HERE = Path(__file__).parent 9 | TEST_COLLECTION = HERE / "data-files" / "test_collection.json" 10 | 11 | 12 | def test_local_file_template(): 13 | yaml_str = ( 14 | """ 15 | type: Collections 16 | collections: 17 | - ${{ local.file(""" 18 | + str(TEST_COLLECTION) 19 | + """) }} 20 | """ 21 | ) 22 | 23 | yaml_dict = yaml.safe_load(yaml_str) 24 | templated_dict = LocalTemplater().template_dict(yaml_dict) 25 | 26 | data = IngestCollectionsInput.model_validate(templated_dict) 27 | 28 | assert data.collections 29 | assert data.collections[0]["id"] == "test-collection" 30 | -------------------------------------------------------------------------------- /pctasks/core/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: Core 2 | 3 | This is the base library of the PCTasks framework. 4 | It provides core functionality and base messages as 5 | Pydantic models. 
6 | 7 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.core.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/_compat.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if sys.version_info >= (3, 10): 4 | from typing import TypeAlias 5 | else: 6 | from typing_extensions import TypeAlias 7 | 8 | 9 | __all__ = [ 10 | "TypeAlias", 11 | ] 12 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/context.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | 5 | @dataclass 6 | class PCTasksCommandContext: 7 | """Context used in the pctasks CLI.""" 8 | 9 | profile: Optional[str] = None 10 | """Settings profile. Determines which settings file is read.""" 11 | 12 | settings_file: Optional[str] = None 13 | """Full path to the settings file. If present, overrides the profile.""" 14 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/cosmos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/cosmos/__init__.py -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/cosmos/containers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/cosmos/containers/__init__.py -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/cosmos/page.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, Iterator, Optional, TypeVar 2 | 3 | T = TypeVar("T") 4 | 5 | 6 | class Page(Iterable[T]): 7 | def __init__(self, items: Iterable[T], continuation_token: Optional[str]): 8 | self._items = items 9 | self._continuation_token = continuation_token 10 | 11 | def __iter__(self) -> Iterator[T]: 12 | return iter(self._items) 13 | 14 | @property 15 | def continuation_token(self) -> Optional[str]: 16 | return self._continuation_token 17 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/models/__init__.py -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/models/activity.py: -------------------------------------------------------------------------------- 1 | from typing import Generic, TypeVar 2 | 3 | from pydantic import BaseModel 4 | 5 | from pctasks.core.models.base import PCBaseModel, RunRecordId 6 | 7 | T = TypeVar("T", bound=BaseModel) 8 | 9 | 10 | class ActivityMessage(PCBaseModel, Generic[T]): 11 | 
run_record_id: RunRecordId 12 | msg: T 13 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/models/tokens.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from pctasks.core.models.base import PCBaseModel 4 | 5 | 6 | class ContainerTokens(PCBaseModel): 7 | token: Optional[str] = None 8 | blobs: Optional[Dict[str, str]] = None 9 | 10 | 11 | class StorageAccountTokens(PCBaseModel): 12 | token: Optional[str] = None 13 | containers: Optional[Dict[str, ContainerTokens]] = None 14 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/models/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from dateutil.tz import tzutc 4 | 5 | 6 | def tzutc_now() -> datetime: 7 | """Consistent timezone-aware UTC timestamp for record models that are 8 | serialized for API responses.""" 9 | return datetime.now(tzutc()) 10 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/py.typed -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/storage/errors.py: -------------------------------------------------------------------------------- 1 | class FileNotFoundError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/tables/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/pctasks/core/tables/__init__.py -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/tables/utils.py: -------------------------------------------------------------------------------- 1 | import unicodedata 2 | 3 | PROHIBITED_TABLE_KEY_CHARS = ["/", "\\", "#", "?"] 4 | 5 | 6 | def is_valid_table_key(table_key: str) -> bool: 7 | for char in PROHIBITED_TABLE_KEY_CHARS: 8 | if char in table_key: 9 | return False 10 | # Reject control characters (Unicode category "C*") anywhere in the key 11 | for char in table_key: 12 | if unicodedata.category(char)[0] == "C": 13 | return False 14 | return True 15 | -------------------------------------------------------------------------------- /pctasks/core/pctasks/core/utils/stac.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict, List, Union 3 | 4 | import pystac 5 | from stac_validator.validate import StacValidate 6 | 7 | 8 | class STACValidationError(Exception): 9 | def __init__(self, message: str, detail: List[Dict[str, Any]]): 10 | super().__init__(message) 11 | self.detail = detail 12 | 13 | 14 | def validate_stac(object: Union[Dict[str, Any], pystac.STACObject]) -> None: 15 | validator = StacValidate(extensions=True) 16 | validator.stac_content = object if isinstance(object, dict) else object.to_dict() 17 | validator.run() 18 | if not validator.valid: 19 | raise STACValidationError( 20 | f"Invalid STAC:\n{json.dumps(validator.message, indent=2)}", 21 | validator.message, 22 | ) 23 |
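24 | 25 | # Example usage (a minimal sketch; "item.json" is a hypothetical placeholder path): 26 | # 27 | # import pystac 28 | # 29 | # item = pystac.Item.from_file("item.json") 30 | # validate_stac(item)  # raises STACValidationError if validation fails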
-------------------------------------------------------------------------------- /pctasks/core/pctasks/core/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/core/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/core/tests/cosmos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/cosmos/__init__.py -------------------------------------------------------------------------------- /pctasks/core/tests/cosmos/containers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/cosmos/containers/__init__.py -------------------------------------------------------------------------------- /pctasks/core/tests/data-files/example_module/__init__.py: -------------------------------------------------------------------------------- 1 | from .a import A 2 | from .b import B 3 | 4 | __all__ = ["A", "B"] 5 | -------------------------------------------------------------------------------- /pctasks/core/tests/data-files/example_module/a.py: -------------------------------------------------------------------------------- 1 | class A: 2 | def a(self): 3 | return "a" 4 | -------------------------------------------------------------------------------- /pctasks/core/tests/data-files/example_module/b.py: -------------------------------------------------------------------------------- 1 | from .a import A 2 | 3 | 4 | class B(A): 5 | def b(self): 6 | return "b" 7 | -------------------------------------------------------------------------------- /pctasks/core/tests/data-files/simple-assets/a/asset-a-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-a-1.json"} -------------------------------------------------------------------------------- /pctasks/core/tests/data-files/simple-assets/a/asset-a-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-a-2.json"} -------------------------------------------------------------------------------- /pctasks/core/tests/data-files/simple-assets/b/asset-b-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-b-1.json"} -------------------------------------------------------------------------------- /pctasks/core/tests/data-files/simple-assets/b/asset-b-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-b-2.json"} -------------------------------------------------------------------------------- /pctasks/core/tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/models/__init__.py 
-------------------------------------------------------------------------------- /pctasks/core/tests/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/storage/__init__.py -------------------------------------------------------------------------------- /pctasks/core/tests/tables/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/tables/__init__.py -------------------------------------------------------------------------------- /pctasks/core/tests/test_messages.py: -------------------------------------------------------------------------------- 1 | from pctasks.core.models.task import TaskDefinition 2 | 3 | 4 | def test_submit_message_deserialize_serialize(): 5 | js = { 6 | "id": "test-task", 7 | "image": "test", 8 | "task": "foo.bar:task", 9 | "args": {}, 10 | } 11 | 12 | msg = TaskDefinition(**js) 13 | js2 = msg.dict(exclude_none=True) 14 | msg2 = TaskDefinition(**js2) 15 | 16 | assert msg == msg2 17 | -------------------------------------------------------------------------------- /pctasks/core/tests/test_yaml.py: -------------------------------------------------------------------------------- 1 | from pctasks.core.models.workflow import WorkflowDefinition 2 | from pctasks.core.yaml import YamlValidationError 3 | 4 | 5 | def test_error_handling(): 6 | try: 7 | _ = WorkflowDefinition.from_yaml( 8 | """ 9 | name: A workflow* *with* *asterisks 10 | 11 | jobs: 12 | name: A job 13 | test-job: 14 | tasks: 15 | - id: test-task 16 | image-key: ingest-prod 17 | task: tests.test_submit.MockTask 18 | args: 19 | hello: world 20 | """ 21 | ) 22 | except YamlValidationError as e: 23 | error_text = str(e) 24 | assert "dataset: Field required" in error_text 25 | -------------------------------------------------------------------------------- /pctasks/core/tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/core/tests/utils/__init__.py -------------------------------------------------------------------------------- /pctasks/core/tests/utils/test_backoff.py: -------------------------------------------------------------------------------- 1 | import azure.core.exceptions 2 | import pytest 3 | import requests.exceptions 4 | 5 | from pctasks.core.utils.backoff import with_backoff 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "kind", 10 | [ 11 | TimeoutError, 12 | requests.exceptions.ConnectionError, 13 | azure.core.exceptions.IncompleteReadError, 14 | ], 15 | ) 16 | def test_retry_timeout_errors(kind): 17 | 18 | i = 0 19 | 20 | def make_callable(kind): 21 | def fn(): 22 | nonlocal i 23 | i += 1 24 | 25 | if i > 2: 26 | return True 27 | else: 28 | raise kind() 29 | 30 | return fn 31 | 32 | result = with_backoff(make_callable(kind)) 33 | assert i == 3 34 | assert result is True 35 | -------------------------------------------------------------------------------- /pctasks/dataset/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: Dataset 2 | 3 | This component of the PCTasks framework lets users create datasets by defining 4 | 
configuration and specific tasks around STAC Items. -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.dataset.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/chunks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/chunks/__init__.py -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/chunks/constants.py: -------------------------------------------------------------------------------- 1 | CREATE_CHUNKS_TASK_PATH = "pctasks.dataset.chunks.task:create_chunks_task" 2 | LIST_CHUNKS_TASK_PATH = "pctasks.dataset.chunks.task:list_chunks_task" 3 | 4 | ASSET_CHUNKS_PREFIX = "assets" 5 | ITEM_CHUNKS_PREFIX = "items" 6 | 7 | ALL_CHUNK_PREFIX = "all" 8 | SUCCESS_CHUNK_PREFIX = "success" 9 | FAILURE_CHUNK_PREFIX = "failed" 10 | -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_CHUNK_LENGTH = 30000 2 | 3 | CREATE_CHUNKS_TASK_ID = "create-chunks" 4 | LIST_CHUNKS_TASK_ID = "list-chunks" 5 | CREATE_ITEMS_TASK_ID = "create-items" 6 | 7 | CHUNK_FOLDER = "chunks" 8 | 9 | PROCESS_ITEMS_JOB_ID = "process_items" 10 | 11 | DEFAULT_DATASET_YAML_PATH = "dataset.yaml" 12 | -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/items/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/items/__init__.py -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/items/constants.py: -------------------------------------------------------------------------------- 1 | PROCESS_ITEMS_JOB_ID = "process-items" 2 | CREATE_ITEMS_TASK_ID = "create-items" 3 | -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/py.typed -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/splits/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/pctasks/dataset/splits/__init__.py -------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/splits/constants.py: -------------------------------------------------------------------------------- 1 | CREATE_SPLITS_TASK_ID = "create-splits" 2 | CREATE_SPLITS_TASK_PATH = "pctasks.dataset.splits.task:create_splits_task" 3 | 
-------------------------------------------------------------------------------- /pctasks/dataset/pctasks/dataset/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/dataset/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/dataset/tests/chunks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/tests/chunks/__init__.py -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/datasets/naip.yaml: -------------------------------------------------------------------------------- 1 | name: naip 2 | image: pc-tasks-naip:latest 3 | collections: 4 | - id: naip 5 | class: naip.dataset:Naip 6 | asset_storage: 7 | - storage_account: naipeuwest 8 | container: naip 9 | sas_token: ${{ pc.get_token(naipeuwest, naip) }} 10 | chunks: 11 | length: 3000 12 | ext: "*.tif" 13 | splits: 14 | - depth: 2 15 | name_starts_with: v002 16 | chunk_storage: 17 | uri: blob://naipeuwest/naip-etl-data/chunks/assets 18 | item_storage: 19 | uri: blob://naipeuwest/naip-etl-data/chunks/items 20 | -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/datasets/test-dataset.yaml: -------------------------------------------------------------------------------- 1 | id: dataset-test 2 | image: mock:latest 3 | code: 4 | src: ${{ local.path(mycode.py) }} 5 | 6 | args: 7 | - test_prefix 8 | - sas_token 9 | 10 | task_config: 11 | test-dataset: 12 | create-items: 13 | tags: 14 | batch_pool_id: high_memory_pool 15 | ingest-collection: 16 | tags: 17 | batch_pool_id: ingest_pool 18 | 19 | collections: 20 | - id: test-dataset 21 | class: mycode:TestCollection 22 | asset_storage: 23 | - uri: blob://devstoreaccount1/test-data/${{ args.test_prefix }}/assets 24 | token: ${{ args.sas_token }} 25 | chunks: 26 | options: 27 | chunk_length: 2 28 | extensions: 29 | - .json 30 | splits: 31 | - depth: 1 32 | chunk_storage: 33 | uri: blob://devstoreaccount1/test-data/${{ args.test_prefix }}/chunks 34 | -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/simple-assets/a/asset-a-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-a-1.json"} -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/simple-assets/a/asset-a-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-a-2.json"} -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/simple-assets/b/asset-b-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-b-1.json"} -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/simple-assets/b/asset-b-2.json: 
-------------------------------------------------------------------------------- 1 | {"name": "asset-b-2.json"} -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/test-assets/one.txt: -------------------------------------------------------------------------------- 1 | one 2 | -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/test-assets/three.txt: -------------------------------------------------------------------------------- 1 | three 2 | -------------------------------------------------------------------------------- /pctasks/dataset/tests/data-files/test-assets/two.txt: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /pctasks/dataset/tests/items/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dataset/tests/items/__init__.py -------------------------------------------------------------------------------- /pctasks/dev/pctasks/dev/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.dev.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/dev/pctasks/dev/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/dev/pctasks/dev/py.typed -------------------------------------------------------------------------------- /pctasks/dev/pctasks/dev/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/dev/tests/test_temp_queue.py: -------------------------------------------------------------------------------- 1 | from pctasks.dev.queues import TempQueue 2 | 3 | 4 | def test_temp_queue_name() -> None: 5 | name = "test-temp-queue-name" 6 | with TempQueue(name=name) as queue_client: 7 | assert queue_client.queue_name == name 8 | 9 | 10 | def test_temp_queue_suffix() -> None: 11 | suffix = "test-temp-queue-suffix" 12 | name = f"test-queue-{suffix}" 13 | with TempQueue(suffix=suffix) as queue_client: 14 | assert queue_client.queue_name == name 15 | 16 | 17 | def test_temp_queue_ignores_existing_resource() -> None: 18 | name = "test-temp-queue-name" 19 | with TempQueue(name=name): 20 | with TempQueue(name=name): 21 | # No exception 22 | pass 23 | -------------------------------------------------------------------------------- /pctasks/ingest/pctasks/ingest/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.ingest.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/ingest/pctasks/ingest/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_INSERT_GROUP_SIZE = 5000 2 | 3 | INGEST_TASK = "pctasks.ingest_task.task:ingest_task" 4 | INGEST_TASK_ID = "ingest-items" 5 | ITEM_TASK_ID = "ingest-item" 6 | COLLECTION_TASK_ID = 
"ingest-collection" 7 | NDJSON_TASK_ID = "ingest-ndjson" 8 | 9 | NDJSON_MESSAGE_TYPE = "Ndjson" 10 | COLLECTIONS_MESSAGE_TYPE = "Collections" 11 | 12 | DB_CONNECTION_STRING_ENV_VAR = "DB_CONNECTION_STRING" 13 | -------------------------------------------------------------------------------- /pctasks/ingest/pctasks/ingest/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest/pctasks/ingest/py.typed -------------------------------------------------------------------------------- /pctasks/ingest/pctasks/ingest/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/ingest/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/ingest/tests/test_settings.py: -------------------------------------------------------------------------------- 1 | from pctasks.core.yaml import model_from_yaml 2 | from pctasks.ingest.settings import SECTION_NAME, IngestSettings 3 | 4 | 5 | def test_image_keys(): 6 | yaml = """ 7 | submit: 8 | account_name: pctrxetlrobrxetlsa 9 | queue_name: inbox 10 | image_keys: 11 | - key: ingest 12 | image: pctasks-ingest:lastest 13 | environment: 14 | - DB_CONNECTION_STR= ${ secrets.DB_CONNECTION_STR } 15 | 16 | ingest: 17 | image_keys: 18 | default: ingest 19 | targets: 20 | prod: ingest-prod 21 | staging: ingest-staging 22 | """ 23 | 24 | settings = model_from_yaml(IngestSettings, yaml, section=SECTION_NAME) 25 | assert settings.image_keys.default == "ingest" 26 | assert settings.image_keys.targets 27 | assert settings.image_keys.targets["prod"] == "ingest-prod" 28 | -------------------------------------------------------------------------------- /pctasks/ingest_task/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | # Setup timezone info 4 | ENV TZ=UTC 5 | 6 | ENV LC_ALL=C.UTF-8 7 | ENV LANG=C.UTF-8 8 | ENV PIP_NO_CACHE_DIR=1 9 | RUN pip install "setuptools>=65.5.1" 10 | 11 | RUN python -m pip install --upgrade pip 12 | 13 | # 14 | # Copy and install packages 15 | # 16 | 17 | COPY core /opt/src/core 18 | RUN cd /opt/src/core && \ 19 | pip install . 20 | 21 | COPY cli /opt/src/cli 22 | RUN cd /opt/src/cli && \ 23 | pip install . 24 | 25 | COPY task /opt/src/task 26 | RUN cd /opt/src/task && \ 27 | pip install . 28 | 29 | COPY client /opt/src/client 30 | RUN cd /opt/src/client && \ 31 | pip install . 32 | 33 | COPY ingest /opt/src/ingest 34 | RUN cd /opt/src/ingest && \ 35 | pip install . 36 | 37 | COPY ingest_task /opt/src/ingest_task 38 | RUN cd /opt/src/ingest_task && \ 39 | pip install . 
40 | 41 | WORKDIR /opt/src 42 | -------------------------------------------------------------------------------- /pctasks/ingest_task/pctasks/ingest_task/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.ingest_task.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/ingest_task/pctasks/ingest_task/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest_task/pctasks/ingest_task/py.typed -------------------------------------------------------------------------------- /pctasks/ingest_task/pctasks/ingest_task/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/ingest_task/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/ingest_task/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/ingest_task/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from contextlib import contextmanager 3 | from typing import Generator 4 | 5 | from pctasks.core.utils import environment 6 | from pctasks.dev.db import ConnStrInfo, temp_pgstac_db 7 | from pctasks.ingest.constants import DB_CONNECTION_STRING_ENV_VAR 8 | 9 | 10 | @contextmanager 11 | def ingest_test_environment() -> Generator[ConnStrInfo, None, None]: 12 | db_secret = os.getenv("SECRETS_DB_CONNECTION_STRING") 13 | 14 | if not db_secret: 15 | raise ValueError("SECRETS_DB_CONNECTION_STRING must be set") 16 | 17 | with temp_pgstac_db(db_secret) as test_db_conn_str: 18 | with environment(**{DB_CONNECTION_STRING_ENV_VAR: test_db_conn_str.local}): 19 | yield test_db_conn_str 20 | -------------------------------------------------------------------------------- /pctasks/notify/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: Notify 2 | 3 | This component of the PCTasks framework handles notifications, delivering events such as STAC Item updates to registered webhooks and Event Grid channels.
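For example, a webhook notification pairs a target endpoint with a CloudEvent payload. A minimal sketch using the message models from `pctasks/notify/pctasks/notify/models.py` below; it assumes `CloudEvent` (from `pctasks.core.models.event`) accepts standard CloudEvents-spec fields, and the endpoint and event values are purely illustrative:

```python
from pctasks.core.models.event import CloudEvent
from pctasks.notify.models import NotifyWebhookMessage

# Assumed CloudEvents-style fields; see pctasks.core.models.event for the
# actual model definition.
event = CloudEvent(
    specversion="1.0",
    id="example-event-id",
    source="/pctasks/example",
    type="example.item.created",
)

# Deliver the event to a registered webhook endpoint (illustrative URL).
msg = NotifyWebhookMessage(
    endpoint="https://example.com/webhooks/pctasks",
    event=event,
)
print(msg.json(exclude_none=True))
```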
4 | -------------------------------------------------------------------------------- /pctasks/notify/pctasks/notify/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.notify.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/notify/pctasks/notify/models.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from pctasks.core.models.base import PCBaseModel 4 | from pctasks.core.models.event import CloudEvent, NotificationMessage 5 | from pctasks.core.models.registration import ( 6 | EventGridChannelInfo, 7 | STACItemEventRegistration, 8 | ) 9 | 10 | 11 | class NotifyFetchMessage(PCBaseModel): 12 | notification: NotificationMessage 13 | target_environment: Optional[str] = None 14 | 15 | 16 | class NotifyFetchResult(PCBaseModel): 17 | registrations: List[STACItemEventRegistration] 18 | 19 | 20 | class NotifyWebhookMessage(PCBaseModel): 21 | endpoint: str 22 | event: CloudEvent 23 | 24 | 25 | class NotifyEventGridChannelMessage(PCBaseModel): 26 | channel_info: EventGridChannelInfo 27 | event: CloudEvent 28 | 29 | 30 | class NotifyResult(PCBaseModel): 31 | success: bool = True 32 | -------------------------------------------------------------------------------- /pctasks/notify/pctasks/notify/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/notify/pctasks/notify/py.typed -------------------------------------------------------------------------------- /pctasks/notify/pctasks/notify/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/notify/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/notify/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/notify/tests/test_process.py: -------------------------------------------------------------------------------- 1 | def test_notify(): ... 2 | -------------------------------------------------------------------------------- /pctasks/router/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: Router 2 | 3 | This component of the PCTasks framework routes incoming messages (Event Grid events, workflow submissions, and notifications) to the appropriate handlers and queues.
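For example, workflow-submit messages are forwarded to the workflow queue. A minimal sketch using `ForwardingMessageHandler` (defined in `pctasks/router/pctasks/router/handlers/forward.py` below); the message body here is illustrative, and calling `handle` requires router settings (queue connection string and queue names) to be configured:

```python
from pctasks.router.handlers.forward import ForwardingMessageHandler

# Forward messages to the queue named by the router settings.
handler = ForwardingMessageHandler(
    lambda settings: settings.workflow_queue_name
)

# Illustrative message body; the real message shape comes from the
# pctasks.core message models.
handler.handle({"type": "WorkflowSubmitMessage", "data": {}})
```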
4 | -------------------------------------------------------------------------------- /pctasks/router/pctasks/router/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.router.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/router/pctasks/router/handlers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/router/pctasks/router/handlers/__init__.py -------------------------------------------------------------------------------- /pctasks/router/pctasks/router/handlers/forward.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Dict 2 | 3 | import orjson 4 | 5 | from pctasks.core.message_handler import MessageHandler 6 | from pctasks.core.queues import QueueService 7 | from pctasks.router.settings import RouterSettings 8 | 9 | 10 | class ForwardingMessageHandler(MessageHandler): 11 | def __init__(self, get_queue_name: Callable[[RouterSettings], str]) -> None: 12 | self.get_queue_name = get_queue_name 13 | 14 | def handle(self, message: Dict[str, Any]) -> None: 15 | settings = RouterSettings.get() 16 | with QueueService.from_connection_string( 17 | connection_string=settings.queues_connection_string, 18 | queue_name=self.get_queue_name(settings), 19 | ) as queue: 20 | queue.send_message(orjson.dumps(message, option=orjson.OPT_SERIALIZE_NUMPY)) 21 | -------------------------------------------------------------------------------- /pctasks/router/pctasks/router/message_handler.py: -------------------------------------------------------------------------------- 1 | from pctasks.core.constants import ( 2 | EVENTGRID_MESSAGE_TYPE, 3 | NOTIFICATION_MESSAGE_TYPE, 4 | WORKFLOW_SUBMIT_MESSAGE_TYPE, 5 | ) 6 | from pctasks.core.message_handler import TypeMessageHandlers 7 | from pctasks.router.handlers.eventgrid import EventGridMessageHandler 8 | from pctasks.router.handlers.forward import ForwardingMessageHandler 9 | 10 | HANDLERS = TypeMessageHandlers( 11 | { 12 | EVENTGRID_MESSAGE_TYPE: EventGridMessageHandler(), 13 | WORKFLOW_SUBMIT_MESSAGE_TYPE: ForwardingMessageHandler( 14 | lambda settings: settings.workflow_queue_name 15 | ), 16 | NOTIFICATION_MESSAGE_TYPE: ForwardingMessageHandler( 17 | lambda settings: settings.notification_queue_name 18 | ), 19 | } 20 | ) 21 | -------------------------------------------------------------------------------- /pctasks/router/pctasks/router/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/router/pctasks/router/py.typed -------------------------------------------------------------------------------- /pctasks/router/pctasks/router/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/router/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/router/tests/__init__.py 
-------------------------------------------------------------------------------- /pctasks/router/tests/test_process.py: -------------------------------------------------------------------------------- 1 | def test_router(): ... 2 | -------------------------------------------------------------------------------- /pctasks/router/tests/test_settings.py: -------------------------------------------------------------------------------- 1 | from pctasks.router.settings import RouterSettings 2 | 3 | 4 | def test_settings(): 5 | _ = RouterSettings.get() 6 | -------------------------------------------------------------------------------- /pctasks/run/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azurelinux/base/python:3.12 2 | 3 | RUN tdnf install ca-certificates azure-cli -y \ 4 | && tdnf clean all 5 | ENV PIP_NO_CACHE_DIR=1 6 | RUN pip install "setuptools>=65.5.1" 7 | 8 | WORKDIR /opt/src 9 | 10 | COPY core /opt/src/core 11 | RUN cd /opt/src/core && \ 12 | pip install . 13 | 14 | COPY cli /opt/src/cli 15 | RUN cd /opt/src/cli && \ 16 | pip install . 17 | 18 | COPY task /opt/src/task 19 | RUN cd /opt/src/task && \ 20 | pip install . 21 | 22 | COPY client /opt/src/client 23 | RUN cd /opt/src/client && \ 24 | pip install . 25 | 26 | COPY run /opt/src/run 27 | RUN cd /opt/src/run && \ 28 | pip install . 29 | 30 | ENV APP_HOST=0.0.0.0 31 | ENV APP_PORT=81 32 | -------------------------------------------------------------------------------- /pctasks/run/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: Run 2 | 3 | This component of the PCTasks framework runs workflows and 4 | tasks that are submitted to the system. The `pctasks.run` library 5 | contains functionality used to transform workflows into 6 | Azure Batch jobs and tasks.
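For example, the task runner backend is selected from configured settings. A minimal sketch using `get_task_runner` (defined in `pctasks/run/pctasks/run/task/__init__.py` below); it requires run settings to be present in the environment:

```python
from pctasks.run.settings import RunSettings
from pctasks.run.task import get_task_runner

# Returns a LocalTaskRunner, BatchTaskRunner, or ArgoTaskRunner
# depending on the configured task_runner_type.
settings = RunSettings.get()
runner = get_task_runner(settings)
```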
7 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.run.version import __version__ 2 | 3 | __all__ = ["__version__"] 4 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/argo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/argo/__init__.py -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/batch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/batch/__init__.py -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/batch/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from datetime import datetime 3 | 4 | 5 | def make_unique_job_id(job_id: str) -> str: 6 | return make_valid_batch_id( 7 | f"{job_id}-{datetime.utcnow().strftime('%y%m%d-%H%M%S')}" 8 | ) 9 | 10 | 11 | def make_valid_batch_id(id: str) -> str: 12 | """Returns a job id or task id that is valid for Azure Batch. 13 | 14 | Note from the Azure Batch SDK: 15 | 16 | Task ids can only contain any 17 | combination of alphanumeric characters along with dash (-) 18 | and underscore (_). 19 | The name must be from 1 through 64 characters long. 20 | """ 21 | id = re.sub("[^a-zA-Z0-9_-]", "-", id) 22 | if len(id) > 64: 23 | id = id[:32] + "-" + id[-31:] 24 | return id.strip("-") 25 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/dag.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import networkx as nx 4 | 5 | from pctasks.core.models.workflow import JobDefinition 6 | 7 | 8 | def sort_jobs(jobs: List[JobDefinition]) -> List[JobDefinition]: 9 | G = nx.DiGraph() 10 | 11 | for job in jobs: 12 | G.add_node(job.get_id()) 13 | for dep in job.get_dependencies() or []: 14 | G.add_edge(dep, job.get_id()) 15 | 16 | sorted_ids: List[str] = list(nx.topological_sort(G)) 17 | return sorted(jobs, key=lambda job: sorted_ids.index(job.get_id())) 18 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/errors.py: -------------------------------------------------------------------------------- 1 | class WorkflowFailedError(Exception): 2 | pass 3 | 4 | 5 | class TaskFailedError(Exception): 6 | pass 7 | 8 | 9 | class TaskPreparationError(Exception): 10 | pass 11 | 12 | 13 | class WorkflowRunRecordError(Exception): 14 | """Raised when there are unexpected results or behaviors from run records""" 15 | 16 | pass 17 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/py.typed --------------------------------------------------------------------------------
/pctasks/run/pctasks/run/secrets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/secrets/__init__.py -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/task/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pctasks.run.settings import RunSettings, TaskRunnerType 4 | from pctasks.run.task.argo import ArgoTaskRunner 5 | from pctasks.run.task.base import TaskRunner 6 | from pctasks.run.task.batch import BatchTaskRunner 7 | from pctasks.run.task.local import LocalTaskRunner 8 | 9 | 10 | def get_task_runner(settings: Optional[RunSettings] = None) -> TaskRunner: 11 | settings = settings or RunSettings.get() 12 | 13 | if settings.task_runner_type == TaskRunnerType.LOCAL: 14 | assert settings.local_dev_endpoints_url # Checked during settings validation 15 | return LocalTaskRunner(settings.local_dev_endpoints_url) 16 | elif settings.task_runner_type == TaskRunnerType.BATCH: 17 | return BatchTaskRunner(settings) 18 | elif settings.task_runner_type == TaskRunnerType.ARGO: 19 | return ArgoTaskRunner(settings) 20 | else: 21 | raise ValueError(f"Unknown task runner type: {settings.task_runner_type}") 22 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | from pctasks.core.cosmos.settings import CosmosDBSettings 2 | from pctasks.run.settings import RunSettings, WorkflowRunnerType 3 | from pctasks.run.workflow.argo import ArgoWorkflowRunner 4 | from pctasks.run.workflow.base import WorkflowRunner 5 | from pctasks.run.workflow.local import LocalWorkflowRunner 6 | 7 | 8 | def get_workflow_runner() -> WorkflowRunner: 9 | run_settings = RunSettings.get() 10 | cosmosdb_settings = CosmosDBSettings.get() 11 | 12 | if run_settings.workflow_runner_type == WorkflowRunnerType.LOCAL: 13 | assert run_settings.local_dev_endpoints_url # Checked during validation 14 | return LocalWorkflowRunner(run_settings, cosmosdb_settings) 15 | elif run_settings.workflow_runner_type == WorkflowRunnerType.ARGO: 16 | return ArgoWorkflowRunner(run_settings, cosmosdb_settings) 17 | else: 18 | raise ValueError( 19 | f"Unknown workflow runner type: {run_settings.workflow_runner_type}" 20 | ) 21 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/workflow/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from pctasks.core.cosmos.settings import CosmosDBSettings 4 | from pctasks.core.models.workflow import WorkflowSubmitMessage, WorkflowSubmitResult 5 | from pctasks.run.settings import RunSettings, WorkflowExecutorConfig 6 | 7 | 8 | class WorkflowRunner(ABC): 9 | def __init__(self, run_settings: RunSettings, cosmosdb_settings: CosmosDBSettings): 10 | self.run_settings = run_settings 11 | self.cosmosdb_settings = cosmosdb_settings 12 | 13 | def get_executor_config(self) -> WorkflowExecutorConfig: 14 
| return WorkflowExecutorConfig( 15 | run_settings=self.run_settings, cosmosdb_settings=self.cosmosdb_settings 16 | ) 17 | 18 | @abstractmethod 19 | def submit_workflow( 20 | self, submit_msg: WorkflowSubmitMessage 21 | ) -> WorkflowSubmitResult: 22 | pass 23 | -------------------------------------------------------------------------------- /pctasks/run/pctasks/run/workflow/executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/pctasks/run/workflow/executor/__init__.py -------------------------------------------------------------------------------- /pctasks/run/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/run/tests/batch/test_utils.py: -------------------------------------------------------------------------------- 1 | from pctasks.run.batch.utils import make_valid_batch_id 2 | 3 | 4 | def test_make_valid_job_id(): 5 | assert ( 6 | make_valid_batch_id("some-job/job_ok/!this/is/not/valid") 7 | == "some-job-job_ok--this-is-not-valid" 8 | ) 9 | long_job_id = "test-chars" * 7 10 | assert len(make_valid_batch_id(long_job_id)) == 64 11 | -------------------------------------------------------------------------------- /pctasks/run/tests/secrets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/tests/secrets/__init__.py -------------------------------------------------------------------------------- /pctasks/run/tests/secrets/test_base.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from pctasks.run.secrets.base import SecretsProvider 4 | 5 | 6 | class MockSecretsProvider(SecretsProvider): 7 | def __init__(self, secrets: Dict[str, str]) -> None: 8 | self.secrets = secrets 9 | 10 | def get_secret(self, name: str) -> str: 11 | result = self.secrets.get(name, None) 12 | if not result: 13 | raise ValueError(f"Secret {name} requested but not provided") 14 | return result 15 | 16 | 17 | def test_parse_secret(): 18 | provider = MockSecretsProvider({"foo": "bar"}) 19 | env = {"foo": "${{ secrets.foo }}"} 20 | parsed = provider.substitute_secrets(env) 21 | assert parsed["foo"] == "bar" 22 | -------------------------------------------------------------------------------- /pctasks/run/tests/test_settings.py: -------------------------------------------------------------------------------- 1 | from pctasks.run.settings import RunSettings 2 | 3 | 4 | def test_settings(): 5 | _ = RunSettings.get() 6 | -------------------------------------------------------------------------------- /pctasks/run/tests/workflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/run/tests/workflow/__init__.py -------------------------------------------------------------------------------- /pctasks/server/README.md: -------------------------------------------------------------------------------- 
1 | # Planetary Computer Tasks: Server 2 | 3 | Server component of PCTasks -------------------------------------------------------------------------------- /pctasks/server/pctasks/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/pctasks/server/__init__.py -------------------------------------------------------------------------------- /pctasks/server/pctasks/server/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/pctasks/server/py.typed -------------------------------------------------------------------------------- /pctasks/server/pctasks/server/routes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/pctasks/server/routes/__init__.py -------------------------------------------------------------------------------- /pctasks/server/pctasks/server/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/server/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/server/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/server/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from starlette.testclient import TestClient 3 | 4 | from pctasks.server.main import app 5 | 6 | 7 | @pytest.fixture(scope="function") 8 | def client() -> TestClient: 9 | return TestClient(app) 10 | -------------------------------------------------------------------------------- /pctasks/task/README.md: -------------------------------------------------------------------------------- 1 | # Planetary Computer Tasks: Task 2 | 3 | The pctasks.task library supplies functionality for creating executable tasks in the PCTasks system. 
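For example, tasks are referenced by an importable path of the form `module:variable`. A minimal sketch using `get_task_path` (defined in `pctasks/task/pctasks/task/utils.py` below); `mypkg.tasks` and `my_task` are hypothetical names:

```python
from pctasks.task.utils import get_task_path

# my_task is a hypothetical Task instance defined at module level in a
# hypothetical module mypkg.tasks.
from mypkg.tasks import my_task

task_path = get_task_path(my_task, "my_task")
assert task_path == "mypkg.tasks:my_task"
```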
4 | 5 | -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/__init__.py: -------------------------------------------------------------------------------- 1 | """pctasks.task 2 | 3 | isort:skip_file 4 | """ 5 | 6 | from pctasks.task.version import __version__ 7 | 8 | __all__ = ["__version__"] 9 | -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/pctasks/task/common/__init__.py -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/constants.py: -------------------------------------------------------------------------------- 1 | # Environment Variables 2 | 3 | TASKIO_TENANT_ID_ENV_VAR = "TASKIO_TENANT_ID" 4 | TASKIO_CLIENT_ID_ENV_VAR = "TASKIO_CLIENT_ID" 5 | TASKIO_CLIENT_SECRET_ENV_VAR = "TASKIO_CLIENT_SECRET" 6 | -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/context.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from pctasks.core.models.task import TaskRunConfig 4 | from pctasks.core.storage import StorageFactory 5 | from pctasks.core.tokens import Tokens 6 | 7 | 8 | @dataclass 9 | class TaskContext: 10 | """Context that is passed into Task run methods. 11 | 12 | This class is used to supply the Task with the necessary 13 | framework components to run. 14 | """ 15 | 16 | storage_factory: StorageFactory 17 | """A StorageFactory instance configured with workflow tokens""" 18 | 19 | run_id: str 20 | """The run ID of the workflow currently being executed.""" 21 | 22 | @classmethod 23 | def from_task_run_config(cls, task_config: TaskRunConfig) -> "TaskContext": 24 | return cls( 25 | storage_factory=StorageFactory(Tokens(task_config.tokens)), 26 | run_id=task_config.run_id, 27 | ) 28 | -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/pctasks/task/py.typed -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/settings.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pctasks.core.settings import PCTasksSettings 4 | 5 | 6 | class TaskSettings(PCTasksSettings): 7 | @classmethod 8 | def section_name(cls) -> str: 9 | return "task" 10 | 11 | code_dir: Optional[str] = None 12 | """The directory in which downloaded code and requirements are stored. 13 | 14 | If provided, this directory will be used as the target for pip installs, 15 | and code source will be downloaded to this directory. 16 | If None, will use sys.path and pip install will not use a target directory. 17 | """ 18 | -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Optional, TypeVar 3 | 4 | from pctasks.core.models.base import PCBaseModel 5 | from pctasks.task.task import Task 6 | 7 | T = TypeVar("T", bound=PCBaseModel) 8 | U = TypeVar("U", bound=PCBaseModel) 9 | 10 | 11 | def get_task_path(task: Task[T, U], name: str, module: Optional[str] = None) -> str: 12 | """Convenience method for getting the path to a task. 13 | 14 | Detects the module name. Requires that the user supply the importable 15 | variable name, including any containing instances or classes. 16 | """ 17 | if not module: 18 | m = inspect.getmodule(task) 19 | if not m: 20 | raise ValueError(f"Could not find module for task {task}") 21 | module = m.__name__ 22 | return f"{module}:{name}" 23 | -------------------------------------------------------------------------------- /pctasks/task/pctasks/task/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.5" 2 | -------------------------------------------------------------------------------- /pctasks/task/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/tests/__init__.py -------------------------------------------------------------------------------- /pctasks/task/tests/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks/task/tests/common/__init__.py -------------------------------------------------------------------------------- /pctasks/task/tests/data-files/test-files/a/three.txt: -------------------------------------------------------------------------------- 1 | three 2 | -------------------------------------------------------------------------------- /pctasks/task/tests/data-files/test-files/a/two.txt: -------------------------------------------------------------------------------- 1 | two 2 | -------------------------------------------------------------------------------- /pctasks/task/tests/data-files/test-files/b/c/five.txt: -------------------------------------------------------------------------------- 1 | five 2 | -------------------------------------------------------------------------------- /pctasks/task/tests/data-files/test-files/b/c/six.txt: -------------------------------------------------------------------------------- 1 | six 2 | -------------------------------------------------------------------------------- /pctasks/task/tests/data-files/test-files/b/four.txt: -------------------------------------------------------------------------------- 1 | four 2 | -------------------------------------------------------------------------------- /pctasks/task/tests/data-files/test-files/one.txt: -------------------------------------------------------------------------------- 1 | one 2 | -------------------------------------------------------------------------------- /pctasks/task/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from pctasks.dataset.chunks.task import create_chunks_task 2 | from pctasks.task.utils import get_task_path 3 | 4 | 5 | class TestTaskHolder: 6 | task = create_chunks_task 7 | 8 | 9 | def test_get_object_path_in_package(): 10 | task_path = get_task_path(create_chunks_task, "create_chunks_task") 11 | assert task_path == "pctasks.dataset.chunks.task:create_chunks_task" 12 | 13 | 14 | def test_get_task_path_in_class(): 15 | task_path = get_task_path( 16 | TestTaskHolder.task, "TestTaskHolder.task", module=TestTaskHolder.__module__ 17 | ) 18 | assert task_path == "tests.test_utils:TestTaskHolder.task" 19 | -------------------------------------------------------------------------------- /pctasks_frontend/.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /pctasks_frontend/.env.example: -------------------------------------------------------------------------------- 1 | REACT_APP_IS_DEV=true 2 | REACT_APP_API_ROOT=http://localhost:8511 3 | 4 | # Not needed if IS_DEV is true, otherwise get these values from the portal for 5 | # the environment you're targeting 6 | REACT_APP_AUTH_TENANT_ID= 7 | REACT_APP_AUTH_CLIENT_ID= 8 | REACT_APP_AUTH_BACKEND_APP_ID= 9 | -------------------------------------------------------------------------------- /pctasks_frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # dependencies 2 | /node_modules 3 | /.pnp 4 | .pnp.js 5 | 6 | # testing 7 | /coverage 8 | 9 | # production 10 | /build 11 | 12 | # misc 13 | .DS_Store 14 | .env 15 | .env.local 16 | .env.development.local 17 | .env.test.local 18 | .env.production.local 19 | 20 | npm-debug.log* 21 | yarn-debug.log* 22 | yarn-error.log* 23 | -------------------------------------------------------------------------------- /pctasks_frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "arrowParens": "avoid", 3 | "htmlWhitespaceSensitivity": "css", 4 | "insertPragma": false, 5 | "jsxSingleQuote": false, 6 | "printWidth": 85, 7 | "proseWrap": "always", 8 | "requirePragma": false, 9 | "semi": true, 10 | "tabWidth": 2, 11 | "trailingComma": "es5", 12 | "useTabs": false 13 | } 14 | -------------------------------------------------------------------------------- /pctasks_frontend/.storybook/main.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | stories: ["../src/**/*.stories.mdx", "../src/**/*.stories.@(js|jsx|ts|tsx)"], 3 | addons: [ 4 | "@storybook/addon-links", 5 | "@storybook/addon-essentials", 6 | "@storybook/addon-interactions", 7 | "@storybook/preset-create-react-app", 8 | "storybook-addon-react-router-v6", 9 | ], 10 | framework: "@storybook/react", 11 | core: { 12 | builder: "@storybook/builder-webpack5", 13 | }, 14 | }; 15 | -------------------------------------------------------------------------------- /pctasks_frontend/.storybook/preview.js: -------------------------------------------------------------------------------- 1 | import { ThemeProvider } from "@fluentui/react"; 2 | 3 | export const parameters = { 4 | actions: { argTypesRegex: "^on[A-Z].*" }, 5 | controls: { 6 | matchers: { 7 | color: /(background|color)$/i, 8 | date: /Date$/, 9 | }, 10 | }, 11 | }; 12 | 13 | export const decorators = [ 14 | (Story) => { 15 | return ( 16 | <ThemeProvider> 17 | <Story /> 18 | </ThemeProvider> 19 | ); 20 | }, 21 | ]; 22 | -------------------------------------------------------------------------------- /pctasks_frontend/README.md:
-------------------------------------------------------------------------------- 1 | # PC Tasks Frontend 2 | 3 | A web application for viewing PC Tasks workflow runs. 4 | -------------------------------------------------------------------------------- /pctasks_frontend/public/index.html: -------------------------------------------------------------------------------- 1 | <!DOCTYPE html> 2 | <html lang="en"> 3 | <head> 4 | <meta charset="utf-8" /> 5 | <meta name="viewport" content="width=device-width, initial-scale=1" /> 6 | <title>PC Tasks</title> 7 | </head> 8 | <body> 9 | <noscript>You need to enable JavaScript to run this app.</noscript> 10 | <div id="root"></div> 11 | </body> 12 | </html> 13 | -------------------------------------------------------------------------------- /pctasks_frontend/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: / 4 | -------------------------------------------------------------------------------- /pctasks_frontend/src/App.test.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { render, screen } from '@testing-library/react'; 3 | import App from './App'; 4 | 5 | test('renders learn react link', () => { 6 | render(<App />); 7 | const linkElement = screen.getByText(/learn react/i); 8 | expect(linkElement).toBeInTheDocument(); 9 | }); 10 | -------------------------------------------------------------------------------- /pctasks_frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { mergeStyleSets, Separator } from "@fluentui/react"; 2 | import { Outlet } from "react-router-dom"; 3 | import { Header } from "components/layout"; 4 | 5 | function App() { 6 | return ( 7 | <div className={styles.wrapper}> 8 | <Header /> 9 | <main className={styles.main}> 10 | <div className={styles.page}> 11 | <Outlet /> 12 | </div> 13 | </main> 14 | <Separator /> 15 | <footer>Footer</footer> 16 | </div> 17 | ); 18 | } 19 | 20 | export default App; 21 | 22 | const styles = mergeStyleSets({ 23 | wrapper: { 24 | display: "flex", 25 | flexDirection: "column", 26 | minHeight: "100vh", 27 | }, 28 | main: { 29 | display: "flex", 30 | flexDirection: "column", 31 | flexGrow: 1, 32 | margin: "0 20px", 33 | }, 34 | page: { 35 | flexGrow: 1, 36 | display: "flex", 37 | flexDirection: "column", 38 | }, 39 | }); 40 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/auth/AuthPage/AuthPage.index.tsx: -------------------------------------------------------------------------------- 1 | import { AuthenticatedTemplate, UnauthenticatedTemplate } from "@azure/msal-react"; 2 | import { IS_DEV } from "helpers/constants"; 3 | import React from "react"; 4 | 5 | export const AuthPage: React.FC<{ children: React.ReactNode }> = ({ children }) => { 6 | if (IS_DEV) { 7 | return <>{children}</>; 8 | } 9 | 10 | return ( 11 | <> 12 | <AuthenticatedTemplate>{children}</AuthenticatedTemplate> 13 | <UnauthenticatedTemplate> 14 | You must be logged in to view this page. 15 | </UnauthenticatedTemplate> 16 | </> 17 | ); 18 | }; 19 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/auth/hooks/useApiClient.ts: -------------------------------------------------------------------------------- 1 | import axios, { AxiosInstance } from "axios"; 2 | import { useMsalToken } from "./useMsalToken"; 3 | import { API_ROOT } from "helpers/constants"; 4 | 5 | const getConfiguredClient = (accessToken: string): AxiosInstance => { 6 | const client = axios.create({ 7 | baseURL: API_ROOT, 8 | }); 9 | client.defaults.headers.common["Authorization"] = `Bearer ${accessToken}`; 10 | return client; 11 | }; 12 | 13 | export const useAuthApiClient = (): AxiosInstance | undefined => { 14 | const { accessToken } = useMsalToken(); 15 | if (accessToken) { 16 | return getConfiguredClient(accessToken); 17 | } 18 | return undefined; 19 | }; 20 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/auth/index.ts: -------------------------------------------------------------------------------- 1 | import { AuthPage } from "./AuthPage/AuthPage.index"; 2 | 3 | export { AuthPage }; 4 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/auth/login/SignInButton.tsx: -------------------------------------------------------------------------------- 1 | import { useMsal } from "@azure/msal-react"; 2 | import { loginRequest } from "helpers/auth"; 3 | 4 | import { DefaultButton } from "@fluentui/react"; 5 | 6 | export const SignInButton: React.FC = () => { 7 | const { instance } = useMsal(); 8 | 9 | const handleClick = () => { 10 | instance.loginRedirect(loginRequest).catch(e => { 11 | console.error(e); 12 | }); 13 | }; 14 | 15 | return <DefaultButton onClick={handleClick}>Sign In</DefaultButton>; 16 | }; 17 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/auth/login/index.ts: -------------------------------------------------------------------------------- 1 | import { PcPersona } from "./PcPersona"; 2 | import { SignInButton } from "./SignInButton"; 3 | import { UserHeaderControl } from "./UserHeaderControl"; 4 | 5 | export { PcPersona, SignInButton, UserHeaderControl }; 6 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/RunTimes/RunDuration.tsx: -------------------------------------------------------------------------------- 1 | import { Clock20Regular } from "@fluentui/react-icons"; 2 | import { RunTimeEntry, RunTimeProps } from "./RunTimeEntry"; 3 | 4 | export const RunTimeDuration: React.FC<RunTimeProps> = ({ 5 | times, 6 | showIcon = true, 7 | className = "", 8 | }) => { 9 | const icon = showIcon ? <Clock20Regular />
 : null; 10 | return ( 11 | <RunTimeEntry icon={icon} text={times.durationFriendly} className={className} /> 12 | ); 13 | }; 14 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/RunTimes/RunStarted.tsx: -------------------------------------------------------------------------------- 1 | import { CalendarLtr20Regular } from "@fluentui/react-icons"; 2 | import { RunTimeEntry, RunTimeProps } from "./RunTimeEntry"; 3 | 4 | export const RunTimeStarted: React.FC<RunTimeProps> = ({ times }) => { 5 | return ( 6 | <RunTimeEntry 7 | icon={<CalendarLtr20Regular />} 8 | text={times.startFriendly} 9 | title={times.startFormatted} 10 | /> 11 | ); 12 | }; 13 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/RunTimes/RunTimeBlock.tsx: -------------------------------------------------------------------------------- 1 | import { mergeStyles, Stack } from "@fluentui/react"; 2 | import { formatRunTimes } from "helpers/time"; 3 | import { gapSmall } from "styles/global"; 4 | import { Run } from "types"; 5 | import { RunTimeDuration } from "./RunDuration"; 6 | import { RunTimeStarted } from "./RunStarted"; 7 | 8 | interface RunTimeBlockProps { 9 | run: Run; 10 | } 11 | 12 | export const RunTimeBlock: React.FC<RunTimeBlockProps> = ({ run }) => { 13 | const runTimes = formatRunTimes(run); 14 | const started = <RunTimeStarted times={runTimes} />; 15 | const duration = <RunTimeDuration times={runTimes} />; 16 | 17 | return ( 18 | <Stack className={className} tokens={gapSmall}> 19 | {started} 20 | {duration} 21 | </Stack> 22 | ); 23 | }; 24 | 25 | const className = mergeStyles({ 26 | minWidth: 110, 27 | }); 28 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/RunTimes/RunTimeEntry.tsx: -------------------------------------------------------------------------------- 1 | import { Stack, Text } from "@fluentui/react"; 2 | import { gapSmall } from "styles/global"; 3 | import { RunTimesHumanized } from "types"; 4 | 5 | interface RunTimeEntryProps extends React.HTMLAttributes<HTMLDivElement> { 6 | icon?: React.ReactNode; 7 | text: string; 8 | title?: string; 9 | } 10 | 11 | export interface RunTimeProps extends React.HTMLAttributes<HTMLDivElement> { 12 | times: RunTimesHumanized; 13 | showIcon?: boolean; 14 | } 15 | 16 | export const RunTimeEntry: React.FC<RunTimeEntryProps> = ({ 17 | icon, 18 | text, 19 | title, 20 | className, 21 | }) => { 22 | return ( 23 | <Stack horizontal className={className} tokens={gapSmall}> 24 | {icon} 25 | <Text title={title}> 26 | {text} 27 | </Text> 28 | </Stack> 29 | ); 30 | }; 31 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/StatusIcon/__tests__/StatusIcon.stories.tsx: -------------------------------------------------------------------------------- 1 | import { ComponentStory, ComponentMeta } from "@storybook/react"; 2 | 3 | import WorkflowStatusIcon from "components/common/StatusIcon"; 4 | import { WorkflowRunStatus } from "types/enums"; 5 | 6 | export default { 7 | title: "PC Tasks/StatusIcon", 8 | component: WorkflowStatusIcon, 9 | } as ComponentMeta<typeof WorkflowStatusIcon>; 10 | 11 | const Template: ComponentStory<typeof WorkflowStatusIcon> = args => ( 12 | <WorkflowStatusIcon {...args} /> 13 | ); 14 | 15 | export const Status = Template.bind({}); 16 | Status.args = { 17 | status: WorkflowRunStatus.completed, 18 | }; 19 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/StatusIcon/index.ts: -------------------------------------------------------------------------------- 1 | import { StatusIcon } from "./StatusIcon.index"; 2 | 3 | export default StatusIcon; 4 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/hooks/index.ts:
-------------------------------------------------------------------------------- 1 | import { useExpandButton } from "./useExpandButton"; 2 | import { usePageTitle } from "./usePageTitle"; 3 | 4 | export { useExpandButton, usePageTitle }; 5 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/hooks/useExpandButton.tsx: -------------------------------------------------------------------------------- 1 | import { getTheme, IconButton, mergeStyleSets } from "@fluentui/react"; 2 | import { ChevronDown20Filled, ChevronRight20Filled } from "@fluentui/react-icons"; 3 | import { useState } from "react"; 4 | 5 | export const useExpandButton = (defaultExpanded: boolean) => { 6 | const [isExpanded, setIsExpanded] = useState(defaultExpanded); 7 | 8 | const chevron = isExpanded ? ( 9 | <ChevronDown20Filled className={styles.icon} /> 10 | ) : ( 11 | <ChevronRight20Filled className={styles.icon} /> 12 | ); 13 | const title = isExpanded ? "Collapse" : "Expand"; 14 | const toggleButton = ( 15 | <IconButton 16 | title={title} 17 | onRenderIcon={() => chevron} 18 | onClick={() => setIsExpanded(!isExpanded)} 19 | /> 20 | ); 21 | 22 | return { isExpanded, setIsExpanded, toggleButton }; 23 | }; 24 | 25 | const theme = getTheme(); 26 | const styles = mergeStyleSets({ 27 | icon: { 28 | color: theme.semanticColors.bodyText, 29 | }, 30 | }); 31 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/common/hooks/usePageTitle.tsx: -------------------------------------------------------------------------------- 1 | export const usePageTitle = (title?: string) => { 2 | const defaultTitle = "PC Tasks"; 3 | document.title = title ? `${defaultTitle} | ${title}` : defaultTitle; 4 | return null; 5 | }; 6 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/jobs/JobRunItem/JobRunItem.index.tsx: -------------------------------------------------------------------------------- 1 | import { IndentLevel, JobApiDefinition, JobRun } from "types"; 2 | import { RunItem } from "components/common/RunItem/RunItem.index"; 3 | 4 | interface JobRunItemProps { 5 | job: JobApiDefinition; 6 | run: JobRun | undefined; 7 | indent: IndentLevel; 8 | children?: React.ReactNode; 9 | } 10 | 11 | export const JobRunItem: React.FC<JobRunItemProps> = ({ 12 | job, 13 | run, 14 | indent, 15 | children, 16 | }) => { 17 | // Use the job id from the run (since it may be a sub job), but if there are no runs 18 | // use the name from the workflow job definition. 19 | const title = run?.job_id || job.id; 20 | 21 | return ( 22 | <RunItem title={title} run={run} indent={indent}> 23 | {children} 24 | </RunItem> 25 | ); 26 | }; 27 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/jobs/JobRunItem/__tests__/data.ts: -------------------------------------------------------------------------------- 1 | import WorkflowJobsData from "./workflow-job-runs.json"; 2 | import WorkflowJobDefinitions from "./workflow-job-definitions.json"; 3 | 4 | export const TestJobRuns = WorkflowJobsData; 5 | export const TestJobDefinitions = WorkflowJobDefinitions.jobs; 6 | export const TestProcessChunkJobRunSingle = { 7 | links: null, 8 | errors: null, 9 | created: "2022-08-02T19:57:33.977422", 10 | updated: "2022-07-28T23:01:04.722900", 11 | run_id: "b6320d38fcec41a0a959831efc54345f", 12 | job_id: "process-chunk", 13 | status: "completed", 14 | }; 15 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/jobs/JobRunWithSubJobs/__tests__/data.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/pctasks_frontend/src/components/jobs/JobRunWithSubJobs/__tests__/data.ts -------------------------------------------------------------------------------- /pctasks_frontend/src/components/jobs/JobRunWithTasks/__tests__/data.ts: -------------------------------------------------------------------------------- 1 | export const TestTaskRuns = { 2 | "create-splits": [ 3 | { 4 | links: [ 5 | { 6 | rel: "log", 7 | href: "https://pctaskstest-staging.azure-api.net/tasks/runs/b6320d38fcec41a0a959831efc54345f/jobs/create-splits/tasks/create-splits/logs/run.txt", 8 | type: "text/plain", 9 | title: "Task log: run.txt", 10 | }, 11 | ], 12 | errors: null, 13 | created: "2022-08-04T14:22:41.179204", 14 | updated: "2022-07-28T22:53:26.821920", 15 | run_id: "b6320d38fcec41a0a959831efc54345f", 16 | job_id: "create-splits", 17 | task_id: "create-splits", 18 | status: "completed", 19 | }, 20 | ], 21 | }; 22 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/jobs/ParentJobRunItem/ParentJobRunItem.index.tsx: -------------------------------------------------------------------------------- 1 | import { makeSyntheticJobRun } from "helpers/jobs"; 2 | import { JobApiDefinition, JobRun } from "types"; 3 | import { JobRunItem } from "../JobRunItem/JobRunItem.index"; 4 | 5 | interface ParentJobRunItemProps { 6 | job: JobApiDefinition; 7 | runs: JobRun[]; 8 | children: React.ReactNode; 9 | } 10 | 11 | export const ParentJobRunItem: React.FC<ParentJobRunItemProps> = ({ 12 | job, 13 | runs, 14 | children, 15 | }) => { 16 | const synthRun = makeSyntheticJobRun(job.id, runs); 17 | 18 | return ( 19 | <JobRunItem job={job} run={synthRun} indent={0}> 20 | {children} 21 | </JobRunItem> 22 | ); 23 | }; 24 | -------------------------------------------------------------------------------- /pctasks_frontend/src/components/jobs/hooks/useSubJobFilter.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import { JobRun } from "types"; 3 | import { JobRunStatus } from "types/enums"; 4 | import { JobStatusFilter } from "../JobStatusFilter/JobStatusFilter.index"; 5 | 6 | export const useSubJobFilter = (jobRuns: JobRun[]) => { 7 | const [filter, setFilter] = useState(allStatuses); 8 | 9 | const filterPanel = ( 10 | 15 | ); 16 | 17 | const filteredJobRuns = jobRuns.filter(run =>
filter.includes(run.status)); 18 | 19 | return { filterPanel, filteredJobRuns }; 20 | }; 21 | 22 | const allStatuses = Object.values(JobRunStatus) 23 | .map(key => JobRunStatus[key]) 24 | .filter(value => typeof value === "string") as string[]; 25 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/jobs/index.ts: -------------------------------------------------------------------------------- 1 | import { JobRunItem } from "./JobRunItem/JobRunItem.index"; 2 | import { JobRunWithSubJobs } from "./JobRunWithSubJobs/JobRunWithSubJobs.index"; 3 | import { JobRunWithTasks } from "./JobRunWithTasks/JobRunWithTasks.index"; 4 | import { JobRunList } from "./JobRunList/JobRunList.index"; 5 | 6 | export { JobRunItem, JobRunList, JobRunWithSubJobs, JobRunWithTasks }; 7 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/layout/index.ts: -------------------------------------------------------------------------------- 1 | import { Header } from "./Header"; 2 | 3 | export { Header }; 4 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/tasks/TaskRunItem/__tests__/TaskRunItem.stories.tsx: -------------------------------------------------------------------------------- 1 | import { ComponentStory, ComponentMeta } from "@storybook/react"; 2 | import TaskRunItem from "components/tasks/TaskRunItem"; 3 | import { TestTaskDefinitions, TestTaskRuns } from "./data"; 4 | 5 | export default { 6 | title: "PC Tasks/TaskRunItem", 7 | component: TaskRunItem, 8 | } as ComponentMeta<typeof TaskRunItem>; 9 | 10 | const Template: ComponentStory<typeof TaskRunItem> = args => ( 11 | <TaskRunItem {...args} /> 12 | ); 13 | 14 | export const Single = Template.bind({}); 15 | Single.args = { 16 | task: TestTaskDefinitions["create-splits"], 17 | // @ts-ignore 18 | taskRun: TestTaskRuns["create-splits"], 19 | }; 20 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/tasks/TaskRunItem/index.ts: -------------------------------------------------------------------------------- 1 | import { TaskRunItem } from "./TaskRunItem.index"; 2 | 3 | export default TaskRunItem; 4 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/tasks/TaskRunList/TaskRunList.index.tsx: -------------------------------------------------------------------------------- 1 | import { Stack } from "@fluentui/react"; 2 | import { IndentLevel, TaskApiDefinition, TaskRun } from "types"; 3 | import TaskRunItem from "../TaskRunItem"; 4 | 5 | interface TaskRunListProps { 6 | tasks: TaskApiDefinition[]; 7 | taskRuns: TaskRun[]; 8 | indent: IndentLevel; 9 | } 10 | 11 | export const TaskRunList: React.FC<TaskRunListProps> = ({ 12 | tasks, 13 | taskRuns, 14 | indent, 15 | }) => { 16 | const items = tasks.map(task => { 17 | const runs = taskRuns.find(run => run.task_id === task.id); 18 | return <TaskRunItem key={task.id} task={task} taskRun={runs} indent={indent} />; 19 | }); 20 | 21 | return <Stack>{items}</Stack>; 22 | }; 23 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/tasks/TaskRunList/__tests__/TaskRunList.stories.tsx: -------------------------------------------------------------------------------- 1 | import { ComponentStory, ComponentMeta } from "@storybook/react"; 2 | import TaskRunList from "components/tasks/TaskRunList"; 3 | import { TestJobTasks, TestTaskRuns } from "./data"; 4 | 5 | export default { 6 | title: "PC Tasks/TaskRunList", 7 | component:
TaskRunList, 8 | } as ComponentMeta<typeof TaskRunList>; 9 | 10 | const Template: ComponentStory<typeof TaskRunList> = args => ( 11 | <TaskRunList {...args} /> 12 | ); 13 | 14 | export const Basic = Template.bind({}); 15 | Basic.args = { 16 | tasks: TestJobTasks["process-chunk"], 17 | // @ts-ignore 18 | taskRuns: TestTaskRuns["process-chunk"], 19 | }; 20 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/tasks/TaskRunList/index.ts: -------------------------------------------------------------------------------- 1 | import { TaskRunList } from "./TaskRunList.index"; 2 | export default TaskRunList; 3 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/tasks/index.ts: -------------------------------------------------------------------------------- 1 | import { TaskRunItem } from "./TaskRunItem/TaskRunItem.index"; 2 | import { TaskRunList } from "./TaskRunList/TaskRunList.index"; 3 | 4 | export { TaskRunItem, TaskRunList }; 5 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/workflows/WorkflowRunList/__tests__/WorkflowRunList.stories.tsx: -------------------------------------------------------------------------------- 1 | import { ComponentStory, ComponentMeta } from "@storybook/react"; 2 | import { withRouter } from "storybook-addon-react-router-v6"; 3 | 4 | import WorkflowRunList from "components/workflows/WorkflowRunList"; 5 | import { TestWorkflowRunListItems, TestWorkflowRunListItemsLong } from "./data"; 6 | 7 | export default { 8 | title: "PC Tasks/WorkflowRunList", 9 | component: WorkflowRunList, 10 | decorators: [withRouter], 11 | } as ComponentMeta<typeof WorkflowRunList>; 12 | 13 | const Template: ComponentStory<typeof WorkflowRunList> = args => ( 14 | <WorkflowRunList {...args} /> 15 | ); 16 | 17 | export const Basic = Template.bind({}); 18 | Basic.args = { 19 | runs: TestWorkflowRunListItems, 20 | }; 21 | 22 | export const Long = Template.bind({}); 23 | Long.args = { 24 | runs: TestWorkflowRunListItemsLong, 25 | }; 26 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/workflows/WorkflowRunList/__tests__/data.ts: -------------------------------------------------------------------------------- 1 | import { WorkflowRun } from "types"; 2 | import longWorkflowRunsList from "./workflow-runs.json"; 3 | import { TestWorkflowRunItems } from "components/workflows/WorkflowRunItem/__tests__/data"; 4 | 5 | const { completed, failed, running, submitted } = TestWorkflowRunItems; 6 | export const TestWorkflowRunListItems: WorkflowRun[] = [ 7 | submitted, 8 | running, 9 | failed, 10 | completed, 11 | ]; 12 | 13 | // @ts-ignore -- test data file enums as string don't satisfy type checker 14 | export const TestWorkflowRunListItemsLong: WorkflowRun[] = longWorkflowRunsList; 15 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/components/workflows/index.ts: -------------------------------------------------------------------------------- 1 | import { WorkflowRunList } from "./WorkflowRunList/WorkflowRunList.index"; 2 | import { WorkflowRunItem } from "./WorkflowRunItem/WorkflowRunItem.index"; 3 | import { WorkflowRunHeader } from "./WorkflowRunHeader/WorkflowRunHeader.index"; 4 | 5 | export { WorkflowRunList, WorkflowRunItem, WorkflowRunHeader }; 6 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/global.d.ts:
-------------------------------------------------------------------------------- 1 | declare interface Window { 2 | publicConfig: Record<string, string>; 3 | } 4 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/helpers/job-create-splits.json: -------------------------------------------------------------------------------- 1 | { 2 | "links": [ 3 | { 4 | "rel": "task", 5 | "href": "https://pctaskstest-staging.azure-api.net/tasks/runs/b6320d38fcec41a0a959831efc54345f/jobs/create-splits/tasks/create-splits", 6 | "type": "application/json", 7 | "title": "Task: create-splits" 8 | } 9 | ], 10 | "errors": null, 11 | "created": "2022-08-01T16:09:56.958856", 12 | "updated": "2022-07-28T22:53:26.860795", 13 | "run_id": "b6320d38fcec41a0a959831efc54345f", 14 | "job_id": "create-splits", 15 | "status": "completed" 16 | }
-------------------------------------------------------------------------------- /pctasks_frontend/src/helpers/logs-create-splits.json: -------------------------------------------------------------------------------- 1 | [INFO] 2022-07-28 22:53:23,417 - === PCTasks === 2 | [INFO] 2022-07-28 22:53:23,417 - == b6320d38fcec41a0a959831efc54345f_j_create-splits_t_create-splits 3 | [INFO] 2022-07-28 22:53:23,417 - -- PCTasks: Setting up task... 4 | [INFO] 2022-07-28 22:53:23,729 - Using the following environment variables from task configuration: 5 | [INFO] 2022-07-28 22:53:23,729 - AZURE_TENANT_ID,AZURE_CLIENT_ID,AZURE_CLIENT_SECRET 6 | [INFO] 2022-07-28 22:53:23,738 - -- PCTasks: Running task... 7 | [INFO] 2022-07-28 22:53:23,739 - -- PCTasks: Handling task result... 8 | [INFO] 2022-07-28 22:53:23,793 - === PCTasks: Task completed! === 9 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/helpers/task-create-splits.json: -------------------------------------------------------------------------------- 1 | { 2 | "links": [ 3 | { 4 | "rel": "log", 5 | "href": "https://pctaskstest-staging.azure-api.net/tasks/runs/b6320d38fcec41a0a959831efc54345f/jobs/create-splits/tasks/create-splits/logs/run.txt", 6 | "type": "text/plain", 7 | "title": "Task log: run.txt" 8 | } 9 | ], 10 | "errors": null, 11 | "created": "2022-08-01T16:11:10.896926", 12 | "updated": "2022-07-28T22:53:26.821920", 13 | "run_id": "b6320d38fcec41a0a959831efc54345f", 14 | "job_id": "create-splits", 15 | "task_id": "create-splits", 16 | "status": "completed" 17 | } 18 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/helpers/tasks.ts: -------------------------------------------------------------------------------- 1 | import { TaskRun } from "types"; 2 | 3 | export const getLogUrl = (task: TaskRun): string | undefined => { 4 | return task?.links?.find(link => link.rel === "log")?.href; 5 | }; 6 | 7 | export const equals = (a: TaskRun, b: TaskRun): boolean => { 8 | return [a.job_id === b.job_id, a.task_id === b.task_id].every(Boolean); 9 | }; 10 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/helpers/utils.ts: -------------------------------------------------------------------------------- 1 | export const mergeClassNames = (...classNames: string[]) => { 2 | return classNames.filter(Boolean).join(" "); 3 | }; 4 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/helpers/workflows.ts: -------------------------------------------------------------------------------- 1 | export const
getShortId = (id: string, length: number = 8): string => { 2 | return id.substring(0, length); 3 | }; 4 | -------------------------------------------------------------------------------- /pctasks_frontend/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 4 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', 5 | sans-serif; 6 | -webkit-font-smoothing: antialiased; 7 | -moz-osx-font-smoothing: grayscale; 8 | } 9 | 10 | code { 11 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', 12 | monospace; 13 | } 14 | -------------------------------------------------------------------------------- /pctasks_frontend/src/pages/Home/Home.index.tsx: -------------------------------------------------------------------------------- 1 | import { usePageTitle } from "components/common/hooks"; 2 | 3 | export const Home = () => { 4 | usePageTitle(); 5 | return ( 6 |
<div> 7 | <h2>Home</h2> 8 | </div>
9 | ); 10 | }; 11 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/pages/index.ts: -------------------------------------------------------------------------------- 1 | import { Workflows as WorkflowsPage } from "./Workflows/Workflows.index"; 2 | import { WorkflowDetail as WorkflowDetailPage } from "./WorkflowDetail/WorkflowDetail.index"; 3 | import { Home as HomePage } from "./Home/Home.index"; 4 | 5 | export const Home = HomePage; 6 | export const Workflows = WorkflowsPage; 7 | export const WorkflowDetail = WorkflowDetailPage; 8 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/react-app-env.d.ts: -------------------------------------------------------------------------------- 1 | /// <reference types="react-scripts" /> 2 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/reportWebVitals.ts: -------------------------------------------------------------------------------- 1 | import { ReportHandler } from 'web-vitals'; 2 | 3 | const reportWebVitals = (onPerfEntry?: ReportHandler) => { 4 | if (onPerfEntry && onPerfEntry instanceof Function) { 5 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 6 | getCLS(onPerfEntry); 7 | getFID(onPerfEntry); 8 | getFCP(onPerfEntry); 9 | getLCP(onPerfEntry); 10 | getTTFB(onPerfEntry); 11 | }); 12 | } 13 | }; 14 | 15 | export default reportWebVitals; 16 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/setupTests.ts: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom'; 6 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/styles/global.ts: -------------------------------------------------------------------------------- 1 | import { getTheme, IStackTokens, IStyle } from "@fluentui/react"; 2 | 3 | const theme = getTheme(); 4 | export const treeIndent = 30; 5 | 6 | export const gapRegular: IStackTokens = { 7 | childrenGap: 8, 8 | }; 9 | 10 | export const gapSmall: IStackTokens = { 11 | childrenGap: 4, 12 | }; 13 | 14 | export const borderColor = theme.palette.neutralLight; 15 | export const border: IStyle = { 16 | borderColor: borderColor, 17 | borderStyle: "solid", 18 | borderWidth: 1, 19 | }; 20 | export const borderTop: IStyle = { 21 | ...border, 22 | borderLeftWidth: 0, 23 | borderRightWidth: 0, 24 | borderBottomWidth: 0, 25 | }; 26 | 27 | export const borderRound: IStyle = { 28 | ...border, 29 | borderRadius: 4, 30 | }; 31 |
-------------------------------------------------------------------------------- /pctasks_frontend/src/types/enums.ts: -------------------------------------------------------------------------------- 1 | export enum LinkRel { 2 | job = "job", 3 | task = "task", 4 | log = "log", 5 | } 6 | 7 | export enum TaskRunStatus { 8 | received = "received", 9 | pending = "pending", 10 | submitting = "submitting", 11 | submitted = "submitted", 12 | starting = "starting", 13 | running = "running", 14 | waiting = "waiting", 15 | completed = "completed", 16 | failed = "failed", 17 | cancelled = "cancelled", 18 | } 19 | 20 | export enum JobRunStatus { 21 | pending = "pending", 22 | running = "running", 23 |
completed = "completed", 24 | failed = "failed", 25 | cancelled = "cancelled", 26 | notasks = "notasks", 27 | } 28 | 29 | export enum WorkflowRunStatus { 30 | submitted = "submitted", 31 | running = "running", 32 | completed = "completed", 33 | failed = "failed", 34 | } 35 | -------------------------------------------------------------------------------- /pctasks_frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "allowSyntheticDefaultImports": true, 9 | "strict": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "noFallthroughCasesInSwitch": true, 12 | "module": "esnext", 13 | "moduleResolution": "node", 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "noEmit": true, 17 | "jsx": "react-jsx", 18 | "baseUrl": "src" 19 | }, 20 | "include": ["src"] 21 | } 22 | -------------------------------------------------------------------------------- /pctasks_funcs/.dockerignore: -------------------------------------------------------------------------------- 1 | local.settings.json -------------------------------------------------------------------------------- /pctasks_funcs/.funcignore: -------------------------------------------------------------------------------- 1 | .git* 2 | .vscode 3 | local.settings.json 4 | test 5 | .venv 6 | Dockerfile 7 | tests 8 | .direnv 9 | -------------------------------------------------------------------------------- /pctasks_funcs/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | obj 3 | csx 4 | .vs 5 | edge 6 | Publish 7 | 8 | *.user 9 | *.suo 10 | *.cscfg 11 | *.Cache 12 | project.lock.json 13 | 14 | /packages 15 | /TestResults 16 | 17 | /tools/NuGet.exe 18 | /App_Data 19 | /secrets 20 | /data 21 | .secrets 22 | appsettings.json 23 | local.settings.json 24 | 25 | node_modules 26 | dist 27 | 28 | # Local python packages 29 | .python_packages/ 30 | 31 | # Python Environments 32 | .env 33 | .venv 34 | env/ 35 | venv/ 36 | ENV/ 37 | env.bak/ 38 | venv.bak/ 39 | 40 | # Byte-compiled / optimized / DLL files 41 | __pycache__/ 42 | *.py[cod] 43 | *$py.class -------------------------------------------------------------------------------- /pctasks_funcs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azure-functions/python:4-python3.10 2 | 3 | ENV AzureWebJobsScriptRoot=/home/site/wwwroot \ 4 | AzureFunctionsJobHost__Logging__Console__IsEnabled=true 5 | 6 | RUN apt update && apt install -y azure-cli 7 | 8 | # Copy pctasks packages 9 | COPY pctasks /home/site/pctasks 10 | 11 | # Copy function app 12 | COPY pctasks_funcs /home/site/wwwroot 13 | 14 | WORKDIR /home/site/wwwroot 15 | 16 | RUN pip install -r /home/site/wwwroot/requirements.txt 17 | 18 | CMD [ "/home/site/wwwroot/start.sh" ] 19 | -------------------------------------------------------------------------------- /pctasks_funcs/PublishItemsCF/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "name": "documents", 6 | "type": "cosmosDBTrigger", 7 | "direction": "in", 8 | "connectionStringSetting": "FUNC_COSMOSDB_CONN_STR", 9 | "databaseName": "pctasks", 10 | "collectionName": "%FUNC_ITEMS_COLLECTION_NAME%" 11 | } 12 | ] 13 | } 
-------------------------------------------------------------------------------- /pctasks_funcs/StorageEventsCF/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "name": "documents", 6 | "type": "cosmosDBTrigger", 7 | "direction": "in", 8 | "connectionStringSetting": "FUNC_COSMOSDB_CONN_STR", 9 | "databaseName": "pctasks", 10 | "collectionName": "%FUNC_STORAGE_EVENTS_COLLECTION_NAME%" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /pctasks_funcs/StorageEventsQueue/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "type": "queueTrigger", 6 | "name": "msg", 7 | "direction": "in", 8 | "queueName": "storage-events", 9 | "connectionStringSetting": "FUNC_COSMOSDB_CONN_STR" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /pctasks_funcs/WorkflowRunsCF/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "name": "container", 6 | "type": "cosmosDBTrigger", 7 | "direction": "in", 8 | "connectionStringSetting": "FUNC_COSMOSDB_CONN_STR", 9 | "databaseName": "pctasks", 10 | "collectionName": "%FUNC_WORKFLOW_RUNS_COLLECTION_NAME%" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /pctasks_funcs/WorkflowsCF/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Dict 3 | 4 | import azure.functions as func 5 | 6 | from pctasks.core.cosmos.containers.records import AsyncRecordsContainer 7 | from pctasks.core.models.workflow import WorkflowRecord, WorkflowRecordType 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | async def main(container: func.DocumentList) -> None: 13 | for document in container: 14 | data: Dict[str, Any] = document.data 15 | _type = data.get("type") 16 | 17 | if _type == WorkflowRecordType.WORKFLOW: 18 | await handle_workflow(data) 19 | else: 20 | pass 21 | 22 | 23 | async def handle_workflow(data: Dict[str, Any]) -> None: 24 | """Handle a workflow record.""" 25 | record = WorkflowRecord.model_validate(data) 26 | 27 | async with AsyncRecordsContainer(WorkflowRecord) as container: 28 | await container.put(record) 29 | 30 | logger.info(f"Workflow {record.workflow_id} saved to single partition container.") 31 | -------------------------------------------------------------------------------- /pctasks_funcs/WorkflowsCF/function.json: -------------------------------------------------------------------------------- 1 | { 2 | "scriptFile": "__init__.py", 3 | "bindings": [ 4 | { 5 | "name": "container", 6 | "type": "cosmosDBTrigger", 7 | "direction": "in", 8 | "connectionStringSetting": "FUNC_COSMOSDB_CONN_STR", 9 | "databaseName": "pctasks", 10 | "collectionName": "%FUNC_WORKFLOWS_COLLECTION_NAME%" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /pctasks_funcs/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | }, 11 | "extensionBundle": { 12 | "id": 
"Microsoft.Azure.Functions.ExtensionBundle", 13 | "version": "[2.*, 3.0.0)" 14 | }, 15 | "extensions": { 16 | "queues": { 17 | "maxPollingInterval": "00:00:02", 18 | "visibilityTimeout": "00:00:30", 19 | "batchSize": 16, 20 | "maxDequeueCount": 5, 21 | "newBatchThreshold": 8 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /pctasks_funcs/requirements-deploy.txt: -------------------------------------------------------------------------------- 1 | # Do not include azure-functions-worker as it may conflict with the Azure Functions platform 2 | azure-functions 3 | azure-functions-durable 4 | 5 | # Docker deploy process needs symlinks to bring in 6 | # pctasks libraries. Symlink is created in deploy script 7 | pctasks_linked/core 8 | # pctasks_linked/cli 9 | # pctasks_linked/task 10 | # pctasks_linked/client 11 | # pctasks_linked/run 12 | # pctasks_linked/notify 13 | # pctasks_linked/router -------------------------------------------------------------------------------- /pctasks_funcs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Do not include azure-functions-worker as it may conflict with the Azure Functions platform 2 | azure-functions 3 | azure-functions-durable 4 | 5 | # Keep in sync with requirements-deploy.txt 6 | -e ../pctasks/core 7 | # -e ../pctasks/cli 8 | # -e ../pctasks/task 9 | # -e ../pctasks/client 10 | # -e ../pctasks/run 11 | # -e ../pctasks/notify 12 | # -e ../pctasks/router -------------------------------------------------------------------------------- /pctasks_funcs/tests/test_publish_items.py: -------------------------------------------------------------------------------- 1 | # this can be run with python -m pytest 2 | # to ensure that the file is on sys.path 3 | import json 4 | import pathlib 5 | 6 | import azure.functions as func 7 | import PublishItemsCF 8 | import pytest 9 | 10 | HERE = pathlib.Path(__file__).parent 11 | 12 | 13 | @pytest.fixture 14 | def document() -> func.Document: 15 | data = json.load(pathlib.Path(HERE / "items_document.json").open()) 16 | return func.Document(data["data"]) 17 | 18 | 19 | def test_transform_document(document): 20 | result = PublishItemsCF.transform_document(document) 21 | assert result 22 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_mode = auto 3 | markers = 4 | cosmos: marks tests as using Cosmos DB -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8==7.1.0 2 | black==25.1.0 3 | mypy==1.15.0 4 | pytest==8.1.1 5 | isort==5.* 6 | pytest-asyncio==0.18.* 7 | kubernetes 8 | 9 | planetary-computer 10 | pystac[validation]==1.10.1 11 | 12 | azure-functions 13 | azure-functions-durable 14 | responses 15 | 16 | # Mypy stubs 17 | 18 | types-cachetools 19 | types-chardet 20 | types-requests 21 | types-six 22 | types-urllib3<1.27 23 | types-python-dateutil 24 | types-ujson 25 | types-PyYAML 26 | 27 | myst-parser 28 | pydata-sphinx-theme 29 | sphinx 30 | sphinx-autobuild 31 | sphinx_design 32 | -------------------------------------------------------------------------------- /requirements-task-base.txt: -------------------------------------------------------------------------------- 1 | pystac[validation]==1.10.1 2 | rasterio==1.4.3 
-------------------------------------------------------------------------------- /scripts/bin/pctasks-pip-compile: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "${CI}" ]]; then 6 | set -x 7 | fi 8 | 9 | uv pip compile \ 10 | --system \ 11 | --emit-index-annotation \ 12 | "$@" 13 | 14 | # 3 15 | # 2 16 | # 1 17 | # 0 18 | # We expect this to be called as pip-compile input -o output 19 | # so $3 should be the output file 20 | sed -i -e 's/.* @ file:\/\/\/.*//' $3 21 |
-------------------------------------------------------------------------------- /scripts/build: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | set -o errtrace 5 | set -o nounset 6 | set -o pipefail 7 | 8 | 9 | python3 -m pip install build 10 | 11 | for subpackage in pctasks/* 12 | do 13 | python3 -m build "$subpackage" -o dist 14 | done
-------------------------------------------------------------------------------- /scripts/ciauthenticate: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "${CI}" ]]; then 6 | set -x 7 | fi 8 | 9 | function usage() { 10 | echo -n \ 11 | "Usage: $(basename "$0") 12 | CI authentication for this project. 13 | " 14 | } 15 | 16 | acr=$1 17 | 18 | az acr login --name $acr 19 | docker login $acr.azurecr.io --username ${CLIENT_ID} --password ${CLIENT_SECRET}
-------------------------------------------------------------------------------- /scripts/citest-integration: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "${CI}" ]]; then 6 | set -x 7 | fi 8 | 9 | function usage() { 10 | echo -n \ 11 | "Usage: $(basename "$0") --dev 12 | Integration tests run as part of CI 13 | " 14 | } 15 | 16 | if [ "${BASH_SOURCE[0]}" = "${0}" ]; then 17 | 18 | export PCTASKS_CLIENT__ENDPOINT=http://localhost:8500/tasks 19 | export PCTASKS_CLIENT__API_KEY=kind-api-key 20 | export PCTASKS_CLIENT__CONFIRMATION_REQUIRED=False 21 | export AZURITE_HOST=localhost 22 | export AZURITE_PORT=10000 23 | export AZURITE_STORAGE_ACCOUNT=devstoreaccount1 24 | export DEV_DB_CONNECTION_STRING=postgresql://username:password@localhost:5499/postgis 25 | export DEV_REMOTE_DB_CONNECTION_STRING=postgresql://username:password@database:5432/postgis 26 | 27 | scripts/bin/test-integration --test-only 28 | fi
-------------------------------------------------------------------------------- /scripts/format: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ "${CI}" ]]; then 6 | set -x 7 | fi 8 | 9 | function usage() { 10 | echo -n \ 11 | "Usage: $(basename "$0") 12 | 13 | Applies a code formatter to this codebase.
14 | " 15 | } 16 | 17 | if [ "${BASH_SOURCE[0]}" = "${0}" ]; then 18 | if [[ "${1}" == "--help" ]]; then 19 | usage 20 | else 21 | docker compose \ 22 | -f docker-compose.console.yml \ 23 | run --rm \ 24 | dev \ 25 | scripts/bin/format 26 | 27 | fi 28 | fi 29 | -------------------------------------------------------------------------------- /scripts/validate-collections: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | ls -1N datasets/**/*/template.json | xargs -I {} pctasks dataset validate-collection "{}" 2>&1 6 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/tests/__init__.py -------------------------------------------------------------------------------- /tests/constants.py: -------------------------------------------------------------------------------- 1 | DEFAULT_TIMEOUT = 60 * 5 2 | -------------------------------------------------------------------------------- /tests/data-files/assets/a/asset-a-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-a-1.json"} -------------------------------------------------------------------------------- /tests/data-files/assets/b/b_1/asset-b_1-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-b_1-1.json"} -------------------------------------------------------------------------------- /tests/data-files/assets/b/b_2/asset-b_2-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-b_2-1.json"} -------------------------------------------------------------------------------- /tests/data-files/assets/c/c_1/c_1_1/asset-c_1_1-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-c_1_1-1.json"} -------------------------------------------------------------------------------- /tests/data-files/assets/c/c_1/c_1_1/asset-c_1_1-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-c_1_1-2.json"} -------------------------------------------------------------------------------- /tests/data-files/assets/c/c_1/c_1_2/asset-c_1-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-c_1-2.json"} -------------------------------------------------------------------------------- /tests/data-files/assets/c/c_2/c_2_1/asset-c_2_1-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-c_2_1-1.json"} -------------------------------------------------------------------------------- /tests/data-files/assets/c/c_2/c_2_1/asset-c_2_1-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-c_2_1-2.json"} -------------------------------------------------------------------------------- /tests/data-files/collection_template/description.md: -------------------------------------------------------------------------------- 1 | # Test collection 2 | 3 | This is a test collection. In particular, this is a description markdown that will be templated into the collection JSON. 
-------------------------------------------------------------------------------- /tests/data-files/simple-assets/a/asset-a-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-a-1.json"} -------------------------------------------------------------------------------- /tests/data-files/simple-assets/a/asset-a-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-a-2.json"} -------------------------------------------------------------------------------- /tests/data-files/simple-assets/b/asset-b-1.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-b-1.json"} -------------------------------------------------------------------------------- /tests/data-files/simple-assets/b/asset-b-2.json: -------------------------------------------------------------------------------- 1 | {"name": "asset-b-2.json"} -------------------------------------------------------------------------------- /tests/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/planetary-computer-tasks/bf362fc28b53dbefee4d7d339a8185cce7d630ee/tests/dataset/__init__.py -------------------------------------------------------------------------------- /tests/dataset/dataset.yaml: -------------------------------------------------------------------------------- 1 | image: localhost:5001/pctasks-task-base:latest 2 | owner: microsoft 3 | id: test-dataset 4 | 5 | args: 6 | - collection_id 7 | - collection_template 8 | - assets_uri 9 | - chunks_uri 10 | - code_path 11 | - db_connection_string 12 | 13 | environment: 14 | DB_CONNECTION_STRING: ${{ args.db_connection_string }} 15 | 16 | code: 17 | src: ${{ args.code_path }} 18 | 19 | collections: 20 | - id: ${{ args.collection_id }} 21 | template: ${{ args.collection_template }} 22 | collection_class: dataset.collection:TestCollection 23 | asset_storage: 24 | - uri: ${{ args.assets_uri }} 25 | chunks: 26 | length: 2 27 | ext: .json 28 | name_starts_with: asset 29 | splits: 30 | - depth: 1 31 | chunk_storage: 32 | uri: ${{ args.chunks_uri }} 33 | -------------------------------------------------------------------------------- /tests/ingest/test_collection_ingest.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | from pctasks.dev.db import temp_pgstac_db 5 | from pctasks.dev.test_utils import assert_workflow_is_successful, run_workflow_from_file 6 | from tests.constants import DEFAULT_TIMEOUT 7 | 8 | HERE = Path(__file__).parent 9 | WORKFLOWS = HERE / ".." / "workflows" 10 | 11 | TIMEOUT_SECONDS = DEFAULT_TIMEOUT 12 | 13 | 14 | def test_ingest_collection(): 15 | with temp_pgstac_db() as conn_str_info: 16 | collection_path = HERE / ".." 
/ "data-files" / "collection.json" 17 | with collection_path.open() as f: 18 | collection = json.load(f) 19 | run_id = run_workflow_from_file( 20 | WORKFLOWS / "ingest-collection.yaml", 21 | args={"collection": collection, "db_connection_str": conn_str_info.remote}, 22 | ) 23 | assert_workflow_is_successful(run_id, timeout_seconds=TIMEOUT_SECONDS) 24 | -------------------------------------------------------------------------------- /tests/workflows/ingest-collection.yaml: -------------------------------------------------------------------------------- 1 | id: ingest-collection-test-workflow 2 | name: Ingest Collection Test Workflow 3 | dataset: microsoft/test-dataset2 4 | 5 | args: 6 | - collection 7 | - db_connection_str 8 | 9 | jobs: 10 | ingest: 11 | name: Ingest Collection 12 | tasks: 13 | - id: ingest-collection 14 | image: localhost:5001/pctasks-ingest:latest 15 | task: pctasks.ingest_task.task:ingest_task 16 | environment: 17 | DB_CONNECTION_STRING: "${{ args.db_connection_str }}" 18 | args: 19 | content: 20 | type: Collections 21 | collections: 22 | - ${{ args.collection }} 23 | -------------------------------------------------------------------------------- /workflow.yaml: -------------------------------------------------------------------------------- 1 | name: Ingest NDJsons from blob://modiseuwest/modis-061-etl-data/chunks-for-reingest/ 2 | jobs: 3 | ingest-items: 4 | id: ingest-items 5 | tasks: 6 | - id: ingest-ndjson 7 | image_key: ingest 8 | task: pctasks.ingest_task.task:ingest_task 9 | args: 10 | content: 11 | type: Ndjson 12 | ndjson_folder: 13 | uri: blob://modiseuwest/modis-061-etl-data/chunks-for-reingest/ 14 | extensions: [] 15 | options: 16 | insert_group_size: 5000 17 | insert_only: false 18 | environment: 19 | AZURE_TENANT_ID: ${{ secrets.task-tenant-id }} 20 | AZURE_CLIENT_ID: ${{ secrets.task-client-id }} 21 | AZURE_CLIENT_SECRET: ${{ secrets.task-client-secret }} 22 | schema_version: 1.0.0 23 | schema_version: 1.0.0 24 | id: modis-reingest-staging 25 | dataset: microsoft/modis 26 | 27 | --------------------------------------------------------------------------------