├── .dist.env ├── .dockerignore ├── .flake8 ├── .github └── workflows │ ├── terraform_build.yaml │ ├── terraform_destroy_on_delete.yaml │ └── terraform_plan.yaml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .secrets.baseline ├── Dockerfile ├── README.md ├── alembic.ini ├── app ├── __init__.py ├── application.py ├── authentication │ ├── __init__.py │ ├── api_keys.py │ └── token.py ├── crud │ ├── __init__.py │ ├── api_keys.py │ ├── assets.py │ ├── datamart.py │ ├── datasets.py │ ├── geostore.py │ ├── metadata.py │ ├── tasks.py │ └── versions.py ├── errors.py ├── main.py ├── middleware.py ├── models │ ├── __init__.py │ ├── enum │ │ ├── analysis.py │ │ ├── assets.py │ │ ├── change_log.py │ │ ├── creation_options.py │ │ ├── entity.py │ │ ├── geostore.py │ │ ├── pg_admin_functions.py │ │ ├── pg_sys_functions.py │ │ ├── pg_types.py │ │ ├── pixetl.py │ │ ├── queries.py │ │ ├── sources.py │ │ └── versions.py │ ├── orm │ │ ├── __init__.py │ │ ├── api_keys.py │ │ ├── asset_metadata.py │ │ ├── assets.py │ │ ├── base.py │ │ ├── datamart.py │ │ ├── dataset_metadata.py │ │ ├── datasets.py │ │ ├── geostore.py │ │ ├── migrations │ │ │ ├── README │ │ │ ├── draft_versions │ │ │ │ └── 04fcb4f2408a_add_metadata_table.py │ │ │ ├── env.py │ │ │ ├── script.py.mako │ │ │ └── versions │ │ │ │ ├── 04fcb4f2408a_add_metadata_table.py │ │ │ │ ├── 167eebbf29e4_.py │ │ │ │ ├── 3e524ef0525f_.py │ │ │ │ ├── 4763f4b8141a_.py │ │ │ │ ├── 604bf4e66c2b_.py │ │ │ │ ├── 73fb3f5e39b8_.py │ │ │ │ ├── 86ae41de358d_.py │ │ │ │ ├── a5787f2eefe5_.py │ │ │ │ ├── aa5aefcbdfcf_.py │ │ │ │ ├── d62a9b15f844_.py │ │ │ │ ├── d767b6dd2c4c_.py │ │ │ │ ├── d8f049f00259_add_analysis_results_table.py │ │ │ │ └── ef3392e8e054_.py │ │ ├── mixins.py │ │ ├── queries │ │ │ ├── __init__.py │ │ │ ├── datasets.py │ │ │ ├── fields.py │ │ │ └── raster_assets.py │ │ ├── tasks.py │ │ ├── user_areas.py │ │ ├── version_metadata.py │ │ └── versions.py │ └── pydantic │ │ ├── __init__.py │ │ ├── analysis.py │ │ ├── asset_metadata.py │ │ ├── assets.py │ │ ├── authentication.py │ │ ├── base.py │ │ ├── change_log.py │ │ ├── creation_options.py │ │ ├── database.py │ │ ├── datamart.py │ │ ├── datasets.py │ │ ├── downloads.py │ │ ├── extent.py │ │ ├── features.py │ │ ├── geostore.py │ │ ├── jobs.py │ │ ├── metadata.py │ │ ├── political.py │ │ ├── query.py │ │ ├── raster_analysis.py │ │ ├── responses.py │ │ ├── sources.py │ │ ├── statistics.py │ │ ├── symbology.py │ │ ├── tasks.py │ │ ├── user_job.py │ │ └── versions.py ├── responses.py ├── routes │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ └── analysis.py │ ├── assets │ │ ├── __init__.py │ │ ├── asset.py │ │ └── assets.py │ ├── authentication │ │ ├── __init__.py │ │ └── authentication.py │ ├── datamart │ │ ├── __init__.py │ │ └── land.py │ ├── datasets │ │ ├── __init__.py │ │ ├── asset.py │ │ ├── dataset.py │ │ ├── datasets.py │ │ ├── downloads.py │ │ ├── features.py │ │ ├── geostore.py │ │ ├── queries.py │ │ └── versions.py │ ├── geostore │ │ ├── __init__.py │ │ └── geostore.py │ ├── health.py │ ├── jobs │ │ ├── __init__.py │ │ └── job.py │ ├── political │ │ ├── __init__.py │ │ └── id_lookup.py │ └── tasks │ │ ├── __init__.py │ │ └── task.py ├── settings │ ├── __init__.py │ ├── globals.py │ ├── gunicorn_conf.py │ ├── prestart.sh │ └── start.sh ├── static │ └── gfw-data-api.png ├── tasks │ ├── __init__.py │ ├── assets.py │ ├── aws_tasks.py │ ├── batch.py │ ├── cog_assets.py │ ├── datamart │ │ └── land.py │ ├── default_assets.py │ ├── delete_assets.py │ ├── 
dynamic_vector_tile_cache_assets.py │ ├── raster_tile_cache_assets │ │ ├── __init__.py │ │ ├── raster_tile_cache_assets.py │ │ ├── symbology.py │ │ └── utils.py │ ├── raster_tile_set_assets │ │ ├── __init__.py │ │ ├── raster_tile_set_assets.py │ │ └── utils.py │ ├── static_vector_1x1_assets.py │ ├── static_vector_file_assets.py │ ├── static_vector_tile_cache_assets.py │ ├── table_source_assets.py │ ├── utils.py │ └── vector_source_assets.py └── utils │ ├── __init__.py │ ├── aws.py │ ├── decorators.py │ ├── fields.py │ ├── gadm.py │ ├── generators.py │ ├── geostore.py │ ├── google.py │ ├── paginate.py │ ├── path.py │ ├── rw_api.py │ ├── stats.py │ └── tile_cache.py ├── batch ├── .dockerignore ├── __init__.py ├── pixetl.dockerfile ├── python │ ├── 16bpp_gdal2tiles.py │ ├── 8bpp_gdal2tiles.py │ ├── __init__.py │ ├── adjust_num_processes.py │ ├── apply_colormap.py │ ├── aws_utils.py │ ├── check_csv.py │ ├── check_raster.py │ ├── check_vector.py │ ├── cluster_partitions.py │ ├── create_partitions.py │ ├── errors.py │ ├── export_1x1_grid.py │ ├── export_to_gee.py │ ├── extract_geometries.py │ ├── gdal_utils.py │ ├── logger.py │ ├── logging_utils.py │ ├── raster_tile_cache.py │ ├── resample.py │ └── tiles_geojson.py ├── scripts │ ├── _add_gfw_fields_sql.sh │ ├── _add_point_geometry_fields_sql.sh │ ├── _fill_gfw_fields_sql.sh │ ├── _fill_point_geometry_fields_sql.sh │ ├── _get_geometry_type_sql.sh │ ├── _tiff_crosses_dateline.sh │ ├── _warp_and_upload.sh │ ├── add_gfw_fields.sh │ ├── add_point_geometry_fields.sh │ ├── apply_colormap.sh │ ├── clip_and_reproject_geom.sh │ ├── cluster_partitions.sh │ ├── cluster_table.sh │ ├── cogify.sh │ ├── create_index.sh │ ├── create_partitions.sh │ ├── create_tabular_schema.sh │ ├── create_vector_schema.sh │ ├── create_vector_tile_cache.sh │ ├── export_1x1_grid.sh │ ├── export_vector_data.sh │ ├── get_arguments.sh │ ├── inherit_geostore.sh │ ├── load_tabular_data.sh │ ├── load_vector_csv_data.sh │ ├── load_vector_data.sh │ ├── raster_tile_cache.sh │ ├── report_status.sh │ ├── resample.sh │ ├── run_pixetl.sh │ ├── test_mock_s3_awscli.sh │ ├── test_mock_s3_ogr2ogr.sh │ ├── tmp │ │ ├── create_1x1_grid.sh │ │ ├── export_vector_data.sh │ │ ├── import_vector_data.sh │ │ └── post_process_vector_data.sh │ ├── unify_projection.sh │ └── update_point_geometry.sh └── universal_batch.dockerfile ├── docker-compose.dev.yml ├── docker-compose.prod.yml ├── docker-compose.test.yml ├── newrelic.ini ├── pyproject.toml ├── scripts ├── delete_workspace ├── develop ├── infra ├── migrate ├── migration_dry_run ├── setup ├── terraform ├── test └── test_v2 ├── terraform.md ├── terraform ├── api_gateway │ ├── api_key_authorizer_lambda.py │ └── api_key_authorizer_lambda.zip ├── cloudfront.tf ├── data.tf ├── docker │ └── docker-compose.yml ├── generate_port.py ├── iam.tf ├── logging.tf ├── main.tf ├── modules │ ├── api_gateway │ │ ├── endpoint │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variables.tf │ │ ├── gateway │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variables.tf │ │ └── resource │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variables.tf │ └── batch │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf ├── outputs.tf ├── scripts │ └── hash.sh ├── templates │ ├── api_gateway_policy.json.tmpl │ ├── cloudwatch_log_policy.json.tmpl │ ├── container_definition.json.tmpl │ ├── container_properties.json.tmpl │ ├── iam_assume_role.json.tmpl │ ├── iam_s3_read_only.json │ ├── iam_trust_entity.json.tmpl │ ├── lambda_invoke_policy.json.tmpl │ ├── query_batch_policy.json.tmpl │ ├── 
role-trust-policy.json.tmpl │ ├── run_batch_policy.json.tmpl │ ├── step_function_policy.json.tmpl │ └── tile_cache_bucket_policy.json.tmpl ├── variables.tf ├── vars │ ├── backend-dev.tfvars │ ├── backend-production.tfvars │ ├── backend-staging.tfvars │ ├── terraform-dev.tfvars │ ├── terraform-production.tfvars │ └── terraform-staging.tfvars └── versions.tf ├── tests ├── __init__.py ├── conftest.py ├── crud │ ├── __init__.py │ ├── test_assets.py │ ├── test_datasets.py │ └── test_versions.py ├── fixtures │ ├── append_test.tsv │ ├── aws │ │ └── config │ ├── test.csv │ ├── test.geojson │ ├── test.gpkg.zip │ ├── test.shp.zip │ ├── test.tsv │ ├── test2.csv │ └── test2.geojson ├── models │ └── test_jobs.py ├── routes │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── test_assets.py │ │ ├── test_datasets.py │ │ └── test_versions.py │ ├── test_analysis.py │ ├── test_authorization.py │ ├── test_features.py │ ├── test_geostore.py │ └── test_tasks.py ├── tasks │ ├── __init__.py │ ├── test_aws_tasks.py │ ├── test_batch.py │ ├── test_batch_scheduler.py │ ├── test_default_assets.py │ ├── test_delete_assets.py │ ├── test_table_source_assets.py │ ├── test_vector_source_assets.py │ └── test_vector_tile_assets.py ├── utils.py └── utils │ └── test_path.py ├── tests_v2 ├── __init__.py ├── conftest.py ├── fixtures │ ├── __init__.py │ ├── authentication │ │ ├── __init__.py │ │ └── api_keys.py │ ├── creation_options │ │ ├── __init__.py │ │ └── versions.py │ ├── geojson │ │ ├── test.geojson │ │ ├── test_bad.geojson │ │ └── test_huge.geojson │ ├── metadata │ │ ├── dataset.py │ │ └── version.py │ ├── otf_payload │ │ └── otf_payload.py │ └── sample_rw_geostore_response.py ├── unit │ ├── __init__.py │ ├── app │ │ ├── __init__.py │ │ ├── authentication │ │ │ ├── __init__.py │ │ │ └── test_api_keys.py │ │ ├── crud │ │ │ ├── __init__.py │ │ │ ├── test_api_keys.py │ │ │ ├── test_assets.py │ │ │ ├── test_datasets.py │ │ │ ├── test_geostore.py │ │ │ └── test_versions.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── pydantic │ │ │ │ ├── __init__.py │ │ │ │ ├── test_authentication.py │ │ │ │ └── test_datamart.py │ │ ├── routes │ │ │ ├── __init__.py │ │ │ ├── analysis │ │ │ │ ├── __init__.py │ │ │ │ └── test_analysis.py │ │ │ ├── assets │ │ │ │ ├── __init__.py │ │ │ │ ├── test_assets_with_no_pagination.py │ │ │ │ └── test_assets_with_pagination.py │ │ │ ├── authentication │ │ │ │ ├── __init__.py │ │ │ │ └── test_api_keys.py │ │ │ ├── datamart │ │ │ │ ├── __init__.py │ │ │ │ └── test_land.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── datasets │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── assets │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── test_dataset_assets_with_no_pagination.py │ │ │ │ │ │ └── test_dataset_assets_with_pagination.py │ │ │ │ │ ├── test_datasets_with_no_pagination.py │ │ │ │ │ └── test_datasets_with_pagination.py │ │ │ │ ├── test_asset_metadata.py │ │ │ │ ├── test_dataset.py │ │ │ │ ├── test_download.py │ │ │ │ ├── test_query.py │ │ │ │ └── test_version.py │ │ │ ├── geostore │ │ │ │ ├── __init__.py │ │ │ │ └── test_geostore.py │ │ │ ├── health │ │ │ │ ├── __init__.py │ │ │ │ └── test_health.py │ │ │ ├── jobs │ │ │ │ ├── __init__.py │ │ │ │ └── test_job.py │ │ │ ├── political │ │ │ │ ├── __init__.py │ │ │ │ └── id_lookup │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_id_lookup.py │ │ │ ├── tasks │ │ │ │ ├── __init__.py │ │ │ │ ├── test_asset_tasks_with_no_pagination.py │ │ │ │ └── test_asset_tasks_with_pagination.py │ │ │ └── utils.py │ │ ├── tasks │ │ │ ├── __init__.py │ │ │ ├── datamart │ │ │ │ 
├── __init__.py │ │ │ │ └── test_tree_cover_loss_by_driver.py │ │ │ ├── raster_tile_cache_assets │ │ │ │ ├── __init__.py │ │ │ │ ├── collaborators │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── conftest.py │ │ │ │ │ ├── test_building_raster_tile_set_source_creation_options.py │ │ │ │ │ ├── test_create_tile_cache_collaboration.py │ │ │ │ │ ├── test_crud_collaboration.py │ │ │ │ │ ├── test_raster_tile_cache_assets_happy_path.py │ │ │ │ │ ├── test_symbology_function_collaboration.py │ │ │ │ │ ├── test_task_execution_collaboration.py │ │ │ │ │ └── test_web_mercator_reprojection_collaboration.py │ │ │ │ └── test_symbology.py │ │ │ ├── test_batch.py │ │ │ └── test_vector_source_assets.py │ │ ├── test_middleware.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── paginate │ │ │ ├── __init__.py │ │ │ ├── test_offset_calculation.py │ │ │ ├── test_pagination_links_info.py │ │ │ └── test_pagination_meta_info.py │ │ │ ├── test_aws.py │ │ │ ├── test_fields.py │ │ │ ├── test_gadm.py │ │ │ ├── test_geostore.py │ │ │ ├── test_google.py │ │ │ └── test_rw_api.py │ └── batch │ │ ├── __init__.py │ │ └── python │ │ ├── __init__.py │ │ ├── test_adjust_num_processes.py │ │ └── test_resample.py └── utils.py ├── uv.lock └── wait_for_postgres.sh /.dist.env: -------------------------------------------------------------------------------- 1 | 2 | ### Application Variables 3 | 4 | # TO ADD APPLICATION GLOBALS: 5 | # 1. Duplicate .dist.env, rename to .env 6 | # 2. Add variable to the list below. 7 | # 3. Add variable to app/settings/globals.py, define defaults, etc. 8 | 9 | DATABASE=fill 10 | DB_USER=fill # Optional, remove if unnecessary 11 | DB_PASSWORD=fill # Optional, remove if unnecessary 12 | DB_HOST=fill # Optional, remove if unnecessary 13 | DB_PORT=fill # Optional, remove if unnecessary 14 | 15 | REDIS_IP=fill 16 | REDIS_PORT=fill 17 | 18 | # NOTE: Separate function references by comma. 
Example: 19 | # ARQ_BACKGROUND_FUNCTIONS=app.path.example_function, app.tasks.example.other_function 20 | ARQ_BACKGROUND_FUNCTIONS=app.tasks.messaging.send_message 21 | 22 | ### Docker Runtime Variables 23 | # All available options at https://github.com/tiangolo/uvicorn-gunicorn-docker 24 | # Includes custom gunicorn, concurrency, workers and logging settings 25 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # IDE Fragments 2 | /.vscode 3 | *__pycache__* 4 | *.idea* 5 | 6 | # MyPy 7 | .mypy_cache/* 8 | 9 | docker-compose.dev.yml 10 | docker-compose.prod.yml 11 | docker-compose.test.yml 12 | 13 | # GIT 14 | .git 15 | 16 | # Ignore Files 17 | .gitignore 18 | 19 | # Mac stuff 20 | *.DS_Store 21 | 22 | # Test stuff 23 | tests/cobertura.xml 24 | tests_v2/cobertura.xml 25 | 26 | # Terraform stuff 27 | *terraform* 28 | 29 | # Virtual Environments 30 | .venv* -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | max-complexity = 18 4 | select = B,C,E,F,W,T4,B9 5 | ignore = E203, E266, E501, W503, F403, E402, W605 -------------------------------------------------------------------------------- /.github/workflows/terraform_destroy_on_delete.yaml: -------------------------------------------------------------------------------- 1 | name: Destroy state and delete workspace after deleting feature branch 2 | 3 | on: [delete] 4 | 5 | jobs: 6 | build: 7 | if: github.event.ref_type == 'branch' && (github.event.ref != 'refs/heads/master') && (github.event.ref != 'refs/heads/develop') 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v1 11 | - name: Destroy state and delete workspace 12 | env: 13 | ENV: dev 14 | AWS_ACCESS_KEY_ID: ${{ secrets.aws_key_dev }} 15 | AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_dev }} 16 | AWS_REGION: ${{ secrets.aws_region_dev }} 17 | run: ./scripts/delete_workspace -w ${{ github.event.ref }} -g "no_sha_available" 18 | -------------------------------------------------------------------------------- /.github/workflows/terraform_plan.yaml: -------------------------------------------------------------------------------- 1 | name: Plan terraform changes for base branch 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | plan: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v1 11 | - name: Plan production 12 | if: success() && github.base_ref == 'master' 13 | env: 14 | ENV: production 15 | AWS_ACCESS_KEY_ID: ${{ secrets.aws_key_production }} 16 | AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_production }} 17 | AWS_REGION: ${{ secrets.aws_region_production }} 18 | run: ./scripts/infra plan -w ${{ github.base_ref }} 19 | 20 | - name: Plan staging 21 | if: success() && github.base_ref == 'develop' 22 | env: 23 | ENV: staging 24 | AWS_ACCESS_KEY_ID: ${{ secrets.aws_key_staging }} 25 | AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_staging }} 26 | AWS_REGION: ${{ secrets.aws_region_staging }} 27 | run: ./scripts/infra plan -w ${{ github.base_ref }} 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE Fragments 2 | /.vscode 3 | *__pycache__* 4 | *.idea* 5 | 6 | # MyPy 7 | .mypy_cache 8 | 9 | # GIT 10 | .git 11 | 12 | # Environment 
Files 13 | /.env 14 | .python-version 15 | 16 | # Mac stuff 17 | *.DS_Store 18 | 19 | # Test stuff 20 | tests/cobertura.xml 21 | tests_v2/cobertura.xml 22 | 23 | # Terraform stuff 24 | **/.terraform/* 25 | 26 | # .tfstate files 27 | *.tfstate 28 | *.tfstate.* 29 | 30 | # .tfplan files 31 | *.tfplan 32 | 33 | # Virtual Environments 34 | .venv* -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | line_length = 88 3 | multi_line_output = 3 4 | include_trailing_comma = True 5 | known_third_party = _pytest,aenum,affine,alembic,asgi_lifespan,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,ee,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,jsonschema,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,pytest_unordered,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,tiles_geojson,typer,unidecode 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^$' 2 | fail_fast: false 3 | repos: 4 | - repo: https://github.com/asottile/seed-isort-config 5 | rev: v2.2.0 6 | hooks: 7 | - id: seed-isort-config 8 | - repo: https://github.com/pre-commit/mirrors-isort 9 | rev: v5.10.1 10 | hooks: 11 | - id: isort 12 | - repo: https://github.com/myint/docformatter 13 | rev: eb1df347edd128b30cd3368dddc3aa65edcfac38 # pragma: allowlist secret 14 | hooks: 15 | - id: docformatter 16 | args: [--in-place] 17 | - repo: https://github.com/ambv/black 18 | rev: 24.10.0 19 | hooks: 20 | - id: black 21 | language_version: python3.10 22 | - repo: https://github.com/pre-commit/pre-commit-hooks 23 | rev: v5.0.0 24 | hooks: 25 | - id: detect-aws-credentials 26 | - id: detect-private-key 27 | - id: trailing-whitespace 28 | - repo: https://github.com/pycqa/flake8 29 | rev: 7.1.1 30 | hooks: 31 | - id: flake8 32 | - repo: https://github.com/pre-commit/mirrors-mypy 33 | rev: v1.14.1 34 | hooks: 35 | - id: mypy 36 | - repo: https://github.com/Yelp/detect-secrets 37 | rev: v1.5.0 38 | hooks: 39 | - id: detect-secrets 40 | args: ['--baseline', '.secrets.baseline'] # run: `pip install detect-secrets` to establish baseline 41 | exclude: Pipfile.lock -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = app/models/orm/migrations 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 
12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to app/models/orm/alembic/versions. When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat app/models/orm/alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | [alembic:exclude] 39 | tables = spatial_ref_sys 40 | 41 | # Logging configuration 42 | [loggers] 43 | keys = root,sqlalchemy,alembic 44 | 45 | [handlers] 46 | keys = console 47 | 48 | [formatters] 49 | keys = generic 50 | 51 | [logger_root] 52 | level = WARN 53 | handlers = console 54 | qualname = 55 | 56 | [logger_sqlalchemy] 57 | level = WARN 58 | handlers = 59 | qualname = sqlalchemy.engine 60 | 61 | [logger_alembic] 62 | level = INFO 63 | handlers = 64 | qualname = alembic 65 | 66 | [handler_console] 67 | class = StreamHandler 68 | args = (sys.stderr,) 69 | level = NOTSET 70 | formatter = generic 71 | 72 | [formatter_generic] 73 | format = %(levelname)-5.5s [%(name)s] %(message)s 74 | datefmt = %H:%M:%S 75 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/__init__.py -------------------------------------------------------------------------------- /app/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/authentication/__init__.py -------------------------------------------------------------------------------- /app/crud/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict, Union 3 | 4 | from pydantic.main import BaseModel 5 | 6 | from ..application import db 7 | from ..models.pydantic.change_log import ChangeLog 8 | 9 | 10 | async def update_data( 11 | row: db.Model, input_data: Union[BaseModel, Dict[str, Any]] # type: ignore 12 | ) -> db.Model: # type: ignore 13 | """Merge updated metadata fields with existing fields.""" 14 | 15 | if not input_data: 16 | return row 17 | 18 | if isinstance(input_data, BaseModel): 19 | input_data = input_data.dict(skip_defaults=True, by_alias=True) 20 | 21 | if input_data.get("change_log"): 22 | change_log = row.change_log 23 | # Make sure dates are correctly parsed as strings 24 | _logs = list() 25 | for data in input_data["change_log"]: 26 | _log = ChangeLog(**data).json() 27 | _logs.append(json.loads(_log)) 28 | 29 | change_log.extend(_logs) 30 | input_data["change_log"] = change_log 31 | 32 | await row.update(**input_data).apply() 33 | 34 | return row 35 | --------------------------------------------------------------------------------
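A minimal usage sketch for update_data (illustrative only; append_change_log, its arguments, and the row type are assumptions, not repository code). It shows how a caller might append a change-log entry to a Gino-backed row via the helper above, using the ChangeLog model it imports:

from datetime import datetime

from app.crud import update_data
from app.models.pydantic.change_log import ChangeLog


async def append_change_log(row, message: str, status: str = "success"):
    # Hypothetical helper: build a ChangeLog entry and let update_data serialize it,
    # extend row.change_log with it, and apply the update to the database row.
    entry = ChangeLog(date_time=datetime.utcnow(), status=status, message=message)
    return await update_data(row, {"change_log": [entry.dict()]})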
/app/crud/datamart.py: -------------------------------------------------------------------------------- 1 | """CRUD functions for data mart analysis results.""" 2 | 3 | import json 4 | import uuid 5 | 6 | from app.errors import RecordNotFoundError 7 | from app.models.orm.datamart import AnalysisResult 8 | from app.models.pydantic.datamart import DataMartResource 9 | 10 | 11 | async def save_result(result_data: DataMartResource) -> AnalysisResult: 12 | 13 | analysis_result: AnalysisResult = await AnalysisResult.create( 14 | **json.loads(result_data.json(by_alias=False)) 15 | ) 16 | 17 | return analysis_result 18 | 19 | 20 | async def get_result(result_id: uuid.UUID) -> AnalysisResult: 21 | analysis_result: AnalysisResult = await AnalysisResult.get([result_id]) 22 | if analysis_result is None: 23 | raise RecordNotFoundError(f"Could not find requested result {result_id}") 24 | 25 | return analysis_result 26 | 27 | 28 | async def update_result(result_id: uuid.UUID, result_data) -> AnalysisResult: 29 | analysis_result: AnalysisResult = await get_result(result_id) 30 | await analysis_result.update(**json.loads(result_data.json(by_alias=False))).apply() 31 | 32 | return analysis_result 33 | 34 | 35 | async def delete_result(result_id: uuid.UUID) -> AnalysisResult: 36 | analysis_result: AnalysisResult = await get_result(result_id) 37 | await AnalysisResult.delete.where(AnalysisResult.id == result_id).gino.status() 38 | 39 | return analysis_result 40 | -------------------------------------------------------------------------------- /app/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | 4 | from fastapi import HTTPException 5 | from fastapi.responses import ORJSONResponse 6 | 7 | from app.settings.globals import ENV 8 | 9 | 10 | class TooManyRetriesError(RecursionError): 11 | def __init__(self, message: str, detail: str): 12 | self.message = message 13 | self.detail = detail 14 | 15 | 16 | class RecordNotFoundError(Exception): 17 | pass 18 | 19 | 20 | class RecordAlreadyExistsError(Exception): 21 | pass 22 | 23 | 24 | class BadRequestError(Exception): 25 | pass 26 | 27 | 28 | class BadResponseError(Exception): 29 | pass 30 | 31 | 32 | class InvalidResponseError(Exception): 33 | pass 34 | 35 | 36 | class UnauthorizedError(Exception): 37 | pass 38 | 39 | 40 | def http_error_handler(exc: HTTPException) -> ORJSONResponse: 41 | 42 | message = exc.detail 43 | if exc.status_code < 500: 44 | status = "failed" 45 | else: 46 | status = "error" 47 | # In dev and test print full traceback of internal server errors 48 | if ENV == "test" or ENV == "dev": 49 | exc_type, exc_value, exc_traceback = sys.exc_info() 50 | message = traceback.format_exception(exc_type, exc_value, exc_traceback) 51 | return ORJSONResponse( 52 | status_code=exc.status_code, content={"status": status, "message": message} 53 | ) 54 | 55 | 56 | class BadAdminSourceException(Exception): 57 | pass 58 | 59 | 60 | class BadAdminVersionException(Exception): 61 | pass 62 | 63 | 64 | class GeometryIsNullError(Exception): 65 | pass 66 | -------------------------------------------------------------------------------- /app/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/__init__.py -------------------------------------------------------------------------------- /app/models/enum/change_log.py: 
-------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ChangeLogStatusTaskIn(str, Enum): 5 | success = "success" 6 | failed = "failed" 7 | 8 | 9 | class ChangeLogStatus(str, Enum): 10 | success = "success" 11 | failed = "failed" 12 | pending = "pending" 13 | -------------------------------------------------------------------------------- /app/models/enum/entity.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EntityType(str, Enum): 5 | dataset = "dataset" 6 | version = "version" 7 | asset = "asset" 8 | -------------------------------------------------------------------------------- /app/models/enum/geostore.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class GeostoreOrigin(str, Enum): 5 | gfw = "gfw" 6 | rw = "rw" 7 | -------------------------------------------------------------------------------- /app/models/enum/pg_types.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from aenum import Enum, extend_enum 4 | 5 | 6 | class PGOtherType(str, Enum): 7 | __doc__ = "Other PostgreSQL data types" 8 | array = "ARRAY" 9 | boolean = "boolean" 10 | jsonb = "jsonb" 11 | time = "time" 12 | uuid = "uuid" 13 | xml = "xml" 14 | 15 | 16 | class PGNumericType(str, Enum): 17 | __doc__ = "Numeric PostgreSQL data types" 18 | bigint = "bigint" 19 | double_precision = "double precision" 20 | integer = "integer" 21 | numeric = "numeric" 22 | smallint = "smallint" 23 | 24 | 25 | class PGTextType(str, Enum): 26 | __doc__ = "Text PostgreSQL data types" 27 | character_varying = "character varying" 28 | text = "text" 29 | 30 | 31 | class PGDateType(str, Enum): 32 | __doc__ = "Date PostgreSQL data types" 33 | date = "date" 34 | timestamp = "timestamp" 35 | timestamp_wtz = "timestamp without time zone" 36 | 37 | 38 | class PGGeometryType(str, Enum): 39 | __doc__ = "Geometry PostgreSQL data types" 40 | geometry = "geometry" 41 | 42 | 43 | class PGType(str, Enum): 44 | __doc__ = "PostgreSQL data type enumeration" 45 | 46 | 47 | # extend PGType with the types listed above 48 | sub_classes: List[Enum] = [ 49 | PGDateType, 50 | PGTextType, 51 | PGNumericType, 52 | PGGeometryType, 53 | PGOtherType, 54 | ] 55 | for sub_class in sub_classes: 56 | for field in sub_class: 57 | extend_enum(PGType, field.name, field.value) 58 | -------------------------------------------------------------------------------- /app/models/enum/queries.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class QueryFormat(str, Enum): 5 | json = "json" 6 | csv = "csv" 7 | 8 | 9 | class QueryType(str, Enum): 10 | table = "table" 11 | raster = "raster" 12 | -------------------------------------------------------------------------------- /app/models/enum/sources.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class SourceType(str, Enum): 5 | raster = "raster" 6 | table = "table" 7 | vector = "vector" 8 | 9 | 10 | class RasterSourceType(str, Enum): 11 | raster = "raster" 12 | 13 | 14 | class TableSourceType(str, Enum): 15 | __doc__ = "Source type of input file."
16 | table = "table" 17 | 18 | 19 | class VectorSourceType(str, Enum): 20 | vector = "vector" 21 | -------------------------------------------------------------------------------- /app/models/enum/versions.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class VersionStatus(str, Enum): 5 | saved = "saved" 6 | pending = "pending" 7 | failed = "failed" 8 | -------------------------------------------------------------------------------- /app/models/orm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/orm/__init__.py -------------------------------------------------------------------------------- /app/models/orm/api_keys.py: -------------------------------------------------------------------------------- 1 | from app.application import db 2 | from app.models.orm.base import Base 3 | 4 | 5 | class ApiKey(Base): 6 | __tablename__ = "api_keys" 7 | alias = db.Column(db.String, nullable=False) 8 | user_id = db.Column(db.String, nullable=False) 9 | api_key = db.Column(db.UUID, primary_key=True) 10 | organization = db.Column(db.String, nullable=False) 11 | email = db.Column(db.String, nullable=False) 12 | domains = db.Column(db.ARRAY(db.String), nullable=False) 13 | expires_on = db.Column(db.DateTime) 14 | 15 | _api_keys_alias_user_id_unique = db.UniqueConstraint( 16 | "alias", "user_id", name="alias_user_id_uc" 17 | ) 18 | _api_keys_api_key_idx = db.Index( 19 | "api_keys_api_key_idx", "api_key", postgresql_using="hash" 20 | ) 21 | _api_keys_user_id_idx = db.Index( 22 | "api_keys_user_id_idx", "user_id", postgresql_using="btree" 23 | ) 24 | -------------------------------------------------------------------------------- /app/models/orm/assets.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Asset(Base): 5 | __tablename__ = "assets" 6 | asset_id = db.Column(db.UUID, primary_key=True) 7 | dataset = db.Column(db.String, nullable=False, index=True) 8 | version = db.Column(db.String, nullable=False, index=True) 9 | asset_type = db.Column(db.String, nullable=False) 10 | asset_uri = db.Column(db.String, nullable=False) 11 | status = db.Column(db.String, nullable=False, default="pending") 12 | is_managed = db.Column(db.Boolean, nullable=False, default=True) 13 | is_default = db.Column(db.Boolean, nullable=False, default=False) 14 | is_downloadable = db.Column(db.Boolean, nullable=False, default=True) 15 | creation_options = db.Column(db.JSONB, nullable=False, default=dict()) 16 | # metadata = db.Column(db.JSONB, nullable=False, default=dict()) 17 | fields = db.Column(db.JSONB, nullable=False, default=list()) 18 | extent = db.Column(db.JSONB, nullable=True, default=None) 19 | stats = db.Column(db.JSONB, nullable=True, default=None) 20 | change_log = db.Column(db.ARRAY(db.JSONB), nullable=False, default=list()) 21 | 22 | fk = db.ForeignKeyConstraint( 23 | ["dataset", "version"], 24 | ["versions.dataset", "versions.version"], 25 | name="fk", 26 | onupdate="CASCADE", 27 | ondelete="CASCADE", 28 | ) 29 | 30 | uq_asset_uri = db.UniqueConstraint("asset_uri", name="uq_asset_uri") 31 | -------------------------------------------------------------------------------- /app/models/orm/base.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 |
from geoalchemy2 import Geometry 4 | from sqlalchemy.dialects.postgresql import ARRAY, JSONB, TEXT, UUID 5 | from sqlalchemy_utils import EmailType, generic_repr 6 | 7 | from ...application import db 8 | 9 | db.JSONB, db.UUID, db.ARRAY, db.EmailType, db.TEXT, db.Geometry = ( 10 | JSONB, 11 | UUID, 12 | ARRAY, 13 | EmailType, 14 | TEXT, 15 | Geometry, 16 | ) 17 | 18 | 19 | @generic_repr 20 | class Base(db.Model): # type: ignore 21 | __abstract__ = True 22 | created_on = db.Column( 23 | db.DateTime, default=datetime.utcnow, server_default=db.func.now() 24 | ) 25 | updated_on = db.Column( 26 | db.DateTime, 27 | default=datetime.utcnow, 28 | onupdate=datetime.utcnow, 29 | server_default=db.func.now(), 30 | ) 31 | -------------------------------------------------------------------------------- /app/models/orm/datamart.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class AnalysisResult(Base): 5 | __tablename__ = "analysis_results" 6 | id = db.Column(db.UUID, primary_key=True) 7 | endpoint = db.Column(db.String) 8 | result = db.Column(db.JSONB) 9 | metadata = db.Column(db.JSONB) 10 | status = db.Column(db.String) 11 | requested_by = db.Column( 12 | db.UUID, db.ForeignKey("api_keys.api_key", name="api_key_fk") 13 | ) 14 | message = db.Column(db.String) 15 | 16 | _api_keys_api_key_idx = db.Index( 17 | "analysis_results_id_idx", "id", postgresql_using="hash" 18 | ) 19 | -------------------------------------------------------------------------------- /app/models/orm/dataset_metadata.py: -------------------------------------------------------------------------------- 1 | import sqlalchemy as sa 2 | 3 | from .base import Base, db 4 | from .mixins import MetadataMixin 5 | 6 | 7 | class DatasetMetadata(Base, MetadataMixin): 8 | __tablename__ = "dataset_metadata" 9 | 10 | id = db.Column(db.UUID, primary_key=True) 11 | dataset = db.Column(db.String, nullable=False, unique=True) 12 | source = db.Column(db.String, nullable=True) 13 | license = db.Column(db.String) 14 | data_language = db.Column(db.String, nullable=True) 15 | overview = db.Column(db.String, nullable=True) 16 | 17 | function = db.Column(db.String) 18 | cautions = db.Column(db.String) 19 | key_restrictions = db.Column(db.String) 20 | tags = db.Column(sa.ARRAY(db.String)) 21 | why_added = db.Column(db.String) 22 | learn_more = db.Column(db.String) 23 | 24 | fk = db.ForeignKeyConstraint( 25 | ["dataset"], 26 | ["datasets.dataset"], 27 | name="fk", 28 | onupdate="CASCADE", 29 | ondelete="CASCADE", 30 | ) 31 | -------------------------------------------------------------------------------- /app/models/orm/datasets.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Dataset(Base): 5 | __tablename__ = "datasets" 6 | dataset = db.Column(db.String, primary_key=True) 7 | is_downloadable = db.Column(db.Boolean, nullable=False, default=True) 8 | owner_id = db.Column(db.String, nullable=True, default=None) 9 | # metadata = db.Column(db.JSONB, default=dict()) 10 | -------------------------------------------------------------------------------- /app/models/orm/geostore.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | ######## 4 | # NOTE # 5 | ######## 6 | # UserAreas doesn't officially inherit from Geostore in a class hierarchy, but it 7 | # DOES inherit in the DB (via a custom migration). 
So any time you change the 8 | # Geostore table, change UserAreas as well! And vice versa, of course. 9 | 10 | 11 | class Geostore(Base): 12 | __tablename__ = "geostore" 13 | 14 | gfw_geostore_id = db.Column(db.UUID, primary_key=True) 15 | gfw_geojson = db.Column(db.TEXT) 16 | gfw_area__ha = db.Column(db.Numeric) 17 | gfw_bbox = db.Column(db.ARRAY(db.Numeric)) 18 | 19 | _geostore_gfw_geostore_id_idx = db.Index( 20 | "geostore_gfw_geostore_id_idx", "gfw_geostore_id", postgresql_using="hash" 21 | ) 22 | -------------------------------------------------------------------------------- /app/models/orm/migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /app/models/orm/migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | import sqlalchemy_utils 11 | ${imports if imports else ""} 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = ${repr(up_revision)} 15 | down_revision = ${repr(down_revision)} 16 | branch_labels = ${repr(branch_labels)} 17 | depends_on = ${repr(depends_on)} 18 | 19 | 20 | def upgrade(): 21 | ${upgrades if upgrades else "pass"} 22 | 23 | 24 | def downgrade(): 25 | ${downgrades if downgrades else "pass"} 26 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/3e524ef0525f_.py: -------------------------------------------------------------------------------- 1 | """empty message. 2 | 3 | Revision ID: 3e524ef0525f 4 | Revises: 604bf4e66c2b 5 | Create Date: 2024-12-18 00:43:46.681427 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "3e524ef0525f" 12 | down_revision = "604bf4e66c2b" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade(): 18 | # ### commands auto generated by Alembic - please adjust! ### 19 | op.create_index(op.f("ix_assets_dataset"), "assets", ["dataset"], unique=False) 20 | op.create_index(op.f("ix_assets_version"), "assets", ["version"], unique=False) 21 | op.add_column("dataset_metadata", sa.Column("subtitle", sa.String(), nullable=True)) 22 | op.add_column("version_metadata", sa.Column("subtitle", sa.String(), nullable=True)) 23 | # ### end Alembic commands ### 24 | 25 | 26 | def downgrade(): 27 | # ### commands auto generated by Alembic - please adjust! 
### 28 | op.drop_column("version_metadata", "subtitle") 29 | op.drop_column("dataset_metadata", "subtitle") 30 | op.drop_index(op.f("ix_assets_version"), table_name="assets") 31 | op.drop_index(op.f("ix_assets_dataset"), table_name="assets") 32 | # ### end Alembic commands ### 33 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/604bf4e66c2b_.py: -------------------------------------------------------------------------------- 1 | """Add content_date_description to version_metadata 2 | 3 | Revision ID: 604bf4e66c2b 4 | Revises: ef3392e8e054 5 | Create Date: 2024-10-31 16:52:56.571782 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | import sqlalchemy_utils 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = '604bf4e66c2b' 15 | down_revision = 'ef3392e8e054' 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.add_column('version_metadata', sa.Column('content_date_description', sa.String(), nullable=True)) 23 | # ### end Alembic commands ### 24 | 25 | 26 | def downgrade(): 27 | # ### commands auto generated by Alembic - please adjust! ### 28 | op.drop_column('version_metadata', 'content_date_description') 29 | # ### end Alembic commands ### 30 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/a5787f2eefe5_.py: -------------------------------------------------------------------------------- 1 | """Adding dataset version alias table. 2 | 3 | Revision ID: a5787f2eefe5 4 | Revises: 4763f4b8141a 5 | Create Date: 2021-09-27 22:12:26.964711 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "a5787f2eefe5" 12 | down_revision = "4763f4b8141a" # pragma: allowlist secret 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade(): 18 | op.create_table( 19 | "aliases", 20 | sa.Column("alias", sa.String(), nullable=False), 21 | sa.Column("dataset", sa.String(), nullable=False), 22 | sa.Column("version", sa.String(), nullable=False), 23 | sa.Column( 24 | "created_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 25 | ), 26 | sa.Column( 27 | "updated_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 28 | ), 29 | sa.ForeignKeyConstraint( 30 | ["dataset", "version"], 31 | ["versions.dataset", "versions.version"], 32 | name="fk", 33 | onupdate="CASCADE", 34 | ondelete="CASCADE", 35 | ), 36 | sa.PrimaryKeyConstraint("dataset", "alias"), 37 | ) 38 | 39 | 40 | def downgrade(): 41 | op.drop_table("aliases") 42 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/aa5aefcbdfcf_.py: -------------------------------------------------------------------------------- 1 | """empty message. 2 | 3 | Revision ID: aa5aefcbdfcf 4 | Revises: 4763f4b8141a 5 | Create Date: 2021-10-12 22:17:29.106137 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | from sqlalchemy.dialects import postgresql 10 | 11 | # revision identifiers, used by Alembic. 12 | revision = "aa5aefcbdfcf" # pragma: allowlist secret 13 | down_revision = "a5787f2eefe5" # pragma: allowlist secret 14 | branch_labels = None 15 | depends_on = None 16 | 17 | 18 | def upgrade(): 19 | # ### commands auto generated by Alembic - please adjust! 
### 20 | op.add_column( 21 | "assets", 22 | sa.Column( 23 | "revision_history", 24 | postgresql.ARRAY(postgresql.JSONB(astext_type=sa.Text())), 25 | nullable=True, 26 | ), 27 | ) 28 | op.add_column("assets", sa.Column("latest_revision", sa.String(), nullable=True)) 29 | op.add_column("assets", sa.Column("source_version", sa.String(), nullable=True)) 30 | # ### end Alembic commands ### 31 | 32 | 33 | def downgrade(): 34 | # ### commands auto generated by Alembic - please adjust! ### 35 | op.drop_column("assets", "source_version") 36 | op.drop_column("assets", "latest_revision") 37 | op.drop_column("assets", "revision_history") 38 | # ### end Alembic commands ### 39 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/d62a9b15f844_.py: -------------------------------------------------------------------------------- 1 | """Create API Key Table. 2 | 3 | Revision ID: d62a9b15f844 4 | Revises: 73fb3f5e39b8 5 | Create Date: 2021-05-01 01:29:13.157933 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | from sqlalchemy.dialects import postgresql 10 | 11 | # revision identifiers, used by Alembic. 12 | revision = "d62a9b15f844" # pragma: allowlist secret 13 | down_revision = "73fb3f5e39b8" # pragma: allowlist secret 14 | branch_labels = None 15 | depends_on = None 16 | 17 | 18 | def upgrade(): 19 | # ### commands auto generated by Alembic - please adjust! ### 20 | op.create_table( 21 | "api_keys", 22 | sa.Column("alias", sa.String(), nullable=False), 23 | sa.Column("user_id", sa.String(), nullable=False), 24 | sa.Column("api_key", postgresql.UUID(), nullable=False), 25 | sa.Column("organization", sa.String(), nullable=False), 26 | sa.Column("email", sa.String(), nullable=False), 27 | sa.Column("domains", postgresql.ARRAY(sa.String()), nullable=False), 28 | sa.Column("expires_on", sa.DateTime(), nullable=True), 29 | sa.Column( 30 | "created_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 31 | ), 32 | sa.Column( 33 | "updated_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 34 | ), 35 | sa.PrimaryKeyConstraint("api_key"), 36 | ) 37 | 38 | op.create_index( 39 | "api_keys_api_key_idx", 40 | "api_keys", 41 | ["api_key"], 42 | unique=False, 43 | postgresql_using="hash", 44 | ) 45 | op.create_index( 46 | "api_keys_user_id_idx", 47 | "api_keys", 48 | ["user_id"], 49 | unique=False, 50 | postgresql_using="btree", 51 | ) 52 | op.create_unique_constraint("alias_user_id_uc", "api_keys", ["alias", "user_id"]) 53 | # ### end Alembic commands ### 54 | 55 | 56 | def downgrade(): 57 | # ### commands auto generated by Alembic - please adjust! ### 58 | op.drop_table("api_keys") 59 | # ### end Alembic commands ### 60 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/d767b6dd2c4c_.py: -------------------------------------------------------------------------------- 1 | """empty message. 2 | 3 | Revision ID: d767b6dd2c4c 4 | Revises: 04fcb4f2408a 5 | Create Date: 2024-04-25 19:38:35.223004 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "d767b6dd2c4c" 12 | down_revision = "04fcb4f2408a" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade(): 18 | # ### commands auto generated by Alembic - please adjust! 
### 19 | op.add_column("datasets", sa.Column("owner_id", sa.String(), nullable=True)) 20 | # ### end Alembic commands ### 21 | 22 | 23 | def downgrade(): 24 | # ### commands auto generated by Alembic - please adjust! ### 25 | op.drop_column("datasets", "owner_id") 26 | # ### end Alembic commands ### 27 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/d8f049f00259_add_analysis_results_table.py: -------------------------------------------------------------------------------- 1 | """Add analysis results table for datamart endpoints 2 | 3 | Revision ID: d8f049f00259 4 | Revises: 3e524ef0525f 5 | Create Date: 2025-03-03 09:27:19.271840 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | import sqlalchemy_utils 12 | from sqlalchemy.dialects import postgresql 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "d8f049f00259" 16 | down_revision = "3e524ef0525f" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! ### 23 | op.create_table( 24 | "analysis_results", 25 | sa.Column( 26 | "id", 27 | postgresql.UUID(), 28 | nullable=False, 29 | ), 30 | sa.Column("endpoint", sa.String()), 31 | sa.Column("result", postgresql.JSONB(astext_type=sa.Text()), nullable=True), 32 | sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), 33 | sa.Column("status", sa.String(), nullable=False, default="pending"), 34 | sa.Column("requested_by", postgresql.UUID(), nullable=True), 35 | sa.Column("message", sa.String(), nullable=True), 36 | sa.Column( 37 | "created_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 38 | ), 39 | sa.Column( 40 | "updated_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 41 | ), 42 | sa.ForeignKeyConstraint( 43 | ["requested_by"], 44 | ["api_keys.api_key"], 45 | name="fk", 46 | onupdate="SET NULL", 47 | ondelete="SET NULL", 48 | ), 49 | sa.PrimaryKeyConstraint("id"), 50 | ) 51 | op.create_index( 52 | "analysis_results_id_idx", 53 | "analysis_results", 54 | ["id"], 55 | unique=False, 56 | postgresql_using="hash", 57 | ), 58 | # ### end Alembic commands ### 59 | 60 | 61 | def downgrade(): 62 | # ### commands auto generated by Alembic - please adjust! ### 63 | op.drop_table("analysis_results") 64 | 65 | # ### end Alembic commands ### 66 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/ef3392e8e054_.py: -------------------------------------------------------------------------------- 1 | """update resolution metadata fields 2 | 3 | Revision ID: ef3392e8e054 4 | Revises: d767b6dd2c4c 5 | Create Date: 2024-09-10 14:19:43.424752 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | import sqlalchemy_utils 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = 'ef3392e8e054' 15 | down_revision = 'd767b6dd2c4c' 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! 
### 22 | op.alter_column('dataset_metadata', 'resolution', nullable=True, new_column_name='spatial_resolution') 23 | op.add_column('dataset_metadata', sa.Column('resolution_description', sa.String(), nullable=True)) 24 | op.alter_column('version_metadata', 'resolution', nullable=True, new_column_name='spatial_resolution') 25 | op.add_column('version_metadata', sa.Column('resolution_description', sa.String(), nullable=True)) 26 | # ### end Alembic commands ### 27 | 28 | 29 | def downgrade(): 30 | # ### commands auto generated by Alembic - please adjust! ### 31 | op.alter_column('dataset_metadata', 'spatial_resolution', nullable=True, new_column_name='resolution') 32 | op.drop_column('version_metadata', 'resolution_description') 33 | op.alter_column('version_metadata', 'spatial_resolution', nullable=True, new_column_name='resolution') 34 | op.drop_column('dataset_metadata', 'resolution_description') 35 | # ### end Alembic commands ### 36 | -------------------------------------------------------------------------------- /app/models/orm/mixins.py: -------------------------------------------------------------------------------- 1 | from .base import db 2 | 3 | 4 | class MetadataMixin: 5 | title = db.Column(db.String) 6 | subtitle = db.Column(db.String) 7 | spatial_resolution = db.Column(db.Numeric) 8 | resolution_description = db.Column(db.String) 9 | geographic_coverage = db.Column(db.String) 10 | update_frequency = db.Column(db.String) 11 | citation = db.Column(db.String) 12 | scale = db.Column(db.String) 13 | -------------------------------------------------------------------------------- /app/models/orm/queries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/orm/queries/__init__.py -------------------------------------------------------------------------------- /app/models/orm/queries/datasets.py: -------------------------------------------------------------------------------- 1 | from ....application import db 2 | 3 | _dataset_sql = """ 4 | SELECT 5 | datasets.*, 6 | version_array AS versions, 7 | coalesce(metadata, '{}') as metadata 8 | FROM 9 | datasets 10 | LEFT JOIN 11 | ( 12 | SELECT 13 | dataset, 14 | ARRAY_AGG(version) AS version_array 15 | FROM 16 | versions 17 | GROUP BY 18 | dataset 19 | ) 20 | t USING (dataset) 21 | LEFT JOIN 22 | ( 23 | SELECT dataset, ROW_TO_JSON(dataset_metadata.*) as metadata 24 | FROM 25 | dataset_metadata 26 | ) 27 | m USING (dataset) 28 | ORDER BY dataset 29 | LIMIT(:limit) 30 | OFFSET(:offset);""" 31 | 32 | all_datasets = db.text(_dataset_sql) 33 | -------------------------------------------------------------------------------- /app/models/orm/queries/fields.py: -------------------------------------------------------------------------------- 1 | from ....application import db 2 | 3 | _fields_sql = """ 4 | SELECT 5 | column_name as name, CASE WHEN data_type = 'USER-DEFINED' THEN udt_name ELSE data_type END as data_type 6 | FROM information_schema.columns 7 | WHERE 8 | table_schema = :dataset AND table_name = :version;""" 9 | 10 | fields = db.text(_fields_sql) 11 | -------------------------------------------------------------------------------- /app/models/orm/queries/raster_assets.py: -------------------------------------------------------------------------------- 1 | data_environment_raster_tile_sets = """ 2 | SELECT 3 | assets.asset_id, 4 | assets.dataset, 5 | assets.version, 6 | creation_options, 7 | 
asset_uri, 8 | rb.values_table 9 | FROM 10 | assets 11 | LEFT JOIN asset_metadata am 12 | ON am.asset_id = assets.asset_id 13 | JOIN versions 14 | ON versions.dataset = assets.dataset 15 | AND versions.version = assets.version 16 | LEFT JOIN raster_band_metadata rb 17 | ON rb.asset_metadata_id = am.id 18 | WHERE assets.asset_type = 'Raster tile set' 19 | AND assets.creation_options->>'pixel_meaning' NOT LIKE '%tcd%' 20 | AND assets.creation_options->>'grid' = :grid 21 | """ 22 | -------------------------------------------------------------------------------- /app/models/orm/tasks.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Task(Base): 5 | __tablename__ = "tasks" 6 | task_id = db.Column(db.UUID, primary_key=True) 7 | asset_id = db.Column(db.UUID, nullable=False) 8 | status = db.Column(db.String, nullable=False, default="pending") 9 | 10 | change_log = db.Column(db.ARRAY(db.JSONB), default=list()) 11 | 12 | fk = db.ForeignKeyConstraint( 13 | ["asset_id"], 14 | ["assets.asset_id"], 15 | name="fk", 16 | onupdate="CASCADE", 17 | ondelete="CASCADE", 18 | ) 19 | -------------------------------------------------------------------------------- /app/models/orm/user_areas.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | ######## 4 | # NOTE # 5 | ######## 6 | # UserAreas doesn't officially inherit from Geostore in a class hierarchy, but it 7 | # DOES inherit in the DB (via a custom migration). So any time you change the 8 | # Geostore table, change UserAreas as well! And vice versa, of course. 9 | 10 | 11 | class UserArea(Base): 12 | __tablename__ = "userareas" 13 | 14 | gfw_geostore_id = db.Column(db.UUID, primary_key=True) 15 | gfw_geojson = db.Column(db.TEXT) 16 | gfw_area__ha = db.Column(db.Numeric) 17 | gfw_bbox = db.Column(db.ARRAY(db.Numeric)) 18 | 19 | _userarea_gfw_geostore_id_idx = db.Index( 20 | "userarea_gfw_geostore_id_idx", "gfw_geostore_id", postgresql_using="hash" 21 | ) 22 | -------------------------------------------------------------------------------- /app/models/orm/version_metadata.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | from .mixins import MetadataMixin 3 | 4 | 5 | class VersionMetadata(Base, MetadataMixin): 6 | __tablename__ = "version_metadata" 7 | 8 | id = db.Column(db.UUID, primary_key=True) 9 | dataset = db.Column(db.String, nullable=False) 10 | version = db.Column(db.String, nullable=False) 11 | content_date = db.Column(db.Date) 12 | content_start_date = db.Column(db.Date) 13 | content_date_description = db.Column(db.String) 14 | content_end_date = db.Column(db.Date) 15 | last_update = db.Column(db.Date) 16 | description = db.Column(db.String) 17 | 18 | dataset_fk = db.ForeignKeyConstraint( 19 | ["dataset", "version"], 20 | ["versions.dataset", "versions.version"], 21 | name="dataset_fk", 22 | onupdate="CASCADE", 23 | ondelete="CASCADE", 24 | ) 25 | _unique_dataset_version = db.UniqueConstraint( 26 | "dataset", "version", name="dataset_version_uq" 27 | ) 28 | -------------------------------------------------------------------------------- /app/models/orm/versions.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Version(Base): 5 | __tablename__ = "versions" 6 | dataset = db.Column(db.String, primary_key=True) 7 | version = db.Column(db.String, 
primary_key=True) 8 | is_latest = db.Column(db.Boolean, nullable=False, default=False) 9 | is_mutable = db.Column(db.Boolean, nullable=False, default=False) 10 | is_downloadable = db.Column(db.Boolean, nullable=False, default=True) 11 | # source_type = db.Column(db.String, nullable=False) 12 | # source_uri = db.Column(db.ARRAY(db.String), default=list()) 13 | status = db.Column(db.String, nullable=False, default="pending") 14 | # has_geostore = db.Column(db.Boolean, nullable=False, default=False) 15 | # metadata = db.Column(db.JSONB, default=dict()) 16 | change_log = db.Column(db.ARRAY(db.JSONB), default=list()) 17 | # creation_options = db.Column(db.JSONB, default=dict()) 18 | 19 | fk = db.ForeignKeyConstraint( 20 | ["dataset"], 21 | ["datasets.dataset"], 22 | name="fk", 23 | onupdate="CASCADE", 24 | ondelete="CASCADE", 25 | ) 26 | -------------------------------------------------------------------------------- /app/models/pydantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/pydantic/__init__.py -------------------------------------------------------------------------------- /app/models/pydantic/analysis.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from ..enum.analysis import RasterLayer 4 | from .base import StrictBaseModel 5 | from .geostore import Geometry 6 | 7 | 8 | class ZonalAnalysisRequestIn(StrictBaseModel): 9 | geometry: Geometry 10 | sum: List[RasterLayer] 11 | group_by: List[RasterLayer] = list() 12 | filters: List[RasterLayer] = list() 13 | start_date: Optional[str] = None 14 | end_date: Optional[str] = None 15 | -------------------------------------------------------------------------------- /app/models/pydantic/base.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from pydantic import BaseModel, Extra 4 | 5 | 6 | class BaseORMRecord(BaseModel): 7 | class Config: 8 | orm_mode = True 9 | 10 | 11 | class BaseRecord(BaseModel): 12 | created_on: datetime 13 | updated_on: datetime 14 | 15 | class Config: 16 | orm_mode = True 17 | 18 | 19 | class StrictBaseModel(BaseModel): 20 | class Config: 21 | extra = Extra.forbid 22 | validate_assignment = True 23 | 24 | -------------------------------------------------------------------------------- /app/models/pydantic/change_log.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import List, Optional 3 | 4 | from ..enum.change_log import ChangeLogStatus, ChangeLogStatusTaskIn 5 | from .base import StrictBaseModel 6 | from .responses import Response 7 | 8 | 9 | class ChangeLog(StrictBaseModel): 10 | date_time: datetime 11 | status: ChangeLogStatus 12 | message: str 13 | detail: Optional[str] = None 14 | 15 | 16 | class ChangeLogTaskIn(StrictBaseModel): 17 | date_time: datetime 18 | status: ChangeLogStatusTaskIn 19 | message: str 20 | detail: Optional[str] = None 21 | 22 | 23 | class ChangeLogResponse(Response): 24 | data: List[ChangeLog] 25 | -------------------------------------------------------------------------------- /app/models/pydantic/database.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Union 2 | 3 | from pydantic import BaseModel, Field, fields, validator 4 | 
from sqlalchemy.engine.url import URL 5 | from starlette.datastructures import Secret 6 | 7 | 8 | class DatabaseURL(BaseModel): 9 | drivername: str = Field(..., alias="driver", description="The database driver.") 10 | host: str = Field("localhost", description="Server host.") 11 | port: Optional[Union[str, int]] = Field(None, description="Server access port.") 12 | username: Optional[str] = Field(None, alias="user", description="Username") 13 | password: Optional[Union[str, Secret]] = Field(None, description="Password") 14 | database: str = Field(..., description="Database name.") 15 | url: Optional[URL] = None 16 | 17 | class Config: 18 | arbitrary_types_allowed = True 19 | allow_population_by_field_name = True 20 | 21 | @validator("url", always=True) 22 | def build_url(cls, v: Any, field: fields.Field, values: dict): 23 | if isinstance(v, URL): 24 | return v 25 | args = {k: str(v) for k, v in values.items() if v is not None} 26 | return URL(**args) 27 | -------------------------------------------------------------------------------- /app/models/pydantic/datasets.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from .base import BaseRecord, StrictBaseModel 6 | from .metadata import DatasetMetadata, DatasetMetadataOut, DatasetMetadataUpdate 7 | from .responses import PaginationLinks, PaginationMeta, Response 8 | 9 | 10 | class Dataset(BaseRecord): 11 | dataset: str 12 | is_downloadable: bool 13 | metadata: Optional[Union[DatasetMetadataOut, BaseModel]] 14 | versions: Optional[List[str]] = list() 15 | 16 | 17 | class DatasetCreateIn(StrictBaseModel): 18 | is_downloadable: bool = Field( 19 | True, 20 | description="Flag to specify if assets associated with dataset can be downloaded. " 21 | "All associated versions and assets will inherit this value. " 22 | "Value can be overridden at version or asset level.", 23 | ) 24 | metadata: DatasetMetadata 25 | 26 | 27 | class DatasetUpdateIn(StrictBaseModel): 28 | is_downloadable: Optional[bool] 29 | metadata: Optional[DatasetMetadataUpdate] 30 | owner_id: Optional[str] 31 | 32 | 33 | class DatasetResponse(Response): 34 | data: Dataset 35 | 36 | 37 | class DatasetsResponse(Response): 38 | data: List[Dataset] 39 | 40 | 41 | class PaginatedDatasetsResponse(DatasetsResponse): 42 | links: PaginationLinks 43 | meta: PaginationMeta 44 | -------------------------------------------------------------------------------- /app/models/pydantic/downloads.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import Field 4 | 5 | from app.models.enum.creation_options import Delimiters 6 | from app.models.pydantic.base import StrictBaseModel 7 | from app.models.pydantic.geostore import Geometry 8 | 9 | 10 | class DownloadJSONIn(StrictBaseModel): 11 | sql: str = Field(..., description="SQL query.") 12 | geometry: Optional[Geometry] = Field( 13 | None, description="A geojson geometry to be used as spatial filter." 14 | ) 15 | filename: str = Field("export.json", description="Name of export file.") 16 | 17 | 18 | class DownloadCSVIn(DownloadJSONIn): 19 | filename: str = Field("export.csv", description="Name of export file.") 20 | delimiter: Delimiters = Field( 21 | Delimiters.comma, description="Delimiter to use for CSV file."
22 | ) 23 | -------------------------------------------------------------------------------- /app/models/pydantic/extent.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from ..pydantic.responses import Response 4 | from .geostore import FeatureCollection 5 | 6 | 7 | class Extent(FeatureCollection): 8 | pass 9 | 10 | 11 | class ExtentResponse(Response): 12 | data: Optional[Extent] 13 | -------------------------------------------------------------------------------- /app/models/pydantic/features.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from .responses import Response 4 | 5 | 6 | class FeatureResponse(Response): 7 | data: Dict[str, Any] 8 | 9 | 10 | class FeaturesResponse(Response): 11 | data: List[Dict[str, Any]] 12 | -------------------------------------------------------------------------------- /app/models/pydantic/query.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from app.models.enum.creation_options import Delimiters 4 | from app.models.pydantic.base import StrictBaseModel 5 | from app.models.pydantic.geostore import FeatureCollection, Geometry 6 | from pydantic import Field 7 | 8 | 9 | class QueryRequestIn(StrictBaseModel): 10 | geometry: Optional[Geometry] 11 | sql: str 12 | 13 | 14 | class QueryBatchRequestIn(StrictBaseModel): 15 | feature_collection: Optional[FeatureCollection] = Field( 16 | None, description="An inline collection of GeoJson features on which to do the same query" 17 | ) 18 | uri: Optional[str] = Field( 19 | None, description="URI to a vector file in a variety of formats supported by Geopandas, including GeoJson and CSV format, giving a list of features on which to do the same query. For a CSV file, the column with the geometry in WKB format should be named 'WKT' (not 'WKB')" 20 | ) 21 | geostore_ids: Optional[List[str]] = Field( 22 | None, description="An inline list of ResourceWatch geostore ids" 23 | ) 24 | id_field: str = Field( 25 | "fid", description="Name of field with the feature id, for use in labeling the results for each feature. This field must contain a unique value for each feature. If geostore_ids are specified, then they will automatically be used as the feature id for labeling the results." 
26 | ) 27 | sql: str 28 | 29 | 30 | class CsvQueryRequestIn(QueryRequestIn): 31 | delimiter: Delimiters = Delimiters.comma 32 | -------------------------------------------------------------------------------- /app/models/pydantic/raster_analysis.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union 2 | 3 | from ..enum.pixetl import Grid 4 | from .asset_metadata import RasterTable 5 | from .base import StrictBaseModel 6 | from .creation_options import NoDataType 7 | 8 | 9 | class BaseLayer(StrictBaseModel): 10 | name: str 11 | no_data: Optional[NoDataType] 12 | 13 | 14 | class EncodedLayer(BaseLayer): 15 | raster_table: Optional[RasterTable] = None 16 | decode_expression: str = "" 17 | encode_expression: str = "" 18 | 19 | 20 | class SourceLayer(EncodedLayer): 21 | source_uri: str 22 | grid: Grid 23 | tile_scheme: str = "nw" 24 | 25 | 26 | class DerivedLayer(EncodedLayer): 27 | source_layer: str 28 | calc: str 29 | 30 | 31 | Layer = Union[SourceLayer, DerivedLayer] 32 | 33 | 34 | class DataEnvironment(StrictBaseModel): 35 | layers: List[Layer] 36 | -------------------------------------------------------------------------------- /app/models/pydantic/responses.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from pydantic import Field 4 | 5 | from .base import StrictBaseModel 6 | 7 | 8 | class Response(StrictBaseModel): 9 | data: Any 10 | status: str = "success" 11 | 12 | 13 | class PaginationLinks(StrictBaseModel): 14 | self: str = Field( 15 | ..., 16 | title="Contains the URL for the current page", 17 | example="https://data-api.globalforestwatch.org/:model?page[number]=1&page[size]=25", 18 | ) 19 | first: str = Field( 20 | ..., 21 | title="Contains the URL for the first page", 22 | example="https://data-api.globalforestwatch.org/:model?page[number]=1&page[size]=25", 23 | ) 24 | last: str = Field( 25 | ..., 26 | title="Contains the URL for the last page", 27 | example="https://data-api.globalforestwatch.org/:model?page[number]=4&page[size]=25", 28 | ) 29 | prev: Optional[str] = Field( 30 | None, title="Contains the URL for the previous page", example="" 31 | ) 32 | next: Optional[str] = Field( 33 | None, 34 | title="Contains the URL for the next page", 35 | example="https://data-api.globalforestwatch.org/:model?page[number]=2&page[size]=25", 36 | ) 37 | 38 | 39 | class PaginationMeta(StrictBaseModel): 40 | size: int = Field( 41 | ..., 42 | title="The page size. 
Reflects the value used in the page[size] query parameter (or the default size of 10 if not provided)", 43 | example="25", 44 | ) 45 | total_items: int = Field( 46 | ..., 47 | title="Contains the total number of items", 48 | example="100", 49 | ) 50 | total_pages: int = Field( 51 | ..., 52 | title="Contains the total number of pages, assuming the page size specified in the page[size] query parameter", 53 | example="4", 54 | ) 55 | -------------------------------------------------------------------------------- /app/models/pydantic/sources.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from app.models.pydantic.base import StrictBaseModel 4 | 5 | 6 | class Source(StrictBaseModel): 7 | source_uri: Optional[List[str]] 8 | -------------------------------------------------------------------------------- /app/models/pydantic/symbology.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Tuple, Union 2 | 3 | from pydantic import Field, StrictInt, validator 4 | 5 | from app.models.enum.creation_options import ColorMapType 6 | from app.models.pydantic.base import StrictBaseModel 7 | 8 | 9 | class RGB(StrictBaseModel): 10 | red: int = Field(..., ge=0, le=255) 11 | green: int = Field(..., ge=0, le=255) 12 | blue: int = Field(..., ge=0, le=255) 13 | 14 | def tuple(self) -> Tuple[int, int, int]: 15 | return self.red, self.green, self.blue 16 | 17 | 18 | class RGBA(StrictBaseModel): 19 | red: int = Field(..., ge=0, le=255) 20 | green: int = Field(..., ge=0, le=255) 21 | blue: int = Field(..., ge=0, le=255) 22 | alpha: int = Field(..., ge=0, le=255) 23 | 24 | def tuple(self) -> Tuple[int, int, int, int]: 25 | return self.red, self.green, self.blue, self.alpha 26 | 27 | 28 | class Symbology(StrictBaseModel): 29 | type: ColorMapType 30 | colormap: Optional[Dict[Union[StrictInt, float], Union[RGB, RGBA]]] 31 | 32 | @validator("colormap") 33 | def colormap_alpha_val(cls, v, values): 34 | if v is not None: 35 | break_points = [value for key, value in v.items()] 36 | if "type" in values and values["type"] in ( 37 | ColorMapType.discrete_intensity, 38 | ColorMapType.gradient_intensity, 39 | ): 40 | assert all( 41 | isinstance(value, RGB) for value in break_points 42 | ), "Breakpoints for intensity colormaps must not include alpha values" 43 | assert ( 44 | len(set([type(value) for value in break_points])) == 1 45 | ), "Colormap breakpoints must be either all RGB or all RGBA" 46 | return v 47 | -------------------------------------------------------------------------------- /app/models/pydantic/tasks.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from uuid import UUID 3 | 4 | from .base import BaseRecord, StrictBaseModel 5 | from .change_log import ChangeLog 6 | from .responses import PaginationLinks, PaginationMeta, Response 7 | 8 | 9 | class Task(BaseRecord): 10 | task_id: UUID 11 | asset_id: UUID 12 | change_log: List[ChangeLog] 13 | 14 | 15 | class TaskCreateIn(StrictBaseModel): 16 | asset_id: UUID 17 | change_log: List[ChangeLog] 18 | 19 | 20 | class TaskUpdateIn(StrictBaseModel): 21 | change_log: List[ChangeLog] 22 | 23 | 24 | class TaskResponse(Response): 25 | data: Task 26 | 27 | 28 | class TasksResponse(Response): 29 | data: List[Task] 30 | 31 | 32 | class PaginatedTasksResponse(TasksResponse): 33 | links: PaginationLinks 34 | meta: PaginationMeta 35 | 
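Editor's illustrative sketch (not a file in the repository): a minimal example of how the pagination models defined above in responses.py and tasks.py fit together. All field values are hypothetical; in the running API the links and meta objects are built by the pagination helper in app/utils/paginate.py rather than by hand.

from app.models.pydantic.responses import PaginationLinks, PaginationMeta
from app.models.pydantic.tasks import PaginatedTasksResponse

# Hypothetical page links for a 100-item collection split into pages of 25.
links = PaginationLinks(
    self="https://data-api.globalforestwatch.org/tasks?page[number]=1&page[size]=25",
    first="https://data-api.globalforestwatch.org/tasks?page[number]=1&page[size]=25",
    last="https://data-api.globalforestwatch.org/tasks?page[number]=4&page[size]=25",
)
meta = PaginationMeta(size=25, total_items=100, total_pages=4)

# data is left empty to keep the sketch self-contained; real responses carry Task records.
response = PaginatedTasksResponse(data=[], links=links, meta=meta)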
-------------------------------------------------------------------------------- /app/models/pydantic/user_job.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from uuid import UUID 3 | 4 | from pydantic import BaseModel 5 | 6 | from .responses import Response 7 | 8 | 9 | class UserJob(BaseModel): 10 | job_id: UUID 11 | job_link: Optional[str] # Full URL to check the job status 12 | status: str = "pending" # Can be pending, success, partial_success, failure, and error 13 | message: Optional[str] # Error message when status is "error" 14 | download_link: Optional[str] = None 15 | failed_geometries_link: Optional[str] = None 16 | progress: Optional[str] = "0%" 17 | 18 | 19 | class UserJobResponse(Response): 20 | data: UserJob 21 | -------------------------------------------------------------------------------- /app/routes/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from fastapi import Depends, HTTPException, Path 4 | from fastapi.security import OAuth2PasswordBearer 5 | 6 | from ..crud.versions import get_version 7 | from ..errors import RecordNotFoundError 8 | 9 | DATASET_REGEX = r"^[a-z][a-z0-9_-]{2,}$" 10 | VERSION_REGEX = r"^v\d{1,8}(\.\d{1,3}){0,2}?$|^latest$" 11 | DATE_REGEX = r"^\d{4}(\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]))?$" 12 | oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") 13 | 14 | 15 | async def dataset_dependency( 16 | dataset: str = Path(..., title="Dataset", regex=DATASET_REGEX) 17 | ) -> str: 18 | if dataset == "latest": 19 | raise HTTPException( 20 | status_code=400, 21 | detail="Name `latest` is reserved for versions only.", 22 | ) 23 | return dataset 24 | 25 | 26 | async def version_dependency( 27 | version: str = Path(..., title="Dataset version", regex=VERSION_REGEX), 28 | ) -> str: 29 | # Middleware should have redirected GET requests to latest version already. 30 | # Any other request method should not use `latest` keyword. 
31 | if version == "latest": 32 | raise HTTPException( 33 | status_code=400, 34 | detail="You must list version name explicitly for this operation.", 35 | ) 36 | return version 37 | 38 | 39 | async def dataset_version_dependency( 40 | dataset: str = Depends(dataset_dependency), 41 | version: str = Depends(version_dependency), 42 | ) -> Tuple[str, str]: 43 | # make sure version exists 44 | try: 45 | await get_version(dataset, version) 46 | except RecordNotFoundError as e: 47 | raise HTTPException(status_code=404, detail=(str(e))) 48 | 49 | return dataset, version 50 | -------------------------------------------------------------------------------- /app/routes/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/analysis/__init__.py -------------------------------------------------------------------------------- /app/routes/assets/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from fastapi.logger import logger 4 | 5 | from ...models.orm.assets import Asset as ORMAsset 6 | from ...models.pydantic.asset_metadata import asset_metadata_factory 7 | from ...models.pydantic.assets import ( 8 | Asset, 9 | AssetResponse, 10 | AssetsResponse, 11 | PaginatedAssetsResponse, 12 | ) 13 | from ...models.pydantic.responses import PaginationLinks, PaginationMeta 14 | 15 | 16 | async def asset_response(asset_orm: ORMAsset) -> AssetResponse: 17 | """Serialize ORM response.""" 18 | 19 | data: Asset = await _serialized_asset(asset_orm) 20 | return AssetResponse(data=data) 21 | 22 | 23 | async def assets_response(assets_orm: List[ORMAsset]) -> AssetsResponse: 24 | """Serialize ORM response.""" 25 | data = [await _serialized_asset(asset_orm) for asset_orm in assets_orm] 26 | return AssetsResponse(data=data) 27 | 28 | 29 | async def paginated_assets_response( 30 | assets_orm: List[ORMAsset], links: PaginationLinks, meta: PaginationMeta 31 | ) -> PaginatedAssetsResponse: 32 | """Serialize ORM response.""" 33 | data = [await _serialized_asset(asset_orm) for asset_orm in assets_orm] 34 | return PaginatedAssetsResponse(data=data, links=links, meta=meta) 35 | 36 | 37 | async def _serialized_asset(asset_orm: ORMAsset) -> Asset: 38 | metadata = asset_metadata_factory(asset_orm) 39 | 40 | if hasattr(asset_orm, "metadata"): 41 | delattr(asset_orm, "metadata") 42 | data: Asset = Asset.from_orm(asset_orm) 43 | data.metadata = metadata 44 | 45 | logger.debug(f"Metadata: {data.metadata.dict(by_alias=True)}") 46 | return data 47 | -------------------------------------------------------------------------------- /app/routes/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/authentication/__init__.py -------------------------------------------------------------------------------- /app/routes/datamart/__init__.py: -------------------------------------------------------------------------------- 1 | OPENAPI_EXTRA = { 2 | "parameters": [ 3 | { 4 | "name": "aoi", 5 | "in": "query", 6 | "required": True, 7 | "style": "deepObject", 8 | "explode": True, 9 | "examples": { 10 | "Geostore Area Of Interest": { 11 | "summary": "Geostore Area Of Interest", 12 | "description": "Custom area", 13 | "value": { 14 | "type": "geostore", 15 | "geostore_id": 
"637d378f-93a9-4364-bfa8-95b6afd28c3a", 16 | }, 17 | }, 18 | "Admin Area Of Interest": { 19 | "summary": "Admin Area Of Interest", 20 | "description": "Administrative Boundary", 21 | "value": { 22 | "type": "admin", 23 | "country": "BRA", 24 | "region": "12", 25 | "subregion": "2", 26 | }, 27 | }, 28 | }, 29 | "description": "The Area of Interest", 30 | "schema": { 31 | "oneOf": [ 32 | {"$ref": "#/components/schemas/GeostoreAreaOfInterest"}, 33 | {"$ref": "#/components/schemas/AdminAreaOfInterest"}, 34 | {"$ref": "#/components/schemas/Global"}, 35 | ] 36 | }, 37 | }, 38 | { 39 | "name": "dataset_version", 40 | "in": "query", 41 | "required": False, 42 | "style": "deepObject", 43 | "explode": True, 44 | "schema": { 45 | "type": "object", 46 | "additionalProperties": {"type": "string"}, 47 | }, 48 | "example": { 49 | "umd_tree_cover_loss": "v1.11", 50 | "tsc_tree_cover_loss_drivers": "v2023", 51 | }, 52 | "description": ( 53 | "Pass dataset version overrides as bracketed query parameters.", 54 | ), 55 | }, 56 | ] 57 | } 58 | -------------------------------------------------------------------------------- /app/routes/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | """Datasets are just a bucket, for datasets which share the same core 2 | metadata.""" 3 | from typing import Optional, Union 4 | 5 | from fastapi import APIRouter, HTTPException, Query, Request 6 | from fastapi.responses import ORJSONResponse 7 | 8 | from app.crud.datasets import count_datasets as count_datasets_fn 9 | from app.crud.datasets import get_datasets as datasets_fn 10 | from app.models.pydantic.datasets import DatasetsResponse, PaginatedDatasetsResponse 11 | from app.settings.globals import API_URL 12 | from app.utils.paginate import paginate_collection 13 | 14 | router = APIRouter() 15 | 16 | 17 | @router.get( 18 | "", 19 | response_class=ORJSONResponse, 20 | tags=["Datasets"], 21 | response_model=Union[PaginatedDatasetsResponse, DatasetsResponse], 22 | ) 23 | async def get_datasets( 24 | request: Request, 25 | page_number: Optional[int] = Query( 26 | default=None, alias="page[number]", ge=1, description="The page number." 27 | ), 28 | page_size: Optional[int] = Query( 29 | default=None, 30 | alias="page[size]", 31 | ge=1, 32 | description="The number of datasets per page. Default is `10`.", 33 | ), 34 | ) -> Union[PaginatedDatasetsResponse, DatasetsResponse]: 35 | """Get list of all datasets. 36 | 37 | Will attempt to paginate if `page[size]` or `page[number]` is 38 | provided. Otherwise, it will attempt to return the entire list of 39 | datasets in the response. 
40 | """ 41 | if page_number or page_size: 42 | try: 43 | data, links, meta = await paginate_collection( 44 | paged_items_fn=datasets_fn, 45 | item_count_fn=count_datasets_fn, 46 | request_url=f"{API_URL}{request.url.path}", 47 | page=page_number, 48 | size=page_size, 49 | ) 50 | 51 | return PaginatedDatasetsResponse(data=data, links=links, meta=meta) 52 | except ValueError as exc: 53 | raise HTTPException(status_code=422, detail=str(exc)) from exc 54 | 55 | all_datasets = await datasets_fn() 56 | return DatasetsResponse(data=all_datasets) 57 | -------------------------------------------------------------------------------- /app/routes/datasets/geostore.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from uuid import UUID 3 | 4 | from fastapi import APIRouter, Depends, HTTPException, Path 5 | from fastapi.responses import ORJSONResponse 6 | 7 | from ...crud import geostore 8 | from ...errors import RecordNotFoundError 9 | from ...models.pydantic.geostore import Geostore, GeostoreResponse 10 | from ...routes import dataset_version_dependency 11 | 12 | router = APIRouter() 13 | 14 | 15 | @router.get( 16 | "/{dataset}/{version}/geostore/{geostore_id}", 17 | response_class=ORJSONResponse, 18 | response_model=GeostoreResponse, 19 | tags=["Geostore"], 20 | ) 21 | async def get_geostore_by_version( 22 | *, 23 | dv: Tuple[str, str] = Depends(dataset_version_dependency), 24 | geostore_id: UUID = Path(..., title="geostore_id"), 25 | ): 26 | """Retrieve GeoJSON representation for a given geostore ID of a dataset 27 | version. 28 | 29 | Obtain geostore ID from feature attributes. 30 | """ 31 | dataset, version = dv 32 | try: 33 | result: Geostore = await geostore.get_geostore_by_version( 34 | dataset, version, geostore_id 35 | ) 36 | except RecordNotFoundError as e: 37 | raise HTTPException(status_code=404, detail=str(e)) 38 | 39 | return GeostoreResponse(data=result) 40 | -------------------------------------------------------------------------------- /app/routes/geostore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/geostore/__init__.py -------------------------------------------------------------------------------- /app/routes/health.py: -------------------------------------------------------------------------------- 1 | """Assets are replicas of the original source files.""" 2 | 3 | from fastapi import APIRouter 4 | from fastapi.responses import ORJSONResponse 5 | 6 | from ..models.pydantic.responses import Response 7 | 8 | router = APIRouter() 9 | 10 | 11 | @router.get( 12 | "/ping", 13 | response_class=ORJSONResponse, 14 | tags=["Health"], 15 | response_model=Response, 16 | ) 17 | async def ping(): 18 | """Simple uptime check.""" 19 | 20 | return Response(data="pong") 21 | -------------------------------------------------------------------------------- /app/routes/jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/jobs/__init__.py -------------------------------------------------------------------------------- /app/routes/political/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/political/__init__.py -------------------------------------------------------------------------------- /app/routes/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ...models.orm.tasks import Task as ORMTask 4 | from ...models.pydantic.responses import PaginationLinks, PaginationMeta 5 | from ...models.pydantic.tasks import ( 6 | PaginatedTasksResponse, 7 | Task, 8 | TaskResponse, 9 | TasksResponse, 10 | ) 11 | 12 | 13 | def task_response(data: ORMTask) -> TaskResponse: 14 | """Assure that task responses are parsed correctly and include associated 15 | assets.""" 16 | 17 | return TaskResponse(data=data) 18 | 19 | 20 | async def tasks_response(tasks_orm: List[ORMTask]) -> TasksResponse: 21 | """Serialize ORM response.""" 22 | data = [Task.from_orm(task) for task in tasks_orm] 23 | return TasksResponse(data=data) 24 | 25 | 26 | async def paginated_tasks_response( 27 | tasks_orm: List[ORMTask], links: PaginationLinks, meta: PaginationMeta 28 | ) -> PaginatedTasksResponse: 29 | """Serialize ORM response.""" 30 | data = [Task.from_orm(task) for task in tasks_orm] 31 | return PaginatedTasksResponse(data=data, links=links, meta=meta) 32 | -------------------------------------------------------------------------------- /app/settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/settings/__init__.py -------------------------------------------------------------------------------- /app/settings/prestart.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ "${ENV}" = "dev" ]; then 3 | # in dev environment, we clone a db instance for the branch from a template database 4 | 5 | # parse out DB credentials from the secret json object 6 | DB_HOST=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.host' | sed 's/"//g') 7 | DB_PORT=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.port' | sed 's/"//g') 8 | DB_USER=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.username' | sed 's/"//g') 9 | DB_PASSWORD=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.password' | sed 's/"//g') 10 | DATABASE_MAIN=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.dbname' | sed 's/"//g') # template database 11 | DATABASE="$DATABASE_MAIN$NAME_SUFFIX" # branch database 12 | 13 | # return the branch database if it exists in pg_database. if not, create it. 14 | PGPASSWORD=$DB_PASSWORD psql -h ${DB_HOST} -p ${DB_PORT} -U ${DB_USER} -d ${DATABASE_MAIN} \ 15 | -tc "SELECT 1 FROM pg_database WHERE datname = '$DATABASE'" | grep -q 1 \ 16 | || PGPASSWORD=$DB_PASSWORD psql -h ${DB_HOST} \ 17 | -p ${DB_PORT} -U ${DB_USER} -d ${DATABASE_MAIN} \ 18 | -c "CREATE DATABASE $DATABASE WITH TEMPLATE ${DATABASE_MAIN} OWNER $DB_USER" 19 | fi 20 | 21 | alembic upgrade head -------------------------------------------------------------------------------- /app/settings/start.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env sh 2 | set -e 3 | 4 | if [ -f /app/app/main.py ]; then 5 | DEFAULT_MODULE_NAME=app.main 6 | elif [ -f /app/main.py ]; then 7 | DEFAULT_MODULE_NAME=main 8 | fi 9 | MODULE_NAME=${MODULE_NAME:-$DEFAULT_MODULE_NAME} 10 | VARIABLE_NAME=${VARIABLE_NAME:-app} 11 | export APP_MODULE=${APP_MODULE:-"$MODULE_NAME:$VARIABLE_NAME"} 12 | 13 | if [ -f /app/gunicorn_conf.py ]; then 14 | DEFAULT_GUNICORN_CONF=/app/gunicorn_conf.py 15 | elif [ -f /app/app/gunicorn_conf.py ]; then 16 | DEFAULT_GUNICORN_CONF=/app/app/gunicorn_conf.py 17 | else 18 | DEFAULT_GUNICORN_CONF=/gunicorn_conf.py 19 | fi 20 | export GUNICORN_CONF=${GUNICORN_CONF:-$DEFAULT_GUNICORN_CONF} 21 | export WORKER_CLASS=${WORKER_CLASS:-"uvicorn.workers.UvicornWorker"} 22 | 23 | # If there's a prestart.sh script in the /app directory or other path specified, run it before starting 24 | PRE_START_PATH=${PRE_START_PATH:-/app/prestart.sh} 25 | echo "Checking for script in $PRE_START_PATH" 26 | if [ -f $PRE_START_PATH ] ; then 27 | echo "Running script $PRE_START_PATH" 28 | . "$PRE_START_PATH" 29 | else 30 | echo "There is no script $PRE_START_PATH" 31 | fi 32 | 33 | export NEW_RELIC_LICENSE_KEY=$(jq -nr 'env.NEW_RELIC_LICENSE_KEY' | jq '.license_key' | sed 's/"//g') 34 | NEW_RELIC_CONFIG_FILE=/app/newrelic.ini 35 | export NEW_RELIC_CONFIG_FILE 36 | 37 | if [ "${ENV}" = "staging" ]; then 38 | export NEW_RELIC_ENVIRONMENT=staging 39 | # Start Gunicorn 40 | exec newrelic-admin run-program gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE" 41 | elif [ "${ENV}" = "production" ]; then 42 | export NEW_RELIC_ENVIRONMENT=production 43 | # Start Gunicorn 44 | exec newrelic-admin run-program gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE" 45 | else 46 | exec gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE" 47 | fi -------------------------------------------------------------------------------- /app/static/gfw-data-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/static/gfw-data-api.png -------------------------------------------------------------------------------- /app/tasks/raster_tile_cache_assets/__init__.py: -------------------------------------------------------------------------------- 1 | from .raster_tile_cache_assets import ( # noqa: F401 2 | raster_tile_cache_asset, 3 | raster_tile_cache_validator, 4 | ) 5 | -------------------------------------------------------------------------------- /app/tasks/raster_tile_set_assets/__init__.py: -------------------------------------------------------------------------------- 1 | from .raster_tile_set_assets import raster_tile_set_asset # noqa: F401 2 | -------------------------------------------------------------------------------- /app/tasks/utils.py: -------------------------------------------------------------------------------- 1 | import string 2 | from typing import Any, List 3 | 4 | from app.settings.globals import CHUNK_SIZE 5 | 6 | ALLOWABLE_CHARS = set(string.ascii_letters + string.digits + "-" + "_") 7 | 8 | 9 | class RingOfLists: 10 | """A data structure that consists of a number of lists attached to a 11 | circular buffer. 12 | 13 | One may iterate over it and append items to the element in hand in 14 | order to evenly distribute whatever it is one has amongst the 15 | different lists. Later, one can call the all() method to get all the 16 | lists to do with as one pleases. 
17 | """ 18 | 19 | def __init__(self, size: int) -> None: 20 | self._size: int = size 21 | self._lists: List[List[Any]] = [list() for i in range(0, size)] 22 | self._idx: int = -1 23 | 24 | def __next__(self): 25 | self._idx += 1 26 | if self._idx >= self._size: 27 | self._idx = 0 28 | return self._lists[self._idx] 29 | 30 | def __iter__(self): 31 | self.idx = -1 32 | return self 33 | 34 | def all(self): 35 | return self._lists 36 | 37 | 38 | def sanitize_batch_job_name(proposed_name: str) -> str: 39 | """Make a string acceptable as an AWS Batch job name According to AWS docs, 40 | the first character must be alphanumeric, the name can be up to 128 41 | characters, and ASCII uppercase + lowercase letters, numbers, hyphens, and 42 | underscores are allowed.""" 43 | short_name: str = proposed_name[:125] 44 | 45 | if not str.isalnum(short_name[0]): 46 | short_name = "x_" + proposed_name[:] 47 | 48 | filtered_name = "" 49 | for char in short_name: 50 | if char in ALLOWABLE_CHARS: 51 | filtered_name += char 52 | else: 53 | filtered_name += "_" 54 | 55 | return filtered_name 56 | 57 | 58 | def chunk_list(data: List[Any], chunk_size: int = CHUNK_SIZE) -> List[List[Any]]: 59 | """Split list into chunks of fixed size.""" 60 | return [data[x : x + chunk_size] for x in range(0, len(data), chunk_size)] 61 | -------------------------------------------------------------------------------- /app/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/utils/__init__.py -------------------------------------------------------------------------------- /app/utils/decorators.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | 4 | # See https://stackoverflow.com/questions/6358481/using-functools-lru-cache-with-dictionary-arguments 5 | # Required to use a dict argument with @alru_cache, since it needs to be hasheable/immutable 6 | def hash_dict(func): 7 | """Transform mutable dictionnary Into immutable Useful to be compatible 8 | with cache.""" 9 | 10 | class HDict(dict): 11 | def __hash__(self): 12 | return hash(frozenset(self.items())) 13 | 14 | @functools.wraps(func) 15 | def wrapped(*args, **kwargs): 16 | args = tuple([HDict(arg) if isinstance(arg, dict) else arg for arg in args]) 17 | kwargs = {k: HDict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} 18 | return func(*args, **kwargs) 19 | 20 | return wrapped 21 | -------------------------------------------------------------------------------- /app/utils/fields.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from ..crud.assets import get_default_asset 4 | from ..crud.metadata import get_asset_fields_dicts 5 | from ..models.orm.assets import Asset as ORMAsset 6 | from ..models.pydantic.creation_options import CreationOptions 7 | 8 | 9 | async def get_field_attributes( 10 | dataset: str, version: str, creation_options: CreationOptions 11 | ) -> List[Dict[str, Any]]: 12 | """Get list of field attributes on the asset which are marked as `is_feature_info` 13 | If a field list is provided in creation options, limit the list to those provided, 14 | in the order provided. Invalid provided fields are silently ignored. 
15 | """ 16 | 17 | default_asset: ORMAsset = await get_default_asset(dataset, version) 18 | asset_fields = await get_asset_fields_dicts(default_asset) 19 | 20 | name_to_feature_fields: Dict[str, Dict] = { 21 | field["name"]: field 22 | for field in asset_fields 23 | if field["is_feature_info"] 24 | } 25 | 26 | if ( 27 | "field_attributes" in creation_options.__fields__ 28 | and creation_options.field_attributes 29 | ): 30 | asset_field_attributes = [ 31 | name_to_feature_fields[field_name] 32 | for field_name in creation_options.field_attributes 33 | if field_name in name_to_feature_fields 34 | ] 35 | else: 36 | asset_field_attributes = list(name_to_feature_fields.values()) 37 | 38 | return asset_field_attributes 39 | -------------------------------------------------------------------------------- /app/utils/gadm.py: -------------------------------------------------------------------------------- 1 | GADM_41_IDS_MISSING_REVISION = ( 2 | "IDN.35.4", 3 | "IDN.35.8", 4 | "IDN.35.9", 5 | "IDN.35.13", 6 | "IDN.35.14", 7 | ) 8 | 9 | 10 | def extract_level_id(adm_level: int, id_string: str): 11 | """Given a desired admin level and a string containing at least that level 12 | of id, return the id of just that level.""" 13 | 14 | # Exception because of bad formatting of GHA gids in v4.1 15 | # (corrected by us in gadm_administrative_boundaries/v4.1.85 and higher) 16 | if id_string.startswith("GHA") and not id_string.startswith("GHA."): 17 | id_string = "GHA." + id_string[3:] 18 | # Exception because bad ids IDN.35.4, IDN.35.8, IDN.35.9, IDN.35.13, IDN.35.14 19 | # (they are missing final '_1') in gadm_administrative_boundaries/v4.1 20 | if id_string.startswith("IDN") and "_" not in id_string: 21 | id_string += "_1" 22 | 23 | return (id_string.rsplit("_")[0]).split(".")[adm_level] 24 | 25 | 26 | def fix_id_pattern(adm_level: int, id_pattern_string: str, provider: str, version: str): 27 | """Given an admin level and a GADM id pattern suitable for a SQL LIKE 28 | clause, return an id pattern adjusted for observed errors in GADM 29 | records.""" 30 | new_pattern: str = id_pattern_string 31 | 32 | if provider == "gadm" and version == "4.1": 33 | if id_pattern_string.rstrip(r"\__") in GADM_41_IDS_MISSING_REVISION: 34 | new_pattern = new_pattern.rstrip(r"\__") 35 | 36 | return new_pattern 37 | -------------------------------------------------------------------------------- /app/utils/generators.py: -------------------------------------------------------------------------------- 1 | from typing import Any, AsyncGenerator, List 2 | 3 | 4 | async def list_to_async_generator(input_list: List[Any]) -> AsyncGenerator[Any, None]: 5 | """Transform a List to an AsyncGenerator.""" 6 | for i in input_list: 7 | yield i 8 | -------------------------------------------------------------------------------- /app/utils/google.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import lru_cache 3 | from typing import List, Optional, Sequence, Dict 4 | 5 | from google.cloud.storage import Client 6 | from google.oauth2 import service_account 7 | 8 | from .aws import get_secret_client 9 | from ..settings.globals import AWS_GCS_KEY_SECRET_ARN 10 | 11 | 12 | @lru_cache(maxsize=1) 13 | def get_gcs_service_account_auth_info() -> Dict[str, str]: 14 | secret_client = get_secret_client() 15 | response = secret_client.get_secret_value(SecretId=AWS_GCS_KEY_SECRET_ARN) 16 | return json.loads(response["SecretString"]) 17 | 18 | 19 | def get_prefix_objects(bucket: str, 
prefix: str, limit: Optional[int] = None) -> List[str]: 20 | """Get ALL object names under a bucket and prefix in GCS.""" 21 | 22 | auth_info = get_gcs_service_account_auth_info() 23 | scopes = [ 24 | "https://www.googleapis.com/auth/devstorage.read_only", 25 | "https://www.googleapis.com/auth/cloud-platform.read-only", 26 | ] 27 | 28 | account_info = { 29 | "scopes": scopes, 30 | **auth_info 31 | } 32 | 33 | service_account_info = service_account.Credentials.from_service_account_info( 34 | account_info 35 | ) 36 | client = Client(project=None, credentials=service_account_info) 37 | 38 | blobs = client.list_blobs(bucket, prefix=prefix, max_results=limit) 39 | return [blob.name for blob in blobs] 40 | 41 | 42 | def get_gs_files( 43 | bucket: str, 44 | prefix: str, 45 | limit: Optional[int] = None, 46 | exit_after_max: Optional[int] = None, 47 | extensions: Sequence[str] = tuple() 48 | ) -> List[str]: 49 | """Get matching object names under a bucket and prefix in GCS.""" 50 | 51 | matches: List[str] = list() 52 | num_matches: int = 0 53 | 54 | for blob_name in get_prefix_objects(bucket, prefix, limit): 55 | if not extensions or any(blob_name.endswith(ext) for ext in extensions): 56 | matches.append(f"/vsigs/{bucket}/{blob_name}") 57 | num_matches += 1 58 | if exit_after_max and num_matches >= exit_after_max: 59 | return matches 60 | 61 | return matches 62 | -------------------------------------------------------------------------------- /app/utils/tile_cache.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from uuid import UUID 3 | 4 | from botocore.exceptions import ClientError 5 | from fastapi.logger import logger 6 | 7 | from ..crud import assets 8 | from ..models.enum.change_log import ChangeLogStatus 9 | from ..models.pydantic.change_log import ChangeLog 10 | from ..settings.globals import TILE_CACHE_CLUSTER, TILE_CACHE_SERVICE 11 | from ..tasks.aws_tasks import update_ecs_service 12 | 13 | 14 | async def redeploy_tile_cache_service(asset_id: UUID) -> None: 15 | """Redeploy Tile cache service to make sure dynamic tile cache is 16 | recognized.""" 17 | try: 18 | update_ecs_service(TILE_CACHE_CLUSTER, TILE_CACHE_SERVICE) 19 | ecs_change_log = ChangeLog( 20 | date_time=datetime.now(), 21 | status=ChangeLogStatus.success, 22 | message="Redeployed Tile Cache Service", 23 | ) 24 | except ClientError as e: 25 | # Let's don't make this a blocker but make sure it gets logged in case something goes wrong 26 | logger.exception(str(e)) 27 | ecs_change_log = ChangeLog( 28 | date_time=datetime.now(), 29 | status=ChangeLogStatus.failed, 30 | message="Failed to redeploy Tile Cache Service", 31 | detail=str(e), 32 | ) 33 | await assets.update_asset(asset_id, change_log=[ecs_change_log.dict(by_alias=True)]) 34 | -------------------------------------------------------------------------------- /batch/.dockerignore: -------------------------------------------------------------------------------- 1 | # IDE Fragments 2 | /.vscode 3 | *__pycache__* 4 | *.idea* 5 | 6 | #MyPy 7 | .mypy_cache 8 | 9 | # GIT 10 | .git 11 | 12 | # Docker Files 13 | docker-compose.dev.yml 14 | docker-compose.test.yml 15 | 16 | # Ignore Files 17 | .gitignore 18 | 19 | # Terraform 20 | *terraform* 21 | 22 | # Virtual Environments 23 | .venv -------------------------------------------------------------------------------- /batch/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/batch/__init__.py -------------------------------------------------------------------------------- /batch/pixetl.dockerfile: -------------------------------------------------------------------------------- 1 | FROM globalforestwatch/pixetl:v1.7.7 2 | 3 | # Copy scripts 4 | COPY ./batch/scripts/ /opt/scripts/ 5 | COPY ./batch/python/ /opt/python/ 6 | 7 | RUN ln -f -s /usr/bin/python3 /usr/bin/python 8 | 9 | # make sure scripts are executable 10 | RUN chmod +x -R /opt/scripts/ 11 | RUN chmod +x -R /opt/python/ 12 | 13 | ENV PATH="/opt/scripts:${PATH}" 14 | ENV PATH="/opt/python:${PATH}" 15 | 16 | WORKDIR /tmp 17 | 18 | ENV LC_ALL=C.UTF-8 19 | ENV LANG=C.UTF-8 20 | 21 | ENTRYPOINT ["/opt/scripts/report_status.sh"] 22 | -------------------------------------------------------------------------------- /batch/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/batch/python/__init__.py -------------------------------------------------------------------------------- /batch/python/adjust_num_processes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | 5 | import boto3 6 | 7 | OOM_ERROR = "OutOfMemoryError: Container killed due to memory usage" 8 | 9 | 10 | def calc_num_processes(job_id: str, original_num_proc, batch_client): 11 | jobs_desc = batch_client.describe_jobs(jobs=[job_id]) 12 | 13 | new_num_proc = original_num_proc 14 | 15 | # For each previous attempt resulting in OOM, divide NUM_PROCESSES by 2 16 | for attempt in jobs_desc["jobs"][0]["attempts"]: 17 | if ( 18 | attempt["container"].get("exitCode") == 137 19 | or attempt["container"].get("reason") == OOM_ERROR 20 | ): 21 | new_num_proc = max(1, int(new_num_proc / 2)) 22 | 23 | return new_num_proc 24 | 25 | 26 | if __name__ == "__main__": 27 | job_id = os.getenv("AWS_BATCH_JOB_ID") 28 | if job_id is None: 29 | raise ValueError("No AWS Batch Job ID found") 30 | original_num_proc = os.getenv("NUM_PROCESSES", os.getenv("CORES", os.cpu_count())) 31 | if original_num_proc is None: 32 | raise ValueError("Neither number of processes nor number of cores are set") 33 | else: 34 | original_num_proc = int(original_num_proc) 35 | 36 | batch_client = boto3.client("batch", region_name=os.getenv("AWS_REGION")) 37 | 38 | print(calc_num_processes(job_id, original_num_proc, batch_client)) 39 | -------------------------------------------------------------------------------- /batch/python/aws_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Sequence, Tuple, Dict, Any 3 | 4 | import boto3 5 | 6 | AWS_REGION = os.environ.get("AWS_REGION") 7 | AWS_ENDPOINT_URL = os.environ.get("ENDPOINT_URL") # For boto 8 | 9 | 10 | def get_s3_client(aws_region=AWS_REGION, endpoint_url=AWS_ENDPOINT_URL): 11 | return boto3.client("s3", region_name=aws_region, endpoint_url=endpoint_url) 12 | 13 | 14 | def get_s3_path_parts(s3url) -> Tuple[str, str]: 15 | """Splits an S3 URL into bucket and key.""" 16 | just_path = s3url.split("s3://")[1] 17 | bucket = just_path.split("/")[0] 18 | key = "/".join(just_path.split("/")[1:]) 19 | return bucket, key 20 | 21 | 22 | def exists_in_s3(target_bucket, target_key): 23 | """Returns whether or not target_key exists in target_bucket.""" 24 | s3_client = 
get_s3_client() 25 | response = s3_client.list_objects_v2( 26 | Bucket=target_bucket, 27 | Prefix=target_key, 28 | ) 29 | for obj in response.get("Contents", []): 30 | if obj["Key"] == target_key: 31 | return obj["Size"] > 0 32 | 33 | 34 | def get_aws_files( 35 | bucket: str, prefix: str, extensions: Sequence[str] = (".tif",) 36 | ) -> List[str]: 37 | """Get all matching files in S3.""" 38 | files: List[str] = list() 39 | 40 | s3_client = get_s3_client() 41 | paginator = s3_client.get_paginator("list_objects_v2") 42 | 43 | print("get_aws_files") 44 | for page in paginator.paginate(Bucket=bucket, Prefix=prefix): 45 | try: 46 | contents = page["Contents"] 47 | except KeyError: 48 | break 49 | 50 | for obj in contents: 51 | key = str(obj["Key"]) 52 | if any(key.endswith(ext) for ext in extensions): 53 | files.append(f"s3://{bucket}/{key}") 54 | 55 | print("done get_aws_files") 56 | return files 57 | 58 | 59 | def upload_s3(path: str, bucket: str, dst: str) -> Dict[str, Any]: 60 | s3_client = get_s3_client() 61 | return s3_client.upload_file(path, bucket, dst) 62 | -------------------------------------------------------------------------------- /batch/python/check_csv.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import sys 3 | from typing import Type 4 | from urllib.parse import urlparse 5 | 6 | import boto3 7 | from logger import get_logger 8 | 9 | LOGGER = get_logger(__name__) 10 | 11 | s3_uri = sys.argv[1] 12 | s3 = boto3.client("s3", region_name="us-east-1") 13 | o = urlparse(s3_uri, allow_fragments=False) 14 | bucket = o.netloc 15 | key = o.path.lstrip("/") 16 | 17 | bytes_range = "bytes=0-4096" 18 | response = s3.get_object(Bucket=bucket, Key=key, Range=bytes_range) 19 | data = response["Body"].read().decode("utf-8") 20 | 21 | try: 22 | dialect: Type[csv.Dialect] = csv.Sniffer().sniff(data) 23 | # TODO: verify if dialect is correct (delimiter etc) 24 | except csv.Error: 25 | raise TypeError("Not a valid CSV file") 26 | 27 | LOGGER.debug(dialect.delimiter) 28 | -------------------------------------------------------------------------------- /batch/python/check_raster.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import boto3 4 | import rasterio 5 | from logger import get_logger 6 | 7 | LOGGER = get_logger(__name__) 8 | 9 | s3_uri = sys.argv[1] 10 | zipped = sys.argv[2] 11 | s3 = boto3.client("s3", region_name="us-east-1") 12 | 13 | if zipped: 14 | s3_uri = f"zip+{s3_uri}" 15 | 16 | with rasterio.open(s3_uri) as src: 17 | driver = src.driver 18 | LOGGER.debug(driver) 19 | -------------------------------------------------------------------------------- /batch/python/check_vector.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import boto3 4 | import fiona 5 | from logger import get_logger 6 | 7 | LOGGER = get_logger(__name__) 8 | 9 | s3_uri = sys.argv[1] 10 | zipped = sys.argv[2] 11 | s3 = boto3.client("s3", region_name="us-east-1") 12 | 13 | if zipped: 14 | s3_uri = f"zip+{s3_uri}" 15 | 16 | with fiona.open(s3_uri) as src: 17 | driver = src.driver 18 | LOGGER.debug(driver) 19 | -------------------------------------------------------------------------------- /batch/python/errors.py: -------------------------------------------------------------------------------- 1 | class GDALError(Exception): 2 | pass 3 | 4 | 5 | class SubprocessKilledError(Exception): 6 | pass 7 | 
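Editor's illustrative sketch (not a file in the repository): expected behavior of the S3 helpers defined in batch/python/aws_utils.py above. The bucket and object names are hypothetical, and the calls assume AWS credentials and region are configured in the environment, as the batch containers do.

from aws_utils import get_aws_files, get_s3_path_parts

# Split an s3:// URI into its bucket and key parts.
bucket, key = get_s3_path_parts("s3://my-bucket/rasters/10N_010E.tif")
assert (bucket, key) == ("my-bucket", "rasters/10N_010E.tif")

# List objects under a prefix as s3:// URIs, keeping only GeoTIFFs (the default extension filter).
tifs = get_aws_files("my-bucket", "rasters/")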
-------------------------------------------------------------------------------- /batch/python/extract_geometries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import csv 4 | from typing import Dict, List, Optional, Type, Union 5 | 6 | import click 7 | import pandas 8 | from shapely import wkb 9 | from shapely.geometry import ( 10 | GeometryCollection, 11 | MultiLineString, 12 | MultiPoint, 13 | MultiPolygon, 14 | ) 15 | from shapely.geometry.base import BaseGeometry 16 | 17 | MultiGeometry = Union[MultiPolygon, MultiLineString, MultiPoint] 18 | 19 | 20 | @click.command() 21 | @click.argument("input_file", type=click.Path(exists=True)) 22 | @click.argument("output_file", required=False) 23 | @click.option("--delimiter", default="\t", help="Delimiter") 24 | def cli(input_file: str, output_file: Optional[str], delimiter: str) -> None: 25 | 26 | if not output_file: 27 | output_file = input_file 28 | 29 | df = pandas.read_csv(input_file, delimiter=delimiter, header=0) 30 | df["geom"] = df["geom"].map(lambda x: extract(wkb.loads(x, hex=True))) 31 | 32 | df.to_csv( 33 | output_file, 34 | sep=delimiter, 35 | header=True, 36 | index=False, 37 | quoting=csv.QUOTE_MINIMAL, 38 | quotechar='"', 39 | ) 40 | 41 | 42 | def extract( 43 | geometry: Union[BaseGeometry, GeometryCollection], geom_type: str = "Polygon" 44 | ) -> BaseGeometry: 45 | new_geometry_type: Dict[str, Type[MultiGeometry]] = { 46 | "Polygon": MultiPolygon, 47 | "LineString": MultiLineString, 48 | "Point": MultiPoint, 49 | } 50 | 51 | if geometry.geometryType() == "GeometryCollection": 52 | geom_buffer: List[BaseGeometry] = list() 53 | for geom in geometry.geoms: 54 | if geom.geometryType() == geom_type: 55 | geom_buffer.append(geom) 56 | elif geom.geometryType() == f"Multi{geom_type}": 57 | for g in geom.geoms: 58 | geom_buffer.append(g) 59 | new_geom: MultiGeometry = new_geometry_type[geom_type](geom_buffer) 60 | return new_geom.wkb_hex 61 | else: 62 | return geometry.wkb_hex 63 | 64 | 65 | if __name__ == "__main__": 66 | cli() 67 | -------------------------------------------------------------------------------- /batch/python/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | ENV: str = os.environ.get("ENV", "dev") 5 | 6 | 7 | def get_logger(name): 8 | formatter = logging.Formatter( 9 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 10 | ) 11 | 12 | sh = logging.StreamHandler() 13 | sh.setFormatter(formatter) 14 | 15 | logger = logging.getLogger(name) 16 | logger.addHandler(sh) 17 | if ENV != "production": 18 | logger.setLevel(logging.DEBUG) 19 | else: 20 | logger.setLevel(logging.INFO) 21 | return logger 22 | -------------------------------------------------------------------------------- /batch/python/logging_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from logging.handlers import QueueHandler 4 | 5 | 6 | def listener_configurer(): 7 | """Run this in the parent process to configure logger.""" 8 | root = logging.getLogger() 9 | h = logging.StreamHandler(stream=sys.stdout) 10 | root.addHandler(h) 11 | 12 | 13 | def log_listener(queue, configurer): 14 | """Run this in the parent process to listen for log messages from 15 | children.""" 16 | configurer() 17 | while True: 18 | try: 19 | record = queue.get() 20 | if ( 21 | record is None 22 | ): # We send this as a sentinel to tell the listener 
to quit. 23 | break 24 | logger = logging.getLogger(record.name) 25 | logger.handle(record) # No level or filter logic applied - just do it! 26 | except Exception: 27 | import traceback 28 | 29 | print("Encountered a problem in the log listener!", file=sys.stderr) 30 | traceback.print_exc(file=sys.stderr) 31 | raise 32 | 33 | 34 | def log_client_configurer(queue): 35 | """Run this in child processes to configure sending logs to parent.""" 36 | h = QueueHandler(queue) 37 | root = logging.getLogger() 38 | root.addHandler(h) 39 | root.setLevel(logging.INFO) 40 | -------------------------------------------------------------------------------- /batch/scripts/_add_gfw_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and set a variable to it. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | ADD_GFW_FIELDS_SQL=" 10 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN ${GEOMETRY_NAME}_wm geometry(${GEOMETRY_TYPE},3857); 11 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ALTER COLUMN ${GEOMETRY_NAME}_wm SET STORAGE EXTERNAL; 12 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_area__ha NUMERIC; 13 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_geostore_id UUID; 14 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_geojson TEXT COLLATE pg_catalog.\"default\"; 15 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_bbox NUMERIC[]; 16 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN created_on timestamp without time zone DEFAULT now(); 17 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN updated_on timestamp without time zone DEFAULT now();" -------------------------------------------------------------------------------- /batch/scripts/_add_point_geometry_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and assign it to a variable. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | ADD_POINT_GEOMETRY_FIELDS_SQL=" 10 | ALTER TABLE 11 | \"$TEMP_TABLE\" 12 | ADD COLUMN 13 | ${GEOMETRY_NAME} geometry(Point,4326); 14 | 15 | ALTER TABLE 16 | \"$TEMP_TABLE\" 17 | ADD COLUMN 18 | ${GEOMETRY_NAME}_wm geometry(Point,3857);" -------------------------------------------------------------------------------- /batch/scripts/_fill_gfw_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and set a variable to it. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 
8 | 9 | FILL_GFW_FIELDS_SQL=" 10 | UPDATE 11 | $TABLE_MISSING_COLUMNS 12 | SET 13 | gfw_area__ha = ST_Area($GEOMETRY_NAME::geography)/10000, 14 | gfw_geostore_id = md5(ST_asgeojson($GEOMETRY_NAME))::uuid, 15 | gfw_geojson = ST_asGeojson($GEOMETRY_NAME), 16 | gfw_bbox = ARRAY[ 17 | ST_XMin(ST_Envelope($GEOMETRY_NAME)::geometry), 18 | ST_YMin(ST_Envelope($GEOMETRY_NAME)::geometry), 19 | ST_XMax(ST_Envelope($GEOMETRY_NAME)::geometry), 20 | ST_YMax(ST_Envelope($GEOMETRY_NAME)::geometry) 21 | ]::NUMERIC[]" -------------------------------------------------------------------------------- /batch/scripts/_fill_point_geometry_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and assign it to a variable. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | FILL_POINT_GEOMETRY_FIELDS_SQL=" 10 | UPDATE 11 | \"$TEMP_TABLE\" 12 | SET 13 | ${GEOMETRY_NAME} = ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 14 | ${GEOMETRY_NAME}_wm = ST_Transform(ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 3857) 15 | WHERE 16 | ${GEOMETRY_NAME} IS null OR ${GEOMETRY_NAME}_wm IS null;" -------------------------------------------------------------------------------- /batch/scripts/_get_geometry_type_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and set a variable to it. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | GEOMETRY_TYPE_SQL=" 10 | SELECT type 11 | FROM geometry_columns 12 | WHERE f_table_schema = '${DATASET}' 13 | AND f_table_name = '${VERSION}' 14 | AND f_geometry_column = '${GEOMETRY_NAME}';" -------------------------------------------------------------------------------- /batch/scripts/_tiff_crosses_dateline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # USAGE: _tiff_crosses_dateline.sh raster_file 4 | # 5 | # Prints the string "true" if the input raster will cross the dateline 6 | # when converting to EPSG:4326, "false" otherwise 7 | # 8 | # Needs GDAL 2.0+ and Python 9 | # 10 | # Credit: Slightly modified from https://gis.stackexchange.com/a/222341 11 | 12 | 13 | if [ -z "${1}" ]; then 14 | echo -e "Error: No input raster file given.\n> USAGE: _tiff_crosses_dateline.sh raster_file" 15 | exit 1 16 | fi 17 | 18 | # Get raster info, save it to a variable as we need it several times 19 | gdalinfo=$(gdalinfo "${1}" -json) 20 | 21 | # Exit if -json switch is not available 22 | if [ ! 
-z $(echo $gdalinfo | grep "^Usage:") ]; then 23 | echo -e "Error: GDAL command failed, Version 2.0+ is needed" 24 | exit 1 25 | fi 26 | 27 | function jsonq { 28 | echo "${1}" | python -c "import json,sys; jdata = sys.stdin.read(); data = json.loads(jdata); print(data${2});" 29 | } 30 | 31 | ulx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][0][0]") 32 | llx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][1][0]") 33 | lrx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][3][0]") 34 | urx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][2][0]") 35 | 36 | crossing_dateline=false 37 | test $(python -c "print(${ulx}>${lrx})") = True && crossing_dateline=true 38 | test $(python -c "print(${llx}>${urx})") = True && crossing_dateline=true 39 | 40 | echo -n "${crossing_dateline}" -------------------------------------------------------------------------------- /batch/scripts/_warp_and_upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # arguments: 6 | # $0 - The name of this script 7 | # $1 - local_src_file 8 | # $2 - local_warped_file 9 | # $3 - target_crs 10 | # $4 - remote target file 11 | 12 | if aws s3 ls "$4"; then 13 | echo "Remote target file $4 already exists, skipping..." 14 | exit 0 15 | fi 16 | 17 | warp_options=("-co" "COMPRESS=DEFLATE" "-co" "TILED=yes") 18 | 19 | echo "Seeing if TIFF crosses the dateline" 20 | crosses="$(_tiff_crosses_dateline.sh $1)" 21 | if [ "${crosses}" = "true" ]; then 22 | echo "$1 crosses the dateline" 23 | warp_options+=("--config" "CENTER_LONG" "180") 24 | else 25 | echo "$1 does not cross the dateline" 26 | fi 27 | 28 | echo "Now warping $1 to $2" 29 | gdalwarp "$1" "$2" -t_srs "$3" "${warp_options[@]}" 30 | echo "Done warping $1 to $2" 31 | 32 | echo "Now uploading $2 to $4" 33 | aws s3 cp --no-progress "$2" "$4" 34 | echo "Done uploading $2 to $4" 35 | 36 | echo "Finally, deleting local files $1 and $2" 37 | rm "$1" "$2" 38 | echo "Done deleting local files $1 and $2" 39 | -------------------------------------------------------------------------------- /batch/scripts/add_gfw_fields.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | 9 | # optional arguments 10 | # -g | --geometry_name (get_arguments.sh specifies default) 11 | 12 | ME=$(basename "$0") 13 | . get_arguments.sh "$@" 14 | 15 | set -u 16 | 17 | # Add GFW-specific columns to the new table 18 | TABLE_MISSING_COLUMNS="\"$DATASET\".\"$VERSION\"" 19 | 20 | # Get geometry type of the new table 21 | # GEOMETRY_TYPE_SQL is defined by sourcing _get_geometry_type_sql.sh 22 | # It contains the SQL snippet we'll pass to the psql client command 23 | . _get_geometry_type_sql.sh 24 | 25 | # Get the geometry type of the new table 26 | GEOMETRY_TYPE=$(psql -X -A -t -c "${GEOMETRY_TYPE_SQL}") 27 | 28 | # ADD_GFW_FIELDS_SQL is defined by sourcing _add_gfw_fields_sql.sh 29 | # It contains the SQL snippet we'll pass to the psql client command 30 | . _add_gfw_fields_sql.sh 31 | 32 | echo "PSQL: ALTER TABLE $TABLE_MISSING_COLUMNS. Add GFW columns" 33 | psql -c "$ADD_GFW_FIELDS_SQL" 34 | 35 | # Set gfw_geostore_id not NULL to be compliant with GEOSTORE 36 | echo "PSQL: ALTER TABLE \"$DATASET\".\"$VERSION\". 
ALTER COLUMN gfw_geostore_id SET NOT NULL" 37 | psql -c "ALTER TABLE \"$DATASET\".\"$VERSION\" ALTER COLUMN gfw_geostore_id SET NOT NULL;" -------------------------------------------------------------------------------- /batch/scripts/add_point_geometry_fields.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | 9 | # optional arguments 10 | # -g | --geometry_name (get_arguments.sh specifies default) 11 | 12 | ME=$(basename "$0") 13 | . get_arguments.sh "$@" 14 | 15 | # Add point geometry fields 16 | echo "PSQL: ALTER TABLE \"$DATASET\".\"$VERSION\". Add Point columns" 17 | psql -c "ALTER TABLE \"$DATASET\".\"$VERSION\" ADD COLUMN ${GEOMETRY_NAME} geometry(Point,4326); 18 | ALTER TABLE \"$DATASET\".\"$VERSION\" ADD COLUMN ${GEOMETRY_NAME}_wm geometry(Point,3857);" -------------------------------------------------------------------------------- /batch/scripts/apply_colormap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -j | --json 9 | # -n | --no_data 10 | # -s | --source 11 | # -T | --target 12 | 13 | ME=$(basename "$0") 14 | . get_arguments.sh "$@" 15 | 16 | echo "Apply symbology and upload RGB asset to S3" 17 | 18 | # Build an array of arguments to pass to apply_symbology.py 19 | ARG_ARRAY=("--dataset" "${DATASET}" "--version" "${VERSION}") 20 | 21 | ARG_ARRAY+=("--symbology" "${JSON}") 22 | 23 | ARG_ARRAY+=("--no-data" "${NO_DATA}") 24 | 25 | ARG_ARRAY+=("--source-uri" "${SRC}") 26 | 27 | ARG_ARRAY+=("--target-prefix" "${TARGET}") 28 | 29 | apply_colormap.py "${ARG_ARRAY[@]}" -------------------------------------------------------------------------------- /batch/scripts/clip_and_reproject_geom.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | 9 | # optional arguments 10 | # -g | --geometry_name (get_arguments.sh specifies default) 11 | 12 | ME=$(basename "$0") 13 | . 
get_arguments.sh "$@" 14 | 15 | set -u 16 | 17 | # Transform to web mercator (WM) in two steps to isolate the more involved 18 | # one for polygons that overflow WM lat bounds of -85/85 degrees 19 | 20 | # Reproject all polygons within WM bounds 21 | psql -c " 22 | UPDATE 23 | \"$DATASET\".\"$VERSION\" 24 | SET 25 | ${GEOMETRY_NAME}_wm = ST_Multi(ST_Transform(ST_Force2D($GEOMETRY_NAME), 3857)) 26 | WHERE 27 | ${GEOMETRY_NAME}_wm IS NULL 28 | AND 29 | ST_Within($GEOMETRY_NAME, ST_MakeEnvelope(-180, -85, 180, 85, 4326));" 30 | 31 | # For all polygons outside of WM bounds, clip then reproject to WM 32 | psql -c " 33 | UPDATE 34 | \"$DATASET\".\"$VERSION\" 35 | SET 36 | ${GEOMETRY_NAME}_wm = ST_Multi(ST_Transform(ST_Force2D(ST_Buffer(ST_Intersection($GEOMETRY_NAME, ST_MakeEnvelope(-180, -85, 180, 85, 4326)), 0)), 3857)) 37 | WHERE 38 | ${GEOMETRY_NAME}_wm IS NULL 39 | AND 40 | NOT ST_Within($GEOMETRY_NAME, ST_MakeEnvelope(-180, -85, 180, 85, 4326));" 41 | 42 | set +u -------------------------------------------------------------------------------- /batch/scripts/cluster_partitions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -p | --partition_type 9 | # -P | --partition_schema 10 | # -x | --index_type 11 | # -C | --column_names 12 | 13 | ME=$(basename "$0") 14 | . get_arguments.sh "$@" 15 | 16 | # While it seems unnecessary here to pass the arguments through the get_arguments.sh script 17 | # I prefer to still do it. This way, we have a consistent way to log the env variables and can make sure 18 | # that argument names are used consistently across all tools. 19 | echo "PYTHON: Cluster partitions" 20 | cluster_partitions.py -d "$DATASET" -v "$VERSION" -p "$PARTITION_TYPE" -P "$PARTITION_SCHEMA" -x "$INDEX_TYPE" -C "$COLUMN_NAMES" 21 | -------------------------------------------------------------------------------- /batch/scripts/cluster_table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -C | --column_names 9 | # -x | --index_type 10 | ME=$(basename "$0") 11 | . get_arguments.sh "$@" 12 | 13 | COLUMN_NAMES_UNDERSCORED="$(echo "$COLUMN_NAMES" | sed 's/,/_/g' | cut -c 1-63)" 14 | echo "PSQL: CLUSTER \"$DATASET\".\"$VERSION\" USING \"${VERSION}_${COLUMN_NAMES_UNDERSCORED}_${INDEX_TYPE}_idx\"" 15 | psql -c "CLUSTER \"$DATASET\".\"$VERSION\" USING \"${VERSION}_${COLUMN_NAMES_UNDERSCORED}_${INDEX_TYPE}_idx\";" -------------------------------------------------------------------------------- /batch/scripts/create_index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -c | --column_name 9 | # -x | --index_type 10 | ME=$(basename "$0") 11 | . 
get_arguments.sh "$@" 12 | 13 | COLUMN_NAMES_UNDERSCORED="$(echo "$COLUMN_NAMES" | sed 's/,/_/g' | cut -c 1-63)" 14 | psql -c "CREATE INDEX IF NOT EXISTS \"${VERSION}_${COLUMN_NAMES_UNDERSCORED}_${INDEX_TYPE}_idx\" 15 | ON \"$DATASET\".\"$VERSION\" USING $INDEX_TYPE 16 | (${COLUMN_NAMES});" -------------------------------------------------------------------------------- /batch/scripts/create_partitions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -p | --partition_type 9 | # -P | --partition_schema 10 | ME=$(basename "$0") 11 | . get_arguments.sh "$@" 12 | 13 | # While it seems unnecessary here to pass the arguments through the get_arguments.sh script 14 | # I prefer to still do it. This way, we have a consistent way to log the env variables and can make sure 15 | # that argument names are used consistently across all tools. 16 | echo "PYTHON: Create partitions" 17 | create_partitions.py -d "$DATASET" -v "$VERSION" -p "$PARTITION_TYPE" -P "$PARTITION_SCHEMA" 18 | -------------------------------------------------------------------------------- /batch/scripts/create_vector_tile_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -Z | --min_zoom 10 | # -z | --max_zoom 11 | # -t | --tile_strategy 12 | # -I | --implementation 13 | 14 | # optional arguments 15 | # --filter 16 | 17 | ME=$(basename "$0") 18 | . get_arguments.sh "$@" 19 | 20 | 21 | NDJSON_FILE="${DATASET}.json" 22 | 23 | # Build an array of arguments to pass to tippecanoe 24 | TIPPE_ARG_ARRAY=( 25 | "-e" "tilecache" 26 | "-Z${MIN_ZOOM}" 27 | "-z${MAX_ZOOM}" 28 | "--preserve-input-order" 29 | "-P" 30 | "-n" "${DATASET}" 31 | "-l" "${DATASET}" 32 | ) 33 | 34 | case ${TILE_STRATEGY} in 35 | discontinuous) # Discontinuous polygon features 36 | TIPPE_ARG_ARRAY+=("--drop-densest-as-needed" "--extend-zooms-if-still-dropping") 37 | ;; 38 | continuous) # Continuous polygon features 39 | TIPPE_ARG_ARRAY+=("--coalesce-densest-as-needed" "--extend-zooms-if-still-dropping") 40 | ;; 41 | keep_all) # never drop or coalesce feature, ignore size and feature count 42 | TIPPE_ARG_ARRAY+=("-r1") 43 | ;; 44 | *) 45 | echo "Invalid Tile Cache option -${TILE_STRATEGY}" 46 | exit 1 47 | ;; 48 | esac 49 | 50 | if [ -n "${FILTER}" ]; then 51 | echo "${FILTER}" > feature_filter.txt 52 | TIPPE_ARG_ARRAY+=("-J" "feature_filter.txt") 53 | fi 54 | 55 | TIPPE_ARG_ARRAY+=("${NDJSON_FILE}") 56 | 57 | echo "Fetching NDJSON file from the Data Lake: ${SRC} -> ${NDJSON_FILE}..." 58 | aws s3 cp "${SRC}" "${NDJSON_FILE}" --no-progress 59 | 60 | echo "Building Tile Cache with Tippecanoe..." 61 | tippecanoe "${TIPPE_ARG_ARRAY[@]}" 62 | 63 | echo "Uploading tiles to S3 with TilePutty..." 
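# Note: "tilecache" below is the output directory tippecanoe wrote tiles into
# via the "-e tilecache" option above. NUM_PROCESSES is not set in this script
# and is assumed to be provided by get_arguments.sh or the job environment.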
64 | tileputty tilecache --bucket "${TILE_CACHE}" --dataset "${DATASET}" --version "${VERSION}" --implementation "${IMPLEMENTATION}" --cores "${NUM_PROCESSES}" -------------------------------------------------------------------------------- /batch/scripts/export_1x1_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # required arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -C | --column_names 9 | # -T | --target 10 | # 11 | # optional arguments 12 | # --include_tile_id 13 | 14 | ME=$(basename "$0") 15 | . get_arguments.sh "$@" 16 | 17 | echo "PYTHON: Create 1x1 grid files" 18 | ARG_ARRAY=("--dataset" "${DATASET}" 19 | "--version" "${VERSION}" 20 | "-C" "${COLUMN_NAMES}") 21 | 22 | if [ -n "${INCLUDE_TILE_ID}" ]; then 23 | ARG_ARRAY+=("--include_tile_id") 24 | fi 25 | export_1x1_grid.py "${ARG_ARRAY[@]}" 26 | 27 | echo "Combine output files" 28 | echo ./*.tmp | xargs cat >> "${DATASET}_${VERSION}_1x1.tsv" 29 | 30 | echo "Post-process geometries" 31 | extract_geometries.py "${DATASET}_${VERSION}_1x1.tsv" 32 | 33 | echo "AWSCLI: upload to data lake" 34 | aws s3 cp "${DATASET}_${VERSION}_1x1.tsv" "$TARGET" 35 | -------------------------------------------------------------------------------- /batch/scripts/export_vector_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -f | --local_file 9 | # -F | --format 10 | # -T | --target 11 | # -w | --where 12 | # -C | --column_names 13 | # -X | --zipped 14 | 15 | # optional arguments 16 | # -g | --geometry_name (get_arguments.sh specifies default) 17 | # -i | --fid_name (get_arguments.sh specifies default) 18 | 19 | ME=$(basename "$0") 20 | . get_arguments.sh "$@" 21 | 22 | echo "OGR2OGR: Export table \"${DATASET}\".\"${VERSION}\" using format ${FORMAT}" 23 | echo "Export columns $COLUMN_NAMES" 24 | ogr2ogr -f "$FORMAT" "$LOCAL_FILE" PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 25 | -sql "SELECT $COLUMN_NAMES, $GEOMETRY_NAME FROM \"${DATASET}\".\"${VERSION}\" $WHERE" -geomfield "${GEOMETRY_NAME}" \ 26 | -lco FID="$FID_NAME" 27 | 28 | if [ "${ZIPPED}" == "True" ]; then 29 | BASE_NAME="${LOCAL_FILE%.*}" 30 | LOCAL_FILE="${BASE_NAME}.zip" 31 | find . -name "${BASE_NAME}.*" | zip -@ -j "${LOCAL_FILE}" 32 | fi 33 | 34 | echo "AWSCLI: COPY DATA FROM $LOCAL_FILE TO $TARGET" 35 | aws s3 cp "$LOCAL_FILE" "$TARGET" 36 | 37 | echo "Done" -------------------------------------------------------------------------------- /batch/scripts/inherit_geostore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | ME=$(basename "$0") 9 | . get_arguments.sh "$@" 10 | 11 | # Inherit from geostore 12 | echo "PSQL: ALTER TABLE. 
Inherit from geostore" 13 | psql -c "ALTER TABLE \"$DATASET\".\"$VERSION\" INHERIT public.geostore;" -------------------------------------------------------------------------------- /batch/scripts/load_tabular_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -D | --delimiter 10 | 11 | # optional arguments 12 | # --lat 13 | # --lng 14 | # -g | --geometry_name (get_arguments.sh specifies default) 15 | 16 | ME=$(basename "$0") 17 | . get_arguments.sh "$@" 18 | 19 | 20 | # Unescape TAB character 21 | if [ "$DELIMITER" == "\t" ]; then 22 | DELIMITER=$(echo -e "\t") 23 | fi 24 | 25 | # I think Postgres temporary tables are such that concurrent jobs won't 26 | # interfere with each other, but make the temp table name unique just 27 | # in case. 28 | UUID=$(python -c 'import uuid; print(uuid.uuid4(), end="")' | sed s/-//g) 29 | TEMP_TABLE="temp_${UUID}" 30 | 31 | # IF GEOMETRY_NAME, LAT and LNG are defined, set ADD_POINT_GEOMETRY_FIELDS_SQL 32 | # by sourcing _add_point_geometry_fields_sql.sh 33 | # It defines a SQL snippet we'll run later 34 | if [[ -n "${GEOMETRY_NAME:-}" ]] && [[ -n "${LAT:-}" ]] && [[ -n "${LNG:-}" ]] 35 | then 36 | . _add_point_geometry_fields_sql.sh 37 | . _fill_point_geometry_fields_sql.sh 38 | else 39 | ADD_POINT_GEOMETRY_FIELDS_SQL="" 40 | FILL_POINT_GEOMETRY_FIELDS_SQL="" 41 | fi 42 | 43 | for uri in "${SRC[@]}"; do 44 | # https://stackoverflow.com/questions/48019381/how-postgresql-copy-to-stdin-with-csv-do-on-conflic-do-update 45 | aws s3 cp "${uri}" - | psql -c "BEGIN; 46 | CREATE TEMP TABLE \"$TEMP_TABLE\" 47 | (LIKE \"$DATASET\".\"$VERSION\" INCLUDING DEFAULTS) 48 | ON COMMIT DROP; 49 | 50 | ALTER TABLE \"$TEMP_TABLE\" DROP COLUMN IF EXISTS ${GEOMETRY_NAME}; 51 | ALTER TABLE \"$TEMP_TABLE\" DROP COLUMN IF EXISTS ${GEOMETRY_NAME}_wm; 52 | 53 | COPY \"$TEMP_TABLE\" FROM STDIN WITH (FORMAT CSV, DELIMITER '$DELIMITER', HEADER); 54 | 55 | $ADD_POINT_GEOMETRY_FIELDS_SQL 56 | $FILL_POINT_GEOMETRY_FIELDS_SQL 57 | 58 | INSERT INTO \"$DATASET\".\"$VERSION\" 59 | SELECT * FROM \"$TEMP_TABLE\" 60 | ON CONFLICT DO NOTHING; 61 | 62 | COMMIT;" 63 | done -------------------------------------------------------------------------------- /batch/scripts/raster_tile_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -I | --implementation 9 | # --skip 10 | # --target_bucket 11 | # --zoom_level 12 | 13 | # and positional arguments 14 | # asset_prefix 15 | 16 | ME=$(basename "$0") 17 | . 
get_arguments.sh "$@" 18 | 19 | ARG_ARRAY=("--dataset" "${DATASET}" 20 | "--version" "${VERSION}" 21 | "--implementation" "${IMPLEMENTATION}" 22 | "--target-bucket" "${TARGET_BUCKET}" 23 | "--zoom-level" "${ZOOM_LEVEL}" 24 | "--bit-depth" "${BIT_DEPTH}") 25 | 26 | if [ -n "${SKIP}" ]; then 27 | ARG_ARRAY+=("--skip_empty_tiles") 28 | fi 29 | 30 | echo "Generate raster tile cache with GDAL2Tiles and upload to target S3 bucket" 31 | 32 | raster_tile_cache.py "${ARG_ARRAY[@]}" "${POSITIONAL[@]}" 33 | -------------------------------------------------------------------------------- /batch/scripts/resample.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -r | --resampling_method 10 | # --zoom_level 11 | # -T | --target 12 | 13 | ME=$(basename "$0") 14 | . get_arguments.sh "$@" 15 | 16 | echo "Reproject to WM and resample" 17 | 18 | # Build an array of arguments to pass to resample.py 19 | ARG_ARRAY=("--dataset" "${DATASET}" "--version" "${VERSION}") 20 | 21 | ARG_ARRAY+=("--source-uri" "${SRC}") 22 | 23 | ARG_ARRAY+=("--resampling-method" "${RESAMPLE}") 24 | 25 | ARG_ARRAY+=("--target-zoom" "${ZOOM_LEVEL}") 26 | 27 | ARG_ARRAY+=("--target-prefix" "${TARGET}") 28 | 29 | # Run resample.py with the array of arguments 30 | resample.py "${ARG_ARRAY[@]}" -------------------------------------------------------------------------------- /batch/scripts/run_pixetl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -j | --json 9 | 10 | # optional arguments 11 | # --subset 12 | # --overwrite 13 | 14 | ME=$(basename "$0") 15 | . get_arguments.sh "$@" 16 | 17 | # in get_arguments.sh we call pushd to jump into the batchID subfolder 18 | # pixETL expects /tmp as workdir and will make attempt to create subfolder itself 19 | popd 20 | 21 | echo "Build Raster Tile Set and upload to S3" 22 | 23 | # Build an array of arguments to pass to pixetl 24 | ARG_ARRAY=("--dataset" "${DATASET}" "--version" "${VERSION}") 25 | 26 | if [ -n "${OVERWRITE}" ]; then 27 | ARG_ARRAY+=("--overwrite") 28 | fi 29 | 30 | if [ -n "${SUBSET}" ]; then 31 | ARG_ARRAY+=("--subset") 32 | ARG_ARRAY+=("${SUBSET}") 33 | fi 34 | 35 | ARG_ARRAY+=("${JSON}") 36 | 37 | # Run pixetl with the array of arguments 38 | pixetl "${ARG_ARRAY[@]}" -------------------------------------------------------------------------------- /batch/scripts/test_mock_s3_awscli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #set -e 4 | 5 | # requires arguments 6 | # -s | --source 7 | ME=$(basename "$0") 8 | . get_arguments.sh "$@" 9 | 10 | echo "AWSCLI: COPY DATA FROM S3 to STDOUT" 11 | aws s3 cp "${SRC}" - 12 | -------------------------------------------------------------------------------- /batch/scripts/test_mock_s3_ogr2ogr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -l | --source_layer 10 | # -f | --local_file 11 | ME=$(basename "$0") 12 | . 
get_arguments.sh "$@" 13 | 14 | echo "AWSCLI: COPY DATA FROM S3 to STDOUT" 15 | # shellcheck disable=SC2086 16 | aws s3 cp "$SRC" "$LOCAL_FILE" 17 | 18 | echo "OGR2OGR: Import ${DATASET}.${VERSION} from ${LOCAL_FILE} ${SRC_LAYER}" 19 | # Create schema only, using ogr2ogr 20 | ogr2ogr -f "PostgreSQL" PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 21 | "$LOCAL_FILE" "$SRC_LAYER" \ 22 | -nlt PROMOTE_TO_MULTI -nln "$VERSION" \ 23 | -lco SCHEMA="$DATASET" -lco GEOMETRY_NAME="$GEOMETRY_NAME" -lco SPATIAL_INDEX=NONE -lco FID="$FID_NAME" \ 24 | -t_srs EPSG:4326 -limit 0 25 | -------------------------------------------------------------------------------- /batch/scripts/tmp/create_1x1_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | psql -c " 6 | CREATE MATERIALIZED VIEW $DATASET.{$VERSION}__1x1 7 | WITH a AS ( 8 | SELECT $FID_NAME 9 | ,gfw_grid_1x1_id 10 | ,gfw_grid_10x10_id 11 | ,st_makevalid(st_intersection(w.$GEOMETRY_NAME, g.geom)) AS $GEOMETRY_NAME 12 | FROM $DATASET.$VERSION w 13 | ,gfw_grid_1x1 g 14 | WHERE st_intersects(w.$GEOMETRY_NAME, g.geom) 15 | ) 16 | ,b AS ( 17 | SELECT $FID_NAME 18 | ,gfw_grid_1x1_id 19 | ,gfw_grid_10x10_id 20 | ,CASE 21 | WHEN st_geometrytype($GEOMETRY_NAME) = 'ST_GeometryCollection'::TEXT 22 | THEN st_collectionextract($GEOMETRY_NAME, 3) 23 | ELSE $GEOMETRY_NAME 24 | END AS $GEOMETRY_NAME 25 | FROM a 26 | ) 27 | 28 | SELECT $FID_NAME 29 | ,gfw_grid_1x1_id 30 | ,gfw_grid_10x10_id 31 | ,$GEOMETRY_NAME 32 | FROM b 33 | WHERE st_geometrytype($GEOMETRY_NAME) = 'ST_Polygon' 34 | OR st_geometrytype($GEOMETRY_NAME) = 'ST_MultiPolygon' 35 | GROUP BY $FID_NAME;" 36 | 37 | 38 | # Create indices 39 | psql -c "CREATE INDEX IF NOT EXISTS ${VERSION}__1x1_${GEOMETRY_NAME}_id_idx 40 | ON $DATASET.${VERSION}__1x1 USING gist 41 | (${GEOMETRY_NAME});" -------------------------------------------------------------------------------- /batch/scripts/tmp/export_vector_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "OGR2OGR: Export ${DATASET}.${VERSION} to ${DST} using driver ${DRIVER}" 6 | ogr2ogr -f "${DRIVER}" "${DST}" \ 7 | PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 8 | -sql "select ${COLUMNS} from ${DATASET}.${VERSION}" 9 | -------------------------------------------------------------------------------- /batch/scripts/tmp/import_vector_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "OGR2OGR: Import ${DATASET}.${VERSION} from ${SRC} ${SRC_LAYER}" 6 | ogr2ogr -f "PostgreSQL" PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 7 | "$SRC" "$SRC_LAYER" \ 8 | -nlt PROMOTE_TO_MULTI -nln "$VERSION" \ 9 | -lco SCHEMA="$DATASET" -lco GEOMETRY_NAME="$GEOMETRY_NAME" -lco SPATIAL_INDEX=NONE -lco FID="$FID_NAME" \ 10 | -t_srs EPSG:4326 -limit 0 11 | # --config PG_USE_COPY YES -makevalid 12 | 13 | 14 | echo "PSQL: Add GFW specific layers" 15 | psql -c "ALTER TABLE $DATASET.$VERSION ADD COLUMN ${GEOMETRY_NAME}_wm geometry(MultiPolygon,3857); 16 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_area__ha NUMERIC; 17 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_geostore_id UUID; 18 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_geojson TEXT; 19 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_bbox BOX2D;" 20 | 21 | 22 | # 
http://blog.cleverelephant.ca/2018/09/postgis-external-storage.html 23 | echo "PSQL: Set storage to external for faster querying" 24 | psql -c "ALTER TABLE $DATASET.$VERSION ALTER COLUMN $GEOMETRY_NAME SET STORAGE EXTERNAL; 25 | ALTER TABLE $DATASET.$VERSION ALTER COLUMN ${GEOMETRY_NAME}_wm SET STORAGE EXTERNAL;" 26 | -------------------------------------------------------------------------------- /batch/scripts/unify_projection.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -s | --source 7 | # -T | --target 8 | # --target_crs 9 | 10 | ME=$(basename "$0") 11 | . get_arguments.sh "$@" 12 | 13 | echo "Reproject to a common CRS" 14 | 15 | src_count=0 16 | CMD_ARGS=() 17 | 18 | for s in "${SRC[@]}"; do 19 | source_dir="SRC_${src_count}" 20 | mkdir -p "$source_dir" 21 | 22 | echo "Now recursively downloading $s to $source_dir" 23 | if [[ $s == gs://* ]]; then 24 | gsutil -m cp -r "$s" "$source_dir" 25 | elif [[ $s == s3://* ]]; then 26 | aws s3 cp --recursive --no-progress "$s" "$source_dir" 27 | fi 28 | echo "Done downloading $s to $source_dir" 29 | 30 | reprojected_dir="REPROJECTED_${src_count}" 31 | mkdir -p "$reprojected_dir" 32 | 33 | cd "${source_dir}" 34 | for d in $(find . -type d | sed 's/.\///'); do 35 | mkdir -p "../${reprojected_dir}/${d}" 36 | done 37 | 38 | for f in $(find . -iname "*.tif"| sed 's/.\///'); do 39 | local_src_file="${source_dir}/${f}" 40 | local_warped_file="${reprojected_dir}/${f}" 41 | remote_target_file="${TARGET}/SRC_${src_count}/${f}" 42 | 43 | CMD_ARGS+=("${local_src_file}" "${local_warped_file}" "${TARGET_CRS}" "${remote_target_file}") 44 | done 45 | cd .. 46 | 47 | src_count=$(($src_count+1)) 48 | done 49 | 50 | echo "${CMD_ARGS[@]}" | xargs -n 4 -P 32 _warp_and_upload.sh 51 | -------------------------------------------------------------------------------- /batch/scripts/update_point_geometry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # --lat 9 | # --lng 10 | 11 | # optional arguments 12 | # -g | --geometry_name (get_arguments.sh specifies default) 13 | 14 | ME=$(basename "$0") 15 | . get_arguments.sh "$@" 16 | 17 | # Update GFW columns 18 | echo "PSQL: UPDATE \"$DATASET\".\"$VERSION\". 
Update Point columns" 19 | psql -c "UPDATE \"$DATASET\".\"$VERSION\" SET ${GEOMETRY_NAME} = ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 20 | ${GEOMETRY_NAME}_wm = ST_Transform(ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 3857) 21 | WHERE ${GEOMETRY_NAME} IS null OR ${GEOMETRY_NAME}_wm IS null;" -------------------------------------------------------------------------------- /batch/universal_batch.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.3 2 | LABEL desc="Docker image with ALL THE THINGS for use in Batch by the GFW data API" 3 | LABEL version="v1.0" 4 | 5 | ENV TIPPECANOE_VERSION=2.75.1 6 | 7 | ENV VENV_DIR="/.venv" 8 | 9 | RUN apt-get update -y \ 10 | && apt-get install --no-install-recommends -y python3 python-dev-is-python3 python3-venv \ 11 | postgresql-client jq curl libsqlite3-dev zlib1g-dev zip libpq-dev build-essential gcc g++ \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # --system-site-packages is needed to copy the GDAL Python libs into the venv 16 | RUN python -m venv ${VENV_DIR} --system-site-packages \ 17 | && . ${VENV_DIR}/bin/activate \ 18 | && python -m ensurepip --upgrade \ 19 | && python -m pip install \ 20 | agate~=1.12.0 \ 21 | asyncpg~=0.30.0 \ 22 | awscli~=1.36.18 \ 23 | awscli-plugin-endpoint~=0.4 \ 24 | boto3~=1.35.77 \ 25 | click~=8.1.7 \ 26 | csvkit~=2.0.1 \ 27 | earthengine-api~=0.1.408 \ 28 | fiona~=1.9.6 \ 29 | gsutil~=5.31 \ 30 | numpy~=1.26.4 \ 31 | pandas~=2.1.4 \ 32 | psycopg2~=2.9.10 \ 33 | rasterio~=1.3.11 \ 34 | setuptools~=75.6 \ 35 | shapely~=2.0.4 \ 36 | SQLAlchemy~=1.3.24 \ 37 | tileputty~=0.2.10 38 | 39 | # Install TippeCanoe 40 | RUN mkdir -p /opt/src 41 | WORKDIR /opt/src 42 | RUN curl https://codeload.github.com/felt/tippecanoe/tar.gz/${TIPPECANOE_VERSION} | tar -xz \ 43 | && cd /opt/src/tippecanoe-${TIPPECANOE_VERSION} \ 44 | && make \ 45 | && make install \ 46 | && rm -R /opt/src/tippecanoe-${TIPPECANOE_VERSION} 47 | 48 | # Copy scripts 49 | COPY ./batch/scripts/ /opt/scripts/ 50 | COPY ./batch/python/ /opt/python/ 51 | 52 | # Make sure scripts are executable 53 | RUN chmod +x -R /opt/scripts/ 54 | RUN chmod +x -R /opt/python/ 55 | 56 | ENV PATH="/opt/scripts:${PATH}" 57 | ENV PATH="/opt/python:${PATH}" 58 | 59 | ENV WORKDIR="/" 60 | WORKDIR / 61 | 62 | ENTRYPOINT ["/opt/scripts/report_status.sh"] -------------------------------------------------------------------------------- /docker-compose.prod.yml: -------------------------------------------------------------------------------- 1 | services: 2 | app: 3 | build: 4 | context: . 
5 | args: 6 | - ENV=dev 7 | container_name: app_dev 8 | restart: on-failure 9 | volumes: 10 | - $HOME/.aws:/root/.aws:ro 11 | - ./app:/app/app 12 | - /var/run/docker.sock:/var/run/docker.sock:ro 13 | environment: 14 | - DB_HOST=database 15 | - DATABASE=geostore 16 | - DB_USER=gfw 17 | - DB_PASSWORD=postgres # pragma: allowlist secret 18 | - DB_PORT=5432 19 | - DB_HOST_RO=database 20 | - DATABASE_RO=geostore 21 | - DB_USER_RO=gfw_readonly 22 | - DB_PASSWORD_RO=readonly # pragma: allowlist secret 23 | - DB_PORT_RO=5432 24 | - AWS_DEFAULT_PROFILE=gfw-dev 25 | - LOG_LEVEL=debug 26 | - POSTGRESQL_CLIENT_JOB_DEFINITION=postgres_client_jd 27 | - GDAL_PYTHON_JOB_DEFINITION=gdal_python_jd 28 | - TILE_CACHE_JOB_DEFINITION=tile_cache_jd 29 | - PIXETL_JOB_DEFINITION=pixetl_jd 30 | - AURORA_JOB_QUEUE=aurora_jq 31 | - AURORA_JOB_QUEUE_FAST=aurora_fast_jq 32 | - DATA_LAKE_JOB_QUEUE=data_lake_jq 33 | - TILE_CACHE_JOB_QUEUE=tile_cache_jq 34 | - PIXETL_JOB_QUEUE=pixetl_jq 35 | - ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq 36 | - RASTER_ANALYSIS_LAMBDA_NAME=raster_analysis 37 | - API_URL="http://app_dev:80" 38 | - RW_API_URL=https://api.resourcewatch.org 39 | - GOOGLE_APPLICATION_CREDENTIALS=/root/.gcs/private_key.json 40 | - 'RW_API_KEY={"api-key": null}' 41 | ports: 42 | - "8000:80" 43 | entrypoint: wait_for_postgres.sh /app/start.sh 44 | depends_on: 45 | - database 46 | 47 | database: 48 | container_name: gfw-data-api-database 49 | image: postgis/postgis:14-3.4-alpine 50 | restart: on-failure 51 | ports: 52 | - "54320:5432" 53 | environment: 54 | - POSTGRES_DB=geostore 55 | - POSTGRES_USER=gfw 56 | - POSTGRES_PASSWORD=postgres # pragma: allowlist secret 57 | volumes: 58 | - database_data:/var/lib/postgresql/data 59 | 60 | volumes: 61 | database_data: 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "gfw-data-api" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = "~= 3.10" 7 | dependencies = [ 8 | "aenum", 9 | "aiofiles", 10 | "aiohttp", 11 | "alembic", 12 | "arq", 13 | "asgi_lifespan", 14 | "async-lru", 15 | "asyncpg", 16 | "boto3", 17 | "botocore", 18 | "email-validator", 19 | "fastapi", 20 | "geoalchemy2<0.12", 21 | "geojson", 22 | "gino", 23 | "gino_starlette", 24 | "google-cloud-storage", 25 | "gunicorn", 26 | "httpcore", 27 | "httpx", 28 | "httpx-auth", 29 | "newrelic", 30 | "numpy<2", 31 | "openapi_spec_validator", 32 | "orjson", 33 | "packaging", 34 | "pendulum<3", 35 | "pglast<2", 36 | "psutil", 37 | "psycopg2", 38 | "pydantic<2", 39 | "pyproj", 40 | "python-multipart", 41 | "retrying", 42 | "shapely", 43 | "sqlalchemy<1.4", 44 | "sqlalchemy-utils", 45 | "starlette", 46 | "typer", 47 | "unidecode", 48 | "uvicorn[standard]", 49 | ] 50 | 51 | [dependency-groups] 52 | dev = [ 53 | "asgi-lifespan", 54 | "docker", 55 | "geopandas", # Needed by pixetl in batch script test 56 | # Pixetl is already installed in the pixetl image that's run in Batch, 57 | # this is to enable tests to run in the test container: 58 | "gfw-pixetl", 59 | "moto[awslambda, batch, ec2, s3, secretsmanager]<5", 60 | "pandas<2.2", # Needed by pixetl in batch script test 61 | "pre-commit", 62 | "pytest", 63 | "pytest-asyncio", 64 | "pytest-cov", 65 | "pytest-timeout", 66 | "pytest-unordered>=0.6.1", 67 | "rasterio==1.3.11", 68 | "retrying", # Needed by pixetl in batch script test 69 | ] 70 | 71 | [tool.setuptools] 72 | 
packages = ["app", "batch"] 73 | 74 | [tool.uv.sources] 75 | gfw-pixetl = { git = "https://github.com/wri/gfw_pixetl.git", branch = "develop" } 76 | -------------------------------------------------------------------------------- /scripts/delete_workspace: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | 6 | # allow to overwrite GIT_SHA 7 | POSITIONAL=() 8 | GIT_SHA=$(git rev-parse HEAD) 9 | while [[ $# -gt 0 ]] 10 | do 11 | key="$1" 12 | 13 | case $key in 14 | -g|--git_sha) 15 | GIT_SHA="$2" 16 | shift # past argument 17 | shift # past value 18 | ;; 19 | *) # unknown option 20 | POSITIONAL+=("$1") # save it in an array for later 21 | shift # past argument 22 | ;; 23 | esac 24 | done 25 | set -- "${POSITIONAL[@]}" # restore positional parameters 26 | 27 | docker compose -f terraform/docker/docker-compose.yml build 28 | docker compose -f terraform/docker/docker-compose.yml run --entrypoint delete_workspace --rm terraform "$@" -var="git_sha=${GIT_SHA}" -------------------------------------------------------------------------------- /scripts/develop: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Default values 6 | POSITIONAL=() 7 | BUILD=true 8 | # extracting cmd line arguments 9 | while [[ $# -gt 0 ]] 10 | do 11 | key="$1" 12 | 13 | case $key in 14 | --no_build) 15 | BUILD=false 16 | shift # past argument 17 | ;; 18 | *) # unknown option 19 | POSITIONAL+=("$1") # save it in an array for later 20 | shift # past argument 21 | ;; 22 | esac 23 | done 24 | set -- "${POSITIONAL[@]}" # restore positional parameters 25 | 26 | if [ "${BUILD}" = true ]; then 27 | docker build -t batch_jobs_test . -f batch/universal_batch.dockerfile 28 | docker build -t pixetl_test . 
-f batch/pixetl.dockerfile 29 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev up --abort-on-container-exit --remove-orphans --build 30 | else 31 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev up --abort-on-container-exit --remove-orphans 32 | fi -------------------------------------------------------------------------------- /scripts/infra: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | GIT_SHA=$(git rev-parse HEAD) 6 | 7 | docker compose -f terraform/docker/docker-compose.yml build 8 | docker compose -f terraform/docker/docker-compose.yml run --rm terraform "$@" -var="git_sha=${GIT_SHA}" 9 | -------------------------------------------------------------------------------- /scripts/migrate: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | docker compose -f docker-compose.dev.yml build app 6 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev run --rm --entrypoint alembic app revision --autogenerate 7 | 8 | git add app/models/orm/migrations/versions -------------------------------------------------------------------------------- /scripts/migration_dry_run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | docker compose -f docker-compose.dev.yml build app 6 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev run --rm --entrypoint alembic app -x dry-run upgrade head 7 | -------------------------------------------------------------------------------- /scripts/setup: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | PYTHON_VERSION="3.10" 6 | VIRTUAL_ENV="${VIRTUAL_ENV:-.venv_uv}" 7 | 8 | # Install Python and create the venv all in one go 9 | if command -v "uv" >/dev/null 2>&1; then 10 | echo "uv (needed to create venv) found in path" 11 | else 12 | echo "uv (needed to create venv) NOT found in path" 13 | echo "If uv is installed, add its binary directory to your path and try " 14 | echo "running setup script again. For example: " 15 | echo "'PATH=\$PATH:~/bin ./scripts/setup' for uv installed in ~/bin" 16 | echo "Otherwise, see https://docs.astral.sh/uv/getting-started/installation/" 17 | exit 1 18 | fi 19 | uv venv ${VIRTUAL_ENV} --python ${PYTHON_VERSION} --seed 20 | 21 | # Now install all Data API deps in the venv 22 | # But first avoid some common pitfalls 23 | if command -v "pg_config" >/dev/null 2>&1; then 24 | echo "pg_config (needed to build psycopg2) found in path" 25 | else 26 | echo "pg_config (needed to build psycopg2) NOT found in path" 27 | echo "If libpq is installed, add its binary directory to your path and try " 28 | echo "running setup script again. For example: " 29 | echo "'PATH=\$PATH:/opt/homebrew/opt/libpq/bin ./scripts/setup' for " 30 | echo "libpg installed with homebrew" 31 | exit 1 32 | fi 33 | 34 | echo "Installing deps into venv" 35 | echo "If this fails with a message like 'ld: library 'ssl' not found' " 36 | echo "try adding your OpenSSL dir to your library path like so (modify " 37 | echo "for your OpenSSL version): " 38 | echo "LIBRARY_PATH=/opt/homebrew/Cellar/openssl@3/3.4.0/lib ./scripts/setup" 39 | uv sync --locked --no-install-project 40 | 41 | . 
${VIRTUAL_ENV}/bin/activate 42 | 43 | echo "Installing pre-commit hooks" 44 | pre-commit install 45 | pre-commit 46 | -------------------------------------------------------------------------------- /scripts/terraform: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | docker compose -f terraform/docker/docker-compose.yml run --rm --entrypoint terraform --workdir /usr/local/src/terraform terraform "$@" -------------------------------------------------------------------------------- /scripts/test_v2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | 6 | # Default values 7 | POSITIONAL=() 8 | BUILD=true 9 | DO_COV= 10 | DISABLE_WARNINGS="--disable-warnings" 11 | SHOW_STDOUT= 12 | # extracting cmd line arguments 13 | while [[ $# -gt 0 ]] 14 | do 15 | key="$1" 16 | 17 | case $key in 18 | --no_build) 19 | BUILD=false 20 | shift # past argument 21 | ;; 22 | --do-cov) 23 | DO_COV=--cov=app 24 | shift # past argument 25 | ;; 26 | --show-warnings) 27 | DISABLE_WARNINGS= 28 | shift # past argument 29 | ;; 30 | --show-stdout) 31 | SHOW_STDOUT=--capture=no 32 | shift # past argument 33 | ;; 34 | --moto-port=*) 35 | # prevent port binding issues by explicitly setting the motoserver s3 port 36 | # https://developer.apple.com/forums/thread/682332 37 | export MOTO_PORT="${key#*=}" 38 | shift # past argument=value 39 | ;; 40 | *) # unknown option 41 | POSITIONAL+=("$1") # save it in an array for later 42 | shift # past argument 43 | ;; 44 | esac 45 | done 46 | set -- "${POSITIONAL[@]}" # restore positional parameters 47 | 48 | # If no tests specified, do whole tests_v2 directory 49 | args=$* 50 | if [ $# -eq 0 ]; then 51 | args=tests_v2 52 | fi 53 | 54 | if [ "${BUILD}" = true ]; then 55 | docker build -t batch_jobs_test . -f batch/universal_batch.dockerfile 56 | docker build -t pixetl_test . -f batch/pixetl.dockerfile 57 | docker compose -f docker-compose.test.yml --project-name gfw-data-api_test build --no-cache app_test 58 | fi 59 | 60 | set +e 61 | 62 | # Everything from "--cov-report on" become the arguments to the pytest run inside the docker. 63 | docker compose -f docker-compose.test.yml --project-name gfw-data-api_test run --rm --name app_test app_test --cov-report xml:/app/tests_v2/cobertura.xml $DO_COV $DISABLE_WARNINGS $SHOW_STDOUT $args 64 | exit_code=$? 
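# Capture the pytest exit code before tearing down the containers so the
# script can still exit with the real test result after cleanup succeeds.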
65 | docker compose -f docker-compose.test.yml --project-name gfw-data-api_test down --remove-orphans 66 | exit $exit_code 67 | -------------------------------------------------------------------------------- /terraform.md: -------------------------------------------------------------------------------- 1 | ## Requirements 2 | 3 | | Name | Version | 4 | |------|---------| 5 | | terraform | >=0.12.26 | 6 | | aws | ~> 2.65.0 | 7 | 8 | ## Providers 9 | 10 | | Name | Version | 11 | |------|---------| 12 | | aws | ~> 2.65.0 | 13 | | template | n/a | 14 | | terraform | n/a | 15 | 16 | ## Inputs 17 | 18 | | Name | Description | Type | Default | Required | 19 | |------|-------------|------|---------|:--------:| 20 | | auto\_scaling\_cooldown | n/a | `number` | `300` | no | 21 | | auto\_scaling\_max\_capacity | n/a | `number` | `15` | no | 22 | | auto\_scaling\_max\_cpu\_util | n/a | `number` | `75` | no | 23 | | auto\_scaling\_min\_capacity | n/a | `number` | `1` | no | 24 | | container\_name | n/a | `string` | `"gfw-data-api"` | no | 25 | | container\_port | n/a | `number` | `80` | no | 26 | | desired\_count | n/a | `number` | `1` | no | 27 | | environment | An environment namespace for the infrastructure. | `string` | n/a | yes | 28 | | fargate\_cpu | n/a | `number` | `256` | no | 29 | | fargate\_memory | n/a | `number` | `2048` | no | 30 | | key\_pair | n/a | `string` | `"tmaschler_gfw"` | no | 31 | | listener\_port | n/a | `number` | `80` | no | 32 | | log\_level | n/a | `any` | n/a | yes | 33 | | log\_retention | n/a | `number` | `30` | no | 34 | | region | n/a | `string` | `"us-east-1"` | no | 35 | 36 | ## Outputs 37 | 38 | | Name | Description | 39 | |------|-------------| 40 | | loadbalancer\_dns | n/a | 41 | 42 | -------------------------------------------------------------------------------- /terraform/api_gateway/api_key_authorizer_lambda.py: -------------------------------------------------------------------------------- 1 | def handler(event, context): 2 | api_key = event["headers"].get("x-api-key", None) or event[ 3 | "queryStringParameters" 4 | ].get("x-api-key", "") 5 | policy = { 6 | "principalId": "x-api-key", 7 | "policyDocument": { 8 | "Version": "2012-10-17", 9 | "Statement": [ 10 | { 11 | "Action": "execute-api:Invoke", 12 | "Effect": "Allow", 13 | "Resource": event["methodArn"], 14 | } 15 | ], 16 | }, 17 | "usageIdentifierKey": api_key.strip(), 18 | } 19 | print(policy) 20 | 21 | return policy 22 | -------------------------------------------------------------------------------- /terraform/api_gateway/api_key_authorizer_lambda.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/terraform/api_gateway/api_key_authorizer_lambda.zip -------------------------------------------------------------------------------- /terraform/docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | terraform: 4 | image: globalforestwatch/terraform:1.2.2 5 | volumes: 6 | - ../../:/usr/local/src 7 | - $HOME/.aws:/root/.aws:ro 8 | - /var/run/docker.sock:/var/run/docker.sock 9 | environment: 10 | - ENV 11 | - AWS_ACCESS_KEY_ID 12 | - AWS_SECRET_ACCESS_KEY 13 | - AWS_REGION=us-east-1 14 | working_dir: /usr/local/src 15 | entrypoint: infra 16 | -------------------------------------------------------------------------------- /terraform/generate_port.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import json 4 | 5 | 6 | try: 7 | input_string = sys.argv[1] 8 | min_port = int(sys.argv[2]) 9 | max_port = int(sys.argv[3]) 10 | 11 | random.seed(input_string) 12 | port = random.randint(min_port, max_port) 13 | 14 | output = {"port": str(port)} 15 | print(json.dumps(output)) 16 | except Exception as e: 17 | print(f"Error: {str(e)}", file=sys.stderr) 18 | sys.exit(1) 19 | -------------------------------------------------------------------------------- /terraform/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_policy" "run_batch_jobs" { 2 | name = substr("${local.project}-run_batch_jobs${local.name_suffix}", 0, 64) 3 | policy = data.template_file.task_batch_policy.rendered 4 | } 5 | 6 | resource "aws_iam_policy" "query_batch_jobs" { 7 | name = substr("${local.project}-query_batch_jobs${local.name_suffix}", 0, 64) 8 | policy = data.template_file.query_batch_task_policy.rendered 9 | } 10 | 11 | resource "aws_iam_policy" "s3_read_only" { 12 | name = substr("${local.project}-s3_read_only${local.name_suffix}", 0, 64) 13 | policy = data.local_file.iam_s3_read_only.content 14 | } 15 | 16 | resource "aws_iam_policy" "lambda_invoke" { 17 | name = substr("${local.project}-lambda_invoke${local.name_suffix}", 0, 64) 18 | // policy = data.template_file.iam_lambda_invoke.rendered 19 | policy = data.local_file.iam_lambda_invoke.content 20 | } 21 | 22 | resource "aws_iam_policy" "iam_api_gateway_policy" { 23 | name = substr("${local.project}-api_gateway${local.name_suffix}", 0, 64) 24 | policy = data.local_file.iam_api_gateway_policy.content 25 | } 26 | 27 | resource "aws_iam_policy" "read_gcs_secret" { 28 | name = substr("${local.project}-read_gcs_secret${local.name_suffix}", 0, 64) 29 | policy = data.aws_iam_policy_document.read_gcs_secret_doc.json 30 | } 31 | 32 | resource "aws_iam_policy" "read_new_relic_secret" { 33 | name = substr("${local.project}-read_new-relic_secret${local.name_suffix}", 0, 64) 34 | policy = data.aws_iam_policy_document.read_new_relic_lic.json 35 | } 36 | 37 | resource "aws_iam_policy" "read_rw_api_key_secret" { 38 | name = substr("${local.project}-read_rw_api_key_secret${local.name_suffix}", 0, 64) 39 | policy = data.aws_iam_policy_document.read_rw_api_key.json 40 | } 41 | 42 | resource "aws_iam_policy" "tile_cache_bucket_policy" { 43 | name = substr("${local.project}-tile_cache_bucket_policy${local.name_suffix}", 0, 64) 44 | policy = data.template_file.tile_cache_bucket_policy.rendered 45 | } 46 | 47 | resource "aws_iam_policy" "step_function_policy" { 48 | name = substr("${local.project}-step_function_policy${local.name_suffix}", 0, 64) 49 | policy = data.template_file.step_function_policy.rendered 50 | } 51 | -------------------------------------------------------------------------------- /terraform/logging.tf: -------------------------------------------------------------------------------- 1 | # 2 | # CloudWatch Resources 3 | # 4 | resource "aws_cloudwatch_log_group" "default" { 5 | name = substr("/aws/ecs/${local.project}-log${local.name_suffix}", 0, 64) 6 | retention_in_days = var.log_retention 7 | } 8 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/endpoint/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_api_gateway_method" "method" { 2 | rest_api_id = var.rest_api_id 3 
| resource_id = var.api_resource.id 4 | http_method = var.http_method 5 | authorization = var.authorization 6 | authorizer_id = var.authorizer_id 7 | request_parameters = var.method_parameters 8 | api_key_required = var.require_api_key 9 | } 10 | 11 | 12 | resource "aws_api_gateway_integration" "integration" { 13 | rest_api_id = var.rest_api_id 14 | resource_id = var.api_resource.id 15 | http_method = aws_api_gateway_method.method.http_method 16 | 17 | 18 | integration_http_method = "ANY" 19 | type = "HTTP_PROXY" 20 | uri = var.integration_uri 21 | 22 | request_parameters = var.integration_parameters 23 | } -------------------------------------------------------------------------------- /terraform/modules/api_gateway/endpoint/outputs.tf: -------------------------------------------------------------------------------- 1 | output "integration_point" { 2 | value = aws_api_gateway_integration.integration 3 | } -------------------------------------------------------------------------------- /terraform/modules/api_gateway/endpoint/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rest_api_id" { 2 | type = string 3 | description = "Id of API Gateway to add resource to" 4 | } 5 | 6 | variable "authorizer_id" { 7 | type = string 8 | default = "" 9 | } 10 | 11 | variable "require_api_key" { 12 | type = bool 13 | default = false 14 | } 15 | 16 | variable "http_method" { 17 | type = string 18 | 19 | validation { 20 | condition = contains([ 21 | "ANY", 22 | "DELETE", 23 | "GET", 24 | "HEAD", 25 | "OPTIONS", 26 | "PATCH", 27 | "POST", 28 | "PUT" 29 | ], var.http_method) 30 | error_message = "Invalid HTTP method passed." 31 | } 32 | } 33 | 34 | variable "authorization" { 35 | validation { 36 | condition = contains([ 37 | "NONE", 38 | "CUSTOM", 39 | "AWS_IAM", 40 | "COGNITO_USER_POOLS" 41 | ], var.authorization) 42 | error_message = "Unknown authorization method." 
43 | } 44 | } 45 | 46 | variable "integration_uri" { 47 | type = string 48 | } 49 | 50 | variable "integration_parameters" { 51 | type = map 52 | } 53 | 54 | variable "method_parameters" { 55 | type = map 56 | } 57 | 58 | variable "api_resource" { 59 | type = object({ 60 | id = string 61 | path_part = string 62 | }) 63 | description = "Instance of aws_api_gateway_resource" 64 | } 65 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/gateway/outputs.tf: -------------------------------------------------------------------------------- 1 | output "internal_usage_plan_id" { 2 | value = aws_api_gateway_usage_plan.internal.id 3 | } 4 | 5 | output "external_usage_plan_id" { 6 | value = aws_api_gateway_usage_plan.external.id 7 | } 8 | 9 | output "api_gateway_id" { 10 | value = aws_api_gateway_rest_api.api_gw_api.id 11 | } 12 | 13 | output "invoke_url" { 14 | value = aws_api_gateway_stage.api_gw_stage.invoke_url 15 | } 16 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/gateway/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | description = "Name of API Gateway instance" 4 | default = "GFWDataAPIGateway" 5 | } 6 | 7 | variable "description" { 8 | type = string 9 | description = "Description of API Gateway Instance" 10 | default = "GFW Data API Gateway" 11 | } 12 | 13 | variable "stage_name" { 14 | type = string 15 | description = "The stage under which the instance will be deployed" 16 | default = "deploy" 17 | } 18 | 19 | variable "download_endpoints" { 20 | type = list(string) 21 | description = "path parts to download endpoints" 22 | 23 | # listing spatial endpoints as gateway needs them explicitly created 24 | # in order to apply endpoint-level throttling to them 25 | default = ["geotiff", "gpkg", "shp"] 26 | } 27 | 28 | variable "lb_dns_name" { 29 | type = string 30 | description = "Application load balancer to forward requests to" 31 | } 32 | 33 | variable "api_gateway_role_policy" { 34 | type = string 35 | } 36 | 37 | variable "lambda_role_policy" { 38 | type = string 39 | } 40 | 41 | variable "cloudwatch_policy" { 42 | type = string 43 | } 44 | 45 | variable "lambda_invoke_policy" { 46 | type = string 47 | } 48 | 49 | variable "api_gateway_usage_plans" { 50 | type = map(any) 51 | description = "Throttling limits for API Gateway" 52 | default = { 53 | internal_apps = { 54 | quota_limit = 1000000 # per day 55 | burst_limit = 1000 56 | rate_limit = 200 # per second 57 | } 58 | external_apps = { 59 | quota_limit = 10000 60 | burst_limit = 20 61 | rate_limit = 10 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/resource/outputs.tf: -------------------------------------------------------------------------------- 1 | output "aws_api_gateway_resource" { 2 | value = aws_api_gateway_resource.aws_api_gateway_resource 3 | } -------------------------------------------------------------------------------- /terraform/modules/api_gateway/resource/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rest_api_id" { 2 | type = string 3 | } 4 | 5 | variable "parent_id" { 6 | type = string 7 | } 8 | 9 | variable "path_part" { 10 | type = string 11 | } 12 | -------------------------------------------------------------------------------- 
/terraform/modules/batch/outputs.tf: -------------------------------------------------------------------------------- 1 | output "aurora_job_definition" { 2 | value = aws_batch_job_definition.aurora 3 | } 4 | 5 | output "aurora_job_definition_arn" { 6 | value = aws_batch_job_definition.aurora.arn 7 | } 8 | 9 | output "aurora_job_queue_arn" { 10 | value = aws_batch_job_queue.aurora.arn 11 | } 12 | 13 | output "aurora_job_queue_fast_arn" { 14 | value = aws_batch_job_queue.aurora_fast.arn 15 | } 16 | 17 | output "data_lake_job_definition_arn" { 18 | value = aws_batch_job_definition.data_lake.arn 19 | } 20 | 21 | output "data_lake_job_definition" { 22 | value = aws_batch_job_definition.data_lake 23 | } 24 | 25 | output "data_lake_job_queue_arn" { 26 | value = aws_batch_job_queue.data_lake.arn 27 | } 28 | 29 | output "pixetl_job_definition_arn" { 30 | value = aws_batch_job_definition.pixetl.arn 31 | } 32 | 33 | output "pixetl_job_definition" { 34 | value = aws_batch_job_definition.pixetl 35 | } 36 | 37 | output "pixetl_job_queue_arn" { 38 | value = aws_batch_job_queue.pixetl.arn 39 | } 40 | 41 | output "on_demand_compute_job_queue_arn" { 42 | value = aws_batch_job_queue.on_demand.arn 43 | } 44 | 45 | output "tile_cache_job_definition_arn" { 46 | value = aws_batch_job_definition.tile_cache.arn 47 | } 48 | 49 | output "tile_cache_job_definition" { 50 | value = aws_batch_job_definition.tile_cache 51 | } 52 | 53 | output "tile_cache_job_queue_arn" { 54 | value = aws_batch_job_queue.tile_cache.arn 55 | } 56 | -------------------------------------------------------------------------------- /terraform/modules/batch/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project" { type = string } 2 | variable "name_suffix" { type = string } 3 | variable "aurora_compute_environment_arn" { type = string } 4 | variable "data_lake_compute_environment_arn" { type = string } 5 | variable "cogify_compute_environment_arn" { type = string } 6 | variable "tile_cache_compute_environment_arn" { type = string } 7 | variable "pixetl_compute_environment_arn" { type = string } 8 | variable "gdal_repository_url" { type = string } 9 | variable "postgres_repository_url" { type = string } 10 | variable "pixetl_repository_url" { type = string } 11 | variable "tile_cache_repository_url" { type = string } 12 | variable "environment" { type = string } 13 | variable "iam_policy_arn" { type = list(string) } 14 | variable "aurora_max_vcpus" { type = number } 15 | variable "gcs_secret" { type = string } 16 | -------------------------------------------------------------------------------- /terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "loadbalancer_dns" { 2 | value = coalesce(module.fargate_autoscaling.lb_dns_name, var.lb_dns_name) 3 | } 4 | 5 | output "generated_port" { 6 | value = length(data.external.generate_port) > 0 ? data.external.generate_port[0].result["port"] : var.listener_port 7 | } 8 | -------------------------------------------------------------------------------- /terraform/scripts/hash.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Calculates hash of Docker image source contents 4 | # 5 | # Must be identical to the script that is used by the 6 | # gfw-terraform-modules:terraform/modules/container_registry Terraform module. 7 | # 8 | # Usage: 9 | # 10 | # $ ./hash.sh . 
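#   An optional second argument selects the directory whose .dockerignore is
#   honored (both arguments default to "."), e.g.:
#
#   $ ./hash.sh . batch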
11 | # 12 | 13 | set -e 14 | 15 | pushd () { 16 | command pushd "$@" > /dev/null 17 | } 18 | 19 | popd () { 20 | command popd "$@" > /dev/null 21 | } 22 | 23 | ROOT_DIR=${1:-.} 24 | DOCKER_PATH=${2:-.} 25 | IGNORE="${DOCKER_PATH}/.dockerignore" 26 | 27 | pushd "$ROOT_DIR" 28 | 29 | # Hash all source files of the Docker image 30 | if [ -f "$IGNORE" ]; then 31 | # We don't want to compute hashes for files listed in .dockerignore 32 | # to match regex pattern we need to escape leading . 33 | a=$(printf "! -regex ^./%s.* " `< .dockerignore`) 34 | b=${a//\/.//\\\.} 35 | 36 | file_hashes="$( 37 | find . -type f $b -exec md5sum {} \; 38 | )" 39 | else 40 | # Exclude Python cache files, dot files 41 | file_hashes="$( 42 | find . -type f -not -name '*.pyc' -not -path './.**' -exec md5sum {} \; 43 | )" 44 | fi 45 | 46 | popd 47 | 48 | hash="$(echo "$file_hashes" | md5sum | cut -d' ' -f1)" 49 | 50 | echo '{ "hash": "'"$hash"'" }' 51 | -------------------------------------------------------------------------------- /terraform/templates/api_gateway_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "", 6 | "Effect": "Allow", 7 | "Action": [ 8 | "apigateway:DELETE", 9 | "apigateway:POST", 10 | "apigateway:GET" 11 | ], 12 | "Resource": [ 13 | "arn:aws:apigateway:*::/apikeys/*", 14 | "arn:aws:apigateway:*::/apikeys", 15 | "arn:aws:apigateway:*::/usageplans/*/keys" 16 | ] 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /terraform/templates/cloudwatch_log_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "logs:CreateLogGroup", 8 | "logs:CreateLogStream", 9 | "logs:DescribeLogGroups", 10 | "logs:DescribeLogStreams", 11 | "logs:PutLogEvents", 12 | "logs:GetLogEvents", 13 | "logs:FilterLogEvents" 14 | ], 15 | "Resource": "*" 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /terraform/templates/container_properties.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "command": [], 3 | "image": "${image_url}", 4 | "vcpus": ${cpu}, 5 | "memory": ${memory}, 6 | "retry_strategy": { 7 | "attempts": 1 8 | }, 9 | "timeout": { 10 | "attempt_duration_seconds":7500 11 | }, 12 | "environment": [ 13 | { 14 | "name": "ENV", 15 | "value": "${environment}" 16 | }, 17 | { 18 | "name": "JOB_ROLE_ARN", 19 | "value": "${clone_role_arn}" 20 | }, 21 | { 22 | "name": "ECS_TASK_METADATA_RPS_LIMIT", 23 | "value": "100,150" 24 | }, 25 | { 26 | "name": "LC_ALL", 27 | "value": "C.UTF-8" 28 | }, 29 | { 30 | "name": "LANG", 31 | "value": "C.UTF-8" 32 | }, 33 | { 34 | "name": "TILE_CACHE", 35 | "value": "${tile_cache}" 36 | }, 37 | { 38 | "name": "DATA_LAKE", 39 | "value": "${data_lake}" 40 | }, 41 | { 42 | "name": "MAX_TASKS", 43 | "value": "${max_tasks}" 44 | } 45 | ], 46 | "jobRoleArn": "${job_role_arn}", 47 | "volumes": [ 48 | { 49 | "host": { 50 | "sourcePath": "/tmp" 51 | }, 52 | "name": "tmp" 53 | } 54 | ], 55 | "mountPoints": [ 56 | { 57 | "sourceVolume": "tmp", 58 | "containerPath": "/tmp", 59 | "readOnly": false 60 | } 61 | ], 62 | "ulimits": [ 63 | { 64 | "hardLimit": ${hardULimit}, 65 | "name": "nofile", 66 | "softLimit": ${softULimit} 67 | } 68 | ] 69 | } 
-------------------------------------------------------------------------------- /terraform/templates/iam_assume_role.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "", 6 | "Effect": "Allow", 7 | "Action": "sts:AssumeRole", 8 | "Resource": "${role_arn}" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /terraform/templates/iam_s3_read_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "s3:Get*", 8 | "s3:List*" 9 | ], 10 | "Resource": "*" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /terraform/templates/iam_trust_entity.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "", 6 | "Action": "sts:AssumeRole", 7 | "Effect": "Allow", 8 | "Principal": { 9 | "AWS": "${role_arn}" 10 | } 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /terraform/templates/lambda_invoke_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": "lambda:InvokeFunction", 7 | "Resource": "*" 8 | } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /terraform/templates/query_batch_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "batch:ListJobs", 8 | "batch:DescribeJobs" 9 | ], 10 | "Resource": "*" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /terraform/templates/role-trust-policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Action": "sts:AssumeRole", 6 | "Effect": "Allow", 7 | "Principal": { 8 | "Service": "${service}.amazonaws.com" 9 | } 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /terraform/templates/run_batch_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "batch:SubmitJob", 8 | "batch:CancelJob", 9 | "batch:TerminateJob", 10 | "batch:TagResource" 11 | ], 12 | "Resource": [ 13 | "${aurora_job_queue_arn}", 14 | "${aurora_job_queue_fast_arn}", 15 | "${aurora_job_definition_arn}", 16 | 17 | "${data_lake_job_queue_arn}", 18 | "${data_lake_job_definition_arn}", 19 | 20 | "${tile_cache_job_queue_arn}", 21 | "${tile_cache_job_definition_arn}", 22 | 23 | "${pixetl_job_queue_arn}", 24 | "${pixetl_job_definition_arn}", 25 | 26 | "${on_demand_compute_job_queue_arn}" 27 | ] 28 | }, 29 | { 30 | "Effect": "Allow", 31 | "Action": [ 32 | "batch:ListJobs", 33 | "batch:DescribeJobs", 34 | "batch:DescribeJobQueues", 35 | "batch:DescribeComputeEnvironments", 36 | "batch:DescribeJobDefinitions" 37 | ], 38 | "Resource": "*" 39 | } 40 | ] 41 | } 
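Editor's note: the `*.json.tmpl` policies above are rendered by Terraform (the root module pins the hashicorp/template provider, as terraform/versions.tf later in this listing shows), but the `${...}` placeholders can also be exercised on their own. The sketch below uses Python's `string.Template`, whose `${name}` syntax happens to match, to render iam_assume_role.json.tmpl with a hypothetical role ARN and check that the result is valid JSON; the ARN and the relative path are assumptions for the example, not values used by the project.

```python
# Sketch: render one of the ${...} policy templates outside Terraform and verify
# that the output parses as JSON. string.Template shares the ${name} placeholder syntax.
# The path and role ARN below are placeholders for this example.
import json
from string import Template

TEMPLATE_PATH = "terraform/templates/iam_assume_role.json.tmpl"
ROLE_ARN = "arn:aws:iam::123456789012:role/example-batch-role"  # hypothetical

with open(TEMPLATE_PATH) as handle:
    rendered = Template(handle.read()).substitute(role_arn=ROLE_ARN)

policy = json.loads(rendered)  # raises ValueError if the rendered text is not valid JSON
assert policy["Statement"][0]["Action"] == "sts:AssumeRole"
print(json.dumps(policy, indent=2))
```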
-------------------------------------------------------------------------------- /terraform/templates/step_function_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "states:StartExecution" 8 | ], 9 | "Resource": [ 10 | "${raster_analysis_state_machine_arn}" 11 | ] 12 | }, 13 | { 14 | "Effect": "Allow", 15 | "Action": [ 16 | "states:DescribeExecution", 17 | "states:DescribeMapRun", 18 | "states:ListMapRuns" 19 | ], 20 | "Resource": "*" 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /terraform/templates/tile_cache_bucket_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "s3:ListBucket", 8 | "s3:PutLifecycleConfiguration" 9 | ], 10 | "Resource": "${bucket_arn}" 11 | }, 12 | { 13 | "Effect": "Allow", 14 | "Action": "s3:*", 15 | "Resource": [ 16 | "${bucket_arn}/*" 17 | ] 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /terraform/vars/backend-dev.tfvars: -------------------------------------------------------------------------------- 1 | bucket = "gfw-terraform-dev" 2 | -------------------------------------------------------------------------------- /terraform/vars/backend-production.tfvars: -------------------------------------------------------------------------------- 1 | bucket = "gfw-terraform" -------------------------------------------------------------------------------- /terraform/vars/backend-staging.tfvars: -------------------------------------------------------------------------------- 1 | bucket = "gfw-terraform-staging" 2 | -------------------------------------------------------------------------------- /terraform/vars/terraform-dev.tfvars: -------------------------------------------------------------------------------- 1 | environment = "dev" 2 | log_level = "debug" 3 | service_url = "https://dev-data-api.globalforestwatch.org" # fake, needed for CloudFront 4 | rw_api_url = "https://api.resourcewatch.org" 5 | rw_api_key_arn = "arn:aws:secretsmanager:us-east-1:563860007740:secret:gfw-api/rw-api-key-YhLbaM" # pragma: allowlist secret 6 | desired_count = 1 7 | auto_scaling_min_capacity = 1 8 | auto_scaling_max_capacity = 5 9 | lambda_analysis_workspace = "feature-otf_lists" 10 | key_pair = "dmannarino_gfw" 11 | create_cloudfront_distribution = false 12 | new_relic_license_key_arn = "arn:aws:secretsmanager:us-east-1:563860007740:secret:newrelic/license_key-lolw24" 13 | load_balancer_security_group = "sg-07c9331c01f8da1c8" 14 | load_balancer_arn = "arn:aws:elasticloadbalancing:us-east-1:563860007740:loadbalancer/app/gfw-data-api-elb-shared-dev-lb/60c3ad42ca6522e3" 15 | lb_dns_name = "gfw-data-api-elb-shared-dev-lb-10091095.us-east-1.elb.amazonaws.com" 16 | api_gateway_id = "vzgmihei77" 17 | api_gw_external_app_id = "f10vmg" 18 | api_gw_internal_app_id = "ka6k5w" 19 | api_gateway_url = "https://wddlsuo04c.execute-api.us-east-1.amazonaws.com/deploy" 20 | -------------------------------------------------------------------------------- /terraform/vars/terraform-production.tfvars: -------------------------------------------------------------------------------- 1 | environment = "production" 2 | log_level = "info" 3 | service_url = "https://data-api.globalforestwatch.org" 4 | 
rw_api_url = "https://api.resourcewatch.org" 5 | rw_api_key_arn = "arn:aws:secretsmanager:us-east-1:401951483516:secret:gfw-api/rw-api-key-YQ50uP" # pragma: allowlist secret 6 | desired_count = 2 7 | auto_scaling_min_capacity = 2 8 | auto_scaling_max_capacity = 15 9 | fargate_cpu = 2048 10 | fargate_memory = 4096 11 | lambda_analysis_workspace = "default" 12 | key_pair = "dmannarino_gfw" 13 | new_relic_license_key_arn = "arn:aws:secretsmanager:us-east-1:401951483516:secret:newrelic/license_key-CyqUPX" 14 | -------------------------------------------------------------------------------- /terraform/vars/terraform-staging.tfvars: -------------------------------------------------------------------------------- 1 | environment = "staging" 2 | log_level = "info" 3 | service_url = "https://staging-data-api.globalforestwatch.org" 4 | rw_api_url = "https://api.resourcewatch.org" 5 | rw_api_key_arn = "arn:aws:secretsmanager:us-east-1:274931322839:secret:gfw-api/rw-api-key-xG9YwX" # pragma: allowlist secret 6 | desired_count = 1 7 | auto_scaling_min_capacity = 1 8 | auto_scaling_max_capacity = 15 9 | lambda_analysis_workspace = "default" 10 | key_pair = "dmannarino_gfw" 11 | new_relic_license_key_arn = "arn:aws:secretsmanager:us-east-1:274931322839:secret:newrelic/license_key-1wKZAY" 12 | -------------------------------------------------------------------------------- /terraform/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | aws = { 4 | source = "hashicorp/aws" 5 | version = ">= 3, < 4" 6 | } 7 | local = { 8 | source = "hashicorp/local" 9 | } 10 | template = { 11 | source = "hashicorp/template" 12 | } 13 | } 14 | required_version = ">= 0.13, < 0.14" 15 | } 16 | -------------------------------------------------------------------------------- /tests/crud/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/crud/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/append_test.tsv: -------------------------------------------------------------------------------- 1 | iso adm1 adm2 longitude latitude alert__date alert__time_utc confidence__cat bright_ti4__K bright_ti5__K frp__MW wdpa_protected_area__iucn_cat is__umd_regional_primary_forest_2001 is__birdlife_alliance_for_zero_extinction_site is__birdlife_key_biodiversity_area is__landmark_land_right gfw_plantation__type is__gfw_mining is__gfw_managed_forest rspo_oil_palm__certification_status is__gfw_wood_fiber is__peatland is__idn_forest_moratorium is__gfw_oil_palm idn_forest_area__type per_forest_concession__type is__gfw_oil_gas is__gmw_mangroves_2016 is__ifl_intact_forest_landscape_2016 bra_biome__name alert__count 2 | QRC 7 18 38.24999 -14.63781 2018-08-19 1022 h 367.0 303.2 6.1 "" false false false false "" false false "" false false false false "" "" false false false "" 1 3 | XON 3 7 -66.29684 -14.5566 2019-10-16 1811 n 334.6 294.7 27.5 Other Category true false false true "" false false "" false false false false "" "" false false false "" 1 4 | -------------------------------------------------------------------------------- /tests/fixtures/aws/config: -------------------------------------------------------------------------------- 1 | [default] 2 | s3 = 3 | endpoint_url = http://motoserver:50000 4 | 5 | [plugins] 6 | endpoint = awscli_plugin_endpoint 
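Editor's note: the tests/fixtures/aws/config file just above routes the AWS CLI's S3 calls to a local moto server via the awscli_plugin_endpoint plugin. A boto3 client can be pointed at the same fake endpoint; the sketch below only illustrates that pattern. The endpoint URL is taken from the fixture, while the bucket name, credentials, and the assumption that `motoserver` is a docker-compose.test.yml service are mine, not code from the repository.

```python
# Sketch: aim boto3 at the same mock S3 endpoint the AWS CLI fixture uses.
# Endpoint URL copied from tests/fixtures/aws/config; bucket name and credentials
# are placeholders -- this is not the repository's test setup code.
import boto3

s3 = boto3.client(
    "s3",
    endpoint_url="http://motoserver:50000",  # assumed to be a docker-compose.test.yml service
    aws_access_key_id="testing",
    aws_secret_access_key="testing",
    region_name="us-east-1",
)

s3.create_bucket(Bucket="example-test-bucket")
s3.put_object(
    Bucket="example-test-bucket",
    Key="fixtures/append_test.tsv",
    Body=b"iso\tadm1\tadm2\n",
)
contents = s3.list_objects_v2(Bucket="example-test-bucket").get("Contents", [])
print([obj["Key"] for obj in contents])
```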
-------------------------------------------------------------------------------- /tests/fixtures/test.csv: -------------------------------------------------------------------------------- 1 | alert__date,geom 2 | 2001-03-01,0103000000010000000500000000000000385050C000000000406045C000000000385050C000000000D06045C000000000805050C000000000D06045C000000000805050C000000000406045C000000000385050C000000000406045C0 3 | -------------------------------------------------------------------------------- /tests/fixtures/test.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "MultiPolygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | [ 13 | 10.67647933959961, 14 | 53.8577916408477 15 | ], 16 | [ 17 | 10.699653625488281, 18 | 53.8577916408477 19 | ], 20 | [ 21 | 10.699653625488281, 22 | 53.87575866462502 23 | ], 24 | [ 25 | 10.67647933959961, 26 | 53.87575866462502 27 | ], 28 | [ 29 | 10.67647933959961, 30 | 53.8577916408477 31 | ] 32 | ] 33 | ] 34 | ] 35 | } 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /tests/fixtures/test.gpkg.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/fixtures/test.gpkg.zip -------------------------------------------------------------------------------- /tests/fixtures/test.shp.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/fixtures/test.shp.zip -------------------------------------------------------------------------------- /tests/fixtures/test2.csv: -------------------------------------------------------------------------------- 1 | alert__date,geom 2 | 2001-03-01,010300000001000000050000000000000008AD50C000000000101143C00000000008AD50C000000000A01143C00000000050AD50C000000000A01143C00000000050AD50C000000000101143C00000000008AD50C000000000101143C0 3 | -------------------------------------------------------------------------------- /tests/fixtures/test2.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [{ 4 | "type": "Feature", 5 | "properties": {}, 6 | "geometry": { 7 | "type": "Polygon", 8 | "coordinates": [ 9 | [ 10 | [ 11 | -77.04093933105469, 12 | 38.995840128965035 13 | ], 14 | [ 15 | -77.1653938293457, 16 | 38.90305681295715 17 | ], 18 | [ 19 | -77.03973770141602, 20 | 38.791556581282244 21 | ], 22 | [ 23 | -76.9094467163086, 24 | 38.8930369656108 25 | ], 26 | [ 27 | -77.04093933105469, 28 | 38.995840128965035 29 | ] 30 | ] 31 | ] 32 | } 33 | }] 34 | } -------------------------------------------------------------------------------- /tests/models/test_jobs.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | from app.models.pydantic.jobs import Job 4 | from app.tasks import callback_constructor 5 | 6 | 7 | def test_jobs_model(): 8 | 9 | callback = callback_constructor(uuid4()) 10 | 11 | job = Job( 12 | dataset="test", 13 | job_name="test", 14 | job_queue="test", 15 | job_definition="test", 16 | command=["1"], 17 | environment=[{"name": "TEST", "value": "TEST"}], 18 | vcpus=1, 19 | memory=2, 20 | attempts=1, 21 | 
attempt_duration_seconds=1, 22 | parents=None, 23 | callback=callback, 24 | ) 25 | 26 | assert job.environment == [ 27 | {"name": "TEST", "value": "TEST"}, 28 | {"name": "CORES", "value": "1"}, 29 | {"name": "MAX_MEM", "value": "2"}, 30 | ] 31 | 32 | job.vcpus = 45 33 | assert job.environment == [ 34 | {"name": "TEST", "value": "TEST"}, 35 | {"name": "CORES", "value": "45"}, 36 | {"name": "MAX_MEM", "value": "2"}, 37 | ] 38 | 39 | job.memory = 100 40 | assert job.environment == [ 41 | {"name": "TEST", "value": "TEST"}, 42 | {"name": "CORES", "value": "45"}, 43 | {"name": "MAX_MEM", "value": "100"}, 44 | ] 45 | -------------------------------------------------------------------------------- /tests/routes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/routes/__init__.py -------------------------------------------------------------------------------- /tests/routes/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/routes/datasets/__init__.py -------------------------------------------------------------------------------- /tests/routes/test_authorization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastapi import HTTPException 3 | 4 | from app.authentication.token import is_admin, is_service_account 5 | from app.utils.rw_api import who_am_i 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_is_admin(): 10 | 11 | message = "" 12 | try: 13 | await is_admin("my_fake_token") 14 | except HTTPException as e: 15 | message = e.detail 16 | 17 | assert message == "Unauthorized" 18 | 19 | 20 | @pytest.mark.asyncio 21 | async def test_is_service_account(): 22 | 23 | message = "" 24 | try: 25 | await is_service_account("my_fake_token") 26 | except HTTPException as e: 27 | message = e.detail 28 | 29 | assert message == "Unauthorized" 30 | 31 | 32 | @pytest.mark.asyncio 33 | async def test_who_am_i(): 34 | response = await who_am_i("my_fake_token") 35 | assert response.status_code == 401 36 | 37 | 38 | @pytest.mark.asyncio 39 | async def test_login(async_client): 40 | response = await async_client.post( 41 | "/auth/token", data={"username": "name", "password": "secret"} 42 | ) 43 | assert response.status_code == 401 44 | -------------------------------------------------------------------------------- /tests/tasks/test_default_assets.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from .. 
import BUCKET, SHP_NAME 5 | from ..utils import create_default_asset 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_default_asset_cant_delete(batch_client, async_client: AsyncClient): 10 | _, logs = batch_client 11 | 12 | dataset = "test" 13 | 14 | version = "v1.1.1" 15 | input_data = { 16 | "creation_options": { 17 | "source_type": "vector", 18 | "source_uri": [f"s3://{BUCKET}/{SHP_NAME}"], 19 | "source_driver": "ESRI Shapefile", 20 | "create_dynamic_vector_tile_cache": False, 21 | }, 22 | } 23 | 24 | asset = await create_default_asset( 25 | dataset, 26 | version, 27 | version_payload=input_data, 28 | async_client=async_client, 29 | logs=logs, 30 | execute_batch_jobs=False, 31 | skip_dataset=False, 32 | ) 33 | asset_id = asset["asset_id"] 34 | 35 | response = await async_client.delete(f"/asset/{asset_id}") 36 | assert response.status_code == 409 37 | expected_message = ( 38 | "Deletion failed. You cannot delete a default asset. " 39 | "To delete a default asset you must delete the parent version." 40 | ) 41 | assert response.json()["message"] == expected_message 42 | -------------------------------------------------------------------------------- /tests/tasks/test_delete_assets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine, db 4 | from app.settings.globals import DATA_LAKE_BUCKET 5 | from app.tasks.delete_assets import ( 6 | delete_database_table_asset, 7 | delete_raster_tileset_assets, 8 | ) 9 | from app.utils.aws import get_s3_client 10 | from tests import TSV_PATH 11 | 12 | 13 | @pytest.mark.asyncio 14 | async def test_delete_raster_tileset_assets(): 15 | s3_client = get_s3_client() 16 | dataset = "test_delete_raster_tileset" 17 | version = "table" 18 | srid = "epsg-4326" 19 | grid = "10/40000" 20 | value = "year" 21 | 22 | for i in range(0, 10): 23 | s3_client.upload_file( 24 | TSV_PATH, 25 | DATA_LAKE_BUCKET, 26 | f"{dataset}/{version}/raster/{srid}/{grid}/{value}/test_{i}.tsv", 27 | ) 28 | 29 | response = s3_client.list_objects_v2(Bucket=DATA_LAKE_BUCKET, Prefix=dataset) 30 | 31 | assert response["KeyCount"] == 10 32 | 33 | await delete_raster_tileset_assets(dataset, version, srid, grid, value) 34 | 35 | response = s3_client.list_objects_v2(Bucket=DATA_LAKE_BUCKET, Prefix=dataset) 36 | assert response["KeyCount"] == 0 37 | 38 | 39 | @pytest.mark.asyncio 40 | async def test_delete_database_table(app): 41 | dataset = "test" 42 | version = "table" 43 | 44 | async with ContextEngine("WRITE"): 45 | # create schema and stable 46 | await db.all(f"CREATE SCHEMA {dataset};") 47 | await db.all(f"CREATE TABLE {dataset}.{version} (col1 text);") 48 | 49 | rows = await db.all(f"select * from pg_tables where schemaname='{dataset}';") 50 | assert len(rows) == 1 51 | 52 | # test if function drops table 53 | await delete_database_table_asset(dataset, version) 54 | 55 | rows = await db.all(f"select * from pg_tables where schemaname='{dataset}';") 56 | assert len(rows) == 0 57 | 58 | # clean up 59 | await db.all(f"DROP SCHEMA {dataset};") 60 | -------------------------------------------------------------------------------- /tests/utils/test_path.py: -------------------------------------------------------------------------------- 1 | from app.utils.path import get_layer_name, is_zipped 2 | from tests import BUCKET, GEOJSON_NAME, SHP_NAME 3 | 4 | 5 | def test_zipped(): 6 | s3_uri = f"s3://{BUCKET}/{GEOJSON_NAME}" 7 | zipped = is_zipped(s3_uri) 8 | assert zipped is False 9 | 10 | s3_uri = 
f"s3://{BUCKET}/{SHP_NAME}" 11 | zipped = is_zipped(s3_uri) 12 | assert zipped is True 13 | 14 | found = True 15 | s3_uri = f"s3://{BUCKET}/doesntexist" 16 | try: 17 | is_zipped(s3_uri) 18 | except FileNotFoundError: 19 | found = False 20 | 21 | assert not found 22 | 23 | 24 | def test_get_layer_name(): 25 | s3_uri = f"s3://{BUCKET}/{SHP_NAME}" 26 | layer = get_layer_name(s3_uri) 27 | assert layer == "test" 28 | 29 | s3_uri = f"s3://{BUCKET}/{GEOJSON_NAME}" 30 | layer = get_layer_name(s3_uri) 31 | assert layer == "test" 32 | -------------------------------------------------------------------------------- /tests_v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/fixtures/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/fixtures/authentication/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/authentication/api_keys.py: -------------------------------------------------------------------------------- 1 | GOOD_ORGANIZATIONS = ["WRI", "Global Forest Watch"] 2 | GOOD_EMAILS = [ 3 | "info@wri.org", 4 | "admin@globalforestwatch.org", 5 | "firstname.lastname@test.com", 6 | ] 7 | GOOD_DOMAINS = [ 8 | "www.globalforestwatch.org", 9 | "*.globalforestwatch.org", 10 | "globalforestwatch.org", 11 | "localhost", 12 | ] 13 | 14 | BAD_EMAILS = ["not an email", "also_not@n-email", "nope", None] 15 | BAD_DOMAINS = [ 16 | "www.*.com", 17 | "*", 18 | "www.test*.org", 19 | "www.test.*", 20 | "*.com", 21 | "globalforestwatch.org:443", 22 | "localhost:3000", 23 | ] 24 | -------------------------------------------------------------------------------- /tests_v2/fixtures/creation_options/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/fixtures/creation_options/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/creation_options/versions.py: -------------------------------------------------------------------------------- 1 | # Vector source creation options 2 | bucket = "my_bucket" 3 | shp_name = "my_shape.zip" 4 | tif_name = "tile.tif" 5 | 6 | VECTOR_SOURCE_CREATION_OPTIONS = { 7 | "source_driver": "ESRI Shapefile", 8 | "source_type": "vector", 9 | "source_uri": [f"s3://{bucket}/{shp_name}"], 10 | "indices": [ 11 | {"column_names": ["geom"], "index_type": "gist"}, 12 | {"column_names": ["geom_wm"], "index_type": "gist"}, 13 | {"column_names": ["gfw_geostore_id"], "index_type": "hash"}, 14 | ], 15 | "create_dynamic_vector_tile_cache": True, 16 | "add_to_geostore": True, 17 | } 18 | 19 | RASTER_CREATION_OPTIONS = { 20 | "source_driver": "GeoTIFF", 21 | "source_type": "raster", 22 | "source_uri": [f"s3://{bucket}/{tif_name}"], 23 | 
"pixel_meaning": "year", 24 | "data_type": "uint16", 25 | "grid": "10/40000", 26 | "compute_stats": False, 27 | } 28 | -------------------------------------------------------------------------------- /tests_v2/fixtures/geojson/test.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "MultiPolygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | [ 13 | 10.67647933959961, 14 | 53.8577916408477 15 | ], 16 | [ 17 | 10.699653625488281, 18 | 53.8577916408477 19 | ], 20 | [ 21 | 10.699653625488281, 22 | 53.87575866462502 23 | ], 24 | [ 25 | 10.67647933959961, 26 | 53.87575866462502 27 | ], 28 | [ 29 | 10.67647933959961, 30 | 53.8577916408477 31 | ] 32 | ] 33 | ] 34 | ] 35 | } 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /tests_v2/fixtures/geojson/test_bad.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "LineString", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 10.67647933959961, 13 | 53.8577916408477 14 | ], 15 | [ 16 | 10.699653625488281, 17 | 53.8577916408477 18 | ], 19 | [ 20 | 10.699653625488281, 21 | 53.87575866462502 22 | ], 23 | [ 24 | 10.67647933959961, 25 | 53.87575866462502 26 | ], 27 | [ 28 | 10.67647933959961, 29 | 53.8577916408477 30 | ] 31 | ] 32 | ] 33 | } 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /tests_v2/fixtures/geojson/test_huge.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "Polygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 2.109375, 13 | 7.013667927566642 14 | ], 15 | [ 16 | 150.1171875, 17 | 7.013667927566642 18 | ], 19 | [ 20 | 150.1171875, 21 | 71.52490903732816 22 | ], 23 | [ 24 | 2.109375, 25 | 71.52490903732816 26 | ], 27 | [ 28 | 2.109375, 29 | 7.013667927566642 30 | ] 31 | ] 32 | ] 33 | } 34 | } 35 | ] 36 | } -------------------------------------------------------------------------------- /tests_v2/fixtures/metadata/dataset.py: -------------------------------------------------------------------------------- 1 | DATASET_METADATA = { 2 | "title": "test metadata", 3 | "source": "Source Organization test", 4 | "license": "[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)", 5 | "data_language": "en", 6 | "overview": "Some detailed data description", 7 | } 8 | -------------------------------------------------------------------------------- /tests_v2/fixtures/metadata/version.py: -------------------------------------------------------------------------------- 1 | VERSION_METADATA = { 2 | "content_date_range": {"start_date": "2000-01-01", "end_date": "2021-01-01"}, 3 | "content_date_description": "2000 - present", 4 | "last_update": "2020-01-03", 5 | "spatial_resolution": 10, 6 | "resolution_description": "10 meters", 7 | } 8 | -------------------------------------------------------------------------------- /tests_v2/fixtures/otf_payload/otf_payload.py: -------------------------------------------------------------------------------- 1 | environment = [ 2 | { 3 | "name": "my_first_dataset__date_conf", 4 | "no_data": 0, 5 | 
"raster_table": None, 6 | "decode_expression": "", 7 | "encode_expression": "", 8 | "source_uri": "s3://gfw-data-lake-test/my_first_dataset/v1/raster/epsg-4326/10/40000/date_conf/geotiff/{tile_id}.tif", 9 | "grid": "10/40000", 10 | "tile_scheme": "nw", 11 | }, 12 | { 13 | "name": "my_first_dataset__date", 14 | "no_data": 0, 15 | "raster_table": None, 16 | "decode_expression": "(A + 16435).astype('datetime64[D]').astype(str)", 17 | "encode_expression": "(datetime64(A) - 16435).astype(uint16)", 18 | "source_layer": "my_first_dataset__date_conf", 19 | "calc": "A % 10000", 20 | }, 21 | { 22 | "name": "my_first_dataset__confidence", 23 | "no_data": 0, 24 | "raster_table": { 25 | "rows": [ 26 | {"value": 2, "meaning": "nominal"}, 27 | {"value": 3, "meaning": "high"}, 28 | {"value": 4, "meaning": "highest"}, 29 | ], 30 | "default_meaning": "not_detected", 31 | }, 32 | "decode_expression": "", 33 | "encode_expression": "", 34 | "source_layer": "my_first_dataset__date_conf", 35 | "calc": "floor(A / 10000).astype(uint8)", 36 | }, 37 | ] 38 | 39 | sql = "select sum(area__ha) from data where is__umd_regional_primary_forest_2001 != 'false' and umd_tree_cover_density_2000__threshold >= 30 and umd_tree_cover_loss__year >= 2001 group by umd_tree_cover_loss__year" 40 | -------------------------------------------------------------------------------- /tests_v2/fixtures/sample_rw_geostore_response.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from app.models.pydantic.geostore import Geometry, GeostoreCommon 4 | 5 | response_body: Dict = { 6 | "data": { 7 | "type": "geoStore", 8 | "id": "d8907d30eb5ec7e33a68aa31aaf918a4", 9 | "attributes": { 10 | "geojson": { 11 | "crs": {}, 12 | "type": "FeatureCollection", 13 | "features": [ 14 | { 15 | "geometry": { 16 | "coordinates": [ 17 | [ 18 | [13.286161423, 2.22263581], 19 | [13.895623684, 2.613460107], 20 | [14.475367069, 2.43969337], 21 | [15.288956165, 1.338479182], 22 | [13.44381094, 0.682623753], 23 | [13.286161423, 2.22263581], 24 | ] 25 | ], 26 | "type": "Polygon", 27 | }, 28 | "type": "Feature", 29 | } 30 | ], 31 | }, 32 | "hash": "d8907d30eb5ec7e33a68aa31aaf918a4", # pragma: allowlist secret 33 | "provider": {}, 34 | "areaHa": 2950164.393265342, 35 | "bbox": [13.286161423, 0.682623753, 15.288956165, 2.613460107], 36 | "lock": False, 37 | "info": {"use": {}}, 38 | }, 39 | } 40 | } 41 | 42 | data: Dict = response_body["data"]["attributes"] 43 | geojson: Dict = data["geojson"]["features"][0]["geometry"] 44 | geometry: Geometry = Geometry.parse_obj(geojson) 45 | geostore_common: GeostoreCommon = GeostoreCommon( 46 | geostore_id=data["hash"], 47 | geojson=geometry, 48 | area__ha=data["areaHa"], 49 | bbox=data["bbox"], 50 | ) 51 | -------------------------------------------------------------------------------- /tests_v2/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/authentication/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/authentication/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/crud/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/crud/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/crud/test_assets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine 4 | from app.crud.assets import get_default_asset, update_asset 5 | from app.crud.datasets import get_dataset 6 | from app.crud.versions import get_version 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_update_version__is_downloadable(generic_vector_source_version): 11 | dataset, version, _ = generic_vector_source_version 12 | dataset_row = await get_dataset(dataset) 13 | version_row = await get_version(dataset, version) 14 | asset_row = await get_default_asset(dataset, version) 15 | 16 | # Check if default value is correctly populated 17 | assert dataset_row.is_downloadable is True 18 | assert version_row.is_downloadable is True 19 | assert asset_row.is_downloadable is True 20 | 21 | # This should update the downstream versions and assets only 22 | async with ContextEngine("WRITE"): 23 | await update_asset(asset_row.asset_id, **{"is_downloadable": False}) 24 | 25 | dataset_row = await get_dataset(dataset) 26 | version_row = await get_version(dataset, version) 27 | asset_row = await get_default_asset(dataset, version) 28 | 29 | assert dataset_row.is_downloadable is True 30 | assert version_row.is_downloadable is True 31 | assert asset_row.is_downloadable is False 32 | -------------------------------------------------------------------------------- /tests_v2/unit/app/crud/test_datasets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine 4 | from app.crud.assets import get_default_asset 5 | from app.crud.datasets import get_dataset, update_dataset 6 | from app.crud.versions import get_version 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_update_dataset__is_downloadable(generic_vector_source_version): 11 | dataset, version, _ = generic_vector_source_version 12 | dataset_row = await get_dataset(dataset) 13 | version_row = await get_version(dataset, version) 14 | asset_row = await get_default_asset(dataset, version) 15 | 16 | # Check if default value is correctly populated 17 | assert dataset_row.is_downloadable is True 18 | assert version_row.is_downloadable is True 19 | assert asset_row.is_downloadable is True 20 | 21 | # This should update the downstream versions and assets only 22 | async with ContextEngine("WRITE"): 23 | await update_dataset(dataset, **{"is_downloadable": False, }) 24 | 25 | dataset_row = await get_dataset(dataset) 26 | version_row = await get_version(dataset, version) 27 | asset_row = await get_default_asset(dataset, version) 28 | 29 | assert dataset_row.is_downloadable is False 30 | assert version_row.is_downloadable is False 31 | assert asset_row.is_downloadable is False 32 | -------------------------------------------------------------------------------- 
/tests_v2/unit/app/crud/test_versions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine 4 | from app.crud.assets import get_default_asset 5 | from app.crud.datasets import get_dataset 6 | from app.crud.versions import get_version, update_version 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_update_version__is_downloadable(generic_vector_source_version): 11 | dataset, version, _ = generic_vector_source_version 12 | dataset_row = await get_dataset(dataset) 13 | version_row = await get_version(dataset, version) 14 | asset_row = await get_default_asset(dataset, version) 15 | 16 | # Check if default value is correctly populated 17 | assert dataset_row.is_downloadable is True 18 | assert version_row.is_downloadable is True 19 | assert asset_row.is_downloadable is True 20 | 21 | # This should update the downstream versions and assets only 22 | async with ContextEngine("WRITE"): 23 | await update_version(dataset, version, **{"is_downloadable": False}) 24 | 25 | dataset_row = await get_dataset(dataset) 26 | version_row = await get_version(dataset, version) 27 | asset_row = await get_default_asset(dataset, version) 28 | 29 | assert dataset_row.is_downloadable is True 30 | assert version_row.is_downloadable is False 31 | assert asset_row.is_downloadable is False 32 | -------------------------------------------------------------------------------- /tests_v2/unit/app/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/models/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/models/pydantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/models/pydantic/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/analysis/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/assets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/assets/test_assets_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.assets import AssetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_get_assets_returns_assets_response(async_client: AsyncClient) -> None: 
9 | resp = await async_client.get("/assets") 10 | assert AssetsResponse(**resp.json()) 11 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/assets/test_assets_with_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.assets import PaginatedAssetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_adding_page_number_returns_paginated_assets_response( 9 | async_client: AsyncClient, 10 | ) -> None: 11 | 12 | resp = await async_client.get("/assets", params=[("page[number]", "1")]) 13 | assert PaginatedAssetsResponse(**resp.json()) 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_adding_size_parameter_returns_paginated_assets_response( 18 | async_client: AsyncClient, 19 | ) -> None: 20 | 21 | resp = await async_client.get("/assets", params=[("page[size]", "10")]) 22 | assert PaginatedAssetsResponse(**resp.json()) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_adding_both_page_and_size_parameter_returns_paginated_assets_response( 27 | async_client: AsyncClient, 28 | ) -> None: 29 | 30 | resp = await async_client.get( 31 | "/assets", params=[("page[number]", "1"), ("page[size]", "10")] 32 | ) 33 | assert PaginatedAssetsResponse(**resp.json()) 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_get_paginated_asset_with_pagesize_less_than_1_returns_4xx( 38 | async_client: AsyncClient, 39 | ) -> None: 40 | resp = await async_client.get("/assets", params=[("page[size]", "0")]) 41 | assert resp.status_code == 422 42 | 43 | 44 | @pytest.mark.asyncio 45 | async def test_get_paginated_asset_with_pagenumber_less_than_1_returns_4xx( 46 | async_client: AsyncClient, 47 | ) -> None: 48 | resp = await async_client.get("/assets", params=[("page[number]", "0")]) 49 | assert resp.status_code == 422 50 | 51 | 52 | @pytest.mark.asyncio 53 | async def test_get_paginated_asset_with_pagenumber_more_than_max_pages_returns_4xx( 54 | async_client: AsyncClient, 55 | ) -> None: 56 | resp = await async_client.get("/assets", params=[("page[number]", "100")]) 57 | assert resp.status_code == 422 58 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/authentication/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datamart/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datamart/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datasets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datasets/datasets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datasets/datasets/assets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/assets/test_dataset_assets_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.assets import AssetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_get_assets_returns_assets_of_a_specific_dataset_and_version_response( 9 | async_client: AsyncClient, generic_vector_source_version 10 | ) -> None: 11 | dataset_name, dataset_version, _ = generic_vector_source_version 12 | resp = await async_client.get(f"/dataset/{dataset_name}/{dataset_version}/assets") 13 | assert AssetsResponse(**resp.json()) 14 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/test_datasets_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import pytest as pytest 4 | from httpx import AsyncClient 5 | 6 | from app.models.pydantic.datasets import DatasetsResponse 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_get_datasets_returns_datasets_response( 11 | async_client: AsyncClient, generic_dataset: Tuple[str, str] 12 | ) -> None: 13 | 14 | resp = await async_client.get("/datasets") 15 | assert DatasetsResponse(**resp.json()) 16 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/test_datasets_with_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.datasets import PaginatedDatasetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_adding_page_number_returns_paginated_datasets_response( 9 | async_client: AsyncClient, 10 | ) -> None: 11 | 12 | resp = await async_client.get("/datasets", params=[("page[number]", "1")]) 13 | assert PaginatedDatasetsResponse(**resp.json()) 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_adding_size_parameter_returns_paginated_datasets_response( 18 | async_client: AsyncClient, 19 | ) -> None: 20 | 21 | resp = await async_client.get("/datasets", params=[("page[size]", "10")]) 22 | assert PaginatedDatasetsResponse(**resp.json()) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_adding_both_page_and_size_parameter_returns_paginated_datasets_response( 27 | async_client: AsyncClient, 28 | ) -> None: 29 | 30 | resp = await async_client.get( 31 | "/datasets", params=[("page[number]", "1"), ("page[size]", "10")] 32 | ) 33 | assert PaginatedDatasetsResponse(**resp.json()) 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_get_paginated_dataset_with_pagesize_less_than_1_returns_4xx( 38 | async_client: AsyncClient, 39 | ) -> None: 40 | resp = await async_client.get("/datasets", params=[("page[size]", "0")]) 
41 | assert resp.status_code == 422 42 | 43 | 44 | @pytest.mark.asyncio 45 | async def test_get_paginated_dataset_with_pagenumber_less_than_1_returns_4xx( 46 | async_client: AsyncClient, 47 | ) -> None: 48 | resp = await async_client.get("/datasets", params=[("page[number]", "0")]) 49 | assert resp.status_code == 422 50 | 51 | 52 | @pytest.mark.asyncio 53 | async def test_get_paginated_dataset_with_pagenumber_more_than_max_pages_returns_4xx( 54 | async_client: AsyncClient, 55 | ) -> None: 56 | resp = await async_client.get("/datasets", params=[("page[number]", "100")]) 57 | assert resp.status_code == 422 58 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/geostore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/geostore/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/health/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/health/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/health/test_health.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from httpx import AsyncClient 3 | 4 | from tests_v2.unit.app.routes.utils import assert_jsend 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_ping(async_client: AsyncClient): 9 | response = await async_client.get("/ping") 10 | 11 | assert_jsend(response.json()) 12 | assert response.status_code == 200 13 | assert response.json()["data"] == "pong" 14 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/jobs/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/political/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/political/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/political/id_lookup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/political/id_lookup/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/tasks/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/tasks/test_asset_tasks_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest 2 
| from httpx import AsyncClient 3 | 4 | from app.models.pydantic.tasks import TasksResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_get_asset_tasks_returns_tasks_response( 9 | async_client: AsyncClient, generic_vector_source_version 10 | ) -> None: 11 | 12 | dataset_name, dataset_version, _ = generic_vector_source_version 13 | version_resp = await async_client.get( 14 | f"/dataset/{dataset_name}/{dataset_version}/assets" 15 | ) 16 | asset_id = version_resp.json()["data"][0]["asset_id"] 17 | resp = await async_client.get(f"/asset/{asset_id}/tasks") 18 | 19 | assert TasksResponse(**resp.json()) 20 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Dict 3 | 4 | 5 | def assert_jsend(resp_obj: Dict): 6 | assert resp_obj.get("status") in ("success", "error", "failed") 7 | if resp_obj.get("status") == "success": 8 | assert resp_obj.get("data") is not None 9 | else: 10 | assert resp_obj.get("message") is not None 11 | 12 | 13 | def assert_is_datetime(value: str): 14 | datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f") 15 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/tasks/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/datamart/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/tasks/datamart/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/tasks/raster_tile_cache_assets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/collaborators/__init__.py: -------------------------------------------------------------------------------- 1 | MODULE_PATH_UNDER_TEST = "app.tasks.raster_tile_cache_assets.raster_tile_cache_assets" 2 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/collaborators/test_crud_collaboration.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from app.tasks.raster_tile_cache_assets import raster_tile_cache_asset 6 | 7 | from . 
import MODULE_PATH_UNDER_TEST 8 | 9 | 10 | @patch(f"{MODULE_PATH_UNDER_TEST}.execute", autospec=True) 11 | @patch(f"{MODULE_PATH_UNDER_TEST}.symbology_constructor", autospec=True) 12 | @patch(f"{MODULE_PATH_UNDER_TEST}.reproject_to_web_mercator", autospec=True) 13 | @patch(f"{MODULE_PATH_UNDER_TEST}.get_asset", autospec=True) 14 | class TestCrudCollaboration: 15 | @pytest.mark.asyncio 16 | async def test_source_asset_is_retrieved_by_uuid( 17 | self, 18 | get_asset_mock, 19 | web_mercator_dummy, 20 | symbology_constructor_dummy, 21 | execute_dummy, 22 | tile_cache_asset_uuid, 23 | creation_options_dict, 24 | source_asset, 25 | reprojection, 26 | symbology_info, 27 | change_log, 28 | ): 29 | get_asset_mock.return_value = source_asset 30 | symbology_constructor_dummy.__getitem__.return_value = symbology_info 31 | web_mercator_dummy.return_value = reprojection 32 | execute_dummy.return_value = change_log 33 | 34 | await raster_tile_cache_asset( 35 | "test_dataset", "2022", tile_cache_asset_uuid, creation_options_dict 36 | ) 37 | 38 | get_asset_mock.assert_called_with( 39 | creation_options_dict["creation_options"]["source_asset_id"] 40 | ) 41 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/collaborators/test_raster_tile_cache_assets_happy_path.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from app.models.enum.change_log import ChangeLogStatus 6 | from app.tasks.raster_tile_cache_assets import raster_tile_cache_asset 7 | 8 | from . import MODULE_PATH_UNDER_TEST 9 | 10 | 11 | @patch(f"{MODULE_PATH_UNDER_TEST}.execute", autospec=True) 12 | @patch(f"{MODULE_PATH_UNDER_TEST}.symbology_constructor", autospec=True) 13 | @patch(f"{MODULE_PATH_UNDER_TEST}.reproject_to_web_mercator", autospec=True) 14 | @patch(f"{MODULE_PATH_UNDER_TEST}.get_asset", autospec=True) 15 | @pytest.mark.asyncio 16 | async def test_exploratory_test_runs_without_error( 17 | get_asset_dummy, 18 | web_mercator_dummy, 19 | symbology_constructor_dummy, 20 | execute_dummy, 21 | tile_cache_asset_uuid, 22 | creation_options_dict, 23 | source_asset, 24 | reprojection, 25 | symbology_info, 26 | change_log, 27 | ): 28 | """Goal of this test is to determine the minimum amount of patching we need 29 | to do to get the function to run as much side-effect free code as 30 | possible.""" 31 | get_asset_dummy.return_value = source_asset 32 | symbology_constructor_dummy.__getitem__.return_value = symbology_info 33 | web_mercator_dummy.return_value = reprojection 34 | execute_dummy.return_value = change_log 35 | 36 | result = await raster_tile_cache_asset( 37 | "test_dataset", "2022", tile_cache_asset_uuid, creation_options_dict 38 | ) 39 | 40 | assert result.status == ChangeLogStatus.success 41 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/test_batch.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | from unittest.mock import MagicMock, patch 3 | 4 | from fastapi.logger import logger 5 | 6 | from app.tasks.batch import submit_batch_job 7 | from app.tasks.vector_source_assets import _create_add_gfw_fields_job 8 | from tests_v2.conftest import mock_callback 9 | 10 | TEST_JOB_ENV: List[Dict[str, str]] = [{"name": "PASSWORD", "value": "DON'T LOG ME"}] 11 | 12 | 13 | @patch("app.utils.aws.boto3.client") 14 | @patch.object(logger, 
"info") # Patch the logger.info directly 15 | @patch("app.tasks.batch.UUID") # Patch the UUID class 16 | async def test_submit_batch_job(mock_uuid, mock_logging_info, mock_boto3_client): 17 | mock_client = MagicMock() 18 | mock_boto3_client.return_value = mock_client 19 | 20 | attempt_duration_seconds: int = 100 21 | 22 | job = await _create_add_gfw_fields_job( 23 | "some_dataset", 24 | "v1", 25 | parents=list(), 26 | job_env=TEST_JOB_ENV, 27 | callback=mock_callback, 28 | attempt_duration_seconds=attempt_duration_seconds, 29 | ) 30 | 31 | # Call the function you want to test 32 | submit_batch_job(job) 33 | 34 | mock_boto3_client.assert_called_once_with( 35 | "batch", region_name="us-east-1", endpoint_url=None 36 | ) 37 | 38 | # Assert that the logger.info was called with the expected log message 39 | assert "add_gfw_fields" in mock_logging_info.call_args.args[0] 40 | assert "DON'T LOG ME" not in mock_logging_info.call_args.args[0] 41 | -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/utils/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/paginate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/utils/paginate/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/paginate/test_offset_calculation.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock 2 | 3 | import pytest 4 | 5 | from app.utils.paginate import paginate_collection 6 | 7 | DONT_CARE: int = 1 8 | 9 | 10 | @pytest.mark.asyncio 11 | async def test_offset_is_0_for_page_1_when_size_is_given(): 12 | spy_get_collection = AsyncMock() 13 | dummy_count_collection = AsyncMock(return_value=DONT_CARE) 14 | 15 | await paginate_collection( 16 | paged_items_fn=spy_get_collection, 17 | item_count_fn=dummy_count_collection, 18 | size=10, 19 | page=1, 20 | ) 21 | 22 | spy_get_collection.assert_called_with(10, 0) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_offset_is_0_when_no_page_is_given(): 27 | spy_get_collection = AsyncMock() 28 | dummy_count_collection = AsyncMock(return_value=DONT_CARE) 29 | 30 | await paginate_collection( 31 | paged_items_fn=spy_get_collection, item_count_fn=dummy_count_collection, size=10 32 | ) 33 | 34 | spy_get_collection.assert_called_with(10, 0) 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_offset_is_10_for_page_2_when_page_size_is_10(): 39 | spy_get_collection = AsyncMock() 40 | stub_count_collection = AsyncMock(return_value=15) 41 | 42 | await paginate_collection( 43 | paged_items_fn=spy_get_collection, 44 | item_count_fn=stub_count_collection, 45 | size=10, 46 | page=2, 47 | ) 48 | 49 | spy_get_collection.assert_called_with(10, 10) 50 | -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/test_google.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from unittest.mock import Mock 3 | 4 | from _pytest.monkeypatch import MonkeyPatch 5 | 6 | from app.utils import google 
good_bucket: str = "good_bucket"
good_prefix: str = "good_prefix"
all_the_files: List[str] = [
    f"{good_prefix}/irrelevant.mp3",
    f"{good_prefix}/something.csv",
    f"{good_prefix}/world.tif",
]

all_the_files_gdal_notation: List[str] = [
    f"/vsigs/{good_bucket}/{x}" for x in all_the_files
]


def test_get_matching_gs_files_no_filtering(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix)
    assert len(keys) == 3
    assert set(keys) == set(all_the_files_gdal_notation)


def test_get_matching_gs_files_match_extensions(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix, extensions=[".tif"])
    assert keys == [f"/vsigs/{good_bucket}/{good_prefix}/world.tif"]


def test_get_matching_gs_files_no_matches(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix, extensions=[".pdf"])
    assert keys == []


def test_get_matching_gs_files_early_exit(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix, exit_after_max=1)
    assert len(keys) == 1
    assert keys[0] in all_the_files_gdal_notation
--------------------------------------------------------------------------------
/tests_v2/unit/batch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/batch/__init__.py
--------------------------------------------------------------------------------
/tests_v2/unit/batch/python/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/batch/python/__init__.py
--------------------------------------------------------------------------------
/tests_v2/unit/batch/python/test_adjust_num_processes.py:
--------------------------------------------------------------------------------
import pytest

from batch.python.adjust_num_processes import calc_num_processes
from tests_v2.utils import BatchJobMock

job_descriptions = [
    {
        "jobId": "8e76ecf5-99a0-43a1-9b97-8e6616b90983",
        "attempts": [
            {"container": {"exitCode": 137}},
            {"container": {"exitCode": 1}},
            {"container": {"exitCode": 137}},
        ],
    }
]


@pytest.mark.parametrize("orig_num_processes,expected", [(96, 24), (5, 1), (0, 1)])
def test_calc_num_processes(orig_num_processes, expected):
    job_id: str = "8e76ecf5-99a0-43a1-9b97-8e6616b90983"
    batch_client = BatchJobMock(job_desc=job_descriptions)

    new_cores_val = calc_num_processes(job_id, orig_num_processes, batch_client)
    assert new_cores_val == expected
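
# Notes on the fixture above: exit code 137 means the container was killed with
# SIGKILL, which in AWS Batch usually indicates it exceeded its memory limit, so
# two of the three recorded attempts were OOM-killed. The parametrized cases
# encode the expected behavior of calc_num_processes for that history: scale the
# requested process count down (96 -> 24, 5 -> 1) and never return fewer than
# one process (0 -> 1).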
--------------------------------------------------------------------------------
/wait_for_postgres.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# wait_for_postgres.sh
# Block until PostgreSQL accepts connections, then exec the given command.

set -e

# Attempt to open (and immediately close, via \q) a psql session once per
# second until the server is reachable.
until PGPASSWORD=$DB_PASSWORD psql -h "$DB_HOST" -U "$DB_USER" -d "$DATABASE" -c '\q'; do
  >&2 echo "Postgres is unavailable - sleeping"
  sleep 1
done

>&2 echo "Postgres is up - executing command"
# Replace this shell with the command passed as arguments.
exec "$@"
--------------------------------------------------------------------------------