├── .dist.env ├── .dockerignore ├── .flake8 ├── .github └── workflows │ ├── terraform_build.yaml │ ├── terraform_destroy_on_delete.yaml │ └── terraform_plan.yaml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .secrets.baseline ├── Dockerfile ├── README.md ├── alembic.ini ├── app ├── __init__.py ├── application.py ├── authentication │ ├── __init__.py │ ├── api_keys.py │ └── token.py ├── crud │ ├── __init__.py │ ├── api_keys.py │ ├── assets.py │ ├── datamart.py │ ├── datasets.py │ ├── geostore.py │ ├── metadata.py │ ├── tasks.py │ └── versions.py ├── errors.py ├── main.py ├── middleware.py ├── models │ ├── __init__.py │ ├── enum │ │ ├── analysis.py │ │ ├── assets.py │ │ ├── change_log.py │ │ ├── creation_options.py │ │ ├── entity.py │ │ ├── geostore.py │ │ ├── pg_admin_functions.py │ │ ├── pg_sys_functions.py │ │ ├── pg_types.py │ │ ├── pixetl.py │ │ ├── queries.py │ │ ├── sources.py │ │ └── versions.py │ ├── orm │ │ ├── __init__.py │ │ ├── api_keys.py │ │ ├── asset_metadata.py │ │ ├── assets.py │ │ ├── base.py │ │ ├── datamart.py │ │ ├── dataset_metadata.py │ │ ├── datasets.py │ │ ├── geostore.py │ │ ├── migrations │ │ │ ├── README │ │ │ ├── draft_versions │ │ │ │ └── 04fcb4f2408a_add_metadata_table.py │ │ │ ├── env.py │ │ │ ├── script.py.mako │ │ │ └── versions │ │ │ │ ├── 04fcb4f2408a_add_metadata_table.py │ │ │ │ ├── 167eebbf29e4_.py │ │ │ │ ├── 3e524ef0525f_.py │ │ │ │ ├── 4763f4b8141a_.py │ │ │ │ ├── 604bf4e66c2b_.py │ │ │ │ ├── 73fb3f5e39b8_.py │ │ │ │ ├── 86ae41de358d_.py │ │ │ │ ├── a5787f2eefe5_.py │ │ │ │ ├── aa5aefcbdfcf_.py │ │ │ │ ├── d62a9b15f844_.py │ │ │ │ ├── d767b6dd2c4c_.py │ │ │ │ ├── d8f049f00259_add_analysis_results_table.py │ │ │ │ └── ef3392e8e054_.py │ │ ├── mixins.py │ │ ├── queries │ │ │ ├── __init__.py │ │ │ ├── datasets.py │ │ │ ├── fields.py │ │ │ └── raster_assets.py │ │ ├── tasks.py │ │ ├── user_areas.py │ │ ├── version_metadata.py │ │ └── versions.py │ └── pydantic │ │ ├── __init__.py │ │ ├── analysis.py │ │ ├── asset_metadata.py │ │ ├── assets.py │ │ ├── authentication.py │ │ ├── base.py │ │ ├── change_log.py │ │ ├── creation_options.py │ │ ├── database.py │ │ ├── datamart.py │ │ ├── datasets.py │ │ ├── downloads.py │ │ ├── extent.py │ │ ├── features.py │ │ ├── geostore.py │ │ ├── jobs.py │ │ ├── metadata.py │ │ ├── political.py │ │ ├── query.py │ │ ├── raster_analysis.py │ │ ├── responses.py │ │ ├── sources.py │ │ ├── statistics.py │ │ ├── symbology.py │ │ ├── tasks.py │ │ ├── user_job.py │ │ └── versions.py ├── responses.py ├── routes │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ └── analysis.py │ ├── assets │ │ ├── __init__.py │ │ ├── asset.py │ │ └── assets.py │ ├── authentication │ │ ├── __init__.py │ │ └── authentication.py │ ├── datamart │ │ ├── __init__.py │ │ └── land.py │ ├── datasets │ │ ├── __init__.py │ │ ├── asset.py │ │ ├── dataset.py │ │ ├── datasets.py │ │ ├── downloads.py │ │ ├── features.py │ │ ├── geostore.py │ │ ├── queries.py │ │ └── versions.py │ ├── geostore │ │ ├── __init__.py │ │ └── geostore.py │ ├── health.py │ ├── jobs │ │ ├── __init__.py │ │ └── job.py │ ├── political │ │ ├── __init__.py │ │ └── id_lookup.py │ └── tasks │ │ ├── __init__.py │ │ └── task.py ├── settings │ ├── __init__.py │ ├── globals.py │ ├── gunicorn_conf.py │ ├── prestart.sh │ └── start.sh ├── static │ └── gfw-data-api.png ├── tasks │ ├── __init__.py │ ├── assets.py │ ├── aws_tasks.py │ ├── batch.py │ ├── cog_assets.py │ ├── datamart │ │ └── land.py │ ├── default_assets.py │ ├── delete_assets.py │ ├── 
dynamic_vector_tile_cache_assets.py │ ├── raster_tile_cache_assets │ │ ├── __init__.py │ │ ├── raster_tile_cache_assets.py │ │ ├── symbology.py │ │ └── utils.py │ ├── raster_tile_set_assets │ │ ├── __init__.py │ │ ├── raster_tile_set_assets.py │ │ └── utils.py │ ├── static_vector_1x1_assets.py │ ├── static_vector_file_assets.py │ ├── static_vector_tile_cache_assets.py │ ├── table_source_assets.py │ ├── utils.py │ └── vector_source_assets.py └── utils │ ├── __init__.py │ ├── aws.py │ ├── decorators.py │ ├── fields.py │ ├── gadm.py │ ├── generators.py │ ├── geostore.py │ ├── google.py │ ├── paginate.py │ ├── path.py │ ├── rw_api.py │ ├── stats.py │ └── tile_cache.py ├── batch ├── .dockerignore ├── __init__.py ├── pixetl.dockerfile ├── python │ ├── 16bpp_gdal2tiles.py │ ├── 8bpp_gdal2tiles.py │ ├── __init__.py │ ├── adjust_num_processes.py │ ├── apply_colormap.py │ ├── aws_utils.py │ ├── check_csv.py │ ├── check_raster.py │ ├── check_vector.py │ ├── cluster_partitions.py │ ├── create_partitions.py │ ├── errors.py │ ├── export_1x1_grid.py │ ├── export_to_gee.py │ ├── extract_geometries.py │ ├── gdal_utils.py │ ├── logger.py │ ├── logging_utils.py │ ├── raster_tile_cache.py │ ├── resample.py │ └── tiles_geojson.py ├── scripts │ ├── _add_gfw_fields_sql.sh │ ├── _add_point_geometry_fields_sql.sh │ ├── _fill_gfw_fields_sql.sh │ ├── _fill_point_geometry_fields_sql.sh │ ├── _get_geometry_type_sql.sh │ ├── _tiff_crosses_dateline.sh │ ├── _warp_and_upload.sh │ ├── add_gfw_fields.sh │ ├── add_point_geometry_fields.sh │ ├── apply_colormap.sh │ ├── clip_and_reproject_geom.sh │ ├── cluster_partitions.sh │ ├── cluster_table.sh │ ├── cogify.sh │ ├── create_index.sh │ ├── create_partitions.sh │ ├── create_tabular_schema.sh │ ├── create_vector_schema.sh │ ├── create_vector_tile_cache.sh │ ├── export_1x1_grid.sh │ ├── export_vector_data.sh │ ├── get_arguments.sh │ ├── inherit_geostore.sh │ ├── load_tabular_data.sh │ ├── load_vector_csv_data.sh │ ├── load_vector_data.sh │ ├── raster_tile_cache.sh │ ├── report_status.sh │ ├── resample.sh │ ├── run_pixetl.sh │ ├── test_mock_s3_awscli.sh │ ├── test_mock_s3_ogr2ogr.sh │ ├── tmp │ │ ├── create_1x1_grid.sh │ │ ├── export_vector_data.sh │ │ ├── import_vector_data.sh │ │ └── post_process_vector_data.sh │ ├── unify_projection.sh │ └── update_point_geometry.sh └── universal_batch.dockerfile ├── docker-compose.dev.yml ├── docker-compose.prod.yml ├── docker-compose.test.yml ├── newrelic.ini ├── pyproject.toml ├── scripts ├── delete_workspace ├── develop ├── infra ├── migrate ├── migration_dry_run ├── setup ├── terraform ├── test └── test_v2 ├── terraform.md ├── terraform ├── api_gateway │ ├── api_key_authorizer_lambda.py │ └── api_key_authorizer_lambda.zip ├── cloudfront.tf ├── data.tf ├── docker │ └── docker-compose.yml ├── generate_port.py ├── iam.tf ├── logging.tf ├── main.tf ├── modules │ ├── api_gateway │ │ ├── endpoint │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variables.tf │ │ ├── gateway │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variables.tf │ │ └── resource │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variables.tf │ └── batch │ │ ├── main.tf │ │ ├── outputs.tf │ │ └── variables.tf ├── outputs.tf ├── scripts │ └── hash.sh ├── templates │ ├── api_gateway_policy.json.tmpl │ ├── cloudwatch_log_policy.json.tmpl │ ├── container_definition.json.tmpl │ ├── container_properties.json.tmpl │ ├── iam_assume_role.json.tmpl │ ├── iam_s3_read_only.json │ ├── iam_trust_entity.json.tmpl │ ├── lambda_invoke_policy.json.tmpl │ ├── query_batch_policy.json.tmpl │ ├── 
role-trust-policy.json.tmpl │ ├── run_batch_policy.json.tmpl │ ├── step_function_policy.json.tmpl │ └── tile_cache_bucket_policy.json.tmpl ├── variables.tf ├── vars │ ├── backend-dev.tfvars │ ├── backend-production.tfvars │ ├── backend-staging.tfvars │ ├── terraform-dev.tfvars │ ├── terraform-production.tfvars │ └── terraform-staging.tfvars └── versions.tf ├── tests ├── __init__.py ├── conftest.py ├── crud │ ├── __init__.py │ ├── test_assets.py │ ├── test_datasets.py │ └── test_versions.py ├── fixtures │ ├── append_test.tsv │ ├── aws │ │ └── config │ ├── test.csv │ ├── test.geojson │ ├── test.gpkg.zip │ ├── test.shp.zip │ ├── test.tsv │ ├── test2.csv │ └── test2.geojson ├── models │ └── test_jobs.py ├── routes │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── test_assets.py │ │ ├── test_datasets.py │ │ └── test_versions.py │ ├── test_analysis.py │ ├── test_authorization.py │ ├── test_features.py │ ├── test_geostore.py │ └── test_tasks.py ├── tasks │ ├── __init__.py │ ├── test_aws_tasks.py │ ├── test_batch.py │ ├── test_batch_scheduler.py │ ├── test_default_assets.py │ ├── test_delete_assets.py │ ├── test_table_source_assets.py │ ├── test_vector_source_assets.py │ └── test_vector_tile_assets.py ├── utils.py └── utils │ └── test_path.py ├── tests_v2 ├── __init__.py ├── conftest.py ├── fixtures │ ├── __init__.py │ ├── authentication │ │ ├── __init__.py │ │ └── api_keys.py │ ├── creation_options │ │ ├── __init__.py │ │ └── versions.py │ ├── geojson │ │ ├── test.geojson │ │ ├── test_bad.geojson │ │ └── test_huge.geojson │ ├── metadata │ │ ├── dataset.py │ │ └── version.py │ ├── otf_payload │ │ └── otf_payload.py │ └── sample_rw_geostore_response.py ├── unit │ ├── __init__.py │ ├── app │ │ ├── __init__.py │ │ ├── authentication │ │ │ ├── __init__.py │ │ │ └── test_api_keys.py │ │ ├── crud │ │ │ ├── __init__.py │ │ │ ├── test_api_keys.py │ │ │ ├── test_assets.py │ │ │ ├── test_datasets.py │ │ │ ├── test_geostore.py │ │ │ └── test_versions.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── pydantic │ │ │ │ ├── __init__.py │ │ │ │ ├── test_authentication.py │ │ │ │ └── test_datamart.py │ │ ├── routes │ │ │ ├── __init__.py │ │ │ ├── analysis │ │ │ │ ├── __init__.py │ │ │ │ └── test_analysis.py │ │ │ ├── assets │ │ │ │ ├── __init__.py │ │ │ │ ├── test_assets_with_no_pagination.py │ │ │ │ └── test_assets_with_pagination.py │ │ │ ├── authentication │ │ │ │ ├── __init__.py │ │ │ │ └── test_api_keys.py │ │ │ ├── datamart │ │ │ │ ├── __init__.py │ │ │ │ └── test_land.py │ │ │ ├── datasets │ │ │ │ ├── __init__.py │ │ │ │ ├── datasets │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── assets │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── test_dataset_assets_with_no_pagination.py │ │ │ │ │ │ └── test_dataset_assets_with_pagination.py │ │ │ │ │ ├── test_datasets_with_no_pagination.py │ │ │ │ │ └── test_datasets_with_pagination.py │ │ │ │ ├── test_asset_metadata.py │ │ │ │ ├── test_dataset.py │ │ │ │ ├── test_download.py │ │ │ │ ├── test_query.py │ │ │ │ └── test_version.py │ │ │ ├── geostore │ │ │ │ ├── __init__.py │ │ │ │ └── test_geostore.py │ │ │ ├── health │ │ │ │ ├── __init__.py │ │ │ │ └── test_health.py │ │ │ ├── jobs │ │ │ │ ├── __init__.py │ │ │ │ └── test_job.py │ │ │ ├── political │ │ │ │ ├── __init__.py │ │ │ │ └── id_lookup │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_id_lookup.py │ │ │ ├── tasks │ │ │ │ ├── __init__.py │ │ │ │ ├── test_asset_tasks_with_no_pagination.py │ │ │ │ └── test_asset_tasks_with_pagination.py │ │ │ └── utils.py │ │ ├── tasks │ │ │ ├── __init__.py │ │ │ ├── datamart │ │ │ │ 
├── __init__.py │ │ │ │ └── test_tree_cover_loss_by_driver.py │ │ │ ├── raster_tile_cache_assets │ │ │ │ ├── __init__.py │ │ │ │ ├── collaborators │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── conftest.py │ │ │ │ │ ├── test_building_raster_tile_set_source_creation_options.py │ │ │ │ │ ├── test_create_tile_cache_collaboration.py │ │ │ │ │ ├── test_crud_collaboration.py │ │ │ │ │ ├── test_raster_tile_cache_assets_happy_path.py │ │ │ │ │ ├── test_symbology_function_collaboration.py │ │ │ │ │ ├── test_task_execution_collaboration.py │ │ │ │ │ └── test_web_mercator_reprojection_collaboration.py │ │ │ │ └── test_symbology.py │ │ │ ├── test_batch.py │ │ │ └── test_vector_source_assets.py │ │ ├── test_middleware.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── paginate │ │ │ ├── __init__.py │ │ │ ├── test_offset_calculation.py │ │ │ ├── test_pagination_links_info.py │ │ │ └── test_pagination_meta_info.py │ │ │ ├── test_aws.py │ │ │ ├── test_fields.py │ │ │ ├── test_gadm.py │ │ │ ├── test_geostore.py │ │ │ ├── test_google.py │ │ │ └── test_rw_api.py │ └── batch │ │ ├── __init__.py │ │ └── python │ │ ├── __init__.py │ │ ├── test_adjust_num_processes.py │ │ └── test_resample.py └── utils.py ├── uv.lock └── wait_for_postgres.sh /.dist.env: -------------------------------------------------------------------------------- 1 | 2 | ### Application Variables 3 | 4 | # TO ADD APPLICATION GLOBALS: 5 | # 1. Duplicate .dist.env, rename to .env 6 | # 2. Add variable to the list below. 7 | # 3. Add variable to app/settings/globals.py, define defaults, etc. 8 | 9 | DATABASE=fill 10 | DB_USER=fill # Optional, remove if unnecessary 11 | DB_PASSWORD=fill # Optional, remove if unnecessary 12 | DB_HOST=fill # Optional, remove if unnecessary 13 | DB_PORT=fill # Optional, remove if unnecessary 14 | 15 | REDIS_IP=fill 16 | REDIS_PORT=fill 17 | 18 | # NOTE: Separate function references by comma. 
Example: 19 | # ARQ_BACKGROUND_FUNCTIONS=app.path.example_function, app.tasks.example.other_function 20 | ARQ_BACKGROUND_FUNCTIONS=app.tasks.messaging.send_message 21 | 22 | ### Docker Runtime Variables 23 | # All available options at https://github.com/tiangolo/uvicorn-gunicorn-docker 24 | # Includes custom gunicorn, concurrency, workers and logging settings 25 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # IDE Fragments 2 | /.vscode 3 | *__pycache__* 4 | *.idea* 5 | 6 | # MyPy 7 | .mypy_cache/* 8 | 9 | docker-compose.dev.yml 10 | docker-compose.prod.yml 11 | docker-compose.test.yml 12 | 13 | # GIT 14 | .git 15 | 16 | # Ignore Files 17 | .gitignore 18 | 19 | # Mac stuff 20 | *.DS_Store 21 | 22 | # Test stuff 23 | tests/cobertura.xml 24 | tests_v2/cobertura.xml 25 | 26 | # Terraform stuff 27 | *terraform* 28 | 29 | # Virtual Environments 30 | .venv* -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | max-complexity = 18 4 | select = B,C,E,F,W,T4,B9 5 | ignore = E203, E266, E501, W503, F403, E402, W605 -------------------------------------------------------------------------------- /.github/workflows/terraform_destroy_on_delete.yaml: -------------------------------------------------------------------------------- 1 | name: Destroy state and delete workspace after deleting feature branch 2 | 3 | on: [delete] 4 | 5 | jobs: 6 | build: 7 | if: github.event.ref_type == 'branch' && (github.event.ref != 'refs/heads/master') && (github.event.ref != 'refs/heads/develop') 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v1 11 | - name: Destroy state and delete workspace 12 | env: 13 | ENV: dev 14 | AWS_ACCESS_KEY_ID: ${{ secrets.aws_key_dev }} 15 | AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_dev }} 16 | AWS_REGION: ${{ secrets.aws_region_dev }} 17 | run: ./scripts/delete_workspace -w ${{ github.event.ref }} -g "no_sha_available" 18 | -------------------------------------------------------------------------------- /.github/workflows/terraform_plan.yaml: -------------------------------------------------------------------------------- 1 | name: Plan terraform changes for base branch 2 | 3 | on: [pull_request] 4 | 5 | jobs: 6 | plan: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v1 11 | - name: Plan production 12 | if: success() && github.base_ref == 'master' 13 | env: 14 | ENV: production 15 | AWS_ACCESS_KEY_ID: ${{ secrets.aws_key_production }} 16 | AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_production }} 17 | AWS_REGION: ${{ secrets.aws_region_production }} 18 | run: ./scripts/infra plan -w ${{ github.base_ref }} 19 | 20 | - name: Plan staging 21 | if: success() && github.base_ref == 'develop' 22 | env: 23 | ENV: staging 24 | AWS_ACCESS_KEY_ID: ${{ secrets.aws_key_staging }} 25 | AWS_SECRET_ACCESS_KEY: ${{ secrets.aws_secret_staging }} 26 | AWS_REGION: ${{ secrets.aws_region_staging }} 27 | run: ./scripts/infra plan -w ${{ github.base_ref }} 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE Fragments 2 | /.vscode 3 | *__pycache__* 4 | *.idea* 5 | 6 | # MyPy 7 | .mypy_cache 8 | 9 | # GIT 10 | .git 11 | 12 | # Environment 
Files 13 | /.env 14 | .python-version 15 | 16 | # Mac stuff 17 | *.DS_Store 18 | 19 | # Test stuff 20 | tests/cobertura.xml 21 | tests_v2/cobertura.xml 22 | 23 | # Terraform stuff 24 | **/.terraform/* 25 | 26 | # .tfstate files 27 | *.tfstate 28 | *.tfstate.* 29 | 30 | # .tfplan files 31 | *.tfplan 32 | 33 | # Virtual Environments 34 | .venv* -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | line_length = 88 3 | multi_line_output = 3 4 | include_trailing_comma = True 5 | known_third_party = _pytest,aenum,affine,alembic,asgi_lifespan,async_lru,asyncpg,aws_utils,boto3,botocore,click,docker,ee,errors,fastapi,fiona,gdal_utils,geoalchemy2,geojson,gfw_pixetl,gino,gino_starlette,google,httpx,httpx_auth,jsonschema,logger,logging_utils,moto,numpy,orjson,osgeo,pandas,pendulum,pglast,psutil,psycopg2,pydantic,pyproj,pytest,pytest_asyncio,pytest_unordered,rasterio,shapely,sqlalchemy,sqlalchemy_utils,starlette,tileputty,tiles_geojson,typer,unidecode 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^$' 2 | fail_fast: false 3 | repos: 4 | - repo: https://github.com/asottile/seed-isort-config 5 | rev: v2.2.0 6 | hooks: 7 | - id: seed-isort-config 8 | - repo: https://github.com/pre-commit/mirrors-isort 9 | rev: v5.10.1 10 | hooks: 11 | - id: isort 12 | - repo: https://github.com/myint/docformatter 13 | rev: eb1df347edd128b30cd3368dddc3aa65edcfac38 # pragma: allowlist secret 14 | hooks: 15 | - id: docformatter 16 | args: [--in-place] 17 | - repo: https://github.com/ambv/black 18 | rev: 24.10.0 19 | hooks: 20 | - id: black 21 | language_version: python3.10 22 | - repo: https://github.com/pre-commit/pre-commit-hooks 23 | rev: v5.0.0 24 | hooks: 25 | - id: detect-aws-credentials 26 | - id: detect-private-key 27 | - id: trailing-whitespace 28 | - repo: https://github.com/pycqa/flake8 29 | rev: 7.1.1 30 | hooks: 31 | - id: flake8 32 | - repo: https://github.com/pre-commit/mirrors-mypy 33 | rev: v1.14.1 34 | hooks: 35 | - id: mypy 36 | - repo: https://github.com/Yelp/detect-secrets 37 | rev: v1.5.0 38 | hooks: 39 | - id: detect-secrets 40 | args: ['--baseline', '.secrets.baseline'] # run: `pip install detect-secrets` to establish baseline 41 | exclude: Pipfile.lock -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = app/models/orm/migrations 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # timezone to use when rendering the date 11 | # within the migration file as well as the filename. 
12 | # string value is passed to dateutil.tz.gettz() 13 | # leave blank for localtime 14 | # timezone = 15 | 16 | # max length of characters to apply to the 17 | # "slug" field 18 | # truncate_slug_length = 40 19 | 20 | # set to 'true' to run the environment during 21 | # the 'revision' command, regardless of autogenerate 22 | # revision_environment = false 23 | 24 | # set to 'true' to allow .pyc and .pyo files without 25 | # a source .py file to be detected as revisions in the 26 | # versions/ directory 27 | # sourceless = false 28 | 29 | # version location specification; this defaults 30 | # to app/models/orm/alembic/versions. When using multiple version 31 | # directories, initial revisions must be specified with --version-path 32 | # version_locations = %(here)s/bar %(here)s/bat app/models/orm/alembic/versions 33 | 34 | # the output encoding used when revision files 35 | # are written from script.py.mako 36 | # output_encoding = utf-8 37 | 38 | [alembic:exclude] 39 | tables = spatial_ref_sys 40 | 41 | # Logging configuration 42 | [loggers] 43 | keys = root,sqlalchemy,alembic 44 | 45 | [handlers] 46 | keys = console 47 | 48 | [formatters] 49 | keys = generic 50 | 51 | [logger_root] 52 | level = WARN 53 | handlers = console 54 | qualname = 55 | 56 | [logger_sqlalchemy] 57 | level = WARN 58 | handlers = 59 | qualname = sqlalchemy.engine 60 | 61 | [logger_alembic] 62 | level = INFO 63 | handlers = 64 | qualname = alembic 65 | 66 | [handler_console] 67 | class = StreamHandler 68 | args = (sys.stderr,) 69 | level = NOTSET 70 | formatter = generic 71 | 72 | [formatter_generic] 73 | format = %(levelname)-5.5s [%(name)s] %(message)s 74 | datefmt = %H:%M:%S 75 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/__init__.py -------------------------------------------------------------------------------- /app/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/authentication/__init__.py -------------------------------------------------------------------------------- /app/crud/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict, Union 3 | 4 | from pydantic.main import BaseModel 5 | 6 | from ..application import db 7 | from ..models.pydantic.change_log import ChangeLog 8 | 9 | 10 | async def update_data( 11 | row: db.Model, input_data: Union[BaseModel, Dict[str, Any]] # type: ignore 12 | ) -> db.Model: # type: ignore 13 | """Merge updated metadata fields with existing fields.""" 14 | 15 | if not input_data: 16 | return row 17 | 18 | if isinstance(input_data, BaseModel): 19 | input_data = input_data.dict(skip_defaults=True, by_alias=True) 20 | 21 | if input_data.get("change_log"): 22 | change_log = row.change_log 23 | # Make sure dates are correctly parsed as strings 24 | _logs = list() 25 | for data in input_data["change_log"]: 26 | _log = ChangeLog(**data).json() 27 | _logs.append(json.loads(_log)) 28 | 29 | change_log.extend(_logs) 30 | input_data["change_log"] = change_log 31 | 32 | await row.update(**input_data).apply() 33 | 34 | return row 35 | --------------------------------------------------------------------------------
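A minimal usage sketch for update_data (illustrative only; append_change_log, its arguments, and the row type are assumptions, not repository code). It shows how a caller might append a change-log entry to a Gino-backed row via the helper above, using the ChangeLog model it imports:

from datetime import datetime

from app.crud import update_data
from app.models.pydantic.change_log import ChangeLog


async def append_change_log(row, message: str, status: str = "success"):
    # Hypothetical helper: build a ChangeLog entry and let update_data serialize it,
    # extend row.change_log with it, and apply the update to the database row.
    entry = ChangeLog(date_time=datetime.utcnow(), status=status, message=message)
    return await update_data(row, {"change_log": [entry.dict()]})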
/app/crud/datamart.py: -------------------------------------------------------------------------------- 1 | """CRUD functions for data mart analysis results.""" 2 | 3 | import json 4 | import uuid 5 | 6 | from app.errors import RecordNotFoundError 7 | from app.models.orm.datamart import AnalysisResult 8 | from app.models.pydantic.datamart import DataMartResource 9 | 10 | 11 | async def save_result(result_data: DataMartResource) -> AnalysisResult: 12 | 13 | analysis_result: AnalysisResult = await AnalysisResult.create( 14 | **json.loads(result_data.json(by_alias=False)) 15 | ) 16 | 17 | return analysis_result 18 | 19 | 20 | async def get_result(result_id: uuid.UUID) -> AnalysisResult: 21 | analysis_result: AnalysisResult = await AnalysisResult.get([result_id]) 22 | if analysis_result is None: 23 | raise RecordNotFoundError(f"Could not find requested result {result_id}") 24 | 25 | return analysis_result 26 | 27 | 28 | async def update_result(result_id: uuid.UUID, result_data) -> AnalysisResult: 29 | analysis_result: AnalysisResult = await get_result(result_id) 30 | await analysis_result.update(**json.loads(result_data.json(by_alias=False))).apply() 31 | 32 | return analysis_result 33 | 34 | 35 | async def delete_result(result_id: uuid.UUID) -> AnalysisResult: 36 | analysis_result: AnalysisResult = await get_result(result_id) 37 | await AnalysisResult.delete.where(AnalysisResult.id == result_id).gino.status() 38 | 39 | return analysis_result 40 | -------------------------------------------------------------------------------- /app/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import traceback 3 | 4 | from fastapi import HTTPException 5 | from fastapi.responses import ORJSONResponse 6 | 7 | from app.settings.globals import ENV 8 | 9 | 10 | class TooManyRetriesError(RecursionError): 11 | def __init__(self, message: str, detail: str): 12 | self.message = message 13 | self.detail = detail 14 | 15 | 16 | class RecordNotFoundError(Exception): 17 | pass 18 | 19 | 20 | class RecordAlreadyExistsError(Exception): 21 | pass 22 | 23 | 24 | class BadRequestError(Exception): 25 | pass 26 | 27 | 28 | class BadResponseError(Exception): 29 | pass 30 | 31 | 32 | class InvalidResponseError(Exception): 33 | pass 34 | 35 | 36 | class UnauthorizedError(Exception): 37 | pass 38 | 39 | 40 | def http_error_handler(exc: HTTPException) -> ORJSONResponse: 41 | 42 | message = exc.detail 43 | if exc.status_code < 500: 44 | status = "failed" 45 | else: 46 | status = "error" 47 | # In dev and test print full traceback of internal server errors 48 | if ENV == "test" or ENV == "dev": 49 | exc_type, exc_value, exc_traceback = sys.exc_info() 50 | message = traceback.format_exception(exc_type, exc_value, exc_traceback) 51 | return ORJSONResponse( 52 | status_code=exc.status_code, content={"status": status, "message": message} 53 | ) 54 | 55 | 56 | class BadAdminSourceException(Exception): 57 | pass 58 | 59 | 60 | class BadAdminVersionException(Exception): 61 | pass 62 | 63 | 64 | class GeometryIsNullError(Exception): 65 | pass 66 | -------------------------------------------------------------------------------- /app/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/__init__.py -------------------------------------------------------------------------------- /app/models/enum/change_log.py: 
-------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class ChangeLogStatusTaskIn(str, Enum): 5 | success = "success" 6 | failed = "failed" 7 | 8 | 9 | class ChangeLogStatus(str, Enum): 10 | success = "success" 11 | failed = "failed" 12 | pending = "pending" 13 | -------------------------------------------------------------------------------- /app/models/enum/entity.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class EntityType(str, Enum): 5 | dataset = "dataset" 6 | version = "version" 7 | asset = "asset" 8 | -------------------------------------------------------------------------------- /app/models/enum/geostore.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class GeostoreOrigin(str, Enum): 5 | gfw = "gfw" 6 | rw = "rw" 7 | -------------------------------------------------------------------------------- /app/models/enum/pg_types.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from aenum import Enum, extend_enum 4 | 5 | 6 | class PGOtherType(str, Enum): 7 | __doc__ = "Other PostgreSQL data types" 8 | array = "ARRAY" 9 | boolean = "boolean" 10 | jsonb = "jsonb" 11 | time = "time" 12 | uuid = "uuid" 13 | xml = "xml" 14 | 15 | 16 | class PGNumericType(str, Enum): 17 | __doc__ = "Numeric PostgreSQL data types" 18 | bigint = "bigint" 19 | double_precision = "double precision" 20 | integer = "integer" 21 | numeric = "numeric" 22 | smallint = "smallint" 23 | 24 | 25 | class PGTextType(str, Enum): 26 | __doc__ = "Text PostgreSQL data types" 27 | character_varying = "character varying" 28 | text = "text" 29 | 30 | 31 | class PGDateType(str, Enum): 32 | __doc__ = "Date PostgreSQL data types" 33 | date = "date" 34 | timestamp = "timestamp" 35 | timestamp_wtz = "timestamp without time zone" 36 | 37 | 38 | class PGGeometryType(str, Enum): 39 | __doc__ = "Geometry PostgreSQL data types" 40 | geometry = "geometry" 41 | 42 | 43 | class PGType(str, Enum): 44 | __doc__ = "PostgreSQL data type enumeration" 45 | 46 | 47 | # extend PGType with the types listed above 48 | sub_classes: List[Enum] = [ 49 | PGDateType, 50 | PGTextType, 51 | PGNumericType, 52 | PGGeometryType, 53 | PGOtherType, 54 | ] 55 | for sub_class in sub_classes: 56 | for field in sub_class: 57 | extend_enum(PGType, field.name, field.value) 58 | -------------------------------------------------------------------------------- /app/models/enum/queries.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class QueryFormat(str, Enum): 5 | json = "json" 6 | csv = "csv" 7 | 8 | 9 | class QueryType(str, Enum): 10 | table = "table" 11 | raster = "raster" 12 | -------------------------------------------------------------------------------- /app/models/enum/sources.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class SourceType(str, Enum): 5 | raster = "raster" 6 | table = "table" 7 | vector = "vector" 8 | 9 | 10 | class RasterSourceType(str, Enum): 11 | raster = "raster" 12 | 13 | 14 | class TableSourceType(str, Enum): 15 | __doc__ = "Source type of input file."
16 | table = "table" 17 | 18 | 19 | class VectorSourceType(str, Enum): 20 | vector = "vector" 21 | -------------------------------------------------------------------------------- /app/models/enum/versions.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class VersionStatus(str, Enum): 5 | saved = "saved" 6 | pending = "pending" 7 | failed = "failed" 8 | -------------------------------------------------------------------------------- /app/models/orm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/orm/__init__.py -------------------------------------------------------------------------------- /app/models/orm/api_keys.py: -------------------------------------------------------------------------------- 1 | from app.application import db 2 | from app.models.orm.base import Base 3 | 4 | 5 | class ApiKey(Base): 6 | __tablename__ = "api_keys" 7 | alias = db.Column(db.String, nullable=False) 8 | user_id = db.Column(db.String, nullable=False) 9 | api_key = db.Column(db.UUID, primary_key=True) 10 | organization = db.Column(db.String, nullable=False) 11 | email = db.Column(db.String, nullable=False) 12 | domains = db.Column(db.ARRAY(db.String), nullable=False) 13 | expires_on = db.Column(db.DateTime) 14 | 15 | _api_keys_alias_user_id_unique = db.UniqueConstraint( 16 | "alias", "user_id", name="alias_user_id_uc" 17 | ) 18 | _api_keys_api_key_idx = db.Index( 19 | "api_keys_api_key_idx", "api_key", postgresql_using="hash" 20 | ) 21 | _api_keys_user_id_idx = db.Index( 22 | "api_keys_user_id_idx", "user_id", postgresql_using="btree" 23 | ) 24 | -------------------------------------------------------------------------------- /app/models/orm/assets.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Asset(Base): 5 | __tablename__ = "assets" 6 | asset_id = db.Column(db.UUID, primary_key=True) 7 | dataset = db.Column(db.String, nullable=False, index=True) 8 | version = db.Column(db.String, nullable=False, index=True) 9 | asset_type = db.Column(db.String, nullable=False) 10 | asset_uri = db.Column(db.String, nullable=False) 11 | status = db.Column(db.String, nullable=False, default="pending") 12 | is_managed = db.Column(db.Boolean, nullable=False, default=True) 13 | is_default = db.Column(db.Boolean, nullable=False, default=False) 14 | is_downloadable = db.Column(db.Boolean, nullable=False, default=True) 15 | creation_options = db.Column(db.JSONB, nullable=False, default=dict()) 16 | # metadata = db.Column(db.JSONB, nullable=False, default=dict()) 17 | fields = db.Column(db.JSONB, nullable=False, default=list()) 18 | extent = db.Column(db.JSONB, nullable=True, default=None) 19 | stats = db.Column(db.JSONB, nullable=True, default=None) 20 | change_log = db.Column(db.ARRAY(db.JSONB), nullable=False, default=list()) 21 | 22 | fk = db.ForeignKeyConstraint( 23 | ["dataset", "version"], 24 | ["versions.dataset", "versions.version"], 25 | name="fk", 26 | onupdate="CASCADE", 27 | ondelete="CASCADE", 28 | ) 29 | 30 | uq_asset_uri = db.UniqueConstraint("asset_uri", name="uq_asset_uri") 31 | -------------------------------------------------------------------------------- /app/models/orm/base.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 |
from geoalchemy2 import Geometry 4 | from sqlalchemy.dialects.postgresql import ARRAY, JSONB, TEXT, UUID 5 | from sqlalchemy_utils import EmailType, generic_repr 6 | 7 | from ...application import db 8 | 9 | db.JSONB, db.UUID, db.ARRAY, db.EmailType, db.TEXT, db.Geometry = ( 10 | JSONB, 11 | UUID, 12 | ARRAY, 13 | EmailType, 14 | TEXT, 15 | Geometry, 16 | ) 17 | 18 | 19 | @generic_repr 20 | class Base(db.Model): # type: ignore 21 | __abstract__ = True 22 | created_on = db.Column( 23 | db.DateTime, default=datetime.utcnow, server_default=db.func.now() 24 | ) 25 | updated_on = db.Column( 26 | db.DateTime, 27 | default=datetime.utcnow, 28 | onupdate=datetime.utcnow, 29 | server_default=db.func.now(), 30 | ) 31 | -------------------------------------------------------------------------------- /app/models/orm/datamart.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class AnalysisResult(Base): 5 | __tablename__ = "analysis_results" 6 | id = db.Column(db.UUID, primary_key=True) 7 | endpoint = db.Column(db.String) 8 | result = db.Column(db.JSONB) 9 | metadata = db.Column(db.JSONB) 10 | status = db.Column(db.String) 11 | requested_by = db.Column( 12 | db.UUID, db.ForeignKey("api_keys.api_key", name="api_key_fk") 13 | ) 14 | message = db.Column(db.String) 15 | 16 | _api_keys_api_key_idx = db.Index( 17 | "analysis_results_id_idx", "id", postgresql_using="hash" 18 | ) 19 | -------------------------------------------------------------------------------- /app/models/orm/dataset_metadata.py: -------------------------------------------------------------------------------- 1 | import sqlalchemy as sa 2 | 3 | from .base import Base, db 4 | from .mixins import MetadataMixin 5 | 6 | 7 | class DatasetMetadata(Base, MetadataMixin): 8 | __tablename__ = "dataset_metadata" 9 | 10 | id = db.Column(db.UUID, primary_key=True) 11 | dataset = db.Column(db.String, nullable=False, unique=True) 12 | source = db.Column(db.String, nullable=True) 13 | license = db.Column(db.String) 14 | data_language = db.Column(db.String, nullable=True) 15 | overview = db.Column(db.String, nullable=True) 16 | 17 | function = db.Column(db.String) 18 | cautions = db.Column(db.String) 19 | key_restrictions = db.Column(db.String) 20 | tags = db.Column(sa.ARRAY(db.String)) 21 | why_added = db.Column(db.String) 22 | learn_more = db.Column(db.String) 23 | 24 | fk = db.ForeignKeyConstraint( 25 | ["dataset"], 26 | ["datasets.dataset"], 27 | name="fk", 28 | onupdate="CASCADE", 29 | ondelete="CASCADE", 30 | ) 31 | -------------------------------------------------------------------------------- /app/models/orm/datasets.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Dataset(Base): 5 | __tablename__ = "datasets" 6 | dataset = db.Column(db.String, primary_key=True) 7 | is_downloadable = db.Column(db.Boolean, nullable=False, default=True) 8 | owner_id = db.Column(db.String, nullable=True, default=None) 9 | # metadata = db.Column(db.JSONB, default=dict()) 10 | -------------------------------------------------------------------------------- /app/models/orm/geostore.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | ######## 4 | # NOTE # 5 | ######## 6 | # UserAreas doesn't officially inherit from Geostore in a class hierarchy, but it 7 | # DOES inherit in the DB (via a custom migration). 
So any time you change the 8 | # Geostore table, change UserAreas as well! And vice versa, of course. 9 | 10 | 11 | class Geostore(Base): 12 | __tablename__ = "geostore" 13 | 14 | gfw_geostore_id = db.Column(db.UUID, primary_key=True) 15 | gfw_geojson = db.Column(db.TEXT) 16 | gfw_area__ha = db.Column(db.Numeric) 17 | gfw_bbox = db.Column(db.ARRAY(db.Numeric)) 18 | 19 | _geostore_gfw_geostore_id_idx = db.Index( 20 | "geostore_gfw_geostore_id_idx", "gfw_geostore_id", postgresql_using="hash" 21 | ) 22 | -------------------------------------------------------------------------------- /app/models/orm/migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /app/models/orm/migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | import sqlalchemy_utils 11 | ${imports if imports else ""} 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = ${repr(up_revision)} 15 | down_revision = ${repr(down_revision)} 16 | branch_labels = ${repr(branch_labels)} 17 | depends_on = ${repr(depends_on)} 18 | 19 | 20 | def upgrade(): 21 | ${upgrades if upgrades else "pass"} 22 | 23 | 24 | def downgrade(): 25 | ${downgrades if downgrades else "pass"} 26 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/3e524ef0525f_.py: -------------------------------------------------------------------------------- 1 | """empty message. 2 | 3 | Revision ID: 3e524ef0525f 4 | Revises: 604bf4e66c2b 5 | Create Date: 2024-12-18 00:43:46.681427 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "3e524ef0525f" 12 | down_revision = "604bf4e66c2b" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade(): 18 | # ### commands auto generated by Alembic - please adjust! ### 19 | op.create_index(op.f("ix_assets_dataset"), "assets", ["dataset"], unique=False) 20 | op.create_index(op.f("ix_assets_version"), "assets", ["version"], unique=False) 21 | op.add_column("dataset_metadata", sa.Column("subtitle", sa.String(), nullable=True)) 22 | op.add_column("version_metadata", sa.Column("subtitle", sa.String(), nullable=True)) 23 | # ### end Alembic commands ### 24 | 25 | 26 | def downgrade(): 27 | # ### commands auto generated by Alembic - please adjust! 
### 28 | op.drop_column("version_metadata", "subtitle") 29 | op.drop_column("dataset_metadata", "subtitle") 30 | op.drop_index(op.f("ix_assets_version"), table_name="assets") 31 | op.drop_index(op.f("ix_assets_dataset"), table_name="assets") 32 | # ### end Alembic commands ### 33 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/604bf4e66c2b_.py: -------------------------------------------------------------------------------- 1 | """Add content_date_description to version_metadata 2 | 3 | Revision ID: 604bf4e66c2b 4 | Revises: ef3392e8e054 5 | Create Date: 2024-10-31 16:52:56.571782 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | import sqlalchemy_utils 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = '604bf4e66c2b' 15 | down_revision = 'ef3392e8e054' 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! ### 22 | op.add_column('version_metadata', sa.Column('content_date_description', sa.String(), nullable=True)) 23 | # ### end Alembic commands ### 24 | 25 | 26 | def downgrade(): 27 | # ### commands auto generated by Alembic - please adjust! ### 28 | op.drop_column('version_metadata', 'content_date_description') 29 | # ### end Alembic commands ### 30 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/a5787f2eefe5_.py: -------------------------------------------------------------------------------- 1 | """Adding dataset version alias table. 2 | 3 | Revision ID: a5787f2eefe5 4 | Revises: 4763f4b8141a 5 | Create Date: 2021-09-27 22:12:26.964711 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "a5787f2eefe5" 12 | down_revision = "4763f4b8141a" # pragma: allowlist secret 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade(): 18 | op.create_table( 19 | "aliases", 20 | sa.Column("alias", sa.String(), nullable=False), 21 | sa.Column("dataset", sa.String(), nullable=False), 22 | sa.Column("version", sa.String(), nullable=False), 23 | sa.Column( 24 | "created_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 25 | ), 26 | sa.Column( 27 | "updated_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 28 | ), 29 | sa.ForeignKeyConstraint( 30 | ["dataset", "version"], 31 | ["versions.dataset", "versions.version"], 32 | name="fk", 33 | onupdate="CASCADE", 34 | ondelete="CASCADE", 35 | ), 36 | sa.PrimaryKeyConstraint("dataset", "alias"), 37 | ) 38 | 39 | 40 | def downgrade(): 41 | op.drop_table("aliases") 42 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/aa5aefcbdfcf_.py: -------------------------------------------------------------------------------- 1 | """empty message. 2 | 3 | Revision ID: aa5aefcbdfcf 4 | Revises: 4763f4b8141a 5 | Create Date: 2021-10-12 22:17:29.106137 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | from sqlalchemy.dialects import postgresql 10 | 11 | # revision identifiers, used by Alembic. 12 | revision = "aa5aefcbdfcf" # pragma: allowlist secret 13 | down_revision = "a5787f2eefe5" # pragma: allowlist secret 14 | branch_labels = None 15 | depends_on = None 16 | 17 | 18 | def upgrade(): 19 | # ### commands auto generated by Alembic - please adjust! 
### 20 | op.add_column( 21 | "assets", 22 | sa.Column( 23 | "revision_history", 24 | postgresql.ARRAY(postgresql.JSONB(astext_type=sa.Text())), 25 | nullable=True, 26 | ), 27 | ) 28 | op.add_column("assets", sa.Column("latest_revision", sa.String(), nullable=True)) 29 | op.add_column("assets", sa.Column("source_version", sa.String(), nullable=True)) 30 | # ### end Alembic commands ### 31 | 32 | 33 | def downgrade(): 34 | # ### commands auto generated by Alembic - please adjust! ### 35 | op.drop_column("assets", "source_version") 36 | op.drop_column("assets", "latest_revision") 37 | op.drop_column("assets", "revision_history") 38 | # ### end Alembic commands ### 39 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/d62a9b15f844_.py: -------------------------------------------------------------------------------- 1 | """Create API Key Table. 2 | 3 | Revision ID: d62a9b15f844 4 | Revises: 73fb3f5e39b8 5 | Create Date: 2021-05-01 01:29:13.157933 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | from sqlalchemy.dialects import postgresql 10 | 11 | # revision identifiers, used by Alembic. 12 | revision = "d62a9b15f844" # pragma: allowlist secret 13 | down_revision = "73fb3f5e39b8" # pragma: allowlist secret 14 | branch_labels = None 15 | depends_on = None 16 | 17 | 18 | def upgrade(): 19 | # ### commands auto generated by Alembic - please adjust! ### 20 | op.create_table( 21 | "api_keys", 22 | sa.Column("alias", sa.String(), nullable=False), 23 | sa.Column("user_id", sa.String(), nullable=False), 24 | sa.Column("api_key", postgresql.UUID(), nullable=False), 25 | sa.Column("organization", sa.String(), nullable=False), 26 | sa.Column("email", sa.String(), nullable=False), 27 | sa.Column("domains", postgresql.ARRAY(sa.String()), nullable=False), 28 | sa.Column("expires_on", sa.DateTime(), nullable=True), 29 | sa.Column( 30 | "created_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 31 | ), 32 | sa.Column( 33 | "updated_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 34 | ), 35 | sa.PrimaryKeyConstraint("api_key"), 36 | ) 37 | 38 | op.create_index( 39 | "api_keys_api_key_idx", 40 | "api_keys", 41 | ["api_key"], 42 | unique=False, 43 | postgresql_using="hash", 44 | ) 45 | op.create_index( 46 | "api_keys_user_id_idx", 47 | "api_keys", 48 | ["user_id"], 49 | unique=False, 50 | postgresql_using="btree", 51 | ) 52 | op.create_unique_constraint("alias_user_id_uc", "api_keys", ["alias", "user_id"]) 53 | # ### end Alembic commands ### 54 | 55 | 56 | def downgrade(): 57 | # ### commands auto generated by Alembic - please adjust! ### 58 | op.drop_table("api_keys") 59 | # ### end Alembic commands ### 60 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/d767b6dd2c4c_.py: -------------------------------------------------------------------------------- 1 | """empty message. 2 | 3 | Revision ID: d767b6dd2c4c 4 | Revises: 04fcb4f2408a 5 | Create Date: 2024-04-25 19:38:35.223004 6 | """ 7 | import sqlalchemy as sa 8 | from alembic import op 9 | 10 | # revision identifiers, used by Alembic. 11 | revision = "d767b6dd2c4c" 12 | down_revision = "04fcb4f2408a" 13 | branch_labels = None 14 | depends_on = None 15 | 16 | 17 | def upgrade(): 18 | # ### commands auto generated by Alembic - please adjust! 
### 19 | op.add_column("datasets", sa.Column("owner_id", sa.String(), nullable=True)) 20 | # ### end Alembic commands ### 21 | 22 | 23 | def downgrade(): 24 | # ### commands auto generated by Alembic - please adjust! ### 25 | op.drop_column("datasets", "owner_id") 26 | # ### end Alembic commands ### 27 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/d8f049f00259_add_analysis_results_table.py: -------------------------------------------------------------------------------- 1 | """Add analysis results table for datamart endpoints 2 | 3 | Revision ID: d8f049f00259 4 | Revises: 3e524ef0525f 5 | Create Date: 2025-03-03 09:27:19.271840 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | import sqlalchemy_utils 12 | from sqlalchemy.dialects import postgresql 13 | 14 | # revision identifiers, used by Alembic. 15 | revision = "d8f049f00259" 16 | down_revision = "3e524ef0525f" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | 21 | def upgrade(): 22 | # ### commands auto generated by Alembic - please adjust! ### 23 | op.create_table( 24 | "analysis_results", 25 | sa.Column( 26 | "id", 27 | postgresql.UUID(), 28 | nullable=False, 29 | ), 30 | sa.Column("endpoint", sa.String()), 31 | sa.Column("result", postgresql.JSONB(astext_type=sa.Text()), nullable=True), 32 | sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), 33 | sa.Column("status", sa.String(), nullable=False, default="pending"), 34 | sa.Column("requested_by", postgresql.UUID(), nullable=True), 35 | sa.Column("message", sa.String(), nullable=True), 36 | sa.Column( 37 | "created_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 38 | ), 39 | sa.Column( 40 | "updated_on", sa.DateTime(), server_default=sa.text("now()"), nullable=True 41 | ), 42 | sa.ForeignKeyConstraint( 43 | ["requested_by"], 44 | ["api_keys.api_key"], 45 | name="fk", 46 | onupdate="SET NULL", 47 | ondelete="SET NULL", 48 | ), 49 | sa.PrimaryKeyConstraint("id"), 50 | ) 51 | op.create_index( 52 | "analysis_results_id_idx", 53 | "analysis_results", 54 | ["id"], 55 | unique=False, 56 | postgresql_using="hash", 57 | ), 58 | # ### end Alembic commands ### 59 | 60 | 61 | def downgrade(): 62 | # ### commands auto generated by Alembic - please adjust! ### 63 | op.drop_table("analysis_results") 64 | 65 | # ### end Alembic commands ### 66 | -------------------------------------------------------------------------------- /app/models/orm/migrations/versions/ef3392e8e054_.py: -------------------------------------------------------------------------------- 1 | """update resolution metadata fields 2 | 3 | Revision ID: ef3392e8e054 4 | Revises: d767b6dd2c4c 5 | Create Date: 2024-09-10 14:19:43.424752 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | import sqlalchemy_utils 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = 'ef3392e8e054' 15 | down_revision = 'd767b6dd2c4c' 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | # ### commands auto generated by Alembic - please adjust! 
### 22 | op.alter_column('dataset_metadata', 'resolution', nullable=True, new_column_name='spatial_resolution') 23 | op.add_column('dataset_metadata', sa.Column('resolution_description', sa.String(), nullable=True)) 24 | op.alter_column('version_metadata', 'resolution', nullable=True, new_column_name='spatial_resolution') 25 | op.add_column('version_metadata', sa.Column('resolution_description', sa.String(), nullable=True)) 26 | # ### end Alembic commands ### 27 | 28 | 29 | def downgrade(): 30 | # ### commands auto generated by Alembic - please adjust! ### 31 | op.alter_column('dataset_metadata', 'spatial_resolution', nullable=True, new_column_name='resolution') 32 | op.drop_column('version_metadata', 'resolution_description') 33 | op.alter_column('version_metadata', 'spatial_resolution', nullable=True, new_column_name='resolution') 34 | op.drop_column('dataset_metadata', 'resolution_description') 35 | # ### end Alembic commands ### 36 | -------------------------------------------------------------------------------- /app/models/orm/mixins.py: -------------------------------------------------------------------------------- 1 | from .base import db 2 | 3 | 4 | class MetadataMixin: 5 | title = db.Column(db.String) 6 | subtitle = db.Column(db.String) 7 | spatial_resolution = db.Column(db.Numeric) 8 | resolution_description = db.Column(db.String) 9 | geographic_coverage = db.Column(db.String) 10 | update_frequency = db.Column(db.String) 11 | citation = db.Column(db.String) 12 | scale = db.Column(db.String) 13 | -------------------------------------------------------------------------------- /app/models/orm/queries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/orm/queries/__init__.py -------------------------------------------------------------------------------- /app/models/orm/queries/datasets.py: -------------------------------------------------------------------------------- 1 | from ....application import db 2 | 3 | _dataset_sql = """ 4 | SELECT 5 | datasets.*, 6 | version_array AS versions, 7 | coalesce(metadata, '{}') as metadata 8 | FROM 9 | datasets 10 | LEFT JOIN 11 | ( 12 | SELECT 13 | dataset, 14 | ARRAY_AGG(version) AS version_array 15 | FROM 16 | versions 17 | GROUP BY 18 | dataset 19 | ) 20 | t USING (dataset) 21 | LEFT JOIN 22 | ( 23 | SELECT dataset, ROW_TO_JSON(dataset_metadata.*) as metadata 24 | FROM 25 | dataset_metadata 26 | ) 27 | m USING (dataset) 28 | ORDER BY dataset 29 | LIMIT(:limit) 30 | OFFSET(:offset);""" 31 | 32 | all_datasets = db.text(_dataset_sql) 33 | -------------------------------------------------------------------------------- /app/models/orm/queries/fields.py: -------------------------------------------------------------------------------- 1 | from ....application import db 2 | 3 | _fields_sql = """ 4 | SELECT 5 | column_name as name, CASE WHEN data_type = 'USER-DEFINED' THEN udt_name ELSE data_type END as data_type 6 | FROM information_schema.columns 7 | WHERE 8 | table_schema = :dataset AND table_name = :version;""" 9 | 10 | fields = db.text(_fields_sql) 11 | -------------------------------------------------------------------------------- /app/models/orm/queries/raster_assets.py: -------------------------------------------------------------------------------- 1 | data_environment_raster_tile_sets = """ 2 | SELECT 3 | assets.asset_id, 4 | assets.dataset, 5 | assets.version, 6 | creation_options, 7 | 
asset_uri, 8 | rb.values_table 9 | FROM 10 | assets 11 | LEFT JOIN asset_metadata am 12 | ON am.asset_id = assets.asset_id 13 | JOIN versions 14 | ON versions.dataset = assets.dataset 15 | AND versions.version = assets.version 16 | LEFT JOIN raster_band_metadata rb 17 | ON rb.asset_metadata_id = am.id 18 | WHERE assets.asset_type = 'Raster tile set' 19 | AND assets.creation_options->>'pixel_meaning' NOT LIKE '%tcd%' 20 | AND assets.creation_options->>'grid' = :grid 21 | """ 22 | -------------------------------------------------------------------------------- /app/models/orm/tasks.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Task(Base): 5 | __tablename__ = "tasks" 6 | task_id = db.Column(db.UUID, primary_key=True) 7 | asset_id = db.Column(db.UUID, nullable=False) 8 | status = db.Column(db.String, nullable=False, default="pending") 9 | 10 | change_log = db.Column(db.ARRAY(db.JSONB), default=list()) 11 | 12 | fk = db.ForeignKeyConstraint( 13 | ["asset_id"], 14 | ["assets.asset_id"], 15 | name="fk", 16 | onupdate="CASCADE", 17 | ondelete="CASCADE", 18 | ) 19 | -------------------------------------------------------------------------------- /app/models/orm/user_areas.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | ######## 4 | # NOTE # 5 | ######## 6 | # UserAreas doesn't officially inherit from Geostore in a class hierarchy, but it 7 | # DOES inherit in the DB (via a custom migration). So any time you change the 8 | # Geostore table, change UserAreas as well! And vice versa, of course. 9 | 10 | 11 | class UserArea(Base): 12 | __tablename__ = "userareas" 13 | 14 | gfw_geostore_id = db.Column(db.UUID, primary_key=True) 15 | gfw_geojson = db.Column(db.TEXT) 16 | gfw_area__ha = db.Column(db.Numeric) 17 | gfw_bbox = db.Column(db.ARRAY(db.Numeric)) 18 | 19 | _userarea_gfw_geostore_id_idx = db.Index( 20 | "userarea_gfw_geostore_id_idx", "gfw_geostore_id", postgresql_using="hash" 21 | ) 22 | -------------------------------------------------------------------------------- /app/models/orm/version_metadata.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | from .mixins import MetadataMixin 3 | 4 | 5 | class VersionMetadata(Base, MetadataMixin): 6 | __tablename__ = "version_metadata" 7 | 8 | id = db.Column(db.UUID, primary_key=True) 9 | dataset = db.Column(db.String, nullable=False) 10 | version = db.Column(db.String, nullable=False) 11 | content_date = db.Column(db.Date) 12 | content_start_date = db.Column(db.Date) 13 | content_date_description = db.Column(db.String) 14 | content_end_date = db.Column(db.Date) 15 | last_update = db.Column(db.Date) 16 | description = db.Column(db.String) 17 | 18 | dataset_fk = db.ForeignKeyConstraint( 19 | ["dataset", "version"], 20 | ["versions.dataset", "versions.version"], 21 | name="dataset_fk", 22 | onupdate="CASCADE", 23 | ondelete="CASCADE", 24 | ) 25 | _unique_dataset_version = db.UniqueConstraint( 26 | "dataset", "version", name="dataset_version_uq" 27 | ) 28 | -------------------------------------------------------------------------------- /app/models/orm/versions.py: -------------------------------------------------------------------------------- 1 | from .base import Base, db 2 | 3 | 4 | class Version(Base): 5 | __tablename__ = "versions" 6 | dataset = db.Column(db.String, primary_key=True) 7 | version = db.Column(db.String, 
primary_key=True) 8 | is_latest = db.Column(db.Boolean, nullable=False, default=False) 9 | is_mutable = db.Column(db.Boolean, nullable=False, default=False) 10 | is_downloadable = db.Column(db.Boolean, nullable=False, default=True) 11 | # source_type = db.Column(db.String, nullable=False) 12 | # source_uri = db.Column(db.ARRAY(db.String), default=list()) 13 | status = db.Column(db.String, nullable=False, default="pending") 14 | # has_geostore = db.Column(db.Boolean, nullable=False, default=False) 15 | # metadata = db.Column(db.JSONB, default=dict()) 16 | change_log = db.Column(db.ARRAY(db.JSONB), default=list()) 17 | # creation_options = db.Column(db.JSONB, default=dict()) 18 | 19 | fk = db.ForeignKeyConstraint( 20 | ["dataset"], 21 | ["datasets.dataset"], 22 | name="fk", 23 | onupdate="CASCADE", 24 | ondelete="CASCADE", 25 | ) 26 | -------------------------------------------------------------------------------- /app/models/pydantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/models/pydantic/__init__.py -------------------------------------------------------------------------------- /app/models/pydantic/analysis.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from ..enum.analysis import RasterLayer 4 | from .base import StrictBaseModel 5 | from .geostore import Geometry 6 | 7 | 8 | class ZonalAnalysisRequestIn(StrictBaseModel): 9 | geometry: Geometry 10 | sum: List[RasterLayer] 11 | group_by: List[RasterLayer] = list() 12 | filters: List[RasterLayer] = list() 13 | start_date: Optional[str] = None 14 | end_date: Optional[str] = None 15 | -------------------------------------------------------------------------------- /app/models/pydantic/base.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from pydantic import BaseModel, Extra 4 | 5 | 6 | class BaseORMRecord(BaseModel): 7 | class Config: 8 | orm_mode = True 9 | 10 | 11 | class BaseRecord(BaseModel): 12 | created_on: datetime 13 | updated_on: datetime 14 | 15 | class Config: 16 | orm_mode = True 17 | 18 | 19 | class StrictBaseModel(BaseModel): 20 | class Config: 21 | extra = Extra.forbid 22 | validate_assignment = True 23 | 24 | -------------------------------------------------------------------------------- /app/models/pydantic/change_log.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import List, Optional 3 | 4 | from ..enum.change_log import ChangeLogStatus, ChangeLogStatusTaskIn 5 | from .base import StrictBaseModel 6 | from .responses import Response 7 | 8 | 9 | class ChangeLog(StrictBaseModel): 10 | date_time: datetime 11 | status: ChangeLogStatus 12 | message: str 13 | detail: Optional[str] = None 14 | 15 | 16 | class ChangeLogTaskIn(StrictBaseModel): 17 | date_time: datetime 18 | status: ChangeLogStatusTaskIn 19 | message: str 20 | detail: Optional[str] = None 21 | 22 | 23 | class ChangeLogResponse(Response): 24 | data: List[ChangeLog] 25 | -------------------------------------------------------------------------------- /app/models/pydantic/database.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Union 2 | 3 | from pydantic import BaseModel, Field, fields, validator 4 | 
from sqlalchemy.engine.url import URL 5 | from starlette.datastructures import Secret 6 | 7 | 8 | class DatabaseURL(BaseModel): 9 | drivername: str = Field(..., alias="driver", description="The database driver.") 10 | host: str = Field("localhost", description="Server host.") 11 | port: Optional[Union[str, int]] = Field(None, description="Server access port.") 12 | username: Optional[str] = Field(None, alias="user", description="Username") 13 | password: Optional[Union[str, Secret]] = Field(None, description="Password") 14 | database: str = Field(..., description="Database name.") 15 | url: Optional[URL] = None 16 | 17 | class Config: 18 | arbitrary_types_allowed = True 19 | allow_population_by_field_name = True 20 | 21 | @validator("url", always=True) 22 | def build_url(cls, v: Any, field: fields.Field, values: dict): 23 | if isinstance(v, URL): 24 | return v 25 | args = {k: str(v) for k, v in values.items() if v is not None} 26 | return URL(**args) 27 | -------------------------------------------------------------------------------- /app/models/pydantic/datasets.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from .base import BaseRecord, StrictBaseModel 6 | from .metadata import DatasetMetadata, DatasetMetadataOut, DatasetMetadataUpdate 7 | from .responses import PaginationLinks, PaginationMeta, Response 8 | 9 | 10 | class Dataset(BaseRecord): 11 | dataset: str 12 | is_downloadable: bool 13 | metadata: Optional[Union[DatasetMetadataOut, BaseModel]] 14 | versions: Optional[List[str]] = list() 15 | 16 | 17 | class DatasetCreateIn(StrictBaseModel): 18 | is_downloadable: bool = Field( 19 | True, 20 | description="Flag to specify if assets associated with dataset can be downloaded. " 21 | "All associated versions and assets will inherit this value. " 22 | "Value can be overridden at version or asset level.", 23 | ) 24 | metadata: DatasetMetadata 25 | 26 | 27 | class DatasetUpdateIn(StrictBaseModel): 28 | is_downloadable: Optional[bool] 29 | metadata: Optional[DatasetMetadataUpdate] 30 | owner_id: Optional[str] 31 | 32 | 33 | class DatasetResponse(Response): 34 | data: Dataset 35 | 36 | 37 | class DatasetsResponse(Response): 38 | data: List[Dataset] 39 | 40 | 41 | class PaginatedDatasetsResponse(DatasetsResponse): 42 | links: PaginationLinks 43 | meta: PaginationMeta 44 | -------------------------------------------------------------------------------- /app/models/pydantic/downloads.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import Field 4 | 5 | from app.models.enum.creation_options import Delimiters 6 | from app.models.pydantic.base import StrictBaseModel 7 | from app.models.pydantic.geostore import Geometry 8 | 9 | 10 | class DownloadJSONIn(StrictBaseModel): 11 | sql: str = Field(..., description="SQL query.") 12 | geometry: Optional[Geometry] = Field( 13 | None, description="A geojson geometry to be used as spatial filter." 14 | ) 15 | filename: str = Field("export.json", description="Name of export file.") 16 | 17 | 18 | class DownloadCSVIn(DownloadJSONIn): 19 | filename: str = Field("export.csv", description="Name of export file.") 20 | delimiter: Delimiters = Field( 21 | Delimiters.comma, description="Delimiter to use for CSV file."
22 | ) 23 | -------------------------------------------------------------------------------- /app/models/pydantic/extent.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from ..pydantic.responses import Response 4 | from .geostore import FeatureCollection 5 | 6 | 7 | class Extent(FeatureCollection): 8 | pass 9 | 10 | 11 | class ExtentResponse(Response): 12 | data: Optional[Extent] 13 | -------------------------------------------------------------------------------- /app/models/pydantic/features.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from .responses import Response 4 | 5 | 6 | class FeatureResponse(Response): 7 | data: Dict[str, Any] 8 | 9 | 10 | class FeaturesResponse(Response): 11 | data: List[Dict[str, Any]] 12 | -------------------------------------------------------------------------------- /app/models/pydantic/query.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from app.models.enum.creation_options import Delimiters 4 | from app.models.pydantic.base import StrictBaseModel 5 | from app.models.pydantic.geostore import FeatureCollection, Geometry 6 | from pydantic import Field 7 | 8 | 9 | class QueryRequestIn(StrictBaseModel): 10 | geometry: Optional[Geometry] 11 | sql: str 12 | 13 | 14 | class QueryBatchRequestIn(StrictBaseModel): 15 | feature_collection: Optional[FeatureCollection] = Field( 16 | None, description="An inline collection of GeoJson features on which to do the same query" 17 | ) 18 | uri: Optional[str] = Field( 19 | None, description="URI to a vector file in a variety of formats supported by Geopandas, including GeoJson and CSV format, giving a list of features on which to do the same query. For a CSV file, the column with the geometry in WKB format should be named 'WKT' (not 'WKB')" 20 | ) 21 | geostore_ids: Optional[List[str]] = Field( 22 | None, description="An inline list of ResourceWatch geostore ids" 23 | ) 24 | id_field: str = Field( 25 | "fid", description="Name of field with the feature id, for use in labeling the results for each feature. This field must contain a unique value for each feature. If geostore_ids are specified, then they will automatically be used as the feature id for labeling the results." 
26 | ) 27 | sql: str 28 | 29 | 30 | class CsvQueryRequestIn(QueryRequestIn): 31 | delimiter: Delimiters = Delimiters.comma 32 | -------------------------------------------------------------------------------- /app/models/pydantic/raster_analysis.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Union 2 | 3 | from ..enum.pixetl import Grid 4 | from .asset_metadata import RasterTable 5 | from .base import StrictBaseModel 6 | from .creation_options import NoDataType 7 | 8 | 9 | class BaseLayer(StrictBaseModel): 10 | name: str 11 | no_data: Optional[NoDataType] 12 | 13 | 14 | class EncodedLayer(BaseLayer): 15 | raster_table: Optional[RasterTable] = None 16 | decode_expression: str = "" 17 | encode_expression: str = "" 18 | 19 | 20 | class SourceLayer(EncodedLayer): 21 | source_uri: str 22 | grid: Grid 23 | tile_scheme: str = "nw" 24 | 25 | 26 | class DerivedLayer(EncodedLayer): 27 | source_layer: str 28 | calc: str 29 | 30 | 31 | Layer = Union[SourceLayer, DerivedLayer] 32 | 33 | 34 | class DataEnvironment(StrictBaseModel): 35 | layers: List[Layer] 36 | -------------------------------------------------------------------------------- /app/models/pydantic/responses.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from pydantic import Field 4 | 5 | from .base import StrictBaseModel 6 | 7 | 8 | class Response(StrictBaseModel): 9 | data: Any 10 | status: str = "success" 11 | 12 | 13 | class PaginationLinks(StrictBaseModel): 14 | self: str = Field( 15 | ..., 16 | title="Contains the URL for the current page", 17 | example="https://data-api.globalforestwatch.org/:model?page[number]=1&page[size]=25", 18 | ) 19 | first: str = Field( 20 | ..., 21 | title="Contains the URL for the first page", 22 | example="https://data-api.globalforestwatch.org/:model?page[number]=1&page[size]=25", 23 | ) 24 | last: str = Field( 25 | ..., 26 | title="Contains the URL for the last page", 27 | example="https://data-api.globalforestwatch.org/:model?page[number]=4&page[size]=25", 28 | ) 29 | prev: Optional[str] = Field( 30 | None, title="Contains the URL for the previous page", example="" 31 | ) 32 | next: Optional[str] = Field( 33 | None, 34 | title="Contains the URL for the next page", 35 | example="https://data-api.globalforestwatch.org/:model?page[number]=2&page[size]=25", 36 | ) 37 | 38 | 39 | class PaginationMeta(StrictBaseModel): 40 | size: int = Field( 41 | ..., 42 | title="The page size. 
Reflects the value used in the page[size] query parameter (or the default size of 10 if not provided)", 43 | example="25", 44 | ) 45 | total_items: int = Field( 46 | ..., 47 | title="Contains the total number of items", 48 | example="100", 49 | ) 50 | total_pages: int = Field( 51 | ..., 52 | title="Contains the total number of pages, assuming the page size specified in the page[size] query parameter", 53 | example="4", 54 | ) 55 | -------------------------------------------------------------------------------- /app/models/pydantic/sources.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from app.models.pydantic.base import StrictBaseModel 4 | 5 | 6 | class Source(StrictBaseModel): 7 | source_uri: Optional[List[str]] 8 | -------------------------------------------------------------------------------- /app/models/pydantic/symbology.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional, Tuple, Union 2 | 3 | from pydantic import Field, StrictInt, validator 4 | 5 | from app.models.enum.creation_options import ColorMapType 6 | from app.models.pydantic.base import StrictBaseModel 7 | 8 | 9 | class RGB(StrictBaseModel): 10 | red: int = Field(..., ge=0, le=255) 11 | green: int = Field(..., ge=0, le=255) 12 | blue: int = Field(..., ge=0, le=255) 13 | 14 | def tuple(self) -> Tuple[int, int, int]: 15 | return self.red, self.green, self.blue 16 | 17 | 18 | class RGBA(StrictBaseModel): 19 | red: int = Field(..., ge=0, le=255) 20 | green: int = Field(..., ge=0, le=255) 21 | blue: int = Field(..., ge=0, le=255) 22 | alpha: int = Field(..., ge=0, le=255) 23 | 24 | def tuple(self) -> Tuple[int, int, int, int]: 25 | return self.red, self.green, self.blue, self.alpha 26 | 27 | 28 | class Symbology(StrictBaseModel): 29 | type: ColorMapType 30 | colormap: Optional[Dict[Union[StrictInt, float], Union[RGB, RGBA]]] 31 | 32 | @validator("colormap") 33 | def colormap_alpha_val(cls, v, values): 34 | if v is not None: 35 | break_points = [value for key, value in v.items()] 36 | if "type" in values and values["type"] in ( 37 | ColorMapType.discrete_intensity, 38 | ColorMapType.gradient_intensity, 39 | ): 40 | assert all( 41 | isinstance(value, RGB) for value in break_points 42 | ), "Breakpoints for intensity colormaps must not include alpha values" 43 | assert ( 44 | len(set([type(value) for value in break_points])) == 1 45 | ), "Colormap breakpoints must be either all RGB or all RGBA" 46 | return v 47 | -------------------------------------------------------------------------------- /app/models/pydantic/tasks.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from uuid import UUID 3 | 4 | from .base import BaseRecord, StrictBaseModel 5 | from .change_log import ChangeLog 6 | from .responses import PaginationLinks, PaginationMeta, Response 7 | 8 | 9 | class Task(BaseRecord): 10 | task_id: UUID 11 | asset_id: UUID 12 | change_log: List[ChangeLog] 13 | 14 | 15 | class TaskCreateIn(StrictBaseModel): 16 | asset_id: UUID 17 | change_log: List[ChangeLog] 18 | 19 | 20 | class TaskUpdateIn(StrictBaseModel): 21 | change_log: List[ChangeLog] 22 | 23 | 24 | class TaskResponse(Response): 25 | data: Task 26 | 27 | 28 | class TasksResponse(Response): 29 | data: List[Task] 30 | 31 | 32 | class PaginatedTasksResponse(TasksResponse): 33 | links: PaginationLinks 34 | meta: PaginationMeta 35 | 
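Editor's illustrative sketch (not a file in the repository): a minimal example of how the pagination models defined above in responses.py and tasks.py fit together. All field values are hypothetical; in the running API the links and meta objects are built by the pagination helper in app/utils/paginate.py rather than by hand.

from app.models.pydantic.responses import PaginationLinks, PaginationMeta
from app.models.pydantic.tasks import PaginatedTasksResponse

# Hypothetical page links for a 100-item collection split into pages of 25.
links = PaginationLinks(
    self="https://data-api.globalforestwatch.org/tasks?page[number]=1&page[size]=25",
    first="https://data-api.globalforestwatch.org/tasks?page[number]=1&page[size]=25",
    last="https://data-api.globalforestwatch.org/tasks?page[number]=4&page[size]=25",
)
meta = PaginationMeta(size=25, total_items=100, total_pages=4)

# data is left empty to keep the sketch self-contained; real responses carry Task records.
response = PaginatedTasksResponse(data=[], links=links, meta=meta)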
-------------------------------------------------------------------------------- /app/models/pydantic/user_job.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from uuid import UUID 3 | 4 | from pydantic import BaseModel 5 | 6 | from .responses import Response 7 | 8 | 9 | class UserJob(BaseModel): 10 | job_id: UUID 11 | job_link: Optional[str] # Full URL to check the job status 12 | status: str = "pending" # Can be pending, success, partial_success, failure, and error 13 | message: Optional[str] # Error message when status is "error" 14 | download_link: Optional[str] = None 15 | failed_geometries_link: Optional[str] = None 16 | progress: Optional[str] = "0%" 17 | 18 | 19 | class UserJobResponse(Response): 20 | data: UserJob 21 | -------------------------------------------------------------------------------- /app/routes/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from fastapi import Depends, HTTPException, Path 4 | from fastapi.security import OAuth2PasswordBearer 5 | 6 | from ..crud.versions import get_version 7 | from ..errors import RecordNotFoundError 8 | 9 | DATASET_REGEX = r"^[a-z][a-z0-9_-]{2,}$" 10 | VERSION_REGEX = r"^v\d{1,8}(\.\d{1,3}){0,2}?$|^latest$" 11 | DATE_REGEX = r"^\d{4}(\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]))?$" 12 | oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/token") 13 | 14 | 15 | async def dataset_dependency( 16 | dataset: str = Path(..., title="Dataset", regex=DATASET_REGEX) 17 | ) -> str: 18 | if dataset == "latest": 19 | raise HTTPException( 20 | status_code=400, 21 | detail="Name `latest` is reserved for versions only.", 22 | ) 23 | return dataset 24 | 25 | 26 | async def version_dependency( 27 | version: str = Path(..., title="Dataset version", regex=VERSION_REGEX), 28 | ) -> str: 29 | # Middleware should have redirected GET requests to latest version already. 30 | # Any other request method should not use `latest` keyword. 
31 | if version == "latest": 32 | raise HTTPException( 33 | status_code=400, 34 | detail="You must list version name explicitly for this operation.", 35 | ) 36 | return version 37 | 38 | 39 | async def dataset_version_dependency( 40 | dataset: str = Depends(dataset_dependency), 41 | version: str = Depends(version_dependency), 42 | ) -> Tuple[str, str]: 43 | # make sure version exists 44 | try: 45 | await get_version(dataset, version) 46 | except RecordNotFoundError as e: 47 | raise HTTPException(status_code=404, detail=(str(e))) 48 | 49 | return dataset, version 50 | -------------------------------------------------------------------------------- /app/routes/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/analysis/__init__.py -------------------------------------------------------------------------------- /app/routes/assets/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from fastapi.logger import logger 4 | 5 | from ...models.orm.assets import Asset as ORMAsset 6 | from ...models.pydantic.asset_metadata import asset_metadata_factory 7 | from ...models.pydantic.assets import ( 8 | Asset, 9 | AssetResponse, 10 | AssetsResponse, 11 | PaginatedAssetsResponse, 12 | ) 13 | from ...models.pydantic.responses import PaginationLinks, PaginationMeta 14 | 15 | 16 | async def asset_response(asset_orm: ORMAsset) -> AssetResponse: 17 | """Serialize ORM response.""" 18 | 19 | data: Asset = await _serialized_asset(asset_orm) 20 | return AssetResponse(data=data) 21 | 22 | 23 | async def assets_response(assets_orm: List[ORMAsset]) -> AssetsResponse: 24 | """Serialize ORM response.""" 25 | data = [await _serialized_asset(asset_orm) for asset_orm in assets_orm] 26 | return AssetsResponse(data=data) 27 | 28 | 29 | async def paginated_assets_response( 30 | assets_orm: List[ORMAsset], links: PaginationLinks, meta: PaginationMeta 31 | ) -> PaginatedAssetsResponse: 32 | """Serialize ORM response.""" 33 | data = [await _serialized_asset(asset_orm) for asset_orm in assets_orm] 34 | return PaginatedAssetsResponse(data=data, links=links, meta=meta) 35 | 36 | 37 | async def _serialized_asset(asset_orm: ORMAsset) -> Asset: 38 | metadata = asset_metadata_factory(asset_orm) 39 | 40 | if hasattr(asset_orm, "metadata"): 41 | delattr(asset_orm, "metadata") 42 | data: Asset = Asset.from_orm(asset_orm) 43 | data.metadata = metadata 44 | 45 | logger.debug(f"Metadata: {data.metadata.dict(by_alias=True)}") 46 | return data 47 | -------------------------------------------------------------------------------- /app/routes/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/authentication/__init__.py -------------------------------------------------------------------------------- /app/routes/datamart/__init__.py: -------------------------------------------------------------------------------- 1 | OPENAPI_EXTRA = { 2 | "parameters": [ 3 | { 4 | "name": "aoi", 5 | "in": "query", 6 | "required": True, 7 | "style": "deepObject", 8 | "explode": True, 9 | "examples": { 10 | "Geostore Area Of Interest": { 11 | "summary": "Geostore Area Of Interest", 12 | "description": "Custom area", 13 | "value": { 14 | "type": "geostore", 15 | "geostore_id": 
"637d378f-93a9-4364-bfa8-95b6afd28c3a", 16 | }, 17 | }, 18 | "Admin Area Of Interest": { 19 | "summary": "Admin Area Of Interest", 20 | "description": "Administrative Boundary", 21 | "value": { 22 | "type": "admin", 23 | "country": "BRA", 24 | "region": "12", 25 | "subregion": "2", 26 | }, 27 | }, 28 | }, 29 | "description": "The Area of Interest", 30 | "schema": { 31 | "oneOf": [ 32 | {"$ref": "#/components/schemas/GeostoreAreaOfInterest"}, 33 | {"$ref": "#/components/schemas/AdminAreaOfInterest"}, 34 | {"$ref": "#/components/schemas/Global"}, 35 | ] 36 | }, 37 | }, 38 | { 39 | "name": "dataset_version", 40 | "in": "query", 41 | "required": False, 42 | "style": "deepObject", 43 | "explode": True, 44 | "schema": { 45 | "type": "object", 46 | "additionalProperties": {"type": "string"}, 47 | }, 48 | "example": { 49 | "umd_tree_cover_loss": "v1.11", 50 | "tsc_tree_cover_loss_drivers": "v2023", 51 | }, 52 | "description": ( 53 | "Pass dataset version overrides as bracketed query parameters.", 54 | ), 55 | }, 56 | ] 57 | } 58 | -------------------------------------------------------------------------------- /app/routes/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | """Datasets are just a bucket, for datasets which share the same core 2 | metadata.""" 3 | from typing import Optional, Union 4 | 5 | from fastapi import APIRouter, HTTPException, Query, Request 6 | from fastapi.responses import ORJSONResponse 7 | 8 | from app.crud.datasets import count_datasets as count_datasets_fn 9 | from app.crud.datasets import get_datasets as datasets_fn 10 | from app.models.pydantic.datasets import DatasetsResponse, PaginatedDatasetsResponse 11 | from app.settings.globals import API_URL 12 | from app.utils.paginate import paginate_collection 13 | 14 | router = APIRouter() 15 | 16 | 17 | @router.get( 18 | "", 19 | response_class=ORJSONResponse, 20 | tags=["Datasets"], 21 | response_model=Union[PaginatedDatasetsResponse, DatasetsResponse], 22 | ) 23 | async def get_datasets( 24 | request: Request, 25 | page_number: Optional[int] = Query( 26 | default=None, alias="page[number]", ge=1, description="The page number." 27 | ), 28 | page_size: Optional[int] = Query( 29 | default=None, 30 | alias="page[size]", 31 | ge=1, 32 | description="The number of datasets per page. Default is `10`.", 33 | ), 34 | ) -> Union[PaginatedDatasetsResponse, DatasetsResponse]: 35 | """Get list of all datasets. 36 | 37 | Will attempt to paginate if `page[size]` or `page[number]` is 38 | provided. Otherwise, it will attempt to return the entire list of 39 | datasets in the response. 
40 | """ 41 | if page_number or page_size: 42 | try: 43 | data, links, meta = await paginate_collection( 44 | paged_items_fn=datasets_fn, 45 | item_count_fn=count_datasets_fn, 46 | request_url=f"{API_URL}{request.url.path}", 47 | page=page_number, 48 | size=page_size, 49 | ) 50 | 51 | return PaginatedDatasetsResponse(data=data, links=links, meta=meta) 52 | except ValueError as exc: 53 | raise HTTPException(status_code=422, detail=str(exc)) from exc 54 | 55 | all_datasets = await datasets_fn() 56 | return DatasetsResponse(data=all_datasets) 57 | -------------------------------------------------------------------------------- /app/routes/datasets/geostore.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from uuid import UUID 3 | 4 | from fastapi import APIRouter, Depends, HTTPException, Path 5 | from fastapi.responses import ORJSONResponse 6 | 7 | from ...crud import geostore 8 | from ...errors import RecordNotFoundError 9 | from ...models.pydantic.geostore import Geostore, GeostoreResponse 10 | from ...routes import dataset_version_dependency 11 | 12 | router = APIRouter() 13 | 14 | 15 | @router.get( 16 | "/{dataset}/{version}/geostore/{geostore_id}", 17 | response_class=ORJSONResponse, 18 | response_model=GeostoreResponse, 19 | tags=["Geostore"], 20 | ) 21 | async def get_geostore_by_version( 22 | *, 23 | dv: Tuple[str, str] = Depends(dataset_version_dependency), 24 | geostore_id: UUID = Path(..., title="geostore_id"), 25 | ): 26 | """Retrieve GeoJSON representation for a given geostore ID of a dataset 27 | version. 28 | 29 | Obtain geostore ID from feature attributes. 30 | """ 31 | dataset, version = dv 32 | try: 33 | result: Geostore = await geostore.get_geostore_by_version( 34 | dataset, version, geostore_id 35 | ) 36 | except RecordNotFoundError as e: 37 | raise HTTPException(status_code=404, detail=str(e)) 38 | 39 | return GeostoreResponse(data=result) 40 | -------------------------------------------------------------------------------- /app/routes/geostore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/geostore/__init__.py -------------------------------------------------------------------------------- /app/routes/health.py: -------------------------------------------------------------------------------- 1 | """Assets are replicas of the original source files.""" 2 | 3 | from fastapi import APIRouter 4 | from fastapi.responses import ORJSONResponse 5 | 6 | from ..models.pydantic.responses import Response 7 | 8 | router = APIRouter() 9 | 10 | 11 | @router.get( 12 | "/ping", 13 | response_class=ORJSONResponse, 14 | tags=["Health"], 15 | response_model=Response, 16 | ) 17 | async def ping(): 18 | """Simple uptime check.""" 19 | 20 | return Response(data="pong") 21 | -------------------------------------------------------------------------------- /app/routes/jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/jobs/__init__.py -------------------------------------------------------------------------------- /app/routes/political/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/routes/political/__init__.py -------------------------------------------------------------------------------- /app/routes/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from ...models.orm.tasks import Task as ORMTask 4 | from ...models.pydantic.responses import PaginationLinks, PaginationMeta 5 | from ...models.pydantic.tasks import ( 6 | PaginatedTasksResponse, 7 | Task, 8 | TaskResponse, 9 | TasksResponse, 10 | ) 11 | 12 | 13 | def task_response(data: ORMTask) -> TaskResponse: 14 | """Assure that task responses are parsed correctly and include associated 15 | assets.""" 16 | 17 | return TaskResponse(data=data) 18 | 19 | 20 | async def tasks_response(tasks_orm: List[ORMTask]) -> TasksResponse: 21 | """Serialize ORM response.""" 22 | data = [Task.from_orm(task) for task in tasks_orm] 23 | return TasksResponse(data=data) 24 | 25 | 26 | async def paginated_tasks_response( 27 | tasks_orm: List[ORMTask], links: PaginationLinks, meta: PaginationMeta 28 | ) -> PaginatedTasksResponse: 29 | """Serialize ORM response.""" 30 | data = [Task.from_orm(task) for task in tasks_orm] 31 | return PaginatedTasksResponse(data=data, links=links, meta=meta) 32 | -------------------------------------------------------------------------------- /app/settings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/settings/__init__.py -------------------------------------------------------------------------------- /app/settings/prestart.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ "${ENV}" = "dev" ]; then 3 | # in dev environment, we clone a db instance for the branch from a template database 4 | 5 | # parse out DB credentials from the secret json object 6 | DB_HOST=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.host' | sed 's/"//g') 7 | DB_PORT=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.port' | sed 's/"//g') 8 | DB_USER=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.username' | sed 's/"//g') 9 | DB_PASSWORD=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.password' | sed 's/"//g') 10 | DATABASE_MAIN=$(jq -nr 'env.DB_WRITER_SECRET' | jq '.dbname' | sed 's/"//g') # template database 11 | DATABASE="$DATABASE_MAIN$NAME_SUFFIX" # branch database 12 | 13 | # return the branch database if it exists in pg_database. if not, create it. 14 | PGPASSWORD=$DB_PASSWORD psql -h ${DB_HOST} -p ${DB_PORT} -U ${DB_USER} -d ${DATABASE_MAIN} \ 15 | -tc "SELECT 1 FROM pg_database WHERE datname = '$DATABASE'" | grep -q 1 \ 16 | || PGPASSWORD=$DB_PASSWORD psql -h ${DB_HOST} \ 17 | -p ${DB_PORT} -U ${DB_USER} -d ${DATABASE_MAIN} \ 18 | -c "CREATE DATABASE $DATABASE WITH TEMPLATE ${DATABASE_MAIN} OWNER $DB_USER" 19 | fi 20 | 21 | alembic upgrade head -------------------------------------------------------------------------------- /app/settings/start.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env sh 2 | set -e 3 | 4 | if [ -f /app/app/main.py ]; then 5 | DEFAULT_MODULE_NAME=app.main 6 | elif [ -f /app/main.py ]; then 7 | DEFAULT_MODULE_NAME=main 8 | fi 9 | MODULE_NAME=${MODULE_NAME:-$DEFAULT_MODULE_NAME} 10 | VARIABLE_NAME=${VARIABLE_NAME:-app} 11 | export APP_MODULE=${APP_MODULE:-"$MODULE_NAME:$VARIABLE_NAME"} 12 | 13 | if [ -f /app/gunicorn_conf.py ]; then 14 | DEFAULT_GUNICORN_CONF=/app/gunicorn_conf.py 15 | elif [ -f /app/app/gunicorn_conf.py ]; then 16 | DEFAULT_GUNICORN_CONF=/app/app/gunicorn_conf.py 17 | else 18 | DEFAULT_GUNICORN_CONF=/gunicorn_conf.py 19 | fi 20 | export GUNICORN_CONF=${GUNICORN_CONF:-$DEFAULT_GUNICORN_CONF} 21 | export WORKER_CLASS=${WORKER_CLASS:-"uvicorn.workers.UvicornWorker"} 22 | 23 | # If there's a prestart.sh script in the /app directory or other path specified, run it before starting 24 | PRE_START_PATH=${PRE_START_PATH:-/app/prestart.sh} 25 | echo "Checking for script in $PRE_START_PATH" 26 | if [ -f $PRE_START_PATH ] ; then 27 | echo "Running script $PRE_START_PATH" 28 | . "$PRE_START_PATH" 29 | else 30 | echo "There is no script $PRE_START_PATH" 31 | fi 32 | 33 | export NEW_RELIC_LICENSE_KEY=$(jq -nr 'env.NEW_RELIC_LICENSE_KEY' | jq '.license_key' | sed 's/"//g') 34 | NEW_RELIC_CONFIG_FILE=/app/newrelic.ini 35 | export NEW_RELIC_CONFIG_FILE 36 | 37 | if [ "${ENV}" = "staging" ]; then 38 | export NEW_RELIC_ENVIRONMENT=staging 39 | # Start Gunicorn 40 | exec newrelic-admin run-program gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE" 41 | elif [ "${ENV}" = "production" ]; then 42 | export NEW_RELIC_ENVIRONMENT=production 43 | # Start Gunicorn 44 | exec newrelic-admin run-program gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE" 45 | else 46 | exec gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE" 47 | fi -------------------------------------------------------------------------------- /app/static/gfw-data-api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/static/gfw-data-api.png -------------------------------------------------------------------------------- /app/tasks/raster_tile_cache_assets/__init__.py: -------------------------------------------------------------------------------- 1 | from .raster_tile_cache_assets import ( # noqa: F401 2 | raster_tile_cache_asset, 3 | raster_tile_cache_validator, 4 | ) 5 | -------------------------------------------------------------------------------- /app/tasks/raster_tile_set_assets/__init__.py: -------------------------------------------------------------------------------- 1 | from .raster_tile_set_assets import raster_tile_set_asset # noqa: F401 2 | -------------------------------------------------------------------------------- /app/tasks/utils.py: -------------------------------------------------------------------------------- 1 | import string 2 | from typing import Any, List 3 | 4 | from app.settings.globals import CHUNK_SIZE 5 | 6 | ALLOWABLE_CHARS = set(string.ascii_letters + string.digits + "-" + "_") 7 | 8 | 9 | class RingOfLists: 10 | """A data structure that consists of a number of lists attached to a 11 | circular buffer. 12 | 13 | One may iterate over it and append items to the element in hand in 14 | order to evenly distribute whatever it is one has amongst the 15 | different lists. Later, one can call the all() method to get all the 16 | lists to do with as one pleases. 
17 | """ 18 | 19 | def __init__(self, size: int) -> None: 20 | self._size: int = size 21 | self._lists: List[List[Any]] = [list() for i in range(0, size)] 22 | self._idx: int = -1 23 | 24 | def __next__(self): 25 | self._idx += 1 26 | if self._idx >= self._size: 27 | self._idx = 0 28 | return self._lists[self._idx] 29 | 30 | def __iter__(self): 31 | self.idx = -1 32 | return self 33 | 34 | def all(self): 35 | return self._lists 36 | 37 | 38 | def sanitize_batch_job_name(proposed_name: str) -> str: 39 | """Make a string acceptable as an AWS Batch job name According to AWS docs, 40 | the first character must be alphanumeric, the name can be up to 128 41 | characters, and ASCII uppercase + lowercase letters, numbers, hyphens, and 42 | underscores are allowed.""" 43 | short_name: str = proposed_name[:125] 44 | 45 | if not str.isalnum(short_name[0]): 46 | short_name = "x_" + proposed_name[:] 47 | 48 | filtered_name = "" 49 | for char in short_name: 50 | if char in ALLOWABLE_CHARS: 51 | filtered_name += char 52 | else: 53 | filtered_name += "_" 54 | 55 | return filtered_name 56 | 57 | 58 | def chunk_list(data: List[Any], chunk_size: int = CHUNK_SIZE) -> List[List[Any]]: 59 | """Split list into chunks of fixed size.""" 60 | return [data[x : x + chunk_size] for x in range(0, len(data), chunk_size)] 61 | -------------------------------------------------------------------------------- /app/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/app/utils/__init__.py -------------------------------------------------------------------------------- /app/utils/decorators.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | 4 | # See https://stackoverflow.com/questions/6358481/using-functools-lru-cache-with-dictionary-arguments 5 | # Required to use a dict argument with @alru_cache, since it needs to be hasheable/immutable 6 | def hash_dict(func): 7 | """Transform mutable dictionnary Into immutable Useful to be compatible 8 | with cache.""" 9 | 10 | class HDict(dict): 11 | def __hash__(self): 12 | return hash(frozenset(self.items())) 13 | 14 | @functools.wraps(func) 15 | def wrapped(*args, **kwargs): 16 | args = tuple([HDict(arg) if isinstance(arg, dict) else arg for arg in args]) 17 | kwargs = {k: HDict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} 18 | return func(*args, **kwargs) 19 | 20 | return wrapped 21 | -------------------------------------------------------------------------------- /app/utils/fields.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | from ..crud.assets import get_default_asset 4 | from ..crud.metadata import get_asset_fields_dicts 5 | from ..models.orm.assets import Asset as ORMAsset 6 | from ..models.pydantic.creation_options import CreationOptions 7 | 8 | 9 | async def get_field_attributes( 10 | dataset: str, version: str, creation_options: CreationOptions 11 | ) -> List[Dict[str, Any]]: 12 | """Get list of field attributes on the asset which are marked as `is_feature_info` 13 | If a field list is provided in creation options, limit the list to those provided, 14 | in the order provided. Invalid provided fields are silently ignored. 
15 | """ 16 | 17 | default_asset: ORMAsset = await get_default_asset(dataset, version) 18 | asset_fields = await get_asset_fields_dicts(default_asset) 19 | 20 | name_to_feature_fields: Dict[str, Dict] = { 21 | field["name"]: field 22 | for field in asset_fields 23 | if field["is_feature_info"] 24 | } 25 | 26 | if ( 27 | "field_attributes" in creation_options.__fields__ 28 | and creation_options.field_attributes 29 | ): 30 | asset_field_attributes = [ 31 | name_to_feature_fields[field_name] 32 | for field_name in creation_options.field_attributes 33 | if field_name in name_to_feature_fields 34 | ] 35 | else: 36 | asset_field_attributes = list(name_to_feature_fields.values()) 37 | 38 | return asset_field_attributes 39 | -------------------------------------------------------------------------------- /app/utils/gadm.py: -------------------------------------------------------------------------------- 1 | GADM_41_IDS_MISSING_REVISION = ( 2 | "IDN.35.4", 3 | "IDN.35.8", 4 | "IDN.35.9", 5 | "IDN.35.13", 6 | "IDN.35.14", 7 | ) 8 | 9 | 10 | def extract_level_id(adm_level: int, id_string: str): 11 | """Given a desired admin level and a string containing at least that level 12 | of id, return the id of just that level.""" 13 | 14 | # Exception because of bad formatting of GHA gids in v4.1 15 | # (corrected by us in gadm_administrative_boundaries/v4.1.85 and higher) 16 | if id_string.startswith("GHA") and not id_string.startswith("GHA."): 17 | id_string = "GHA." + id_string[3:] 18 | # Exception because bad ids IDN.35.4, IDN.35.8, IDN.35.9, IDN.35.13, IDN.35.14 19 | # (they are missing final '_1') in gadm_administrative_boundaries/v4.1 20 | if id_string.startswith("IDN") and "_" not in id_string: 21 | id_string += "_1" 22 | 23 | return (id_string.rsplit("_")[0]).split(".")[adm_level] 24 | 25 | 26 | def fix_id_pattern(adm_level: int, id_pattern_string: str, provider: str, version: str): 27 | """Given an admin level and a GADM id pattern suitable for a SQL LIKE 28 | clause, return an id pattern adjusted for observed errors in GADM 29 | records.""" 30 | new_pattern: str = id_pattern_string 31 | 32 | if provider == "gadm" and version == "4.1": 33 | if id_pattern_string.rstrip(r"\__") in GADM_41_IDS_MISSING_REVISION: 34 | new_pattern = new_pattern.rstrip(r"\__") 35 | 36 | return new_pattern 37 | -------------------------------------------------------------------------------- /app/utils/generators.py: -------------------------------------------------------------------------------- 1 | from typing import Any, AsyncGenerator, List 2 | 3 | 4 | async def list_to_async_generator(input_list: List[Any]) -> AsyncGenerator[Any, None]: 5 | """Transform a List to an AsyncGenerator.""" 6 | for i in input_list: 7 | yield i 8 | -------------------------------------------------------------------------------- /app/utils/google.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import lru_cache 3 | from typing import List, Optional, Sequence, Dict 4 | 5 | from google.cloud.storage import Client 6 | from google.oauth2 import service_account 7 | 8 | from .aws import get_secret_client 9 | from ..settings.globals import AWS_GCS_KEY_SECRET_ARN 10 | 11 | 12 | @lru_cache(maxsize=1) 13 | def get_gcs_service_account_auth_info() -> Dict[str, str]: 14 | secret_client = get_secret_client() 15 | response = secret_client.get_secret_value(SecretId=AWS_GCS_KEY_SECRET_ARN) 16 | return json.loads(response["SecretString"]) 17 | 18 | 19 | def get_prefix_objects(bucket: str, 
prefix: str, limit: Optional[int] = None) -> List[str]: 20 | """Get ALL object names under a bucket and prefix in GCS.""" 21 | 22 | auth_info = get_gcs_service_account_auth_info() 23 | scopes = [ 24 | "https://www.googleapis.com/auth/devstorage.read_only", 25 | "https://www.googleapis.com/auth/cloud-platform.read-only", 26 | ] 27 | 28 | account_info = { 29 | "scopes": scopes, 30 | **auth_info 31 | } 32 | 33 | service_account_info = service_account.Credentials.from_service_account_info( 34 | account_info 35 | ) 36 | client = Client(project=None, credentials=service_account_info) 37 | 38 | blobs = client.list_blobs(bucket, prefix=prefix, max_results=limit) 39 | return [blob.name for blob in blobs] 40 | 41 | 42 | def get_gs_files( 43 | bucket: str, 44 | prefix: str, 45 | limit: Optional[int] = None, 46 | exit_after_max: Optional[int] = None, 47 | extensions: Sequence[str] = tuple() 48 | ) -> List[str]: 49 | """Get matching object names under a bucket and prefix in GCS.""" 50 | 51 | matches: List[str] = list() 52 | num_matches: int = 0 53 | 54 | for blob_name in get_prefix_objects(bucket, prefix, limit): 55 | if not extensions or any(blob_name.endswith(ext) for ext in extensions): 56 | matches.append(f"/vsigs/{bucket}/{blob_name}") 57 | num_matches += 1 58 | if exit_after_max and num_matches >= exit_after_max: 59 | return matches 60 | 61 | return matches 62 | -------------------------------------------------------------------------------- /app/utils/tile_cache.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from uuid import UUID 3 | 4 | from botocore.exceptions import ClientError 5 | from fastapi.logger import logger 6 | 7 | from ..crud import assets 8 | from ..models.enum.change_log import ChangeLogStatus 9 | from ..models.pydantic.change_log import ChangeLog 10 | from ..settings.globals import TILE_CACHE_CLUSTER, TILE_CACHE_SERVICE 11 | from ..tasks.aws_tasks import update_ecs_service 12 | 13 | 14 | async def redeploy_tile_cache_service(asset_id: UUID) -> None: 15 | """Redeploy Tile cache service to make sure dynamic tile cache is 16 | recognized.""" 17 | try: 18 | update_ecs_service(TILE_CACHE_CLUSTER, TILE_CACHE_SERVICE) 19 | ecs_change_log = ChangeLog( 20 | date_time=datetime.now(), 21 | status=ChangeLogStatus.success, 22 | message="Redeployed Tile Cache Service", 23 | ) 24 | except ClientError as e: 25 | # Let's don't make this a blocker but make sure it gets logged in case something goes wrong 26 | logger.exception(str(e)) 27 | ecs_change_log = ChangeLog( 28 | date_time=datetime.now(), 29 | status=ChangeLogStatus.failed, 30 | message="Failed to redeploy Tile Cache Service", 31 | detail=str(e), 32 | ) 33 | await assets.update_asset(asset_id, change_log=[ecs_change_log.dict(by_alias=True)]) 34 | -------------------------------------------------------------------------------- /batch/.dockerignore: -------------------------------------------------------------------------------- 1 | # IDE Fragments 2 | /.vscode 3 | *__pycache__* 4 | *.idea* 5 | 6 | #MyPy 7 | .mypy_cache 8 | 9 | # GIT 10 | .git 11 | 12 | # Docker Files 13 | docker-compose.dev.yml 14 | docker-compose.test.yml 15 | 16 | # Ignore Files 17 | .gitignore 18 | 19 | # Terraform 20 | *terraform* 21 | 22 | # Virtual Environments 23 | .venv -------------------------------------------------------------------------------- /batch/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/batch/__init__.py -------------------------------------------------------------------------------- /batch/pixetl.dockerfile: -------------------------------------------------------------------------------- 1 | FROM globalforestwatch/pixetl:v1.7.7 2 | 3 | # Copy scripts 4 | COPY ./batch/scripts/ /opt/scripts/ 5 | COPY ./batch/python/ /opt/python/ 6 | 7 | RUN ln -f -s /usr/bin/python3 /usr/bin/python 8 | 9 | # make sure scripts are executable 10 | RUN chmod +x -R /opt/scripts/ 11 | RUN chmod +x -R /opt/python/ 12 | 13 | ENV PATH="/opt/scripts:${PATH}" 14 | ENV PATH="/opt/python:${PATH}" 15 | 16 | WORKDIR /tmp 17 | 18 | ENV LC_ALL=C.UTF-8 19 | ENV LANG=C.UTF-8 20 | 21 | ENTRYPOINT ["/opt/scripts/report_status.sh"] 22 | -------------------------------------------------------------------------------- /batch/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/batch/python/__init__.py -------------------------------------------------------------------------------- /batch/python/adjust_num_processes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | 5 | import boto3 6 | 7 | OOM_ERROR = "OutOfMemoryError: Container killed due to memory usage" 8 | 9 | 10 | def calc_num_processes(job_id: str, original_num_proc, batch_client): 11 | jobs_desc = batch_client.describe_jobs(jobs=[job_id]) 12 | 13 | new_num_proc = original_num_proc 14 | 15 | # For each previous attempt resulting in OOM, divide NUM_PROCESSES by 2 16 | for attempt in jobs_desc["jobs"][0]["attempts"]: 17 | if ( 18 | attempt["container"].get("exitCode") == 137 19 | or attempt["container"].get("reason") == OOM_ERROR 20 | ): 21 | new_num_proc = max(1, int(new_num_proc / 2)) 22 | 23 | return new_num_proc 24 | 25 | 26 | if __name__ == "__main__": 27 | job_id = os.getenv("AWS_BATCH_JOB_ID") 28 | if job_id is None: 29 | raise ValueError("No AWS Batch Job ID found") 30 | original_num_proc = os.getenv("NUM_PROCESSES", os.getenv("CORES", os.cpu_count())) 31 | if original_num_proc is None: 32 | raise ValueError("Neither number of processes nor number of cores are set") 33 | else: 34 | original_num_proc = int(original_num_proc) 35 | 36 | batch_client = boto3.client("batch", region_name=os.getenv("AWS_REGION")) 37 | 38 | print(calc_num_processes(job_id, original_num_proc, batch_client)) 39 | -------------------------------------------------------------------------------- /batch/python/aws_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Sequence, Tuple, Dict, Any 3 | 4 | import boto3 5 | 6 | AWS_REGION = os.environ.get("AWS_REGION") 7 | AWS_ENDPOINT_URL = os.environ.get("ENDPOINT_URL") # For boto 8 | 9 | 10 | def get_s3_client(aws_region=AWS_REGION, endpoint_url=AWS_ENDPOINT_URL): 11 | return boto3.client("s3", region_name=aws_region, endpoint_url=endpoint_url) 12 | 13 | 14 | def get_s3_path_parts(s3url) -> Tuple[str, str]: 15 | """Splits an S3 URL into bucket and key.""" 16 | just_path = s3url.split("s3://")[1] 17 | bucket = just_path.split("/")[0] 18 | key = "/".join(just_path.split("/")[1:]) 19 | return bucket, key 20 | 21 | 22 | def exists_in_s3(target_bucket, target_key): 23 | """Returns whether or not target_key exists in target_bucket.""" 24 | s3_client = 
get_s3_client() 25 | response = s3_client.list_objects_v2( 26 | Bucket=target_bucket, 27 | Prefix=target_key, 28 | ) 29 | for obj in response.get("Contents", []): 30 | if obj["Key"] == target_key: 31 | return obj["Size"] > 0 32 | 33 | 34 | def get_aws_files( 35 | bucket: str, prefix: str, extensions: Sequence[str] = (".tif",) 36 | ) -> List[str]: 37 | """Get all matching files in S3.""" 38 | files: List[str] = list() 39 | 40 | s3_client = get_s3_client() 41 | paginator = s3_client.get_paginator("list_objects_v2") 42 | 43 | print("get_aws_files") 44 | for page in paginator.paginate(Bucket=bucket, Prefix=prefix): 45 | try: 46 | contents = page["Contents"] 47 | except KeyError: 48 | break 49 | 50 | for obj in contents: 51 | key = str(obj["Key"]) 52 | if any(key.endswith(ext) for ext in extensions): 53 | files.append(f"s3://{bucket}/{key}") 54 | 55 | print("done get_aws_files") 56 | return files 57 | 58 | 59 | def upload_s3(path: str, bucket: str, dst: str) -> Dict[str, Any]: 60 | s3_client = get_s3_client() 61 | return s3_client.upload_file(path, bucket, dst) 62 | -------------------------------------------------------------------------------- /batch/python/check_csv.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import sys 3 | from typing import Type 4 | from urllib.parse import urlparse 5 | 6 | import boto3 7 | from logger import get_logger 8 | 9 | LOGGER = get_logger(__name__) 10 | 11 | s3_uri = sys.argv[1] 12 | s3 = boto3.client("s3", region_name="us-east-1") 13 | o = urlparse(s3_uri, allow_fragments=False) 14 | bucket = o.netloc 15 | key = o.path.lstrip("/") 16 | 17 | bytes_range = "bytes=0-4096" 18 | response = s3.get_object(Bucket=bucket, Key=key, Range=bytes_range) 19 | data = response["Body"].read().decode("utf-8") 20 | 21 | try: 22 | dialect: Type[csv.Dialect] = csv.Sniffer().sniff(data) 23 | # TODO: verify if dialect is correct (delimiter etc) 24 | except csv.Error: 25 | raise TypeError("Not a valid CSV file") 26 | 27 | LOGGER.debug(dialect.delimiter) 28 | -------------------------------------------------------------------------------- /batch/python/check_raster.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import boto3 4 | import rasterio 5 | from logger import get_logger 6 | 7 | LOGGER = get_logger(__name__) 8 | 9 | s3_uri = sys.argv[1] 10 | zipped = sys.argv[2] 11 | s3 = boto3.client("s3", region_name="us-east-1") 12 | 13 | if zipped: 14 | s3_uri = f"zip+{s3_uri}" 15 | 16 | with rasterio.open(s3_uri) as src: 17 | driver = src.driver 18 | LOGGER.debug(driver) 19 | -------------------------------------------------------------------------------- /batch/python/check_vector.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import boto3 4 | import fiona 5 | from logger import get_logger 6 | 7 | LOGGER = get_logger(__name__) 8 | 9 | s3_uri = sys.argv[1] 10 | zipped = sys.argv[2] 11 | s3 = boto3.client("s3", region_name="us-east-1") 12 | 13 | if zipped: 14 | s3_uri = f"zip+{s3_uri}" 15 | 16 | with fiona.open(s3_uri) as src: 17 | driver = src.driver 18 | LOGGER.debug(driver) 19 | -------------------------------------------------------------------------------- /batch/python/errors.py: -------------------------------------------------------------------------------- 1 | class GDALError(Exception): 2 | pass 3 | 4 | 5 | class SubprocessKilledError(Exception): 6 | pass 7 | 
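Editor's illustrative sketch (not a file in the repository): expected behavior of the S3 helpers defined in batch/python/aws_utils.py above. The bucket and object names are hypothetical, and the calls assume AWS credentials and region are configured in the environment, as the batch containers do.

from aws_utils import get_aws_files, get_s3_path_parts

# Split an s3:// URI into its bucket and key parts.
bucket, key = get_s3_path_parts("s3://my-bucket/rasters/10N_010E.tif")
assert (bucket, key) == ("my-bucket", "rasters/10N_010E.tif")

# List objects under a prefix as s3:// URIs, keeping only GeoTIFFs (the default extension filter).
tifs = get_aws_files("my-bucket", "rasters/")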
-------------------------------------------------------------------------------- /batch/python/extract_geometries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import csv 4 | from typing import Dict, List, Optional, Type, Union 5 | 6 | import click 7 | import pandas 8 | from shapely import wkb 9 | from shapely.geometry import ( 10 | GeometryCollection, 11 | MultiLineString, 12 | MultiPoint, 13 | MultiPolygon, 14 | ) 15 | from shapely.geometry.base import BaseGeometry 16 | 17 | MultiGeometry = Union[MultiPolygon, MultiLineString, MultiPoint] 18 | 19 | 20 | @click.command() 21 | @click.argument("input_file", type=click.Path(exists=True)) 22 | @click.argument("output_file", required=False) 23 | @click.option("--delimiter", default="\t", help="Delimiter") 24 | def cli(input_file: str, output_file: Optional[str], delimiter: str) -> None: 25 | 26 | if not output_file: 27 | output_file = input_file 28 | 29 | df = pandas.read_csv(input_file, delimiter=delimiter, header=0) 30 | df["geom"] = df["geom"].map(lambda x: extract(wkb.loads(x, hex=True))) 31 | 32 | df.to_csv( 33 | output_file, 34 | sep=delimiter, 35 | header=True, 36 | index=False, 37 | quoting=csv.QUOTE_MINIMAL, 38 | quotechar='"', 39 | ) 40 | 41 | 42 | def extract( 43 | geometry: Union[BaseGeometry, GeometryCollection], geom_type: str = "Polygon" 44 | ) -> BaseGeometry: 45 | new_geometry_type: Dict[str, Type[MultiGeometry]] = { 46 | "Polygon": MultiPolygon, 47 | "LineString": MultiLineString, 48 | "Point": MultiPoint, 49 | } 50 | 51 | if geometry.geometryType() == "GeometryCollection": 52 | geom_buffer: List[BaseGeometry] = list() 53 | for geom in geometry.geoms: 54 | if geom.geometryType() == geom_type: 55 | geom_buffer.append(geom) 56 | elif geom.geometryType() == f"Multi{geom_type}": 57 | for g in geom.geoms: 58 | geom_buffer.append(g) 59 | new_geom: MultiGeometry = new_geometry_type[geom_type](geom_buffer) 60 | return new_geom.wkb_hex 61 | else: 62 | return geometry.wkb_hex 63 | 64 | 65 | if __name__ == "__main__": 66 | cli() 67 | -------------------------------------------------------------------------------- /batch/python/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | ENV: str = os.environ.get("ENV", "dev") 5 | 6 | 7 | def get_logger(name): 8 | formatter = logging.Formatter( 9 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 10 | ) 11 | 12 | sh = logging.StreamHandler() 13 | sh.setFormatter(formatter) 14 | 15 | logger = logging.getLogger(name) 16 | logger.addHandler(sh) 17 | if ENV != "production": 18 | logger.setLevel(logging.DEBUG) 19 | else: 20 | logger.setLevel(logging.INFO) 21 | return logger 22 | -------------------------------------------------------------------------------- /batch/python/logging_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from logging.handlers import QueueHandler 4 | 5 | 6 | def listener_configurer(): 7 | """Run this in the parent process to configure logger.""" 8 | root = logging.getLogger() 9 | h = logging.StreamHandler(stream=sys.stdout) 10 | root.addHandler(h) 11 | 12 | 13 | def log_listener(queue, configurer): 14 | """Run this in the parent process to listen for log messages from 15 | children.""" 16 | configurer() 17 | while True: 18 | try: 19 | record = queue.get() 20 | if ( 21 | record is None 22 | ): # We send this as a sentinel to tell the listener 
to quit. 23 | break 24 | logger = logging.getLogger(record.name) 25 | logger.handle(record) # No level or filter logic applied - just do it! 26 | except Exception: 27 | import traceback 28 | 29 | print("Encountered a problem in the log listener!", file=sys.stderr) 30 | traceback.print_exc(file=sys.stderr) 31 | raise 32 | 33 | 34 | def log_client_configurer(queue): 35 | """Run this in child processes to configure sending logs to parent.""" 36 | h = QueueHandler(queue) 37 | root = logging.getLogger() 38 | root.addHandler(h) 39 | root.setLevel(logging.INFO) 40 | -------------------------------------------------------------------------------- /batch/scripts/_add_gfw_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and set a variable to it. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | ADD_GFW_FIELDS_SQL=" 10 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN ${GEOMETRY_NAME}_wm geometry(${GEOMETRY_TYPE},3857); 11 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ALTER COLUMN ${GEOMETRY_NAME}_wm SET STORAGE EXTERNAL; 12 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_area__ha NUMERIC; 13 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_geostore_id UUID; 14 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_geojson TEXT COLLATE pg_catalog.\"default\"; 15 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN gfw_bbox NUMERIC[]; 16 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN created_on timestamp without time zone DEFAULT now(); 17 | ALTER TABLE ${TABLE_MISSING_COLUMNS} ADD COLUMN updated_on timestamp without time zone DEFAULT now();" -------------------------------------------------------------------------------- /batch/scripts/_add_point_geometry_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and assign it to a variable. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | ADD_POINT_GEOMETRY_FIELDS_SQL=" 10 | ALTER TABLE 11 | \"$TEMP_TABLE\" 12 | ADD COLUMN 13 | ${GEOMETRY_NAME} geometry(Point,4326); 14 | 15 | ALTER TABLE 16 | \"$TEMP_TABLE\" 17 | ADD COLUMN 18 | ${GEOMETRY_NAME}_wm geometry(Point,3857);" -------------------------------------------------------------------------------- /batch/scripts/_fill_gfw_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and set a variable to it. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 
8 | 9 | FILL_GFW_FIELDS_SQL=" 10 | UPDATE 11 | $TABLE_MISSING_COLUMNS 12 | SET 13 | gfw_area__ha = ST_Area($GEOMETRY_NAME::geography)/10000, 14 | gfw_geostore_id = md5(ST_asgeojson($GEOMETRY_NAME))::uuid, 15 | gfw_geojson = ST_asGeojson($GEOMETRY_NAME), 16 | gfw_bbox = ARRAY[ 17 | ST_XMin(ST_Envelope($GEOMETRY_NAME)::geometry), 18 | ST_YMin(ST_Envelope($GEOMETRY_NAME)::geometry), 19 | ST_XMax(ST_Envelope($GEOMETRY_NAME)::geometry), 20 | ST_YMax(ST_Envelope($GEOMETRY_NAME)::geometry) 21 | ]::NUMERIC[]" -------------------------------------------------------------------------------- /batch/scripts/_fill_point_geometry_fields_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and assign it to a variable. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | FILL_POINT_GEOMETRY_FIELDS_SQL=" 10 | UPDATE 11 | \"$TEMP_TABLE\" 12 | SET 13 | ${GEOMETRY_NAME} = ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 14 | ${GEOMETRY_NAME}_wm = ST_Transform(ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 3857) 15 | WHERE 16 | ${GEOMETRY_NAME} IS null OR ${GEOMETRY_NAME}_wm IS null;" -------------------------------------------------------------------------------- /batch/scripts/_get_geometry_type_sql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -u 3 | 4 | # This script is meant to be sourced by another shell script, and all it 5 | # does is compose a SQL snippet and set a variable to it. Note that it 6 | # requires the environment variables used below to be set, and exits with 7 | # an error if one is not (thanks to the set -u). 8 | 9 | GEOMETRY_TYPE_SQL=" 10 | SELECT type 11 | FROM geometry_columns 12 | WHERE f_table_schema = '${DATASET}' 13 | AND f_table_name = '${VERSION}' 14 | AND f_geometry_column = '${GEOMETRY_NAME}';" -------------------------------------------------------------------------------- /batch/scripts/_tiff_crosses_dateline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # USAGE: _tiff_crosses_dateline.sh raster_file 4 | # 5 | # Prints the string "true" if the input raster will cross the dateline 6 | # when converting to EPSG:4326, "false" otherwise 7 | # 8 | # Needs GDAL 2.0+ and Python 9 | # 10 | # Credit: Slightly modified from https://gis.stackexchange.com/a/222341 11 | 12 | 13 | if [ -z "${1}" ]; then 14 | echo -e "Error: No input raster file given.\n> USAGE: _tiff_crosses_dateline.sh raster_file" 15 | exit 1 16 | fi 17 | 18 | # Get raster info, save it to a variable as we need it several times 19 | gdalinfo=$(gdalinfo "${1}" -json) 20 | 21 | # Exit if -json switch is not available 22 | if [ ! 
-z $(echo $gdalinfo | grep "^Usage:") ]; then 23 | echo -e "Error: GDAL command failed, Version 2.0+ is needed" 24 | exit 1 25 | fi 26 | 27 | function jsonq { 28 | echo "${1}" | python -c "import json,sys; jdata = sys.stdin.read(); data = json.loads(jdata); print(data${2});" 29 | } 30 | 31 | ulx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][0][0]") 32 | llx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][1][0]") 33 | lrx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][3][0]") 34 | urx=$(jsonq "$gdalinfo" "['wgs84Extent']['coordinates'][0][2][0]") 35 | 36 | crossing_dateline=false 37 | test $(python -c "print(${ulx}>${lrx})") = True && crossing_dateline=true 38 | test $(python -c "print(${llx}>${urx})") = True && crossing_dateline=true 39 | 40 | echo -n "${crossing_dateline}" -------------------------------------------------------------------------------- /batch/scripts/_warp_and_upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # arguments: 6 | # $0 - The name of this script 7 | # $1 - local_src_file 8 | # $2 - local_warped_file 9 | # $3 - target_crs 10 | # $4 - remote target file 11 | 12 | if aws s3 ls "$4"; then 13 | echo "Remote target file $4 already exists, skipping..." 14 | exit 0 15 | fi 16 | 17 | warp_options=("-co" "COMPRESS=DEFLATE" "-co" "TILED=yes") 18 | 19 | echo "Seeing if TIFF crosses the dateline" 20 | crosses="$(_tiff_crosses_dateline.sh $1)" 21 | if [ "${crosses}" = "true" ]; then 22 | echo "$1 crosses the dateline" 23 | warp_options+=("--config" "CENTER_LONG" "180") 24 | else 25 | echo "$1 does not cross the dateline" 26 | fi 27 | 28 | echo "Now warping $1 to $2" 29 | gdalwarp "$1" "$2" -t_srs "$3" "${warp_options[@]}" 30 | echo "Done warping $1 to $2" 31 | 32 | echo "Now uploading $2 to $4" 33 | aws s3 cp --no-progress "$2" "$4" 34 | echo "Done uploading $2 to $4" 35 | 36 | echo "Finally, deleting local files $1 and $2" 37 | rm "$1" "$2" 38 | echo "Done deleting local files $1 and $2" 39 | -------------------------------------------------------------------------------- /batch/scripts/add_gfw_fields.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | 9 | # optional arguments 10 | # -g | --geometry_name (get_arguments.sh specifies default) 11 | 12 | ME=$(basename "$0") 13 | . get_arguments.sh "$@" 14 | 15 | set -u 16 | 17 | # Add GFW-specific columns to the new table 18 | TABLE_MISSING_COLUMNS="\"$DATASET\".\"$VERSION\"" 19 | 20 | # Get geometry type of the new table 21 | # GEOMETRY_TYPE_SQL is defined by sourcing _get_geometry_type_sql.sh 22 | # It contains the SQL snippet we'll pass to the psql client command 23 | . _get_geometry_type_sql.sh 24 | 25 | # Get the geometry type of the new table 26 | GEOMETRY_TYPE=$(psql -X -A -t -c "${GEOMETRY_TYPE_SQL}") 27 | 28 | # ADD_GFW_FIELDS_SQL is defined by sourcing _add_gfw_fields_sql.sh 29 | # It contains the SQL snippet we'll pass to the psql client command 30 | . _add_gfw_fields_sql.sh 31 | 32 | echo "PSQL: ALTER TABLE $TABLE_MISSING_COLUMNS. Add GFW columns" 33 | psql -c "$ADD_GFW_FIELDS_SQL" 34 | 35 | # Set gfw_geostore_id not NULL to be compliant with GEOSTORE 36 | echo "PSQL: ALTER TABLE \"$DATASET\".\"$VERSION\". 
ALTER COLUMN gfw_geostore_id SET NOT NULL" 37 | psql -c "ALTER TABLE \"$DATASET\".\"$VERSION\" ALTER COLUMN gfw_geostore_id SET NOT NULL;" -------------------------------------------------------------------------------- /batch/scripts/add_point_geometry_fields.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | 9 | # optional arguments 10 | # -g | --geometry_name (get_arguments.sh specifies default) 11 | 12 | ME=$(basename "$0") 13 | . get_arguments.sh "$@" 14 | 15 | # Add point geometry fields 16 | echo "PSQL: ALTER TABLE \"$DATASET\".\"$VERSION\". Add Point columns" 17 | psql -c "ALTER TABLE \"$DATASET\".\"$VERSION\" ADD COLUMN ${GEOMETRY_NAME} geometry(Point,4326); 18 | ALTER TABLE \"$DATASET\".\"$VERSION\" ADD COLUMN ${GEOMETRY_NAME}_wm geometry(Point,3857);" -------------------------------------------------------------------------------- /batch/scripts/apply_colormap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -j | --json 9 | # -n | --no_data 10 | # -s | --source 11 | # -T | --target 12 | 13 | ME=$(basename "$0") 14 | . get_arguments.sh "$@" 15 | 16 | echo "Apply symbology and upload RGB asset to S3" 17 | 18 | # Build an array of arguments to pass to apply_symbology.py 19 | ARG_ARRAY=("--dataset" "${DATASET}" "--version" "${VERSION}") 20 | 21 | ARG_ARRAY+=("--symbology" "${JSON}") 22 | 23 | ARG_ARRAY+=("--no-data" "${NO_DATA}") 24 | 25 | ARG_ARRAY+=("--source-uri" "${SRC}") 26 | 27 | ARG_ARRAY+=("--target-prefix" "${TARGET}") 28 | 29 | apply_colormap.py "${ARG_ARRAY[@]}" -------------------------------------------------------------------------------- /batch/scripts/clip_and_reproject_geom.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | 9 | # optional arguments 10 | # -g | --geometry_name (get_arguments.sh specifies default) 11 | 12 | ME=$(basename "$0") 13 | . 
get_arguments.sh "$@" 14 | 15 | set -u 16 | 17 | # Transform to web mercator (WM) in two steps to isolate the more involved 18 | # one for polygons that overflow WM lat bounds of -85/85 degrees 19 | 20 | # Reproject all polygons within WM bounds 21 | psql -c " 22 | UPDATE 23 | \"$DATASET\".\"$VERSION\" 24 | SET 25 | ${GEOMETRY_NAME}_wm = ST_Multi(ST_Transform(ST_Force2D($GEOMETRY_NAME), 3857)) 26 | WHERE 27 | ${GEOMETRY_NAME}_wm IS NULL 28 | AND 29 | ST_Within($GEOMETRY_NAME, ST_MakeEnvelope(-180, -85, 180, 85, 4326));" 30 | 31 | # For all polygons outside of WM bounds, clip then reproject to WM 32 | psql -c " 33 | UPDATE 34 | \"$DATASET\".\"$VERSION\" 35 | SET 36 | ${GEOMETRY_NAME}_wm = ST_Multi(ST_Transform(ST_Force2D(ST_Buffer(ST_Intersection($GEOMETRY_NAME, ST_MakeEnvelope(-180, -85, 180, 85, 4326)), 0)), 3857)) 37 | WHERE 38 | ${GEOMETRY_NAME}_wm IS NULL 39 | AND 40 | NOT ST_Within($GEOMETRY_NAME, ST_MakeEnvelope(-180, -85, 180, 85, 4326));" 41 | 42 | set +u -------------------------------------------------------------------------------- /batch/scripts/cluster_partitions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -p | --partition_type 9 | # -P | --partition_schema 10 | # -x | --index_type 11 | # -C | --column_names 12 | 13 | ME=$(basename "$0") 14 | . get_arguments.sh "$@" 15 | 16 | # While it seems unnecessary here to pass the arguments through the get_arguments.sh script 17 | # I prefer to still do it. This way, we have a consistent way to log the env variables and can make sure 18 | # that argument names are used consistently across all tools. 19 | echo "PYTHON: Cluster partitions" 20 | cluster_partitions.py -d "$DATASET" -v "$VERSION" -p "$PARTITION_TYPE" -P "$PARTITION_SCHEMA" -x "$INDEX_TYPE" -C "$COLUMN_NAMES" 21 | -------------------------------------------------------------------------------- /batch/scripts/cluster_table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -C | --column_names 9 | # -x | --index_type 10 | ME=$(basename "$0") 11 | . get_arguments.sh "$@" 12 | 13 | COLUMN_NAMES_UNDERSCORED="$(echo "$COLUMN_NAMES" | sed 's/,/_/g' | cut -c 1-63)" 14 | echo "PSQL: CLUSTER \"$DATASET\".\"$VERSION\" USING \"${VERSION}_${COLUMN_NAMES_UNDERSCORED}_${INDEX_TYPE}_idx\"" 15 | psql -c "CLUSTER \"$DATASET\".\"$VERSION\" USING \"${VERSION}_${COLUMN_NAMES_UNDERSCORED}_${INDEX_TYPE}_idx\";" -------------------------------------------------------------------------------- /batch/scripts/create_index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -c | --column_name 9 | # -x | --index_type 10 | ME=$(basename "$0") 11 | . 
get_arguments.sh "$@" 12 | 13 | COLUMN_NAMES_UNDERSCORED="$(echo "$COLUMN_NAMES" | sed 's/,/_/g' | cut -c 1-63)" 14 | psql -c "CREATE INDEX IF NOT EXISTS \"${VERSION}_${COLUMN_NAMES_UNDERSCORED}_${INDEX_TYPE}_idx\" 15 | ON \"$DATASET\".\"$VERSION\" USING $INDEX_TYPE 16 | (${COLUMN_NAMES});" -------------------------------------------------------------------------------- /batch/scripts/create_partitions.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -p | --partition_type 9 | # -P | --partition_schema 10 | ME=$(basename "$0") 11 | . get_arguments.sh "$@" 12 | 13 | # While it seems unnecessary here to pass the arguments through the get_arguments.sh script 14 | # I prefer to still do it. This way, we have a consistent way to log the env variables and can make sure 15 | # that argument names are used consistently across all tools. 16 | echo "PYTHON: Create partitions" 17 | create_partitions.py -d "$DATASET" -v "$VERSION" -p "$PARTITION_TYPE" -P "$PARTITION_SCHEMA" 18 | -------------------------------------------------------------------------------- /batch/scripts/create_vector_tile_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -Z | --min_zoom 10 | # -z | --max_zoom 11 | # -t | --tile_strategy 12 | # -I | --implementation 13 | 14 | # optional arguments 15 | # --filter 16 | 17 | ME=$(basename "$0") 18 | . get_arguments.sh "$@" 19 | 20 | 21 | NDJSON_FILE="${DATASET}.json" 22 | 23 | # Build an array of arguments to pass to tippecanoe 24 | TIPPE_ARG_ARRAY=( 25 | "-e" "tilecache" 26 | "-Z${MIN_ZOOM}" 27 | "-z${MAX_ZOOM}" 28 | "--preserve-input-order" 29 | "-P" 30 | "-n" "${DATASET}" 31 | "-l" "${DATASET}" 32 | ) 33 | 34 | case ${TILE_STRATEGY} in 35 | discontinuous) # Discontinuous polygon features 36 | TIPPE_ARG_ARRAY+=("--drop-densest-as-needed" "--extend-zooms-if-still-dropping") 37 | ;; 38 | continuous) # Continuous polygon features 39 | TIPPE_ARG_ARRAY+=("--coalesce-densest-as-needed" "--extend-zooms-if-still-dropping") 40 | ;; 41 | keep_all) # never drop or coalesce feature, ignore size and feature count 42 | TIPPE_ARG_ARRAY+=("-r1") 43 | ;; 44 | *) 45 | echo "Invalid Tile Cache option -${TILE_STRATEGY}" 46 | exit 1 47 | ;; 48 | esac 49 | 50 | if [ -n "${FILTER}" ]; then 51 | echo "${FILTER}" > feature_filter.txt 52 | TIPPE_ARG_ARRAY+=("-J" "feature_filter.txt") 53 | fi 54 | 55 | TIPPE_ARG_ARRAY+=("${NDJSON_FILE}") 56 | 57 | echo "Fetching NDJSON file from the Data Lake: ${SRC} -> ${NDJSON_FILE}..." 58 | aws s3 cp "${SRC}" "${NDJSON_FILE}" --no-progress 59 | 60 | echo "Building Tile Cache with Tippecanoe..." 61 | tippecanoe "${TIPPE_ARG_ARRAY[@]}" 62 | 63 | echo "Uploading tiles to S3 with TilePutty..." 
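# Note: "tilecache" below is the output directory tippecanoe wrote tiles into
# via the "-e tilecache" option above. NUM_PROCESSES is not set in this script
# and is assumed to be provided by get_arguments.sh or the job environment.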
64 | tileputty tilecache --bucket "${TILE_CACHE}" --dataset "${DATASET}" --version "${VERSION}" --implementation "${IMPLEMENTATION}" --cores "${NUM_PROCESSES}" -------------------------------------------------------------------------------- /batch/scripts/export_1x1_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # required arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -C | --column_names 9 | # -T | --target 10 | # 11 | # optional arguments 12 | # --include_tile_id 13 | 14 | ME=$(basename "$0") 15 | . get_arguments.sh "$@" 16 | 17 | echo "PYTHON: Create 1x1 grid files" 18 | ARG_ARRAY=("--dataset" "${DATASET}" 19 | "--version" "${VERSION}" 20 | "-C" "${COLUMN_NAMES}") 21 | 22 | if [ -n "${INCLUDE_TILE_ID}" ]; then 23 | ARG_ARRAY+=("--include_tile_id") 24 | fi 25 | export_1x1_grid.py "${ARG_ARRAY[@]}" 26 | 27 | echo "Combine output files" 28 | echo ./*.tmp | xargs cat >> "${DATASET}_${VERSION}_1x1.tsv" 29 | 30 | echo "Post-process geometries" 31 | extract_geometries.py "${DATASET}_${VERSION}_1x1.tsv" 32 | 33 | echo "AWSCLI: upload to data lake" 34 | aws s3 cp "${DATASET}_${VERSION}_1x1.tsv" "$TARGET" 35 | -------------------------------------------------------------------------------- /batch/scripts/export_vector_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -f | --local_file 9 | # -F | --format 10 | # -T | --target 11 | # -w | --where 12 | # -C | --column_names 13 | # -X | --zipped 14 | 15 | # optional arguments 16 | # -g | --geometry_name (get_arguments.sh specifies default) 17 | # -i | --fid_name (get_arguments.sh specifies default) 18 | 19 | ME=$(basename "$0") 20 | . get_arguments.sh "$@" 21 | 22 | echo "OGR2OGR: Export table \"${DATASET}\".\"${VERSION}\" using format ${FORMAT}" 23 | echo "Export columns $COLUMN_NAMES" 24 | ogr2ogr -f "$FORMAT" "$LOCAL_FILE" PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 25 | -sql "SELECT $COLUMN_NAMES, $GEOMETRY_NAME FROM \"${DATASET}\".\"${VERSION}\" $WHERE" -geomfield "${GEOMETRY_NAME}" \ 26 | -lco FID="$FID_NAME" 27 | 28 | if [ "${ZIPPED}" == "True" ]; then 29 | BASE_NAME="${LOCAL_FILE%.*}" 30 | LOCAL_FILE="${BASE_NAME}.zip" 31 | find . -name "${BASE_NAME}.*" | zip -@ -j "${LOCAL_FILE}" 32 | fi 33 | 34 | echo "AWSCLI: COPY DATA FROM $LOCAL_FILE TO $TARGET" 35 | aws s3 cp "$LOCAL_FILE" "$TARGET" 36 | 37 | echo "Done" -------------------------------------------------------------------------------- /batch/scripts/inherit_geostore.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | ME=$(basename "$0") 9 | . get_arguments.sh "$@" 10 | 11 | # Inherit from geostore 12 | echo "PSQL: ALTER TABLE. 
Inherit from geostore" 13 | psql -c "ALTER TABLE \"$DATASET\".\"$VERSION\" INHERIT public.geostore;" -------------------------------------------------------------------------------- /batch/scripts/load_tabular_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -D | --delimiter 10 | 11 | # optional arguments 12 | # --lat 13 | # --lng 14 | # -g | --geometry_name (get_arguments.sh specifies default) 15 | 16 | ME=$(basename "$0") 17 | . get_arguments.sh "$@" 18 | 19 | 20 | # Unescape TAB character 21 | if [ "$DELIMITER" == "\t" ]; then 22 | DELIMITER=$(echo -e "\t") 23 | fi 24 | 25 | # I think Postgres temporary tables are such that concurrent jobs won't 26 | # interfere with each other, but make the temp table name unique just 27 | # in case. 28 | UUID=$(python -c 'import uuid; print(uuid.uuid4(), end="")' | sed s/-//g) 29 | TEMP_TABLE="temp_${UUID}" 30 | 31 | # IF GEOMETRY_NAME, LAT and LNG are defined, set ADD_POINT_GEOMETRY_FIELDS_SQL 32 | # by sourcing _add_point_geometry_fields_sql.sh 33 | # It defines a SQL snippet we'll run later 34 | if [[ -n "${GEOMETRY_NAME:-}" ]] && [[ -n "${LAT:-}" ]] && [[ -n "${LNG:-}" ]] 35 | then 36 | . _add_point_geometry_fields_sql.sh 37 | . _fill_point_geometry_fields_sql.sh 38 | else 39 | ADD_POINT_GEOMETRY_FIELDS_SQL="" 40 | FILL_POINT_GEOMETRY_FIELDS_SQL="" 41 | fi 42 | 43 | for uri in "${SRC[@]}"; do 44 | # https://stackoverflow.com/questions/48019381/how-postgresql-copy-to-stdin-with-csv-do-on-conflic-do-update 45 | aws s3 cp "${uri}" - | psql -c "BEGIN; 46 | CREATE TEMP TABLE \"$TEMP_TABLE\" 47 | (LIKE \"$DATASET\".\"$VERSION\" INCLUDING DEFAULTS) 48 | ON COMMIT DROP; 49 | 50 | ALTER TABLE \"$TEMP_TABLE\" DROP COLUMN IF EXISTS ${GEOMETRY_NAME}; 51 | ALTER TABLE \"$TEMP_TABLE\" DROP COLUMN IF EXISTS ${GEOMETRY_NAME}_wm; 52 | 53 | COPY \"$TEMP_TABLE\" FROM STDIN WITH (FORMAT CSV, DELIMITER '$DELIMITER', HEADER); 54 | 55 | $ADD_POINT_GEOMETRY_FIELDS_SQL 56 | $FILL_POINT_GEOMETRY_FIELDS_SQL 57 | 58 | INSERT INTO \"$DATASET\".\"$VERSION\" 59 | SELECT * FROM \"$TEMP_TABLE\" 60 | ON CONFLICT DO NOTHING; 61 | 62 | COMMIT;" 63 | done -------------------------------------------------------------------------------- /batch/scripts/raster_tile_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -I | --implementation 9 | # --skip 10 | # --target_bucket 11 | # --zoom_level 12 | 13 | # and positional arguments 14 | # asset_prefix 15 | 16 | ME=$(basename "$0") 17 | . 
get_arguments.sh "$@" 18 | 19 | ARG_ARRAY=("--dataset" "${DATASET}" 20 | "--version" "${VERSION}" 21 | "--implementation" "${IMPLEMENTATION}" 22 | "--target-bucket" "${TARGET_BUCKET}" 23 | "--zoom-level" "${ZOOM_LEVEL}" 24 | "--bit-depth" "${BIT_DEPTH}") 25 | 26 | if [ -n "${SKIP}" ]; then 27 | ARG_ARRAY+=("--skip_empty_tiles") 28 | fi 29 | 30 | echo "Generate raster tile cache with GDAL2Tiles and upload to target S3 bucket" 31 | 32 | raster_tile_cache.py "${ARG_ARRAY[@]}" "${POSITIONAL[@]}" 33 | -------------------------------------------------------------------------------- /batch/scripts/resample.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -r | --resampling_method 10 | # --zoom_level 11 | # -T | --target 12 | 13 | ME=$(basename "$0") 14 | . get_arguments.sh "$@" 15 | 16 | echo "Reproject to WM and resample" 17 | 18 | # Build an array of arguments to pass to resample.py 19 | ARG_ARRAY=("--dataset" "${DATASET}" "--version" "${VERSION}") 20 | 21 | ARG_ARRAY+=("--source-uri" "${SRC}") 22 | 23 | ARG_ARRAY+=("--resampling-method" "${RESAMPLE}") 24 | 25 | ARG_ARRAY+=("--target-zoom" "${ZOOM_LEVEL}") 26 | 27 | ARG_ARRAY+=("--target-prefix" "${TARGET}") 28 | 29 | # Run resample.py with the array of arguments 30 | resample.py "${ARG_ARRAY[@]}" -------------------------------------------------------------------------------- /batch/scripts/run_pixetl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -j | --json 9 | 10 | # optional arguments 11 | # --subset 12 | # --overwrite 13 | 14 | ME=$(basename "$0") 15 | . get_arguments.sh "$@" 16 | 17 | # in get_arguments.sh we call pushd to jump into the batchID subfolder 18 | # pixETL expects /tmp as workdir and will make attempt to create subfolder itself 19 | popd 20 | 21 | echo "Build Raster Tile Set and upload to S3" 22 | 23 | # Build an array of arguments to pass to pixetl 24 | ARG_ARRAY=("--dataset" "${DATASET}" "--version" "${VERSION}") 25 | 26 | if [ -n "${OVERWRITE}" ]; then 27 | ARG_ARRAY+=("--overwrite") 28 | fi 29 | 30 | if [ -n "${SUBSET}" ]; then 31 | ARG_ARRAY+=("--subset") 32 | ARG_ARRAY+=("${SUBSET}") 33 | fi 34 | 35 | ARG_ARRAY+=("${JSON}") 36 | 37 | # Run pixetl with the array of arguments 38 | pixetl "${ARG_ARRAY[@]}" -------------------------------------------------------------------------------- /batch/scripts/test_mock_s3_awscli.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #set -e 4 | 5 | # requires arguments 6 | # -s | --source 7 | ME=$(basename "$0") 8 | . get_arguments.sh "$@" 9 | 10 | echo "AWSCLI: COPY DATA FROM S3 to STDOUT" 11 | aws s3 cp "${SRC}" - 12 | -------------------------------------------------------------------------------- /batch/scripts/test_mock_s3_ogr2ogr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # -s | --source 9 | # -l | --source_layer 10 | # -f | --local_file 11 | ME=$(basename "$0") 12 | . 
get_arguments.sh "$@" 13 | 14 | echo "AWSCLI: COPY DATA FROM S3 to STDOUT" 15 | # shellcheck disable=SC2086 16 | aws s3 cp "$SRC" "$LOCAL_FILE" 17 | 18 | echo "OGR2OGR: Import ${DATASET}.${VERSION} from ${LOCAL_FILE} ${SRC_LAYER}" 19 | # Create schema only, using ogr2ogr 20 | ogr2ogr -f "PostgreSQL" PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 21 | "$LOCAL_FILE" "$SRC_LAYER" \ 22 | -nlt PROMOTE_TO_MULTI -nln "$VERSION" \ 23 | -lco SCHEMA="$DATASET" -lco GEOMETRY_NAME="$GEOMETRY_NAME" -lco SPATIAL_INDEX=NONE -lco FID="$FID_NAME" \ 24 | -t_srs EPSG:4326 -limit 0 25 | -------------------------------------------------------------------------------- /batch/scripts/tmp/create_1x1_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | psql -c " 6 | CREATE MATERIALIZED VIEW $DATASET.{$VERSION}__1x1 7 | WITH a AS ( 8 | SELECT $FID_NAME 9 | ,gfw_grid_1x1_id 10 | ,gfw_grid_10x10_id 11 | ,st_makevalid(st_intersection(w.$GEOMETRY_NAME, g.geom)) AS $GEOMETRY_NAME 12 | FROM $DATASET.$VERSION w 13 | ,gfw_grid_1x1 g 14 | WHERE st_intersects(w.$GEOMETRY_NAME, g.geom) 15 | ) 16 | ,b AS ( 17 | SELECT $FID_NAME 18 | ,gfw_grid_1x1_id 19 | ,gfw_grid_10x10_id 20 | ,CASE 21 | WHEN st_geometrytype($GEOMETRY_NAME) = 'ST_GeometryCollection'::TEXT 22 | THEN st_collectionextract($GEOMETRY_NAME, 3) 23 | ELSE $GEOMETRY_NAME 24 | END AS $GEOMETRY_NAME 25 | FROM a 26 | ) 27 | 28 | SELECT $FID_NAME 29 | ,gfw_grid_1x1_id 30 | ,gfw_grid_10x10_id 31 | ,$GEOMETRY_NAME 32 | FROM b 33 | WHERE st_geometrytype($GEOMETRY_NAME) = 'ST_Polygon' 34 | OR st_geometrytype($GEOMETRY_NAME) = 'ST_MultiPolygon' 35 | GROUP BY $FID_NAME;" 36 | 37 | 38 | # Create indices 39 | psql -c "CREATE INDEX IF NOT EXISTS ${VERSION}__1x1_${GEOMETRY_NAME}_id_idx 40 | ON $DATASET.${VERSION}__1x1 USING gist 41 | (${GEOMETRY_NAME});" -------------------------------------------------------------------------------- /batch/scripts/tmp/export_vector_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "OGR2OGR: Export ${DATASET}.${VERSION} to ${DST} using driver ${DRIVER}" 6 | ogr2ogr -f "${DRIVER}" "${DST}" \ 7 | PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 8 | -sql "select ${COLUMNS} from ${DATASET}.${VERSION}" 9 | -------------------------------------------------------------------------------- /batch/scripts/tmp/import_vector_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "OGR2OGR: Import ${DATASET}.${VERSION} from ${SRC} ${SRC_LAYER}" 6 | ogr2ogr -f "PostgreSQL" PG:"password=$PGPASSWORD host=$PGHOST port=$PGPORT dbname=$PGDATABASE user=$PGUSER" \ 7 | "$SRC" "$SRC_LAYER" \ 8 | -nlt PROMOTE_TO_MULTI -nln "$VERSION" \ 9 | -lco SCHEMA="$DATASET" -lco GEOMETRY_NAME="$GEOMETRY_NAME" -lco SPATIAL_INDEX=NONE -lco FID="$FID_NAME" \ 10 | -t_srs EPSG:4326 -limit 0 11 | # --config PG_USE_COPY YES -makevalid 12 | 13 | 14 | echo "PSQL: Add GFW specific layers" 15 | psql -c "ALTER TABLE $DATASET.$VERSION ADD COLUMN ${GEOMETRY_NAME}_wm geometry(MultiPolygon,3857); 16 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_area__ha NUMERIC; 17 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_geostore_id UUID; 18 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_geojson TEXT; 19 | ALTER TABLE $DATASET.$VERSION ADD COLUMN gfw_bbox BOX2D;" 20 | 21 | 22 | # 
http://blog.cleverelephant.ca/2018/09/postgis-external-storage.html 23 | echo "PSQL: Set storage to external for faster querying" 24 | psql -c "ALTER TABLE $DATASET.$VERSION ALTER COLUMN $GEOMETRY_NAME SET STORAGE EXTERNAL; 25 | ALTER TABLE $DATASET.$VERSION ALTER COLUMN ${GEOMETRY_NAME}_wm SET STORAGE EXTERNAL;" 26 | -------------------------------------------------------------------------------- /batch/scripts/unify_projection.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -s | --source 7 | # -T | --target 8 | # --target_crs 9 | 10 | ME=$(basename "$0") 11 | . get_arguments.sh "$@" 12 | 13 | echo "Reproject to a common CRS" 14 | 15 | src_count=0 16 | CMD_ARGS=() 17 | 18 | for s in "${SRC[@]}"; do 19 | source_dir="SRC_${src_count}" 20 | mkdir -p "$source_dir" 21 | 22 | echo "Now recursively downloading $s to $source_dir" 23 | if [[ $s == gs://* ]]; then 24 | gsutil -m cp -r "$s" "$source_dir" 25 | elif [[ $s == s3://* ]]; then 26 | aws s3 cp --recursive --no-progress "$s" "$source_dir" 27 | fi 28 | echo "Done downloading $s to $source_dir" 29 | 30 | reprojected_dir="REPROJECTED_${src_count}" 31 | mkdir -p "$reprojected_dir" 32 | 33 | cd "${source_dir}" 34 | for d in $(find . -type d | sed 's/.\///'); do 35 | mkdir -p "../${reprojected_dir}/${d}" 36 | done 37 | 38 | for f in $(find . -iname "*.tif"| sed 's/.\///'); do 39 | local_src_file="${source_dir}/${f}" 40 | local_warped_file="${reprojected_dir}/${f}" 41 | remote_target_file="${TARGET}/SRC_${src_count}/${f}" 42 | 43 | CMD_ARGS+=("${local_src_file}" "${local_warped_file}" "${TARGET_CRS}" "${remote_target_file}") 44 | done 45 | cd .. 46 | 47 | src_count=$(($src_count+1)) 48 | done 49 | 50 | echo "${CMD_ARGS[@]}" | xargs -n 4 -P 32 _warp_and_upload.sh 51 | -------------------------------------------------------------------------------- /batch/scripts/update_point_geometry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # requires arguments 6 | # -d | --dataset 7 | # -v | --version 8 | # --lat 9 | # --lng 10 | 11 | # optional arguments 12 | # -g | --geometry_name (get_arguments.sh specifies default) 13 | 14 | ME=$(basename "$0") 15 | . get_arguments.sh "$@" 16 | 17 | # Update GFW columns 18 | echo "PSQL: UPDATE \"$DATASET\".\"$VERSION\". 
Update Point columns" 19 | psql -c "UPDATE \"$DATASET\".\"$VERSION\" SET ${GEOMETRY_NAME} = ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 20 | ${GEOMETRY_NAME}_wm = ST_Transform(ST_SetSRID(ST_MakePoint($LNG, $LAT),4326), 3857) 21 | WHERE ${GEOMETRY_NAME} IS null OR ${GEOMETRY_NAME}_wm IS null;" -------------------------------------------------------------------------------- /batch/universal_batch.dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/osgeo/gdal:ubuntu-full-3.9.3 2 | LABEL desc="Docker image with ALL THE THINGS for use in Batch by the GFW data API" 3 | LABEL version="v1.0" 4 | 5 | ENV TIPPECANOE_VERSION=2.75.1 6 | 7 | ENV VENV_DIR="/.venv" 8 | 9 | RUN apt-get update -y \ 10 | && apt-get install --no-install-recommends -y python3 python-dev-is-python3 python3-venv \ 11 | postgresql-client jq curl libsqlite3-dev zlib1g-dev zip libpq-dev build-essential gcc g++ \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # --system-site-packages is needed to copy the GDAL Python libs into the venv 16 | RUN python -m venv ${VENV_DIR} --system-site-packages \ 17 | && . ${VENV_DIR}/bin/activate \ 18 | && python -m ensurepip --upgrade \ 19 | && python -m pip install \ 20 | agate~=1.12.0 \ 21 | asyncpg~=0.30.0 \ 22 | awscli~=1.36.18 \ 23 | awscli-plugin-endpoint~=0.4 \ 24 | boto3~=1.35.77 \ 25 | click~=8.1.7 \ 26 | csvkit~=2.0.1 \ 27 | earthengine-api~=0.1.408 \ 28 | fiona~=1.9.6 \ 29 | gsutil~=5.31 \ 30 | numpy~=1.26.4 \ 31 | pandas~=2.1.4 \ 32 | psycopg2~=2.9.10 \ 33 | rasterio~=1.3.11 \ 34 | setuptools~=75.6 \ 35 | shapely~=2.0.4 \ 36 | SQLAlchemy~=1.3.24 \ 37 | tileputty~=0.2.10 38 | 39 | # Install TippeCanoe 40 | RUN mkdir -p /opt/src 41 | WORKDIR /opt/src 42 | RUN curl https://codeload.github.com/felt/tippecanoe/tar.gz/${TIPPECANOE_VERSION} | tar -xz \ 43 | && cd /opt/src/tippecanoe-${TIPPECANOE_VERSION} \ 44 | && make \ 45 | && make install \ 46 | && rm -R /opt/src/tippecanoe-${TIPPECANOE_VERSION} 47 | 48 | # Copy scripts 49 | COPY ./batch/scripts/ /opt/scripts/ 50 | COPY ./batch/python/ /opt/python/ 51 | 52 | # Make sure scripts are executable 53 | RUN chmod +x -R /opt/scripts/ 54 | RUN chmod +x -R /opt/python/ 55 | 56 | ENV PATH="/opt/scripts:${PATH}" 57 | ENV PATH="/opt/python:${PATH}" 58 | 59 | ENV WORKDIR="/" 60 | WORKDIR / 61 | 62 | ENTRYPOINT ["/opt/scripts/report_status.sh"] -------------------------------------------------------------------------------- /docker-compose.prod.yml: -------------------------------------------------------------------------------- 1 | services: 2 | app: 3 | build: 4 | context: . 
5 | args: 6 | - ENV=dev 7 | container_name: app_dev 8 | restart: on-failure 9 | volumes: 10 | - $HOME/.aws:/root/.aws:ro 11 | - ./app:/app/app 12 | - /var/run/docker.sock:/var/run/docker.sock:ro 13 | environment: 14 | - DB_HOST=database 15 | - DATABASE=geostore 16 | - DB_USER=gfw 17 | - DB_PASSWORD=postgres # pragma: allowlist secret 18 | - DB_PORT=5432 19 | - DB_HOST_RO=database 20 | - DATABASE_RO=geostore 21 | - DB_USER_RO=gfw_readonly 22 | - DB_PASSWORD_RO=readonly # pragma: allowlist secret 23 | - DB_PORT_RO=5432 24 | - AWS_DEFAULT_PROFILE=gfw-dev 25 | - LOG_LEVEL=debug 26 | - POSTGRESQL_CLIENT_JOB_DEFINITION=postgres_client_jd 27 | - GDAL_PYTHON_JOB_DEFINITION=gdal_python_jd 28 | - TILE_CACHE_JOB_DEFINITION=tile_cache_jd 29 | - PIXETL_JOB_DEFINITION=pixetl_jd 30 | - AURORA_JOB_QUEUE=aurora_jq 31 | - AURORA_JOB_QUEUE_FAST=aurora_fast_jq 32 | - DATA_LAKE_JOB_QUEUE=data_lake_jq 33 | - TILE_CACHE_JOB_QUEUE=tile_cache_jq 34 | - PIXETL_JOB_QUEUE=pixetl_jq 35 | - ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq 36 | - RASTER_ANALYSIS_LAMBDA_NAME=raster_analysis 37 | - API_URL="http://app_dev:80" 38 | - RW_API_URL=https://api.resourcewatch.org 39 | - GOOGLE_APPLICATION_CREDENTIALS=/root/.gcs/private_key.json 40 | - 'RW_API_KEY={"api-key": null}' 41 | ports: 42 | - "8000:80" 43 | entrypoint: wait_for_postgres.sh /app/start.sh 44 | depends_on: 45 | - database 46 | 47 | database: 48 | container_name: gfw-data-api-database 49 | image: postgis/postgis:14-3.4-alpine 50 | restart: on-failure 51 | ports: 52 | - "54320:5432" 53 | environment: 54 | - POSTGRES_DB=geostore 55 | - POSTGRES_USER=gfw 56 | - POSTGRES_PASSWORD=postgres # pragma: allowlist secret 57 | volumes: 58 | - database_data:/var/lib/postgresql/data 59 | 60 | volumes: 61 | database_data: 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "gfw-data-api" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = "~= 3.10" 7 | dependencies = [ 8 | "aenum", 9 | "aiofiles", 10 | "aiohttp", 11 | "alembic", 12 | "arq", 13 | "asgi_lifespan", 14 | "async-lru", 15 | "asyncpg", 16 | "boto3", 17 | "botocore", 18 | "email-validator", 19 | "fastapi", 20 | "geoalchemy2<0.12", 21 | "geojson", 22 | "gino", 23 | "gino_starlette", 24 | "google-cloud-storage", 25 | "gunicorn", 26 | "httpcore", 27 | "httpx", 28 | "httpx-auth", 29 | "newrelic", 30 | "numpy<2", 31 | "openapi_spec_validator", 32 | "orjson", 33 | "packaging", 34 | "pendulum<3", 35 | "pglast<2", 36 | "psutil", 37 | "psycopg2", 38 | "pydantic<2", 39 | "pyproj", 40 | "python-multipart", 41 | "retrying", 42 | "shapely", 43 | "sqlalchemy<1.4", 44 | "sqlalchemy-utils", 45 | "starlette", 46 | "typer", 47 | "unidecode", 48 | "uvicorn[standard]", 49 | ] 50 | 51 | [dependency-groups] 52 | dev = [ 53 | "asgi-lifespan", 54 | "docker", 55 | "geopandas", # Needed by pixetl in batch script test 56 | # Pixetl is already installed in the pixetl image that's run in Batch, 57 | # this is to enable tests to run in the test container: 58 | "gfw-pixetl", 59 | "moto[awslambda, batch, ec2, s3, secretsmanager]<5", 60 | "pandas<2.2", # Needed by pixetl in batch script test 61 | "pre-commit", 62 | "pytest", 63 | "pytest-asyncio", 64 | "pytest-cov", 65 | "pytest-timeout", 66 | "pytest-unordered>=0.6.1", 67 | "rasterio==1.3.11", 68 | "retrying", # Needed by pixetl in batch script test 69 | ] 70 | 71 | [tool.setuptools] 72 | 
packages = ["app", "batch"] 73 | 74 | [tool.uv.sources] 75 | gfw-pixetl = { git = "https://github.com/wri/gfw_pixetl.git", branch = "develop" } 76 | -------------------------------------------------------------------------------- /scripts/delete_workspace: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | 6 | # allow to overwrite GIT_SHA 7 | POSITIONAL=() 8 | GIT_SHA=$(git rev-parse HEAD) 9 | while [[ $# -gt 0 ]] 10 | do 11 | key="$1" 12 | 13 | case $key in 14 | -g|--git_sha) 15 | GIT_SHA="$2" 16 | shift # past argument 17 | shift # past value 18 | ;; 19 | *) # unknown option 20 | POSITIONAL+=("$1") # save it in an array for later 21 | shift # past argument 22 | ;; 23 | esac 24 | done 25 | set -- "${POSITIONAL[@]}" # restore positional parameters 26 | 27 | docker compose -f terraform/docker/docker-compose.yml build 28 | docker compose -f terraform/docker/docker-compose.yml run --entrypoint delete_workspace --rm terraform "$@" -var="git_sha=${GIT_SHA}" -------------------------------------------------------------------------------- /scripts/develop: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Default values 6 | POSITIONAL=() 7 | BUILD=true 8 | # extracting cmd line arguments 9 | while [[ $# -gt 0 ]] 10 | do 11 | key="$1" 12 | 13 | case $key in 14 | --no_build) 15 | BUILD=false 16 | shift # past argument 17 | ;; 18 | *) # unknown option 19 | POSITIONAL+=("$1") # save it in an array for later 20 | shift # past argument 21 | ;; 22 | esac 23 | done 24 | set -- "${POSITIONAL[@]}" # restore positional parameters 25 | 26 | if [ "${BUILD}" = true ]; then 27 | docker build -t batch_jobs_test . -f batch/universal_batch.dockerfile 28 | docker build -t pixetl_test . 
-f batch/pixetl.dockerfile 29 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev up --abort-on-container-exit --remove-orphans --build 30 | else 31 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev up --abort-on-container-exit --remove-orphans 32 | fi -------------------------------------------------------------------------------- /scripts/infra: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | GIT_SHA=$(git rev-parse HEAD) 6 | 7 | docker compose -f terraform/docker/docker-compose.yml build 8 | docker compose -f terraform/docker/docker-compose.yml run --rm terraform "$@" -var="git_sha=${GIT_SHA}" 9 | -------------------------------------------------------------------------------- /scripts/migrate: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | docker compose -f docker-compose.dev.yml build app 6 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev run --rm --entrypoint alembic app revision --autogenerate 7 | 8 | git add app/models/orm/migrations/versions -------------------------------------------------------------------------------- /scripts/migration_dry_run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | docker compose -f docker-compose.dev.yml build app 6 | docker compose -f docker-compose.dev.yml --project-name gfw-data-api_dev run --rm --entrypoint alembic app -x dry-run upgrade head 7 | -------------------------------------------------------------------------------- /scripts/setup: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | PYTHON_VERSION="3.10" 6 | VIRTUAL_ENV="${VIRTUAL_ENV:-.venv_uv}" 7 | 8 | # Install Python and create the venv all in one go 9 | if command -v "uv" >/dev/null 2>&1; then 10 | echo "uv (needed to create venv) found in path" 11 | else 12 | echo "uv (needed to create venv) NOT found in path" 13 | echo "If uv is installed, add its binary directory to your path and try " 14 | echo "running setup script again. For example: " 15 | echo "'PATH=\$PATH:~/bin ./scripts/setup' for uv installed in ~/bin" 16 | echo "Otherwise, see https://docs.astral.sh/uv/getting-started/installation/" 17 | exit 1 18 | fi 19 | uv venv ${VIRTUAL_ENV} --python ${PYTHON_VERSION} --seed 20 | 21 | # Now install all Data API deps in the venv 22 | # But first avoid some common pitfalls 23 | if command -v "pg_config" >/dev/null 2>&1; then 24 | echo "pg_config (needed to build psycopg2) found in path" 25 | else 26 | echo "pg_config (needed to build psycopg2) NOT found in path" 27 | echo "If libpq is installed, add its binary directory to your path and try " 28 | echo "running setup script again. For example: " 29 | echo "'PATH=\$PATH:/opt/homebrew/opt/libpq/bin ./scripts/setup' for " 30 | echo "libpg installed with homebrew" 31 | exit 1 32 | fi 33 | 34 | echo "Installing deps into venv" 35 | echo "If this fails with a message like 'ld: library 'ssl' not found' " 36 | echo "try adding your OpenSSL dir to your library path like so (modify " 37 | echo "for your OpenSSL version): " 38 | echo "LIBRARY_PATH=/opt/homebrew/Cellar/openssl@3/3.4.0/lib ./scripts/setup" 39 | uv sync --locked --no-install-project 40 | 41 | . 
${VIRTUAL_ENV}/bin/activate 42 | 43 | echo "Installing pre-commit hooks" 44 | pre-commit install 45 | pre-commit 46 | -------------------------------------------------------------------------------- /scripts/terraform: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | docker compose -f terraform/docker/docker-compose.yml run --rm --entrypoint terraform --workdir /usr/local/src/terraform terraform "$@" -------------------------------------------------------------------------------- /scripts/test_v2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | 6 | # Default values 7 | POSITIONAL=() 8 | BUILD=true 9 | DO_COV= 10 | DISABLE_WARNINGS="--disable-warnings" 11 | SHOW_STDOUT= 12 | # extracting cmd line arguments 13 | while [[ $# -gt 0 ]] 14 | do 15 | key="$1" 16 | 17 | case $key in 18 | --no_build) 19 | BUILD=false 20 | shift # past argument 21 | ;; 22 | --do-cov) 23 | DO_COV=--cov=app 24 | shift # past argument 25 | ;; 26 | --show-warnings) 27 | DISABLE_WARNINGS= 28 | shift # past argument 29 | ;; 30 | --show-stdout) 31 | SHOW_STDOUT=--capture=no 32 | shift # past argument 33 | ;; 34 | --moto-port=*) 35 | # prevent port binding issues by explicitly setting the motoserver s3 port 36 | # https://developer.apple.com/forums/thread/682332 37 | export MOTO_PORT="${key#*=}" 38 | shift # past argument=value 39 | ;; 40 | *) # unknown option 41 | POSITIONAL+=("$1") # save it in an array for later 42 | shift # past argument 43 | ;; 44 | esac 45 | done 46 | set -- "${POSITIONAL[@]}" # restore positional parameters 47 | 48 | # If no tests specified, do whole tests_v2 directory 49 | args=$* 50 | if [ $# -eq 0 ]; then 51 | args=tests_v2 52 | fi 53 | 54 | if [ "${BUILD}" = true ]; then 55 | docker build -t batch_jobs_test . -f batch/universal_batch.dockerfile 56 | docker build -t pixetl_test . -f batch/pixetl.dockerfile 57 | docker compose -f docker-compose.test.yml --project-name gfw-data-api_test build --no-cache app_test 58 | fi 59 | 60 | set +e 61 | 62 | # Everything from "--cov-report on" become the arguments to the pytest run inside the docker. 63 | docker compose -f docker-compose.test.yml --project-name gfw-data-api_test run --rm --name app_test app_test --cov-report xml:/app/tests_v2/cobertura.xml $DO_COV $DISABLE_WARNINGS $SHOW_STDOUT $args 64 | exit_code=$? 
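# Capture the pytest exit code before tearing down the containers so the
# script can still exit with the real test result after cleanup succeeds.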
65 | docker compose -f docker-compose.test.yml --project-name gfw-data-api_test down --remove-orphans 66 | exit $exit_code 67 | -------------------------------------------------------------------------------- /terraform.md: -------------------------------------------------------------------------------- 1 | ## Requirements 2 | 3 | | Name | Version | 4 | |------|---------| 5 | | terraform | >=0.12.26 | 6 | | aws | ~> 2.65.0 | 7 | 8 | ## Providers 9 | 10 | | Name | Version | 11 | |------|---------| 12 | | aws | ~> 2.65.0 | 13 | | template | n/a | 14 | | terraform | n/a | 15 | 16 | ## Inputs 17 | 18 | | Name | Description | Type | Default | Required | 19 | |------|-------------|------|---------|:--------:| 20 | | auto\_scaling\_cooldown | n/a | `number` | `300` | no | 21 | | auto\_scaling\_max\_capacity | n/a | `number` | `15` | no | 22 | | auto\_scaling\_max\_cpu\_util | n/a | `number` | `75` | no | 23 | | auto\_scaling\_min\_capacity | n/a | `number` | `1` | no | 24 | | container\_name | n/a | `string` | `"gfw-data-api"` | no | 25 | | container\_port | n/a | `number` | `80` | no | 26 | | desired\_count | n/a | `number` | `1` | no | 27 | | environment | An environment namespace for the infrastructure. | `string` | n/a | yes | 28 | | fargate\_cpu | n/a | `number` | `256` | no | 29 | | fargate\_memory | n/a | `number` | `2048` | no | 30 | | key\_pair | n/a | `string` | `"tmaschler_gfw"` | no | 31 | | listener\_port | n/a | `number` | `80` | no | 32 | | log\_level | n/a | `any` | n/a | yes | 33 | | log\_retention | n/a | `number` | `30` | no | 34 | | region | n/a | `string` | `"us-east-1"` | no | 35 | 36 | ## Outputs 37 | 38 | | Name | Description | 39 | |------|-------------| 40 | | loadbalancer\_dns | n/a | 41 | 42 | -------------------------------------------------------------------------------- /terraform/api_gateway/api_key_authorizer_lambda.py: -------------------------------------------------------------------------------- 1 | def handler(event, context): 2 | api_key = event["headers"].get("x-api-key", None) or event[ 3 | "queryStringParameters" 4 | ].get("x-api-key", "") 5 | policy = { 6 | "principalId": "x-api-key", 7 | "policyDocument": { 8 | "Version": "2012-10-17", 9 | "Statement": [ 10 | { 11 | "Action": "execute-api:Invoke", 12 | "Effect": "Allow", 13 | "Resource": event["methodArn"], 14 | } 15 | ], 16 | }, 17 | "usageIdentifierKey": api_key.strip(), 18 | } 19 | print(policy) 20 | 21 | return policy 22 | -------------------------------------------------------------------------------- /terraform/api_gateway/api_key_authorizer_lambda.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/terraform/api_gateway/api_key_authorizer_lambda.zip -------------------------------------------------------------------------------- /terraform/docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | terraform: 4 | image: globalforestwatch/terraform:1.2.2 5 | volumes: 6 | - ../../:/usr/local/src 7 | - $HOME/.aws:/root/.aws:ro 8 | - /var/run/docker.sock:/var/run/docker.sock 9 | environment: 10 | - ENV 11 | - AWS_ACCESS_KEY_ID 12 | - AWS_SECRET_ACCESS_KEY 13 | - AWS_REGION=us-east-1 14 | working_dir: /usr/local/src 15 | entrypoint: infra 16 | -------------------------------------------------------------------------------- /terraform/generate_port.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import json 4 | 5 | 6 | try: 7 | input_string = sys.argv[1] 8 | min_port = int(sys.argv[2]) 9 | max_port = int(sys.argv[3]) 10 | 11 | random.seed(input_string) 12 | port = random.randint(min_port, max_port) 13 | 14 | output = {"port": str(port)} 15 | print(json.dumps(output)) 16 | except Exception as e: 17 | print(f"Error: {str(e)}", file=sys.stderr) 18 | sys.exit(1) 19 | -------------------------------------------------------------------------------- /terraform/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_policy" "run_batch_jobs" { 2 | name = substr("${local.project}-run_batch_jobs${local.name_suffix}", 0, 64) 3 | policy = data.template_file.task_batch_policy.rendered 4 | } 5 | 6 | resource "aws_iam_policy" "query_batch_jobs" { 7 | name = substr("${local.project}-query_batch_jobs${local.name_suffix}", 0, 64) 8 | policy = data.template_file.query_batch_task_policy.rendered 9 | } 10 | 11 | resource "aws_iam_policy" "s3_read_only" { 12 | name = substr("${local.project}-s3_read_only${local.name_suffix}", 0, 64) 13 | policy = data.local_file.iam_s3_read_only.content 14 | } 15 | 16 | resource "aws_iam_policy" "lambda_invoke" { 17 | name = substr("${local.project}-lambda_invoke${local.name_suffix}", 0, 64) 18 | // policy = data.template_file.iam_lambda_invoke.rendered 19 | policy = data.local_file.iam_lambda_invoke.content 20 | } 21 | 22 | resource "aws_iam_policy" "iam_api_gateway_policy" { 23 | name = substr("${local.project}-api_gateway${local.name_suffix}", 0, 64) 24 | policy = data.local_file.iam_api_gateway_policy.content 25 | } 26 | 27 | resource "aws_iam_policy" "read_gcs_secret" { 28 | name = substr("${local.project}-read_gcs_secret${local.name_suffix}", 0, 64) 29 | policy = data.aws_iam_policy_document.read_gcs_secret_doc.json 30 | } 31 | 32 | resource "aws_iam_policy" "read_new_relic_secret" { 33 | name = substr("${local.project}-read_new-relic_secret${local.name_suffix}", 0, 64) 34 | policy = data.aws_iam_policy_document.read_new_relic_lic.json 35 | } 36 | 37 | resource "aws_iam_policy" "read_rw_api_key_secret" { 38 | name = substr("${local.project}-read_rw_api_key_secret${local.name_suffix}", 0, 64) 39 | policy = data.aws_iam_policy_document.read_rw_api_key.json 40 | } 41 | 42 | resource "aws_iam_policy" "tile_cache_bucket_policy" { 43 | name = substr("${local.project}-tile_cache_bucket_policy${local.name_suffix}", 0, 64) 44 | policy = data.template_file.tile_cache_bucket_policy.rendered 45 | } 46 | 47 | resource "aws_iam_policy" "step_function_policy" { 48 | name = substr("${local.project}-step_function_policy${local.name_suffix}", 0, 64) 49 | policy = data.template_file.step_function_policy.rendered 50 | } 51 | -------------------------------------------------------------------------------- /terraform/logging.tf: -------------------------------------------------------------------------------- 1 | # 2 | # CloudWatch Resources 3 | # 4 | resource "aws_cloudwatch_log_group" "default" { 5 | name = substr("/aws/ecs/${local.project}-log${local.name_suffix}", 0, 64) 6 | retention_in_days = var.log_retention 7 | } 8 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/endpoint/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_api_gateway_method" "method" { 2 | rest_api_id = var.rest_api_id 3 
| resource_id = var.api_resource.id 4 | http_method = var.http_method 5 | authorization = var.authorization 6 | authorizer_id = var.authorizer_id 7 | request_parameters = var.method_parameters 8 | api_key_required = var.require_api_key 9 | } 10 | 11 | 12 | resource "aws_api_gateway_integration" "integration" { 13 | rest_api_id = var.rest_api_id 14 | resource_id = var.api_resource.id 15 | http_method = aws_api_gateway_method.method.http_method 16 | 17 | 18 | integration_http_method = "ANY" 19 | type = "HTTP_PROXY" 20 | uri = var.integration_uri 21 | 22 | request_parameters = var.integration_parameters 23 | } -------------------------------------------------------------------------------- /terraform/modules/api_gateway/endpoint/outputs.tf: -------------------------------------------------------------------------------- 1 | output "integration_point" { 2 | value = aws_api_gateway_integration.integration 3 | } -------------------------------------------------------------------------------- /terraform/modules/api_gateway/endpoint/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rest_api_id" { 2 | type = string 3 | description = "Id of API Gateway to add resource to" 4 | } 5 | 6 | variable "authorizer_id" { 7 | type = string 8 | default = "" 9 | } 10 | 11 | variable "require_api_key" { 12 | type = bool 13 | default = false 14 | } 15 | 16 | variable "http_method" { 17 | type = string 18 | 19 | validation { 20 | condition = contains([ 21 | "ANY", 22 | "DELETE", 23 | "GET", 24 | "HEAD", 25 | "OPTIONS", 26 | "PATCH", 27 | "POST", 28 | "PUT" 29 | ], var.http_method) 30 | error_message = "Invalid HTTP method passed." 31 | } 32 | } 33 | 34 | variable "authorization" { 35 | validation { 36 | condition = contains([ 37 | "NONE", 38 | "CUSTOM", 39 | "AWS_IAM", 40 | "COGNITO_USER_POOLS" 41 | ], var.authorization) 42 | error_message = "Unknown authorization method." 
43 | } 44 | } 45 | 46 | variable "integration_uri" { 47 | type = string 48 | } 49 | 50 | variable "integration_parameters" { 51 | type = map 52 | } 53 | 54 | variable "method_parameters" { 55 | type = map 56 | } 57 | 58 | variable "api_resource" { 59 | type = object({ 60 | id = string 61 | path_part = string 62 | }) 63 | description = "Instance of aws_api_gateway_resource" 64 | } 65 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/gateway/outputs.tf: -------------------------------------------------------------------------------- 1 | output "internal_usage_plan_id" { 2 | value = aws_api_gateway_usage_plan.internal.id 3 | } 4 | 5 | output "external_usage_plan_id" { 6 | value = aws_api_gateway_usage_plan.external.id 7 | } 8 | 9 | output "api_gateway_id" { 10 | value = aws_api_gateway_rest_api.api_gw_api.id 11 | } 12 | 13 | output "invoke_url" { 14 | value = aws_api_gateway_stage.api_gw_stage.invoke_url 15 | } 16 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/gateway/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | description = "Name of API Gateway instance" 4 | default = "GFWDataAPIGateway" 5 | } 6 | 7 | variable "description" { 8 | type = string 9 | description = "Description of API Gateway Instance" 10 | default = "GFW Data API Gateway" 11 | } 12 | 13 | variable "stage_name" { 14 | type = string 15 | description = "The stage under which the instance will be deployed" 16 | default = "deploy" 17 | } 18 | 19 | variable "download_endpoints" { 20 | type = list(string) 21 | description = "path parts to download endpoints" 22 | 23 | # listing spatial endpoints as gateway needs them explicitly created 24 | # in order to apply endpoint-level throttling to them 25 | default = ["geotiff", "gpkg", "shp"] 26 | } 27 | 28 | variable "lb_dns_name" { 29 | type = string 30 | description = "Application load balancer to forward requests to" 31 | } 32 | 33 | variable "api_gateway_role_policy" { 34 | type = string 35 | } 36 | 37 | variable "lambda_role_policy" { 38 | type = string 39 | } 40 | 41 | variable "cloudwatch_policy" { 42 | type = string 43 | } 44 | 45 | variable "lambda_invoke_policy" { 46 | type = string 47 | } 48 | 49 | variable "api_gateway_usage_plans" { 50 | type = map(any) 51 | description = "Throttling limits for API Gateway" 52 | default = { 53 | internal_apps = { 54 | quota_limit = 1000000 # per day 55 | burst_limit = 1000 56 | rate_limit = 200 # per second 57 | } 58 | external_apps = { 59 | quota_limit = 10000 60 | burst_limit = 20 61 | rate_limit = 10 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /terraform/modules/api_gateway/resource/outputs.tf: -------------------------------------------------------------------------------- 1 | output "aws_api_gateway_resource" { 2 | value = aws_api_gateway_resource.aws_api_gateway_resource 3 | } -------------------------------------------------------------------------------- /terraform/modules/api_gateway/resource/variables.tf: -------------------------------------------------------------------------------- 1 | variable "rest_api_id" { 2 | type = string 3 | } 4 | 5 | variable "parent_id" { 6 | type = string 7 | } 8 | 9 | variable "path_part" { 10 | type = string 11 | } 12 | -------------------------------------------------------------------------------- 
/terraform/modules/batch/outputs.tf: -------------------------------------------------------------------------------- 1 | output "aurora_job_definition" { 2 | value = aws_batch_job_definition.aurora 3 | } 4 | 5 | output "aurora_job_definition_arn" { 6 | value = aws_batch_job_definition.aurora.arn 7 | } 8 | 9 | output "aurora_job_queue_arn" { 10 | value = aws_batch_job_queue.aurora.arn 11 | } 12 | 13 | output "aurora_job_queue_fast_arn" { 14 | value = aws_batch_job_queue.aurora_fast.arn 15 | } 16 | 17 | output "data_lake_job_definition_arn" { 18 | value = aws_batch_job_definition.data_lake.arn 19 | } 20 | 21 | output "data_lake_job_definition" { 22 | value = aws_batch_job_definition.data_lake 23 | } 24 | 25 | output "data_lake_job_queue_arn" { 26 | value = aws_batch_job_queue.data_lake.arn 27 | } 28 | 29 | output "pixetl_job_definition_arn" { 30 | value = aws_batch_job_definition.pixetl.arn 31 | } 32 | 33 | output "pixetl_job_definition" { 34 | value = aws_batch_job_definition.pixetl 35 | } 36 | 37 | output "pixetl_job_queue_arn" { 38 | value = aws_batch_job_queue.pixetl.arn 39 | } 40 | 41 | output "on_demand_compute_job_queue_arn" { 42 | value = aws_batch_job_queue.on_demand.arn 43 | } 44 | 45 | output "tile_cache_job_definition_arn" { 46 | value = aws_batch_job_definition.tile_cache.arn 47 | } 48 | 49 | output "tile_cache_job_definition" { 50 | value = aws_batch_job_definition.tile_cache 51 | } 52 | 53 | output "tile_cache_job_queue_arn" { 54 | value = aws_batch_job_queue.tile_cache.arn 55 | } 56 | -------------------------------------------------------------------------------- /terraform/modules/batch/variables.tf: -------------------------------------------------------------------------------- 1 | variable "project" { type = string } 2 | variable "name_suffix" { type = string } 3 | variable "aurora_compute_environment_arn" { type = string } 4 | variable "data_lake_compute_environment_arn" { type = string } 5 | variable "cogify_compute_environment_arn" { type = string } 6 | variable "tile_cache_compute_environment_arn" { type = string } 7 | variable "pixetl_compute_environment_arn" { type = string } 8 | variable "gdal_repository_url" { type = string } 9 | variable "postgres_repository_url" { type = string } 10 | variable "pixetl_repository_url" { type = string } 11 | variable "tile_cache_repository_url" { type = string } 12 | variable "environment" { type = string } 13 | variable "iam_policy_arn" { type = list(string) } 14 | variable "aurora_max_vcpus" { type = number } 15 | variable "gcs_secret" { type = string } 16 | -------------------------------------------------------------------------------- /terraform/outputs.tf: -------------------------------------------------------------------------------- 1 | output "loadbalancer_dns" { 2 | value = coalesce(module.fargate_autoscaling.lb_dns_name, var.lb_dns_name) 3 | } 4 | 5 | output "generated_port" { 6 | value = length(data.external.generate_port) > 0 ? data.external.generate_port[0].result["port"] : var.listener_port 7 | } 8 | -------------------------------------------------------------------------------- /terraform/scripts/hash.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Calculates hash of Docker image source contents 4 | # 5 | # Must be identical to the script that is used by the 6 | # gfw-terraform-modules:terraform/modules/container_registry Terraform module. 7 | # 8 | # Usage: 9 | # 10 | # $ ./hash.sh . 
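#   An optional second argument selects the directory whose .dockerignore is
#   honored (both arguments default to "."), e.g.:
#
#   $ ./hash.sh . batch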
11 | # 12 | 13 | set -e 14 | 15 | pushd () { 16 | command pushd "$@" > /dev/null 17 | } 18 | 19 | popd () { 20 | command popd "$@" > /dev/null 21 | } 22 | 23 | ROOT_DIR=${1:-.} 24 | DOCKER_PATH=${2:-.} 25 | IGNORE="${DOCKER_PATH}/.dockerignore" 26 | 27 | pushd "$ROOT_DIR" 28 | 29 | # Hash all source files of the Docker image 30 | if [ -f "$IGNORE" ]; then 31 | # We don't want to compute hashes for files listed in .dockerignore 32 | # to match regex pattern we need to escape leading . 33 | a=$(printf "! -regex ^./%s.* " `< .dockerignore`) 34 | b=${a//\/.//\\\.} 35 | 36 | file_hashes="$( 37 | find . -type f $b -exec md5sum {} \; 38 | )" 39 | else 40 | # Exclude Python cache files, dot files 41 | file_hashes="$( 42 | find . -type f -not -name '*.pyc' -not -path './.**' -exec md5sum {} \; 43 | )" 44 | fi 45 | 46 | popd 47 | 48 | hash="$(echo "$file_hashes" | md5sum | cut -d' ' -f1)" 49 | 50 | echo '{ "hash": "'"$hash"'" }' 51 | -------------------------------------------------------------------------------- /terraform/templates/api_gateway_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "", 6 | "Effect": "Allow", 7 | "Action": [ 8 | "apigateway:DELETE", 9 | "apigateway:POST", 10 | "apigateway:GET" 11 | ], 12 | "Resource": [ 13 | "arn:aws:apigateway:*::/apikeys/*", 14 | "arn:aws:apigateway:*::/apikeys", 15 | "arn:aws:apigateway:*::/usageplans/*/keys" 16 | ] 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /terraform/templates/cloudwatch_log_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "logs:CreateLogGroup", 8 | "logs:CreateLogStream", 9 | "logs:DescribeLogGroups", 10 | "logs:DescribeLogStreams", 11 | "logs:PutLogEvents", 12 | "logs:GetLogEvents", 13 | "logs:FilterLogEvents" 14 | ], 15 | "Resource": "*" 16 | } 17 | ] 18 | } -------------------------------------------------------------------------------- /terraform/templates/container_properties.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "command": [], 3 | "image": "${image_url}", 4 | "vcpus": ${cpu}, 5 | "memory": ${memory}, 6 | "retry_strategy": { 7 | "attempts": 1 8 | }, 9 | "timeout": { 10 | "attempt_duration_seconds":7500 11 | }, 12 | "environment": [ 13 | { 14 | "name": "ENV", 15 | "value": "${environment}" 16 | }, 17 | { 18 | "name": "JOB_ROLE_ARN", 19 | "value": "${clone_role_arn}" 20 | }, 21 | { 22 | "name": "ECS_TASK_METADATA_RPS_LIMIT", 23 | "value": "100,150" 24 | }, 25 | { 26 | "name": "LC_ALL", 27 | "value": "C.UTF-8" 28 | }, 29 | { 30 | "name": "LANG", 31 | "value": "C.UTF-8" 32 | }, 33 | { 34 | "name": "TILE_CACHE", 35 | "value": "${tile_cache}" 36 | }, 37 | { 38 | "name": "DATA_LAKE", 39 | "value": "${data_lake}" 40 | }, 41 | { 42 | "name": "MAX_TASKS", 43 | "value": "${max_tasks}" 44 | } 45 | ], 46 | "jobRoleArn": "${job_role_arn}", 47 | "volumes": [ 48 | { 49 | "host": { 50 | "sourcePath": "/tmp" 51 | }, 52 | "name": "tmp" 53 | } 54 | ], 55 | "mountPoints": [ 56 | { 57 | "sourceVolume": "tmp", 58 | "containerPath": "/tmp", 59 | "readOnly": false 60 | } 61 | ], 62 | "ulimits": [ 63 | { 64 | "hardLimit": ${hardULimit}, 65 | "name": "nofile", 66 | "softLimit": ${softULimit} 67 | } 68 | ] 69 | } 
-------------------------------------------------------------------------------- /terraform/templates/iam_assume_role.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "", 6 | "Effect": "Allow", 7 | "Action": "sts:AssumeRole", 8 | "Resource": "${role_arn}" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /terraform/templates/iam_s3_read_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "s3:Get*", 8 | "s3:List*" 9 | ], 10 | "Resource": "*" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /terraform/templates/iam_trust_entity.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Sid": "", 6 | "Action": "sts:AssumeRole", 7 | "Effect": "Allow", 8 | "Principal": { 9 | "AWS": "${role_arn}" 10 | } 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /terraform/templates/lambda_invoke_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": "lambda:InvokeFunction", 7 | "Resource": "*" 8 | } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /terraform/templates/query_batch_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "batch:ListJobs", 8 | "batch:DescribeJobs" 9 | ], 10 | "Resource": "*" 11 | } 12 | ] 13 | } -------------------------------------------------------------------------------- /terraform/templates/role-trust-policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Action": "sts:AssumeRole", 6 | "Effect": "Allow", 7 | "Principal": { 8 | "Service": "${service}.amazonaws.com" 9 | } 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /terraform/templates/run_batch_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "batch:SubmitJob", 8 | "batch:CancelJob", 9 | "batch:TerminateJob", 10 | "batch:TagResource" 11 | ], 12 | "Resource": [ 13 | "${aurora_job_queue_arn}", 14 | "${aurora_job_queue_fast_arn}", 15 | "${aurora_job_definition_arn}", 16 | 17 | "${data_lake_job_queue_arn}", 18 | "${data_lake_job_definition_arn}", 19 | 20 | "${tile_cache_job_queue_arn}", 21 | "${tile_cache_job_definition_arn}", 22 | 23 | "${pixetl_job_queue_arn}", 24 | "${pixetl_job_definition_arn}", 25 | 26 | "${on_demand_compute_job_queue_arn}" 27 | ] 28 | }, 29 | { 30 | "Effect": "Allow", 31 | "Action": [ 32 | "batch:ListJobs", 33 | "batch:DescribeJobs", 34 | "batch:DescribeJobQueues", 35 | "batch:DescribeComputeEnvironments", 36 | "batch:DescribeJobDefinitions" 37 | ], 38 | "Resource": "*" 39 | } 40 | ] 41 | } 
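Editor's note: the `*.json.tmpl` policies above are rendered by Terraform (the root module pins the hashicorp/template provider, as terraform/versions.tf later in this listing shows), but the `${...}` placeholders can also be exercised on their own. The sketch below uses Python's `string.Template`, whose `${name}` syntax happens to match, to render iam_assume_role.json.tmpl with a hypothetical role ARN and check that the result is valid JSON; the ARN and the relative path are assumptions for the example, not values used by the project.

```python
# Sketch: render one of the ${...} policy templates outside Terraform and verify
# that the output parses as JSON. string.Template shares the ${name} placeholder syntax.
# The path and role ARN below are placeholders for this example.
import json
from string import Template

TEMPLATE_PATH = "terraform/templates/iam_assume_role.json.tmpl"
ROLE_ARN = "arn:aws:iam::123456789012:role/example-batch-role"  # hypothetical

with open(TEMPLATE_PATH) as handle:
    rendered = Template(handle.read()).substitute(role_arn=ROLE_ARN)

policy = json.loads(rendered)  # raises ValueError if the rendered text is not valid JSON
assert policy["Statement"][0]["Action"] == "sts:AssumeRole"
print(json.dumps(policy, indent=2))
```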
-------------------------------------------------------------------------------- /terraform/templates/step_function_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "states:StartExecution" 8 | ], 9 | "Resource": [ 10 | "${raster_analysis_state_machine_arn}" 11 | ] 12 | }, 13 | { 14 | "Effect": "Allow", 15 | "Action": [ 16 | "states:DescribeExecution", 17 | "states:DescribeMapRun", 18 | "states:ListMapRuns" 19 | ], 20 | "Resource": "*" 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /terraform/templates/tile_cache_bucket_policy.json.tmpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "s3:ListBucket", 8 | "s3:PutLifecycleConfiguration" 9 | ], 10 | "Resource": "${bucket_arn}" 11 | }, 12 | { 13 | "Effect": "Allow", 14 | "Action": "s3:*", 15 | "Resource": [ 16 | "${bucket_arn}/*" 17 | ] 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /terraform/vars/backend-dev.tfvars: -------------------------------------------------------------------------------- 1 | bucket = "gfw-terraform-dev" 2 | -------------------------------------------------------------------------------- /terraform/vars/backend-production.tfvars: -------------------------------------------------------------------------------- 1 | bucket = "gfw-terraform" -------------------------------------------------------------------------------- /terraform/vars/backend-staging.tfvars: -------------------------------------------------------------------------------- 1 | bucket = "gfw-terraform-staging" 2 | -------------------------------------------------------------------------------- /terraform/vars/terraform-dev.tfvars: -------------------------------------------------------------------------------- 1 | environment = "dev" 2 | log_level = "debug" 3 | service_url = "https://dev-data-api.globalforestwatch.org" # fake, needed for CloudFront 4 | rw_api_url = "https://api.resourcewatch.org" 5 | rw_api_key_arn = "arn:aws:secretsmanager:us-east-1:563860007740:secret:gfw-api/rw-api-key-YhLbaM" # pragma: allowlist secret 6 | desired_count = 1 7 | auto_scaling_min_capacity = 1 8 | auto_scaling_max_capacity = 5 9 | lambda_analysis_workspace = "feature-otf_lists" 10 | key_pair = "dmannarino_gfw" 11 | create_cloudfront_distribution = false 12 | new_relic_license_key_arn = "arn:aws:secretsmanager:us-east-1:563860007740:secret:newrelic/license_key-lolw24" 13 | load_balancer_security_group = "sg-07c9331c01f8da1c8" 14 | load_balancer_arn = "arn:aws:elasticloadbalancing:us-east-1:563860007740:loadbalancer/app/gfw-data-api-elb-shared-dev-lb/60c3ad42ca6522e3" 15 | lb_dns_name = "gfw-data-api-elb-shared-dev-lb-10091095.us-east-1.elb.amazonaws.com" 16 | api_gateway_id = "vzgmihei77" 17 | api_gw_external_app_id = "f10vmg" 18 | api_gw_internal_app_id = "ka6k5w" 19 | api_gateway_url = "https://wddlsuo04c.execute-api.us-east-1.amazonaws.com/deploy" 20 | -------------------------------------------------------------------------------- /terraform/vars/terraform-production.tfvars: -------------------------------------------------------------------------------- 1 | environment = "production" 2 | log_level = "info" 3 | service_url = "https://data-api.globalforestwatch.org" 4 | 
rw_api_url = "https://api.resourcewatch.org" 5 | rw_api_key_arn = "arn:aws:secretsmanager:us-east-1:401951483516:secret:gfw-api/rw-api-key-YQ50uP" # pragma: allowlist secret 6 | desired_count = 2 7 | auto_scaling_min_capacity = 2 8 | auto_scaling_max_capacity = 15 9 | fargate_cpu = 2048 10 | fargate_memory = 4096 11 | lambda_analysis_workspace = "default" 12 | key_pair = "dmannarino_gfw" 13 | new_relic_license_key_arn = "arn:aws:secretsmanager:us-east-1:401951483516:secret:newrelic/license_key-CyqUPX" 14 | -------------------------------------------------------------------------------- /terraform/vars/terraform-staging.tfvars: -------------------------------------------------------------------------------- 1 | environment = "staging" 2 | log_level = "info" 3 | service_url = "https://staging-data-api.globalforestwatch.org" 4 | rw_api_url = "https://api.resourcewatch.org" 5 | rw_api_key_arn = "arn:aws:secretsmanager:us-east-1:274931322839:secret:gfw-api/rw-api-key-xG9YwX" # pragma: allowlist secret 6 | desired_count = 1 7 | auto_scaling_min_capacity = 1 8 | auto_scaling_max_capacity = 15 9 | lambda_analysis_workspace = "default" 10 | key_pair = "dmannarino_gfw" 11 | new_relic_license_key_arn = "arn:aws:secretsmanager:us-east-1:274931322839:secret:newrelic/license_key-1wKZAY" 12 | -------------------------------------------------------------------------------- /terraform/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | aws = { 4 | source = "hashicorp/aws" 5 | version = ">= 3, < 4" 6 | } 7 | local = { 8 | source = "hashicorp/local" 9 | } 10 | template = { 11 | source = "hashicorp/template" 12 | } 13 | } 14 | required_version = ">= 0.13, < 0.14" 15 | } 16 | -------------------------------------------------------------------------------- /tests/crud/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/crud/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/append_test.tsv: -------------------------------------------------------------------------------- 1 | iso adm1 adm2 longitude latitude alert__date alert__time_utc confidence__cat bright_ti4__K bright_ti5__K frp__MW wdpa_protected_area__iucn_cat is__umd_regional_primary_forest_2001 is__birdlife_alliance_for_zero_extinction_site is__birdlife_key_biodiversity_area is__landmark_land_right gfw_plantation__type is__gfw_mining is__gfw_managed_forest rspo_oil_palm__certification_status is__gfw_wood_fiber is__peatland is__idn_forest_moratorium is__gfw_oil_palm idn_forest_area__type per_forest_concession__type is__gfw_oil_gas is__gmw_mangroves_2016 is__ifl_intact_forest_landscape_2016 bra_biome__name alert__count 2 | QRC 7 18 38.24999 -14.63781 2018-08-19 1022 h 367.0 303.2 6.1 "" false false false false "" false false "" false false false false "" "" false false false "" 1 3 | XON 3 7 -66.29684 -14.5566 2019-10-16 1811 n 334.6 294.7 27.5 Other Category true false false true "" false false "" false false false false "" "" false false false "" 1 4 | -------------------------------------------------------------------------------- /tests/fixtures/aws/config: -------------------------------------------------------------------------------- 1 | [default] 2 | s3 = 3 | endpoint_url = http://motoserver:50000 4 | 5 | [plugins] 6 | endpoint = awscli_plugin_endpoint 
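Editor's note: the tests/fixtures/aws/config file just above routes the AWS CLI's S3 calls to a local moto server via the awscli_plugin_endpoint plugin. A boto3 client can be pointed at the same fake endpoint; the sketch below only illustrates that pattern. The endpoint URL is taken from the fixture, while the bucket name, credentials, and the assumption that `motoserver` is a docker-compose.test.yml service are mine, not code from the repository.

```python
# Sketch: aim boto3 at the same mock S3 endpoint the AWS CLI fixture uses.
# Endpoint URL copied from tests/fixtures/aws/config; bucket name and credentials
# are placeholders -- this is not the repository's test setup code.
import boto3

s3 = boto3.client(
    "s3",
    endpoint_url="http://motoserver:50000",  # assumed to be a docker-compose.test.yml service
    aws_access_key_id="testing",
    aws_secret_access_key="testing",
    region_name="us-east-1",
)

s3.create_bucket(Bucket="example-test-bucket")
s3.put_object(
    Bucket="example-test-bucket",
    Key="fixtures/append_test.tsv",
    Body=b"iso\tadm1\tadm2\n",
)
contents = s3.list_objects_v2(Bucket="example-test-bucket").get("Contents", [])
print([obj["Key"] for obj in contents])
```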
-------------------------------------------------------------------------------- /tests/fixtures/test.csv: -------------------------------------------------------------------------------- 1 | alert__date,geom 2 | 2001-03-01,0103000000010000000500000000000000385050C000000000406045C000000000385050C000000000D06045C000000000805050C000000000D06045C000000000805050C000000000406045C000000000385050C000000000406045C0 3 | -------------------------------------------------------------------------------- /tests/fixtures/test.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "MultiPolygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | [ 13 | 10.67647933959961, 14 | 53.8577916408477 15 | ], 16 | [ 17 | 10.699653625488281, 18 | 53.8577916408477 19 | ], 20 | [ 21 | 10.699653625488281, 22 | 53.87575866462502 23 | ], 24 | [ 25 | 10.67647933959961, 26 | 53.87575866462502 27 | ], 28 | [ 29 | 10.67647933959961, 30 | 53.8577916408477 31 | ] 32 | ] 33 | ] 34 | ] 35 | } 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /tests/fixtures/test.gpkg.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/fixtures/test.gpkg.zip -------------------------------------------------------------------------------- /tests/fixtures/test.shp.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/fixtures/test.shp.zip -------------------------------------------------------------------------------- /tests/fixtures/test2.csv: -------------------------------------------------------------------------------- 1 | alert__date,geom 2 | 2001-03-01,010300000001000000050000000000000008AD50C000000000101143C00000000008AD50C000000000A01143C00000000050AD50C000000000A01143C00000000050AD50C000000000101143C00000000008AD50C000000000101143C0 3 | -------------------------------------------------------------------------------- /tests/fixtures/test2.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [{ 4 | "type": "Feature", 5 | "properties": {}, 6 | "geometry": { 7 | "type": "Polygon", 8 | "coordinates": [ 9 | [ 10 | [ 11 | -77.04093933105469, 12 | 38.995840128965035 13 | ], 14 | [ 15 | -77.1653938293457, 16 | 38.90305681295715 17 | ], 18 | [ 19 | -77.03973770141602, 20 | 38.791556581282244 21 | ], 22 | [ 23 | -76.9094467163086, 24 | 38.8930369656108 25 | ], 26 | [ 27 | -77.04093933105469, 28 | 38.995840128965035 29 | ] 30 | ] 31 | ] 32 | } 33 | }] 34 | } -------------------------------------------------------------------------------- /tests/models/test_jobs.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | from app.models.pydantic.jobs import Job 4 | from app.tasks import callback_constructor 5 | 6 | 7 | def test_jobs_model(): 8 | 9 | callback = callback_constructor(uuid4()) 10 | 11 | job = Job( 12 | dataset="test", 13 | job_name="test", 14 | job_queue="test", 15 | job_definition="test", 16 | command=["1"], 17 | environment=[{"name": "TEST", "value": "TEST"}], 18 | vcpus=1, 19 | memory=2, 20 | attempts=1, 21 | 
attempt_duration_seconds=1, 22 | parents=None, 23 | callback=callback, 24 | ) 25 | 26 | assert job.environment == [ 27 | {"name": "TEST", "value": "TEST"}, 28 | {"name": "CORES", "value": "1"}, 29 | {"name": "MAX_MEM", "value": "2"}, 30 | ] 31 | 32 | job.vcpus = 45 33 | assert job.environment == [ 34 | {"name": "TEST", "value": "TEST"}, 35 | {"name": "CORES", "value": "45"}, 36 | {"name": "MAX_MEM", "value": "2"}, 37 | ] 38 | 39 | job.memory = 100 40 | assert job.environment == [ 41 | {"name": "TEST", "value": "TEST"}, 42 | {"name": "CORES", "value": "45"}, 43 | {"name": "MAX_MEM", "value": "100"}, 44 | ] 45 | -------------------------------------------------------------------------------- /tests/routes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/routes/__init__.py -------------------------------------------------------------------------------- /tests/routes/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests/routes/datasets/__init__.py -------------------------------------------------------------------------------- /tests/routes/test_authorization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastapi import HTTPException 3 | 4 | from app.authentication.token import is_admin, is_service_account 5 | from app.utils.rw_api import who_am_i 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_is_admin(): 10 | 11 | message = "" 12 | try: 13 | await is_admin("my_fake_token") 14 | except HTTPException as e: 15 | message = e.detail 16 | 17 | assert message == "Unauthorized" 18 | 19 | 20 | @pytest.mark.asyncio 21 | async def test_is_service_account(): 22 | 23 | message = "" 24 | try: 25 | await is_service_account("my_fake_token") 26 | except HTTPException as e: 27 | message = e.detail 28 | 29 | assert message == "Unauthorized" 30 | 31 | 32 | @pytest.mark.asyncio 33 | async def test_who_am_i(): 34 | response = await who_am_i("my_fake_token") 35 | assert response.status_code == 401 36 | 37 | 38 | @pytest.mark.asyncio 39 | async def test_login(async_client): 40 | response = await async_client.post( 41 | "/auth/token", data={"username": "name", "password": "secret"} 42 | ) 43 | assert response.status_code == 401 44 | -------------------------------------------------------------------------------- /tests/tasks/test_default_assets.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from .. 
import BUCKET, SHP_NAME 5 | from ..utils import create_default_asset 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_default_asset_cant_delete(batch_client, async_client: AsyncClient): 10 | _, logs = batch_client 11 | 12 | dataset = "test" 13 | 14 | version = "v1.1.1" 15 | input_data = { 16 | "creation_options": { 17 | "source_type": "vector", 18 | "source_uri": [f"s3://{BUCKET}/{SHP_NAME}"], 19 | "source_driver": "ESRI Shapefile", 20 | "create_dynamic_vector_tile_cache": False, 21 | }, 22 | } 23 | 24 | asset = await create_default_asset( 25 | dataset, 26 | version, 27 | version_payload=input_data, 28 | async_client=async_client, 29 | logs=logs, 30 | execute_batch_jobs=False, 31 | skip_dataset=False, 32 | ) 33 | asset_id = asset["asset_id"] 34 | 35 | response = await async_client.delete(f"/asset/{asset_id}") 36 | assert response.status_code == 409 37 | expected_message = ( 38 | "Deletion failed. You cannot delete a default asset. " 39 | "To delete a default asset you must delete the parent version." 40 | ) 41 | assert response.json()["message"] == expected_message 42 | -------------------------------------------------------------------------------- /tests/tasks/test_delete_assets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine, db 4 | from app.settings.globals import DATA_LAKE_BUCKET 5 | from app.tasks.delete_assets import ( 6 | delete_database_table_asset, 7 | delete_raster_tileset_assets, 8 | ) 9 | from app.utils.aws import get_s3_client 10 | from tests import TSV_PATH 11 | 12 | 13 | @pytest.mark.asyncio 14 | async def test_delete_raster_tileset_assets(): 15 | s3_client = get_s3_client() 16 | dataset = "test_delete_raster_tileset" 17 | version = "table" 18 | srid = "epsg-4326" 19 | grid = "10/40000" 20 | value = "year" 21 | 22 | for i in range(0, 10): 23 | s3_client.upload_file( 24 | TSV_PATH, 25 | DATA_LAKE_BUCKET, 26 | f"{dataset}/{version}/raster/{srid}/{grid}/{value}/test_{i}.tsv", 27 | ) 28 | 29 | response = s3_client.list_objects_v2(Bucket=DATA_LAKE_BUCKET, Prefix=dataset) 30 | 31 | assert response["KeyCount"] == 10 32 | 33 | await delete_raster_tileset_assets(dataset, version, srid, grid, value) 34 | 35 | response = s3_client.list_objects_v2(Bucket=DATA_LAKE_BUCKET, Prefix=dataset) 36 | assert response["KeyCount"] == 0 37 | 38 | 39 | @pytest.mark.asyncio 40 | async def test_delete_database_table(app): 41 | dataset = "test" 42 | version = "table" 43 | 44 | async with ContextEngine("WRITE"): 45 | # create schema and stable 46 | await db.all(f"CREATE SCHEMA {dataset};") 47 | await db.all(f"CREATE TABLE {dataset}.{version} (col1 text);") 48 | 49 | rows = await db.all(f"select * from pg_tables where schemaname='{dataset}';") 50 | assert len(rows) == 1 51 | 52 | # test if function drops table 53 | await delete_database_table_asset(dataset, version) 54 | 55 | rows = await db.all(f"select * from pg_tables where schemaname='{dataset}';") 56 | assert len(rows) == 0 57 | 58 | # clean up 59 | await db.all(f"DROP SCHEMA {dataset};") 60 | -------------------------------------------------------------------------------- /tests/utils/test_path.py: -------------------------------------------------------------------------------- 1 | from app.utils.path import get_layer_name, is_zipped 2 | from tests import BUCKET, GEOJSON_NAME, SHP_NAME 3 | 4 | 5 | def test_zipped(): 6 | s3_uri = f"s3://{BUCKET}/{GEOJSON_NAME}" 7 | zipped = is_zipped(s3_uri) 8 | assert zipped is False 9 | 10 | s3_uri = 
f"s3://{BUCKET}/{SHP_NAME}" 11 | zipped = is_zipped(s3_uri) 12 | assert zipped is True 13 | 14 | found = True 15 | s3_uri = f"s3://{BUCKET}/doesntexist" 16 | try: 17 | is_zipped(s3_uri) 18 | except FileNotFoundError: 19 | found = False 20 | 21 | assert not found 22 | 23 | 24 | def test_get_layer_name(): 25 | s3_uri = f"s3://{BUCKET}/{SHP_NAME}" 26 | layer = get_layer_name(s3_uri) 27 | assert layer == "test" 28 | 29 | s3_uri = f"s3://{BUCKET}/{GEOJSON_NAME}" 30 | layer = get_layer_name(s3_uri) 31 | assert layer == "test" 32 | -------------------------------------------------------------------------------- /tests_v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/fixtures/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/fixtures/authentication/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/authentication/api_keys.py: -------------------------------------------------------------------------------- 1 | GOOD_ORGANIZATIONS = ["WRI", "Global Forest Watch"] 2 | GOOD_EMAILS = [ 3 | "info@wri.org", 4 | "admin@globalforestwatch.org", 5 | "firstname.lastname@test.com", 6 | ] 7 | GOOD_DOMAINS = [ 8 | "www.globalforestwatch.org", 9 | "*.globalforestwatch.org", 10 | "globalforestwatch.org", 11 | "localhost", 12 | ] 13 | 14 | BAD_EMAILS = ["not an email", "also_not@n-email", "nope", None] 15 | BAD_DOMAINS = [ 16 | "www.*.com", 17 | "*", 18 | "www.test*.org", 19 | "www.test.*", 20 | "*.com", 21 | "globalforestwatch.org:443", 22 | "localhost:3000", 23 | ] 24 | -------------------------------------------------------------------------------- /tests_v2/fixtures/creation_options/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/fixtures/creation_options/__init__.py -------------------------------------------------------------------------------- /tests_v2/fixtures/creation_options/versions.py: -------------------------------------------------------------------------------- 1 | # Vector source creation options 2 | bucket = "my_bucket" 3 | shp_name = "my_shape.zip" 4 | tif_name = "tile.tif" 5 | 6 | VECTOR_SOURCE_CREATION_OPTIONS = { 7 | "source_driver": "ESRI Shapefile", 8 | "source_type": "vector", 9 | "source_uri": [f"s3://{bucket}/{shp_name}"], 10 | "indices": [ 11 | {"column_names": ["geom"], "index_type": "gist"}, 12 | {"column_names": ["geom_wm"], "index_type": "gist"}, 13 | {"column_names": ["gfw_geostore_id"], "index_type": "hash"}, 14 | ], 15 | "create_dynamic_vector_tile_cache": True, 16 | "add_to_geostore": True, 17 | } 18 | 19 | RASTER_CREATION_OPTIONS = { 20 | "source_driver": "GeoTIFF", 21 | "source_type": "raster", 22 | "source_uri": [f"s3://{bucket}/{tif_name}"], 23 | 
"pixel_meaning": "year", 24 | "data_type": "uint16", 25 | "grid": "10/40000", 26 | "compute_stats": False, 27 | } 28 | -------------------------------------------------------------------------------- /tests_v2/fixtures/geojson/test.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "MultiPolygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | [ 13 | 10.67647933959961, 14 | 53.8577916408477 15 | ], 16 | [ 17 | 10.699653625488281, 18 | 53.8577916408477 19 | ], 20 | [ 21 | 10.699653625488281, 22 | 53.87575866462502 23 | ], 24 | [ 25 | 10.67647933959961, 26 | 53.87575866462502 27 | ], 28 | [ 29 | 10.67647933959961, 30 | 53.8577916408477 31 | ] 32 | ] 33 | ] 34 | ] 35 | } 36 | } 37 | ] 38 | } -------------------------------------------------------------------------------- /tests_v2/fixtures/geojson/test_bad.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "LineString", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 10.67647933959961, 13 | 53.8577916408477 14 | ], 15 | [ 16 | 10.699653625488281, 17 | 53.8577916408477 18 | ], 19 | [ 20 | 10.699653625488281, 21 | 53.87575866462502 22 | ], 23 | [ 24 | 10.67647933959961, 25 | 53.87575866462502 26 | ], 27 | [ 28 | 10.67647933959961, 29 | 53.8577916408477 30 | ] 31 | ] 32 | ] 33 | } 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /tests_v2/fixtures/geojson/test_huge.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { 5 | "type": "Feature", 6 | "properties": {}, 7 | "geometry": { 8 | "type": "Polygon", 9 | "coordinates": [ 10 | [ 11 | [ 12 | 2.109375, 13 | 7.013667927566642 14 | ], 15 | [ 16 | 150.1171875, 17 | 7.013667927566642 18 | ], 19 | [ 20 | 150.1171875, 21 | 71.52490903732816 22 | ], 23 | [ 24 | 2.109375, 25 | 71.52490903732816 26 | ], 27 | [ 28 | 2.109375, 29 | 7.013667927566642 30 | ] 31 | ] 32 | ] 33 | } 34 | } 35 | ] 36 | } -------------------------------------------------------------------------------- /tests_v2/fixtures/metadata/dataset.py: -------------------------------------------------------------------------------- 1 | DATASET_METADATA = { 2 | "title": "test metadata", 3 | "source": "Source Organization test", 4 | "license": "[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)", 5 | "data_language": "en", 6 | "overview": "Some detailed data description", 7 | } 8 | -------------------------------------------------------------------------------- /tests_v2/fixtures/metadata/version.py: -------------------------------------------------------------------------------- 1 | VERSION_METADATA = { 2 | "content_date_range": {"start_date": "2000-01-01", "end_date": "2021-01-01"}, 3 | "content_date_description": "2000 - present", 4 | "last_update": "2020-01-03", 5 | "spatial_resolution": 10, 6 | "resolution_description": "10 meters", 7 | } 8 | -------------------------------------------------------------------------------- /tests_v2/fixtures/otf_payload/otf_payload.py: -------------------------------------------------------------------------------- 1 | environment = [ 2 | { 3 | "name": "my_first_dataset__date_conf", 4 | "no_data": 0, 5 | 
"raster_table": None, 6 | "decode_expression": "", 7 | "encode_expression": "", 8 | "source_uri": "s3://gfw-data-lake-test/my_first_dataset/v1/raster/epsg-4326/10/40000/date_conf/geotiff/{tile_id}.tif", 9 | "grid": "10/40000", 10 | "tile_scheme": "nw", 11 | }, 12 | { 13 | "name": "my_first_dataset__date", 14 | "no_data": 0, 15 | "raster_table": None, 16 | "decode_expression": "(A + 16435).astype('datetime64[D]').astype(str)", 17 | "encode_expression": "(datetime64(A) - 16435).astype(uint16)", 18 | "source_layer": "my_first_dataset__date_conf", 19 | "calc": "A % 10000", 20 | }, 21 | { 22 | "name": "my_first_dataset__confidence", 23 | "no_data": 0, 24 | "raster_table": { 25 | "rows": [ 26 | {"value": 2, "meaning": "nominal"}, 27 | {"value": 3, "meaning": "high"}, 28 | {"value": 4, "meaning": "highest"}, 29 | ], 30 | "default_meaning": "not_detected", 31 | }, 32 | "decode_expression": "", 33 | "encode_expression": "", 34 | "source_layer": "my_first_dataset__date_conf", 35 | "calc": "floor(A / 10000).astype(uint8)", 36 | }, 37 | ] 38 | 39 | sql = "select sum(area__ha) from data where is__umd_regional_primary_forest_2001 != 'false' and umd_tree_cover_density_2000__threshold >= 30 and umd_tree_cover_loss__year >= 2001 group by umd_tree_cover_loss__year" 40 | -------------------------------------------------------------------------------- /tests_v2/fixtures/sample_rw_geostore_response.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from app.models.pydantic.geostore import Geometry, GeostoreCommon 4 | 5 | response_body: Dict = { 6 | "data": { 7 | "type": "geoStore", 8 | "id": "d8907d30eb5ec7e33a68aa31aaf918a4", 9 | "attributes": { 10 | "geojson": { 11 | "crs": {}, 12 | "type": "FeatureCollection", 13 | "features": [ 14 | { 15 | "geometry": { 16 | "coordinates": [ 17 | [ 18 | [13.286161423, 2.22263581], 19 | [13.895623684, 2.613460107], 20 | [14.475367069, 2.43969337], 21 | [15.288956165, 1.338479182], 22 | [13.44381094, 0.682623753], 23 | [13.286161423, 2.22263581], 24 | ] 25 | ], 26 | "type": "Polygon", 27 | }, 28 | "type": "Feature", 29 | } 30 | ], 31 | }, 32 | "hash": "d8907d30eb5ec7e33a68aa31aaf918a4", # pragma: allowlist secret 33 | "provider": {}, 34 | "areaHa": 2950164.393265342, 35 | "bbox": [13.286161423, 0.682623753, 15.288956165, 2.613460107], 36 | "lock": False, 37 | "info": {"use": {}}, 38 | }, 39 | } 40 | } 41 | 42 | data: Dict = response_body["data"]["attributes"] 43 | geojson: Dict = data["geojson"]["features"][0]["geometry"] 44 | geometry: Geometry = Geometry.parse_obj(geojson) 45 | geostore_common: GeostoreCommon = GeostoreCommon( 46 | geostore_id=data["hash"], 47 | geojson=geometry, 48 | area__ha=data["areaHa"], 49 | bbox=data["bbox"], 50 | ) 51 | -------------------------------------------------------------------------------- /tests_v2/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/authentication/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/authentication/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/crud/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/crud/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/crud/test_assets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine 4 | from app.crud.assets import get_default_asset, update_asset 5 | from app.crud.datasets import get_dataset 6 | from app.crud.versions import get_version 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_update_version__is_downloadable(generic_vector_source_version): 11 | dataset, version, _ = generic_vector_source_version 12 | dataset_row = await get_dataset(dataset) 13 | version_row = await get_version(dataset, version) 14 | asset_row = await get_default_asset(dataset, version) 15 | 16 | # Check if default value is correctly populated 17 | assert dataset_row.is_downloadable is True 18 | assert version_row.is_downloadable is True 19 | assert asset_row.is_downloadable is True 20 | 21 | # This should update the downstream versions and assets only 22 | async with ContextEngine("WRITE"): 23 | await update_asset(asset_row.asset_id, **{"is_downloadable": False}) 24 | 25 | dataset_row = await get_dataset(dataset) 26 | version_row = await get_version(dataset, version) 27 | asset_row = await get_default_asset(dataset, version) 28 | 29 | assert dataset_row.is_downloadable is True 30 | assert version_row.is_downloadable is True 31 | assert asset_row.is_downloadable is False 32 | -------------------------------------------------------------------------------- /tests_v2/unit/app/crud/test_datasets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine 4 | from app.crud.assets import get_default_asset 5 | from app.crud.datasets import get_dataset, update_dataset 6 | from app.crud.versions import get_version 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_update_dataset__is_downloadable(generic_vector_source_version): 11 | dataset, version, _ = generic_vector_source_version 12 | dataset_row = await get_dataset(dataset) 13 | version_row = await get_version(dataset, version) 14 | asset_row = await get_default_asset(dataset, version) 15 | 16 | # Check if default value is correctly populated 17 | assert dataset_row.is_downloadable is True 18 | assert version_row.is_downloadable is True 19 | assert asset_row.is_downloadable is True 20 | 21 | # This should update the downstream versions and assets only 22 | async with ContextEngine("WRITE"): 23 | await update_dataset(dataset, **{"is_downloadable": False, }) 24 | 25 | dataset_row = await get_dataset(dataset) 26 | version_row = await get_version(dataset, version) 27 | asset_row = await get_default_asset(dataset, version) 28 | 29 | assert dataset_row.is_downloadable is False 30 | assert version_row.is_downloadable is False 31 | assert asset_row.is_downloadable is False 32 | -------------------------------------------------------------------------------- 
/tests_v2/unit/app/crud/test_versions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.application import ContextEngine 4 | from app.crud.assets import get_default_asset 5 | from app.crud.datasets import get_dataset 6 | from app.crud.versions import get_version, update_version 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_update_version__is_downloadable(generic_vector_source_version): 11 | dataset, version, _ = generic_vector_source_version 12 | dataset_row = await get_dataset(dataset) 13 | version_row = await get_version(dataset, version) 14 | asset_row = await get_default_asset(dataset, version) 15 | 16 | # Check if default value is correctly populated 17 | assert dataset_row.is_downloadable is True 18 | assert version_row.is_downloadable is True 19 | assert asset_row.is_downloadable is True 20 | 21 | # This should update the downstream versions and assets only 22 | async with ContextEngine("WRITE"): 23 | await update_version(dataset, version, **{"is_downloadable": False}) 24 | 25 | dataset_row = await get_dataset(dataset) 26 | version_row = await get_version(dataset, version) 27 | asset_row = await get_default_asset(dataset, version) 28 | 29 | assert dataset_row.is_downloadable is True 30 | assert version_row.is_downloadable is False 31 | assert asset_row.is_downloadable is False 32 | -------------------------------------------------------------------------------- /tests_v2/unit/app/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/models/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/models/pydantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/models/pydantic/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/analysis/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/assets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/assets/test_assets_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.assets import AssetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_get_assets_returns_assets_response(async_client: AsyncClient) -> None: 
9 | resp = await async_client.get("/assets") 10 | assert AssetsResponse(**resp.json()) 11 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/assets/test_assets_with_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.assets import PaginatedAssetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_adding_page_number_returns_paginated_assets_response( 9 | async_client: AsyncClient, 10 | ) -> None: 11 | 12 | resp = await async_client.get("/assets", params=[("page[number]", "1")]) 13 | assert PaginatedAssetsResponse(**resp.json()) 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_adding_size_parameter_returns_paginated_assets_response( 18 | async_client: AsyncClient, 19 | ) -> None: 20 | 21 | resp = await async_client.get("/assets", params=[("page[size]", "10")]) 22 | assert PaginatedAssetsResponse(**resp.json()) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_adding_both_page_and_size_parameter_returns_paginated_assets_response( 27 | async_client: AsyncClient, 28 | ) -> None: 29 | 30 | resp = await async_client.get( 31 | "/assets", params=[("page[number]", "1"), ("page[size]", "10")] 32 | ) 33 | assert PaginatedAssetsResponse(**resp.json()) 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_get_paginated_asset_with_pagesize_less_than_1_returns_4xx( 38 | async_client: AsyncClient, 39 | ) -> None: 40 | resp = await async_client.get("/assets", params=[("page[size]", "0")]) 41 | assert resp.status_code == 422 42 | 43 | 44 | @pytest.mark.asyncio 45 | async def test_get_paginated_asset_with_pagenumber_less_than_1_returns_4xx( 46 | async_client: AsyncClient, 47 | ) -> None: 48 | resp = await async_client.get("/assets", params=[("page[number]", "0")]) 49 | assert resp.status_code == 422 50 | 51 | 52 | @pytest.mark.asyncio 53 | async def test_get_paginated_asset_with_pagenumber_more_than_max_pages_returns_4xx( 54 | async_client: AsyncClient, 55 | ) -> None: 56 | resp = await async_client.get("/assets", params=[("page[number]", "100")]) 57 | assert resp.status_code == 422 58 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/authentication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/authentication/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datamart/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datamart/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datasets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datasets/datasets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/datasets/datasets/assets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/assets/test_dataset_assets_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.assets import AssetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_get_assets_returns_assets_of_a_specific_dataset_and_version_response( 9 | async_client: AsyncClient, generic_vector_source_version 10 | ) -> None: 11 | dataset_name, dataset_version, _ = generic_vector_source_version 12 | resp = await async_client.get(f"/dataset/{dataset_name}/{dataset_version}/assets") 13 | assert AssetsResponse(**resp.json()) 14 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/test_datasets_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import pytest as pytest 4 | from httpx import AsyncClient 5 | 6 | from app.models.pydantic.datasets import DatasetsResponse 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_get_datasets_returns_datasets_response( 11 | async_client: AsyncClient, generic_dataset: Tuple[str, str] 12 | ) -> None: 13 | 14 | resp = await async_client.get("/datasets") 15 | assert DatasetsResponse(**resp.json()) 16 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/datasets/datasets/test_datasets_with_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest as pytest 2 | from httpx import AsyncClient 3 | 4 | from app.models.pydantic.datasets import PaginatedDatasetsResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_adding_page_number_returns_paginated_datasets_response( 9 | async_client: AsyncClient, 10 | ) -> None: 11 | 12 | resp = await async_client.get("/datasets", params=[("page[number]", "1")]) 13 | assert PaginatedDatasetsResponse(**resp.json()) 14 | 15 | 16 | @pytest.mark.asyncio 17 | async def test_adding_size_parameter_returns_paginated_datasets_response( 18 | async_client: AsyncClient, 19 | ) -> None: 20 | 21 | resp = await async_client.get("/datasets", params=[("page[size]", "10")]) 22 | assert PaginatedDatasetsResponse(**resp.json()) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_adding_both_page_and_size_parameter_returns_paginated_datasets_response( 27 | async_client: AsyncClient, 28 | ) -> None: 29 | 30 | resp = await async_client.get( 31 | "/datasets", params=[("page[number]", "1"), ("page[size]", "10")] 32 | ) 33 | assert PaginatedDatasetsResponse(**resp.json()) 34 | 35 | 36 | @pytest.mark.asyncio 37 | async def test_get_paginated_dataset_with_pagesize_less_than_1_returns_4xx( 38 | async_client: AsyncClient, 39 | ) -> None: 40 | resp = await async_client.get("/datasets", params=[("page[size]", "0")]) 
41 | assert resp.status_code == 422 42 | 43 | 44 | @pytest.mark.asyncio 45 | async def test_get_paginated_dataset_with_pagenumber_less_than_1_returns_4xx( 46 | async_client: AsyncClient, 47 | ) -> None: 48 | resp = await async_client.get("/datasets", params=[("page[number]", "0")]) 49 | assert resp.status_code == 422 50 | 51 | 52 | @pytest.mark.asyncio 53 | async def test_get_paginated_dataset_with_pagenumber_more_than_max_pages_returns_4xx( 54 | async_client: AsyncClient, 55 | ) -> None: 56 | resp = await async_client.get("/datasets", params=[("page[number]", "100")]) 57 | assert resp.status_code == 422 58 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/geostore/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/geostore/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/health/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/health/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/health/test_health.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from httpx import AsyncClient 3 | 4 | from tests_v2.unit.app.routes.utils import assert_jsend 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_ping(async_client: AsyncClient): 9 | response = await async_client.get("/ping") 10 | 11 | assert_jsend(response.json()) 12 | assert response.status_code == 200 13 | assert response.json()["data"] == "pong" 14 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/jobs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/jobs/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/political/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/political/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/political/id_lookup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/political/id_lookup/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/routes/tasks/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/tasks/test_asset_tasks_with_no_pagination.py: -------------------------------------------------------------------------------- 1 | import pytest 2 
| from httpx import AsyncClient 3 | 4 | from app.models.pydantic.tasks import TasksResponse 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_get_asset_tasks_returns_tasks_response( 9 | async_client: AsyncClient, generic_vector_source_version 10 | ) -> None: 11 | 12 | dataset_name, dataset_version, _ = generic_vector_source_version 13 | version_resp = await async_client.get( 14 | f"/dataset/{dataset_name}/{dataset_version}/assets" 15 | ) 16 | asset_id = version_resp.json()["data"][0]["asset_id"] 17 | resp = await async_client.get(f"/asset/{asset_id}/tasks") 18 | 19 | assert TasksResponse(**resp.json()) 20 | -------------------------------------------------------------------------------- /tests_v2/unit/app/routes/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Dict 3 | 4 | 5 | def assert_jsend(resp_obj: Dict): 6 | assert resp_obj.get("status") in ("success", "error", "failed") 7 | if resp_obj.get("status") == "success": 8 | assert resp_obj.get("data") is not None 9 | else: 10 | assert resp_obj.get("message") is not None 11 | 12 | 13 | def assert_is_datetime(value: str): 14 | datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f") 15 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/tasks/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/datamart/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/tasks/datamart/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/tasks/raster_tile_cache_assets/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/collaborators/__init__.py: -------------------------------------------------------------------------------- 1 | MODULE_PATH_UNDER_TEST = "app.tasks.raster_tile_cache_assets.raster_tile_cache_assets" 2 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/collaborators/test_crud_collaboration.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from app.tasks.raster_tile_cache_assets import raster_tile_cache_asset 6 | 7 | from . 
import MODULE_PATH_UNDER_TEST 8 | 9 | 10 | @patch(f"{MODULE_PATH_UNDER_TEST}.execute", autospec=True) 11 | @patch(f"{MODULE_PATH_UNDER_TEST}.symbology_constructor", autospec=True) 12 | @patch(f"{MODULE_PATH_UNDER_TEST}.reproject_to_web_mercator", autospec=True) 13 | @patch(f"{MODULE_PATH_UNDER_TEST}.get_asset", autospec=True) 14 | class TestCrudCollaboration: 15 | @pytest.mark.asyncio 16 | async def test_source_asset_is_retrieved_by_uuid( 17 | self, 18 | get_asset_mock, 19 | web_mercator_dummy, 20 | symbology_constructor_dummy, 21 | execute_dummy, 22 | tile_cache_asset_uuid, 23 | creation_options_dict, 24 | source_asset, 25 | reprojection, 26 | symbology_info, 27 | change_log, 28 | ): 29 | get_asset_mock.return_value = source_asset 30 | symbology_constructor_dummy.__getitem__.return_value = symbology_info 31 | web_mercator_dummy.return_value = reprojection 32 | execute_dummy.return_value = change_log 33 | 34 | await raster_tile_cache_asset( 35 | "test_dataset", "2022", tile_cache_asset_uuid, creation_options_dict 36 | ) 37 | 38 | get_asset_mock.assert_called_with( 39 | creation_options_dict["creation_options"]["source_asset_id"] 40 | ) 41 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/raster_tile_cache_assets/collaborators/test_raster_tile_cache_assets_happy_path.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | from app.models.enum.change_log import ChangeLogStatus 6 | from app.tasks.raster_tile_cache_assets import raster_tile_cache_asset 7 | 8 | from . import MODULE_PATH_UNDER_TEST 9 | 10 | 11 | @patch(f"{MODULE_PATH_UNDER_TEST}.execute", autospec=True) 12 | @patch(f"{MODULE_PATH_UNDER_TEST}.symbology_constructor", autospec=True) 13 | @patch(f"{MODULE_PATH_UNDER_TEST}.reproject_to_web_mercator", autospec=True) 14 | @patch(f"{MODULE_PATH_UNDER_TEST}.get_asset", autospec=True) 15 | @pytest.mark.asyncio 16 | async def test_exploratory_test_runs_without_error( 17 | get_asset_dummy, 18 | web_mercator_dummy, 19 | symbology_constructor_dummy, 20 | execute_dummy, 21 | tile_cache_asset_uuid, 22 | creation_options_dict, 23 | source_asset, 24 | reprojection, 25 | symbology_info, 26 | change_log, 27 | ): 28 | """Goal of this test is to determine the minimum amount of patching we need 29 | to do to get the function to run as much side-effect free code as 30 | possible.""" 31 | get_asset_dummy.return_value = source_asset 32 | symbology_constructor_dummy.__getitem__.return_value = symbology_info 33 | web_mercator_dummy.return_value = reprojection 34 | execute_dummy.return_value = change_log 35 | 36 | result = await raster_tile_cache_asset( 37 | "test_dataset", "2022", tile_cache_asset_uuid, creation_options_dict 38 | ) 39 | 40 | assert result.status == ChangeLogStatus.success 41 | -------------------------------------------------------------------------------- /tests_v2/unit/app/tasks/test_batch.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | from unittest.mock import MagicMock, patch 3 | 4 | from fastapi.logger import logger 5 | 6 | from app.tasks.batch import submit_batch_job 7 | from app.tasks.vector_source_assets import _create_add_gfw_fields_job 8 | from tests_v2.conftest import mock_callback 9 | 10 | TEST_JOB_ENV: List[Dict[str, str]] = [{"name": "PASSWORD", "value": "DON'T LOG ME"}] 11 | 12 | 13 | @patch("app.utils.aws.boto3.client") 14 | @patch.object(logger, 
"info") # Patch the logger.info directly 15 | @patch("app.tasks.batch.UUID") # Patch the UUID class 16 | async def test_submit_batch_job(mock_uuid, mock_logging_info, mock_boto3_client): 17 | mock_client = MagicMock() 18 | mock_boto3_client.return_value = mock_client 19 | 20 | attempt_duration_seconds: int = 100 21 | 22 | job = await _create_add_gfw_fields_job( 23 | "some_dataset", 24 | "v1", 25 | parents=list(), 26 | job_env=TEST_JOB_ENV, 27 | callback=mock_callback, 28 | attempt_duration_seconds=attempt_duration_seconds, 29 | ) 30 | 31 | # Call the function you want to test 32 | submit_batch_job(job) 33 | 34 | mock_boto3_client.assert_called_once_with( 35 | "batch", region_name="us-east-1", endpoint_url=None 36 | ) 37 | 38 | # Assert that the logger.info was called with the expected log message 39 | assert "add_gfw_fields" in mock_logging_info.call_args.args[0] 40 | assert "DON'T LOG ME" not in mock_logging_info.call_args.args[0] 41 | -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/utils/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/paginate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/app/utils/paginate/__init__.py -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/paginate/test_offset_calculation.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock 2 | 3 | import pytest 4 | 5 | from app.utils.paginate import paginate_collection 6 | 7 | DONT_CARE: int = 1 8 | 9 | 10 | @pytest.mark.asyncio 11 | async def test_offset_is_0_for_page_1_when_size_is_given(): 12 | spy_get_collection = AsyncMock() 13 | dummy_count_collection = AsyncMock(return_value=DONT_CARE) 14 | 15 | await paginate_collection( 16 | paged_items_fn=spy_get_collection, 17 | item_count_fn=dummy_count_collection, 18 | size=10, 19 | page=1, 20 | ) 21 | 22 | spy_get_collection.assert_called_with(10, 0) 23 | 24 | 25 | @pytest.mark.asyncio 26 | async def test_offset_is_0_when_no_page_is_given(): 27 | spy_get_collection = AsyncMock() 28 | dummy_count_collection = AsyncMock(return_value=DONT_CARE) 29 | 30 | await paginate_collection( 31 | paged_items_fn=spy_get_collection, item_count_fn=dummy_count_collection, size=10 32 | ) 33 | 34 | spy_get_collection.assert_called_with(10, 0) 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_offset_is_10_for_page_2_when_page_size_is_10(): 39 | spy_get_collection = AsyncMock() 40 | stub_count_collection = AsyncMock(return_value=15) 41 | 42 | await paginate_collection( 43 | paged_items_fn=spy_get_collection, 44 | item_count_fn=stub_count_collection, 45 | size=10, 46 | page=2, 47 | ) 48 | 49 | spy_get_collection.assert_called_with(10, 10) 50 | -------------------------------------------------------------------------------- /tests_v2/unit/app/utils/test_google.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from unittest.mock import Mock 3 | 4 | from _pytest.monkeypatch import MonkeyPatch 5 | 6 | from app.utils import google 
good_bucket: str = "good_bucket"
good_prefix: str = "good_prefix"
all_the_files: List[str] = [
    f"{good_prefix}/irrelevant.mp3",
    f"{good_prefix}/something.csv",
    f"{good_prefix}/world.tif",
]

all_the_files_gdal_notation: List[str] = [
    f"/vsigs/{good_bucket}/{x}" for x in all_the_files
]


def test_get_matching_gs_files_no_filtering(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix)
    assert len(keys) == 3
    assert set(keys) == set(all_the_files_gdal_notation)


def test_get_matching_gs_files_match_extensions(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix, extensions=[".tif"])
    assert keys == [f"/vsigs/{good_bucket}/{good_prefix}/world.tif"]


def test_get_matching_gs_files_no_matches(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix, extensions=[".pdf"])
    assert keys == []


def test_get_matching_gs_files_early_exit(monkeypatch: MonkeyPatch):
    mock_get_prefix_objects = Mock(return_value=all_the_files)
    monkeypatch.setattr(google, "get_prefix_objects", mock_get_prefix_objects)

    keys = get_gs_files(good_bucket, good_prefix, exit_after_max=1)
    assert len(keys) == 1
    assert keys[0] in all_the_files_gdal_notation
--------------------------------------------------------------------------------
/tests_v2/unit/batch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/batch/__init__.py
--------------------------------------------------------------------------------
/tests_v2/unit/batch/python/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wri/gfw-data-api/1cea4d4dd440db195ad51469a839ceae5675ec94/tests_v2/unit/batch/python/__init__.py
--------------------------------------------------------------------------------
/tests_v2/unit/batch/python/test_adjust_num_processes.py:
--------------------------------------------------------------------------------
import pytest

from batch.python.adjust_num_processes import calc_num_processes
from tests_v2.utils import BatchJobMock

job_descriptions = [
    {
        "jobId": "8e76ecf5-99a0-43a1-9b97-8e6616b90983",
        "attempts": [
            {"container": {"exitCode": 137}},
            {"container": {"exitCode": 1}},
            {"container": {"exitCode": 137}},
        ],
    }
]


@pytest.mark.parametrize("orig_num_processes,expected", [(96, 24), (5, 1), (0, 1)])
def test_calc_num_processes(orig_num_processes, expected):
    job_id: str = "8e76ecf5-99a0-43a1-9b97-8e6616b90983"
    batch_client = BatchJobMock(job_desc=job_descriptions)

    new_cores_val = calc_num_processes(job_id, orig_num_processes, batch_client)
    assert new_cores_val == expected
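
# Notes on the fixture above: exit code 137 means the container was killed with
# SIGKILL, which in AWS Batch usually indicates it exceeded its memory limit, so
# two of the three recorded attempts were OOM-killed. The parametrized cases
# encode the expected behavior of calc_num_processes for that history: scale the
# requested process count down (96 -> 24, 5 -> 1) and never return fewer than
# one process (0 -> 1).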
--------------------------------------------------------------------------------
/wait_for_postgres.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# wait_for_postgres.sh
# Block until PostgreSQL accepts connections, then exec the given command.

set -e

# Attempt to open (and immediately close, via \q) a psql session once per
# second until the server is reachable.
until PGPASSWORD=$DB_PASSWORD psql -h "$DB_HOST" -U "$DB_USER" -d "$DATABASE" -c '\q'; do
  >&2 echo "Postgres is unavailable - sleeping"
  sleep 1
done

>&2 echo "Postgres is up - executing command"
# Replace this shell with the command passed as arguments.
exec "$@"
--------------------------------------------------------------------------------