├── .dockerignore ├── stack ├── __init__.py ├── config.py └── app.py ├── tests ├── __init__.py ├── routes │ ├── __init__.py │ └── v1 │ │ ├── __init__.py │ │ ├── test_sites.py │ │ ├── test_ogc.py │ │ ├── test_metadata.py │ │ ├── test_tiles.py │ │ └── test_datasets.py ├── fixtures │ └── cog.tif ├── test_main.py └── conftest.py ├── covid_api ├── db │ ├── __init__.py │ ├── static │ │ ├── __init__.py │ │ ├── errors.py │ │ ├── groups │ │ │ ├── wq.json │ │ │ ├── aq.json │ │ │ ├── ec.json │ │ │ └── __init__.py │ │ ├── sites │ │ │ ├── be.json │ │ │ ├── tk.json │ │ │ ├── togo.json │ │ │ ├── ny.json │ │ │ ├── sc.json │ │ │ ├── sf.json │ │ │ ├── la.json │ │ │ ├── du.json │ │ │ ├── gh.json │ │ │ ├── gl.json │ │ │ └── __init__.py │ │ └── datasets │ │ │ ├── gibs-population.json │ │ │ ├── slowdown.json │ │ │ ├── detections-contrail.json │ │ │ ├── detections-ship.json │ │ │ ├── recovery.json │ │ │ ├── togo-ag.json │ │ │ ├── detections-plane.json │ │ │ ├── detections-vehicles.json │ │ │ ├── fb-population-density.json │ │ │ ├── nightlights-viirs.json │ │ │ ├── water-spm.json │ │ │ ├── nightlights-hd.json │ │ │ ├── co2-diff.json │ │ │ ├── no2-diff.json │ │ │ ├── water-chlorophyll.json │ │ │ ├── water-pzd.json │ │ │ ├── agriculture.json │ │ │ ├── no2.json │ │ │ ├── co2.json │ │ │ └── __init__.py │ ├── memcache.py │ └── utils.py ├── api │ ├── __init__.py │ └── api_v1 │ │ ├── __init__.py │ │ ├── endpoints │ │ ├── __init__.py │ │ ├── operations.py │ │ ├── sites.py │ │ ├── groups.py │ │ ├── detections.py │ │ ├── modis.py │ │ ├── planet.py │ │ ├── datasets.py │ │ ├── ogc.py │ │ ├── metadata.py │ │ ├── tiles.py │ │ └── timelapse.py │ │ └── api.py ├── core │ ├── __init__.py │ └── config.py ├── models │ ├── __init__.py │ ├── mapbox.py │ ├── timelapse.py │ └── static.py ├── templates │ ├── __init__.py │ └── wmts.xml ├── ressources │ ├── __init__.py │ ├── enums.py │ ├── common.py │ └── responses.py ├── errors.py ├── __init__.py └── main.py ├── cdk.json ├── .DS_Store ├── MANIFEST.in ├── lambda ├── dataset_metadata_generator │ ├── __init__.py │ ├── src │ │ ├── __init__.py │ │ └── main.py │ └── tests │ │ ├── conftest.py │ │ ├── test_metadata_generator.py │ │ └── __init__.py └── handler.py ├── guidelines ├── images │ ├── chart-bands.png │ ├── chart-confidence.png │ ├── chart-indicator.png │ ├── chart-interactive.gif │ └── bar-chart-interactive.gif ├── blackmarble-nightlights-data │ ├── bmhd-ancillary-data-location.md │ └── COVIDDashboardProductReadme.docx ├── README.md ├── vector.md ├── data-usage.md ├── raster.md ├── indicators.md └── api-usage.md ├── .env.example ├── localstack └── setup.sh ├── Dockerfiles ├── ecs │ └── Dockerfile └── lambda │ └── Dockerfile ├── tox.ini ├── LICENSE ├── .pre-commit-config.yaml ├── README.md ├── .gitignore ├── setup.py ├── docker-compose.yml ├── .circleci └── config.yml └── ship-to-api.py /.dockerignore: -------------------------------------------------------------------------------- 1 | cdk.out/* 2 | *.tif 3 | -------------------------------------------------------------------------------- /stack/__init__.py: -------------------------------------------------------------------------------- 1 | """AWS App.""" 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.""" 2 | -------------------------------------------------------------------------------- /covid_api/db/__init__.py: 
-------------------------------------------------------------------------------- 1 | """covid_api.db""" 2 | -------------------------------------------------------------------------------- /tests/routes/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.""" 2 | -------------------------------------------------------------------------------- /tests/routes/v1/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.""" 2 | -------------------------------------------------------------------------------- /cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 stack/app.py" 3 | } -------------------------------------------------------------------------------- /covid_api/api/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.api""" 2 | -------------------------------------------------------------------------------- /covid_api/core/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.core""" 2 | -------------------------------------------------------------------------------- /covid_api/models/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.models""" 2 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.api.api_v1""" 2 | -------------------------------------------------------------------------------- /covid_api/templates/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.templates.""" 2 | -------------------------------------------------------------------------------- /covid_api/ressources/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.ressources.""" 2 | -------------------------------------------------------------------------------- /covid_api/db/static/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api static api responses""" 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/.DS_Store -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.api.api_v1.endpoints""" 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include covid_api/templates/*.html 2 | include covid_api/templates/*.xml 3 | -------------------------------------------------------------------------------- /lambda/dataset_metadata_generator/__init__.py: -------------------------------------------------------------------------------- 1 | """ Dataset metadata generator module.""" 2 | -------------------------------------------------------------------------------- /lambda/dataset_metadata_generator/src/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ Dataset metadata generator module.""" 2 | -------------------------------------------------------------------------------- /tests/fixtures/cog.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/tests/fixtures/cog.tif -------------------------------------------------------------------------------- /guidelines/images/chart-bands.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/guidelines/images/chart-bands.png -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID=myawsaccesskeyid 2 | AWS_SECRET_ACCESS_KEY=myawssecretaccesskey 3 | PLANET_API_KEY=myplanetapikey -------------------------------------------------------------------------------- /covid_api/errors.py: -------------------------------------------------------------------------------- 1 | """covid_api error classes.""" 2 | 3 | 4 | class TilerError(Exception): 5 | """Base exception class.""" 6 | -------------------------------------------------------------------------------- /guidelines/images/chart-confidence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/guidelines/images/chart-confidence.png -------------------------------------------------------------------------------- /guidelines/images/chart-indicator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/guidelines/images/chart-indicator.png -------------------------------------------------------------------------------- /guidelines/images/chart-interactive.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/guidelines/images/chart-interactive.gif -------------------------------------------------------------------------------- /covid_api/__init__.py: -------------------------------------------------------------------------------- 1 | """covid_api.""" 2 | 3 | import pkg_resources 4 | 5 | version = pkg_resources.get_distribution(__package__).version 6 | -------------------------------------------------------------------------------- /guidelines/images/bar-chart-interactive.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/guidelines/images/bar-chart-interactive.gif -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/operations.py: -------------------------------------------------------------------------------- 1 | """API operations.""" 2 | 3 | # /point 4 | # /static/{lon},{lat},{zoom}/{width}x{height}.{format}? 
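# e.g. (hypothetical, not implemented here): GET /static/-122.42,37.77,12/600x400.png?url=s3://covid-eo-data/cog.tif # -> would return a 600x400 PNG preview centered on the given lon,lat at that zoom.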
5 | -------------------------------------------------------------------------------- /covid_api/db/static/errors.py: -------------------------------------------------------------------------------- 1 | """static db errors""" 2 | 3 | 4 | class InvalidIdentifier(Exception): 5 | """Raise if no key is found""" 6 | -------------------------------------------------------------------------------- /covid_api/db/static/groups/wq.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "wq", 3 | "label": "Water Quality", 4 | "prose": null, 5 | "indicators": ["wq"] 6 | } -------------------------------------------------------------------------------- /covid_api/db/static/groups/aq.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "aq", 3 | "label": "Air Quality", 4 | "prose": null, 5 | "indicators": ["no2-15day", "xco2"] 6 | } 7 | -------------------------------------------------------------------------------- /covid_api/db/static/groups/ec.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "ec", 3 | "label": "Economy", 4 | "prose": null, 5 | "indicators": ["nightlights", "ship-detections"] 6 | } -------------------------------------------------------------------------------- /lambda/handler.py: -------------------------------------------------------------------------------- 1 | """AWS Lambda handler.""" 2 | 3 | from mangum import Mangum 4 | 5 | from covid_api.main import app 6 | 7 | handler = Mangum(app, enable_lifespan=False) 8 | -------------------------------------------------------------------------------- /guidelines/blackmarble-nightlights-data/bmhd-ancillary-data-location.md: -------------------------------------------------------------------------------- 1 | The ancillary data referred to in `COVIDDashboardProductReadme.docx` can be found at `s3://covid-eo-data/bmhd_ancillary/` -------------------------------------------------------------------------------- /guidelines/blackmarble-nightlights-data/COVIDDashboardProductReadme.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NASA-IMPACT/covid-api/HEAD/guidelines/blackmarble-nightlights-data/COVIDDashboardProductReadme.docx -------------------------------------------------------------------------------- /covid_api/ressources/enums.py: -------------------------------------------------------------------------------- 1 | """covid_api Enums.""" 2 | 3 | from enum import Enum 4 | 5 | 6 | class ImageType(str, Enum): 7 | """Image Type Enums.""" 8 | 9 | png = "png" 10 | npy = "npy" 11 | tif = "tif" 12 | jpg = "jpg" 13 | webp = "webp" 14 | -------------------------------------------------------------------------------- /covid_api/ressources/common.py: -------------------------------------------------------------------------------- 1 | """Commons.""" 2 | 3 | 4 | extensions = dict(JPEG="jpg", PNG="png", GTiff="tif", WEBP="webp") 5 | 6 | drivers = dict(jpg="JPEG", png="PNG", tif="GTiff", webp="WEBP") 7 | 8 | mimetype = dict( 9 | png="image/png", 10 | npy="application/x-binary", 11 | tif="image/tiff", 12 | jpg="image/jpeg", 13 | webp="image/webp", 14 | ) 15 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/be.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "be", 3 | "label": "Beijing", 4 | "center": [ 116.38, 39.9 ], 5 |
"polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ 115.84, 39.62 ], 9 | [ 116.85, 39.62 ], 10 | [ 116.85, 40.22 ], 11 | [ 115.84, 40.22 ], 12 | [ 115.84, 39.62 ] 13 | ] ] 14 | }, 15 | "bounding_box": [ 115.84, 39.62, 116.85, 40.22 ] 16 | } 17 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/tk.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "tk", 3 | "label": "Tokyo", 4 | "center": [ 139.78, 35.61 ], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ 139.37, 35.33 ], 9 | [ 140.19, 35.33 ], 10 | [ 140.19, 35.85 ], 11 | [ 139.37, 35.85 ], 12 | [ 139.37, 35.33 ] 13 | ] ] 14 | }, 15 | "bounding_box": [ 139.37, 35.33, 140.19, 35.85 ] 16 | } 17 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/togo.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "togo", 3 | "label": "Togo", 4 | "center": [ 0.95, 8.7 ], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ -0.049784, 11.018681 ], 9 | [ -0.049784, 5.928837 ], 10 | [ 1.865240, 5.928837 ], 11 | [ 1.865240, 11.018681 ], 12 | [ -0.049784, 11.018681 ] 13 | ] ] 14 | }, 15 | "bounding_box": [ -0.049784, 5.928837, 1.865240, 11.018681 ] 16 | } 17 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/ny.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "ny", 3 | "label": "New York", 4 | "center": [ -73.09, 41.0114 ], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ -71.74516, 41.54467 ], 9 | [ -74.43395, 41.54943 ], 10 | [ -74.43219, 40.47812 ], 11 | [ -71.74516, 40.48343 ], 12 | [ -71.74516, 41.54467 ] 13 | ] ] 14 | }, 15 | "bounding_box": [ -74.43395, 40.47812, -71.74516, 41.54467 ] 16 | } 17 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/sc.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "sc", 3 | "label": "Suez Canal", 4 | "center": [32.56,30.05], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ 8 | [ [ 32.51197, 29.89066 ], 9 | [ 32.62664, 29.89066 ], 10 | [ 32.62664, 30.22347 ], 11 | [ 32.51197, 30.22347 ], 12 | [ 32.51197, 29.89066 ] ] 13 | ] 14 | }, 15 | "bounding_box": [ 16 | 32.62664, 17 | 30.22347, 18 | 32.51197, 19 | 29.89066 20 | ] 21 | } -------------------------------------------------------------------------------- /covid_api/db/static/sites/sf.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "sf", 3 | "label": "San Francisco", 4 | "center": [ -122.416389, 37.7775 ], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ -122.63570045, 38.31172386 ], 9 | [ -122.53518996, 37.11988178 ], 10 | [ -121.53519174, 37.17901736 ], 11 | [ -121.64821141, 38.35512939 ], 12 | [ -122.63570045, 38.31172386 ] 13 | ] ] 14 | }, 15 | "bounding_box": [ -122.63570045, 37.11988178, -121.53518996, 38.35512939 ] 16 | } 17 | -------------------------------------------------------------------------------- /localstack/setup.sh: -------------------------------------------------------------------------------- 1 | 2 | if awslocal s3 ls s3://"${DATA_BUCKET_NAME}"/"${DATASET_METADATA_FILENAME}" 3 | then 4 | echo "Dataset metadata file found in local S3 bucket. 
To force re-generation: 5 | run 'docker-compose down --volumes' to clear the S3 bucket, and start 6 | the api again" 7 | else 8 | awslocal s3 mb s3://"${DATA_BUCKET_NAME}" 9 | echo "Dataset metadata file not found in local S3 bucket. Generating..." 10 | python3 /docker-entrypoint-initaws.d/main.py 11 | echo "Done!" 12 | fi -------------------------------------------------------------------------------- /covid_api/db/static/sites/la.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "la", 3 | "label": "Los Angeles", 4 | "center": [ -118.25, 34.05 ], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ -117.66703694, 33.42673544 ], 9 | [ -117.07333302, 34.14299552 ], 10 | [ -117.80010186, 34.30197535 ], 11 | [ -118.67592739, 34.34392384 ], 12 | [ -118.68741566, 33.73867555 ], 13 | [ -117.66703694, 33.42673544 ] 14 | ] ] 15 | }, 16 | "bounding_box": [ -118.68741566, 33.42673544, -117.07333302, 34.34392384 ] 17 | } 18 | -------------------------------------------------------------------------------- /Dockerfiles/ecs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tiangolo/uvicorn-gunicorn:python3.8 2 | # Ref https://github.com/tiangolo/uvicorn-gunicorn-fastapi-docker/issues/15 3 | # Cuts image size by 50% 4 | # FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7 5 | 6 | ENV CURL_CA_BUNDLE /etc/ssl/certs/ca-certificates.crt 7 | RUN sed -i 's/DEFAULT@SECLEVEL=2/DEFAULT@SECLEVEL=1/' /etc/ssl/openssl.cnf 8 | 9 | COPY README.md /app/README.md 10 | COPY covid_api/ /app/covid_api/ 11 | COPY setup.py /app/setup.py 12 | 13 | RUN pip install -e /app/. boto3 --no-cache-dir 14 | 15 | CMD ["/start-reload.sh"] 16 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/du.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "du", 3 | "label": "Port of Dunkirk", 4 | "center": [ 2.250141, 51.02986 ], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ 2.08355962, 51.03423481 ], 9 | [ 2.14826632, 50.96553938 ], 10 | [ 2.41646888, 51.02097784 ], 11 | [ 2.38289168, 51.07488218 ], 12 | [ 2.32298564, 51.08773119 ], 13 | [ 2.15844656, 51.05891125 ], 14 | [ 2.08355962, 51.03423481 ] 15 | ] ] 16 | }, 17 | "bounding_box": [ 2.08355962, 50.96553938, 2.41646888, 51.08773119 ] 18 | } 19 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/gh.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "gh", 3 | "label": "Port of Ghent", 4 | "center": [ 3.77245, 51.15934 ], 5 | "polygon": { 6 | "type": "Polygon", 7 | "coordinates": [ [ 8 | [ 3.64539683, 51.09482029 ], 9 | [ 3.66610478, 51.07164212 ], 10 | [ 3.74584324, 51.06663625 ], 11 | [ 3.79612713, 51.11582801 ], 12 | [ 3.84588693, 51.17990464 ], 13 | [ 3.82746305, 51.21762622 ], 14 | [ 3.85833337, 51.28454634 ], 15 | [ 3.81774134, 51.28873095 ], 16 | [ 3.72217390, 51.12261565 ], 17 | [ 3.64539683, 51.09482029 ] 18 | ] ] 19 | }, 20 | "bounding_box": [ 3.64539683, 51.06663625, 3.85833337, 51.28873095 ] 21 | } 22 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/sites.py: -------------------------------------------------------------------------------- 1 | """sites endpoint.""" 2 | 3 | from covid_api.db.static.sites import SiteNames, sites 4 | from covid_api.models.static
import Site, Sites 5 | 6 | from fastapi import APIRouter 7 | 8 | router = APIRouter() 9 | 10 | 11 | @router.get( 12 | "/sites", 13 | responses={200: dict(description="return a list of all available sites")}, 14 | response_model=Sites, 15 | ) 16 | def get_sites(): 17 | """Return list of sites.""" 18 | return sites.get_all() 19 | 20 | 21 | @router.get( 22 | "/sites/{id}", 23 | responses={200: dict(description="return a site")}, 24 | response_model=Site, 25 | ) 26 | def get_site(id: SiteNames): 27 | """Return site info.""" 28 | return sites.get(id.value) 29 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/gl.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "gl", 3 | "label": "Great Lakes", 4 | "center": [ 5 | -83.05755, 6 | 43.22715 7 | ], 8 | "polygon": { 9 | "type": "Polygon", 10 | "coordinates": [ 11 | [ 12 | [ 13 | -84.3695, 14 | 41.2530 15 | ], 16 | [ 17 | -81.7492, 18 | 41.2530 19 | ], 20 | [ 21 | -81.7492, 22 | 45.2013 23 | ], 24 | [ 25 | -84.3695, 26 | 45.2013 27 | ], 28 | [ 29 | -84.3695, 30 | 41.2530 31 | ] 32 | ] 33 | ] 34 | }, 35 | "bounding_box": [ 36 | -84.3695, 37 | 41.2530, 38 | -81.7492, 39 | 45.2013 40 | ] 41 | } -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | """Test covid_api.main.app.""" 2 | 3 | 4 | def test_health(app): 5 | """Test /ping endpoint.""" 6 | response = app.get("/ping") 7 | assert response.status_code == 200 8 | assert response.json() == {"ping": "pong!"} 9 | 10 | 11 | def test_index(app): 12 | """Test / and /index.html endpoints.""" 13 | response = app.get("/") 14 | assert response.status_code == 200 15 | assert response.headers["content-type"] == "text/html; charset=utf-8" 16 | assert response.headers["content-encoding"] == "gzip" 17 | 18 | response = app.get("/index.html") 19 | assert response.status_code == 200 20 | assert response.headers["content-type"] == "text/html; charset=utf-8" 21 | assert response.headers["content-encoding"] == "gzip" 22 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37 3 | 4 | 5 | [testenv] 6 | extras = test 7 | commands= 8 | python -m pytest --cov covid_api --cov-report term-missing --ignore=venv 9 | deps= 10 | numpy 11 | 12 | 13 | [testenv:black] 14 | basepython = python3 15 | skip_install = true 16 | deps = 17 | black 18 | commands = 19 | black 20 | 21 | 22 | # Lint 23 | [flake8] 24 | ignore = D203 25 | exclude = .git,__pycache__,docs/source/conf.py,old,build,dist 26 | max-complexity = 14 27 | max-line-length = 90 28 | 29 | [mypy] 30 | no_strict_optional = True 31 | ignore_missing_imports = True 32 | 33 | [tool:isort] 34 | profile=black 35 | known_first_party = covid_api 36 | forced_separate = fastapi,starlette 37 | known_third_party = rasterio,morecantile,rio_tiler 38 | default_section = THIRDPARTY 39 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/groups.py: -------------------------------------------------------------------------------- 1 | """Groups endpoint.""" 2 | 3 | from covid_api.db.static.groups import groups 4 | from covid_api.models.static import IndicatorGroup, IndicatorGroups 5 | 6 | from fastapi import APIRouter 7 | 8 | router = APIRouter() 9 | 10 | 11 |
@router.get( 12 | "/indicator_groups", 13 | responses={ 14 | 200: dict(description="return a list of all available indicator groups") 15 | }, 16 | response_model=IndicatorGroups, 17 | ) 18 | def get_groups(): 19 | """Return group list.""" 20 | return groups.get_all() 21 | 22 | 23 | @router.get( 24 | "/indicator_groups/{id}", 25 | responses={200: dict(description="return a group")}, 26 | response_model=IndicatorGroup, 27 | ) 28 | def get_group(id: str): 29 | """Return group info.""" 30 | return groups.get(id) 31 | -------------------------------------------------------------------------------- /covid_api/models/mapbox.py: -------------------------------------------------------------------------------- 1 | """Common response models.""" 2 | 3 | from typing import List, Optional, Tuple 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | 8 | class TileJSON(BaseModel): 9 | """ 10 | TileJSON model. 11 | 12 | Based on https://github.com/mapbox/tilejson-spec/tree/master/2.2.0 13 | 14 | """ 15 | 16 | tilejson: str = "2.2.0" 17 | name: Optional[str] 18 | description: Optional[str] 19 | version: str = "1.0.0" 20 | attribution: Optional[str] 21 | template: Optional[str] 22 | legend: Optional[str] 23 | scheme: str = "xyz" 24 | tiles: List[str] 25 | grids: Optional[List[str]] 26 | data: Optional[List[str]] 27 | minzoom: int = Field(0, ge=0, le=30) 28 | maxzoom: int = Field(30, ge=0, le=30) 29 | bounds: List[float] = [-180, -90, 180, 90] 30 | center: Tuple[float, float, int] 31 | -------------------------------------------------------------------------------- /tests/routes/v1/test_sites.py: -------------------------------------------------------------------------------- 1 | """Test /v1/sites endpoints""" 2 | 3 | import boto3 4 | from moto import mock_s3 5 | 6 | from covid_api.core.config import INDICATOR_BUCKET 7 | 8 | 9 | @mock_s3 10 | def _setup_s3(): 11 | s3 = boto3.resource("s3") 12 | bucket = s3.Bucket(INDICATOR_BUCKET) 13 | bucket.create() 14 | s3_keys = [ 15 | ("indicators/test/super.csv", b"test"), 16 | ] 17 | for key, content in s3_keys: 18 | bucket.put_object(Body=content, Key=key) 19 | return bucket 20 | 21 | 22 | @mock_s3 23 | def test_sites(app): 24 | """test /sites endpoint""" 25 | _setup_s3() 26 | 27 | response = app.get("/v1/sites") 28 | assert response.status_code == 200 29 | 30 | 31 | @mock_s3 32 | def test_site_id(app): 33 | """test /sites/{id} endpoint""" 34 | _setup_s3() 35 | 36 | response = app.get("/v1/sites/be") 37 | assert response.status_code == 200 38 | -------------------------------------------------------------------------------- /covid_api/ressources/responses.py: -------------------------------------------------------------------------------- 1 | """Common response models.""" 2 | 3 | from starlette.background import BackgroundTask 4 | from starlette.responses import Response 5 | 6 | 7 | class XMLResponse(Response): 8 | """XML Response""" 9 | 10 | media_type = "application/xml" 11 | 12 | 13 | class TileResponse(Response): 14 | """Tiler's response.""" 15 | 16 | def __init__( 17 | self, 18 | content: bytes, 19 | media_type: str, 20 | status_code: int = 200, 21 | headers: dict = None, 22 | background: BackgroundTask = None, 23 | ttl: int = 3600, 24 | ) -> None: 25 | """Init tiler response.""" 26 | headers = headers or {} 27 | headers.update({"Content-Type": media_type}) 28 | if ttl: 29 | headers.update({"Cache-Control": f"max-age={ttl}"}) 30 | self.body = self.render(content) 31 | self.status_code = status_code 32 | self.media_type = media_type 33 | self.background = background 34 | self.init_headers(headers) 35 |
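36 | # Usage sketch (illustrative only; the actual tile routes live in 37 | # covid_api/api/api_v1/endpoints/tiles.py): given rendered tile bytes, an 38 | # endpoint would look up the media type in covid_api.ressources.common and 39 | # return, for example: 40 | #     from covid_api.ressources.common import mimetype 41 | #     return TileResponse(content, media_type=mimetype["png"], ttl=3600) 42 |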
-------------------------------------------------------------------------------- /guidelines/README.md: -------------------------------------------------------------------------------- 1 | # Guidelines for Data Contributors 2 | The API serves two types of data: 3 | 4 | - raster data in Cloud Optimized GeoTIFF (COG) format. This is primarily used to show layers on a map. 5 | - time-series indicator data by geographic area. This is mostly used to show evolution over time in graphs. 6 | 7 | See the [api usage](./api-usage.md) document for an overview of how to explore datasets and directly access source data through the API. 8 | 9 | See the [data usage](./data-usage.md) document for an overview of how the data may be displayed. 10 | 11 | These guidelines are written for contributors of data, and contain information on the input formats and delivery mechanisms: 12 | 13 | * [contributing raster data](raster.md) 14 | * [contributing indicator data](indicators.md) 15 | 16 | ## Relevant links 17 | 18 | * [API documentation](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/docs) 19 | * [Overview](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/sites) of Spotlight Areas 20 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/api.py: -------------------------------------------------------------------------------- 1 | """covid_api api.""" 2 | 3 | from covid_api.api.api_v1.endpoints import detections, datasets # isort:skip 4 | from covid_api.api.api_v1.endpoints import ( 5 | groups, 6 | metadata, 7 | modis, 8 | ogc, 9 | planet, 10 | sites, 11 | tiles, 12 | timelapse, 13 | ) 14 | 15 | from fastapi import APIRouter 16 | 17 | api_router = APIRouter() 18 | api_router.include_router(tiles.router, tags=["tiles"]) 19 | api_router.include_router(metadata.router, tags=["metadata"]) 20 | api_router.include_router(ogc.router, tags=["OGC"]) 21 | api_router.include_router(timelapse.router, tags=["timelapse"]) 22 | api_router.include_router(datasets.router, tags=["datasets"]) 23 | api_router.include_router(sites.router, tags=["sites"]) 24 | api_router.include_router(groups.router, tags=["indicator groups"]) 25 | api_router.include_router(detections.router, tags=["detections"]) 26 | api_router.include_router(planet.router, tags=["planet"]) 27 | api_router.include_router(modis.router, tags=["modis"]) 28 | -------------------------------------------------------------------------------- /covid_api/db/static/datasets/gibs-population.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "gibs-population", 3 | "name": "Population", 4 | "type": "raster", 5 | "time_unit": "day", 6 | "source": { 7 | "type": "raster", 8 | "tiles": [ 9 | "https://gibs.earthdata.nasa.gov/wmts/epsg3857/best/GPW_Population_Density_2020/default/2020-05-14T00:00:00Z/GoogleMapsCompatible_Level7/{z}/{y}/{x}.png" 10 | ] 11 | }, 12 | "exclusive_with": [ 13 | "agriculture", 14 | "no2", 15 | "no2-diff", 16 | "co2-diff", 17 | "co2", 18 | "car-count", 19 | "nightlights-viirs", 20 | "nightlights-hd", 21 | "detection-ship", 22 | "detection-multi", 23 | "water-chlorophyll", 24 | "water-spm", 25 | "water-pzd" 26 | ], 27 | "swatch": { 28 | "color": "#C0C0C0", 29 | "name": "Grey" 30 | }, 31 | "legend": { 32 | "type": "gradient", 33 | "min": "less", 34 | "max": "more", 35 | "stops": [ 36 | "#FFEFCB", 37 | "#FBA54A", 38 | "#FB9F46", 39 | "#F35228", 40 | "#BD0026" 41 | ] 42 | } 43 | }
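A note on how these static dataset definitions are consumed (a minimal sketch, assuming a generic web-map client; nothing below is part of this repo): the `source.tiles` templates carry `{z}`/`{y}`/`{x}` placeholders that the client fills in for each tile it fetches.

```python
# Minimal sketch: expand the GIBS tile template from gibs-population.json
# the way a web map client would before requesting a single tile.
template = (
    "https://gibs.earthdata.nasa.gov/wmts/epsg3857/best/GPW_Population_Density_2020"
    "/default/2020-05-14T00:00:00Z/GoogleMapsCompatible_Level7/{z}/{y}/{x}.png"
)
tile_url = template.format(z=3, y=2, x=5)
print(tile_url)
```

For the API-served datasets, clients additionally substitute `{api_url}`, `{spotlightId}` and `{date}` before filling in the tile coordinates.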
-------------------------------------------------------------------------------- /tests/routes/v1/test_ogc.py: -------------------------------------------------------------------------------- 1 | """test /v1 OGC (WMTS) endpoints.""" 2 | 3 | # from typing import Dict 4 | 5 | from mock import patch 6 | 7 | from ...conftest import mock_rio 8 | 9 | 10 | @patch("covid_api.api.api_v1.endpoints.ogc.rasterio") 11 | def test_wmts(rio, app): 12 | """test wmts endpoints.""" 13 | rio.open = mock_rio 14 | 15 | response = app.get("/v1/WMTSCapabilities.xml?url=https://myurl.com/cog.tif") 16 | assert response.status_code == 200 17 | assert response.headers["content-type"] == "application/xml" 18 | assert ( 19 | "http://testserver/v1/{TileMatrix}/{TileCol}/{TileRow}@1x.png?url=https" 20 | in response.content.decode() 21 | ) 22 | 23 | response = app.get( 24 | "/v1/WMTSCapabilities.xml?url=https://myurl.com/cog.tif&tile_scale=2&tile_format=jpg" 25 | ) 26 | assert response.status_code == 200 27 | assert response.headers["content-type"] == "application/xml" 28 | assert ( 29 | "http://testserver/v1/{TileMatrix}/{TileCol}/{TileRow}@2x.jpg?url=https" 30 | in response.content.decode() 31 | ) 32 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """``pytest`` configuration.""" 2 | 3 | import os 4 | 5 | import pytest 6 | import rasterio 7 | from rasterio.io import DatasetReader 8 | 9 | from starlette.testclient import TestClient 10 | 11 | 12 | @pytest.fixture(autouse=True) 13 | def aws_credentials(monkeypatch): 14 | """Set fake AWS credentials and disable caching for tests.""" 15 | monkeypatch.setenv("DISABLE_CACHE", "YESPLEASE") 16 | monkeypatch.setenv("AWS_ACCESS_KEY_ID", "jqt") 17 | monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "rde") 18 | 19 | 20 | @pytest.fixture 21 | def app() -> TestClient: 22 | """Make sure we use monkeypatch env.""" 23 | 24 | from covid_api.main import app 25 | 26 | return TestClient(app) 27 | 28 | 29 | def mock_rio(src_path: str) -> DatasetReader: 30 | """Mock rasterio.open.""" 31 | prefix = os.path.join(os.path.dirname(__file__), "fixtures") 32 | assert src_path.startswith("https://myurl.com/") 33 | return rasterio.open(os.path.join(prefix, "cog.tif")) 34 | 35 | 36 | @pytest.fixture 37 | def dataset_manager(monkeypatch): 38 | """Return the DatasetManager class (imported after env is patched).""" 39 | 40 | from covid_api.db.static.datasets import DatasetManager 41 | 42 | return DatasetManager 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Development Seed 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Dockerfiles/lambda/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM lambci/lambda:build-python3.7 2 | 3 | 4 | # We install covid_api and mangum 5 | WORKDIR /app 6 | 7 | COPY README.md /app/README.md 8 | COPY covid_api/ /app/covid_api/ 9 | COPY setup.py /app/setup.py 10 | 11 | RUN pip install --upgrade pip 12 | RUN pip install . "mangum==0.9.2" -t /var/task --no-binary numpy, pydantic 13 | 14 | # Reduce package size and remove useless files 15 | RUN cd /var/task && find . -type f -name '*.pyc' | while read f; do n=$(echo $f | sed 's/__pycache__\///' | sed 's/.cpython-[2-3][0-9]//'); cp $f $n; done; 16 | RUN cd /var/task && find . -type d -a -name '__pycache__' -print0 | xargs -0 rm -rf 17 | RUN cd /var/task && find . -type f -a -name '*.py' -print0 | xargs -0 rm -f 18 | RUN find /var/task -type d -a -name 'tests' -print0 | xargs -0 rm -rf 19 | RUN echo "Remove lambda python packages" 20 | RUN rm -rdf /var/task/numpy/doc/ 21 | RUN rm -rdf /var/task/stack 22 | RUN rm -rdf /var/task/boto3* 23 | RUN rm -rdf /var/task/botocore* 24 | RUN rm -rdf /var/task/docutils* 25 | RUN rm -rdf /var/task/dateutil* 26 | RUN rm -rdf /var/task/jmespath* 27 | RUN rm -rdf /var/task/s3transfer* 28 | 29 | 30 | COPY lambda/handler.py /var/task/handler.py 31 | 32 | -------------------------------------------------------------------------------- /lambda/dataset_metadata_generator/tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test configuration class for the dataset metadata generator lambda's unit tests 3 | """ 4 | 5 | import pytest 6 | 7 | from . import DATASETS, SITES 8 | 9 | 10 | @pytest.fixture(autouse=True) 11 | def aws_credentials(monkeypatch): 12 | """Make sure we use monkeypatch env.""" 13 | monkeypatch.setenv("DISABLE_CACHE", "YESPLEASE") 14 | monkeypatch.setenv("AWS_ACCESS_KEY_ID", "jqt") 15 | monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "rde") 16 | monkeypatch.setenv("DATA_BUCKET_NAME", "covid-eo-data") 17 | monkeypatch.setenv("DATASET_METADATA_FILENAME", "dev-dataset-metadata.json") 18 | 19 | 20 | @pytest.fixture 21 | def gather_datasets_metadata(): 22 | """Yield the main function to unit test""" 23 | # Why is this imported here? 
24 | # See: https://github.com/spulec/moto#what-about-those-pesky-imports 25 | 26 | from ..src.main import _gather_datasets_metadata 27 | 28 | return _gather_datasets_metadata 29 | 30 | 31 | @pytest.fixture 32 | def datasets(): 33 | """Dataset metadata items""" 34 | return DATASETS 35 | 36 | 37 | @pytest.fixture 38 | def sites(): 39 | """Site metadata items""" 40 | return SITES 41 | -------------------------------------------------------------------------------- /covid_api/core/config.py: -------------------------------------------------------------------------------- 1 | """Config.""" 2 | 3 | import os 4 | 5 | API_VERSION_STR = "/v1" 6 | 7 | PROJECT_NAME = "covid_api" 8 | 9 | SERVER_NAME = os.getenv("SERVER_NAME") 10 | SERVER_HOST = os.getenv("SERVER_HOST") 11 | BACKEND_CORS_ORIGINS = os.getenv( 12 | "BACKEND_CORS_ORIGINS", default="*" 13 | ) # a string of origins separated by commas, e.g: "http://localhost, http://localhost:4200, http://localhost:3000, http://localhost:8080, http://local.dockertoolbox.tiangolo.com" 14 | 15 | DISABLE_CACHE = os.getenv("DISABLE_CACHE") 16 | MEMCACHE_HOST = os.environ.get("MEMCACHE_HOST") 17 | MEMCACHE_PORT = int(os.environ.get("MEMCACHE_PORT", 11211)) 18 | MEMCACHE_USERNAME = os.environ.get("MEMCACHE_USERNAME") 19 | MEMCACHE_PASSWORD = os.environ.get("MEMCACHE_PASSWORD") 20 | 21 | INDICATOR_BUCKET = os.environ.get("INDICATOR_BUCKET", "covid-eo-data") 22 | 23 | DATASET_METADATA_FILENAME = os.environ.get( 24 | "DATASET_METADATA_FILENAME", "dev-dataset-metadata.json" 25 | ) 26 | 27 | DATASET_METADATA_GENERATOR_FUNCTION_NAME = os.environ.get( 28 | "DATASET_METADATA_GENERATOR_FUNCTION_NAME", "dev-dataset-metadata-generator" 29 | ) 30 | 31 | DT_FORMAT = "%Y-%m-%d" 32 | MT_FORMAT = "%Y%m" 33 | PLANET_API_KEY = os.environ.get("PLANET_API_KEY") 34 | 35 | TIMELAPSE_MAX_AREA = 200000 # km^2 36 | -------------------------------------------------------------------------------- /covid_api/db/static/datasets/slowdown.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "slowdown", 3 | "name": "Slowdown Proxy Maps", 4 | "type": "raster", 5 | "s3_location": "slowdown_proxy_map", 6 | "source": { 7 | "type": "raster", 8 | "tiles": [ 9 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/slowdown_proxy_map/{spotlightId}.tif&resampling_method=bilinear&bidx=1%2C2%2C3%24" 10 | ] 11 | }, 12 | "paint": { 13 | "raster-opacity": 0.9 14 | }, 15 | "exclusiveWith": [ 16 | "agriculture", 17 | "co2", 18 | "co2-diff", 19 | "no2", 20 | "no2-diff", 21 | "fb-population-density", 22 | "gibs-population", 23 | "nightlights-viirs", 24 | "nightlights-hd", 25 | "detection-ship", 26 | "detections-contrail", 27 | "detections-vehicles", 28 | "detections-plane", 29 | "detection-multi", 30 | "water-chlorophyll", 31 | "water-spm", 32 | "water-pzd" 33 | ], 34 | "enabled": true, 35 | "swatch": { 36 | "color": "#C0C0C0", 37 | "name": "Grey" 38 | }, 39 | "info": "Slowdown Proxy Maps show areas with the greatest reduction in car activity shaded in blue. Darker blues indicate areas of greater change." 40 | } -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/detections.py: -------------------------------------------------------------------------------- 1 | """ Machine Learning Detections. 
""" 2 | import json 3 | from enum import Enum 4 | 5 | from covid_api.core import config 6 | from covid_api.db.static.sites import SiteNames 7 | from covid_api.db.utils import s3_get 8 | from covid_api.models.static import Detection 9 | 10 | from fastapi import APIRouter, HTTPException 11 | 12 | router = APIRouter() 13 | 14 | # TODO: unhardcoded types and dates 15 | MLTypes = Enum("MLTypes", [(ml, ml) for ml in ["ship", "multiple", "plane", "vehicles", "contrail"]]) # type: ignore 16 | 17 | 18 | @router.get( 19 | "/detections/{ml_type}/{site}/{date}.geojson", 20 | responses={ 21 | 200: dict(description="return a detection geojson"), 22 | 404: dict(description="no detections found"), 23 | }, 24 | response_model=Detection, 25 | ) 26 | def get_detection(ml_type: MLTypes, site: SiteNames, date: str): 27 | """ Handle /detections requests.""" 28 | 29 | try: 30 | return json.loads( 31 | s3_get( 32 | bucket=config.INDICATOR_BUCKET, 33 | key=f"detections-{ml_type.value}/{site.value}/{date}.geojson", 34 | ) 35 | ) 36 | except Exception: 37 | raise HTTPException(status_code=404, detail="Detections not found") 38 | -------------------------------------------------------------------------------- /covid_api/db/static/datasets/detections-contrail.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "detections-contrail", 3 | "name": "Contrails", 4 | "type": "inference-timeseries", 5 | "s3_location": "detections-contrail", 6 | "is_periodic": false, 7 | "time_unit": "day", 8 | "source": { 9 | "type": "geojson", 10 | "tiles": [ 11 | "{api_url}/detections/contrail/{spotlightId}/{date}.geojson" 12 | ] 13 | }, 14 | "background_source": { 15 | "type": "raster", 16 | "tiles": [ 17 | "{api_url}/modis/{z}/{x}/{y}?date={date}" 18 | ] 19 | }, 20 | "exclusive_with": [ 21 | "agriculture", 22 | "no2", 23 | "co2-diff", 24 | "co2", 25 | "fb-population-density", 26 | "gibs-population", 27 | "car-count", 28 | "nightlights-viirs", 29 | "nightlights-hd", 30 | "detection-multi", 31 | "water-chlorophyll", 32 | "water-spm", 33 | "water-pzd", 34 | "detections-ship", 35 | "detections-plane", 36 | "detections-vehicles" 37 | ], 38 | "enabled": false, 39 | "swatch": { 40 | "color": "#C0C0C0", 41 | "name": "Grey" 42 | }, 43 | "info": "Contrails detected each day, displayed over MODIS Imagery" 44 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/detections-ship.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "detections-ship", 3 | "name": "Shipping", 4 | "type": "inference-timeseries", 5 | "s3_location": "detections-ship", 6 | "is_periodic": false, 7 | "time_unit": "day", 8 | "source": { 9 | "type": "geojson", 10 | "tiles": [ 11 | "{api_url}/detections/ship/{spotlightId}/{date}.geojson" 12 | ] 13 | }, 14 | "background_source": { 15 | "type": "raster", 16 | "tiles": [ 17 | "{api_url}/planet/{z}/{x}/{y}?date={date}&site={spotlightId}" 18 | ] 19 | }, 20 | "exclusive_with": [ 21 | "agriculture", 22 | "no2", 23 | "co2-diff", 24 | "co2", 25 | "fb-population-density", 26 | "gibs-population", 27 | "car-count", 28 | "nightlights-viirs", 29 | "nightlights-hd", 30 | "detection-multi", 31 | "water-chlorophyll", 32 | "water-spm", 33 | "water-pzd", 34 | "detections-plane", 35 | "detections-contrail", 36 | "detections-vehicles" 37 | ], 38 | "enabled": false, 39 | "swatch": { 40 | "color": "#C0C0C0", 41 | "name": "Grey" 42 | }, 43 | "info": "Ships detected each day in PlanetScope 
imagery are shown in orange." 44 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/recovery.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "recovery", 3 | "name": "Recovery Proxy Map", 4 | "description": "Recovery Proxy Maps show areas with the greatest increase in car activity shaded in orange. Darker orange indicates areas of greater change.", 5 | "type": "raster", 6 | "s3_location": "rpm", 7 | "source": { 8 | "type": "raster", 9 | "tiles": [ 10 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/rpm/rpm-{spotlightId}.cog.tif&resampling_method=bilinear&bidx=1%2C2%2C3%24" 11 | ] 12 | }, 13 | "paint": { 14 | "raster-opacity": 0.9 15 | }, 16 | "exclusiveWith": [ 17 | "agriculture", 18 | "co2", 19 | "co2-diff", 20 | "fb-population-density", 21 | "gibs-population", 22 | "car-count", 23 | "nightlights-viirs", 24 | "nightlights-hd", 25 | "detection-ship", 26 | "detection-multi", 27 | "water-chlorophyll", 28 | "water-spm", 29 | "water-pzd" 30 | ], 31 | "enabled": true, 32 | "swatch": { 33 | "color": "#C0C0C0", 34 | "name": "Grey" 35 | }, 36 | "info": "Recovery Proxy Maps show areas with the greatest increase in car activity shaded in orange. Darker orange indicates areas of greater change." 37 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/togo-ag.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "togo-ag", 3 | "name": "Agriculture", 4 | "type": "raster", 5 | "description": "Dark purple colors indicate lower probability of cropland while lighter yellow colors indicate higher probability of cropland within each pixel.", 6 | "s3_location": "Togo", 7 | "source": { 8 | "type": "raster", 9 | "tiles": [ 10 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/Togo/togo_cropland_v7-1_cog_v2.tif&resampling_method=bilinear&bidx=1&rescale=0%2C1&color_map=inferno" 11 | ] 12 | }, 13 | "enabled": true, 14 | "exclusive_with": [], 15 | "swatch": { 16 | "color": "#C0C0C0", 17 | "name": "Grey" 18 | }, 19 | "legend": { 20 | "type": "gradient", 21 | "min": "low", 22 | "max": "high", 23 | "stops": [ 24 | "#000000", 25 | "#1a0b40", 26 | "#4b0c6b", 27 | "#791c6d", 28 | "#a42c60", 29 | "#cf4446", 30 | "#ed6825", 31 | "#fb9b06", 32 | "#f6d13c", 33 | "#fbfda2" 34 | ] 35 | }, 36 | "info": "Dark purple colors indicate lower probability of cropland while lighter yellow colors indicate higher probability of cropland within each pixel."
37 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/detections-plane.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "detections-plane", 3 | "name": "Airplanes", 4 | "type": "inference-timeseries", 5 | "s3_location": "detections-plane", 6 | "is_periodic": false, 7 | "time_unit": "day", 8 | "source": { 9 | "type": "geojson", 10 | "tiles": [ 11 | "{api_url}/detections/plane/{spotlightId}/{date}.geojson" 12 | ] 13 | }, 14 | "background_source": { 15 | "type": "raster", 16 | "tiles": [ 17 | "{api_url}/planet/{z}/{x}/{y}?date={date}&site={spotlightId}" 18 | ] 19 | }, 20 | "exclusive_with": [ 21 | "agriculture", 22 | "no2", 23 | "co2-diff", 24 | "co2", 25 | "fb-population-density", 26 | "gibs-population", 27 | "car-count", 28 | "nightlights-viirs", 29 | "nightlights-hd", 30 | "detection-multi", 31 | "water-chlorophyll", 32 | "water-spm", 33 | "water-pzd", 34 | "detections-ship", 35 | "detections-contrail", 36 | "detections-vehicles" 37 | ], 38 | "enabled": false, 39 | "swatch": { 40 | "color": "#C0C0C0", 41 | "name": "Grey" 42 | }, 43 | "info": "Grounded airplanes detected each day in PlanetScope imagery are shown in orange." 44 | } -------------------------------------------------------------------------------- /covid_api/db/static/groups/__init__.py: -------------------------------------------------------------------------------- 1 | """ covid_api indicator groups """ 2 | import os 3 | from typing import List 4 | 5 | from covid_api.db.static.errors import InvalidIdentifier 6 | from covid_api.models.static import IndicatorGroup, IndicatorGroups 7 | 8 | data_dir = os.path.join(os.path.dirname(__file__)) 9 | 10 | 11 | class GroupManager(object): 12 | """Default Group holder.""" 13 | 14 | def __init__(self): 15 | """Load all groups in a dict.""" 16 | groups = [ 17 | os.path.splitext(f)[0] for f in os.listdir(data_dir) if f.endswith(".json") 18 | ] 19 | 20 | self._data = { 21 | group: IndicatorGroup.parse_file(os.path.join(data_dir, f"{group}.json")) 22 | for group in groups 23 | } 24 | 25 | def get(self, identifier: str) -> IndicatorGroup: 26 | """Fetch a Group.""" 27 | try: 28 | return self._data[identifier] 29 | except KeyError: 30 | raise InvalidIdentifier(f"Invalid identifier: {identifier}") 31 | 32 | def get_all(self) -> IndicatorGroups: 33 | """Fetch all Groups.""" 34 | return IndicatorGroups(groups=[group.dict() for group in self._data.values()]) 35 | 36 | def list(self) -> List[str]: 37 | """List all groups""" 38 | return list(self._data.keys()) 39 | 40 | 41 | groups = GroupManager() 42 | -------------------------------------------------------------------------------- /covid_api/db/static/datasets/detections-vehicles.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "detections-vehicles", 3 | "name": "Vehicle", 4 | "type": "inference-timeseries", 5 | "s3_location": "detections-vehicles", 6 | "is_periodic": false, 7 | "time_unit": "day", 8 | "source": { 9 | "type": "geojson", 10 | "tiles": [ 11 | "{api_url}/detections/vehicles/{spotlightId}/{date}.geojson" 12 | ] 13 | }, 14 | "background_source": { 15 | "type": "raster", 16 | "tiles": [ 17 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/detections-vehicles/background/{spotlightId}/{date}.tif" 18 | ] 19 | }, 20 | "exclusive_with": [ 21 | "agriculture", 22 | "no2", 23 | "co2-diff", 24 | "co2", 25 | "fb-population-density", 26 | "gibs-population", 27 | "car-count", 28 | 
"nightlights-viirs", 29 | "nightlights-hd", 30 | "detection-multi", 31 | "water-chlorophyll", 32 | "water-spm", 33 | "water-pzd", 34 | "detections-ship", 35 | "detections-plane", 36 | "detections-contrail" 37 | ], 38 | "enabled": false, 39 | "swatch": { 40 | "color": "#C0C0C0", 41 | "name": "Grey" 42 | }, 43 | "info": "Vehicles detected each day in PlanetScope imagery are shown in orange." 44 | } -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 19.10b0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | args: ["--safe"] 8 | 9 | - repo: https://github.com/PyCQA/isort 10 | rev: 5.4.2 11 | hooks: 12 | - id: isort 13 | language_version: python3 14 | 15 | - repo: https://github.com/PyCQA/flake8 16 | rev: 3.8.3 17 | hooks: 18 | - id: flake8 19 | language_version: python3 20 | args: [ 21 | # E501 let black handle all line length decisions 22 | # W503 black conflicts with "line break before operator" rule 23 | # E203 black conflicts with "whitespace before ':'" rule 24 | "--ignore=E501,W503,E203", 25 | ] 26 | 27 | - repo: https://github.com/PyCQA/pydocstyle 28 | rev: 5.1.1 29 | hooks: 30 | - id: pydocstyle 31 | language_version: python3 32 | args: [ 33 | # Check for docstring presence only 34 | "--select=D1", 35 | # Don't require docstrings for tests 36 | '--match=(?!test).*\.py', 37 | ] 38 | 39 | - repo: https://github.com/pre-commit/mirrors-mypy 40 | rev: v0.770 41 | hooks: 42 | - id: mypy 43 | language_version: python3 44 | args: ["--no-strict-optional", "--ignore-missing-imports"] 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # covid-api 2 | 3 | A lightweight tile server for COVID data, based on [titiler](https://github.com/developmentseed/titiler). 4 | 5 | ## Contributing data 6 | More information for data contributors like expected input format and delivery mechanisms, can be found in the [data guidelines](guidelines/README.md). 7 | 8 | ## Local environment 9 | 10 | First, add your AWS and Planet credentials to a new file called `.env`. You can see an example of this file at `.env.example`. 11 | 12 | To run the API locally: 13 | 14 | ``` 15 | $ docker-compose up --build 16 | ``` 17 | 18 | The API should be running on `http://localhost:8000`. 19 | 20 | ## Contribution & Development 21 | 22 | Issues and pull requests are more than welcome. 23 | 24 | **dev install** 25 | 26 | ```bash 27 | $ git clone https://github.com/NASA-IMPACT/covid-api.git 28 | $ cd covid-api 29 | $ pip install -e .[dev] 30 | ``` 31 | 32 | This repo is set to use `pre-commit` to run *my-py*, *flake8*, *pydocstring* and *black* ("uncompromising Python code formatter") when commiting new code. 33 | 34 | ```bash 35 | $ pre-commit install 36 | ``` 37 | 38 | ``` 39 | $ git add . 
40 | $ git commit -m'fix a really important thing' 41 | black....................................................................Passed 42 | Flake8...................................................................Passed 43 | Verifying PEP257 Compliance..............................................Passed 44 | mypy.....................................................................Passed 45 | [precommit cc12c5a] fix a really important thing 46 | ``` -------------------------------------------------------------------------------- /covid_api/db/static/datasets/fb-population-density.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "fb-population-density", 3 | "name": "Facebook Population Density", 4 | "type": "raster", 5 | "time_unit": "day", 6 | "source": { 7 | "type": "raster", 8 | "tiles": [ 9 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/dataforgood-fb-population-density/hrsl_general_latest_global_cog.tif&rescale=0.0%2C72.0&resampling_method=nearest&color_map=ylorrd" 10 | ] 11 | }, 12 | "paint": { 13 | "raster-opacity": 0.9 14 | }, 15 | "exclusive_with": [ 16 | "agriculture", 17 | "no2", 18 | "co2-diff", 19 | "gibs-population", 20 | "car-count", 21 | "nightlights-viirs", 22 | "nightlights-hd", 23 | "detection-multi", 24 | "water-chlorophyll", 25 | "water-spm", 26 | "water-pzd", 27 | "detections-ship", 28 | "detections-plane", 29 | "detections-vehicles" 30 | ], 31 | "enabled": true, 32 | "swatch": { 33 | "color": "#C0C0C0", 34 | "name": "Grey" 35 | }, 36 | "legend": { 37 | "type": "gradient", 38 | "min": "0 people/30m²", 39 | "max": "72 people/30m²", 40 | "stops": [ 41 | "#FFEFCB", 42 | "#FBA54A", 43 | "#FB9F46", 44 | "#F35228", 45 | "#BD0026" 46 | ] 47 | }, 48 | "info": "Facebook high-resolution population density: Darker areas indicate higher population density areas and lighter areas indicate lower population density areas, with a 30m² resolution" 49 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/nightlights-viirs.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "nightlights-viirs", 3 | "name": "Nightlights VIIRS", 4 | "type": "raster-timeseries", 5 | "time_unit": "day", 6 | "s3_location": "bm_500m_daily", 7 | "is_periodic": true, 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_{spotlightId}_{date}_cog.tif&resampling_method=nearest&bidx=1&rescale=0%2C100&color_map=viridis" 12 | ] 13 | }, 14 | "exclusive_with": [ 15 | "agriculture", 16 | "no2", 17 | "co2-diff", 18 | "co2", 19 | "fb-population-density", 20 | "gibs-population", 21 | "car-count", 22 | "nightlights-hd", 23 | "detection-multi", 24 | "water-chlorophyll", 25 | "water-spm", 26 | "water-pzd", 27 | "detections-ship", 28 | "detections-plane", 29 | "detections-contrail", 30 | "detections-vehicles" 31 | ], 32 | "swatch": { 33 | "color": "#C0C0C0", 34 | "name": "Grey" 35 | }, 36 | "legend": { 37 | "type": "gradient", 38 | "min": "less", 39 | "max": "more", 40 | "stops": [ 41 | "#440357", 42 | "#3b508a", 43 | "#208f8c", 44 | "#5fc961", 45 | "#fde725" 46 | ] 47 | }, 48 | "info": "Darker colors indicate fewer night lights and less activity. Lighter colors indicate more night lights and more activity. Check out the HD dataset to see a light-corrected version of this dataset." 
49 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/water-spm.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "water-spm", 3 | "name": "Turbidity", 4 | "type": "raster-timeseries", 5 | "time_unit": "day", 6 | "s3_location": "spm_anomaly", 7 | "is_periodic": false, 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/spm_anomaly/anomaly-spm-{spotlightId}-{date}.tif&resampling_method=bilinear&bidx=1&rescale=-100%2C100&color_map=rdbu_r" 12 | ] 13 | }, 14 | "exclusive_with": [ 15 | "agriculture", 16 | "no2", 17 | "co2-diff", 18 | "co2", 19 | "fb-population-density", 20 | "gibs-population", 21 | "car-count", 22 | "nightlights-viirs", 23 | "nightlights-hd", 24 | "detection-multi", 25 | "water-chlorophyll", 26 | "water-pzd", 27 | "detections-ship", 28 | "detections-plane", 29 | "detections-contrail", 30 | "detections-vehicles" 31 | ], 32 | "swatch": { 33 | "color": "#154F8D", 34 | "name": "Deep blue" 35 | }, 36 | "legend": { 37 | "type": "gradient", 38 | "min": "less", 39 | "max": "more", 40 | "stops": [ 41 | "#3A88BD", 42 | "#C9E0ED", 43 | "#E4EEF3", 44 | "#FDDCC9", 45 | "#DE725B", 46 | "#67001F" 47 | ] 48 | }, 49 | "info": "Turbidity refers to the amount of sediment or particles suspended in water. Redder colors indicate more sediment and murkier water. Bluer colors indicate less sediment and clearer water." 50 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/nightlights-hd.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "nightlights-hd", 3 | "name": "Nightlights HD", 4 | "type": "raster-timeseries", 5 | "s3_location": "bmhd_30m_monthly", 6 | "is_periodic": true, 7 | "time_unit": "month", 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/bmhd_30m_monthly/BMHD_VNP46A2_{spotlightId}_{date}_cog.tif&bidx=1&rescale=0,255&color_map=inferno" 12 | ] 13 | }, 14 | "exclusive_with": [ 15 | "agriculture", 16 | "no2", 17 | "co2-diff", 18 | "co2", 19 | "fb-population-density", 20 | "gibs-population", 21 | "car-count", 22 | "nightlights-viirs", 23 | "detection-multi", 24 | "water-chlorophyll", 25 | "water-spm", 26 | "water-pzd", 27 | "detections-ship", 28 | "detections-plane", 29 | "detections-contrail", 30 | "detections-vehicles" 31 | ], 32 | "swatch": { 33 | "color": "#C0C0C0", 34 | "name": "Grey" 35 | }, 36 | "legend": { 37 | "type": "gradient", 38 | "min": "less", 39 | "max": "more", 40 | "stops": [ 41 | "#08041d", 42 | "#1f0a46", 43 | "#52076c", 44 | "#f57c16", 45 | "#f7cf39" 46 | ] 47 | }, 48 | "info": "The High Definition Nightlights dataset is processed to eliminate light sources, including moonlight reflectance and other interferences. Darker colors indicate fewer night lights and less activity. Lighter colors indicate more night lights and more activity." 
49 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/co2-diff.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "co2-diff", 3 | "name": "CO\u2082 (Diff)", 4 | "type": "raster-timeseries", 5 | "time_unit": "day", 6 | "is_periodic": true, 7 | "s3_location": "xco2-diff", 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/xco2-diff/xco2_16day_diff.{date}.tif&resampling_method=bilinear&bidx=1&rescale=-0.000001%2C0.000001&color_map=rdbu_r" 12 | ] 13 | }, 14 | "exclusive_with": [ 15 | "agriculture", 16 | "no2", 17 | "no2-diff", 18 | "co2", 19 | "fb-population-density", 20 | "gibs-population", 21 | "car-count", 22 | "nightlights-viirs", 23 | "nightlights-hd", 24 | "detection-multi", 25 | "water-chlorophyll", 26 | "water-spm", 27 | "water-pzd", 28 | "detections-ship", 29 | "detections-plane", 30 | "detections-contrail", 31 | "detections-vehicles" 32 | ], 33 | "enabled": false, 34 | "swatch": { 35 | "color": "#189C54", 36 | "name": "Dark Green" 37 | }, 38 | "legend": { 39 | "type": "gradient", 40 | "min": "< -1 ppm", 41 | "max": "> 1 ppm", 42 | "stops": [ 43 | "#3A88BD", 44 | "#C9E0ED", 45 | "#E4EEF3", 46 | "#FDDCC9", 47 | "#DD7059" 48 | ] 49 | }, 50 | "info": "This layer shows changes in carbon dioxide (CO₂) levels during coronavirus lockdowns versus previous years. Redder colors indicate increases in CO₂. Bluer colors indicate lower levels of CO₂." 51 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/no2-diff.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "no2-diff", 3 | "name": "NO\u2082 (Diff)", 4 | "type": "raster-timeseries", 5 | "time_unit": "month", 6 | "is_periodic": false, 7 | "s3_location": "OMNO2d_HRMDifference", 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/OMNO2d_HRMDifference/OMI_trno2_0.10x0.10_{date}_Col3_V4.nc.tif&resampling_method=bilinear&bidx=1&rescale=-8000000000000000%2C8000000000000000&color_map=rdbu_r" 12 | ] 13 | }, 14 | "paint": { 15 | "raster-opacity": 0.9 16 | }, 17 | "exclusive_with": [ 18 | "co2", 19 | "co2-diff", 20 | "fb-population-density", 21 | "gibs-population", 22 | "car-count", 23 | "nightlights-viirs", 24 | "nightlights-hd", 25 | "detection-ship", 26 | "detection-multi", 27 | "water-chlorophyll", 28 | "water-spm", 29 | "water-pzd", 30 | "no2" 31 | ], 32 | "enabled": false, 33 | "swatch": { 34 | "color": "#f2a73a", 35 | "name": "Gold" 36 | }, 37 | "legend": { 38 | "type": "gradient", 39 | "min": "< -3", 40 | "max": "> 3", 41 | "stops": [ 42 | "#3A88BD", 43 | "#C9E0ED", 44 | "#E4EEF3", 45 | "#FDDCC9", 46 | "#DD7059" 47 | ] 48 | }, 49 | "info": "This layer shows changes in nitrogen dioxide (NO₂) levels. Redder colors indicate increases in NO₂. Bluer colors indicate lower levels of NO₂. Missing pixels indicate areas of no data most likely associated with cloud cover or snow." 
50 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/water-chlorophyll.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "water-chlorophyll", 3 | "name": "Chlorophyll-a Anomaly", 4 | "type": "raster-timeseries", 5 | "time_unit": "day", 6 | "is_periodic": false, 7 | "s3_location": "oc3_chla_anomaly", 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/oc3_chla_anomaly/anomaly-chl-{spotlightId}-{date}.tif&resampling_method=bilinear&bidx=1&rescale=-100%2C100&color_map=rdbu_r" 12 | ] 13 | }, 14 | "exclusive_with": [ 15 | "agriculture", 16 | "no2", 17 | "co2-diff", 18 | "co2", 19 | "fb-population-density", 20 | "gibs-population", 21 | "car-count", 22 | "nightlights-viirs", 23 | "nightlights-hd", 24 | "detection-multi", 25 | "water-spm", 26 | "water-pzd", 27 | "detections-ship", 28 | "detections-plane", 29 | "detections-contrail", 30 | "detections-vehicles" 31 | ], 32 | "swatch": { 33 | "color": "#154F8D", 34 | "name": "Deep blue" 35 | }, 36 | "legend": { 37 | "type": "gradient", 38 | "min": "less", 39 | "max": "more", 40 | "stops": [ 41 | "#3A88BD", 42 | "#C9E0ED", 43 | "#E4EEF3", 44 | "#FDDCC9", 45 | "#DE725B", 46 | "#67001F" 47 | ] 48 | }, 49 | "info": "Chlorophyll-a is an indicator of algae growth. Redder colors indicate increases in chlorophyll-a and worse water quality. Bluer colors indicate decreases in chlorophyll-a and improved water quality. White areas indicate no change." 50 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/water-pzd.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "water-pzd", 3 | "name": "Photic Zone Depth Anomaly", 4 | "type": "raster-timeseries", 5 | "time_unit": "day", 6 | "is_periodic": false, 7 | "s3_location": "pzd_anomaly", 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/pzd_anomaly/anomaly-pzd-{spotlightId}-{date}.tif&resampling_method=bilinear&bidx=1&rescale=-100%2C100&color_map=rdbu_r" 12 | ] 13 | }, 14 | "exclusive_with": [ 15 | "agriculture", 16 | "no2", 17 | "co2-diff", 18 | "co2", 19 | "fb-population-density", 20 | "gibs-population", 21 | "car-count", 22 | "nightlights-viirs", 23 | "nightlights-hd", 24 | "detection-multi", 25 | "water-spm", 26 | "water-chlorophyll", 27 | "detections-ship", 28 | "detections-plane", 29 | "detections-contrail", 30 | "detections-vehicles" 31 | ], 32 | "swatch": { 33 | "color": "#154F8D", 34 | "name": "Deep blue" 35 | }, 36 | "legend": { 37 | "type": "gradient", 38 | "min": "less", 39 | "max": "more", 40 | "stops": [ 41 | "#3A88BD", 42 | "#C9E0ED", 43 | "#E4EEF3", 44 | "#FDDCC9", 45 | "#DE725B", 46 | "#67001F" 47 | ] 48 | }, 49 | "info": "Photic zone depth (PZD) is the depth at which only 1% of the surface irradiation remains, and is an indicator of water clarity. PZD is impacted by concentrations of chlorophyll-a, suspended solids, and CDOM. Redder colors indicate increased water clarity. Bluer colors indicate decreased water clarity. White areas indicate no change." 
50 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | Pipfile 28 | 29 | cdk.context.json 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | *.env* 88 | 89 | # virtualenv 90 | .venv 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | cdk.out/* 108 | *.tif 109 | 110 | # vscode 111 | .vscode 112 | -------------------------------------------------------------------------------- /covid_api/db/static/sites/__init__.py: -------------------------------------------------------------------------------- 1 | """ covid_api static sites """ 2 | import os 3 | from enum import Enum 4 | from typing import List 5 | 6 | from covid_api.db.static.errors import InvalidIdentifier 7 | from covid_api.db.utils import get_indicators, indicator_exists, indicator_folders 8 | from covid_api.models.static import Site, Sites 9 | 10 | data_dir = os.path.join(os.path.dirname(__file__)) 11 | 12 | 13 | class SiteManager(object): 14 | """Default Site holder.""" 15 | 16 | def __init__(self): 17 | """Load all datasets in a dict.""" 18 | site_ids = [ 19 | os.path.splitext(f)[0] for f in os.listdir(data_dir) if f.endswith(".json") 20 | ] 21 | 22 | self._data = { 23 | site: Site.parse_file(os.path.join(data_dir, f"{site}.json")) 24 | for site in site_ids 25 | } 26 | 27 | def get(self, identifier: str) -> Site: 28 | """Fetch a Site.""" 29 | try: 30 | site = self._data[identifier] 31 | site.indicators = get_indicators(identifier) 32 | return site 33 | except KeyError: 34 | raise InvalidIdentifier(f"Invalid identifier: {identifier}") 35 | 36 | def get_all(self) -> Sites: 37 | """Fetch all Sites.""" 38 | all_sites = [site.dict() for site in self._data.values()] 39 | indicators = indicator_folders() 40 | # add indicator ids 41 | for site in all_sites: 42 | site["indicators"] = [ 43 | ind for ind in indicators if indicator_exists(site["id"], ind) 44 | ] 45 | return Sites(sites=all_sites) 46 | 47 | def list(self) -> 
List[str]: 48 | """List all sites""" 49 | return list(self._data.keys()) 50 | 51 | 52 | sites = SiteManager() 53 | 54 | SiteNames = Enum("SiteNames", [(site, site) for site in sites.list()])  # type: ignore 55 | -------------------------------------------------------------------------------- /guidelines/vector.md: -------------------------------------------------------------------------------- 1 | # Contributing Vector Data 2 | 3 | ## Data structure 4 | All vector data for the site is stored as [GeoJSON](https://geojson.org/). GeoJSON can be validated at [https://geojsonlint.com/](https://geojsonlint.com/) or with a variety of other software tools. 5 | 6 | ## Naming convention 7 | 8 | All vector files are currently used for representing machine learning predictions. They should be added to a folder named `detections-[type]` where `[type]` is the class name of the object being detected (e.g. ships, planes). After that, the GeoJSON files should be added to site-specific folders, where sites can be found at https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/sites. Then the file should be named with the date, formatted as: 9 | - `YYYYMM` for monthly data 10 | - `YYYY_MM_DD` for sub-monthly data (anything with daily or semi-periodic data) 11 | 12 | So the final file path on AWS S3 will appear as: 13 | 14 | ```sh 15 | s3://covid-eo-data/detections-[type]/[site]/[date].geojson 16 | ``` 17 | 18 | One example is: 19 | 20 | ```sh 21 | s3://covid-eo-data/detections-ship/ny/2020_01_20.geojson 22 | ``` 23 | 24 | ## Associated Raster Data 25 | 26 | Please indicate if there is associated raster data which should be shown alongside the machine learning predictions. Current predictions are displayed alongside select imagery from [Planet](https://www.planet.com/). 27 | 28 | ## Delivery mechanism 29 | 30 | There are two mechanisms for making vector data available through this API: 31 | - **send to API maintainers**: if you'd like to keep the source data stored privately, please contact olaf@developmentseed.org or drew@developmentseed.org, and we can discuss other hosting options for the data. 32 | - **upload directly**: some science partners have direct S3 upload access. Those partners can upload to `s3://covid-eo-data/[dataset_folder]` where `[dataset_folder]` is an S3 folder containing the data. Each dataset should have a 1-1 relationship with a folder. 33 | -------------------------------------------------------------------------------- /guidelines/data-usage.md: -------------------------------------------------------------------------------- 1 | # Data usage 2 | 3 | The COVID-19 dashboard supports raster layers in COG format and indicator data for specific spotlight areas. This document outlines how the data can be presented to the user. 4 | 5 | # COGs 6 | 7 | *Coming soon* 8 | 9 | # Indicator data 10 | 11 | ## Timeseries line chart 12 | The timeseries line chart can be used to plot data over a given time period. It supports a main indicator value and its confidence zone (depicted in blue), a baseline value and its confidence zone, and highlight bands. 13 | The chart is interactive, allowing the user to view the values on hover.
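For orientation, here is a minimal offline sketch of the same ingredients using `pandas`/`matplotlib`, assuming a CSV shaped like the example in [Contributing Indicators Data](./indicators.md); the column names and the file name `ny.csv` are illustrative, not prescribed by the API:

```python
import matplotlib.pyplot as plt
import pandas as pd

# Illustrative columns; real files declare theirs in the indicator metadata JSON.
df = pd.read_csv("ny.csv", parse_dates=["date_obs"], dayfirst=True)

fig, ax = plt.subplots()
ax.plot(df["date_obs"], df["median"], color="tab:blue", label="indicator")
ax.plot(df["date_obs"], df["baseline_median"], color="tab:gray", label="baseline")

# Confidence zone, when the optional columns are present.
if {"indicator_conf_low", "indicator_conf_high"} <= set(df.columns):
    ax.fill_between(
        df["date_obs"],
        df["indicator_conf_low"],
        df["indicator_conf_high"],
        color="tab:blue",
        alpha=0.2,
    )

# Highlight band, e.g. a lockdown interval taken from the metadata.
ax.axvspan(
    pd.Timestamp("2020-02-01"), pd.Timestamp("2020-03-06"), color="gold", alpha=0.2
)
ax.legend()
plt.show()
```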
14 | 15 | ![Interactive chart gif](./images/chart-interactive.gif) 16 | 17 | ### Trendline - required 18 | As listed in the [Contributing Indicators Data](./indicators.md) document, only the `date` and `indicator` properties are required, which will result in a simple chart. 19 | 20 | ![Chart with indicator line](./images/chart-indicator.png) 21 | 22 | ### Baseline and confidence intervals - optional 23 | If available in the dataset, the chart will show the confidence region for both the indicator and the baseline as a shaded version of the line color. 24 | 25 | ![Chart with indicator line and confidence](./images/chart-confidence.png) 26 | 27 | ### Highlight bands - optional 28 | The highlight bands are useful to call the user's attention to a specific time interval. 29 | They're defined by providing an interval with start and end dates and an optional label. 30 | 31 | ![Chart with indicator line and highlight bands](./images/chart-bands.png) 32 | 33 | ## Bar chart 34 | The bar chart can be used to display discrete values on specific dates, where there's no linear correlation between time periods. It will show a bar for each date entry at monthly intervals. 35 | The chart is interactive, allowing the user to view the values on hover. 36 | 37 | ![Interactive chart gif](./images/bar-chart-interactive.gif) 38 | 39 | ### Data 40 | The only required properties for the bar chart are `date` and `indicator`. 41 | -------------------------------------------------------------------------------- /covid_api/db/static/datasets/agriculture.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "agriculture", 3 | "name": "Agriculture", 4 | "type": "raster-timeseries", 5 | "s3_location": "agriculture-cropmonitor", 6 | "info": "", 7 | "time_unit": "month", 8 | "is_periodic": true, 9 | "source": { 10 | "type": "raster", 11 | "tiles": [ 12 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/agriculture-cropmonitor/CropMonitor_{date}.tif&resampling_method=nearest&bidx=1&color_map=custom_cropmonitor" 13 | ] 14 | }, 15 | "exclusive_with": [ 16 | "no2", 17 | "no2-diff", 18 | "co2", 19 | "co2-diff", 20 | "fb-population-density", 21 | "gibs-population", 22 | "car-count", 23 | "nightlights-viirs", 24 | "nightlights-hd", 25 | "detection-multi", 26 | "water-chlorophyll", 27 | "water-spm", 28 | "water-pzd", 29 | "detections-ship", 30 | "detections-plane", 31 | "detections-contrail", 32 | "detections-vehicles" 33 | ], 34 | "enabled": false, 35 | "swatch": { 36 | "color": "#C0C0C0", 37 | "name": "Grey" 38 | }, 39 | "legend": { 40 | "type": "categorical", 41 | "stops": [ 42 | { 43 | "color": "#3C8EC4", 44 | "label": "Exceptional" 45 | }, 46 | { 47 | "color": "#6ECC51", 48 | "label": "Favourable" 49 | }, 50 | { 51 | "color": "#F3EF4F", 52 | "label": "Watch" 53 | }, 54 | { 55 | "color": "#DF6335", 56 | "label": "Poor" 57 | }, 58 | { 59 | "color": "#7E170E", 60 | "label": "Failure" 61 | }, 62 | { 63 | "color": "#777879", 64 | "label": "Out of season" 65 | }, 66 | { 67 | "color": "#794416", 68 | "label": "No data" 69 | } 70 | ] 71 | } 72 | } -------------------------------------------------------------------------------- /stack/config.py: -------------------------------------------------------------------------------- 1 | """STACK Configs.""" 2 | 3 | import os 4 | 5 | PROJECT_NAME = "covid-api" 6 | STAGE = os.environ.get("STAGE", "dev") 7 | 8 | # primary bucket 9 | BUCKET = "covid-eo-data" 10 | 11 | # Additional environment variable to set in the task/lambda 12 | TASK_ENV:
dict = dict() 13 | 14 | # Existing VPC to point ECS/LAMBDA stacks towards. Defaults to creating a new 15 | # VPC if no ID is supplied. 16 | VPC_ID = os.environ.get("VPC_ID") 17 | 18 | 19 | ################################################################################ 20 | # # 21 | # ECS # 22 | # # 23 | ################################################################################ 24 | # Min/max number of ECS instances 25 | MIN_ECS_INSTANCES: int = 5 26 | MAX_ECS_INSTANCES: int = 50 27 | 28 | # CPU value | Memory value 29 | # 256 (.25 vCPU) | 0.5 GB, 1 GB, 2 GB 30 | # 512 (.5 vCPU) | 1 GB, 2 GB, 3 GB, 4 GB 31 | # 1024 (1 vCPU) | 2 GB, 3 GB, 4 GB, 5 GB, 6 GB, 7 GB, 8 GB 32 | # 2048 (2 vCPU) | Between 4 GB and 16 GB in 1-GB increments 33 | # 4096 (4 vCPU) | Between 8 GB and 30 GB in 1-GB increments 34 | TASK_CPU: int = 256 35 | TASK_MEMORY: int = 512 36 | 37 | ################################################################################ 38 | # # 39 | # LAMBDA # 40 | # # 41 | ################################################################################ 42 | # TIMEOUT: int = 10 43 | TIMEOUT: int = 30 44 | MEMORY: int = 3008 45 | # MEMORY: int = 10240 46 | 47 | # stack skips setting concurrency if this value is 0 48 | # the stack will instead use unreserved lambda concurrency 49 | MAX_CONCURRENT: int = 500 if STAGE == "prod" else 0 50 | 51 | # Cache 52 | CACHE_NODE_TYPE = "cache.m5.large" 53 | CACHE_ENGINE = "memcached" 54 | CACHE_NODE_NUM = 1 55 | -------------------------------------------------------------------------------- /covid_api/db/static/datasets/no2.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "no2", 3 | "name": "NO\u2082", 4 | "type": "raster-timeseries", 5 | "s3_location": "OMNO2d_HRM", 6 | "time_unit": "month", 7 | "is_periodic": true, 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/OMNO2d_HRM/OMI_trno2_0.10x0.10_{date}_Col3_V4.nc.tif&resampling_method=bilinear&bidx=1&rescale=0%2C1.5e16&color_map=custom_no2&color_formula=gamma r {gamma}" 12 | ] 13 | }, 14 | "paint": { 15 | "raster-opacity": 0.9 16 | }, 17 | "exclusive_with": [ 18 | "agriculture", 19 | "co2", 20 | "co2-diff", 21 | "fb-population-density", 22 | "gibs-population", 23 | "car-count", 24 | "nightlights-viirs", 25 | "nightlights-hd", 26 | "detection-multi", 27 | "water-chlorophyll", 28 | "water-spm", 29 | "water-pzd", 30 | "detections-ship", 31 | "detections-plane", 32 | "detections-contrail", 33 | "detections-vehicles" 34 | ], 35 | "enabled": true, 36 | "compare": { 37 | "enabled": true, 38 | "help": "Compare with baseline (5 previous years)", 39 | "map_label": "{date}: Base vs Mean", 40 | "year_diff": 2, 41 | "time_unit": "month_only", 42 | "source": { 43 | "type": "raster", 44 | "tiles": [ 45 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/OMNO2d_HRMBaseline/OMI_trno2_0.10x0.10_Baseline_{date}_Col3_V4.nc.tif&resampling_method=bilinear&bidx=1&rescale=0%2C1.5e16&color_map=custom_no2&color_formula=gamma r {gamma}" 46 | ] 47 | } 48 | }, 49 | "swatch": { 50 | "color": "#f2a73a", 51 | "name": "Gold" 52 | }, 53 | "legend": { 54 | "type": "gradient-adjustable", 55 | "min": "less", 56 | "max": "more", 57 | "stops": [ 58 | "#99c5e0", 59 | "#f9eaa9", 60 | "#f7765d", 61 | "#c13b72", 62 | "#461070", 63 | "#050308" 64 | ] 65 | }, 66 | "info": "Darker colors indicate higher nitrogen dioxide (NO₂) levels and more activity. Lighter colors indicate lower levels of NO₂ and less activity.
Missing pixels indicate areas of no data most likely associated with cloud cover or snow." 67 | } -------------------------------------------------------------------------------- /covid_api/db/static/datasets/co2.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "co2", 3 | "name": "CO\u2082 (Avg)", 4 | "type": "raster-timeseries", 5 | "time_unit": "day", 6 | "s3_location": "xco2-mean", 7 | "is_periodic": true, 8 | "source": { 9 | "type": "raster", 10 | "tiles": [ 11 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/xco2-mean/xco2_16day_mean.{date}.tif&resampling_method=bilinear&bidx=1&rescale=0.0003908%2C0.0004225&color_map=rdylbu_r&color_formula=gamma r {gamma}" 12 | ] 13 | }, 14 | "exclusive_with": [ 15 | "agriculture", 16 | "no2", 17 | "co2-diff", 18 | "fb-population-density", 19 | "gibs-population", 20 | "car-count", 21 | "nightlights-viirs", 22 | "nightlights-hd", 23 | "detection-multi", 24 | "water-chlorophyll", 25 | "water-spm", 26 | "water-pzd", 27 | "detections-ship", 28 | "detections-plane", 29 | "detections-contrail", 30 | "detections-vehicles" 31 | ], 32 | "enabled": false, 33 | "compare": { 34 | "enabled": true, 35 | "help": "Compare with baseline", 36 | "year_diff": 0, 37 | "map_label": "{date}: Base vs Mean", 38 | "source": { 39 | "type": "raster", 40 | "tiles": [ 41 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/xco2-base/xco2_16day_base.{date}.tif&resampling_method=bilinear&bidx=1&rescale=0.000408%2C0.000419&color_map=rdylbu_r&color_formula=gamma r {gamma}" 42 | ] 43 | } 44 | }, 45 | "swatch": { 46 | "color": "#189C54", 47 | "name": "Dark Green" 48 | }, 49 | "legend": { 50 | "type": "gradient-adjustable", 51 | "min": "< 391 ppm", 52 | "max": "> 423 ppm", 53 | "stops": [ 54 | "#313695", 55 | "#588cbf", 56 | "#a3d2e5", 57 | "#e8f6e8", 58 | "#fee89c", 59 | "#fba55c", 60 | "#e24932" 61 | ] 62 | }, 63 | "info": "This layer shows the average background concentration of carbon dioxide (CO₂) in our atmosphere for 2020. Redder colors indicate more CO₂. Bluer colors indicate less CO₂." 64 | } -------------------------------------------------------------------------------- /covid_api/db/memcache.py: -------------------------------------------------------------------------------- 1 | """covid_api.cache.memcache: memcached layer.""" 2 | 3 | from typing import Dict, Optional, Tuple, Union 4 | 5 | from bmemcached import Client 6 | 7 | from covid_api.models.static import Datasets 8 | from covid_api.ressources.enums import ImageType 9 | 10 | 11 | class CacheLayer(object): 12 | """Memcache Wrapper.""" 13 | 14 | def __init__( 15 | self, 16 | host, 17 | port: int = 11211, 18 | user: Optional[str] = None, 19 | password: Optional[str] = None, 20 | ): 21 | """Init Cache Layer.""" 22 | self.client = Client((f"{host}:{port}",), user, password) 23 | 24 | def get_image_from_cache(self, img_hash: str) -> Tuple[bytes, ImageType]: 25 | """ 26 | Get image body from cache layer. 27 | 28 | Attributes 29 | ---------- 30 | img_hash : str 31 | file url. 32 | 33 | Returns 34 | ------- 35 | img : bytes 36 | image body. 37 | ext : str 38 | image ext 39 | 40 | """ 41 | content, ext = self.client.get(img_hash) 42 | return content, ext 43 | 44 | def set_image_cache( 45 | self, img_hash: str, body: Tuple[bytes, ImageType], timeout: int = 432000 46 | ) -> bool: 47 | """ 48 | Set base64 encoded image body in cache layer. 49 | 50 | Attributes 51 | ---------- 52 | img_hash : str 53 | file url. 
54 | body : tuple 55 | image body + ext 56 | Returns 57 | ------- 58 | bool 59 | 60 | """ 61 | try: 62 | return self.client.set(img_hash, body, time=timeout) 63 | except Exception: 64 | return False 65 | 66 | def get_dataset_from_cache(self, ds_hash: str) -> Union[Dict, bool]: 67 | """Get dataset response from cache layer""" 68 | return self.client.get(ds_hash) 69 | 70 | def set_dataset_cache( 71 | self, ds_hash: str, body: Datasets, timeout: int = 3600 72 | ) -> bool: 73 | """Set dataset response in cache layer""" 74 | try: 75 | return self.client.set(ds_hash, body.json(), time=timeout) 76 | except Exception: 77 | return False 78 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup covid_api.""" 2 | 3 | from setuptools import find_packages, setup 4 | 5 | with open("README.md") as f: 6 | long_description = f.read() 7 | 8 | inst_reqs = [ 9 | "fastapi==0.60.0", 10 | "jinja2==3.0.3", 11 | "python-binary-memcached", 12 | "rio-color", 13 | "rio-tiler==2.0a.11", 14 | "fiona", 15 | "shapely", 16 | "area", 17 | "rasterio==1.1.8", 18 | "rasterstats", 19 | "geojson-pydantic", 20 | "requests", 21 | "mercantile", 22 | ] 23 | extra_reqs = { 24 | "dev": ["pytest", "pytest-cov", "pytest-asyncio", "pre-commit"], 25 | "server": ["uvicorn", "click==7.0"], 26 | "deploy": [ 27 | "docker", 28 | "attrs", 29 | "aws-cdk.core>=1.72.0", 30 | "aws-cdk.aws_lambda>=1.72.0", 31 | "aws-cdk.aws_apigatewayv2>=1.72.0", 32 | "aws-cdk.aws_apigatewayv2_integrations>=1.72.0", 33 | "aws-cdk.aws_ecs>=1.72.0", 34 | "aws-cdk.aws_ec2>=1.72.0", 35 | "aws-cdk.aws_autoscaling>=1.72.0", 36 | "aws-cdk.aws_ecs_patterns>=1.72.0", 37 | "aws-cdk.aws_iam>=1.72.0", 38 | "aws-cdk.aws_elasticache>=1.72.0", 39 | ], 40 | "test": ["moto[iam]", "mock", "pytest", "pytest-cov", "pytest-asyncio", "requests"], 41 | } 42 | 43 | 44 | setup( 45 | name="covid_api", 46 | version="0.6.2", 47 | description=u"", 48 | long_description=long_description, 49 | long_description_content_type="text/markdown", 50 | python_requires=">=3", 51 | classifiers=[ 52 | "Intended Audience :: Information Technology", 53 | "Intended Audience :: Science/Research", 54 | "License :: OSI Approved :: BSD License", 55 | "Programming Language :: Python :: 3.6", 56 | "Programming Language :: Python :: 3.7", 57 | ], 58 | keywords="", 59 | author=u"Development Seed", 60 | author_email="info@developmentseed.org", 61 | url="https://github.com/developmentseed/covid_api", 62 | license="MIT", 63 | packages=find_packages(exclude=["ez_setup", "examples", "tests"]), 64 | package_data={ 65 | "covid_api": ["templates/*.html", "templates/*.xml", "db/static/**/*.json"] 66 | }, 67 | include_package_data=True, 68 | zip_safe=False, 69 | install_requires=inst_reqs, 70 | extras_require=extra_reqs, 71 | ) 72 | -------------------------------------------------------------------------------- /covid_api/models/timelapse.py: -------------------------------------------------------------------------------- 1 | """Timelapse models.""" 2 | 3 | import re 4 | from typing import List, Optional 5 | 6 | from area import area 7 | from geojson_pydantic.features import Feature 8 | from geojson_pydantic.geometries import Polygon 9 | from pydantic import BaseModel, validator 10 | 11 | from covid_api.core import config 12 | 13 | 14 | def to_camel(s): 15 | """Convert string s from `snake_case` to `camelCase`""" 16 | return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), s) 17 | 18 |
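# Example: with `alias_generator = to_camel` (see `TimelapseRequest.Config`
# below), camelCase request payload keys populate the snake_case model
# fields, e.g.:
#     to_camel("date_range")   -> "dateRange"
#     to_camel("spotlight_id") -> "spotlightId"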
19 | class PolygonFeature(Feature): 20 | """Feature model.""" 21 | 22 | geometry: Polygon 23 | 24 | 25 | class TimelapseValue(BaseModel): 26 | """Timelapse values model.""" 27 | 28 | # TODO: there should be a check that either 29 | # mean and median are not-null OR error is not-null, 30 | # but NOT BOTH. 31 | date: Optional[str] 32 | mean: Optional[float] 33 | median: Optional[float] 34 | error: Optional[str] 35 | 36 | 37 | class TimelapseRequest(BaseModel): 38 | """Timelapse request model.""" 39 | 40 | # TODO: parse date/date_range into a python `datetime` object (maybe using a validator? ) 41 | # TODO: validate that exactly one of `date` or `date_range` is supplied 42 | date: Optional[str] 43 | date_range: Optional[List[str]] 44 | geojson: PolygonFeature 45 | dataset_id: str 46 | spotlight_id: Optional[str] 47 | 48 | @validator("geojson") 49 | def validate_query_area(cls, v, values): 50 | """Ensure that the requested AOI is not larger than 200 000 km^2, otherwise 51 | the query takes too long.""" 52 | if area(v.geometry.dict()) / ( 53 | 1000 * 1000 54 | ) > config.TIMELAPSE_MAX_AREA and values.get("date_range"): 55 | 56 | raise ValueError( 57 | "AOI cannot exceed 200 000 km^2 when queried with a date range. " 58 | "To query with this AOI, please query with a single date" 59 | ) 60 | return v 61 | 62 | @validator("date_range") 63 | def validate_date_objects(cls, v): 64 | 65 | """Validate that `dateRange` contains exactly 2 dates.""" 66 | if not len(v) == 2: 67 | raise ValueError("Field `dateRange` must contain exactly 2 dates") 68 | return v 69 | 70 | class Config: 71 | """Generate alias to convert `camelCase` requests to `snake_case` fields to be used 72 | within the code""" 73 | 74 | alias_generator = to_camel 75 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.2' 2 | 3 | services: 4 | api: 5 | build: 6 | context: . 7 | dockerfile: Dockerfiles/ecs/Dockerfile 8 | ports: 9 | - "8000:8000" 10 | volumes: 11 | - type: bind 12 | source: ./covid_api 13 | target: /app/covid_api 14 | environment: 15 | - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} 16 | - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} 17 | - AWS_ENDPOINT_URL=http://localstack:4566 18 | - CPL_TMPDIR=/tmp 19 | - GDAL_CACHEMAX=75% 20 | - GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR 21 | - GDAL_HTTP_MERGE_CONSECUTIVE_RANGES=YES 22 | - GDAL_HTTP_MULTIPLEX=YES 23 | - GDAL_HTTP_VERSION=2 24 | - MODULE_NAME=covid_api.main 25 | - MEMCACHE_HOST=memcached 26 | - MEMCACHE_USERNAME=myuser 27 | - MEMCACHE_PASSWORD=mypassword 28 | - DISABLE_CACHE=yesplease 29 | - PLANET_API_KEY=${PLANET_API_KEY} 30 | - DATASET_METADATA_FILENAME=dev-dataset-metadata.json 31 | - DATASET_METADATA_GENERATOR_FUNCTION_NAME=dev-dataset-metadata-generator 32 | - PORT=8000 33 | - PYTHONWARNINGS=ignore 34 | - VARIABLE_NAME=app 35 | - VSI_CACHE=TRUE 36 | - VSI_CACHE_SIZE=536870912 37 | - WORKERS_PER_CORE=5 38 | 39 | # localstack for local development only.
AWS S3 used for staging/production 40 | localstack: 41 | image: localstack/localstack:0.12.7 42 | environment: 43 | SERVICES: s3 44 | AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID} 45 | AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY} 46 | AWS_ENDPOINT_URL: http://localhost:4566 47 | AWS_DEFAULT_REGION: us-east-1 48 | HOSTNAME: localhost 49 | HOSTNAME_EXTERNAL: localhost 50 | DATA_BUCKET_NAME: covid-eo-data 51 | DATASET_METADATA_FILENAME: dev-dataset-metadata.json 52 | ports: 53 | - "4566:4566" 54 | volumes: 55 | - ./localstack/setup.sh:/docker-entrypoint-initaws.d/setup.sh 56 | - ./lambda/dataset_metadata_generator/src/main.py:/docker-entrypoint-initaws.d/main.py 57 | - ./covid_api/db/static/datasets:/docker-entrypoint-initaws.d/datasets 58 | - ./covid_api/db/static/sites:/docker-entrypoint-initaws.d/sites 59 | 60 | 61 | memcached: 62 | image: remotepixel/memcached-sasl:latest 63 | environment: 64 | - MEMCACHED_USERNAME=myuser 65 | - MEMCACHED_PASSWORD=mypassword 66 | ports: 67 | - "11211:11211" 68 | 69 | -------------------------------------------------------------------------------- /tests/routes/v1/test_metadata.py: -------------------------------------------------------------------------------- 1 | """test /v1/metadata endpoints.""" 2 | 3 | # from typing import Dict 4 | 5 | from mock import patch 6 | 7 | from ...conftest import mock_rio 8 | 9 | 10 | @patch("covid_api.api.api_v1.endpoints.metadata.cogeo.rasterio") 11 | def test_tilejson(rio, app): 12 | """test /tilejson endpoint.""" 13 | rio.open = mock_rio 14 | 15 | response = app.get("/v1/tilejson.json?url=https://myurl.com/cog.tif") 16 | assert response.status_code == 200 17 | body = response.json() 18 | assert body["tilejson"] == "2.2.0" 19 | assert body["version"] == "1.0.0" 20 | assert body["scheme"] == "xyz" 21 | assert len(body["tiles"]) == 1 22 | assert body["tiles"][0].startswith("http://testserver/v1/{z}/{x}/{y}@1x?url=https") 23 | assert body["minzoom"] == 5 24 | assert body["maxzoom"] == 8 25 | assert body["bounds"] 26 | assert body["center"] 27 | 28 | response = app.get( 29 | "/v1/tilejson.json?url=https://myurl.com/cog.tif&tile_format=png&tile_scale=2" 30 | ) 31 | assert response.status_code == 200 32 | body = response.json() 33 | assert body["tiles"][0].startswith( 34 | "http://testserver/v1/{z}/{x}/{y}@2x.png?url=https" 35 | ) 36 | 37 | 38 | @patch("covid_api.api.api_v1.endpoints.metadata.cogeo.rasterio") 39 | def test_bounds(rio, app): 40 | """test /bounds endpoint.""" 41 | rio.open = mock_rio 42 | 43 | response = app.get("/v1/bounds?url=https://myurl.com/cog.tif") 44 | assert response.status_code == 200 45 | body = response.json() 46 | assert body["address"] == "https://myurl.com/cog.tif" 47 | assert len(body["bounds"]) == 4 48 | 49 | 50 | @patch("covid_api.api.api_v1.endpoints.metadata.cogeo.rasterio") 51 | def test_metadata(rio, app): 52 | """test /metadata endpoint.""" 53 | rio.open = mock_rio 54 | 55 | response = app.get("/v1/metadata?url=https://myurl.com/cog.tif") 56 | assert response.status_code == 200 57 | body = response.json() 58 | assert body["address"] == "https://myurl.com/cog.tif" 59 | assert len(body["bounds"]) == 4 60 | assert body["statistics"] 61 | assert len(body["statistics"]["1"]["histogram"][0]) == 20 62 | assert body["band_descriptions"] == [[1, "band1"]] 63 | assert body["dtype"] == "uint16" 64 | assert body["colorinterp"] == ["gray"] 65 | assert body["nodata_type"] == "None" 66 | 67 | response = app.get( 68 | 
"/v1/metadata?url=https://myurl.com/cog.tif&histogram_bins=5&histogram_range=1,1000&nodata=0" 69 | ) 70 | assert response.status_code == 200 71 | body = response.json() 72 | assert len(body["statistics"]["1"]["histogram"][0]) == 5 73 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/modis.py: -------------------------------------------------------------------------------- 1 | """API planet mosaic tiles.""" 2 | from functools import partial 3 | from typing import Any, Dict 4 | 5 | from rio_tiler.utils import render 6 | 7 | from covid_api.api import utils 8 | from covid_api.db.memcache import CacheLayer 9 | from covid_api.ressources.common import mimetype 10 | from covid_api.ressources.enums import ImageType 11 | from covid_api.ressources.responses import TileResponse 12 | 13 | from fastapi import APIRouter, Depends, Path, Query 14 | 15 | from starlette.concurrency import run_in_threadpool 16 | 17 | _render = partial(run_in_threadpool, render) 18 | _tile = partial(run_in_threadpool, utils.modis_tile) 19 | 20 | router = APIRouter() 21 | responses = { 22 | 200: { 23 | "content": { 24 | "image/png": {}, 25 | "image/jpg": {}, 26 | "image/webp": {}, 27 | "image/tiff": {}, 28 | "application/x-binary": {}, 29 | }, 30 | "description": "Return an image.", 31 | } 32 | } 33 | tile_routes_params: Dict[str, Any] = dict( 34 | responses=responses, tags=["modis"], response_class=TileResponse 35 | ) 36 | 37 | 38 | @router.get(r"/modis/{z}/{x}/{y}", **tile_routes_params) 39 | async def tile( 40 | z: int = Path(..., ge=0, le=30, description="Mercator tiles's zoom level"), 41 | x: int = Path(..., description="Mercator tiles's column"), 42 | y: int = Path(..., description="Mercator tiles's row"), 43 | date: str = Query(..., description="date of site for detections"), 44 | cache_client: CacheLayer = Depends(utils.get_cache), 45 | ) -> TileResponse: 46 | """Handle /modis requests.""" 47 | timings = [] 48 | headers: Dict[str, str] = {} 49 | 50 | tile_hash = utils.get_hash(**dict(z=z, x=x, y=y, date=date, modis=True)) 51 | 52 | content = None 53 | if cache_client: 54 | try: 55 | content, ext = cache_client.get_image_from_cache(tile_hash) 56 | headers["X-Cache"] = "HIT" 57 | except Exception: 58 | content = None 59 | 60 | if not content: 61 | with utils.Timer() as t: 62 | content = await _tile(x, y, z, date) 63 | 64 | timings.append(("Read", t.elapsed)) 65 | timings.append(("Format", t.elapsed)) 66 | 67 | if cache_client and content: 68 | cache_client.set_image_cache(tile_hash, (content, ImageType.png)) 69 | 70 | if timings: 71 | headers["X-Server-Timings"] = "; ".join( 72 | ["{} - {:0.2f}".format(name, time * 1000) for (name, time) in timings] 73 | ) 74 | 75 | return TileResponse( 76 | content, media_type=mimetype[ImageType.png.value], headers=headers 77 | ) 78 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | test: 4 | docker: 5 | - image: circleci/python:3.7 6 | environment: 7 | - TOXENV: py37 8 | working_directory: ~/covid-api 9 | steps: 10 | - checkout 11 | - run: 12 | name: install dependencies 13 | command: pip install tox codecov pre-commit --user 14 | - run: 15 | name: test 16 | command: ~/.local/bin/tox 17 | - run: 18 | name: run pre-commit 19 | command: ~/.local/bin/pre-commit run --all-files 20 | 21 | deploy-staging: 22 | machine: 23 | image: ubuntu-2004:202010-01 24 | 
working_directory: ~/covid-api 25 | steps: 26 | - checkout 27 | - run: 28 | name: use python 3 29 | command: | 30 | pyenv global 3.8.5 31 | 32 | - run: 33 | name: install dependencies 34 | command: | 35 | pip install -e .["deploy"] --user 36 | npm install -g cdk 37 | 38 | - deploy: 39 | name: develop branch deployed to staging cdk stack 40 | command: | 41 | if [ "${CIRCLE_BRANCH}" == "develop" ]; then 42 | STAGE='staging' cdk deploy covid-api-dataset-metadata-generator-staging --region us-east-1 --require-approval never 43 | STAGE='staging' cdk deploy covid-api-lambda-staging --region us-east-1 --require-approval never 44 | fi 45 | 46 | deploy-production: 47 | machine: 48 | image: ubuntu-2004:202010-01 49 | working_directory: ~/covid-api 50 | steps: 51 | - checkout 52 | - run: 53 | name: use python 3 54 | command: | 55 | pyenv global 3.8.5 56 | 57 | - run: 58 | name: install dependencies 59 | command: | 60 | pip install -e .["deploy"] --user 61 | npm install -g cdk 62 | 63 | - deploy: 64 | name: master branch deployed to production cdk stack 65 | command: | 66 | if [ "${CIRCLE_BRANCH}" == "master" ]; then 67 | STAGE='prod' cdk deploy covid-api-dataset-metadata-generator-prod --region us-east-1 --require-approval never 68 | STAGE='prod' cdk deploy covid-api-lambda-prod --region us-east-1 --require-approval never 69 | fi 70 | 71 | workflows: 72 | version: 2 73 | test_and_deploy: 74 | jobs: 75 | - test 76 | - deploy-staging: 77 | requires: 78 | - test 79 | filters: 80 | branches: 81 | # only: /^feature\/.*/ 82 | only: develop 83 | - deploy-production: 84 | requires: 85 | - test 86 | filters: 87 | branches: 88 | only: master -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/planet.py: -------------------------------------------------------------------------------- 1 | """API planet mosaic tiles.""" 2 | from functools import partial 3 | from typing import Any, Dict 4 | 5 | from rio_tiler.utils import render 6 | 7 | from covid_api.api import utils 8 | from covid_api.db.memcache import CacheLayer 9 | from covid_api.ressources.common import mimetype 10 | from covid_api.ressources.enums import ImageType 11 | from covid_api.ressources.responses import TileResponse 12 | 13 | from fastapi import APIRouter, Depends, Path, Query 14 | 15 | from starlette.concurrency import run_in_threadpool 16 | 17 | _render = partial(run_in_threadpool, render) 18 | _tile = partial(run_in_threadpool, utils.planet_mosaic_tile) 19 | 20 | router = APIRouter() 21 | responses = { 22 | 200: { 23 | "content": { 24 | "image/png": {}, 25 | "image/jpg": {}, 26 | "image/webp": {}, 27 | "image/tiff": {}, 28 | "application/x-binary": {}, 29 | }, 30 | "description": "Return an image.", 31 | } 32 | } 33 | tile_routes_params: Dict[str, Any] = dict( 34 | responses=responses, tags=["planet"], response_class=TileResponse 35 | ) 36 | 37 | 38 | @router.get(r"/planet/{z}/{x}/{y}", **tile_routes_params) 39 | async def tile( 40 | z: int = Path(..., ge=0, le=30, description="Mercator tiles's zoom level"), 41 | x: int = Path(..., description="Mercator tiles's column"), 42 | y: int = Path(..., description="Mercator tiles's row"), 43 | date: str = Query(..., description="date of site for detections"), 44 | site: str = Query(..., description="id of site for detections"), 45 | cache_client: CacheLayer = Depends(utils.get_cache), 46 | ) -> TileResponse: 47 | """Handle /planet requests.""" 48 | timings = [] 49 | headers: Dict[str, str] = {} 50 | 51 | scenes = utils.site_date_to_scenes(site, 
date) 52 | 53 | tile_hash = utils.get_hash(**dict(z=z, x=x, y=y, scenes=scenes, planet=True)) 54 | 55 | content = None 56 | if cache_client: 57 | try: 58 | content, ext = cache_client.get_image_from_cache(tile_hash) 59 | headers["X-Cache"] = "HIT" 60 | except Exception: 61 | content = None 62 | 63 | if not content: 64 | with utils.Timer() as t: 65 | tile, mask = await _tile(scenes, x, y, z) 66 | timings.append(("Read", t.elapsed)) 67 | 68 | content = await _render(tile, mask) 69 | 70 | timings.append(("Format", t.elapsed)) 71 | 72 | if cache_client and content: 73 | cache_client.set_image_cache(tile_hash, (content, ImageType.png)) 74 | 75 | if timings: 76 | headers["X-Server-Timings"] = "; ".join( 77 | ["{} - {:0.2f}".format(name, time * 1000) for (name, time) in timings] 78 | ) 79 | 80 | return TileResponse( 81 | content, media_type=mimetype[ImageType.png.value], headers=headers 82 | ) 83 | -------------------------------------------------------------------------------- /tests/routes/v1/test_tiles.py: -------------------------------------------------------------------------------- 1 | """test /v1/tiles endpoints.""" 2 | 3 | from io import BytesIO 4 | from typing import Dict 5 | 6 | import numpy 7 | from mock import patch 8 | from rasterio.io import MemoryFile 9 | 10 | from ...conftest import mock_rio 11 | 12 | 13 | def parse_img(content: bytes) -> Dict: 14 | with MemoryFile(content) as mem: 15 | with mem.open() as dst: 16 | return dst.meta 17 | 18 | 19 | @patch("covid_api.api.api_v1.endpoints.tiles.cogeo.rasterio") 20 | def test_tile(rio, app): 21 | """test tile endpoints.""" 22 | rio.open = mock_rio 23 | 24 | # full tile 25 | response = app.get("/v1/8/87/48?url=https://myurl.com/cog.tif&rescale=0,1000") 26 | assert response.status_code == 200 27 | assert response.headers["content-type"] == "image/jpg" 28 | meta = parse_img(response.content) 29 | assert meta["width"] == 256 30 | assert meta["height"] == 256 31 | 32 | response = app.get( 33 | "/v1/8/87/48@2x?url=https://myurl.com/cog.tif&rescale=0,1000&color_formula=Gamma R 3" 34 | ) 35 | assert response.status_code == 200 36 | assert response.headers["content-type"] == "image/jpg" 37 | meta = parse_img(response.content) 38 | assert meta["width"] == 512 39 | assert meta["height"] == 512 40 | 41 | response = app.get("/v1/8/87/48.jpg?url=https://myurl.com/cog.tif&rescale=0,1000") 42 | assert response.status_code == 200 43 | assert response.headers["content-type"] == "image/jpg" 44 | 45 | response = app.get( 46 | "/v1/8/87/48@2x.jpg?url=https://myurl.com/cog.tif&rescale=0,1000" 47 | ) 48 | assert response.status_code == 200 49 | assert response.headers["content-type"] == "image/jpg" 50 | 51 | response = app.get( 52 | "/v1/8/87/48@2x.tif?url=https://myurl.com/cog.tif&nodata=0&bidx=1" 53 | ) 54 | assert response.status_code == 200 55 | assert response.headers["content-type"] == "image/tiff" 56 | meta = parse_img(response.content) 57 | assert meta["dtype"] == "uint16" 58 | assert meta["count"] == 2 59 | assert meta["width"] == 512 60 | assert meta["height"] == 512 61 | 62 | response = app.get("/v1/8/87/48.npy?url=https://myurl.com/cog.tif&nodata=0") 63 | assert response.status_code == 200 64 | assert response.headers["content-type"] == "application/x-binary" 65 | t, m = numpy.load(BytesIO(response.content), allow_pickle=True) 66 | assert t.shape == (1, 256, 256) 67 | assert m.shape == (256, 256) 68 | 69 | # partial 70 | response = app.get("/v1/8/84/47?url=https://myurl.com/cog.tif&rescale=0,1000") 71 | assert response.status_code == 200 72 | 
assert response.headers["content-type"] == "image/png" 73 | 74 | response = app.get( 75 | "/v1/8/84/47?url=https://myurl.com/cog.tif&nodata=0&rescale=0,1000&color_map=viridis" 76 | ) 77 | assert response.status_code == 200 78 | assert response.headers["content-type"] == "image/png" 79 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/datasets.py: -------------------------------------------------------------------------------- 1 | """Dataset endpoints.""" 2 | from covid_api.api import utils 3 | from covid_api.core import config 4 | from covid_api.db.memcache import CacheLayer 5 | from covid_api.db.static.datasets import datasets 6 | from covid_api.db.static.errors import InvalidIdentifier 7 | from covid_api.models.static import Datasets 8 | 9 | from fastapi import APIRouter, Depends, HTTPException, Response 10 | 11 | from starlette.requests import Request 12 | 13 | router = APIRouter() 14 | 15 | 16 | @router.get( 17 | "/datasets", 18 | responses={200: dict(description="return a list of all available datasets")}, 19 | response_model=Datasets, 20 | ) 21 | def get_datasets( 22 | request: Request, 23 | response: Response, 24 | cache_client: CacheLayer = Depends(utils.get_cache), 25 | ): 26 | """Return a list of datasets.""" 27 | dataset_hash = utils.get_hash(spotlight_id="all") 28 | content = None 29 | 30 | if cache_client: 31 | content = cache_client.get_dataset_from_cache(dataset_hash) 32 | if content: 33 | content = Datasets.parse_raw(content) 34 | response.headers["X-Cache"] = "HIT" 35 | if not content: 36 | scheme = request.url.scheme 37 | host = request.headers["host"] 38 | if config.API_VERSION_STR: 39 | host += config.API_VERSION_STR 40 | 41 | content = datasets.get_all(api_url=f"{scheme}://{host}") 42 | 43 | if cache_client and content: 44 | cache_client.set_dataset_cache(dataset_hash, content) 45 | 46 | return content 47 | 48 | 49 | @router.get( 50 | "/datasets/{spotlight_id}", 51 | responses={ 52 | 200: dict(description="return datasets available for a given spotlight") 53 | }, 54 | response_model=Datasets, 55 | ) 56 | def get_dataset( 57 | request: Request, 58 | spotlight_id: str, 59 | response: Response, 60 | cache_client: CacheLayer = Depends(utils.get_cache), 61 | ): 62 | """Return dataset info for all datasets available for a given spotlight""" 63 | try: 64 | dataset_hash = utils.get_hash(spotlight_id=spotlight_id) 65 | content = None 66 | 67 | if cache_client: 68 | content = cache_client.get_dataset_from_cache(dataset_hash) 69 | if content: 70 | content = Datasets.parse_raw(content) 71 | response.headers["X-Cache"] = "HIT" 72 | if not content: 73 | scheme = request.url.scheme 74 | host = request.headers["host"] 75 | if config.API_VERSION_STR: 76 | host += config.API_VERSION_STR 77 | 78 | content = datasets.get(spotlight_id, api_url=f"{scheme}://{host}") 79 | 80 | if cache_client and content: 81 | cache_client.set_dataset_cache(dataset_hash, content) 82 | 83 | return content 84 | except InvalidIdentifier: 85 | raise HTTPException( 86 | status_code=404, detail=f"Invalid spotlight identifier: {spotlight_id}" 87 | ) 88 | -------------------------------------------------------------------------------- /ship-to-api.py: -------------------------------------------------------------------------------- 1 | """processing script for ship detection data""" 2 | import csv 3 | import json 4 | import os 5 | import subprocess 6 | from glob import glob 7 | 8 | 9 | def get_location(loc): 10 | """get location id of detections""" 11 | 
if loc == "New York": 12 | return "ny" 13 | elif loc == "San Francisco": 14 | return "sf" 15 | else: 16 | return "la" 17 | 18 | 19 | def file_to_scene(f): 20 | """convert filename of tif to scene""" 21 | b = ( 22 | os.path.basename(f) 23 | .replace("reprojected_", "") 24 | .replace("_resampled", "") 25 | .replace("_3B_Visual.tif", "") 26 | .replace("T", "_") 27 | ) 28 | if b[-5] != "_": 29 | b = f"{b[:-4]}_{b[-4:]}" 30 | s = b.split("_") 31 | if len(s[1]) > 6: 32 | b = f"{s[0]}_{s[1][:6]}_{s[1][6:]}_{s[2]}" 33 | return b 34 | 35 | 36 | with open("ships.geojson") as f: 37 | data = json.load(f) 38 | 39 | with open("tifs.txt") as f: 40 | tifs = [line.strip() for line in f.readlines()] 41 | 42 | scene_to_file_dict = dict(zip([file_to_scene(t) for t in tifs], tifs)) 43 | 44 | 45 | def scene_to_file(s): 46 | """convert scene to file name""" 47 | file = scene_to_file_dict.get(s) 48 | if not file: 49 | possible = [t for t in tifs if s in t] 50 | if possible: 51 | file = possible[0] 52 | else: 53 | print(f"no match for {s}") 54 | return file 55 | 56 | 57 | for d in data: 58 | if "features" in d["ship_detections"]: 59 | date = d["date"].replace("-", "_") 60 | location = get_location(d["location"]) 61 | detections = d["ship_detections"] 62 | # write geojson of detection 63 | with open(f"ship/{location}/{date}.geojson", "w") as w: 64 | w.write(json.dumps(detections)) 65 | # append detection count to csv 66 | with open(f"ship_csvs/{location}.csv", "a+") as a: 67 | a.write(f'{date},{len(detections["features"])}\n') 68 | # create VRT for scene_ids 69 | with open("filelist.txt", "w") as fl: 70 | files = [ 71 | f"/vsis3/image-labeling-tool-internal/{scene_to_file(s)}\n" 72 | for s in d["scene_ids"] 73 | if scene_to_file(s) 74 | ] 75 | fl.writelines(files) 76 | 77 | subprocess.run( 78 | [ 79 | "gdalbuildvrt", 80 | "-input_file_list", 81 | "filelist.txt", 82 | f"{location}-{date}.vrt", 83 | ] 84 | ) 85 | 86 | # sort and add headers to the csvs 87 | for file in glob("ship_csvs/*"): 88 | with open(file) as csvfile: 89 | reader = csv.reader(csvfile) 90 | sortedlist = sorted(reader, key=lambda row: row[0]) 91 | 92 | with open(file, "w") as f: 93 | fieldnames = ["date", "count"] 94 | writer = csv.DictWriter(f, fieldnames=fieldnames) 95 | writer.writeheader() 96 | for row in sortedlist: 97 | writer.writerow(dict(date=row[0], count=row[1])) 98 | -------------------------------------------------------------------------------- /guidelines/raster.md: -------------------------------------------------------------------------------- 1 | # Contributing Raster Data 2 | 3 | ## Data structure 4 | All raster data for the site is stored as [Cloud Optimized GeoTIFF](https://www.cogeo.org/) (COG). One way to validate that data is in the proper format is using [rio-cogeo](https://github.com/cogeotiff/rio-cogeo): 5 | 6 | - First, check that it passes validation with `rio cogeo validate my_raster.tif` 7 | - Then ensure that it has a `nodata` value set and that it matches the value which represents non-valid pixels within your GeoTIFF. 
You can see the `nodata` value like so: 8 | 9 | ```sh 10 | rio info my_raster.tif --nodata 11 | ``` 12 | 13 | *note: `nan` values in the data will not be treated as non-valid pixels unless the `nodata` tag is `nan`.* 14 | 15 | This same library can also create a Cloud Optimized GeoTIFF with the following command: 16 | 17 | ```sh 18 | rio cogeo create my_raster.tif my_cog_raster.tif 19 | ``` 20 | 21 | ## Naming convention 22 | 23 | New raster files are added to the dashboard manually so the naming convention is rather liberal. The only requirement is that for date-specific data, the file name must include the date, formatted as: 24 | - `YYYYMM` for monthly data 25 | - `YYYY_MM_DD` for sub-monthly data (anything with daily or semi-periodic data) 26 | 27 | If the file doesn't have global coverage, please use a portion of the file name to indicate the spotlight area it covers. We provide data for the following [spotlight areas](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/sites). For example: 28 | 29 | ```sh 30 | my_tif_tk_2020_02_19.tif # Data for Tokyo on February 19th, 2020. 31 | ``` 32 | 33 | ## Metadata 34 | 35 | When sending the data, please include the following information 36 | - A short description (1-2 sentences) of the data to be included on the dashboard. 37 | - The time and spatial domain covered by the dataset 38 | - Best practices/standards of color maps for visualizing the data 39 | 40 | ## Delivery mechanism 41 | 42 | There are three mechanisms for making raster data available through this API: 43 | - **publicly available**: any publicly available COG can be accessed through this API. Because the API is run on Amazon Web Services in the `us-east-1` region, data hosted on S3 in this region will have faster response times to the API. 44 | - **send to API maintainers**: if you'd like to keep the source data stored privately, please contact olaf@developmentseed.org or drew@developmentseed.org, and we can discuss other hosting options for the data. 45 | - **upload directly**: some science partners have direct S3 upload access. Those partners can upload to `s3://covid-eo-data/[dataset_folder]` where `[dataset_folder]` is an S3 folder containing the data. Each dataset should have a 1-1 relationship with a folder. 46 | 47 | ## Visualization 48 | 49 | Once ingested or otherwise made accessible, the data is available as map tiles as detailed in the [API documentation](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/docs). There are a variety of parameters that can be used to customize the visualization, in particular, a [number of colormaps](https://github.com/cogeotiff/rio-tiler/blob/master/docs/colormap.md). The remaining parameter descriptions are shown [here](https://github.com/developmentseed/cogeo-tiler#tiles). 
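For orientation, here is a minimal sketch of such a tile request in Python. The COG URL and the z/x/y tile index below are placeholders, and the query parameters mirror the ones used throughout this repository's dataset definitions:

```python
import requests

api_url = "https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1"
params = {
    # Placeholder location: any COG accessible to the API works here.
    "url": "s3://covid-eo-data/my-dataset/my_cog_raster.tif",
    "rescale": "0,1000",              # stretch raw values into the 0-255 range
    "color_map": "viridis",           # one of rio-tiler's colormaps
    "resampling_method": "bilinear",  # resampling used when reading the COG
}

# Placeholder z/x/y tile coordinates; @1x requests a 256x256 tile.
resp = requests.get(f"{api_url}/9/150/194@1x.png", params=params)
resp.raise_for_status()
with open("tile.png", "wb") as f:
    f.write(resp.content)
```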
50 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/ogc.py: -------------------------------------------------------------------------------- 1 | """API ogc.""" 2 | 3 | from urllib.parse import urlencode 4 | 5 | import rasterio 6 | from rasterio import warp 7 | from rio_tiler import constants 8 | from rio_tiler.mercator import get_zooms 9 | 10 | from covid_api.core import config 11 | from covid_api.ressources.common import mimetype 12 | from covid_api.ressources.enums import ImageType 13 | from covid_api.ressources.responses import XMLResponse 14 | 15 | from fastapi import APIRouter, Query 16 | 17 | from starlette.requests import Request 18 | from starlette.responses import Response 19 | from starlette.templating import Jinja2Templates 20 | 21 | router = APIRouter() 22 | templates = Jinja2Templates(directory="covid_api/templates") 23 | 24 | 25 | @router.get( 26 | r"/WMTSCapabilities.xml", 27 | responses={200: {"content": {"application/xml": {}}}}, 28 | response_class=XMLResponse, 29 | ) 30 | def wmts( 31 | request: Request, 32 | response: Response, 33 | url: str = Query(..., description="Cloud Optimized GeoTIFF URL."), 34 | tile_format: ImageType = Query( 35 | ImageType.png, description="Output image type. Default is png." 36 | ), 37 | tile_scale: int = Query( 38 | 1, gt=0, lt=4, description="Tile size scale. 1=256x256, 2=512x512..." 39 | ), 40 | ): 41 | """WMTS endpoint.""" 42 | scheme = request.url.scheme 43 | host = request.headers["host"] 44 | if config.API_VERSION_STR: 45 | host += config.API_VERSION_STR 46 | endpoint = f"{scheme}://{host}" 47 | 48 | kwargs = dict(request.query_params) 49 | kwargs.pop("tile_format", None) 50 | kwargs.pop("tile_scale", None) 51 | qs = urlencode(list(kwargs.items())) 52 | 53 | with rasterio.open(url) as src_dst: 54 | bounds = list( 55 | warp.transform_bounds( 56 | src_dst.crs, constants.WGS84_CRS, *src_dst.bounds, densify_pts=21 57 | ) 58 | ) 59 | minzoom, maxzoom = get_zooms(src_dst) 60 | 61 | media_type = mimetype[tile_format.value] 62 | tilesize = tile_scale * 256 63 | tileMatrix = [] 64 | for zoom in range(minzoom, maxzoom + 1): 65 | tileMatrix.append( 66 | f""" 67 | {zoom} 68 | {559082264.02872 / 2 ** zoom / tile_scale} 69 | -20037508.34278925 20037508.34278925 70 | {tilesize} 71 | {tilesize} 72 | {2 ** zoom} 73 | {2 ** zoom} 74 | """ 75 | ) 76 | 77 | return templates.TemplateResponse( 78 | "wmts.xml", 79 | { 80 | "request": request, 81 | "endpoint": endpoint, 82 | "bounds": bounds, 83 | "tileMatrix": tileMatrix, 84 | "title": "Cloud Optimized GeoTIFF", 85 | "query_string": qs, 86 | "tile_scale": tile_scale, 87 | "tile_format": tile_format.value, 88 | "media_type": media_type, 89 | }, 90 | media_type="application/xml", 91 | ) 92 | -------------------------------------------------------------------------------- /covid_api/templates/wmts.xml: -------------------------------------------------------------------------------- 1 | 9 | 10 | "{{ title }}" 11 | OGC WMTS 12 | 1.0.0 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | RESTful 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | RESTful 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | {{ title }} 45 | cogeo 46 | {{ title }} 47 | 48 | {{ bounds[0] }} {{ bounds[1] }} 49 | {{ bounds[2] }} {{ bounds[3] }} 50 | 51 | 54 | {{ media_type }} 55 | 56 | GoogleMapsCompatible 57 | 58 | 59 | 60 | 61 | GoogleMapsCompatible 62 | GoogleMapsCompatible EPSG:3857 63 | GoogleMapsCompatible 64 | urn:ogc:def:crs:EPSG::3857 65 | {% for item in
tileMatrix %} 66 | {{ item | safe }} 67 | {% endfor %} 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /covid_api/main.py: -------------------------------------------------------------------------------- 1 | """covid_api app.""" 2 | from typing import Any, Dict 3 | 4 | from covid_api import version 5 | from covid_api.api.api_v1.api import api_router 6 | from covid_api.core import config 7 | from covid_api.db.memcache import CacheLayer 8 | 9 | from fastapi import FastAPI 10 | 11 | from starlette.middleware.cors import CORSMiddleware 12 | from starlette.middleware.gzip import GZipMiddleware 13 | from starlette.requests import Request 14 | from starlette.responses import HTMLResponse 15 | from starlette.templating import Jinja2Templates 16 | 17 | templates = Jinja2Templates(directory="covid_api/templates") 18 | 19 | if config.MEMCACHE_HOST and not config.DISABLE_CACHE: 20 | kwargs: Dict[str, Any] = { 21 | k: v 22 | for k, v in zip( 23 | ["port", "user", "password"], 24 | [config.MEMCACHE_PORT, config.MEMCACHE_USERNAME, config.MEMCACHE_PASSWORD], 25 | ) 26 | if v 27 | } 28 | cache = CacheLayer(config.MEMCACHE_HOST, **kwargs) 29 | else: 30 | cache = None 31 | 32 | 33 | app = FastAPI( 34 | title=config.PROJECT_NAME, 35 | openapi_url="/api/v1/openapi.json", 36 | description="A lightweight Cloud Optimized GeoTIFF tile server", 37 | version=version, 38 | ) 39 | 40 | # Set all CORS enabled origins 41 | if config.BACKEND_CORS_ORIGINS: 42 | origins = [origin.strip() for origin in config.BACKEND_CORS_ORIGINS.split(",")] 43 | app.add_middleware( 44 | CORSMiddleware, 45 | allow_origins=origins, 46 | allow_credentials=True, 47 | allow_methods=["*"], 48 | allow_headers=["*"], 49 | ) 50 | 51 | app.add_middleware(GZipMiddleware, minimum_size=0) 52 | 53 | 54 | @app.middleware("http") 55 | async def cache_middleware(request: Request, call_next): 56 | """Add cache layer.""" 57 | request.state.cache = cache 58 | response = await call_next(request) 59 | if cache: 60 | request.state.cache.client.disconnect_all() 61 | return response 62 | 63 | 64 | @app.get( 65 | "/", 66 | responses={200: {"content": {"application/hmtl": {}}}}, 67 | response_class=HTMLResponse, 68 | ) 69 | @app.get( 70 | "/index.html", 71 | responses={200: {"content": {"application/hmtl": {}}}}, 72 | response_class=HTMLResponse, 73 | ) 74 | def index(request: Request): 75 | """Demo Page.""" 76 | scheme = request.url.scheme 77 | host = request.headers["host"] 78 | if config.API_VERSION_STR: 79 | host += config.API_VERSION_STR 80 | endpoint = f"{scheme}://{host}" 81 | 82 | return templates.TemplateResponse( 83 | "index.html", {"request": request, "endpoint": endpoint}, media_type="text/html" 84 | ) 85 | 86 | 87 | @app.get( 88 | "/simple_viewer.html", 89 | responses={200: {"content": {"application/hmtl": {}}}}, 90 | response_class=HTMLResponse, 91 | ) 92 | def simple(request: Request): 93 | """Demo Page.""" 94 | scheme = request.url.scheme 95 | host = request.headers["host"] 96 | if config.API_VERSION_STR: 97 | host += config.API_VERSION_STR 98 | endpoint = f"{scheme}://{host}" 99 | 100 | return templates.TemplateResponse( 101 | "simple.html", 102 | {"request": request, "endpoint": endpoint}, 103 | media_type="text/html", 104 | ) 105 | 106 | 107 | @app.get("/ping", description="Health Check") 108 | def ping(): 109 | """Health check.""" 110 | return {"ping": "pong!"} 111 | 112 | 113 | app.include_router(api_router, prefix=config.API_VERSION_STR) 114 | 
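To smoke-test the application above without deploying it, here is a minimal sketch using Starlette's test client. It assumes the project's dev dependencies are installed and `DISABLE_CACHE` is set, so no memcached instance is required.

```python
# Minimal smoke test for the FastAPI app defined in covid_api/main.py.
# Assumption: dev dependencies installed and DISABLE_CACHE set in the env.
from starlette.testclient import TestClient

from covid_api.main import app

client = TestClient(app)

# The /ping health-check route defined above returns {"ping": "pong!"}.
response = client.get("/ping")
assert response.status_code == 200
assert response.json() == {"ping": "pong!"}
```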
-------------------------------------------------------------------------------- /guidelines/indicators.md: -------------------------------------------------------------------------------- 1 | # Contributing Indicator Data 2 | Time-series indicator data is provided in CSV format. It is mostly used in the dashboard in charts that show [evolution over time](./data-usage.md). 3 | 4 | Each indicator requires: 5 | 6 | - a metadata file describing the indicator 7 | - time-series data for each spotlight area 8 | 9 | For an overview of the current spotlight areas, see the [/sites API endpoint](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/sites). 10 | 11 | ## Data structure 12 | Expected format: 13 | 14 | - single CSV file per spotlight area 15 | - each row contains values for a single timestep 16 | - each row should have at least a column with a date and a column with an indicator value 17 | - numbers must be unquoted, must not contain thousands separators, and must be parsable as `float` by Python 18 | - all columns indicated as relevant by the metadata should be numbers, except for `date` and optionally `anomaly` 19 | - exactly one header row, containing the column names, as the first row of the file 20 | 21 | ### Error / Fill values 22 | Don't include rows with error or fill values like `-999`. 23 | 24 | ``` 25 | 27/01/2020,-999,7.21537286398442,-999,8.03568780281027,-999,-999 26 | ``` 27 | 28 | Omit these rows; the frontend is built to handle missing data. 29 | 30 | ### Example 31 | 32 | ``` csv 33 | date_obs,median,baseline_median,baseline_mean 34 | 02/07/2014,15.502651,20.564545686759864,221 35 | 02/08/2014,17.219058,10.755911295434752,244 36 | ``` 37 | 38 | ## Metadata 39 | In addition to the data itself, each indicator needs a metadata file with: 40 | 41 | ``` json 42 | { 43 | "date": { 44 | "column": "date_obs", 45 | "format": "%d/%m/%Y" 46 | }, 47 | "indicator": { 48 | "column": "median" 49 | }, 50 | "baseline": { 51 | "column": "baseline_median" 52 | }, 53 | "highlight_bands": [ 54 | { 55 | "label": "Lockdown", 56 | "interval": ["2020-02-01", "2020-03-06"] 57 | } 58 | ] 59 | } 60 | ``` 61 | 62 | The date format should use the directives described in the [Python `strptime` documentation](https://docs.python.org/3.7/library/datetime.html#strftime-and-strptime-behavior). 63 | 64 | ### Mandatory fields 65 | 66 | - `date`: the column containing each observation's date, and the format used to correctly parse it 67 | - `indicator`: the primary indicator column 68 | 69 | ### Optional fields 70 | 71 | - `indicator_conf_low` and `indicator_conf_high`: columns used for confidence intervals 72 | - `baseline`: column used for a baseline value to compare with the primary indicator 73 | - `baseline_conf_low` and `baseline_conf_high`: columns used for confidence intervals for the baseline values 74 | - `anomaly`: column used to indicate whether the value is anomalous; accepts any string 75 | 76 | ### Additional Metadata 77 | 78 | For commentary that is both indicator- and site-specific, additional `.json` files can be added alongside the CSV files. For example, `be.csv` can have a `be.json` file that adds contextual information about what is happening with a given indicator in Beijing. This JSON object can have two properties: 79 | - `notes`: text field to be added below the chart 80 | - `highlight_bands`: used to highlight a time interval on the chart (e.g.
lockdown) 81 | 82 | An example metadata file looks like this: 83 | 84 | ```json 85 | { 86 | "notes": "description", 87 | "highlight_bands": [ 88 | { 89 | "label": "Detection", 90 | "interval": ["2020-02-01", "2020-03-30"] 91 | }, 92 | { 93 | "label": "Emergency state", 94 | "interval": ["2020-03-15", "2020-05-15"] 95 | } 96 | ] 97 | } 98 | ``` 99 | 100 | ## Delivery mechanism 101 | Data can be provided to a S3 bucket: 102 | 103 | - each indicator will have a separate folder on S3 104 | - each file contains time-series data for a single spotlight area 105 | - name of the file: [area ID].csv 106 | - with each data update, the full file for the area needs to be replaced 107 | 108 | ## Examples 109 | 110 | ### NO2 15 day average 111 | 112 | [`/no2-omi/metadata.json`](https://covid-eo-example.s3.amazonaws.com/no2-omi/metadata.json) 113 | [`/no2-omi/be.csv`](https://covid-eo-example.s3.amazonaws.com/no2-omi/be.csv) 114 | [`/no2-omi/du.csv`](https://covid-eo-example.s3.amazonaws.com/no2-omi/du.csv) 115 | [`/no2-omi/ls.csv`](https://covid-eo-example.s3.amazonaws.com/no2-omi/ls.csv) 116 | -------------------------------------------------------------------------------- /covid_api/models/static.py: -------------------------------------------------------------------------------- 1 | """Static models.""" 2 | 3 | from typing import Any, List, Optional, Union 4 | 5 | from geojson_pydantic.features import FeatureCollection 6 | from geojson_pydantic.geometries import Polygon 7 | from pydantic import BaseModel # , validator 8 | 9 | # from pydantic.color import Color 10 | 11 | 12 | def to_camel(snake_str: str) -> str: 13 | """ 14 | Converts snake_case_string to camelCaseString 15 | """ 16 | first, *others = snake_str.split("_") 17 | return "".join([first.lower(), *map(str.title, others)]) 18 | 19 | 20 | class Source(BaseModel): 21 | """Base Source Model""" 22 | 23 | type: str 24 | 25 | 26 | class NonGeoJsonSource(Source): 27 | """Source Model for all non-geojson data types""" 28 | 29 | tiles: List[str] 30 | 31 | 32 | class GeoJsonSource(Source): 33 | """Source Model for geojson data types""" 34 | 35 | data: str 36 | 37 | 38 | class Swatch(BaseModel): 39 | """Swatch Model.""" 40 | 41 | color: str 42 | name: str 43 | 44 | 45 | class LabelStop(BaseModel): 46 | """Model for Legend stops with color + label""" 47 | 48 | color: str 49 | label: str 50 | 51 | 52 | class Legend(BaseModel): 53 | """Legend Model.""" 54 | 55 | type: str 56 | min: Optional[str] 57 | max: Optional[str] 58 | stops: Union[List[str], List[LabelStop]] 59 | 60 | 61 | class DatasetComparison(BaseModel): 62 | """ Dataset `compare` Model.""" 63 | 64 | enabled: bool 65 | help: str 66 | year_diff: int 67 | map_label: str 68 | source: NonGeoJsonSource 69 | time_unit: Optional[str] 70 | 71 | 72 | def snake_case_to_kebab_case(s): 73 | """Util method to convert kebab-case fieldnames to snake_case.""" 74 | return s.replace("_", "-") 75 | 76 | 77 | class Paint(BaseModel): 78 | """Paint Model.""" 79 | 80 | raster_opacity: float 81 | 82 | class Config: 83 | """Paint Model Config""" 84 | 85 | alias_generator = snake_case_to_kebab_case 86 | allow_population_by_field_name = True 87 | 88 | 89 | class Dataset(BaseModel): 90 | """Dataset Model.""" 91 | 92 | id: str 93 | name: str 94 | type: str 95 | 96 | is_periodic: bool = False 97 | time_unit: str = "" 98 | domain: List[str] = [] 99 | source: Union[NonGeoJsonSource, GeoJsonSource] 100 | background_source: Optional[Union[NonGeoJsonSource, GeoJsonSource]] 101 | exclusive_with: List[str] = [] 102 | swatch: Swatch 
103 | compare: Optional[DatasetComparison] 104 | legend: Optional[Legend] 105 | paint: Optional[Paint] 106 | info: str = "" 107 | 108 | 109 | class DatasetExternal(Dataset): 110 | """ Public facing dataset model (uses camelCase fieldnames) """ 111 | 112 | class Config: 113 | """Generates alias to convert all fieldnames from snake_case to camelCase""" 114 | 115 | alias_generator = to_camel 116 | allow_population_by_field_name = True 117 | 118 | 119 | class DatasetInternal(Dataset): 120 | """ Private dataset model (includes the dataset's location in s3) """ 121 | 122 | s3_location: Optional[str] 123 | 124 | 125 | class Datasets(BaseModel): 126 | """Dataset List Model.""" 127 | 128 | datasets: List[DatasetExternal] 129 | 130 | 131 | class Site(BaseModel): 132 | """Site Model.""" 133 | 134 | id: str 135 | label: str 136 | center: List[float] 137 | polygon: Optional[Polygon] = None 138 | bounding_box: Optional[List[float]] = None 139 | indicators: List[Any] = [] 140 | 141 | 142 | class Sites(BaseModel): 143 | """Site List Model.""" 144 | 145 | sites: List[Site] 146 | 147 | 148 | class IndicatorObservation(BaseModel): 149 | """Indicator Observation Model.""" 150 | 151 | indicator: float 152 | indicator_conf_low: Optional[float] = None 153 | indicator_conf_high: Optional[float] = None 154 | baseline: Optional[float] = None 155 | baseline_conf_low: Optional[float] = None 156 | baseline_conf_high: Optional[float] = None 157 | anomaly: Optional[str] = None 158 | 159 | 160 | class IndicatorGroup(BaseModel): 161 | """Indicator Group Model.""" 162 | 163 | id: str 164 | label: str 165 | prose: Optional[str] 166 | indicators: List[str] 167 | 168 | 169 | class IndicatorGroups(BaseModel): 170 | """Indicator Group List Model.""" 171 | 172 | groups: List[IndicatorGroup] 173 | 174 | 175 | class Detection(FeatureCollection): 176 | """Detection Model""" 177 | 178 | pass 179 | -------------------------------------------------------------------------------- /tests/routes/v1/test_datasets.py: -------------------------------------------------------------------------------- 1 | """Test /v1/datasets endpoints""" 2 | 3 | 4 | import json 5 | from unittest.mock import patch 6 | 7 | import boto3 8 | import botocore 9 | from moto import mock_s3 10 | 11 | from covid_api.core.config import INDICATOR_BUCKET 12 | 13 | DATASET_METADATA_FILENAME = "dev-dataset-metadata.json" 14 | DATASET_METADATA_GENERATOR_FUNCTION_NAME = "dev-dataset-metadata-generator" 15 | 16 | 17 | @mock_s3 18 | def _setup_s3(empty=False): 19 | s3 = boto3.resource("s3") 20 | bucket = s3.Bucket(INDICATOR_BUCKET) 21 | bucket.create() 22 | if empty: 23 | return bucket 24 | s3_keys = [ 25 | ("indicators/test/super.csv", b"test"), 26 | ( 27 | DATASET_METADATA_FILENAME, 28 | json.dumps( 29 | { 30 | "_all": { 31 | "co2": { 32 | "domain": ["2019-01-01T00:00:00Z", "2020-01-01T00:00:00Z"] 33 | }, 34 | "detections-plane": { 35 | "domain": [ 36 | "2019-01-01T00:00:00Z", 37 | "2019-10-10T00:00:00Z", 38 | "2020-01-01T:00:00:00Z", 39 | ] 40 | }, 41 | }, 42 | "global": { 43 | "co2": { 44 | "domain": ["2019-01-01T00:00:00Z", "2020-01-01T00:00:00Z"] 45 | } 46 | }, 47 | "tk": { 48 | "detections-plane": { 49 | "domain": [ 50 | "2019-01-01T00:00:00Z", 51 | "2019-10-10T00:00:00Z", 52 | "2020-01-01T:00:00:00Z", 53 | ] 54 | } 55 | }, 56 | "ny": { 57 | "detections-ship": { 58 | "domain": [ 59 | "2019-01-01T00:00:00Z", 60 | "2019-10-10T00:00:00Z", 61 | "2020-01-01T:00:00:00Z", 62 | ] 63 | } 64 | }, 65 | } 66 | ), 67 | ), 68 | ] 69 | for key, content in s3_keys: 70 | 
bucket.put_object(Body=content, Key=key) 71 | return bucket 72 | 73 | 74 | @mock_s3 75 | def test_metadata_file_generation_triggered_if_not_found( 76 | app, dataset_manager, monkeypatch 77 | ): 78 | 79 | _setup_s3(empty=True) 80 | 81 | with patch("covid_api.db.static.datasets.invoke_lambda") as mocked_invoke_lambda: 82 | 83 | mocked_invoke_lambda.return_value = {"result": "success"} 84 | # Load dataset will invoke the mocked-lambda and then attempt to load the file 85 | # from S3 once the lambda finished executing. Since the mocked lambda 86 | # doesn't actually write anything to S3 in this test, the call to load the file 87 | # from S3 will fail. This is not a problem since this test is just to ascertain 88 | # that the lambda was in fact triggered. 89 | try: 90 | dataset_manager()._load_domain_metadata() 91 | except botocore.exceptions.ClientError as e: 92 | if e.response["Error"]["Code"] == "404": 93 | pass 94 | 95 | mocked_invoke_lambda.assert_called_with( 96 | lambda_function_name=DATASET_METADATA_GENERATOR_FUNCTION_NAME 97 | ) 98 | 99 | 100 | @mock_s3 101 | def test_datasets(app): 102 | _setup_s3() 103 | response = app.get("v1/datasets") 104 | 105 | assert response.status_code == 200 106 | content = json.loads(response.content) 107 | 108 | assert "co2" in [d["id"] for d in content["datasets"]] 109 | assert "detections-plane" in [d["id"] for d in content["datasets"]] 110 | 111 | 112 | @mock_s3 113 | def test_spotlight_datasets(app): 114 | _setup_s3() 115 | response = app.get("v1/datasets/tk") 116 | 117 | assert response.status_code == 200 118 | 119 | content = json.loads(response.content) 120 | assert "co2" in [d["id"] for d in content["datasets"]] 121 | assert "detections-plane" in [d["id"] for d in content["datasets"]] 122 | assert "detections-ship" not in [d["id"] for d in content["datasets"]] 123 | 124 | 125 | @mock_s3 126 | def test_incorrect_dataset_id(app): 127 | _setup_s3() 128 | 129 | response = app.get("/v1/datasets/NOT_A_VALID_DATASET") 130 | assert response.status_code == 404 131 | -------------------------------------------------------------------------------- /lambda/dataset_metadata_generator/tests/test_metadata_generator.py: -------------------------------------------------------------------------------- 1 | """Test class for metadata generator lambda""" 2 | from datetime import datetime 3 | 4 | import boto3 5 | from moto import mock_s3 6 | 7 | 8 | @mock_s3 9 | def _setup_s3(): 10 | 11 | s3 = boto3.resource("s3") 12 | 13 | bucket = s3.Bucket("covid-eo-data") 14 | bucket.create() 15 | s3_keys = [ 16 | ("indicators/test/super.csv", b"test"), 17 | ("xco2-mean/GOSAT_XCO2_2019_01_01_be_BG_circle_cog.tif", b"test"), 18 | ("xco2-mean/GOSAT_XCO2_2019_04_01_be_BG_circle_cog.tif", b"test"), 19 | ("xco2-mean/GOSAT_XCO2_2019_06_01_be_BG_circle_cog.tif", b"test"), 20 | ("oc3_chla_anomaly/anomaly-chl-tk-2020_01_29.tif", b"test"), 21 | ("oc3_chla_anomaly/anomaly-chl-tk-2020_02_05.tif", b"test"), 22 | ("oc3_chla_anomaly/anomaly-chl-tk-2020_03_02.tif", b"test"), 23 | ("bm_500m_daily/VNP46A2_V011_be_2020_01_01_cog.tif", b"test"), 24 | ("bm_500m_daily/VNP46A2_V011_be_2020_02_29_cog.tif", b"test"), 25 | ("bm_500m_daily/VNP46A2_V011_be_2020_03_20_cog.tif", b"test"), 26 | ("bm_500m_daily/VNP46A2_V011_EUPorts_2020_01_01_cog.tif", b"test"), 27 | ("bm_500m_daily/VNP46A2_V011_EUPorts_2020_02_29_cog.tif", b"test"), 28 | ("bm_500m_daily/VNP46A2_V011_EUPorts_2020_03_20_cog.tif", b"test"), 29 | ("bmhd_30m_monthly/BMHD_VNP46A2_du_202005_cog.tif", b"test"), 30 | 
("bmhd_30m_monthly/BMHD_VNP46A2_du_202006_cog.tif", b"test"), 31 | ("bmhd_30m_monthly/BMHD_VNP46A2_du_202007_cog.tif", b"test"), 32 | ("OMNO2d_HRM/OMI_trno2_0.10x0.10_200401_Col3_V4.nc.tif", b"test"), 33 | ("OMNO2d_HRM/OMI_trno2_0.10x0.10_200708_Col3_V4.nc.tif", b"test"), 34 | ("OMNO2d_HRM/OMI_trno2_0.10x0.10_200901_Col3_V4.nc.tif", b"test"), 35 | ("detections-plane/ny/2020_01_09.geojson", b"test"), 36 | ("detections-plane/ny/2020_01_21.geojson", b"test"), 37 | ("detections-plane/ny/2020_02_02.geoson", b"test"), 38 | ("detections-ship/ny/2020_01_09.geojson", b"test"), 39 | ("detections-ship/ny/2020_01_21.geojson", b"test"), 40 | ("detections-ship/ny/2020_02_02.geoson", b"test"), 41 | ("indicators/test/super.csv", b"test"), 42 | ] 43 | for key, content in s3_keys: 44 | bucket.put_object(Body=content, Key=key) 45 | return bucket 46 | 47 | 48 | @mock_s3 49 | def test_datasets(gather_datasets_metadata, datasets, sites): 50 | """Tests for basic (/) query""" 51 | 52 | _setup_s3() 53 | 54 | content = gather_datasets_metadata(datasets, sites) 55 | 56 | assert content is not None 57 | 58 | assert "global" in content.keys() 59 | assert "tk" in content.keys() 60 | 61 | 62 | @mock_s3 63 | def test_global_datasets(gather_datasets_metadata, datasets, sites): 64 | """Test for correct extraction of global datasets""" 65 | 66 | _setup_s3() 67 | 68 | content = gather_datasets_metadata(datasets, sites) 69 | 70 | assert content is not None 71 | 72 | assert "global" in content 73 | assert set(content["global"].keys()) == {"co2"} 74 | 75 | assert "_all" in content 76 | assert set(content["_all"].keys()) == { 77 | "co2", 78 | "detections-plane", 79 | "nightlights-hd", 80 | "nightlights-viirs", 81 | "water-chlorophyll", 82 | } 83 | 84 | 85 | @mock_s3 86 | def test_periodic_daily_global_datasets(gather_datasets_metadata, datasets, sites): 87 | """Test domain of periodic (domain only contains start and stop 88 | date) global datasets""" 89 | 90 | _setup_s3() 91 | 92 | content = gather_datasets_metadata(datasets, sites) 93 | 94 | assert content is not None 95 | 96 | dataset_info = content["global"]["co2"] 97 | 98 | assert dataset_info["domain"][0] == datetime.strftime( 99 | datetime(2019, 1, 1), "%Y-%m-%dT%H:%M:%SZ" 100 | ) 101 | assert dataset_info["domain"][1] == datetime.strftime( 102 | datetime(2019, 6, 1), "%Y-%m-%dT%H:%M:%SZ" 103 | ) 104 | 105 | 106 | @mock_s3 107 | def test_non_periodic_daily_spotlight_dataset( 108 | gather_datasets_metadata, datasets, sites 109 | ): 110 | """Test non periodic (domain has all available dates) spotlight 111 | sepecific datasets 112 | """ 113 | 114 | _setup_s3() 115 | 116 | content = gather_datasets_metadata(datasets, sites) 117 | 118 | assert content is not None 119 | assert "ny" in content 120 | 121 | dataset_info = content["ny"]["detections-plane"] 122 | 123 | assert len(dataset_info["domain"]) > 2 124 | 125 | 126 | @mock_s3 127 | def test_euports_datasets(gather_datasets_metadata, datasets, sites): 128 | """Test that an EUPorts datasets (du) searchs both for it's own spotlight id 129 | AND EUPorts""" 130 | 131 | _setup_s3() 132 | 133 | content = gather_datasets_metadata(datasets, sites) 134 | 135 | assert "du" in content 136 | assert set(content["du"].keys()) == { 137 | "nightlights-hd", 138 | "nightlights-viirs", 139 | } 140 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/metadata.py: -------------------------------------------------------------------------------- 1 | """API metadata.""" 2 | 3 | import os 4 
| import re 5 | from functools import partial 6 | from typing import Any, Dict, Optional, Union 7 | from urllib.parse import urlencode 8 | 9 | import numpy 10 | from rio_tiler.io import cogeo 11 | 12 | from covid_api.api.utils import info as cogInfo 13 | from covid_api.core import config 14 | from covid_api.models.mapbox import TileJSON 15 | from covid_api.ressources.enums import ImageType 16 | 17 | from fastapi import APIRouter, Query 18 | 19 | from starlette.concurrency import run_in_threadpool 20 | from starlette.requests import Request 21 | from starlette.responses import Response 22 | 23 | _info = partial(run_in_threadpool, cogInfo) 24 | _bounds = partial(run_in_threadpool, cogeo.bounds) 25 | _metadata = partial(run_in_threadpool, cogeo.metadata) 26 | _spatial_info = partial(run_in_threadpool, cogeo.spatial_info) 27 | 28 | router = APIRouter() 29 | 30 | 31 | @router.get( 32 | "/tilejson.json", 33 | response_model=TileJSON, 34 | responses={200: {"description": "Return a tilejson"}}, 35 | response_model_include={ 36 | "tilejson", 37 | "scheme", 38 | "version", 39 | "minzoom", 40 | "maxzoom", 41 | "bounds", 42 | "center", 43 | "tiles", 44 | }, # https://github.com/tiangolo/fastapi/issues/528#issuecomment-589659378 45 | ) 46 | async def tilejson( 47 | request: Request, 48 | response: Response, 49 | url: str = Query(..., description="Cloud Optimized GeoTIFF URL."), 50 | tile_format: Optional[ImageType] = Query( 51 | None, description="Output image type. Default is auto." 52 | ), 53 | tile_scale: int = Query( 54 | 1, gt=0, lt=4, description="Tile size scale. 1=256x256, 2=512x512..." 55 | ), 56 | ): 57 | """Handle /tilejson.json requests.""" 58 | scheme = request.url.scheme 59 | host = request.headers["host"] 60 | if config.API_VERSION_STR: 61 | host += config.API_VERSION_STR 62 | 63 | kwargs = dict(request.query_params) 64 | kwargs.pop("tile_format", None) 65 | kwargs.pop("tile_scale", None) 66 | 67 | qs = urlencode(list(kwargs.items())) 68 | if tile_format: 69 | tile_url = ( 70 | f"{scheme}://{host}/{{z}}/{{x}}/{{y}}@{tile_scale}x.{tile_format}?{qs}" 71 | ) 72 | else: 73 | tile_url = f"{scheme}://{host}/{{z}}/{{x}}/{{y}}@{tile_scale}x?{qs}" 74 | 75 | meta = await _spatial_info(url) 76 | response.headers["Cache-Control"] = "max-age=3600" 77 | return dict( 78 | bounds=meta["bounds"], 79 | center=meta["center"], 80 | minzoom=meta["minzoom"], 81 | maxzoom=meta["maxzoom"], 82 | name=os.path.basename(url), 83 | tiles=[tile_url], 84 | ) 85 | 86 | 87 | @router.get( 88 | "/bounds", responses={200: {"description": "Return the bounds of the COG."}} 89 | ) 90 | async def bounds( 91 | response: Response, 92 | url: str = Query(..., description="Cloud Optimized GeoTIFF URL."), 93 | ): 94 | """Handle /bounds requests.""" 95 | response.headers["Cache-Control"] = "max-age=3600" 96 | return await _bounds(url) 97 | 98 | 99 | @router.get("/info", responses={200: {"description": "Return basic info on COG."}}) 100 | async def info( 101 | response: Response, 102 | url: str = Query(..., description="Cloud Optimized GeoTIFF URL."), 103 | ): 104 | """Handle /info requests.""" 105 | response.headers["Cache-Control"] = "max-age=3600" 106 | return await _info(url) 107 | 108 | 109 | @router.get( 110 | "/metadata", responses={200: {"description": "Return the metadata of the COG."}} 111 | ) 112 | async def metadata( 113 | request: Request, 114 | response: Response, 115 | url: str = Query(..., description="Cloud Optimized GeoTIFF URL."), 116 | bidx: Optional[str] = Query(None, description="Coma (',') delimited band 
indexes"), 117 | nodata: Optional[Union[str, int, float]] = Query( 118 | None, description="Overwrite internal Nodata value." 119 | ), 120 | pmin: float = 2.0, 121 | pmax: float = 98.0, 122 | max_size: int = 1024, 123 | histogram_bins: int = 20, 124 | histogram_range: Optional[str] = Query( 125 | None, description="Coma (',') delimited Min,Max bounds" 126 | ), 127 | ): 128 | """Handle /metadata requests.""" 129 | kwargs = dict(request.query_params) 130 | kwargs.pop("url", None) 131 | kwargs.pop("bidx", None) 132 | kwargs.pop("nodata", None) 133 | kwargs.pop("pmin", None) 134 | kwargs.pop("pmax", None) 135 | kwargs.pop("max_size", None) 136 | kwargs.pop("histogram_bins", None) 137 | kwargs.pop("histogram_range", None) 138 | 139 | indexes = tuple(int(s) for s in re.findall(r"\d+", bidx)) if bidx else None 140 | 141 | if nodata is not None: 142 | nodata = numpy.nan if nodata == "nan" else float(nodata) 143 | 144 | hist_options: Dict[str, Any] = dict() 145 | if histogram_bins: 146 | hist_options.update(dict(bins=histogram_bins)) 147 | if histogram_range: 148 | hist_options.update(dict(range=list(map(float, histogram_range.split(","))))) 149 | 150 | response.headers["Cache-Control"] = "max-age=3600" 151 | return await _metadata( 152 | url, 153 | pmin, 154 | pmax, 155 | nodata=nodata, 156 | indexes=indexes, 157 | hist_options=hist_options, 158 | **kwargs, 159 | ) 160 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/tiles.py: -------------------------------------------------------------------------------- 1 | """API tiles.""" 2 | 3 | import re 4 | from functools import partial 5 | from io import BytesIO 6 | from typing import Any, Dict, Optional, Union 7 | 8 | import numpy 9 | from rio_tiler.colormap import get_colormap 10 | from rio_tiler.io import cogeo 11 | from rio_tiler.profiles import img_profiles 12 | from rio_tiler.utils import geotiff_options, render 13 | 14 | from covid_api.api import utils 15 | from covid_api.db.memcache import CacheLayer 16 | from covid_api.ressources.common import drivers, mimetype 17 | from covid_api.ressources.enums import ImageType 18 | from covid_api.ressources.responses import TileResponse 19 | 20 | from fastapi import APIRouter, Depends, Path, Query 21 | 22 | from starlette.concurrency import run_in_threadpool 23 | 24 | _tile = partial(run_in_threadpool, cogeo.tile) 25 | _render = partial(run_in_threadpool, render) 26 | _postprocess = partial(run_in_threadpool, utils.postprocess) 27 | 28 | 29 | router = APIRouter() 30 | responses = { 31 | 200: { 32 | "content": { 33 | "image/png": {}, 34 | "image/jpg": {}, 35 | "image/webp": {}, 36 | "image/tiff": {}, 37 | "application/x-binary": {}, 38 | }, 39 | "description": "Return an image.", 40 | } 41 | } 42 | tile_routes_params: Dict[str, Any] = dict( 43 | responses=responses, tags=["tiles"], response_class=TileResponse 44 | ) 45 | 46 | 47 | @router.get(r"/{z}/{x}/{y}", **tile_routes_params) 48 | @router.get(r"/{z}/{x}/{y}\.{ext}", **tile_routes_params) 49 | @router.get(r"/{z}/{x}/{y}@{scale}x", **tile_routes_params) 50 | @router.get(r"/{z}/{x}/{y}@{scale}x\.{ext}", **tile_routes_params) 51 | async def tile( 52 | z: int = Path(..., ge=0, le=30, description="Mercator tiles's zoom level"), 53 | x: int = Path(..., description="Mercator tiles's column"), 54 | y: int = Path(..., description="Mercator tiles's row"), 55 | scale: int = Query( 56 | 1, gt=0, lt=4, description="Tile size scale. 1=256x256, 2=512x512..." 
57 | ), 58 | ext: ImageType = Query(None, description="Output image type. Default is auto."), 59 | url: str = Query(..., description="Cloud Optimized GeoTIFF URL."), 60 | bidx: Optional[str] = Query(None, description="Coma (',') delimited band indexes"), 61 | nodata: Optional[Union[str, int, float]] = Query( 62 | None, description="Overwrite internal Nodata value." 63 | ), 64 | rescale: Optional[str] = Query( 65 | None, description="Coma (',') delimited Min,Max bounds" 66 | ), 67 | color_formula: Optional[str] = Query(None, title="rio-color formula"), 68 | color_map: Optional[utils.ColorMapName] = Query( 69 | None, title="rio-tiler color map name" 70 | ), 71 | cache_client: CacheLayer = Depends(utils.get_cache), 72 | ) -> TileResponse: 73 | """Handle /tiles requests.""" 74 | timings = [] 75 | headers: Dict[str, str] = {} 76 | 77 | tile_hash = utils.get_hash( 78 | **dict( 79 | z=z, 80 | x=x, 81 | y=y, 82 | ext=ext, 83 | scale=scale, 84 | url=url, 85 | bidx=bidx, 86 | nodata=nodata, 87 | rescale=rescale, 88 | color_formula=color_formula, 89 | color_map=color_map.value if color_map else "", 90 | ) 91 | ) 92 | tilesize = scale * 256 93 | 94 | content = None 95 | if cache_client: 96 | try: 97 | content, ext = cache_client.get_image_from_cache(tile_hash) 98 | headers["X-Cache"] = "HIT" 99 | except Exception: 100 | content = None 101 | 102 | if not content: 103 | indexes = tuple(int(s) for s in re.findall(r"\d+", bidx)) if bidx else None 104 | 105 | if nodata is not None: 106 | nodata = numpy.nan if nodata == "nan" else float(nodata) 107 | 108 | with utils.Timer() as t: 109 | tile, mask = await _tile( 110 | url, x, y, z, indexes=indexes, tilesize=tilesize, nodata=nodata 111 | ) 112 | timings.append(("Read", t.elapsed)) 113 | 114 | if not ext: 115 | ext = ImageType.jpg if mask.all() else ImageType.png 116 | 117 | with utils.Timer() as t: 118 | tile = await _postprocess( 119 | tile, mask, rescale=rescale, color_formula=color_formula 120 | ) 121 | timings.append(("Post-process", t.elapsed)) 122 | 123 | if color_map: 124 | if color_map.value.startswith("custom_"): 125 | color_map = utils.get_custom_cmap(color_map.value) # type: ignore 126 | else: 127 | color_map = get_colormap(color_map.value) # type: ignore 128 | 129 | with utils.Timer() as t: 130 | if ext == ImageType.npy: 131 | sio = BytesIO() 132 | numpy.save(sio, (tile, mask)) 133 | sio.seek(0) 134 | content = sio.getvalue() 135 | else: 136 | driver = drivers[ext.value] 137 | options = img_profiles.get(driver.lower(), {}) 138 | if ext == ImageType.tif: 139 | options = geotiff_options(x, y, z, tilesize=tilesize) 140 | 141 | content = await _render( 142 | tile, mask, img_format=driver, colormap=color_map, **options 143 | ) 144 | 145 | timings.append(("Format", t.elapsed)) 146 | 147 | if cache_client and content: 148 | cache_client.set_image_cache(tile_hash, (content, ext)) 149 | 150 | if timings: 151 | headers["X-Server-Timings"] = "; ".join( 152 | ["{} - {:0.2f}".format(name, time * 1000) for (name, time) in timings] 153 | ) 154 | 155 | return TileResponse(content, media_type=mimetype[ext.value], headers=headers) 156 | -------------------------------------------------------------------------------- /guidelines/api-usage.md: -------------------------------------------------------------------------------- 1 | # Using the API to explore and access datasets 2 | 3 | The production API is currently accessible at this URL: https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/ 4 | 5 | API documentation can be found under 
https://8ib71h0627.execute-api.us-east-1.amazonaws.com/docs 6 | 7 | Metadata and configuration information for all the datasets available in the dashboard can be found at the [`/datasets` endpoint](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/datasets). 8 | 9 | There are two categories of datasets: `global`, datasets that span the entire globe, and `spotlight`, datasets that only exist for certain spotlight cities/areas. The spotlight areas available and their metadata can be found at the [`/sites` endpoint](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/sites). 10 | 11 | Appending a site id from the `/sites` endpoint to the `/datasets` endpoint will return all the datasets that are available for that spotlight (i.e. [`/datasets/be`](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/datasets/be) will return all datasets available for the Beijing spotlight). The [`/datasets/global` endpoint](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/datasets/global) will return all of the global datasets. Note that all global datasets are also included in the response for each specific spotlight, since their data exists for that spotlight as well. 12 | 13 | Here is the metadata for one of the datasets (Nightlights VIIRS) available in the Beijing spotlight (dataset metadata for all Beijing datasets can be found at the `/datasets/be` endpoint): 14 | ```json 15 | 16 | { 17 | "id": "nightlights-viirs", 18 | "name": "Nightlights VIIRS", 19 | "type": "raster-timeseries", 20 | "isPeriodic": true, 21 | "timeUnit": "day", 22 | "domain": [ 23 | "2020-01-01T00:00:00Z", 24 | "2020-12-01T00:00:00Z" 25 | ], 26 | "source": { 27 | "type": "raster", 28 | "tiles": [ 29 | "https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/{z}/{x}/{y}@1x?url=s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_be_{date}_cog.tif&resampling_method=nearest&bidx=1&rescale=0%2C100&color_map=viridis" 30 | ] 31 | }, 32 | "backgroundSource": null, 33 | "exclusiveWith": [ 34 | "agriculture", 35 | "no2", 36 | "co2-diff", 37 | "co2", 38 | "fb-population-density", 39 | "car-count", 40 | "nightlights-hd", 41 | "detection-multi", 42 | "water-chlorophyll", 43 | "water-spm", 44 | "detections-ship", 45 | "detections-plane", 46 | "detections-vehicles" 47 | ], 48 | "swatch": { 49 | "color": "#C0C0C0", 50 | "name": "Grey" 51 | }, 52 | "compare": null, 53 | "legend": { 54 | "type": "gradient", 55 | "min": "less", 56 | "max": "more", 57 | "stops": [ 58 | "#440357", 59 | "#3b508a", 60 | "#208f8c", 61 | "#5fc961", 62 | "#fde725" 63 | ] 64 | }, 65 | "paint": null, 66 | "info": "Darker colors indicate fewer night lights and less activity. Lighter colors indicate more night lights and more activity. Check out the HD dataset to see a light-corrected version of this dataset." 67 | } 68 | ``` 69 | The dataset source tiles are under the key `source.tiles`. Items surrounded by curly braces `{` and `}` should be replaced with appropriate values. 70 | 71 | The `{x}` and `{y}` values, in combination with the zoom level, `{z}`, identify the [Slippy Map Tilename](https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames) of the tile to fetch (e.g. a tile containing Los Angeles has `/{z}/{x}/{y}` values of `9/87/204`). 72 | 73 | `{date}` should be of the format `YYYYMM` if the value of `timeUnit` is `month`, and `YYYY_MM_DD` if the value of `timeUnit` is `day`. 74 | 75 | Dates available for the dataset are given by the `domain` key. If the `isPeriodic` value is `true`, then `domain` will only contain two dates: the start and end date.
Any date within that range will be valid (remember that dates can either be daily (`YYYY_MM_DD`) or monthly ( `YYYYMM`)). For example, a periodic, monthly dataset can be requested with `202001` as the `{date}` field, to get data for January 2020 and `202002` for February 2020). 76 | 77 | The URL for requesting data from the `nightlights-viirs` data for Beijing, for xyz coordinates `z=9, x=421, y=193` on January 1st, 2020 would look something like: 78 | 79 | [https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/9/421/193@1x?url=s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_be_2020_01_01_cog.tif&resampling_method=nearest&bidx=1&rescale=0%2C100&color_map=viridis](https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/9/421/193@1x?url=s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_be_2020_01_01_cog.tif&resampling_method=nearest&bidx=1&rescale=0%2C100&color_map=viridis) 80 | 81 | Which will display in the browser. 82 | 83 | You can also download the above tile with a curl command: 84 | 85 | ```bash 86 | curl 'https://8ib71h0627.execute-api.us-east-1.amazonaws.com/v1/9/421/193@1x?url=s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_be_2020_01_01_cog.tif&resampling_method=nearest&bidx=1&rescale=0%2C100&color_map=viridis' -o tile.tif 87 | ``` 88 | 89 | The source COGs, from which the API generates the tiles are publicly available at the location specified by the `url` parameter in the link above. For example, the source COG for the above tile can be downloaded using the [aws-cli](https://aws.amazon.com/cli/) with the following command: 90 | 91 | ```bash 92 | aws s3 cp s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_be_2020_01_01_cog.tif ./cog.tif --no-sign-request 93 | ``` 94 | 95 | Some of the COGs can be quite large. To verify the size before downloading use this command: 96 | 97 | ```bash 98 | aws s3 ls s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_be_2020_01_01_cog.tif --summarize --human-readable --no-sign-request 99 | 100 | # Output: 101 | 2021-03-24 08:50:12 5.9 MiB VNP46A2_V011_be_2020_01_01_cog.tif 102 | 103 | Total Objects: 1 104 | Total Size: 5.9 MiB 105 | ``` 106 | -------------------------------------------------------------------------------- /covid_api/api/api_v1/endpoints/timelapse.py: -------------------------------------------------------------------------------- 1 | """API metadata.""" 2 | import re 3 | from concurrent import futures 4 | from datetime import datetime, timedelta 5 | from typing import List, Union 6 | 7 | from dateutil.relativedelta import relativedelta 8 | 9 | from covid_api.api.utils import get_zonal_stat 10 | from covid_api.core.config import API_VERSION_STR 11 | from covid_api.db.static.datasets import datasets as _datasets 12 | from covid_api.db.static.errors import InvalidIdentifier 13 | from covid_api.db.static.sites import sites 14 | from covid_api.models.static import Dataset 15 | from covid_api.models.timelapse import TimelapseRequest, TimelapseValue 16 | 17 | from fastapi import APIRouter, HTTPException 18 | 19 | from starlette.requests import Request 20 | 21 | router = APIRouter() 22 | 23 | 24 | # TODO: validate inputs with typing/pydantic models 25 | def _get_mean_median(query, url, dataset): 26 | 27 | # format S3 URL template with spotlightId, if dataset is 28 | # spotlight specific 29 | if "{spotlightId}" in url: 30 | if not query.spotlight_id: 31 | raise HTTPException( 32 | status_code=400, 33 | detail=f"Must provide a `spotlight_id` for dataset: {dataset.id}", 34 | ) 35 | url = _insert_spotlight_id(url, query.spotlight_id) 36 | try: 37 | mean, 
median = get_zonal_stat(query.geojson, url) 38 | return dict(mean=mean, median=median) 39 | 40 | except Exception: 41 | raise HTTPException( 42 | status_code=400, 43 | detail=( 44 | "Unable to calculate mean/median values. This either due to a bounding box " 45 | "extending beyond the edges of the COG or there are no COGs available for the " 46 | "requested date range." 47 | ), 48 | ) 49 | 50 | 51 | @router.post( 52 | "/timelapse", 53 | responses={200: {"description": "Return timelapse values for a given geometry"}}, 54 | response_model=Union[List[TimelapseValue], TimelapseValue], 55 | response_model_exclude_none=True, 56 | ) 57 | def timelapse(request: Request, query: TimelapseRequest): 58 | """Handle /timelapse requests.""" 59 | 60 | # get dataset metadata for the requested dataset 61 | # will be used to validate other parts of the query 62 | dataset = _get_dataset_metadata(request, query) 63 | 64 | # extract S3 URL template from dataset metadata info 65 | url = _extract_s3_url(dataset) 66 | 67 | if query.date: 68 | 69 | # format S3 URL template with date object 70 | url = _insert_date(url, dataset, query.date) 71 | return _get_mean_median(query, url, dataset) 72 | 73 | if query.date_range: 74 | 75 | if dataset.time_unit == "day": 76 | # Get start and end dates 77 | start = _validate_query_date(dataset, query.date_range[0]) 78 | end = _validate_query_date(dataset, query.date_range[1]) 79 | 80 | # Populate all days in between Add 1 to days to ensure it contains the end date as well 81 | dates = [ 82 | datetime.strftime((start + timedelta(days=x)), "%Y_%m_%d") 83 | for x in range(0, (end - start).days + 1) 84 | ] 85 | 86 | if dataset.time_unit == "month": 87 | start = datetime.strptime(query.date_range[0], "%Y%m") 88 | end = datetime.strptime(query.date_range[1], "%Y%m") 89 | 90 | num_months = (end.year - start.year) * 12 + (end.month - start.month) 91 | 92 | dates = [ 93 | datetime.strftime((start + relativedelta(months=+x)), "%Y%m") 94 | for x in range(0, num_months + 1) 95 | ] 96 | 97 | with futures.ThreadPoolExecutor(max_workers=10) as executor: 98 | future_stats_queries = { 99 | executor.submit( 100 | _get_mean_median, query, _insert_date(url, dataset, date), dataset 101 | ): date 102 | for date in dates 103 | } 104 | 105 | stats = [] 106 | 107 | for future in futures.as_completed(future_stats_queries): 108 | date = future_stats_queries[future] 109 | try: 110 | stats.append({"date": date, **future.result()}) 111 | except HTTPException as e: 112 | stats.append({"date": date, "error": e.detail}) 113 | 114 | return sorted(stats, key=lambda s: s["date"]) 115 | 116 | 117 | def _get_dataset_metadata(request: Request, query: TimelapseRequest): 118 | 119 | scheme = request.url.scheme 120 | host = request.headers["host"] 121 | 122 | if API_VERSION_STR: 123 | host += API_VERSION_STR 124 | 125 | dataset = list( 126 | filter( 127 | lambda d: d.id == query.dataset_id, 128 | _datasets.get_all(api_url=f"{scheme}://{host}").datasets, 129 | ) 130 | ) 131 | 132 | if not dataset: 133 | raise HTTPException( 134 | status_code=404, detail=f"No dataset found for id: {query.dataset_id}" 135 | ) 136 | 137 | dataset = dataset[0] 138 | 139 | if dataset.source.type != "raster": 140 | raise HTTPException( 141 | status_code=400, 142 | detail=f"Dataset {query.dataset_id} is not a raster-type dataset", 143 | ) 144 | 145 | return dataset 146 | 147 | 148 | def _extract_s3_url(dataset: Dataset): 149 | url_search = re.search(r"url=([^&\s]*)", dataset.source.tiles[0]) 150 | if not url_search: 151 | raise 
HTTPException(status_code=500) 152 | 153 | return url_search.group(1) 154 | 155 | 156 | def _insert_date(url: str, dataset: Dataset, date: str): 157 | _validate_query_date(dataset, date) 158 | return url.replace("{date}", date) 159 | 160 | 161 | def _validate_query_date(dataset: Dataset, date: str): 162 | date_format = "%Y_%m_%d" if dataset.time_unit == "day" else "%Y%m" 163 | try: 164 | return datetime.strptime(date, date_format) 165 | except ValueError: 166 | raise HTTPException( 167 | status_code=400, 168 | detail=( 169 | f"Invalid date format. {date} should be like " 170 | f"{'YYYYMM' if dataset.time_unit == 'month' else 'YYYY_MM_DD'}" 171 | ), 172 | ) 173 | 174 | 175 | def _insert_spotlight_id(url: str, spotlight_id: str): 176 | if not spotlight_id: 177 | raise HTTPException(status_code=400, detail="Missing spotlightId") 178 | try: 179 | sites.get(spotlight_id) 180 | except InvalidIdentifier: 181 | raise HTTPException( 182 | status_code=404, detail=f"No spotlight found for id: {spotlight_id}" 183 | ) 184 | 185 | return url.replace("{spotlightId}", spotlight_id) 186 | -------------------------------------------------------------------------------- /covid_api/db/static/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ covid_api static datasets """ 2 | import json 3 | import os 4 | from typing import List 5 | 6 | import botocore 7 | 8 | from covid_api.core.config import ( 9 | DATASET_METADATA_FILENAME, 10 | DATASET_METADATA_GENERATOR_FUNCTION_NAME, 11 | INDICATOR_BUCKET, 12 | ) 13 | from covid_api.db.static.errors import InvalidIdentifier 14 | from covid_api.db.static.sites import sites 15 | from covid_api.db.utils import invoke_lambda, s3_get 16 | from covid_api.models.static import DatasetInternal, Datasets, GeoJsonSource 17 | 18 | data_dir = os.path.join(os.path.dirname(__file__)) 19 | 20 | 21 | class DatasetManager(object): 22 | """Default Dataset holder.""" 23 | 24 | def __init__(self): 25 | """Load all datasets in a dict.""" 26 | 27 | pass 28 | 29 | def _data(self): 30 | datasets = [ 31 | os.path.splitext(f)[0] for f in os.listdir(data_dir) if f.endswith(".json") 32 | ] 33 | return { 34 | dataset: DatasetInternal.parse_file( 35 | os.path.join(data_dir, f"{dataset}.json") 36 | ) 37 | for dataset in datasets 38 | } 39 | 40 | def _load_domain_metadata(self): 41 | try: 42 | return json.loads( 43 | s3_get(bucket=INDICATOR_BUCKET, key=DATASET_METADATA_FILENAME) 44 | ) 45 | except botocore.errorfactory.ClientError as e: 46 | 47 | if e.response["Error"]["Code"] == "NoSuchKey": 48 | print( 49 | "No datasets domain metadata file found, requesting generation" 50 | " of a new file. This may take several minutes." 51 | ) 52 | # invoke_lambda should return the output of the lambda's execution 53 | # however there are issues with accessing the output object within the 54 | # "Payload" returned by the lambda_invocation (see docstring). 55 | # Instead the thread is held while the lambda executes and then 56 | # loads the metadata from s3. 57 | 58 | invoke_lambda( 59 | lambda_function_name=DATASET_METADATA_GENERATOR_FUNCTION_NAME 60 | ) 61 | return json.loads( 62 | s3_get(bucket=INDICATOR_BUCKET, key=DATASET_METADATA_FILENAME) 63 | ) 64 | 65 | def get(self, spotlight_id: str, api_url: str) -> Datasets: 66 | """ 67 | Fetches all the datasets avilable for a given spotlight. If the 68 | spotlight_id provided is "global" then this method will return 69 | all datasets that are NOT spotlight specific. 
Raises an 70 | `InvalidIdentifier` exception if the provided spotlight_id does 71 | not exist. 72 | 73 | Params: 74 | ------- 75 | spotlight_id (str): spotlight id to return datasets for 76 | api_url(str): {scheme}://{host} of request originator in order 77 | to return correctly formated source urls 78 | 79 | Returns: 80 | ------- 81 | (Datasets) pydantic model contains a list of datasets' metadata 82 | """ 83 | 84 | global_datasets = self._process( 85 | self._load_domain_metadata()["global"], 86 | api_url=api_url, 87 | spotlight_id="global", 88 | ) 89 | 90 | if spotlight_id == "global": 91 | return Datasets(datasets=[dataset.dict() for dataset in global_datasets]) 92 | 93 | # Verify that the requested spotlight exists 94 | try: 95 | site = sites.get(spotlight_id) 96 | except InvalidIdentifier: 97 | raise 98 | 99 | spotlight_datasets = self._process( 100 | self._load_domain_metadata()[site.id], 101 | api_url=api_url, 102 | spotlight_id=site.id, 103 | ) 104 | 105 | return Datasets( 106 | datasets=[ 107 | dataset.dict() for dataset in [*global_datasets, *spotlight_datasets] 108 | ] 109 | ) 110 | 111 | def get_all(self, api_url: str) -> Datasets: 112 | """Fetch all Datasets. Overload domain with S3 scanned domain""" 113 | datasets = self._process( 114 | datasets_domains_metadata=self._load_domain_metadata()["_all"], 115 | api_url=api_url, 116 | ) 117 | return Datasets(datasets=[dataset.dict() for dataset in datasets]) 118 | 119 | def list(self) -> List[str]: 120 | """List all datasets""" 121 | return list(self._data().keys()) 122 | 123 | def _format_urls(self, tiles: List[str], api_url: str, spotlight_id: str = None): 124 | if spotlight_id: 125 | return [ 126 | tile.replace("{api_url}", api_url).replace( 127 | "{spotlightId}", spotlight_id 128 | ) 129 | for tile in tiles 130 | ] 131 | return [tile.replace("{api_url}", api_url) for tile in tiles] 132 | 133 | def _process( 134 | self, datasets_domains_metadata: dict, api_url: str, spotlight_id: str = None 135 | ): 136 | """ 137 | Processes datasets to be returned to the API consumer: 138 | - Updates dataset domains for all returned datasets 139 | - Inserts api url into source urls 140 | - Inserts spotlight id into source url (if a spotlight id is provided) 141 | 142 | Params: 143 | ------- 144 | output_datasets (dict): Dataset domains for the datasets to be returned. 145 | api_url (str): 146 | Base url, of the form {schema}://{host}, extracted from the request, to 147 | prepend all tile source urls with. 
148 | spotlight_id (Optional[str]): 149 | Spotlight ID (if requested), to be inserted into the source urls 150 | 151 | Returns: 152 | -------- 153 | (list) : datasets metadata objects (to be serialized as a pydantic Datasets 154 | model) 155 | """ 156 | 157 | output_datasets = { 158 | k: v 159 | for k, v in self._data().items() 160 | if k in datasets_domains_metadata.keys() 161 | } 162 | 163 | for k, dataset in output_datasets.items(): 164 | 165 | # overload domain with domain returned from s3 file 166 | dataset.domain = datasets_domains_metadata.get(k, {}).get("domain", []) 167 | 168 | # format url to contain the correct API host and 169 | # spotlight id (if a spotlight was requested) 170 | format_url_params = dict(api_url=api_url) 171 | if spotlight_id: 172 | if k == "nightlights-viirs" and spotlight_id in ["du", "gh"]: 173 | spotlight_id = "EUPorts" 174 | format_url_params.update(dict(spotlight_id=spotlight_id)) 175 | 176 | dataset.source.tiles = self._format_urls( 177 | tiles=dataset.source.tiles, **format_url_params 178 | ) 179 | if dataset.background_source: 180 | dataset.background_source.tiles = self._format_urls( 181 | tiles=dataset.background_source.tiles, **format_url_params 182 | ) 183 | if dataset.compare: 184 | dataset.compare.source.tiles = self._format_urls( 185 | tiles=dataset.compare.source.tiles, **format_url_params 186 | ) 187 | # source URLs of background tiles for `detections-*` datasets are 188 | # handled differently in the front end so the the `source` objects 189 | # get updated here 190 | if k.startswith("detections-"): 191 | dataset.source = GeoJsonSource( 192 | type=dataset.source.type, data=dataset.source.tiles[0] 193 | ).dict() 194 | 195 | if spotlight_id == "tk" and k == "water-chlorophyll": 196 | dataset.source.tiles = [ 197 | tile.replace("&rescale=-100%2C100", "") 198 | for tile in dataset.source.tiles 199 | ] 200 | 201 | return output_datasets.values() 202 | 203 | 204 | datasets = DatasetManager() 205 | -------------------------------------------------------------------------------- /covid_api/db/utils.py: -------------------------------------------------------------------------------- 1 | """Db tools.""" 2 | 3 | import csv 4 | import json 5 | import os 6 | from datetime import datetime 7 | from typing import Dict, List 8 | 9 | import boto3 10 | from botocore import config 11 | 12 | from covid_api.core.config import DT_FORMAT, INDICATOR_BUCKET 13 | from covid_api.models.static import IndicatorObservation 14 | 15 | s3_params = dict(service_name="s3") 16 | lambda_params = dict( 17 | service_name="lambda", 18 | region_name="us-east-1", 19 | config=config.Config( 20 | read_timeout=900, connect_timeout=900, retries={"max_attempts": 0} 21 | ), 22 | ) 23 | 24 | if os.environ.get("AWS_ENDPOINT_URL"): 25 | print("Loading from local") 26 | s3_params["endpoint_url"] = os.environ["AWS_ENDPOINT_URL"] 27 | lambda_params["endpoint_url"] = os.environ["AWS_ENDPOINT_URL"] 28 | 29 | s3 = boto3.client(**s3_params) 30 | 31 | _lambda = boto3.client(**lambda_params) 32 | 33 | 34 | def invoke_lambda( 35 | lambda_function_name: str, payload: dict = None, invocation_type="RequestResponse" 36 | ): 37 | """Invokes a lambda function using the boto3 lambda client. 
38 | 39 | Params: 40 | ------- 41 | lambda_function_name (str): name of the lambda to invoke 42 | payload (Optional[dict]): data into invoke the lambda function with (will be accessible 43 | in the lambda handler function under the `event` param) 44 | invocation_type (Optional[str] = ["RequestResponse", "Event", "DryRun"]): 45 | RequestReponse will run the lambda synchronously (holding up the thread 46 | until the lambda responds 47 | Event will run asynchronously 48 | DryRun will only verify that the user/role has the correct permissions to invoke 49 | the lambda function 50 | 51 | Returns: 52 | -------- 53 | (dict) Lambda invocation response, see: 54 | https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/lambda.html#Lambda.Client.invoke 55 | 56 | - NOTE: 57 | The current configuration specifies a RequestResponse invocation, which does 58 | indeed run synchronously, but returns a status succeeded of 202 (Accepted) when 59 | it should return a 200 status. 202 status is expected from the `Event` invocation 60 | type (indicated lamdba was initiated but we don't know it's status) 61 | 62 | - NOTE: 63 | The current configuration should directly return the lambda output under 64 | response["Payload"]: StreamingBody, however the byte string currently being returned 65 | contains lambda invocation/runtime details from the logs. (eg: 66 | 67 | ``` 68 | START RequestId: 7c61eb52-735d-1ce4-0df2-a975197924eb Version: 1 69 | END RequestId: 7c61eb52-735d-1ce4-0df2-a975197924eb 70 | REPORT RequestId: 7c61eb52-735d-1ce4-0df2-a975197924eb Init Duration: 232.54 ms Duration: 3.02 ms Billed Duration: 100 ms Memory Size: 128 MB Max Memory Used: 33 MB 71 | 72 | {"result":"success","input":"test"} 73 | 74 | ``` 75 | when we only expect the JSON object: {"result":"success", "input":"test"} to be returned 76 | ) 77 | 78 | To load just the lambda output use: 79 | 80 | ``` 81 | response = r["Payload"].read().decode("utf-8") 82 | lambda_output = json.loads( 83 | response[response.index("{") : (response.index("}") + 1)] 84 | ) 85 | ``` 86 | where r is the output of this function. 
87 | """ 88 | lambda_invoke_params = dict( 89 | FunctionName=lambda_function_name, InvocationType=invocation_type 90 | ) 91 | if payload: 92 | lambda_invoke_params.update(dict(Payload=json.dumps(payload))) 93 | return _lambda.invoke(**lambda_invoke_params) 94 | 95 | 96 | def s3_get(bucket: str, key: str): 97 | """Get AWS S3 Object.""" 98 | response = s3.get_object(Bucket=bucket, Key=key) 99 | return response["Body"].read() 100 | 101 | 102 | def get_indicator_site_metadata(identifier: str, folder: str) -> Dict: 103 | """Get Indicator metadata for a specific site.""" 104 | try: 105 | key = f"indicators/{folder}/{identifier}.json" 106 | return json.loads(s3_get(INDICATOR_BUCKET, key)) 107 | except Exception: 108 | return {} 109 | 110 | 111 | def indicator_folders() -> List: 112 | """Get Indicator folders.""" 113 | response = s3.list_objects_v2( 114 | Bucket=INDICATOR_BUCKET, Prefix="indicators/", Delimiter="/", 115 | ) 116 | return [obj["Prefix"].split("/")[1] for obj in response.get("CommonPrefixes", [])] 117 | 118 | 119 | def indicator_exists(identifier: str, indicator: str): 120 | """Check if an indicator exists for a site""" 121 | try: 122 | s3.head_object( 123 | Bucket=INDICATOR_BUCKET, Key=f"indicators/{indicator}/{identifier}.csv", 124 | ) 125 | return True 126 | except Exception: 127 | try: 128 | s3.head_object( 129 | Bucket=INDICATOR_BUCKET, 130 | Key=f"indicators/{indicator}/{identifier}.json", 131 | ) 132 | return True 133 | except Exception: 134 | return False 135 | 136 | 137 | def get_indicators(identifier) -> List: 138 | """Return indicators info.""" 139 | indicators = [] 140 | for folder in indicator_folders(): 141 | if indicator_exists(identifier, folder): 142 | indicator = dict(id=folder) 143 | try: 144 | data = [] 145 | # metadata for reading the data and converting to a consistent format 146 | metadata_json = s3_get( 147 | INDICATOR_BUCKET, f"indicators/{folder}/metadata.json" 148 | ) 149 | metadata_dict = json.loads(metadata_json.decode("utf-8")) 150 | 151 | # read the actual indicator data 152 | indicator_csv = s3_get( 153 | INDICATOR_BUCKET, f"indicators/{folder}/{identifier}.csv" 154 | ) 155 | indicator_lines = indicator_csv.decode("utf-8").split("\n") 156 | reader = csv.DictReader(indicator_lines,) 157 | 158 | # top level metadata is added directly to the response 159 | top_level_fields = { 160 | k: v for k, v in metadata_dict.items() if isinstance(v, str) 161 | } 162 | 163 | # for each row (observation), format the data correctly 164 | for row in reader: 165 | date = datetime.strptime( 166 | row[metadata_dict["date"]["column"]], 167 | metadata_dict["date"]["format"], 168 | ).strftime(DT_FORMAT) 169 | 170 | other_fields = { 171 | k: row.get(v["column"], None) 172 | for k, v in metadata_dict.items() 173 | if isinstance(v, dict) and v.get("column") and k != "date" 174 | } 175 | 176 | # validate and parse the row 177 | i = IndicatorObservation(**other_fields) 178 | 179 | data.append(dict(date=date, **i.dict(exclude_none=True))) 180 | 181 | # add to the indicator dictionary 182 | indicator["domain"] = dict( 183 | date=[ 184 | min( 185 | data, key=lambda x: datetime.strptime(x["date"], DT_FORMAT), 186 | )["date"], 187 | max( 188 | data, key=lambda x: datetime.strptime(x["date"], DT_FORMAT), 189 | )["date"], 190 | ], 191 | indicator=[ 192 | min(data, key=lambda x: x["indicator"])["indicator"], 193 | max(data, key=lambda x: x["indicator"])["indicator"], 194 | ], 195 | ) 196 | indicator["data"] = data 197 | indicator.update(top_level_fields) 198 | 199 | except Exception as e: 
200 | print(e) 201 | pass 202 | 203 | try: 204 | site_metadata = get_indicator_site_metadata(identifier, folder) 205 | # this will, intentionally, overwrite the name from the data if present 206 | if "name" in site_metadata: 207 | indicator["name"] = site_metadata.get("name") 208 | indicator["notes"] = site_metadata.get("notes", None) 209 | indicator["highlight_bands"] = site_metadata.get( 210 | "highlight_bands", None 211 | ) 212 | except Exception as e: 213 | print(e) 214 | pass 215 | 216 | indicators.append(indicator) 217 | 218 | return indicators 219 | --------------------------------------------------------------------------------
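A note on `get_indicators` above: it drives its CSV parsing entirely from each indicator folder's `metadata.json`, where string values are copied straight into the response and dict values describe how to read a CSV column. Below is a minimal, self-contained sketch of that row-conversion step; the field names (`week`, `count`) and the output date format are invented for illustration (the repo uses its own `DT_FORMAT` constant defined earlier in `utils.py`), and the `IndicatorObservation` validation step is omitted:

import csv
import io
from datetime import datetime

# Hypothetical indicator metadata: str values are copied to the response as-is,
# dict values describe how to read a CSV column (names here are invented).
metadata = {
    "name": "Example indicator",
    "date": {"column": "week", "format": "%Y-%m-%d"},
    "indicator": {"column": "count"},
}
raw_csv = "week,count\n2020-03-01,42\n2020-03-08,17\n"

reader = csv.DictReader(io.StringIO(raw_csv))
data = [
    {
        "date": datetime.strptime(
            row[metadata["date"]["column"]], metadata["date"]["format"]
        ).strftime("%m/%d/%Y"),  # stand-in for the repo's DT_FORMAT
        "indicator": row[metadata["indicator"]["column"]],
    }
    for row in reader
]
print(data)  # [{'date': '03/01/2020', 'indicator': '42'}, {'date': '03/08/2020', 'indicator': '17'}]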
/lambda/dataset_metadata_generator/src/main.py: -------------------------------------------------------------------------------- 1 | """ Dataset metadata generator lambda. """ 2 | import datetime 3 | import json 4 | import os 5 | import re 6 | from typing import Any, Dict, List, Optional, Union 7 | 8 | import boto3 9 | 10 | BASE_PATH = os.path.dirname(os.path.abspath(__file__)) 11 | DATASETS_JSON_FILEPATH = os.path.join(BASE_PATH, "datasets") 12 | SITES_JSON_FILEPATH = os.path.join(BASE_PATH, "sites") 13 | 14 | 15 | BUCKET_NAME = os.environ["DATA_BUCKET_NAME"] 16 | DATASET_METADATA_FILENAME = os.environ["DATASET_METADATA_FILENAME"] 17 | 18 | # Use this bucket to read dataset info from prod S3 bucket 19 | bucket = boto3.resource("s3").Bucket(BUCKET_NAME) 20 | # If running in AWS, save metadata file to same bucket 21 | metadata_host_bucket = bucket 22 | 23 | # If running locally, save metadata file to local S3 bucket 24 | if os.environ.get("AWS_ENDPOINT_URL"): 25 | metadata_host_bucket = boto3.resource( 26 | "s3", endpoint_url=os.environ["AWS_ENDPOINT_URL"] 27 | ).Bucket(BUCKET_NAME) 28 | 29 | 30 | DT_FORMAT = "%Y-%m-%d" 31 | MT_FORMAT = "%Y%m" 32 | 33 | 34 | def handler(event, context): 35 | """ 36 | Params: 37 | ------- 38 | event (dict): 39 | context (dict): 40 | 41 | Both params are standard lambda handler invocation params but not used within this 42 | lambda's code. 43 | 44 | Returns: 45 | ------- 46 | (string): JSON-encoded dict with top level keys for each of the possible 47 | queries that can be run against the `/datasets` endpoint (key: _all_ contains the 48 | result of the LIST operation, each of the other keys contains the result of 49 | GET /datasets/{spotlight_id | "global"}) 50 | """ 51 | 52 | # TODO: define TypedDicts for these! 53 | datasets = _gather_json_data(DATASETS_JSON_FILEPATH) 54 | sites = _gather_json_data(SITES_JSON_FILEPATH) 55 | 56 | result = json.dumps(_gather_datasets_metadata(datasets, sites)) 57 | 58 | print( 59 | f"Saving generated metadata to {DATASET_METADATA_FILENAME} in bucket {metadata_host_bucket.name}" 60 | ) 61 | metadata_host_bucket.put_object( 62 | Body=result, Key=DATASET_METADATA_FILENAME, ContentType="application/json", 63 | ) 64 | return result 65 | 66 | 67 | def _gather_datasets_metadata(datasets: List[dict], sites: List[dict]): 68 | """Reads through the s3 bucket to generate a file that contains 69 | the datasets for each given spotlight option (_all, global, tk, ny, sf, 70 | la, be, du, gh) and each dataset's respective domain for that spotlight 71 | 72 | Params: 73 | ------- 74 | datasets (List[dict]): list of dataset metadata objects (contains fields 75 | like: s3_location, time_unit, swatch, exclusive_with, etc), to use 76 | to generate the result of each of the possible `/datasets` endpoint 77 | queries. 78 | sites (List[dict]): list of site metadata objects 79 | 80 | Returns: 81 | -------- 82 | (dict): python object with result of each possible query against the `/datasets` 83 | endpoint with each dataset's associated domain. 84 | """ 85 | 86 | metadata: Dict[str, dict] = {} 87 | 88 | for dataset in datasets: 89 | print(f"Processing dataset: {dataset['name']}") 90 | if not dataset.get("s3_location"): 91 | domain = [] 92 | else: 93 | domain_args = { 94 | "dataset_folder": dataset["s3_location"], 95 | "is_periodic": dataset.get("is_periodic"), 96 | "time_unit": dataset.get("time_unit"), 97 | } 98 | 99 | domain = _get_dataset_domain(**domain_args) 100 | 101 | metadata.setdefault("_all", {}).update({dataset["id"]: {"domain": domain}}) 102 | 103 | if _is_global_dataset(dataset): 104 | 105 | metadata.setdefault("global", {}).update( 106 | {dataset["id"]: {"domain": domain}} 107 | ) 108 | continue 109 | 110 | for site in sites: 111 | 112 | domain_args["spotlight_id"] = site["id"] 113 | 114 | if site["id"] in ["du", "gh"]: 115 | domain_args["spotlight_id"] = ["du", "gh", "EUPorts"] 116 | 117 | # skip adding dataset to metadata object if no dates were found for the given 118 | # spotlight (indicates dataset is not valid for that spotlight) 119 | try: 120 | domain = _get_dataset_domain(**domain_args) 121 | except NoKeysFoundForSpotlight: 122 | continue 123 | 124 | metadata.setdefault(site["id"], {}).update( 125 | {dataset["id"]: {"domain": domain}} 126 | ) 127 | return metadata 128 | 129 | 130 | def _gather_json_data(dirpath: str) -> List[dict]: 131 | """Gathers all JSON files from within a given directory""" 132 | 133 | results = [] 134 | 135 | for filename in os.listdir(dirpath): 136 | if not filename.endswith(".json"): 137 | continue 138 | with open(os.path.join(dirpath, filename)) as f: 139 | results.append(json.load(f)) 140 | return results 141 | 142 | 143 | def _is_global_dataset(dataset: dict) -> bool: 144 | """Returns whether the given dataset is spotlight specific (FALSE) 145 | or non-spotlight specific, i.e. global (TRUE)""" 146 | return not any( 147 | [ 148 | i in dataset["source"]["tiles"][0] 149 | for i in ["{spotlightId}", "greatlakes", "togo"] 150 | ] 151 | ) 152 | 153 | 154 | def _gather_s3_keys( 155 | spotlight_id: Optional[Union[str, List]] = None, prefix: Optional[str] = "", 156 | ) -> List[str]: 157 | """ 158 | Returns a list of S3 keys. If no args are provided, the keys will represent 159 | the entire S3 bucket. 160 | Params: 161 | ------- 162 | spotlight_id (Optional[str]): 163 | Id of a spotlight to filter keys by 164 | prefix (Optional[str]): 165 | S3 Prefix under which to gather keys, used to specify a specific 166 | dataset folder to search within. 167 | 168 | Returns: 169 | ------- 170 | List[str] 171 | 172 | """ 173 | 174 | keys = [x.key for x in bucket.objects.filter(Prefix=prefix)] 175 | 176 | if not spotlight_id: 177 | return keys 178 | 179 | if isinstance(spotlight_id, list): 180 | spotlight_id = "|".join([s for s in spotlight_id]) 181 | 182 | pattern = re.compile(rf"""[^a-zA-Z0-9]({spotlight_id})[^a-zA-Z0-9]""", re.IGNORECASE) 183 | return list({key for key in keys if pattern.search(key)}) 184 | 185 | 
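# Illustrative sketch (not part of this module): how the spotlight filter above
# behaves on some invented keys - the id only matches when delimited by
# non-alphanumeric characters, so "ny" matches "/ny/" and "-ny-" but not "sunny":
#
#     >>> import re
#     >>> keys = [
#     ...     "detections-plane/ny/2020_03_01.geojson",
#     ...     "bm_500m_daily/VNP46A2_V011_sunny_2020_03_01_cog.tif",
#     ... ]
#     >>> pattern = re.compile(r"[^a-zA-Z0-9](ny)[^a-zA-Z0-9]", re.IGNORECASE)
#     >>> [k for k in keys if pattern.search(k)]
#     ['detections-plane/ny/2020_03_01.geojson']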
186 | def _get_dataset_domain( 187 | dataset_folder: str, 188 | is_periodic: bool, 189 | spotlight_id: Optional[Union[str, List]] = None, 190 | time_unit: Optional[str] = "day", 191 | ): 192 | """ 193 | Returns a domain for a given dataset as identified by a folder. If the 194 | dataset is periodic (`is_periodic` is True), the function will only 195 | return the min/max dates; otherwise ALL dates available for that 196 | dataset/spotlight will be returned. 197 | 198 | Params: 199 | ------ 200 | dataset_folder (str): dataset folder to search within 201 | is_periodic (bool): whether the dataset is periodic, per its metadata json file 202 | spotlight_id (Optional[Union[str, List]]): id (or list of ids) of a 203 | spotlight to restrict the 204 | domain search to. 205 | time_unit (Optional[str] - one of ["day", "month"]): 206 | Whether the {date} token in the S3 filenames should be matched 207 | to YYYY_MM_DD (day) or YYYYMM (month) 208 | 209 | Return: 210 | ------ 211 | List[datetime] 212 | """ 213 | s3_keys_args: Dict[str, Any] = {"prefix": dataset_folder} 214 | if spotlight_id: 215 | s3_keys_args["spotlight_id"] = spotlight_id 216 | 217 | keys = _gather_s3_keys(**s3_keys_args) 218 | 219 | if not keys: 220 | raise NoKeysFoundForSpotlight 221 | 222 | dates = [] 223 | 224 | for key in keys: 225 | 226 | # matches dates like: YYYY_MM_DD (or YYYYMM when time_unit == "month") 227 | pattern = re.compile( 228 | r"[^a-zA-Z0-9]((?P<YEAR>\d{4})_(?P<MONTH>\d{2})_(?P<DAY>\d{2}))[^a-zA-Z0-9]", re.IGNORECASE, 229 | ) 230 | if time_unit == "month": 231 | pattern = re.compile( 232 | r"[^a-zA-Z0-9](?P<YEAR>(\d{4}))(?P<MONTH>(\d{2}))[^a-zA-Z0-9]", re.IGNORECASE, 233 | ) 234 | 235 | result = pattern.search(key) 236 | 237 | if not result: 238 | continue 239 | 240 | date = None 241 | try: 242 | date = datetime.datetime( 243 | int(result.group("YEAR")), 244 | int(result.group("MONTH")), 245 | int(result.groupdict().get("DAY", 1)), 246 | ) 247 | 248 | except ValueError: 249 | # Invalid date value matched - skip date 250 | continue 251 | 252 | # Some files happen to have 6 consecutive digits (likely an ID of sorts) 253 | # that sometimes gets matched as a date. This further restriction of 254 | # matched timestamps will reduce the number of "false" positives (although 255 | # ID's between 201011 and 203011 will slip by) 256 | if not datetime.datetime(2010, 1, 1) < date < datetime.datetime(2030, 1, 1): 257 | continue 258 | 259 | dates.append(date.strftime("%Y-%m-%dT%H:%M:%SZ")) 260 | 261 | if is_periodic and len(dates): 262 | return [min(dates), max(dates)] 263 | 264 | return sorted(set(dates)) 265 | 266 | 267 | class NoKeysFoundForSpotlight(Exception): 268 | """Exception to be thrown if no keys are found for a given spotlight""" 269 | 270 | pass 271 | 272 | 273 | if __name__ == "__main__": 274 | handler(event={}, context={}) 275 | --------------------------------------------------------------------------------
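A minimal, runnable sketch of the date extraction implemented by `_get_dataset_domain` above, using invented S3 keys (no AWS access required); the key names are illustrative only:

import datetime
import re

# Invented keys mimicking the day-unit and month-unit naming used by the dataset tiles
day_key = "xco2-mean/xco2_16day_mean.2020_03_01.tif"
month_key = "bmhd_30m_monthly/BMHD_VNP46A2_du_202004_cog.tif"

day_pattern = re.compile(
    r"[^a-zA-Z0-9]((?P<YEAR>\d{4})_(?P<MONTH>\d{2})_(?P<DAY>\d{2}))[^a-zA-Z0-9]"
)
month_pattern = re.compile(r"[^a-zA-Z0-9](?P<YEAR>\d{4})(?P<MONTH>\d{2})[^a-zA-Z0-9]")

for key, pattern in [(day_key, day_pattern), (month_key, month_pattern)]:
    m = pattern.search(key)
    # DAY is absent from the month pattern, so groupdict().get("DAY", 1) falls back to 1
    date = datetime.datetime(
        int(m.group("YEAR")), int(m.group("MONTH")), int(m.groupdict().get("DAY", 1))
    )
    print(key, "->", date.strftime("%Y-%m-%dT%H:%M:%SZ"))
# xco2-mean/xco2_16day_mean.2020_03_01.tif -> 2020-03-01T00:00:00Z
# bmhd_30m_monthly/BMHD_VNP46A2_du_202004_cog.tif -> 2020-04-01T00:00:00Z

For a periodic dataset the function then keeps only [min(dates), max(dates)]; otherwise the full sorted set of dates becomes the domain.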
/lambda/dataset_metadata_generator/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dataset metadata generator lambda test class. This file contains dataset and site metadata 3 | used by the unit tests. The data in this file should be updated to reflect any modification 4 | in metadata content or format of the actual metadata files (under `covid_api/db/static/`) 5 | """ 6 | DATASETS = [ 7 | { 8 | "id": "co2", 9 | "name": "CO₂ (Avg)", 10 | "type": "raster-timeseries", 11 | "time_unit": "day", 12 | "s3_location": "xco2-mean", 13 | "is_periodic": True, 14 | "source": { 15 | "type": "raster", 16 | "tiles": [ 17 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/xco2-mean/xco2_16day_mean.{date}.tif&resampling_method=bilinear&bidx=1&rescale=0.000408%2C0.000419&color_map=rdylbu_r&color_formula=gamma r {gamma}" 18 | ], 19 | }, 20 | "exclusive_with": [ 21 | "agriculture", 22 | "no2", 23 | "co2-diff", 24 | "fb-population-density", 25 | "car-count", 26 | "nightlights-viirs", 27 | "nightlights-hd", 28 | "detection-multi", 29 | "water-chlorophyll", 30 | "water-spm", 31 | "detections-ship", 32 | "detections-plane", 33 | "detections-vehicles", 34 | ], 35 | "enabled": False, 36 | "compare": { 37 | "enabled": True, 38 | "help": "Compare with baseline", 39 | "year_diff": 0, 40 | "map_label": "{date}: Base vs Mean", 41 | "source": { 42 | "type": "raster", 43 | "tiles": [ 44 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/xco2-base/xco2_16day_base.{date}.tif&resampling_method=bilinear&bidx=1&rescale=0.000408%2C0.000419&color_map=rdylbu_r&color_formula=gamma r {gamma}" 45 | ], 46 | }, 47 | }, 48 | "swatch": {"color": "#189C54", "name": "Dark Green"}, 49 | "legend": { 50 | "type": "gradient-adjustable", 51 | "min": "< 408 ppm", 52 | "max": "> 419 ppm", 53 | "stops": [ 54 | "#313695", 55 | "#588cbf", 56 | "#a3d2e5", 57 | "#e8f6e8", 58 | "#fee89c", 59 | "#fba55c", 60 | "#e24932", 61 | ], 62 | }, 63 | "info": "This layer shows the average background concentration of carbon dioxide (CO₂) in our atmosphere for 2020. Redder colors indicate more CO₂.
Bluer colors indicate less CO₂.", 64 | }, 65 | { 66 | "id": "detections-plane", 67 | "name": "Airplanes", 68 | "type": "inference-timeseries", 69 | "s3_location": "detections-plane", 70 | "is_periodic": False, 71 | "time_unit": "day", 72 | "source": { 73 | "type": "geojson", 74 | "tiles": ["{api_url}/detections-plane/{spotlightId}/{date}.geojson"], 75 | }, 76 | "background_source": { 77 | "type": "raster", 78 | "tiles": ["{api_url}/planet/{z}/{x}/{y}?date={date}&site={spotlightId}"], 79 | }, 80 | "exclusive_with": [ 81 | "agriculture", 82 | "no2", 83 | "co2-diff", 84 | "co2", 85 | "fb-population-density", 86 | "car-count", 87 | "nightlights-viirs", 88 | "nightlights-hd", 89 | "detection-multi", 90 | "water-chlorophyll", 91 | "water-spm", 92 | "detections-ship", 93 | "detections-vehicles", 94 | ], 95 | "enabled": False, 96 | "swatch": {"color": "#C0C0C0", "name": "Grey"}, 97 | "info": "Grounded airplanes detected each day in PlanetScope imagery are shown in orange.", 98 | }, 99 | { 100 | "id": "nightlights-hd", 101 | "name": "Nightlights HD", 102 | "type": "raster-timeseries", 103 | "s3_location": "bmhd_30m_monthly", 104 | "is_periodic": True, 105 | "time_unit": "month", 106 | "source": { 107 | "type": "raster", 108 | "tiles": [ 109 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/bmhd_30m_monthly/BMHD_VNP46A2_{spotlightId}_{date}_cog.tif&resampling_method=bilinear&bidx=1%2C2%2C3" 110 | ], 111 | }, 112 | "exclusive_with": [ 113 | "agriculture", 114 | "no2", 115 | "co2-diff", 116 | "co2", 117 | "fb-population-density", 118 | "car-count", 119 | "nightlights-viirs", 120 | "detection-multi", 121 | "water-chlorophyll", 122 | "water-spm", 123 | "detections-ship", 124 | "detections-plane", 125 | "detections-vehicles", 126 | ], 127 | "swatch": {"color": "#C0C0C0", "name": "Grey"}, 128 | "legend": { 129 | "type": "gradient", 130 | "min": "less", 131 | "max": "more", 132 | "stops": ["#08041d", "#1f0a46", "#52076c", "#f57c16", "#f7cf39"], 133 | }, 134 | "info": "The High Definition Nightlights dataset is processed to eliminate light sources, including moonlight reflectance and other interferences. Darker colors indicate fewer night lights and less activity. Lighter colors indicate more night lights and more activity.", 135 | }, 136 | { 137 | "id": "nightlights-viirs", 138 | "name": "Nightlights VIIRS", 139 | "type": "raster-timeseries", 140 | "time_unit": "day", 141 | "s3_location": "bm_500m_daily", 142 | "is_periodic": True, 143 | "source": { 144 | "type": "raster", 145 | "tiles": [ 146 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/bm_500m_daily/VNP46A2_V011_{spotlightId}_{date}_cog.tif&resampling_method=nearest&bidx=1&rescale=0%2C100&color_map=viridis" 147 | ], 148 | }, 149 | "exclusive_with": [ 150 | "agriculture", 151 | "no2", 152 | "co2-diff", 153 | "co2", 154 | "fb-population-density", 155 | "car-count", 156 | "nightlights-hd", 157 | "detection-multi", 158 | "water-chlorophyll", 159 | "water-spm", 160 | "detections-ship", 161 | "detections-plane", 162 | "detections-vehicles", 163 | ], 164 | "swatch": {"color": "#C0C0C0", "name": "Grey"}, 165 | "legend": { 166 | "type": "gradient", 167 | "min": "less", 168 | "max": "more", 169 | "stops": ["#440357", "#3b508a", "#208f8c", "#5fc961", "#fde725"], 170 | }, 171 | "info": "Darker colors indicate fewer night lights and less activity. Lighter colors indicate more night lights and more activity. 
Check out the HD dataset to see a light-corrected version of this dataset.", 172 | }, 173 | { 174 | "id": "water-chlorophyll", 175 | "name": "Chlorophyll", 176 | "type": "raster-timeseries", 177 | "time_unit": "day", 178 | "is_periodic": False, 179 | "s3_location": "oc3_chla_anomaly", 180 | "source": { 181 | "type": "raster", 182 | "tiles": [ 183 | "{api_url}/{z}/{x}/{y}@1x?url=s3://covid-eo-data/oc3_chla_anomaly/anomaly-chl-{spotlightId}-{date}.tif&resampling_method=bilinear&bidx=1&rescale=-100%2C100&color_map=rdbu_r" 184 | ], 185 | }, 186 | "exclusive_with": [ 187 | "agriculture", 188 | "no2", 189 | "co2-diff", 190 | "co2", 191 | "fb-population-density", 192 | "car-count", 193 | "nightlights-viirs", 194 | "nightlights-hd", 195 | "detection-multi", 196 | "water-spm", 197 | "detections-ship", 198 | "detections-plane", 199 | "detections-vehicles", 200 | ], 201 | "swatch": {"color": "#154F8D", "name": "Deep blue"}, 202 | "legend": { 203 | "type": "gradient", 204 | "min": "less", 205 | "max": "more", 206 | "stops": ["#3A88BD", "#C9E0ED", "#E4EEF3", "#FDDCC9", "#DE725B", "#67001F"], 207 | }, 208 | "info": "Chlorophyll is an indicator of algae growth. Redder colors indicate increases in chlorophyll-a and worse water quality. Bluer colors indicate decreases in chlorophyll-a and improved water quality. White areas indicate no change.", 209 | }, 210 | ] 211 | SITES = [ 212 | { 213 | "id": "du", 214 | "label": "Port of Dunkirk", 215 | "center": [2.250141, 51.02986], 216 | "polygon": { 217 | "type": "Polygon", 218 | "coordinates": [ 219 | [ 220 | [2.08355962, 51.03423481], 221 | [2.14826632, 50.96553938], 222 | [2.41646888, 51.02097784], 223 | [2.38289168, 51.07488218], 224 | [2.32298564, 51.08773119], 225 | [2.15844656, 51.05891125], 226 | [2.08355962, 51.03423481], 227 | ] 228 | ], 229 | }, 230 | "bounding_box": [2.008355962, 50.96553938, 2.41646888, 51.08773119], 231 | }, 232 | { 233 | "id": "ny", 234 | "label": "New York", 235 | "center": [-73.09, 41.0114], 236 | "polygon": { 237 | "type": "Polygon", 238 | "coordinates": [ 239 | [ 240 | [-71.74516, 41.54467], 241 | [-74.43395, 41.54943], 242 | [-74.43219, 40.47812], 243 | [-71.74516, 40.48343], 244 | [-71.74516, 41.54467], 245 | ] 246 | ], 247 | }, 248 | "bounding_box": [-74.43395, 40.47812, -71.74516, 41.54467], 249 | }, 250 | { 251 | "id": "tk", 252 | "label": "Tokyo", 253 | "center": [139.78, 35.61], 254 | "polygon": { 255 | "type": "Polygon", 256 | "coordinates": [ 257 | [ 258 | [139.37, 35.33], 259 | [140.19, 35.33], 260 | [140.19, 35.85], 261 | [139.37, 35.85], 262 | [139.37, 35.33], 263 | ] 264 | ], 265 | }, 266 | "bounding_box": [139.37, 35.33, 140.19, 35.85], 267 | }, 268 | ] 269 | -------------------------------------------------------------------------------- /stack/app.py: -------------------------------------------------------------------------------- 1 | """Construct App.""" 2 | 3 | import os 4 | import shutil 5 | from typing import Any, Union 6 | 7 | import config 8 | 9 | # import docker 10 | from aws_cdk import aws_apigatewayv2 as apigw 11 | from aws_cdk import aws_apigatewayv2_integrations as apigw_integrations 12 | from aws_cdk import aws_ec2 as ec2 13 | from aws_cdk import aws_ecs as ecs 14 | from aws_cdk import aws_ecs_patterns as ecs_patterns 15 | from aws_cdk import aws_elasticache as escache 16 | from aws_cdk import aws_events, aws_events_targets 17 | from aws_cdk import aws_iam as iam 18 | from aws_cdk import aws_lambda, aws_s3, core 19 | 20 | s3_full_access_to_data_bucket = iam.PolicyStatement( 21 | actions=["s3:*"], 
resources=[f"arn:aws:s3:::{config.BUCKET}*"] 22 | ) 23 | 24 | DEFAULT_ENV = dict( 25 | CPL_TMPDIR="/tmp", 26 | CPL_VSIL_CURL_ALLOWED_EXTENSIONS=".tif", 27 | GDAL_CACHEMAX="75%", 28 | GDAL_DISABLE_READDIR_ON_OPEN="EMPTY_DIR", 29 | GDAL_HTTP_MERGE_CONSECUTIVE_RANGES="YES", 30 | GDAL_HTTP_MULTIPLEX="YES", 31 | GDAL_HTTP_VERSION="2", 32 | PYTHONWARNINGS="ignore", 33 | VSI_CACHE="TRUE", 34 | VSI_CACHE_SIZE="1000000", 35 | ) 36 | 37 | 38 | class covidApiLambdaStack(core.Stack): 39 | """ 40 | Covid API Lambda Stack 41 | 42 | This code is freely adapted from 43 | - https://github.com/leothomas/titiler/blob/10df64fbbdd342a0762444eceebaac18d8867365/stack/app.py author: @leothomas 44 | - https://github.com/ciaranevans/titiler/blob/3a4e04cec2bd9b90e6f80decc49dc3229b6ef569/stack/app.py author: @ciaranevans 45 | 46 | """ 47 | 48 | def __init__( 49 | self, 50 | scope: core.Construct, 51 | id: str, 52 | dataset_metadata_filename: str, 53 | dataset_metadata_generator_function_name: str, 54 | memory: int = 1024, 55 | timeout: int = 30, 56 | concurrent: int = 100, 57 | code_dir: str = "./", 58 | **kwargs: Any, 59 | ) -> None: 60 | """Define stack.""" 61 | super().__init__(scope, id, **kwargs) 62 | 63 | # add cache 64 | if config.VPC_ID: 65 | vpc = ec2.Vpc.from_lookup(self, f"{id}-vpc", vpc_id=config.VPC_ID,) 66 | else: 67 | vpc = ec2.Vpc(self, f"{id}-vpc") 68 | 69 | sb_group = escache.CfnSubnetGroup( 70 | self, 71 | f"{id}-subnet-group", 72 | description=f"{id} subnet group", 73 | subnet_ids=[sb.subnet_id for sb in vpc.private_subnets], 74 | ) 75 | 76 | lambda_function_security_group = ec2.SecurityGroup( 77 | self, f"{id}-lambda-sg", vpc=vpc 78 | ) 79 | lambda_function_security_group.add_egress_rule( 80 | ec2.Peer.any_ipv4(), 81 | connection=ec2.Port(protocol=ec2.Protocol("ALL"), string_representation=""), 82 | description="Allow lambda security group all outbound access", 83 | ) 84 | 85 | cache_security_group = ec2.SecurityGroup(self, f"{id}-cache-sg", vpc=vpc) 86 | 87 | cache_security_group.add_ingress_rule( 88 | lambda_function_security_group, 89 | connection=ec2.Port(protocol=ec2.Protocol("ALL"), string_representation=""), 90 | description="Allow Lambda security group access to Cache security group", 91 | ) 92 | 93 | cache = escache.CfnCacheCluster( 94 | self, 95 | f"{id}-cache", 96 | cache_node_type=config.CACHE_NODE_TYPE, 97 | engine=config.CACHE_ENGINE, 98 | num_cache_nodes=config.CACHE_NODE_NUM, 99 | vpc_security_group_ids=[cache_security_group.security_group_id], 100 | cache_subnet_group_name=sb_group.ref, 101 | ) 102 | 103 | logs_access = iam.PolicyStatement( 104 | actions=[ 105 | "logs:CreateLogGroup", 106 | "logs:CreateLogStream", 107 | "logs:PutLogEvents", 108 | ], 109 | resources=["*"], 110 | ) 111 | ec2_network_access = iam.PolicyStatement( 112 | actions=[ 113 | "ec2:CreateNetworkInterface", 114 | "ec2:DescribeNetworkInterfaces", 115 | "ec2:DeleteNetworkInterface", 116 | ], 117 | resources=["*"], 118 | ) 119 | 120 | lambda_env = DEFAULT_ENV.copy() 121 | lambda_env.update( 122 | dict( 123 | MODULE_NAME="covid_api.main", 124 | VARIABLE_NAME="app", 125 | WORKERS_PER_CORE="1", 126 | LOG_LEVEL="error", 127 | MEMCACHE_HOST=cache.attr_configuration_endpoint_address, 128 | MEMCACHE_PORT=cache.attr_configuration_endpoint_port, 129 | DATASET_METADATA_FILENAME=dataset_metadata_filename, 130 | DATASET_METADATA_GENERATOR_FUNCTION_NAME=dataset_metadata_generator_function_name, 131 | PLANET_API_KEY=os.environ["PLANET_API_KEY"], 132 | ) 133 | ) 134 | 135 | lambda_function_props = dict( 136 | 
runtime=aws_lambda.Runtime.PYTHON_3_7, 137 | code=self.create_package(code_dir), 138 | handler="handler.handler", 139 | memory_size=memory, 140 | timeout=core.Duration.seconds(timeout), 141 | environment=lambda_env, 142 | security_groups=[lambda_function_security_group], 143 | vpc=vpc, 144 | ) 145 | 146 | if concurrent: 147 | lambda_function_props["reserved_concurrent_executions"] = concurrent 148 | 149 | lambda_function = aws_lambda.Function( 150 | self, f"{id}-lambda", **lambda_function_props 151 | ) 152 | 153 | lambda_function.add_to_role_policy(s3_full_access_to_data_bucket) 154 | lambda_function.add_to_role_policy(logs_access) 155 | lambda_function.add_to_role_policy(ec2_network_access) 156 | 157 | # defines an API Gateway HTTP API resource backed by our covid_api lambda function. 158 | apigw.HttpApi( 159 | self, 160 | f"{id}-endpoint", 161 | default_integration=apigw_integrations.LambdaProxyIntegration( 162 | handler=lambda_function 163 | ), 164 | ) 165 | 166 | def create_package(self, code_dir: str) -> aws_lambda.Code: 167 | """Build docker image and create package.""" 168 | 169 | return aws_lambda.Code.from_asset( 170 | path=os.path.abspath(code_dir), 171 | bundling=core.BundlingOptions( 172 | image=core.BundlingDockerImage.from_asset( 173 | path=os.path.abspath(code_dir), 174 | file="Dockerfiles/lambda/Dockerfile", 175 | ), 176 | command=["bash", "-c", "cp -R /var/task/. /asset-output/."], 177 | ), 178 | ) 179 | 180 | 181 | class covidApiECSStack(core.Stack): 182 | """Covid API ECS Fargate Stack.""" 183 | 184 | def __init__( 185 | self, 186 | scope: core.Construct, 187 | id: str, 188 | cpu: Union[int, float] = 256, 189 | memory: Union[int, float] = 512, 190 | mincount: int = 1, 191 | maxcount: int = 50, 192 | task_env: dict = {}, 193 | code_dir: str = "./", 194 | **kwargs: Any, 195 | ) -> None: 196 | """Define stack.""" 197 | super().__init__(scope, id, **kwargs) 198 | 199 | # add cache 200 | if config.VPC_ID: 201 | vpc = ec2.Vpc.from_lookup(self, f"{id}-vpc", vpc_id=config.VPC_ID,) 202 | else: 203 | vpc = ec2.Vpc(self, f"{id}-vpc") 204 | 205 | cluster = ecs.Cluster(self, f"{id}-cluster", vpc=vpc) 206 | 207 | task_env_overrides = task_env  # keep caller-supplied env; it was previously shadowed and ignored 208 | task_env = DEFAULT_ENV.copy() 209 | task_env.update( 210 | dict( 211 | MODULE_NAME="covid_api.main", VARIABLE_NAME="app", 212 | WORKERS_PER_CORE="1", 213 | LOG_LEVEL="error", 214 | ) 215 | ) 216 | task_env.update(task_env_overrides)  # apply caller-supplied overrides last 217 | 218 | fargate_service = ecs_patterns.ApplicationLoadBalancedFargateService( 219 | self, 220 | f"{id}-service", 221 | cluster=cluster, 222 | cpu=cpu, 223 | memory_limit_mib=memory, 224 | desired_count=mincount, 225 | public_load_balancer=True, 226 | listener_port=80, 227 | task_image_options=dict( 228 | image=ecs.ContainerImage.from_asset( 229 | code_dir, 230 | exclude=["cdk.out", ".git"], 231 | file="Dockerfiles/ecs/Dockerfile", 232 | ), 233 | container_port=80, 234 | environment=task_env, 235 | ), 236 | ) 237 | 238 | scalable_target = fargate_service.service.auto_scale_task_count( 239 | min_capacity=mincount, max_capacity=maxcount 240 | ) 241 | 242 | # https://github.com/awslabs/aws-rails-provisioner/blob/263782a4250ca1820082bfb059b163a0f2130d02/lib/aws-rails-provisioner/scaling.rb#L343-L387 243 | scalable_target.scale_on_request_count( 244 | "RequestScaling", 245 | requests_per_target=50, 246 | scale_in_cooldown=core.Duration.seconds(240), 247 | scale_out_cooldown=core.Duration.seconds(30), 248 | target_group=fargate_service.target_group, 249 | ) 250 | 251 | # scalable_target.scale_on_cpu_utilization( 252 | # "CpuScaling",
target_utilization_percent=70, 253 | # ) 254 | 255 | fargate_service.service.connections.allow_from_any_ipv4( 256 | port_range=ec2.Port( 257 | protocol=ec2.Protocol.ALL, 258 | string_representation="All port 80", 259 | from_port=80, 260 | ), 261 | description="Allows traffic on port 80 from NLB", 262 | ) 263 | 264 | 265 | class covidApiDatasetMetadataGeneratorStack(core.Stack): 266 | """Dataset metadata generator stack - comprises a lambda and a CloudWatch 267 | event that triggers a new lambda execution every 24hrs""" 268 | 269 | def __init__( 270 | self, 271 | scope: core.Construct, 272 | id: str, 273 | dataset_metadata_filename: str, 274 | dataset_metadata_generator_function_name: str, 275 | code_dir: str = "./", 276 | **kwargs: Any, 277 | ) -> None: 278 | """Define stack.""" 279 | super().__init__(scope, id, **kwargs) 280 | 281 | base = os.path.abspath(os.path.join("covid_api", "db", "static")) 282 | lambda_deployment_package_location = os.path.abspath( 283 | os.path.join(code_dir, "lambda", "dataset_metadata_generator") 284 | ) 285 | for e in ["datasets", "sites"]: 286 | self.copy_metadata_files_to_lambda_deployment_package( 287 | from_dir=os.path.join(base, e), 288 | to_dir=os.path.join(lambda_deployment_package_location, "src", e), 289 | ) 290 | 291 | data_bucket = aws_s3.Bucket.from_bucket_name( 292 | self, id=f"{id}-data-bucket", bucket_name=config.BUCKET 293 | ) 294 | 295 | dataset_metadata_updater_function = aws_lambda.Function( 296 | self, 297 | f"{id}-metadata-updater-lambda", 298 | runtime=aws_lambda.Runtime.PYTHON_3_8, 299 | code=aws_lambda.Code.from_asset(lambda_deployment_package_location), 300 | handler="src.main.handler", 301 | environment={ 302 | "DATASET_METADATA_FILENAME": dataset_metadata_filename, 303 | "DATA_BUCKET_NAME": data_bucket.bucket_name, 304 | }, 305 | function_name=dataset_metadata_generator_function_name, 306 | timeout=core.Duration.minutes(5), 307 | ) 308 | 309 | for e in ["datasets", "sites"]: 310 | shutil.rmtree(os.path.join(lambda_deployment_package_location, "src", e)) 311 | 312 | data_bucket.grant_read_write(dataset_metadata_updater_function) 313 | 314 | aws_events.Rule( 315 | self, 316 | f"{id}-metadata-update-daily-trigger", 317 | # triggers every day 318 | schedule=aws_events.Schedule.rate(duration=core.Duration.days(1)), 319 | targets=[ 320 | aws_events_targets.LambdaFunction(dataset_metadata_updater_function) 321 | ], 322 | ) 323 | 324 | def copy_metadata_files_to_lambda_deployment_package(self, from_dir, to_dir): 325 | """Copies dataset metadata files to the lambda deployment package 326 | so that the dataset domain extractor lambda has access to the necessary 327 | metadata items at runtime 328 | Params: 329 | ------- 330 | from_dir (str): relative filepath from which to copy all `.json` files 331 | to_dir (str): relative filepath to copy `.json` files to 332 | Return: 333 | ------- 334 | None 335 | """ 336 | files = [ 337 | os.path.abspath(os.path.join(d, f)) 338 | for d, _, fnames in os.walk(from_dir) 339 | for f in fnames 340 | if f.endswith(".json") 341 | ] 342 | 343 | try: 344 | os.mkdir(to_dir) 345 | except FileExistsError: 346 | pass 347 | 348 | for f in files: 349 | shutil.copy(f, to_dir) 350 | 351 | 352 | app = core.App() 353 | 354 | 355 | # Tag infrastructure 356 | for key, value in { 357 | "Project": config.PROJECT_NAME, 358 | "Stack": config.STAGE, 359 | "Owner": os.environ.get("OWNER"), 360 | "Client": os.environ.get("CLIENT"), 361 | }.items(): 362 | if value: 363 | core.Tag.add(app, key, value) 364 | 365 | ecs_stackname =
f"{config.PROJECT_NAME}-ecs-{config.STAGE}" 366 | covidApiECSStack( 367 | app, 368 | ecs_stackname, 369 | cpu=config.TASK_CPU, 370 | memory=config.TASK_MEMORY, 371 | mincount=config.MIN_ECS_INSTANCES, 372 | maxcount=config.MAX_ECS_INSTANCES, 373 | task_env=config.TASK_ENV, 374 | env=dict( 375 | account=os.environ["CDK_DEFAULT_ACCOUNT"], 376 | region=os.environ["CDK_DEFAULT_REGION"], 377 | ), 378 | ) 379 | 380 | lambda_stackname = f"{config.PROJECT_NAME}-lambda-{config.STAGE}" 381 | covidApiLambdaStack( 382 | app, 383 | lambda_stackname, 384 | memory=config.MEMORY, 385 | timeout=config.TIMEOUT, 386 | concurrent=config.MAX_CONCURRENT, 387 | dataset_metadata_filename=f"{config.STAGE}-dataset-metadata.json", 388 | dataset_metadata_generator_function_name=f"{config.STAGE}-dataset-metadata-generator", 389 | env=dict( 390 | account=os.environ["CDK_DEFAULT_ACCOUNT"], 391 | region=os.environ["CDK_DEFAULT_REGION"], 392 | ), 393 | ) 394 | 395 | dataset_metadata_generator_stackname = ( 396 | f"{config.PROJECT_NAME}-dataset-metadata-generator-{config.STAGE}" 397 | ) 398 | covidApiDatasetMetadataGeneratorStack( 399 | app, 400 | dataset_metadata_generator_stackname, 401 | dataset_metadata_filename=f"{config.STAGE}-dataset-metadata.json", 402 | dataset_metadata_generator_function_name=f"{config.STAGE}-dataset-metadata-generator", 403 | ) 404 | 405 | app.synth() 406 | --------------------------------------------------------------------------------
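Finally, a hedged post-deploy smoke test, not part of the repo: it forces a metadata refresh instead of waiting for the daily CloudWatch rule, then reads back the generated file. `STAGE` and the `<DATA_BUCKET>` placeholder (the value of `config.BUCKET`) are assumptions to fill in:

import json

import boto3

STAGE = "dev"  # assumption: the stacks were deployed with this stage name
function_name = f"{STAGE}-dataset-metadata-generator"
metadata_key = f"{STAGE}-dataset-metadata.json"

# Trigger the generator lambda synchronously
boto3.client("lambda").invoke(
    FunctionName=function_name, InvocationType="RequestResponse"
)

# Read back the metadata file it wrote and list the spotlight keys it generated
obj = boto3.resource("s3").Object("<DATA_BUCKET>", metadata_key).get()
metadata = json.loads(obj["Body"].read())
print(sorted(metadata.keys()))  # e.g. ['_all', 'be', 'du', 'global', 'ny', ...]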