├── .dockerignore
├── .gitattributes
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── cube-in-a-box-dea-azurerm.json
├── cube-in-a-box-dea-cloudformation.yml
├── docker-compose.yml
├── install-cube.sh
├── migrate.sh
├── notebooks
    ├── 01_Beginners_guide
    │   ├── 01_Jupyter_notebooks.ipynb
    │   ├── 02_DEA.ipynb
    │   ├── 03_Products_and_measurements.ipynb
    │   ├── 04_Loading_data.ipynb
    │   ├── 05_Plotting.ipynb
    │   ├── 06_Basic_analysis.ipynb
    │   ├── 07_Intro_to_numpy.ipynb
    │   ├── 08_Intro_to_xarray.ipynb
    │   └── 09_Parallel_processing_with_Dask.ipynb
    ├── 02_Real_world_examples
    │   ├── Burnt_area_mapping.ipynb
    │   ├── Calculating_band_indices.ipynb
    │   ├── Change_detection.ipynb
    │   ├── Coastal_erosion.ipynb
    │   ├── Contour_extraction.ipynb
    │   ├── Image_segmentation.ipynb
    │   ├── Urban_change_detection.ipynb
    │   ├── Using_load_ard.ipynb
    │   ├── Vegetation_phenology.ipynb
    │   └── outputs
    │   │   ├── Burnt_area_mapping
    │   │       └── .gitkeep
    │   │   ├── Change_detection
    │   │       └── .gitkeep
    │   │   ├── Coastal_erosion
    │   │       └── .gitkeep
    │   │   ├── Contour_extraction
    │   │       └── .gitkeep
    │   │   └── Image_segmentation
    │   │       └── .gitkeep
    ├── 03_EY_challenge1
    │   ├── Challenge1_Getting_started.ipynb
    │   ├── Image_analysis_in_python.ipynb
    │   └── resources
    │   │   ├── challenge1_test.csv
    │   │   ├── challenge1_train.csv
    │   │   ├── fire_boundaries.cpg
    │   │   ├── fire_boundaries.dbf
    │   │   ├── fire_boundaries.prj
    │   │   ├── fire_boundaries.shp
    │   │   ├── fire_boundaries.shx
    │   │   └── fire_example.jpg
    ├── 04_EY_challenge2
    │   ├── Challenge2_Getting_started.ipynb
    │   ├── Linescan_loading_examples.ipynb
    │   ├── Sentinel-1_example_rgb.ipynb
    │   └── resources
    │   │   ├── animated_timeseries.gif
    │   │   ├── challenge2_test.csv
    │   │   └── challenge2_train.csv
    ├── Supplementary_data
    │   ├── 02_DEA
    │   │   ├── dea_products.jpg
    │   │   ├── nbar_nbart_animation.gif
    │   │   └── odc.png
    │   ├── 03_Products_and_measurements
    │   │   └── DEAExplorer.JPG
    │   ├── 04_Loading_data
    │   │   ├── dea_logo.jpg
    │   │   └── progress_bar.jpg
    │   ├── 06_Basic_analysis
    │   │   └── latlong_buffer.png
    │   ├── 07_Intro_to_numpy
    │   │   ├── africa.png
    │   │   └── numpy_array_t.png
    │   ├── 08_Intro_to_xarray
    │   │   ├── dataset-diagram.png
    │   │   └── example_netcdf.nc
    │   ├── EY_logo.png
    │   ├── dea_logo.jpg
    │   └── dea_logo_wide.jpg
    ├── datacube_viewer.ipynb
    └── scripts
    │   ├── dea_bandindices.py
    │   ├── dea_bom.py
    │   ├── dea_classificationtools.py
    │   ├── dea_climate.py
    │   ├── dea_coastaltools.py
    │   ├── dea_dask.py
    │   ├── dea_datahandling.py
    │   ├── dea_plotting.py
    │   ├── dea_spatialtools.py
    │   ├── dea_temporal.py
    │   ├── dea_temporaltools.py
    │   └── dea_waterbodies.py
├── parameters.json
└── scripts
    ├── data
        ├── add_azure_data.sh
        ├── eo3_landsat_ard.odc-type.yaml
        ├── eo_plus.odc-type.yaml
        ├── esa_s1_rtc.odc-product.yaml
        ├── ga_ls7e_ard_3.odc-product.yaml
        ├── ga_ls8c_ard_3.odc-product.yaml
        ├── ga_s2a_ard_nbar_granule.odc-product.yaml
        ├── ga_s2b_ard_nbar_granule.odc-product.yaml
        ├── linescan.odc-product.yaml
        ├── linescan.tar.gz
        ├── ls78.tar.gz
        ├── metadata.eo_plus.yaml
        ├── s2ab.tar.gz
        └── sentinel-1.tar.gz
    ├── linescan.odc-product.yaml
    ├── ls7-vic-scenes.txt
    ├── ls8-vic-scenes.txt
    ├── s-2-vic-scenes.txt
    └── vic-scenes.tar.gz


/.dockerignore:
--------------------------------------------------------------------------------
1 | data
2 | output
3 | .env
4 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | *.pyc
3 | .env
4 | notebooks/.*


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Alex Leith
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # You can follow the steps below in order to get yourself a local ODC.
  2 | 
  3 | # 1. Start your Docker environment
  4 | up:
  5 | 	docker-compose up
  6 | 
  7 | # 2. Prepare your database and set up the ODC
  8 | prepare:
  9 | 	./install-cube.sh secretpassword true
 10 | 
 11 | 
 12 | 
 13 | 
 14 | 
 15 | 
 16 | ## BELOW IS NOT REQUIRED!!!
 17 | # Delete everything
 18 | down:
 19 | 	docker-compose down
 20 | 
 21 | # Find Sentinel-2, Landsat 7 and Landsat 8 scenes over Victoria
 22 | # First search for all scenes... careful, this takes a very long time.
 23 | find-s-2-documents:
 24 | 	docker-compose exec jupyter \
 25 | 		bash -c \
 26 | 			"s3-find --no-sign-request s3://dea-public-data/L2/sentinel-2-nbar/S2MSIARD_NBAR/**/ARD-METADATA.yaml \
 27 | 		 	| gzip > /scripts/s-2-all-scenes.txt.gz"
 28 | 
 29 | find-ls7-documents:
 30 | 	docker-compose exec jupyter \
 31 | 		bash -c \
 32 | 			"s3-find --no-sign-request s3://dea-public-data/baseline/ga_ls7e_ard_3/**/*.odc-metadata.yaml \
 33 | 		 	| gzip > /scripts/ls7-all-scenes.txt.gz"
 34 | 
 35 | find-ls8-documents:
 36 | 	docker-compose exec jupyter \
 37 | 		bash -c \
 38 | 			"s3-find --no-sign-request s3://dea-public-data/baseline/ga_ls8c_ard_3/**/*.odc-metadata.yaml \
 39 | 		 	| gzip > /scripts/ls8-all-scenes.txt.gz"
 40 | 
 41 | # Next extract the area of interest using pathrows or MGRS tiles and dates
 42 | scripts/dates.txt:
 43 | 	./make_dates.sh > ./scripts/dates.txt
 44 | 
 45 | filter-s-2-documents:
 46 | 	docker-compose exec jupyter \
 47 | 		bash -c \
 48 | 			"gunzip -c /scripts/s-2-all-scenes.txt.gz \
 49 | 			| grep -f /scripts/vic-tiles-s-2.txt \
 50 | 			| grep -f /scripts/dates.txt > /scripts/s-2-vic-scenes.txt"
 51 | 
 52 | filter-ls7-documents:
 53 | 	docker-compose exec jupyter \
 54 | 		bash -c \
 55 | 			"gunzip -c /scripts/ls7-all-scenes.txt.gz \
 56 | 			| grep -f /scripts/vic-tiles-ls.txt \
 57 | 			| grep -f /scripts/dates.txt > /scripts/ls7-vic-scenes.txt"
 58 | 
 59 | filter-ls8-documents:
 60 | 	docker-compose exec jupyter \
 61 | 		bash -c \
 62 | 			"gunzip -c /scripts/ls8-all-scenes.txt.gz \
 63 | 			| grep -f /scripts/vic-tiles-ls.txt \
 64 | 			| grep -f /scripts/dates.txt > /scripts/ls8-vic-scenes.txt"
 65 | 
 66 | # Some extra commands to help in managing things.
 67 | # Rebuild the image
 68 | build:
 69 | 	docker-compose build
 70 | 
 71 | # Start an interactive shell
 72 | shell:
 73 | 	docker-compose exec jupyter bash
 74 | 
 75 | # OTHER
 76 | metadata-landsat:
 77 | 	docker-compose exec jupyter \
 78 | 		datacube metadata add https://raw.githubusercontent.com/GeoscienceAustralia/digitalearthau/develop/digitalearthau/config/eo3/eo3_landsat_ard.odc-type.yaml
 79 | 
 80 | product-landsat:
 81 | 	docker-compose exec jupyter \
 82 | 		bash -c "\
 83 | 			datacube product add https://raw.githubusercontent.com/GeoscienceAustralia/digitalearthau/develop/digitalearthau/config/eo3/products-aws/ard_ls5.odc-product.yaml;\
 84 | 			datacube product add https://raw.githubusercontent.com/GeoscienceAustralia/digitalearthau/develop/digitalearthau/config/eo3/products-aws/ard_ls7.odc-product.yaml;\
 85 | 			datacube product add https://raw.githubusercontent.com/GeoscienceAustralia/digitalearthau/develop/digitalearthau/config/eo3/products-aws/ard_ls8.odc-product.yaml;"
 86 | 
 87 | 
 88 | index-landsat:
 89 | 	docker-compose exec jupyter \
 90 | 		bash -c "\
 91 | 			s3-find --no-sign-request "s3://dea-public-data-dev/analysis-ready-data/ga_ls8c_ard_3/**/*.odc-metadata.yaml"\
 92 | 			| s3-to-tar --no-sign-request | dc-index-from-tar --product ga_ls8c_ard_3 --ignore-lineage"
 93 | 
 94 | index-landsat-one:
 95 | 	docker-compose exec jupyter \
 96 | 		datacube dataset add --ignore-lineage --confirm-ignore-lineage \
 97 | 		https://dea-public-data-dev.s3-ap-southeast-2.amazonaws.com/analysis-ready-data/ga_ls8c_ard_3/115/074/2013/05/20/ga_ls8c_ard_3-0-0_115074_2013-05-20_final.proc-info.yaml
 98 | 
 99 | 
100 | # CLOUD FORMATION
101 | # Update S3 template (this is owned by Digital Earth Australia)
102 | upload-s3:
103 | 	aws s3 cp cube-in-a-box-dea-cloudformation.yml s3://opendatacube-cube-in-a-box/ --acl public-read
104 | 
105 | # This section can be used to deploy onto CloudFormation instead of the 'magic link'
106 | create-infra:
107 | 	aws cloudformation create-stack \
108 | 		--region ap-southeast-2 \
109 | 		--stack-name odc-test \
110 | 		--template-body file://cube-in-a-box-dea-cloudformation.yml \
111 | 		--parameter file://parameters.json \
112 | 		--tags Key=Name,Value=OpenDataCube \
113 | 		--capabilities CAPABILITY_NAMED_IAM
114 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Welcome to the 2021 Better Working World Data Challenge
2 | 
3 | For more information and getting started, head to the [wiki](https://github.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/wiki)
4 | 


--------------------------------------------------------------------------------
/cube-in-a-box-dea-azurerm.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  3 |   "contentVersion": "1.0.0.0",
  4 |   "parameters": {
  5 |     "InstanceName": {
  6 |       "type": "string",
  7 |       "defaultValue": "CubeInABox",
  8 |       "metadata": {
  9 |         "description": "The name of the Cube in a Box instance."
 10 |       }
 11 |     },
 12 |     "InstanceType": {
 13 |       "type": "string",
 14 |       "defaultValue": "CPU-16GB",
 15 |       "allowedValues": [
 16 |         "CPU-16GB",
 17 |         "GPU-56GB"
 18 |       ],
 19 |       "metadata": {
 20 |         "description": "Instance size"
 21 |       }
 22 |     },
 23 |     "SshPublicKey": {
 24 |       "type": "string",
 25 |       "metadata": {
 26 |         "description": "SSH Public Key for the Virtual Machine."
 27 |       }
 28 |     },
 29 |     "SecretPassword": {
 30 |       "type": "secureString",
 31 |       "metadata": {
 32 |         "description": "Password to access Jupyter notebook."
 33 |       }
 34 |     }
 35 |   },
 36 |   "variables": {
 37 |     "Region": "[resourceGroup().location]",
 38 |     "networkInterfaceName": "[concat(parameters('InstanceName'),'NetInt')]",
 39 |     "publicIpAddressName": "[concat(parameters('InstanceName'), 'PublicIP' )]",
 40 |     "subnetRef": "[resourceId('Microsoft.Network/virtualNetworks/subnets', variables('virtualNetworkName'), variables('subnetName'))]",
 41 |     "storageAccountName": "[concat('storage', uniqueString(resourceGroup().id))]",
 42 |     "storageAccountKind": "Storage",
 43 |     "vmSize": {
 44 |       "CPU-4GB": "Standard_B2s",
 45 |       "CPU-7GB": "Standard_DS2_v2",
 46 |       "CPU-8GB": "Standard_D2s_v3",
 47 |       "CPU-14GB": "Standard_DS3_v2",
 48 |       "CPU-16GB": "Standard_D4s_v3",
 49 |       "GPU-56GB": "Standard_NC6_Promo"
 50 |     },
 51 |     "diskSizeGB": "32",
 52 |     "vmName": "[concat(parameters('InstanceName'),'-', parameters('InstanceType'))]",
 53 |     "virtualNetworkName": "dea-vnet",
 54 |     "subnetName": "dea-subnet"
 55 |   },
 56 |   "resources": [
 57 |     {
 58 |       "type": "Microsoft.Network/virtualNetworks",
 59 |       "apiVersion": "2019-06-01",
 60 |       "name": "[variables('virtualNetworkName')]",
 61 |       "location": "[variables('Region')]",
 62 |       "properties": {
 63 |         "addressSpace": {
 64 |           "addressPrefixes": [
 65 |             "10.0.0.0/24"
 66 |           ]
 67 |         },
 68 |         "subnets": [
 69 |           {
 70 |             "name": "[variables('subnetName')]",
 71 |             "properties": {
 72 |               "addressPrefix": "10.0.0.0/24"
 73 |             }
 74 |           }
 75 |         ]
 76 |       }
 77 |     },
 78 |     {
 79 |       "type": "Microsoft.Network/publicIpAddresses",
 80 |       "apiVersion": "2019-06-01",
 81 |       "name": "[variables('publicIpAddressName')]",
 82 |       "location": "[variables('Region')]",
 83 |       "properties": {
 84 |         "publicIpAllocationMethod": "Dynamic"
 85 |       },
 86 |       "sku": {
 87 |         "name": "Basic",
 88 |         "tier": "Regional"
 89 |       }
 90 |     },
 91 |     {
 92 |       "type": "Microsoft.Network/networkInterfaces",
 93 |       "apiVersion": "2019-06-01",
 94 |       "name": "[variables('networkInterfaceName')]",
 95 |       "location": "[variables('Region')]",
 96 |       "dependsOn": [
 97 |         "[resourceId('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]",
 98 |         "[resourceId('Microsoft.Network/publicIpAddresses/', variables('publicIpAddressName'))]"
 99 |       ],
100 |       "properties": {
101 |         "ipConfigurations": [
102 |           {
103 |             "name": "ipconfig1",
104 |             "properties": {
105 |               "subnet": {
106 |                 "id": "[variables('subnetRef')]"
107 |               },
108 |               "privateIPAllocationMethod": "Dynamic",
109 |               "publicIpAddress": {
110 |                 "id": "[resourceId('Microsoft.Network/publicIpAddresses', variables('publicIpAddressName'))]"
111 |               }
112 |             }
113 |           }
114 |         ]
115 |       }
116 |     },
117 |     {
118 |       "type": "Microsoft.Storage/storageAccounts",
119 |       "apiVersion": "2019-06-01",
120 |       "name": "[variables('storageAccountName')]",
121 |       "location": "[variables('Region')]",
122 |       "sku": {
123 |         "name": "Standard_LRS"
124 |       },
125 |        "kind": "[variables('storageAccountKind')]"
126 |     },
127 |     {
128 |       "type": "Microsoft.Compute/virtualMachines",
129 |       "apiVersion": "2019-07-01",
130 |       "name": "[variables('vmName')]",
131 |       "location": "[variables('Region')]",
132 |       "dependsOn": [
133 |         "[resourceId('Microsoft.Network/networkInterfaces/', variables('networkInterfaceName'))]",
134 |         "[resourceId('Microsoft.Storage/storageAccounts/', variables('storageAccountName'))]"
135 |       ],
136 |       "properties": {
137 |         "hardwareProfile": {
138 |           "vmSize": "[variables('vmSize')[parameters('InstanceType')]]"
139 |         },
140 |         "storageProfile": {
141 |           "osDisk": {
142 |             "createOption": "FromImage",
143 |             "managedDisk": {
144 |               "storageAccountType": "Standard_LRS"
145 |             }
146 |           },
147 |           "dataDisks": [
148 |             {
149 |               "lun": 0,
150 |               "name": "[concat('Datadisk', parameters('InstanceName'))]",
151 |               "createOption": "Empty",
152 |               "diskSizeGB": "[variables('diskSizeGB')]",
153 |               "managedDisk": {
154 |                 "storageAccountType": "StandardSSD_LRS"
155 |               }
156 |             }
157 |           ],
158 |           "imageReference": {
159 |             "publisher": "Canonical",
160 |             "offer": "UbuntuServer",
161 |             "sku": "18.04-LTS",
162 |             "version": "latest"
163 |           }
164 |         },
165 |         "networkProfile": {
166 |           "networkInterfaces": [
167 |             {
168 |               "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('networkInterfaceName'))]"
169 |             }
170 |           ]
171 |         },
172 |         "osProfile": {
173 |           "computerName": "[parameters('InstanceName')]",
174 |           "adminUsername": "ubuntu",
175 |           "adminPassword": "",
176 |           "linuxConfiguration": {
177 |             "disablePasswordAuthentication": true,
178 |             "ssh": {
179 |               "publicKeys": [
180 |                 {
181 |                   "path": "/home/ubuntu/.ssh/authorized_keys",
182 |                   "keyData": "[parameters('sshPublicKey')]"
183 |                 }
184 |               ]
185 |             }
186 |           }
187 |         },
188 |         "diagnosticsProfile": {
189 |           "bootDiagnostics": {
190 |             "enabled": true,
191 |             "storageUri": "[concat(reference(variables('storageAccountName')).primaryEndpoints.blob)]"
192 |           }
193 |         }
194 |       },
195 |       "resources": [
196 |         {
197 |           "type": "Microsoft.Compute/virtualMachines/extensions",
198 |           "apiVersion": "2019-07-01",
199 |           "name": "[concat(variables('vmName'),'/installscript')]",
200 |           "location": "[variables('Region')]",
201 |           "dependsOn": [
202 |             "[resourceId('Microsoft.Compute/virtualMachines', variables('vmName'))]"
203 |           ],
204 |           "tags": {
205 |             "displayName": "Execute my custom script"
206 |           },
207 |           "properties": {
208 |             "publisher": "Microsoft.Azure.Extensions",
209 |             "type": "CustomScript",
210 |             "typeHandlerVersion": "2.0",
211 |             "autoUpgradeMinorVersion": true,
212 |             "protectedSettings": {
213 |               "fileUris": [
214 |                 "https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/main/install-cube.sh"
215 |               ],
216 |               "commandToExecute": "[concat('bash install-cube.sh ', parameters('SecretPassword'), ' 2>&1 | tee -a /var/log/install-cube.log')]"
217 |             }
218 |           }
219 |         }
220 |       ]
221 |     }
222 |   ],
223 |   "outputs": { }
224 | }
225 | 


--------------------------------------------------------------------------------
/cube-in-a-box-dea-cloudformation.yml:
--------------------------------------------------------------------------------
  1 | AWSTemplateFormatVersion: '2010-09-09'
  2 | Metadata:
  3 |   License: Apache-2.0
  4 | Description: 'Open Data Cube template with EC2 instance and RDS.'
  5 | Parameters:
  6 |   KeyName:
  7 |     Description: Name of an existing EC2 KeyPair to enable SSH access to the instance
  8 |     Type: AWS::EC2::KeyPair::KeyName
  9 |     ConstraintDescription: must be the name of an existing EC2 KeyPair.
 10 |   InstanceType:
 11 |     Description: WebServer EC2 instance type
 12 |     Type: String
 13 |     Default: t2.small
 14 |     AllowedValues: [
 15 |       t2.small, t2.medium, t2.large,
 16 |       m1.large, m1.xlarge, m2.xlarge,
 17 |       c4.large, c4.xlarge, c4.2xlarge,
 18 |       g2.8xlarge, r3.large, r3.xlarge]
 19 |     ConstraintDescription: must be a valid EC2 instance type.
 20 |   SecretPassword:
 21 |     Description: Password to open up the Jupyter notebook
 22 |     Type: String
 23 |     Default: 'secretpassword'
 24 |   EC2InstanceName:
 25 |     Description: The name of the Cube in a Box EC2 instance
 26 |     Type: String
 27 |     Default: 'CubeInABox'
 28 |   SSHLocation:
 29 |     Description: The IP address range that can be used to access the Cube in a Box
 30 |     Type: String
 31 |     MinLength: '9'
 32 |     MaxLength: '18'
 33 |     Default: 0.0.0.0/0
 34 |     AllowedPattern: (\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})
 35 |     ConstraintDescription: must be a valid IP CIDR range of the form x.x.x.x/x.
 36 |   Region:
 37 |     Description: The AWS region to deploy in
 38 |     Type: String
 39 |     Default: ap-southeast-2
 40 |     AllowedValues: [ap-southeast-2]
 41 | Mappings:
 42 |   RegionMap:
 43 |     us-west-2:
 44 |       "HVM64": "ami-04ef7170e45541f07"
 45 |     ap-southeast-2:
 46 |       "HVM64": "ami-033c54f661460cfd2"
 47 | Resources:
 48 |   EC2Instance:
 49 |     Type: AWS::EC2::Instance
 50 |     Properties:
 51 |       InstanceType: !Ref 'InstanceType'
 52 |       SecurityGroups: [!Ref 'InstanceSecurityGroup']
 53 |       KeyName: !Ref 'KeyName'
 54 |       ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", HVM64]
 55 |       IamInstanceProfile: !Ref ODCInstanceProfile
 56 |       BlockDeviceMappings:
 57 |         - DeviceName: /dev/sda1
 58 |           Ebs:
 59 |             VolumeSize: 40
 60 |       UserData:
 61 |         Fn::Base64: !Sub |
 62 |           #!/bin/bash -ex
 63 |           # Quickly bootstrap our environment and run the common bash script.
 64 |           export DEBIAN_FRONTEND=noninteractive
 65 |           apt-get update && apt-get install wget
 66 |           wget https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/main/install-cube.sh -O /tmp/install-cube.sh
 67 |           chmod +x /tmp/install-cube.sh && bash /tmp/install-cube.sh ${SecretPassword} 2>&1 | tee -a /tmp/install-cube.log
 68 |       Tags:
 69 |         - Key:   "Name"
 70 |           Value: !Ref 'EC2InstanceName'
 71 | 
 72 |   InstanceSecurityGroup:
 73 |     Type: AWS::EC2::SecurityGroup
 74 |     Properties:
 75 |       GroupDescription: Enable access
 76 |       SecurityGroupIngress:
 77 |       - IpProtocol: tcp
 78 |         FromPort: '22'
 79 |         ToPort: '22'
 80 |         CidrIp: !Ref 'SSHLocation'
 81 |       - IpProtocol: tcp
 82 |         FromPort: '80'
 83 |         ToPort: '80'
 84 |         CidrIp: !Ref 'SSHLocation'
 85 |       - IpProtocol: tcp
 86 |         FromPort: '8888'
 87 |         ToPort: '8888'
 88 |         CidrIp: !Ref 'SSHLocation'
 89 | 
 90 |   ODCRole:
 91 |     Type: AWS::IAM::Role
 92 |     Properties:
 93 |       AssumeRolePolicyDocument:
 94 |         Version: '2012-10-17'
 95 |         Statement:
 96 |         - Effect: Allow
 97 |           Principal:
 98 |             Service:
 99 |             - ec2.amazonaws.com
100 |           Action:
101 |           - sts:AssumeRole
102 |       Path: "/"
103 |   RolePolicies:
104 |     Type: AWS::IAM::Policy
105 |     Properties:
106 |       PolicyName: odc-policy
107 |       PolicyDocument:
108 |         Version: '2012-10-17'
109 |         Statement:
110 |         - Effect: Allow
111 |           Action: ["S3:ListBucket"]
112 |           Resource: ["arn:aws:s3:::dea-public-data"]
113 |         - Effect: Allow
114 |           Action: ["S3:GetObject"]
115 |           Resource: ["arn:aws:s3:::dea-public-data/*"]
116 |       Roles:
117 |       - !Ref ODCRole
118 |   ODCInstanceProfile:
119 |     Type: AWS::IAM::InstanceProfile
120 |     Properties:
121 |       Path: "/"
122 |       Roles:
123 |       - !Ref ODCRole
124 | 
125 | Outputs:
126 |   InstanceId:
127 |     Description: InstanceId of the newly created EC2 instance
128 |     Value: !Ref 'EC2Instance'
129 |   PublicDNS:
130 |     Description: Public DNSName of the newly created EC2 instance
131 |     Value: !GetAtt [EC2Instance, PublicDnsName]
132 |   PublicIP:
133 |     Description: Public IP address of the newly created EC2 instance
134 |     Value: !GetAtt [EC2Instance, PublicIp]
135 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   postgres:
 5 |     image: kartoza/postgis:11.0-2.5
 6 |     environment:
 7 |       - POSTGRES_DB=opendatacube
 8 |       - POSTGRES_PASSWORD=opendatacubepassword
 9 |       - POSTGRES_USER=opendatacube
10 |     ports:
11 |       - 5432:5432
12 |     restart: always
13 |   
14 |   jupyter:
15 |     image: geoscienceaustralia/sandbox
16 |     environment:
17 |       - DB_HOSTNAME=postgres
18 |       - DB_USERNAME=opendatacube
19 |       - DB_PASSWORD=opendatacubepassword
20 |       - DB_DATABASE=opendatacube
21 |       - AWS_NO_SIGN_REQUEST=true
22 |       - AWS_DEFAULT_REGION=ap-southeast-2
23 |     ports:
24 |       - "80:8888"
25 |     volumes:
26 |       - ./install-cube.sh:/usr/local/bin/install-cube.sh
27 |       - ./scripts:/scripts
28 |       - ./notebooks:/home/jovyan
29 |     depends_on:
30 |       - postgres
31 |     restart: always
32 |     command: jupyter notebook --allow-root --ip="0.0.0.0" --NotebookApp.token='secretpassword'
33 | 


--------------------------------------------------------------------------------
/install-cube.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ $# -lt 1 ]; then
 4 |   cat <<EOS
 5 | Usage:
 6 |   ./install-cube.sh PASSWORD
 7 | 
 8 | Arguments:
 9 |   PASSWORD    # Password to set for Jupyter login
10 | EOS
11 | exit 1
12 | fi
13 | 
14 | # Capture a password
15 | PASSWORD="${1}"
16 | 
17 | # Do it different if it's local Docker
18 | LOCAL="${2}"
19 | 
20 | set -ex
21 | # Log start time
22 | echo "Started $(date)"
23 | readlink -f $0
24 | 
25 | function try() {
26 |   for i in $(seq 10); do
27 |     $* && break
28 |     sleep 10
29 |   done
30 | }
31 | 
32 | # Install our dependencies
33 | if ! [[ $LOCAL = "true" ]]; then
34 |   export DEBIAN_FRONTEND=noninteractive
35 |   try 'curl -fsSL https://download.docker.com/linux/ubuntu/gpg' > docker.gpg
36 |   try apt-get update
37 |   apt-key add docker.gpg 
38 |   apt-key list
39 |   add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
40 |   try apt-get update 
41 |   try apt-get install -y docker-ce python3-pip unzip wget
42 |   try pip3 install --upgrade pip
43 |   try pip3 install docker-compose
44 | 
45 |   # Get our code
46 |   url=https://codeload.github.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/zip/main
47 |   try wget $url -O /tmp/archive.zip 
48 |   unzip /tmp/archive.zip
49 |   mv 2021-Better-Working-World-Data-Challenge-main /opt/odc
50 | 
51 |   # We need to change some local vars.
52 |   sed --in-place "s/secretpassword/${PASSWORD}/g" /opt/odc/docker-compose.yml
53 | 
54 |   # We need write access in these places
55 |   chmod -R 777 /opt/odc/notebooks
56 |   cd /opt/odc
57 | 
58 |   # Start the machines
59 |   docker-compose up -d
60 | 
61 |   # Wait for them to wake up
62 |   sleep 20
63 | fi
64 | 
65 | # Initialise and load a product, and then some data
66 | # Note to future self, we can't use make here because of TTY interactivity (the -T flag)
67 | # Initialise the datacube DB
68 | try docker-compose exec -T jupyter datacube -v system init
69 | # Add some custom metadata
70 | docker-compose exec -T jupyter datacube metadata add /scripts/data/metadata.eo_plus.yaml
71 | docker-compose exec -T jupyter datacube metadata add /scripts/data/eo3_landsat_ard.odc-type.yaml
72 | # And add some product definitions
73 | docker-compose exec -T jupyter datacube product add /scripts/data/ga_s2a_ard_nbar_granule.odc-product.yaml
74 | docker-compose exec -T jupyter datacube product add /scripts/data/ga_s2b_ard_nbar_granule.odc-product.yaml
75 | docker-compose exec -T jupyter datacube product add /scripts/data/ga_ls7e_ard_3.odc-product.yaml
76 | docker-compose exec -T jupyter datacube product add /scripts/data/ga_ls8c_ard_3.odc-product.yaml
77 | docker-compose exec -T jupyter datacube product add /scripts/data/linescan.odc-product.yaml
78 | docker-compose exec -T jupyter datacube product add /scripts/data/esa_s1_rtc.odc-product.yaml
79 | # Now index some datasets
80 | docker-compose exec -T jupyter bash -c "dc-index-from-tar --protocol https --ignore-lineage -p ga_ls7e_ard_3 -p ga_ls8c_ard_3 /scripts/data/ls78.tar.gz"
81 | docker-compose exec -T jupyter bash -c "dc-index-from-tar --protocol https --ignore-lineage -p ga_s2a_ard_nbar_granule -p ga_s2b_ard_nbar_granule /scripts/data/s2ab.tar.gz"
82 | docker-compose exec -T jupyter bash -c "dc-index-from-tar --protocol https --ignore-lineage -p linescan /scripts/data/linescan.tar.gz"
83 | docker-compose exec -T jupyter bash -c "dc-index-from-tar --protocol https --ignore-lineage --stac -p s1_rtc /scripts/data/sentinel-1.tar.gz"
84 | 
85 | echo "Finished $(date)"
86 | 


--------------------------------------------------------------------------------
/migrate.sh:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env bash
 2 | 
 3 | # Parse commands and options
 4 | while [[ $# -gt 0 ]]; do
 5 |   key="$1"
 6 | 
 7 |   case $key in
 8 |     -c|--container)
 9 |       _CONTAINER=$2
10 |       shift
11 |     ;;
12 |     -h|--help)
13 |       _HELP=1
14 |     ;;
15 |     -a|--all)
16 |       _ALL=1
17 |     ;;
18 |     -v|--verbose)
19 |       _VERBOSE=1
20 |     ;;
21 |     -f|--file)
22 |       _FILE=$2
23 |       shift
24 |     ;;
25 |     *)
26 |       _commands="$_commands $key"
27 |     ;;
28 |   esac
29 |   shift || true
30 | done
31 | 
32 | # Reset remaining, unmatched arguments
33 | set -- $_commands
34 | 
35 | if [ "$_HELP" ] || [ -z "$_FILE" ]; then cat >&2 <<EOS
36 | Usage: $0 [options]
37 | This script will take a list of S3 objects from a file and copy them to the
38 | target Blob container. Any files in the same folder as the S3 object will also be copied.
39 | Options:
40 |   -h,--help        Show this help and exit
41 |   -c,--container   Storage blob container to upload to
42 |   -f,--file        Path to the file containing a list of source S3 objects.
43 |   -a,--all         Copy all objects without prompting
44 | EOS
45 | exit; fi
46 | 
47 | mkdir -p tmp
48 | _BLOB="https://${AZURE_STORAGE_ACCOUNT}.blob.core.windows.net/${_CONTAINER}"
49 | 
50 | echo File: $_FILE
51 | 
52 | while read -u 3 _LINE; do
53 |   echo "Source Object: $_LINE"
54 | 
55 |   if [ -z "$_ALL" ]; then
56 |     read -n 1 -p "Begin Transfer? [ync]: " _CHOICE
57 |     echo
58 | 
59 |     case $_CHOICE in
60 |     y) echo Begin transfer     ;;
61 |     c) echo Cancel; exit       ;;
62 |     *) echo Skipping; continue ;;
63 |     esac
64 |   fi
65 | 
66 |   # Get "mybucket" from "s3://mybucket/path/to/file.txt"
67 |   _BUCKET=$(echo $_LINE | cut -d/ -f 3)
68 | 
69 |   # Get bucket region
70 |   _REGION=$(curl -sI https://$_BUCKET.s3.amazonaws.com | grep 'x-amz-bucket-region' | awk '{print $2}' | tr -d '\r')
71 | 
72 |   # Get "s3://mybucket/path/to" from "s3://mybucket/path/to/file.txt"
73 |   _FOLDER=$(dirname $_LINE)
74 | 
75 |   # Get "path/to/" from "s3://mybucket/path/to/file.txt"
76 |   _PREFIX=${_FOLDER:$((${#_BUCKET} + 6))}
77 | 
78 |   # Construct the source URI
79 |   _SRC="https://$_BUCKET.s3.$_REGION.amazonaws.com/$_PREFIX"
80 | 
81 |   # Blob URI
82 |   _DST="$_BLOB/$_PREFIX/?${AZURE_STORAGE_SAS_TOKEN}"
83 | 
84 |   echo "Bucket: $_BUCKET"
85 |   echo "Region: $_REGION"
86 |   echo "Folder: $_FOLDER"
87 |   echo "Prefix: $_PREFIX"
88 |   echo "Source: $_SRC"
89 |   echo "Destin: $_DST"
90 | 
91 |   azcopy copy --recursive=true "$_SRC/*" "$_DST"
92 | done 3< $_FILE


--------------------------------------------------------------------------------
/notebooks/01_Beginners_guide/01_Jupyter_notebooks.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Introduction to Jupyter notebooks <img align=\"right\" src=\"../Supplementary_data/dea_logo.jpg\">\n",
  8 |     "\n",
  9 |     "* **Acknowledgement**: This notebook was originally created by [Digital Eath Australia (DEA)](https://www.ga.gov.au/about/projects/geographic/digital-earth-australia) and has been modified for use in the EY Data Science Program\n",
 10 |     "* **Prerequisites**: \n",
 11 |     "    * There is no prerequisite learning required, as this document is designed for a novice user of the Jupyter environment"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "## Background\n",
 19 |     "Access to implementations of the [Open Data Cube](https://www.opendatacube.org/) such as [Digital Earth Australia](https://www.ga.gov.au/dea) and [Digital Earth Africa](https://www.digitalearthafrica.org/) is achieved through the use of Python code and [Jupyter Notebooks](https://jupyterlab.readthedocs.io/en/stable/user/notebook.html).\n",
 20 |     "The Jupyter Notebook (also termed notebook from here onwards) is an interactive web application that allows for the viewing, creation and documentation of live code.\n",
 21 |     "Notebook applications include data transformation, visualisation, modelling and machine learning.\n",
 22 |     "The default web interface to access notebooks when using either the National Computational Infrastructure (NCI) or the DEA Sandbox is [JupyterLab](https://jupyterlab.readthedocs.io/en/stable/)."
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Description\n",
 30 |     "This notebook is designed to introduce users to the basics of using Python code in Jupyter Notebooks via JupyterLab.\n",
 31 |     "\n",
 32 |     "Topics covered include:\n",
 33 |     "\n",
 34 |     "1. How to run (execute) a Jupyter Notebook cell\n",
 35 |     "2. The different types of Jupyter Notebook cells\n",
 36 |     "3. Stopping a process or restarting a Jupyter Notebook\n",
 37 |     "4. Saving and exporting your work\n",
 38 |     "5. Starting a new Jupyter Notebook\n",
 39 |     "\n",
 40 |     "***"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "## Getting started"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "### Running (executing) a cell\n",
 55 |     "Jupyter Notebooks allow code to be separated into sections that can be executed independent of one another.\n",
 56 |     "These sections are called \"cells\".\n",
 57 |     "\n",
 58 |     "Python code is written into individual cells that can be executed by placing the cursor in the cell and typing `Shift-Enter` on the keyboard or selecting the &#9658; \"Run the selected cells and advance\" button in the ribbon at the top of the notebook.\n",
 59 |     "These options will run a single cell at a time.\n",
 60 |     "\n",
 61 |     "To automatically run all cells in a notebook, navigate to the \"Run\" tab of the menu bar at the top of JupyterLab and select \"Run All Cells\" (or the option that best suits your needs).\n",
 62 |     "When a cell is run, the cell's content is executed.\n",
 63 |     "Any output produced from running the cell will appear directly below it.\n",
 64 |     "\n",
 65 |     "Run the cell below:"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 1,
 71 |    "metadata": {},
 72 |    "outputs": [
 73 |     {
 74 |      "name": "stdout",
 75 |      "output_type": "stream",
 76 |      "text": [
 77 |       "I ran a cell!\n"
 78 |      ]
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "print(\"I ran a cell!\")"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "markdown",
 87 |    "metadata": {},
 88 |    "source": [
 89 |     "### Cell status"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "The `[ ]:` symbol to the left of each Code cell describes the state of the cell:\n",
 97 |     "\n",
 98 |     "* `[ ]:` means that the cell has not been run yet.\n",
 99 |     "* `[*]:` means that the cell is currently running.\n",
100 |     "* `[1]:` means that the cell has finished running and was the first cell run.\n",
101 |     "\n",
102 |     "The number indicates the order that the cells were run in.\n",
103 |     "\n",
104 |     "> **Note:** To check whether a cell is currently executing in a Jupyter notebook, inspect the small circle in the top-right of the window. \n",
105 |     "The circle will turn grey (\"Kernel busy\") when the cell is running, and return to empty (\"Kernel idle\") when the process is complete."
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "markdown",
110 |    "metadata": {},
111 |    "source": [
112 |     "## Jupyter notebook cell types\n",
113 |     "Cells are identified as either Code, Markdown, or Raw. \n",
114 |     "This designation can be changed using the ribbon at the top of the notebook."
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "### Code cells\n",
122 |     "\n",
123 |     "All code operations are performed in Code cells. \n",
124 |     "Code cells can be used to edit and write new code, and perform tasks like loading data, plotting data and running analyses. \n",
125 |     "\n",
126 |     "Click on the cell below. \n",
127 |     "Note that the ribbon at the top of the notebook describes it as a Code cell."
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 2,
133 |    "metadata": {},
134 |    "outputs": [
135 |     {
136 |      "name": "stdout",
137 |      "output_type": "stream",
138 |      "text": [
139 |       "This is a code cell\n"
140 |      ]
141 |     }
142 |    ],
143 |    "source": [
144 |     "print(\"This is a code cell\")"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "### Markdown cells\n",
152 |     "Place the cursor in this cell by double clicking.\n",
153 |     "\n",
154 |     "The cell format has changed to allow for editing. \n",
155 |     "Note that the ribbon at the top of the notebook describes this as a Markdown cell.\n",
156 |     "\n",
157 |     "Run this cell to return the formatted version.\n",
158 |     "\n",
159 |     "Markdown cells provide the narrative to a notebook.\n",
160 |     "They are used for text and are useful to describe the code operations in the following cells. \n",
161 |     "To see some of the formatting options for text in a Markdown cell, navigate to the \"Help\" tab of the menu bar at the top of JupyterLab and select \"Markdown Reference\".\n",
162 |     "Here you will see a wide range of text formatting options including headings, dot points, italics, hyperlinking and creating tables."
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "markdown",
167 |    "metadata": {},
168 |    "source": [
169 |     "### Raw cells\n",
170 |     "Information in Raw cells is stored in the notebook metadata and can be used to render different code formats into HTML or $\\LaTeX$.\n",
171 |     "There are a range of available Raw cell formats that differ depending on how they are to be rendered.\n",
172 |     "For the purposes of this beginner's guide, raw cells are rarely used by the authors and not required for most notebook users.\n",
173 |     "\n",
174 |     "There is a Raw cell associated with the [Tags](#Tags) section of this notebook below.\n",
175 |     "As this cell is in the \"ReStructured Text\" format, its contents are not visible nor are they executed in any way.\n",
176 |     "This cell is used by the authors to store information tags in the metadata that is relevant to the notebook, and create an [index of tags on the Digital Earth Australia user guide](https://docs.dea.ga.gov.au/genindex.html)."
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {},
182 |    "source": [
183 |     "## Stopping a process or restarting a Jupyter Notebook\n",
184 |     "Sometimes it can be useful to stop a cell execution before it finishes (e.g. if a process is taking too long to complete, or if the code needs to be modified before running the cell). \n",
185 |     "To interrupt a cell execution, click the &#9632; \"stop\" button (\"Interrupt the kernel\") in the ribbon above the notebook. \n",
186 |     "\n",
187 |     "To test this, run the following code cell.\n",
188 |     "This will run a piece of code that will take 20 seconds to complete.\n",
189 |     "To interrupt this code, press the &#9632; \"stop\" button. \n",
190 |     "The notebook should stop executing the cell.\n"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 3,
196 |    "metadata": {},
197 |    "outputs": [],
198 |    "source": [
199 |     "import time\n",
200 |     "time.sleep(20)"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "markdown",
205 |    "metadata": {},
206 |    "source": [
207 |     "If the approach above does not work (e.g. if the notebook has frozen or refuses to respond), try restarting the entire notebook.\n",
208 |     "To do this, navigate to the \"Kernel\" tab of the menu bar, then select \"Restart Kernel\".\n",
209 |     "Alternatively, click the &#x21bb; \"Restart the kernel\" button in the ribbon above the notebook.\n",
210 |     "\n",
211 |     "Restarting a notebook can also be useful for testing whether code will work correctly the first time a new user tries to run the notebook.\n",
212 |     "To restart and then run every cell in a notebook, navigate to the \"Kernel\" tab, then select \"Restart and Run All Cells\"."
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "markdown",
217 |    "metadata": {},
218 |    "source": [
219 |     "## Saving and exporting your work\n",
220 |     "\n",
221 |     "Modifications to Jupyter Notebooks are automatically saved every few minutes.\n",
222 |     "To actively save the notebook, navigate to \"File\" in the menu bar, then select \"Save Notebook\".\n",
223 |     "Alternatively, click the &#128190; \"save\" icon on the left of the ribbon above the notebook.\n",
224 |     "\n",
225 |     "\n",
226 |     "### Exporting Jupyter Notebooks to Python scripts\n",
227 |     "The standard file extension for a Jupyter Notebook is `.ipynb`.\n",
228 |     "\n",
229 |     "There are a range of export options that allow you to save your work for access outside of the Jupyter environment. \n",
230 |     "For example, Python code can easily be saved as `.py` Python scripts by navigating to the \"File\" tab of the menu bar in JupyterLab and selecting \"Export Notebook As\" followed by \"Export Notebook To Executable Script\".\n"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "markdown",
235 |    "metadata": {},
236 |    "source": [
237 |     "## Starting a new notebook\n",
238 |     "To create a new notebook, use JupyterLab's file browser to navigate to the directory you would like the notebook to be created in (if the file browser is not visible, re-open it by clicking on the &#128193; \"File browser\" icon at the top-left of the screen).\n",
239 |     "\n",
240 |     "Once you have navigated to the desired location, press the &#10010; \"New Launcher\" button above the browser. \n",
241 |     "This will bring up JupyterLab's \"Launcher\" page which allows you to launch a range of new files or utilities. \n",
242 |     "Below the heading \"Notebook\", click the large \"Python 3\" button.\n",
243 |     "This will create a new notebook entitled \"Untitled.ipynb\" in the chosen directory.\n",
244 |     "\n",
245 |     "To rename this notebook to something more useful, right-click on it in the file browser and select \"Rename\"."
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "markdown",
250 |    "metadata": {},
251 |    "source": [
252 |     "## Recommended next steps\n",
253 |     "\n",
254 |     "For more advanced information about working with Jupyter Notebooks or JupyterLab, see the [JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/notebook.html).\n",
255 |     "\n",
256 |     "To continue working through the notebooks in this beginner's guide, the following notebooks are designed to be worked through in the following order:\n",
257 |     "\n",
258 |     "1. **Jupyter Notebooks (this notebook)**\n",
259 |     "2. [Digital Earth Australia](02_DEA.ipynb)\n",
260 |     "3. [Products and Measurements](03_Products_and_measurements.ipynb)\n",
261 |     "4. [Loading data](04_Loading_data.ipynb)\n",
262 |     "5. [Plotting](05_Plotting.ipynb)\n",
263 |     "6. [Performing a basic analysis](06_Basic_analysis.ipynb)\n",
264 |     "7. [Introduction to Numpy](07_Intro_to_numpy.ipynb)\n",
265 |     "8. [Introduction to Xarray](08_Intro_to_xarray.ipynb)\n",
266 |     "9. [Parallel processing with Dask](09_Parallel_processing_with_dask.ipynb)\n",
267 |     "\n",
268 |     "Once you have worked through the beginner's guide, you can join advanced users by exploring:\n",
269 |     "\n",
270 |     "* The \"DEA datasets\" directory in the repository, where you can explore DEA products in depth.\n",
271 |     "* The \"Frequently used code\" directory, which contains a recipe book of common techniques and methods for analysing DEA data.\n",
272 |     "* The \"Real-world examples\" directory, which provides more complex workflows and analysis case studies."
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {},
278 |    "source": [
279 |     "***\n",
280 |     "## Additional information\n",
281 |     "\n",
282 |     "**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). \n",
283 |     "Digital Earth Australia data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.\n",
284 |     "\n",
285 |     "**Contact:** If you need assistance, please review the FAQ section and support options on the [EY Data Science platform](https://datascience.ey.com/)."
286 |    ]
287 |   }
288 |  ],
289 |  "metadata": {
290 |   "kernelspec": {
291 |    "display_name": "Python 3",
292 |    "language": "python",
293 |    "name": "python3"
294 |   },
295 |   "language_info": {
296 |    "codemirror_mode": {
297 |     "name": "ipython",
298 |     "version": 3
299 |    },
300 |    "file_extension": ".py",
301 |    "mimetype": "text/x-python",
302 |    "name": "python",
303 |    "nbconvert_exporter": "python",
304 |    "pygments_lexer": "ipython3",
305 |    "version": "3.6.9"
306 |   },
307 |   "widgets": {
308 |    "application/vnd.jupyter.widget-state+json": {
309 |     "state": {},
310 |     "version_major": 2,
311 |     "version_minor": 0
312 |    }
313 |   }
314 |  },
315 |  "nbformat": 4,
316 |  "nbformat_minor": 4
317 | }
318 | 


--------------------------------------------------------------------------------
/notebooks/02_Real_world_examples/outputs/Burnt_area_mapping/.gitkeep:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/notebooks/02_Real_world_examples/outputs/Change_detection/.gitkeep:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/notebooks/02_Real_world_examples/outputs/Coastal_erosion/.gitkeep:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/notebooks/02_Real_world_examples/outputs/Contour_extraction/.gitkeep:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/notebooks/02_Real_world_examples/outputs/Image_segmentation/.gitkeep:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/notebooks/03_EY_challenge1/resources/challenge1_train.csv:
--------------------------------------------------------------------------------
  1 | id,label,dateTimeLocal,dateTimeUTC
  2 | 0,ROSEDALE_P1_201901041439_MGA94_55,4/01/2019 14:39,4/01/2019 3:39
  3 | 1,ROSEDALE_1_P1_201901041446_MGA94_55,4/01/2019 14:46,4/01/2019 3:46
  4 | 2,ROSEDALE_3_P1_201901041501_MGA94_55,4/01/2019 15:01,4/01/2019 4:01
  5 | 3,POINT_H_62_P1_201901162128_MGA94_55,16/01/2019 21:28,16/01/2019 10:28
  6 | 4,NUNNETT_73_P1_201901171134_MGA94_55,17/01/2019 11:34,17/01/2019 0:34
  7 | 5,NUNNETT_88_P1_201901171656_MGA94_55,17/01/2019 16:56,17/01/2019 5:56
  8 | 6,NUNNETT_96_P1_201901172230_MGA94_55,17/01/2019 22:30,17/01/2019 11:30
  9 | 7,NUNNETT_107_P1_201901181517_MGA94_55,18/01/2019 15:17,18/01/2019 4:17
 10 | 8,NUNNETT_121_P1_201901191642_MGA94_55,19/01/2019 16:42,19/01/2019 5:42
 11 | 9,NUNNETT_128_P1_201901192201_MGA94_55,19/01/2019 22:01,19/01/2019 11:01
 12 | 10,NUNNETT_173_P1_201901251120_MGA94_55,25/01/2019 11:20,25/01/2019 0:20
 13 | 11,NUNNETT_174_P1_201901251128_MGA94_55,25/01/2019 11:28,25/01/2019 0:28
 14 | 12,NUNNETT_175_P1_201901251137_MGA94_55,25/01/2019 11:37,25/01/2019 0:37
 15 | 13,NUNNETT_176_P1_201901251157_MGA94_55,25/01/2019 11:57,25/01/2019 0:57
 16 | 14,NUNNETT_177_P1_201901251211_MGA94_55,25/01/2019 12:11,25/01/2019 1:11
 17 | 15,NUNNETT_179_P1_201901251234_MGA94_55,25/01/2019 12:34,25/01/2019 1:34
 18 | 16,NUNNETT_180_P1_201901251246_MGA94_55,25/01/2019 12:46,25/01/2019 1:46
 19 | 17,NUNNETT_181_P1_201901251254_MGA94_55,25/01/2019 12:54,25/01/2019 1:54
 20 | 18,NUNNETT_182_P1_201901251321_MGA94_55,25/01/2019 13:21,25/01/2019 2:21
 21 | 19,NUNNETT_183_P1_201901251342_MGA94_55,25/01/2019 13:42,25/01/2019 2:42
 22 | 20,NUNNETT_184_P1_201901251400_MGA94_55,25/01/2019 14:00,25/01/2019 3:00
 23 | 21,NUNNETT_185_P1_201901251419_MGA94_55,25/01/2019 14:19,25/01/2019 3:19
 24 | 22,NUNNETT_186_P1_201901251432_MGA94_55,25/01/2019 14:32,25/01/2019 3:32
 25 | 23,ABERFELDY_WEST_200_P1_201901260955_MGA94_55,26/01/2019 9:55,25/01/2019 22:55
 26 | 24,SUCKLINGS_RD_203_P1_201901261039_MGA94_55,26/01/2019 10:39,25/01/2019 23:39
 27 | 25,SUCKLINGS_JIM_TRACK__CREAM_CAN_HILL_205_P1_201901261050_MGA94_55,26/01/2019 10:50,25/01/2019 23:50
 28 | 26,ABERFELDY_WEST_214_P1_201901261750_MGA94_55,26/01/2019 17:50,26/01/2019 6:50
 29 | 27,CREAM_JIM_JORDAN_217_P1_201901262218_MGA94_55,26/01/2019 22:18,26/01/2019 11:18
 30 | 28,SUCKLINGS_RD_KNAPPING_TRACK_226_P1_201901271405_MGA94_55,27/01/2019 14:05,27/01/2019 3:05
 31 | 29,JORDAN_231_P1_201901271500_MGA94_55,27/01/2019 15:00,27/01/2019 4:00
 32 | 30,JORDAN_233_P1_201901271511_MGA94_55,27/01/2019 15:11,27/01/2019 4:11
 33 | 31,JORDAN_234_P1_201901271901_MGA94_55,27/01/2019 19:01,27/01/2019 8:01
 34 | 32,JORDAN_264_P1_201901301524_MGA94_55,30/01/2019 15:24,30/01/2019 4:24
 35 | 33,MCCALLISTER79_LA_TROBE_97_266_P1_201901301552_MGA94_55,30/01/2019 15:52,30/01/2019 4:52
 36 | 34,LA_TROBE_97_MCCALLISTER_79_268_P1_201901301611_MGA94_55,30/01/2019 16:11,30/01/2019 5:11
 37 | 35,MCCALLISTER_79_LA_TROBE_97_269_P1_201901301624_MGA94_55,30/01/2019 16:24,30/01/2019 5:24
 38 | 36,WALHALLA_295_P1_201902011156_MGA94_55,1/02/2019 11:56,1/02/2019 0:56
 39 | 37,JORDAN_310_P1_201902012046_MGA94_55,1/02/2019 20:46,1/02/2019 9:46
 40 | 38,WALHALLA_339_P1_201902030520_MGA94_55,3/02/2019 5:20,2/02/2019 18:20
 41 | 39,WALHALLA_346_P1_201902031122_MGA94_55,3/02/2019 11:22,3/02/2019 0:22
 42 | 40,WALHALLA_352_P1_201902031612_MGA94_55,3/02/2019 16:12,3/02/2019 5:12
 43 | 41,WALHALLA_354_P1_201902031637_MGA94_55,3/02/2019 16:37,3/02/2019 5:37
 44 | 42,WALHALLA_362_P1_201902040015_MGA94_55,4/02/2019 0:15,3/02/2019 13:15
 45 | 43,WALHALLA_363_P1_201902040024_MGA94_55,4/02/2019 0:24,3/02/2019 13:24
 46 | 44,WALHALLA_368_P1_201902040621_MGA94_55,4/02/2019 6:21,3/02/2019 19:21
 47 | 45,WALHALLA_373_P1_201902040748_MGA94_55,4/02/2019 7:48,3/02/2019 20:48
 48 | 46,WALHALLA_378_P1_201902041304_MGA94_55,4/02/2019 13:04,4/02/2019 2:04
 49 | 47,WALHALLA_379_P1_201902041319_MGA94_55,4/02/2019 13:19,4/02/2019 2:19
 50 | 48,WALHALLA_380_P1_201902061347_MGA94_55,6/02/2019 13:47,6/02/2019 2:47
 51 | 49,WALHALLA_381_P1_201902061356_MGA94_55,6/02/2019 13:56,6/02/2019 2:56
 52 | 50,WALHALLA_397_P1_201902251311_MGA94_55,25/02/2019 13:11,25/02/2019 2:11
 53 | 51,WALHALLA_398_P1_201902251323_MGA94_55,25/02/2019 13:23,25/02/2019 2:23
 54 | 52,JORDAN_399_P1_201902251330_MGA94_55,25/02/2019 13:30,25/02/2019 2:30
 55 | 53,JORDAN_400_P1_201902251339_MGA94_55,25/02/2019 13:39,25/02/2019 2:39
 56 | 54,WALHALLA_401_P1_201902251342_MGA94_55,25/02/2019 13:42,25/02/2019 2:42
 57 | 55,WALHALLA_402_P1_201902251413_MGA94_55,25/02/2019 14:13,25/02/2019 3:13
 58 | 56,WALHALLA_413_P1_201902261957_MGA94_55,26/02/2019 19:57,26/02/2019 8:57
 59 | 57,WALHALLA_414_P1_201902262006_MGA94_55,26/02/2019 20:06,26/02/2019 9:06
 60 | 58,GIPPSLAND_TRACK1_447_P1_201903011212_MGA94_55,1/03/2019 12:12,1/03/2019 1:12
 61 | 59,MACALISTER_612_P1_201903051406_MGA94_55,5/03/2019 14:06,5/03/2019 3:06
 62 | 60,MACALISTER_613_P1_201903051418_MGA94_55,5/03/2019 14:18,5/03/2019 3:18
 63 | 61,MACALISTER_695_P1_201903091542_MGA94_55,9/03/2019 15:42,9/03/2019 4:42
 64 | 62,MACALISTER_696_P1_201903091545_MGA94_55,9/03/2019 15:45,9/03/2019 4:45
 65 | 63,MACALISTER_697_P1_201903091555_MGA94_55,9/03/2019 15:55,9/03/2019 4:55
 66 | 64,MACALISTER_698_P1_201903091600_MGA94_55,9/03/2019 16:00,9/03/2019 5:00
 67 | 65,MACALISTER_699_P1_201903091610_MGA94_55,9/03/2019 16:10,9/03/2019 5:10
 68 | 66,MACALISTER_700_P1_201903091613_MGA94_55,9/03/2019 16:13,9/03/2019 5:13
 69 | 67,YARRA51_620_P1_201903051812_MGA94_55,5/03/2019 18:12,5/03/2019 7:12
 70 | 68,YARRA51_622_P1_201903051841_MGA94_55,5/03/2019 18:41,5/03/2019 7:41
 71 | 69,MACALISTER85_624_P1_201903051907_MGA94_55,5/03/2019 19:07,5/03/2019 8:07
 72 | 70,YARRA51_633_P1_201903061644_MGA94_55,6/03/2019 16:44,6/03/2019 5:44
 73 | 71,MACALISTER85_684_P1_201903080536_MGA94_55,8/03/2019 5:36,7/03/2019 18:36
 74 | 72,MACALISTER85_723_P1_201903100101_MGA94_55,10/03/2019 1:01,9/03/2019 14:01
 75 | 73,MACALISTER87_719_P1_201903100033_MGA94_55,10/03/2019 0:33,9/03/2019 13:33
 76 | 74,MACALISTER89_678_P1_201903080454_MGA94_55,8/03/2019 4:54,7/03/2019 17:54
 77 | 75,MACALISTER89_693_P1_201903091518_MGA94_55,9/03/2019 15:18,9/03/2019 4:18
 78 | 76,MACALISTER89_716_P1_201903100003_MGA94_55,10/03/2019 0:03,9/03/2019 13:03
 79 | 77,MACALISTER89_763_P1_201903131205_MGA94_55,13/03/2019 12:05,13/03/2019 1:05
 80 | 78,MACALISTER89_773_P1_201903141329_MGA94_55,14/03/2019 13:29,14/03/2019 2:29
 81 | 79,MACALISTER89_782_P1_201903141444_MGA94_55,14/03/2019 14:44,14/03/2019 3:44
 82 | 80,MACALISTER89_786_P1_201903151302_MGA94_55,15/03/2019 13:02,15/03/2019 2:02
 83 | 81,MACALISTER89_801_P1_201903161430_MGA94_55,16/03/2019 14:30,16/03/2019 3:30
 84 | 82,MACALISTER91_615_P1_201903051438_MGA94_55,5/03/2019 14:38,5/03/2019 3:38
 85 | 83,MACALISTER91_616_P1_201903051446_MGA94_55,5/03/2019 14:46,5/03/2019 3:46
 86 | 84,MACALISTER91_617_P1_201903051456_MGA94_55,5/03/2019 14:56,5/03/2019 3:56
 87 | 85,MACALISTER91_618_P1_201903051503_MGA94_55,5/03/2019 15:03,5/03/2019 4:03
 88 | 86,MACALISTER91_621_P1_201903051827_MGA94_55,5/03/2019 18:27,5/03/2019 7:27
 89 | 87,YARRA51_704_P1_201903091659_MGA94_55,9/03/2019 16:59,9/03/2019 5:59
 90 | 88,MACALISTER91_681_P1_201903080517_MGA94_55,8/03/2019 5:17,7/03/2019 18:17
 91 | 89,MACALISTER91_682_P1_201903080526_MGA94_55,8/03/2019 5:26,7/03/2019 18:26
 92 | 90,MACALISTER91_685_P1_201903080539_MGA94_55,8/03/2019 5:39,7/03/2019 18:39
 93 | 91,MACALISTER91_752_P1_201903111601_MGA94_55,11/03/2019 16:01,11/03/2019 5:01
 94 | 92,MACALISTER91_755_P1_201903111619_MGA94_55,11/03/2019 16:19,11/03/2019 5:19
 95 | 93,MACALISTER91_760_P1_201903111709_MGA94_55,11/03/2019 17:09,11/03/2019 6:09
 96 | 94,YARRA51_726_P1_201903100129_MGA94_55,10/03/2019 1:29,9/03/2019 14:29
 97 | 95,MACALISTER91_761_P1_201903111720_MGA94_55,11/03/2019 17:20,11/03/2019 6:20
 98 | 96,MACALISTER91_762_P1_201903111730_MGA94_55,11/03/2019 17:30,11/03/2019 6:30
 99 | 97,MACALISTER91_766_P1_201903131239_MGA94_55,13/03/2019 12:39,13/03/2019 1:39
100 | 98,MACALISTER91_767_P1_201903131252_MGA94_55,13/03/2019 12:52,13/03/2019 1:52
101 | 99,MACALISTER91_770_P1_201903131314_MGA94_55,13/03/2019 13:14,13/03/2019 2:14
102 | 100,MACALISTER91_783_P1_201903141643_MGA94_55,14/03/2019 16:43,14/03/2019 5:43
103 | 101,MACALISTER91_789_P1_201903151330_MGA94_55,15/03/2019 13:30,15/03/2019 2:30
104 | 102,MACALISTER91_790_P1_201903151340_MGA94_55,15/03/2019 13:40,15/03/2019 2:40
105 | 103,MACALISTER91_793_P1_201903151358_MGA94_55,15/03/2019 13:58,15/03/2019 2:58
106 | 104,MACALISTER91_804_P1_201903161503_MGA94_55,16/03/2019 15:03,16/03/2019 4:03
107 | 105,MACALISTER91_805_P1_201903161517_MGA94_55,16/03/2019 15:17,16/03/2019 4:17
108 | 106,MACALISTER91_808_P1_201903161539_MGA94_55,16/03/2019 15:39,16/03/2019 4:39
109 | 107,MACALISTER91_99_649_P1_201903070453_MGA94_55,7/03/2019 4:53,6/03/2019 17:53
110 | 108,MACALISTER97_680_P1_201903080512_MGA94_55,8/03/2019 5:12,7/03/2019 18:12
111 | 109,MACALISTER97_720_P1_201903100042_MGA94_55,10/03/2019 0:42,9/03/2019 13:42
112 | 110,MACALISTER97_753_P1_201903111605_MGA94_55,11/03/2019 16:05,11/03/2019 5:05
113 | 111,MACALISTER97_765_P1_201903131230_MGA94_55,13/03/2019 12:30,13/03/2019 1:30
114 | 112,MACALISTER97_768_P1_201903131255_MGA94_55,13/03/2019 12:55,13/03/2019 1:55
115 | 113,YARRA51_794_P1_201903151412_MGA94_55,15/03/2019 14:12,15/03/2019 3:12
116 | 114,YARRA54_795_P1_201903151422_MGA94_55,15/03/2019 14:22,15/03/2019 3:22
117 | 115,MACALISTER97_769_P1_201903131306_MGA94_55,13/03/2019 13:06,13/03/2019 2:06
118 | 116,MACALISTER97_774_P1_201903141339_MGA94_55,14/03/2019 13:39,14/03/2019 2:39
119 | 117,MACALISTER97_777_P1_201903141358_MGA94_55,14/03/2019 13:58,14/03/2019 2:58
120 | 118,MACALISTER97_778_P1_201903141405_MGA94_55,14/03/2019 14:05,14/03/2019 3:05
121 | 119,MACALISTER97_788_P1_201903151324_MGA94_55,15/03/2019 13:24,15/03/2019 2:24
122 | 120,MACALISTER97_791_P1_201903151343_MGA94_55,15/03/2019 13:43,15/03/2019 2:43
123 | 121,MACALISTER97_792_P1_201903151352_MGA94_55,15/03/2019 13:52,15/03/2019 2:52
124 | 122,MACALISTER97_803_P1_201903161455_MGA94_55,16/03/2019 14:55,16/03/2019 3:55
125 | 123,YARRA51_809_P1_201903161558_MGA94_55,16/03/2019 15:58,16/03/2019 4:58
126 | 124,MACALISTER97_806_P1_201903161522_MGA94_55,16/03/2019 15:22,16/03/2019 4:22
127 | 125,MACALISTER97_807_P1_201903161532_MGA94_55,16/03/2019 15:32,16/03/2019 4:32
128 | 126,MACALISTER99_623_P1_201903051858_MGA94_55,5/03/2019 18:58,5/03/2019 7:58
129 | 127,MACALISTER99_646_P1_201903070440_MGA94_55,7/03/2019 4:40,6/03/2019 17:40
130 | 128,MACALISTER99_683_P1_201903080529_MGA94_55,8/03/2019 5:29,7/03/2019 18:29
131 | 


--------------------------------------------------------------------------------
/notebooks/03_EY_challenge1/resources/fire_boundaries.cpg:
--------------------------------------------------------------------------------
1 | UTF-8


--------------------------------------------------------------------------------
/notebooks/03_EY_challenge1/resources/fire_boundaries.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/03_EY_challenge1/resources/fire_boundaries.dbf


--------------------------------------------------------------------------------
/notebooks/03_EY_challenge1/resources/fire_boundaries.prj:
--------------------------------------------------------------------------------
1 | PROJCS["GDA_1994_MGA_Zone_55",GEOGCS["GCS_GDA_1994",DATUM["D_GDA_1994",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",10000000.0],PARAMETER["Central_Meridian",147.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]


--------------------------------------------------------------------------------
/notebooks/03_EY_challenge1/resources/fire_boundaries.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/03_EY_challenge1/resources/fire_boundaries.shp


--------------------------------------------------------------------------------
/notebooks/03_EY_challenge1/resources/fire_boundaries.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/03_EY_challenge1/resources/fire_boundaries.shx


--------------------------------------------------------------------------------
/notebooks/03_EY_challenge1/resources/fire_example.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/03_EY_challenge1/resources/fire_example.jpg


--------------------------------------------------------------------------------
/notebooks/04_EY_challenge2/Challenge2_Getting_started.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Challenge 2 - Getting started <img align=\"right\" src=\"../Supplementary_data/EY_logo.png\" style=\"margin:0px 50px\">\n",
  8 |     "\n",
  9 |     "Welcome to the 2021 Better Working World Data Challenge! \n",
 10 |     "\n",
 11 |     "Prior to running this notebook, make sure you have:\n",
 12 |     "* **Registered** for \"Challenge 2: Fire behavior\" on the [EY Data Science Platform](https://datascience.ey.com/).\n",
 13 |     "* **Completed** \"Challenge 1: Fire mapping\" to a reasonable accuracy before starting on Challenge 2. You will use your process/model from Challenge 1 to create more annotated data for Challenge 2.\n"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "### Context \n",
 21 |     "\n",
 22 |     "Airborne infrared linescan images are currently considered one of the best sources of information about fire intensity and location. However, there are times when it is not possible to acquire infrared linescan imagery, for example due to resource contraints or unsafe conditions. An alternative source of images for fire mapping is via satellite. The availability and resolution of satellite imagery has increased substantially in recent years, making it possible to monitor bushfires from space. Satellite imagery is not always available, however it is a valuable complement to other information sources.\n",
 23 |     "\n",
 24 |     "While the number of satellite passes per day is continuing to increase, especially with commercial operators expanding their service offerings, there are still long periods when there is no coverage. For times when neither linescan nor satellite data are available, it is possible to extrapolate from previous observations to forecast the current location of the fire. It can also be useful for firefighting teams to forecast future locations of fire based on current observations.\n",
 25 |     "\n",
 26 |     "\n",
 27 |     "### Your task\n",
 28 |     "\n",
 29 |     "<img src=\"resources/animated_timeseries.gif\" align=\"right\" width=300px style=\"margin:0px 40px 40px 40px\">\n",
 30 |     "\n",
 31 |     "The training dataset you worked with in Challenge 1 contains 129 linescan images, plus an additional 5 linescan images that were used for testing. These images were captured over seven different fire events. For each of the fire events, a narrative sequence of images can be produced from a combination of linescan and satellite images. In two of the seven fire events, linescan images have been withheld at key time points. Your task is to produce a map of the fire at those time points.\n",
 32 |     "\n",
 33 |     "Similar to Challenge 1, the `test.csv` file contains the details of the pixels that you must forecast using the narrative series of each fire event.\n",
 34 |     "\n",
 35 |     "All linescan and satellite images are served via the Open Data Cube python library.\n",
 36 |     "\n",
 37 |     " To forecast the fire spread over time, you may need to use information such as:\n",
 38 |     "- terrain, vegetation type and vegetation condition prior to the fire (available from satellite images), for example the NDVI product as a proxy for fuel loading or land use type (see resources in the 02_Real_world_examples folder)\n",
 39 |     "- linescan and satellite images taken during the fire\n",
 40 |     "\n",
 41 |     "Note that the timeseries animation shown here was created in the [Linescan loading examples](../04_EY_challenge2/Linescan_loading_examples.ipynb) notebook."
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 1,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "name": "stderr",
 51 |      "output_type": "stream",
 52 |      "text": [
 53 |       "/env/lib/python3.6/site-packages/geopandas/_compat.py:88: UserWarning: The Shapely GEOS version (3.7.2-CAPI-1.11.0 ) is incompatible with the GEOS version PyGEOS was compiled with (3.9.0-CAPI-1.16.2). Conversions between both will be slow.\n",
 54 |       "  shapely_geos_version, geos_capi_version_string\n"
 55 |      ]
 56 |     }
 57 |    ],
 58 |    "source": [
 59 |     "%matplotlib inline\n",
 60 |     "import sys\n",
 61 |     "import numpy as np\n",
 62 |     "import pandas as pd\n",
 63 |     "import geopandas as gpd\n",
 64 |     "\n",
 65 |     "from odc.ui import show_datasets\n",
 66 |     "from datacube import Datacube\n",
 67 |     "from datacube.testutils.io import native_geobox\n",
 68 |     "\n",
 69 |     "import ipyleaflet as L"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 2,
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "dc = Datacube()"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": 3,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "linescan_datasets = dc.find_datasets(product='linescan')\n",
 88 |     "linescan_datasets = sorted(linescan_datasets, key = lambda ds: (ds.center_time, ds.id))"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 4,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "vector_file = '../03_EY_challenge1/resources/fire_boundaries.shp'\n",
 98 |     "gdf = gpd.read_file(vector_file)\n",
 99 |     "# gdf.head(1).T"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "## Exploring fire events"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "The `train.csv` file lists all the linescans that are available, including an \"event\" column showing which fire event they are part of."
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 5,
119 |    "metadata": {
120 |     "scrolled": true
121 |    },
122 |    "outputs": [
123 |     {
124 |      "data": {
125 |       "text/html": [
126 |        "<div>\n",
127 |        "<style scoped>\n",
128 |        "    .dataframe tbody tr th:only-of-type {\n",
129 |        "        vertical-align: middle;\n",
130 |        "    }\n",
131 |        "\n",
132 |        "    .dataframe tbody tr th {\n",
133 |        "        vertical-align: top;\n",
134 |        "    }\n",
135 |        "\n",
136 |        "    .dataframe thead th {\n",
137 |        "        text-align: right;\n",
138 |        "    }\n",
139 |        "</style>\n",
140 |        "<table border=\"1\" class=\"dataframe\">\n",
141 |        "  <thead>\n",
142 |        "    <tr style=\"text-align: right;\">\n",
143 |        "      <th></th>\n",
144 |        "      <th>id</th>\n",
145 |        "      <th>label</th>\n",
146 |        "      <th>dateTimeLocal</th>\n",
147 |        "      <th>dateTimeUTC</th>\n",
148 |        "      <th>event</th>\n",
149 |        "    </tr>\n",
150 |        "  </thead>\n",
151 |        "  <tbody>\n",
152 |        "    <tr>\n",
153 |        "      <th>0</th>\n",
154 |        "      <td>0</td>\n",
155 |        "      <td>ROSEDALE_P1_201901041439_MGA94_55</td>\n",
156 |        "      <td>4/01/2019 14:39</td>\n",
157 |        "      <td>4/01/2019 3:39</td>\n",
158 |        "      <td>Rosedale</td>\n",
159 |        "    </tr>\n",
160 |        "    <tr>\n",
161 |        "      <th>1</th>\n",
162 |        "      <td>1</td>\n",
163 |        "      <td>ROSEDALE_1_P1_201901041446_MGA94_55</td>\n",
164 |        "      <td>4/01/2019 14:46</td>\n",
165 |        "      <td>4/01/2019 3:46</td>\n",
166 |        "      <td>Rosedale</td>\n",
167 |        "    </tr>\n",
168 |        "    <tr>\n",
169 |        "      <th>2</th>\n",
170 |        "      <td>2</td>\n",
171 |        "      <td>ROSEDALE_3_P1_201901041501_MGA94_55</td>\n",
172 |        "      <td>4/01/2019 15:01</td>\n",
173 |        "      <td>4/01/2019 4:01</td>\n",
174 |        "      <td>Rosedale</td>\n",
175 |        "    </tr>\n",
176 |        "  </tbody>\n",
177 |        "</table>\n",
178 |        "</div>"
179 |       ],
180 |       "text/plain": [
181 |        "   id                                label    dateTimeLocal     dateTimeUTC  \\\n",
182 |        "0   0    ROSEDALE_P1_201901041439_MGA94_55  4/01/2019 14:39  4/01/2019 3:39   \n",
183 |        "1   1  ROSEDALE_1_P1_201901041446_MGA94_55  4/01/2019 14:46  4/01/2019 3:46   \n",
184 |        "2   2  ROSEDALE_3_P1_201901041501_MGA94_55  4/01/2019 15:01  4/01/2019 4:01   \n",
185 |        "\n",
186 |        "      event  \n",
187 |        "0  Rosedale  \n",
188 |        "1  Rosedale  \n",
189 |        "2  Rosedale  "
190 |       ]
191 |      },
192 |      "execution_count": 5,
193 |      "metadata": {},
194 |      "output_type": "execute_result"
195 |     }
196 |    ],
197 |    "source": [
198 |     "train = pd.read_csv('resources/challenge2_train.csv')\n",
199 |     "train.head(3)"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "markdown",
204 |    "metadata": {},
205 |    "source": [
206 |     "Note that two linescans are relevant for multiple fire events that were occurring simultaneously, Macalister91 & Macalister97. There are also some additional linescans marked \"other\", that are not part of the seven main fire events."
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 6,
212 |    "metadata": {},
213 |    "outputs": [
214 |     {
215 |      "data": {
216 |       "text/plain": [
217 |        "Macalister91                   30\n",
218 |        "Walhalla                       24\n",
219 |        "Macalister97                   23\n",
220 |        "Tambo76                        20\n",
221 |        "Latrobe86                      15\n",
222 |        "Other                          10\n",
223 |        "Yarra51                         7\n",
224 |        "Rosedale                        3\n",
225 |        "Macalister91 & Macalister97     2\n",
226 |        "Name: event, dtype: int64"
227 |       ]
228 |      },
229 |      "execution_count": 6,
230 |      "metadata": {},
231 |      "output_type": "execute_result"
232 |     }
233 |    ],
234 |    "source": [
235 |     "train.event.value_counts()"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {},
241 |    "source": [
242 |     "Using this file, we can group the linescans into discreet events. First, we'll join the event-to-linescan mapping from `train.csv` onto our list of which linescan datasets that are available, by creating a new \"event\" property in the list."
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": 7,
248 |    "metadata": {
249 |     "scrolled": false
250 |    },
251 |    "outputs": [],
252 |    "source": [
253 |     "for ls in linescan_datasets:\n",
254 |     "    ls.event = train.loc[train.label==ls.metadata_doc[\"label\"], 'event'].values[0]"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "markdown",
259 |    "metadata": {},
260 |    "source": [
261 |     "Now let's explore just the Yarra51 event. The cell below uses list comprehension to return a subset of the `linescan_dataset` list, and the following cell prints the \"label\" property of each linecan in the subset."
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": 8,
267 |    "metadata": {},
268 |    "outputs": [],
269 |    "source": [
270 |     "Yarra51_ls = [ls for ls in linescan_datasets if ls.event == 'Yarra51']"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 9,
276 |    "metadata": {},
277 |    "outputs": [
278 |     {
279 |      "name": "stdout",
280 |      "output_type": "stream",
281 |      "text": [
282 |       "YARRA51_620_P1_201903051812_MGA94_55\n",
283 |       "YARRA51_622_P1_201903051841_MGA94_55\n",
284 |       "YARRA51_633_P1_201903061644_MGA94_55\n",
285 |       "YARRA51_704_P1_201903091659_MGA94_55\n",
286 |       "YARRA51_726_P1_201903100129_MGA94_55\n",
287 |       "YARRA51_794_P1_201903151412_MGA94_55\n",
288 |       "YARRA51_809_P1_201903161558_MGA94_55\n"
289 |      ]
290 |     }
291 |    ],
292 |    "source": [
293 |     "for ls in Yarra51_ls:\n",
294 |     "    print(ls.metadata_doc['label'])"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "markdown",
299 |    "metadata": {},
300 |    "source": [
301 |     "We'll also filter the polygon dataset to get a quick look at what ground truth annotations are available for this fire event. You are encouraged to use your solution from Challenge 1 to fill in the blanks where there is no ground truth annotation available. From this list you can see that there are no polygons provided for \"YARRA51_622_P1_201903051841_MGA94_55\"."
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "code",
306 |    "execution_count": 10,
307 |    "metadata": {},
308 |    "outputs": [
309 |     {
310 |      "data": {
311 |       "text/plain": [
312 |        "array(['yarra51 620 p1_201903051812_mga94_55.jpg',\n",
313 |        "       'yarra51 633 p1_201903061644_mga94_55.jpg',\n",
314 |        "       'yarra51 704 p1_201903091659_mga94_55.jpg',\n",
315 |        "       'yarra51 726 p1_201903100129_mga94_55.jpg',\n",
316 |        "       'yarra51 794 p1_201903151412_mga94_55.jpg',\n",
317 |        "       'ObservationsAreaEditing_20190312_1700',\n",
318 |        "       'yarra51 809 p1_201903161558_mga94_55.jpg'], dtype=object)"
319 |       ]
320 |      },
321 |      "execution_count": 10,
322 |      "metadata": {},
323 |      "output_type": "execute_result"
324 |     }
325 |    ],
326 |    "source": [
327 |     "Yarra51_gdf = gdf.loc[gdf.event == 'Yarra51']\n",
328 |     "Yarra51_gdf.SourceName.unique()"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "markdown",
333 |    "metadata": {},
334 |    "source": [
335 |     "Let's see what this looks like on a map, alongside the available polygons. Note we will also need to change the CRS of the polygons to epsg:4326 to display them on the map."
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "code",
340 |    "execution_count": 93,
341 |    "metadata": {},
342 |    "outputs": [
343 |     {
344 |      "name": "stderr",
345 |      "output_type": "stream",
346 |      "text": [
347 |       "/env/lib/python3.6/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n",
348 |       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
349 |       "Try using .loc[row_indexer,col_indexer] = value instead\n",
350 |       "\n",
351 |       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
352 |       "  super(GeoDataFrame, self).__setitem__(key, value)\n"
353 |      ]
354 |     }
355 |    ],
356 |    "source": [
357 |     "m = show_datasets(Yarra51_ls)\n",
358 |     "\n",
359 |     "Yarra51_gdf.geometry = Yarra51_gdf.geometry.to_crs('epsg:4326')\n",
360 |     "\n",
361 |     "layer_gdf = L.GeoData(geo_dataframe=Yarra51_gdf, name = 'polygons',\n",
362 |     "                      style={'color': 'black', 'fillColor': '#3366cc', 'opacity':0.05,\n",
363 |     "                             'weight':1.9, 'fillOpacity':0.6}\n",
364 |     "                     )\n",
365 |     "\n",
366 |     "m.add_layer(layer = layer_gdf)"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "execution_count": 56,
372 |    "metadata": {
373 |     "scrolled": false
374 |    },
375 |    "outputs": [
376 |     {
377 |      "data": {
378 |       "application/vnd.jupyter.widget-view+json": {
379 |        "model_id": "acb1860603a94a1593ace549916a9d15",
380 |        "version_major": 2,
381 |        "version_minor": 0
382 |       },
383 |       "text/plain": [
384 |        "Map(center=[-37.58633193103924, 145.88252890518146], controls=(ZoomControl(options=['position', 'zoom_in_text'…"
385 |       ]
386 |      },
387 |      "metadata": {},
388 |      "output_type": "display_data"
389 |     }
390 |    ],
391 |    "source": [
392 |     "m"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "code",
397 |    "execution_count": 92,
398 |    "metadata": {
399 |     "scrolled": false
400 |    },
401 |    "outputs": [],
402 |    "source": [
403 |     "# for ls in Yarra51_ls:\n",
404 |     "#     ng = native_geobox(ls)\n",
405 |     "#     ls_data = dc.load(product='linescan',\n",
406 |     "#                       id=ls.metadata_doc['id'],\n",
407 |     "#                       output_crs = ls.metadata_doc['crs'],\n",
408 |     "#                       resolution = ng.resolution)\n",
409 |     "#     ls_data['linescan'].squeeze().plot.imshow(cmap='inferno', robust=False, size=8)"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "markdown",
414 |    "metadata": {},
415 |    "source": [
416 |     "Using the functions in the [Linescan loading examples](../04_EY_challenge2/Linescan_loading_examples.ipynb) notebook you can also find satellite data relevant to the fire event. This could be used to supplement time periods between linescans, or to provide information about the conditions of the fire area prior to the fire. You can also use the functions in that notebook to find a common resolution and extent between linescans, and to create animations of the event."
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "markdown",
421 |    "metadata": {},
422 |    "source": [
423 |     "### Forecast fire progression"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "markdown",
428 |    "metadata": {},
429 |    "source": [
430 |     "Your task is now to forecast the progression of each fire event. You could develop a method using one fire or set of training images, and then validate it using other fire events/sets of training images.\n",
431 |     "\n",
432 |     "For example, in a basic solution you could find the edge of the fire front at a given time, observe how far it travels between two adjacent images, and then adjust this distance based on the time elapsed to a third image. You could also adjust the speed based on local ground conditions such as vegetation and land use type.\n",
433 |     "\n",
434 |     "For a more sophisticated solution you could implement a neural network to do next-frame prediction for each 'frame' of the fire event, based on a string of previous frames."
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "markdown",
439 |    "metadata": {},
440 |    "source": [
441 |     "### Making a submission\n",
442 |     "The `test.csv` file provides a list of 5000 coordinates that require classification at five additional fire observations where linescans have not been provided. Note that the coordinates are denoted in the CRS mentioned above, epsg:28355. Follow the same process described in the Challenge 1 example notebook to create a submission.\n",
443 |     "\n",
444 |     "Of the five linescans which the test coordinate pairs have been selected from, three are from the Tambo76 event and two are from the Rosedale event. In both cases, some training images from the start of the fire event, prior to the linescans selected for testing, have been provided: 3 from the Rosedale event and 20 from the Tambo76 event."
445 |    ]
446 |   },
447 |   {
448 |    "cell_type": "code",
449 |    "execution_count": 62,
450 |    "metadata": {},
451 |    "outputs": [
452 |     {
453 |      "data": {
454 |       "text/html": [
455 |        "<div>\n",
456 |        "<style scoped>\n",
457 |        "    .dataframe tbody tr th:only-of-type {\n",
458 |        "        vertical-align: middle;\n",
459 |        "    }\n",
460 |        "\n",
461 |        "    .dataframe tbody tr th {\n",
462 |        "        vertical-align: top;\n",
463 |        "    }\n",
464 |        "\n",
465 |        "    .dataframe thead th {\n",
466 |        "        text-align: right;\n",
467 |        "    }\n",
468 |        "</style>\n",
469 |        "<table border=\"1\" class=\"dataframe\">\n",
470 |        "  <thead>\n",
471 |        "    <tr style=\"text-align: right;\">\n",
472 |        "      <th></th>\n",
473 |        "      <th>id</th>\n",
474 |        "      <th>event</th>\n",
475 |        "      <th>x</th>\n",
476 |        "      <th>y</th>\n",
477 |        "      <th>dateTimeLocal</th>\n",
478 |        "      <th>dateTimeUTC</th>\n",
479 |        "      <th>target</th>\n",
480 |        "    </tr>\n",
481 |        "  </thead>\n",
482 |        "  <tbody>\n",
483 |        "    <tr>\n",
484 |        "      <th>0</th>\n",
485 |        "      <td>0</td>\n",
486 |        "      <td>Rosedale</td>\n",
487 |        "      <td>491391</td>\n",
488 |        "      <td>5769660</td>\n",
489 |        "      <td>4/01/2019 15:40</td>\n",
490 |        "      <td>4/01/2019 4:40</td>\n",
491 |        "      <td>NaN</td>\n",
492 |        "    </tr>\n",
493 |        "    <tr>\n",
494 |        "      <th>1</th>\n",
495 |        "      <td>1</td>\n",
496 |        "      <td>Rosedale</td>\n",
497 |        "      <td>486132</td>\n",
498 |        "      <td>5764884</td>\n",
499 |        "      <td>4/01/2019 15:40</td>\n",
500 |        "      <td>4/01/2019 4:40</td>\n",
501 |        "      <td>NaN</td>\n",
502 |        "    </tr>\n",
503 |        "    <tr>\n",
504 |        "      <th>2</th>\n",
505 |        "      <td>2</td>\n",
506 |        "      <td>Rosedale</td>\n",
507 |        "      <td>484371</td>\n",
508 |        "      <td>5776757</td>\n",
509 |        "      <td>4/01/2019 15:40</td>\n",
510 |        "      <td>4/01/2019 4:40</td>\n",
511 |        "      <td>NaN</td>\n",
512 |        "    </tr>\n",
513 |        "  </tbody>\n",
514 |        "</table>\n",
515 |        "</div>"
516 |       ],
517 |       "text/plain": [
518 |        "   id     event       x        y    dateTimeLocal     dateTimeUTC  target\n",
519 |        "0   0  Rosedale  491391  5769660  4/01/2019 15:40  4/01/2019 4:40     NaN\n",
520 |        "1   1  Rosedale  486132  5764884  4/01/2019 15:40  4/01/2019 4:40     NaN\n",
521 |        "2   2  Rosedale  484371  5776757  4/01/2019 15:40  4/01/2019 4:40     NaN"
522 |       ]
523 |      },
524 |      "execution_count": 62,
525 |      "metadata": {},
526 |      "output_type": "execute_result"
527 |     }
528 |    ],
529 |    "source": [
530 |     "test = pd.read_csv('resources/challenge2_test.csv')\n",
531 |     "test.head(3)"
532 |    ]
533 |   },
534 |   {
535 |    "cell_type": "code",
536 |    "execution_count": 67,
537 |    "metadata": {},
538 |    "outputs": [
539 |     {
540 |      "data": {
541 |       "text/plain": [
542 |        "Tambo76     3.0\n",
543 |        "Rosedale    2.0\n",
544 |        "Name: event, dtype: float64"
545 |       ]
546 |      },
547 |      "execution_count": 67,
548 |      "metadata": {},
549 |      "output_type": "execute_result"
550 |     }
551 |    ],
552 |    "source": [
553 |     "test.event.value_counts()/1000"
554 |    ]
555 |   },
556 |   {
557 |    "cell_type": "markdown",
558 |    "metadata": {},
559 |    "source": [
560 |     "***\n",
561 |     "## Additional information\n",
562 |     "\n",
563 |     "**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). \n",
564 |     "Digital Earth Australia data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.\n",
565 |     "\n",
566 |     "**Contact:** If you need assistance, please review the FAQ section and support options on the [EY Data Science Platform](https://datascience.ey.com/)."
567 |    ]
568 |   }
569 |  ],
570 |  "metadata": {
571 |   "kernelspec": {
572 |    "display_name": "Python 3",
573 |    "language": "python",
574 |    "name": "python3"
575 |   },
576 |   "language_info": {
577 |    "codemirror_mode": {
578 |     "name": "ipython",
579 |     "version": 3
580 |    },
581 |    "file_extension": ".py",
582 |    "mimetype": "text/x-python",
583 |    "name": "python",
584 |    "nbconvert_exporter": "python",
585 |    "pygments_lexer": "ipython3",
586 |    "version": "3.6.9"
587 |   }
588 |  },
589 |  "nbformat": 4,
590 |  "nbformat_minor": 4
591 | }
592 | 


--------------------------------------------------------------------------------
/notebooks/04_EY_challenge2/Sentinel-1_example_rgb.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "\n",
  8 |     "# Sentinel-1 Radar imagery <img align=\"right\" src=\"../Supplementary_data/EY_logo.png\" style=\"margin:0px 50px\">\n",
  9 |     "\n",
 10 |     "\n",
 11 |     "Radar imagery can be used as a complementary dataset for Challenge 2. \n",
 12 |     "\n",
 13 |     "Radar is an active measurement technique, illuminating the Earth's surface and detecting the light scattering effects, rather than relying on the sun to illuminate the Earth, such as in passive, optical techniques. Synthetic Aperture Radar (SAR) is a form of radar often used on satellites, where the motion of the satellite over the landscape allows for higher spatial resolution measurements to be obtained. SAR measurements can penetrate cloud and smoke cover, making it particularly useful for assessing bushfires, and can also operate at night.\n",
 14 |     "\n",
 15 |     "The European Space Agency (ESA)'s Sentinel-1 mission consists of two satellites: Sentinel-1A (launched in 2014) and Sentinel-1B (launched in 2016). Each houses an SAR instrument on board. These instruments provide single C-band 1 dB radiometric accuracy with a central frequency at 5.405 GHz. \n",
 16 |     "\n",
 17 |     "Together, the Sentinel-1 satellites provide:\n",
 18 |     "- global coverage \n",
 19 |     "- frequent imaging (the satellites have a frequent revisit time of 6 days.)\n",
 20 |     "- different polarizations to capture different geospatial properties (e.g., VV, VH, as explained further below)\n",
 21 |     "- and can detect millimetre changes in elevation.\n",
 22 |     "\n",
 23 |     "This notebook explores the Sentinel-1 datasets. You can use these in your analysis to assist with the fire progression prediction in Challenge 2. \n",
 24 |     "\n",
 25 |     "As an example, this notebook shows what information we can obtain from the Sentinel-1 Radio Terrain Correction (RTC) gamma-0 data product `s1_rtc`.  The RTC gamma-0 data are corrected for variations caused by changing observation geometries."
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "### Import libraries"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": null,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "%matplotlib inline\n",
 42 |     "import matplotlib.pyplot as plt\n",
 43 |     "from IPython.display import Image\n",
 44 |     "from datacube import Datacube\n",
 45 |     "import numpy as np\n",
 46 |     "import xarray as xr\n",
 47 |     "\n",
 48 |     "import sys\n",
 49 |     "sys.path.append(\"../scripts\")\n",
 50 |     "import pandas as pd\n",
 51 |     "from dea_plotting import display_map\n",
 52 |     "from dea_plotting import rgb"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "<!-- ### Set up distributed computing\n",
 60 |     "\n",
 61 |     "Calculating the maximum value for each pixel in an image can be computationally expensive. However, it is possible to reduce the computation time by parallelising the process through Dask. Access to Dask is provided in the Azure environment. \n",
 62 |     "\n",
 63 |     "For more information about using Dask, refer to the Parallel processing with Dask notebook.\n",
 64 |     "\n",
 65 |     "To set up distributed computing with Dask, you need to first set up a Dask client using the function below: -->"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "### Load Sentinel-1 data"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "# instantiate a datacube object\n",
 82 |     "dc = Datacube(app=\"Sentinel-1 example\")"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "metadata": {},
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "# locate the Sentinel-1 RTC product dataset\n",
 92 |     "senti_datasets = dc.find_datasets(product='s1_rtc')\n",
 93 |     "senti_datasets = sorted(senti_datasets, \n",
 94 |     "                        key = lambda ds: (ds.center_time, ds.id))\n",
 95 |     "\n",
 96 |     "# check available information for a random dataset\n",
 97 |     "sample = senti_datasets[100]\n",
 98 |     "print(sample, '\\n')\n",
 99 |     "print('sample_0 metadata_label: ', sample.metadata_doc['label'])\n",
100 |     "print(f'\\nNumber of s1_rtc datasets: {len(senti_datasets)}')"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {},
107 |    "outputs": [],
108 |    "source": [
109 |     "# read other infomation in the metadata\n",
110 |     "sample.metadata_doc"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "You can use the metadata to match your training data, e.g., by coordinates and datetime properties. This is part of the challenge! Here we load a random single radar image to learn about its features."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "# load the Sentinel-1 backscatter data\n",
127 |     "s1 = dc.load(product='s1_rtc', \n",
128 |     "             id='e320bae8-996e-56ea-ae68-0efbefa47e9a', \n",
129 |     "             output_crs='epsg:4326', \n",
130 |     "             resolution=(-0.0002,0.0002))"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": null,
136 |    "metadata": {},
137 |    "outputs": [],
138 |    "source": [
139 |     "print(s1)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": null,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "id = 'e320bae8-996e-56ea-ae68-0efbefa47e9a'\n",
149 |     "selected_id = [ds for ds in senti_datasets if ds.metadata_doc['id'] == id]\n",
150 |     "\n",
151 |     "# extract latitude, longtitude, and datetime information \n",
152 |     "lat_dict = selected_id[0].metadata_doc['extent']['lat']\n",
153 |     "lon_dict = selected_id[0].metadata_doc['extent']['lon']\n",
154 |     "dt_string = selected_id[0].metadata_doc['properties']['datetime']"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": null,
160 |    "metadata": {},
161 |    "outputs": [],
162 |    "source": [
163 |     "latitude = (lat_dict['begin'], lat_dict['end'])\n",
164 |     "longitude = (lon_dict['begin'], lon_dict['end'])"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "###  View the selected location interactively\n",
172 |     "The next cell will display the selected area on an interactive map. Feel free to zoom in and out to get a better understanding of the area you’ll be analysing. Clicking on any point of the map will reveal the latitude and longitude coordinates of that point."
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": null,
178 |    "metadata": {
179 |     "scrolled": true
180 |    },
181 |    "outputs": [],
182 |    "source": [
183 |     "display_map(x=longitude, y=latitude)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "### Visualise loaded data in different channels\n",
191 |     "\n",
192 |     "Sentinel-1 backscatter data has two measurements, VV and VH, which correspond to the polarisation of the light sent and received by the satellite. VV refers to the satellite sending out vertically-polarised light and receiving vertically-polarised light back, whereas VH refers to the satellite sending out vertically-polarised light and receiving horizontally-polarised light back. These two measurement bands can tell us different information about the area we’re studying.\n",
193 |     "\n",
194 |     "The Co-polarisation (VV) channel is indicative of surface scattering such as soil, rock and corner relections from buildings. Relatively smooth surfaces such as water and bare grounds have very little backscatter and will appear black in the image.\n",
195 |     "\n",
196 |     "Cross-polarisation (VH) channel is indicative of volume scattering. It can be used to separte vegetated areas and non-vegetated areas. Water, bare soil and rock will also appear black in the VH images. Vegetated areas have larger volume scattering and will appear brighter.\n",
197 |     "\n",
198 |     "The ratio of the VV and VH channel is used to highlight the similarity and discrepancy of the two channels. The ratio image provides a higher contrast for areas where the surface scattering and volume scattering act differently. For example, vegetated areas will have a high VH/VV value, whereas bare grounds will have a low VH/VV value. "
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": null,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "# add two VH/VV and VV/VH ratio channels\n",
208 |     "s1[\"vv_vh\"] = s1.vv/s1.vh\n",
209 |     "s1[\"vh_vv\"] = s1.vh/s1.vv\n",
210 |     "\n",
211 |     "# add normalised channels for making RGB images below\n",
212 |     "s1[\"vv_r\"] = s1.vv/s1.vv.mean()\n",
213 |     "s1[\"vh_g\"] = s1.vh/s1.vh.mean()\n",
214 |     "s1[\"vhvv_b\"] = s1.vh_vv/s1.vh_vv.mean()"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": null,
220 |    "metadata": {
221 |     "scrolled": true
222 |    },
223 |    "outputs": [],
224 |    "source": [
225 |     "# Plot the VH observation\n",
226 |     "s1.vh_g.plot(cmap=\"viridis\", robust=True, figsize=(8, 8))\n",
227 |     "plt.show()"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": null,
233 |    "metadata": {},
234 |    "outputs": [],
235 |    "source": [
236 |     "# Plot the VV observation\n",
237 |     "s1.vv_r.plot(cmap=\"viridis\", robust=True, figsize=(8, 8))\n",
238 |     "plt.show()"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {
245 |     "scrolled": true
246 |    },
247 |    "outputs": [],
248 |    "source": [
249 |     "# Plot the VH/VH ration image\n",
250 |     "s1.vh_vv.plot(cmap=\"viridis\", robust=True, figsize=(8, 8))\n",
251 |     "plt.show()"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "markdown",
256 |    "metadata": {},
257 |    "source": [
258 |     "### Make a false-color composite image"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "execution_count": null,
264 |    "metadata": {},
265 |    "outputs": [],
266 |    "source": [
267 |     "# method 1\n",
268 |     "s1[[\"vv_r\", \"vh_g\", \"vhvv_b\"]].isel(time=0).to_array().plot.imshow(robust=True, figsize=(6, 6))"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": null,
274 |    "metadata": {},
275 |    "outputs": [],
276 |    "source": [
277 |     "# method 2\n",
278 |     "# normalise the datacube so that RGB channels are in a similar range\n",
279 |     "red_arr = xr.DataArray(s1.vv) / np.nanmean(xr.DataArray(s1.vv))\n",
280 |     "green_arr = xr.DataArray(s1.vh) / np.nanmean(xr.DataArray(s1.vh))\n",
281 |     "blue_arr = xr.DataArray(s1.vh_vv) / np.nanmean(xr.DataArray(s1.vh_vv))\n",
282 |     "composite_arr = xr.Dataset({'red': red_arr,\n",
283 |     "                           'green': green_arr,\n",
284 |     "                           'blue': blue_arr})\n",
285 |     "rgb(composite_arr, \n",
286 |     "    bands=['red', 'green', 'blue'], \n",
287 |     "    size=10, index=0,\n",
288 |     "    percentile_stretch = [0.05, 0.95])\n",
289 |     "plt.show()"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "### Conclusion\n",
297 |     "The red colors in the RGB images highlight regions of bare soil or rock (high VV values, low VH and VH/VV values).  The blue areas are low VV and high VH values, indicative of vegetated regions. The dark areas could indicate water. These images can help trace the effect of fire before and after a disaster.\n",
298 |     "\n",
299 |     "Other examples about using the Sentinel-1 data, including making use of\n",
300 |     "the parallel-computing library `Dask` can be found here:\n",
301 |     "https://docs.dea.ga.gov.au/notebooks/Real_world_examples/Shipping_lane_identification.html"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "markdown",
306 |    "metadata": {},
307 |    "source": [
308 |     "***\n",
309 |     "## Additional information\n",
310 |     "\n",
311 |     "**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). \n",
312 |     "Digital Earth Australia data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.\n",
313 |     "\n",
314 |     "**Contact:** If you need assistance, please review the FAQ section and support options on the [EY Data Science Platform](https://datascience.ey.com/)."
315 |    ]
316 |   }
317 |  ],
318 |  "metadata": {
319 |   "kernelspec": {
320 |    "display_name": "Python 3",
321 |    "language": "python",
322 |    "name": "python3"
323 |   },
324 |   "language_info": {
325 |    "codemirror_mode": {
326 |     "name": "ipython",
327 |     "version": 3
328 |    },
329 |    "file_extension": ".py",
330 |    "mimetype": "text/x-python",
331 |    "name": "python",
332 |    "nbconvert_exporter": "python",
333 |    "pygments_lexer": "ipython3",
334 |    "version": "3.6.9"
335 |   }
336 |  },
337 |  "nbformat": 4,
338 |  "nbformat_minor": 4
339 | }
340 | 


--------------------------------------------------------------------------------
/notebooks/04_EY_challenge2/resources/animated_timeseries.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/04_EY_challenge2/resources/animated_timeseries.gif


--------------------------------------------------------------------------------
/notebooks/04_EY_challenge2/resources/challenge2_train.csv:
--------------------------------------------------------------------------------
  1 | id,label,dateTimeLocal,dateTimeUTC,event
  2 | 0,ROSEDALE_P1_201901041439_MGA94_55,4/01/2019 14:39,4/01/2019 3:39,Rosedale
  3 | 1,ROSEDALE_1_P1_201901041446_MGA94_55,4/01/2019 14:46,4/01/2019 3:46,Rosedale
  4 | 2,ROSEDALE_3_P1_201901041501_MGA94_55,4/01/2019 15:01,4/01/2019 4:01,Rosedale
  5 | 3,POINT_H_62_P1_201901162128_MGA94_55,16/01/2019 21:28,16/01/2019 10:28,Tambo76
  6 | 4,NUNNETT_73_P1_201901171134_MGA94_55,17/01/2019 11:34,17/01/2019 0:34,Tambo76
  7 | 5,NUNNETT_88_P1_201901171656_MGA94_55,17/01/2019 16:56,17/01/2019 5:56,Tambo76
  8 | 6,NUNNETT_96_P1_201901172230_MGA94_55,17/01/2019 22:30,17/01/2019 11:30,Tambo76
  9 | 7,NUNNETT_107_P1_201901181517_MGA94_55,18/01/2019 15:17,18/01/2019 4:17,Tambo76
 10 | 8,NUNNETT_121_P1_201901191642_MGA94_55,19/01/2019 16:42,19/01/2019 5:42,Tambo76
 11 | 9,NUNNETT_128_P1_201901192201_MGA94_55,19/01/2019 22:01,19/01/2019 11:01,Tambo76
 12 | 10,NUNNETT_173_P1_201901251120_MGA94_55,25/01/2019 11:20,25/01/2019 0:20,Tambo76
 13 | 11,NUNNETT_174_P1_201901251128_MGA94_55,25/01/2019 11:28,25/01/2019 0:28,Tambo76
 14 | 12,NUNNETT_175_P1_201901251137_MGA94_55,25/01/2019 11:37,25/01/2019 0:37,Tambo76
 15 | 13,NUNNETT_176_P1_201901251157_MGA94_55,25/01/2019 11:57,25/01/2019 0:57,Tambo76
 16 | 14,NUNNETT_177_P1_201901251211_MGA94_55,25/01/2019 12:11,25/01/2019 1:11,Tambo76
 17 | 15,NUNNETT_179_P1_201901251234_MGA94_55,25/01/2019 12:34,25/01/2019 1:34,Tambo76
 18 | 16,NUNNETT_180_P1_201901251246_MGA94_55,25/01/2019 12:46,25/01/2019 1:46,Tambo76
 19 | 17,NUNNETT_181_P1_201901251254_MGA94_55,25/01/2019 12:54,25/01/2019 1:54,Tambo76
 20 | 18,NUNNETT_182_P1_201901251321_MGA94_55,25/01/2019 13:21,25/01/2019 2:21,Tambo76
 21 | 19,NUNNETT_183_P1_201901251342_MGA94_55,25/01/2019 13:42,25/01/2019 2:42,Tambo76
 22 | 20,NUNNETT_184_P1_201901251400_MGA94_55,25/01/2019 14:00,25/01/2019 3:00,Tambo76
 23 | 21,NUNNETT_185_P1_201901251419_MGA94_55,25/01/2019 14:19,25/01/2019 3:19,Tambo76
 24 | 22,NUNNETT_186_P1_201901251432_MGA94_55,25/01/2019 14:32,25/01/2019 3:32,Tambo76
 25 | 23,ABERFELDY_WEST_200_P1_201901260955_MGA94_55,26/01/2019 9:55,25/01/2019 22:55,Latrobe86
 26 | 24,SUCKLINGS_RD_203_P1_201901261039_MGA94_55,26/01/2019 10:39,25/01/2019 23:39,Latrobe86
 27 | 25,SUCKLINGS_JIM_TRACK__CREAM_CAN_HILL_205_P1_201901261050_MGA94_55,26/01/2019 10:50,25/01/2019 23:50,Latrobe86
 28 | 26,ABERFELDY_WEST_214_P1_201901261750_MGA94_55,26/01/2019 17:50,26/01/2019 6:50,Latrobe86
 29 | 27,CREAM_JIM_JORDAN_217_P1_201901262218_MGA94_55,26/01/2019 22:18,26/01/2019 11:18,Latrobe86
 30 | 28,SUCKLINGS_RD_KNAPPING_TRACK_226_P1_201901271405_MGA94_55,27/01/2019 14:05,27/01/2019 3:05,Latrobe86
 31 | 29,JORDAN_231_P1_201901271500_MGA94_55,27/01/2019 15:00,27/01/2019 4:00,Latrobe86
 32 | 30,JORDAN_233_P1_201901271511_MGA94_55,27/01/2019 15:11,27/01/2019 4:11,Latrobe86
 33 | 31,JORDAN_234_P1_201901271901_MGA94_55,27/01/2019 19:01,27/01/2019 8:01,Latrobe86
 34 | 32,JORDAN_264_P1_201901301524_MGA94_55,30/01/2019 15:24,30/01/2019 4:24,Latrobe86
 35 | 33,MCCALLISTER79_LA_TROBE_97_266_P1_201901301552_MGA94_55,30/01/2019 15:52,30/01/2019 4:52,Walhalla
 36 | 34,LA_TROBE_97_MCCALLISTER_79_268_P1_201901301611_MGA94_55,30/01/2019 16:11,30/01/2019 5:11,Walhalla
 37 | 35,MCCALLISTER_79_LA_TROBE_97_269_P1_201901301624_MGA94_55,30/01/2019 16:24,30/01/2019 5:24,Walhalla
 38 | 36,WALHALLA_295_P1_201902011156_MGA94_55,1/02/2019 11:56,1/02/2019 0:56,Walhalla
 39 | 37,JORDAN_310_P1_201902012046_MGA94_55,1/02/2019 20:46,1/02/2019 9:46,Latrobe86
 40 | 38,WALHALLA_339_P1_201902030520_MGA94_55,3/02/2019 5:20,2/02/2019 18:20,Walhalla
 41 | 39,WALHALLA_346_P1_201902031122_MGA94_55,3/02/2019 11:22,3/02/2019 0:22,Walhalla
 42 | 40,WALHALLA_352_P1_201902031612_MGA94_55,3/02/2019 16:12,3/02/2019 5:12,Walhalla
 43 | 41,WALHALLA_354_P1_201902031637_MGA94_55,3/02/2019 16:37,3/02/2019 5:37,Walhalla
 44 | 42,WALHALLA_362_P1_201902040015_MGA94_55,4/02/2019 0:15,3/02/2019 13:15,Walhalla
 45 | 43,WALHALLA_363_P1_201902040024_MGA94_55,4/02/2019 0:24,3/02/2019 13:24,Walhalla
 46 | 44,WALHALLA_368_P1_201902040621_MGA94_55,4/02/2019 6:21,3/02/2019 19:21,Walhalla
 47 | 45,WALHALLA_373_P1_201902040748_MGA94_55,4/02/2019 7:48,3/02/2019 20:48,Walhalla
 48 | 46,WALHALLA_378_P1_201902041304_MGA94_55,4/02/2019 13:04,4/02/2019 2:04,Walhalla
 49 | 47,WALHALLA_379_P1_201902041319_MGA94_55,4/02/2019 13:19,4/02/2019 2:19,Walhalla
 50 | 48,WALHALLA_380_P1_201902061347_MGA94_55,6/02/2019 13:47,6/02/2019 2:47,Walhalla
 51 | 49,WALHALLA_381_P1_201902061356_MGA94_55,6/02/2019 13:56,6/02/2019 2:56,Walhalla
 52 | 50,WALHALLA_397_P1_201902251311_MGA94_55,25/02/2019 13:11,25/02/2019 2:11,Walhalla
 53 | 51,WALHALLA_398_P1_201902251323_MGA94_55,25/02/2019 13:23,25/02/2019 2:23,Walhalla
 54 | 52,JORDAN_399_P1_201902251330_MGA94_55,25/02/2019 13:30,25/02/2019 2:30,Latrobe86
 55 | 53,JORDAN_400_P1_201902251339_MGA94_55,25/02/2019 13:39,25/02/2019 2:39,Latrobe86
 56 | 54,WALHALLA_401_P1_201902251342_MGA94_55,25/02/2019 13:42,25/02/2019 2:42,Walhalla
 57 | 55,WALHALLA_402_P1_201902251413_MGA94_55,25/02/2019 14:13,25/02/2019 3:13,Walhalla
 58 | 56,WALHALLA_413_P1_201902261957_MGA94_55,26/02/2019 19:57,26/02/2019 8:57,Walhalla
 59 | 57,WALHALLA_414_P1_201902262006_MGA94_55,26/02/2019 20:06,26/02/2019 9:06,Walhalla
 60 | 58,GIPPSLAND_TRACK1_447_P1_201903011212_MGA94_55,1/03/2019 12:12,1/03/2019 1:12,Other
 61 | 59,MACALISTER_612_P1_201903051406_MGA94_55,5/03/2019 14:06,5/03/2019 3:06,Macalister91 & Macalister97
 62 | 60,MACALISTER_613_P1_201903051418_MGA94_55,5/03/2019 14:18,5/03/2019 3:18,Macalister91 & Macalister97
 63 | 61,MACALISTER_695_P1_201903091542_MGA94_55,9/03/2019 15:42,9/03/2019 4:42,Macalister91
 64 | 62,MACALISTER_696_P1_201903091545_MGA94_55,9/03/2019 15:45,9/03/2019 4:45,Macalister97
 65 | 63,MACALISTER_697_P1_201903091555_MGA94_55,9/03/2019 15:55,9/03/2019 4:55,Macalister97
 66 | 64,MACALISTER_698_P1_201903091600_MGA94_55,9/03/2019 16:00,9/03/2019 5:00,Macalister91
 67 | 65,MACALISTER_699_P1_201903091610_MGA94_55,9/03/2019 16:10,9/03/2019 5:10,Macalister91
 68 | 66,MACALISTER_700_P1_201903091613_MGA94_55,9/03/2019 16:13,9/03/2019 5:13,Macalister97
 69 | 67,YARRA51_620_P1_201903051812_MGA94_55,5/03/2019 18:12,5/03/2019 7:12,Yarra51
 70 | 68,YARRA51_622_P1_201903051841_MGA94_55,5/03/2019 18:41,5/03/2019 7:41,Yarra51
 71 | 69,MACALISTER85_624_P1_201903051907_MGA94_55,5/03/2019 19:07,5/03/2019 8:07,Macalister97
 72 | 70,YARRA51_633_P1_201903061644_MGA94_55,6/03/2019 16:44,6/03/2019 5:44,Yarra51
 73 | 71,MACALISTER85_684_P1_201903080536_MGA94_55,8/03/2019 5:36,7/03/2019 18:36,Macalister97
 74 | 72,MACALISTER85_723_P1_201903100101_MGA94_55,10/03/2019 1:01,9/03/2019 14:01,Macalister97
 75 | 73,MACALISTER87_719_P1_201903100033_MGA94_55,10/03/2019 0:33,9/03/2019 13:33,Macalister97
 76 | 74,MACALISTER89_678_P1_201903080454_MGA94_55,8/03/2019 4:54,7/03/2019 17:54,Other
 77 | 75,MACALISTER89_693_P1_201903091518_MGA94_55,9/03/2019 15:18,9/03/2019 4:18,Other
 78 | 76,MACALISTER89_716_P1_201903100003_MGA94_55,10/03/2019 0:03,9/03/2019 13:03,Other
 79 | 77,MACALISTER89_763_P1_201903131205_MGA94_55,13/03/2019 12:05,13/03/2019 1:05,Other
 80 | 78,MACALISTER89_773_P1_201903141329_MGA94_55,14/03/2019 13:29,14/03/2019 2:29,Other
 81 | 79,MACALISTER89_782_P1_201903141444_MGA94_55,14/03/2019 14:44,14/03/2019 3:44,Other
 82 | 80,MACALISTER89_786_P1_201903151302_MGA94_55,15/03/2019 13:02,15/03/2019 2:02,Other
 83 | 81,MACALISTER89_801_P1_201903161430_MGA94_55,16/03/2019 14:30,16/03/2019 3:30,Other
 84 | 82,MACALISTER91_615_P1_201903051438_MGA94_55,5/03/2019 14:38,5/03/2019 3:38,Macalister91
 85 | 83,MACALISTER91_616_P1_201903051446_MGA94_55,5/03/2019 14:46,5/03/2019 3:46,Macalister91
 86 | 84,MACALISTER91_617_P1_201903051456_MGA94_55,5/03/2019 14:56,5/03/2019 3:56,Macalister91
 87 | 85,MACALISTER91_618_P1_201903051503_MGA94_55,5/03/2019 15:03,5/03/2019 4:03,Macalister91
 88 | 86,MACALISTER91_621_P1_201903051827_MGA94_55,5/03/2019 18:27,5/03/2019 7:27,Macalister91
 89 | 87,YARRA51_704_P1_201903091659_MGA94_55,9/03/2019 16:59,9/03/2019 5:59,Yarra51
 90 | 88,MACALISTER91_681_P1_201903080517_MGA94_55,8/03/2019 5:17,7/03/2019 18:17,Macalister91
 91 | 89,MACALISTER91_682_P1_201903080526_MGA94_55,8/03/2019 5:26,7/03/2019 18:26,Macalister91
 92 | 90,MACALISTER91_685_P1_201903080539_MGA94_55,8/03/2019 5:39,7/03/2019 18:39,Macalister91
 93 | 91,MACALISTER91_752_P1_201903111601_MGA94_55,11/03/2019 16:01,11/03/2019 5:01,Macalister91
 94 | 92,MACALISTER91_755_P1_201903111619_MGA94_55,11/03/2019 16:19,11/03/2019 5:19,Macalister91
 95 | 93,MACALISTER91_760_P1_201903111709_MGA94_55,11/03/2019 17:09,11/03/2019 6:09,Macalister91
 96 | 94,YARRA51_726_P1_201903100129_MGA94_55,10/03/2019 1:29,9/03/2019 14:29,Yarra51
 97 | 95,MACALISTER91_761_P1_201903111720_MGA94_55,11/03/2019 17:20,11/03/2019 6:20,Macalister91
 98 | 96,MACALISTER91_762_P1_201903111730_MGA94_55,11/03/2019 17:30,11/03/2019 6:30,Macalister91
 99 | 97,MACALISTER91_766_P1_201903131239_MGA94_55,13/03/2019 12:39,13/03/2019 1:39,Macalister91
100 | 98,MACALISTER91_767_P1_201903131252_MGA94_55,13/03/2019 12:52,13/03/2019 1:52,Macalister91
101 | 99,MACALISTER91_770_P1_201903131314_MGA94_55,13/03/2019 13:14,13/03/2019 2:14,Macalister91
102 | 100,MACALISTER91_783_P1_201903141643_MGA94_55,14/03/2019 16:43,14/03/2019 5:43,Macalister91
103 | 101,MACALISTER91_789_P1_201903151330_MGA94_55,15/03/2019 13:30,15/03/2019 2:30,Macalister91
104 | 102,MACALISTER91_790_P1_201903151340_MGA94_55,15/03/2019 13:40,15/03/2019 2:40,Macalister91
105 | 103,MACALISTER91_793_P1_201903151358_MGA94_55,15/03/2019 13:58,15/03/2019 2:58,Macalister91
106 | 104,MACALISTER91_804_P1_201903161503_MGA94_55,16/03/2019 15:03,16/03/2019 4:03,Macalister91
107 | 105,MACALISTER91_805_P1_201903161517_MGA94_55,16/03/2019 15:17,16/03/2019 4:17,Macalister91
108 | 106,MACALISTER91_808_P1_201903161539_MGA94_55,16/03/2019 15:39,16/03/2019 4:39,Macalister91
109 | 107,MACALISTER91_99_649_P1_201903070453_MGA94_55,7/03/2019 4:53,6/03/2019 17:53,Macalister91
110 | 108,MACALISTER97_680_P1_201903080512_MGA94_55,8/03/2019 5:12,7/03/2019 18:12,Macalister97
111 | 109,MACALISTER97_720_P1_201903100042_MGA94_55,10/03/2019 0:42,9/03/2019 13:42,Macalister97
112 | 110,MACALISTER97_753_P1_201903111605_MGA94_55,11/03/2019 16:05,11/03/2019 5:05,Macalister97
113 | 111,MACALISTER97_765_P1_201903131230_MGA94_55,13/03/2019 12:30,13/03/2019 1:30,Macalister97
114 | 112,MACALISTER97_768_P1_201903131255_MGA94_55,13/03/2019 12:55,13/03/2019 1:55,Macalister97
115 | 113,YARRA51_794_P1_201903151412_MGA94_55,15/03/2019 14:12,15/03/2019 3:12,Yarra51
116 | 114,YARRA54_795_P1_201903151422_MGA94_55,15/03/2019 14:22,15/03/2019 3:22,Other
117 | 115,MACALISTER97_769_P1_201903131306_MGA94_55,13/03/2019 13:06,13/03/2019 2:06,Macalister97
118 | 116,MACALISTER97_774_P1_201903141339_MGA94_55,14/03/2019 13:39,14/03/2019 2:39,Macalister97
119 | 117,MACALISTER97_777_P1_201903141358_MGA94_55,14/03/2019 13:58,14/03/2019 2:58,Macalister97
120 | 118,MACALISTER97_778_P1_201903141405_MGA94_55,14/03/2019 14:05,14/03/2019 3:05,Macalister97
121 | 119,MACALISTER97_788_P1_201903151324_MGA94_55,15/03/2019 13:24,15/03/2019 2:24,Macalister97
122 | 120,MACALISTER97_791_P1_201903151343_MGA94_55,15/03/2019 13:43,15/03/2019 2:43,Macalister97
123 | 121,MACALISTER97_792_P1_201903151352_MGA94_55,15/03/2019 13:52,15/03/2019 2:52,Macalister97
124 | 122,MACALISTER97_803_P1_201903161455_MGA94_55,16/03/2019 14:55,16/03/2019 3:55,Macalister97
125 | 123,YARRA51_809_P1_201903161558_MGA94_55,16/03/2019 15:58,16/03/2019 4:58,Yarra51
126 | 124,MACALISTER97_806_P1_201903161522_MGA94_55,16/03/2019 15:22,16/03/2019 4:22,Macalister97
127 | 125,MACALISTER97_807_P1_201903161532_MGA94_55,16/03/2019 15:32,16/03/2019 4:32,Macalister97
128 | 126,MACALISTER99_623_P1_201903051858_MGA94_55,5/03/2019 18:58,5/03/2019 7:58,Macalister91
129 | 127,MACALISTER99_646_P1_201903070440_MGA94_55,7/03/2019 4:40,6/03/2019 17:40,Macalister91
130 | 128,MACALISTER99_683_P1_201903080529_MGA94_55,8/03/2019 5:29,7/03/2019 18:29,Macalister97
131 | 129,JORDAN_235_P1_201901281204_MGA94_55,28/01/2019 12:04,28/01/2019 1:04,Latrobe86
132 | 130,JORDAN_294_P1_201902011150_MGA94_55,1/02/2019 11:50,1/02/2019 0:50,Latrobe86
133 | 131,MACALISTER91_648_P1_201903070444_MGA94_55,3/07/2019 4:44,2/07/2019 17:44,Macalister91
134 | 132,WALHALLA_313_P1_201902020733_MGA94_55,2/02/2019 7:33,1/02/2019 20:33,Walhalla
135 | 133,WALHALLA_353_P1_201902031625_MGA94_55,3/02/2019 16:25,3/02/2019 5:25,Walhalla
136 | 


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/02_DEA/dea_products.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/02_DEA/dea_products.jpg


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/02_DEA/nbar_nbart_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/02_DEA/nbar_nbart_animation.gif


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/02_DEA/odc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/02_DEA/odc.png


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/03_Products_and_measurements/DEAExplorer.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/03_Products_and_measurements/DEAExplorer.JPG


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/04_Loading_data/dea_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/04_Loading_data/dea_logo.jpg


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/04_Loading_data/progress_bar.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/04_Loading_data/progress_bar.jpg


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/06_Basic_analysis/latlong_buffer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/06_Basic_analysis/latlong_buffer.png


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/07_Intro_to_numpy/africa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/07_Intro_to_numpy/africa.png


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/07_Intro_to_numpy/numpy_array_t.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/07_Intro_to_numpy/numpy_array_t.png


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/08_Intro_to_xarray/dataset-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/08_Intro_to_xarray/dataset-diagram.png


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/08_Intro_to_xarray/example_netcdf.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/08_Intro_to_xarray/example_netcdf.nc


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/EY_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/EY_logo.png


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/dea_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/dea_logo.jpg


--------------------------------------------------------------------------------
/notebooks/Supplementary_data/dea_logo_wide.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/notebooks/Supplementary_data/dea_logo_wide.jpg


--------------------------------------------------------------------------------
/notebooks/datacube_viewer.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Datacube Viewer\n",
 8 |     "\n",
 9 |     "This notebook contains a viewer that shows what scenes are available in this Open Data Cube instance.\n",
10 |     "\n",
11 |     "There are products from five satellites: Sentinel-1, Sentinel-2a, Sentinel-2b, Landsat-7 and Landsat-8. There is also a Linescan product which was taken by airplane during bushfire events. \n",
12 |     "\n",
13 |     "Each scene can be loaded in part or as a whole or along with other scenes using the Open Data Cube."
14 |    ]
15 |   },
16 |   {
17 |    "cell_type": "code",
18 |    "execution_count": 1,
19 |    "metadata": {},
20 |    "outputs": [
21 |     {
22 |      "data": {
23 |       "application/vnd.jupyter.widget-view+json": {
24 |        "model_id": "20ebc72439e14be3bd3fe1dc8ff21b67",
25 |        "version_major": 2,
26 |        "version_minor": 0
27 |       },
28 |       "text/plain": [
29 |        "VBox(children=(HBox(children=(Dropdown(layout=Layout(flex='0 1 auto', width='10em'), options=('ga_ls7e_ard_3',…"
30 |       ]
31 |      },
32 |      "metadata": {},
33 |      "output_type": "display_data"
34 |     }
35 |    ],
36 |    "source": [
37 |     "from odc.ui import DcViewer\n",
38 |     "from datacube import Datacube\n",
39 |     "\n",
40 |     "dc = Datacube()\n",
41 |     "\n",
42 |     "DcViewer(\n",
43 |     "    dc, \n",
44 |     "    time='2019-01-01',\n",
45 |     "    zoom=5,\n",
46 |     "    center=(-38, 142),\n",
47 |     "    height='500px', width='800px',\n",
48 |     "    products='non-empty',\n",
49 |     "    style={\n",
50 |     "        'fillOpacity': 0.05,\n",
51 |     "        'color': 'teal',\n",
52 |     "        'weight': 0.7\n",
53 |     "    }\n",
54 |     ")"
55 |    ]
56 |   }
57 |  ],
58 |  "metadata": {
59 |   "kernelspec": {
60 |    "display_name": "Python 3",
61 |    "language": "python",
62 |    "name": "python3"
63 |   },
64 |   "language_info": {
65 |    "codemirror_mode": {
66 |     "name": "ipython",
67 |     "version": 3
68 |    },
69 |    "file_extension": ".py",
70 |    "mimetype": "text/x-python",
71 |    "name": "python",
72 |    "nbconvert_exporter": "python",
73 |    "pygments_lexer": "ipython3",
74 |    "version": "3.6.9"
75 |   }
76 |  },
77 |  "nbformat": 4,
78 |  "nbformat_minor": 4
79 | }
80 | 


--------------------------------------------------------------------------------
/notebooks/scripts/dea_bandindices.py:
--------------------------------------------------------------------------------
  1 | ## dea_bandindices.py
  2 | '''
  3 | Description: This file contains a set of python functions for computing
  4 | remote sensing band indices on Digital Earth Australia data.
  5 | 
  6 | License: The code in this notebook is licensed under the Apache License,
  7 | Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth
  8 | Australia data is licensed under the Creative Commons by Attribution 4.0
  9 | license (https://creativecommons.org/licenses/by/4.0/).
 10 | 
 11 | Contact: If you need assistance, please post a question on the Open Data
 12 | Cube Slack channel (http://slack.opendatacube.org/) or on the GIS Stack
 13 | Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube)
 14 | using the `open-data-cube` tag (you can view previously asked questions
 15 | here: https://gis.stackexchange.com/questions/tagged/open-data-cube).
 16 | 
 17 | If you would like to report an issue with this script, you can file one
 18 | on Github (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new).
 19 | 
 20 | Last modified: September 2020
 21 | 
 22 | '''
 23 | 
 24 | # Import required packages
 25 | import warnings
 26 | 
 27 | # Define custom functions
 28 | def calculate_indices(ds,
 29 |                       index=None,
 30 |                       collection=None,
 31 |                       custom_varname=None,
 32 |                       normalise=True,
 33 |                       drop=False,
 34 |                       inplace=False):
 35 |     """
 36 |     Takes an xarray dataset containing spectral bands, calculates one of
 37 |     a set of remote sensing indices, and adds the resulting array as a 
 38 |     new variable in the original dataset.  
 39 |     
 40 |     Note: by default, this function will create a new copy of the data
 41 |     in memory. This can be a memory-expensive operation, so to avoid
 42 |     this, set `inplace=True`.
 43 | 
 44 |     Last modified: September 2020
 45 |     
 46 |     Parameters
 47 |     ----------
 48 |     ds : xarray Dataset
 49 |         A two-dimensional or multi-dimensional array with containing the
 50 |         spectral bands required to calculate the index. These bands are
 51 |         used as inputs to calculate the selected water index.
 52 |     index : str or list of strs
 53 |         A string giving the name of the index to calculate or a list of
 54 |         strings giving the names of the indices to calculate:
 55 |         'AWEI_ns (Automated Water Extraction Index,
 56 |                   no shadows, Feyisa 2014)
 57 |         'AWEI_sh' (Automated Water Extraction Index,
 58 |                    shadows, Feyisa 2014)
 59 |         'BAEI' (Built-Up Area Extraction Index, Bouzekri et al. 2015)
 60 |         'BAI' (Burn Area Index, Martin 1998)
 61 |         'BSI' (Bare Soil Index, Rikimaru et al. 2002)
 62 |         'BUI' (Built-Up Index, He et al. 2010)
 63 |         'CMR' (Clay Minerals Ratio, Drury 1987)
 64 |         'EVI' (Enhanced Vegetation Index, Huete 2002)
 65 |         'FMR' (Ferrous Minerals Ratio, Segal 1982)
 66 |         'IOR' (Iron Oxide Ratio, Segal 1982)
 67 |         'LAI' (Leaf Area Index, Boegh 2002)
 68 |         'MNDWI' (Modified Normalised Difference Water Index, Xu 1996)
 69 |         'MSAVI' (Modified Soil Adjusted Vegetation Index,
 70 |                  Qi et al. 1994)              
 71 |         'NBI' (New Built-Up Index, Jieli et al. 2010)
 72 |         'NBR' (Normalised Burn Ratio, Lopez Garcia 1991)
 73 |         'NDBI' (Normalised Difference Built-Up Index, Zha 2003)
 74 |         'NDCI' (Normalised Difference Chlorophyll Index, 
 75 |                 Mishra & Mishra, 2012)
 76 |         'NDMI' (Normalised Difference Moisture Index, Gao 1996)        
 77 |         'NDSI' (Normalised Difference Snow Index, Hall 1995)
 78 |         'NDTI' (Normalise Difference Tillage Index,
 79 |                 Van Deventeret et al. 1997)
 80 |         'NDVI' (Normalised Difference Vegetation Index, Rouse 1973)
 81 |         'NDWI' (Normalised Difference Water Index, McFeeters 1996)
 82 |         'SAVI' (Soil Adjusted Vegetation Index, Huete 1988)
 83 |         'TCB' (Tasseled Cap Brightness, Crist 1985)
 84 |         'TCG' (Tasseled Cap Greeness, Crist 1985)
 85 |         'TCW' (Tasseled Cap Wetness, Crist 1985)
 86 |         'WI' (Water Index, Fisher 2016) 
 87 |     collection : str
 88 |         An string that tells the function what data collection is 
 89 |         being used to calculate the index. This is necessary because 
 90 |         different collections use different names for bands covering 
 91 |         a similar spectra. Valid options are 'ga_ls_2' (for GA 
 92 |         Landsat Collection 2), 'ga_ls_3' (for GA Landsat Collection 3) 
 93 |         and 'ga_s2_1' (for GA Sentinel 2 Collection 1).
 94 |     custom_varname : str, optional
 95 |         By default, the original dataset will be returned with 
 96 |         a new index variable named after `index` (e.g. 'NDVI'). To 
 97 |         specify a custom name instead, you can supply e.g. 
 98 |         `custom_varname='custom_name'`. Defaults to None, which uses
 99 |         `index` to name the variable. 
100 |     normalise : bool, optional
101 |         Some coefficient-based indices (e.g. 'WI', 'BAEI', 'AWEI_ns', 
102 |         'AWEI_sh', 'TCW', 'TCG', 'TCB', 'EVI', 'LAI', 'SAVI', 'MSAVI') 
103 |         produce different results if surface reflectance values are not 
104 |         scaled between 0.0 and 1.0 prior to calculating the index. 
105 |         Setting `normalise=True` first scales values to a 0.0-1.0 range
106 |         by dividing by 10000.0. Defaults to True.  
107 |     drop : bool, optional
108 |         Provides the option to drop the original input data, thus saving 
109 |         space. if drop = True, returns only the index and its values.
110 |     inplace: bool, optional
111 |         If `inplace=True`, calculate_indices will modify the original
112 |         array in-place, adding bands to the input dataset. The default
113 |         is `inplace=False`, which will instead make a new copy of the
114 |         original data (and use twice the memory).
115 |         
116 |     Returns
117 |     -------
118 |     ds : xarray Dataset
119 |         The original xarray Dataset inputted into the function, with a 
120 |         new varible containing the remote sensing index as a DataArray.
121 |         If drop = True, the new variable/s as DataArrays in the 
122 |         original Dataset. 
123 |     """
124 |     
125 |     # Set ds equal to a copy of itself in order to prevent the function 
126 |     # from editing the input dataset. This can prevent unexpected
127 |     # behaviour though it uses twice as much memory.    
128 |     if not inplace:
129 |         ds = ds.copy(deep=True)
130 |     
131 |     # Capture input band names in order to drop these if drop=True
132 |     if drop:
133 |         bands_to_drop=list(ds.data_vars)
134 |         print(f'Dropping bands {bands_to_drop}')
135 | 
136 |     # Dictionary containing remote sensing index band recipes
137 |     index_dict = {
138 |                   # Normalised Difference Vegation Index, Rouse 1973
139 |                   'NDVI': lambda ds: (ds.nir - ds.red) /
140 |                                      (ds.nir + ds.red),
141 | 
142 |                   # Enhanced Vegetation Index, Huete 2002
143 |                   'EVI': lambda ds: ((2.5 * (ds.nir - ds.red)) /
144 |                                      (ds.nir + 6 * ds.red -
145 |                                       7.5 * ds.blue + 1)),
146 | 
147 |                   # Leaf Area Index, Boegh 2002
148 |                   'LAI': lambda ds: (3.618 * ((2.5 * (ds.nir - ds.red)) /
149 |                                      (ds.nir + 6 * ds.red -
150 |                                       7.5 * ds.blue + 1)) - 0.118),
151 | 
152 |                   # Soil Adjusted Vegetation Index, Huete 1988
153 |                   'SAVI': lambda ds: ((1.5 * (ds.nir - ds.red)) /
154 |                                       (ds.nir + ds.red + 0.5)),
155 |       
156 |                   # Mod. Soil Adjusted Vegetation Index, Qi et al. 1994
157 |                   'MSAVI': lambda ds: ((2 * ds.nir + 1 - 
158 |                                       ((2 * ds.nir + 1)**2 - 
159 |                                        8 * (ds.nir - ds.red))**0.5) / 2),    
160 | 
161 |                   # Normalised Difference Moisture Index, Gao 1996
162 |                   'NDMI': lambda ds: (ds.nir - ds.swir1) /
163 |                                      (ds.nir + ds.swir1),
164 | 
165 |                   # Normalised Burn Ratio, Lopez Garcia 1991
166 |                   'NBR': lambda ds: (ds.nir - ds.swir2) /
167 |                                     (ds.nir + ds.swir2),
168 | 
169 |                   # Burn Area Index, Martin 1998
170 |                   'BAI': lambda ds: (1.0 / ((0.10 - ds.red) ** 2 +
171 |                                             (0.06 - ds.nir) ** 2)),
172 |         
173 |                  # Normalised Difference Chlorophyll Index, 
174 |                  # (Mishra & Mishra, 2012)
175 |                   'NDCI': lambda ds: (ds.red_edge_1 - ds.red) /
176 |                                      (ds.red_edge_1 + ds.red),
177 | 
178 |                   # Normalised Difference Snow Index, Hall 1995
179 |                   'NDSI': lambda ds: (ds.green - ds.swir1) /
180 |                                      (ds.green + ds.swir1),
181 | 
182 |                   # Normalised Difference Tillage Index,
183 |                   # Van Deventer et al. 1997
184 |                   'NDTI': lambda ds: (ds.swir1 - ds.swir2) /
185 |                                      (ds.swir1 + ds.swir2),
186 | 
187 |                   # Normalised Difference Water Index, McFeeters 1996
188 |                   'NDWI': lambda ds: (ds.green - ds.nir) /
189 |                                      (ds.green + ds.nir),
190 | 
191 |                   # Modified Normalised Difference Water Index, Xu 2006
192 |                   'MNDWI': lambda ds: (ds.green - ds.swir1) /
193 |                                       (ds.green + ds.swir1),
194 |       
195 |                   # Normalised Difference Built-Up Index, Zha 2003
196 |                   'NDBI': lambda ds: (ds.swir1 - ds.nir) /
197 |                                      (ds.swir1 + ds.nir),
198 |       
199 |                   # Built-Up Index, He et al. 2010
200 |                   'BUI': lambda ds:  ((ds.swir1 - ds.nir) /
201 |                                       (ds.swir1 + ds.nir)) -
202 |                                      ((ds.nir - ds.red) /
203 |                                       (ds.nir + ds.red)),
204 |       
205 |                   # Built-up Area Extraction Index, Bouzekri et al. 2015
206 |                   'BAEI': lambda ds: (ds.red + 0.3) /
207 |                                      (ds.green + ds.swir1),
208 |       
209 |                   # New Built-up Index, Jieli et al. 2010
210 |                   'NBI': lambda ds: (ds.swir1 + ds.red) / ds.nir,
211 |       
212 |                   # Bare Soil Index, Rikimaru et al. 2002
213 |                   'BSI': lambda ds: ((ds.swir1 + ds.red) - 
214 |                                      (ds.nir + ds.blue)) / 
215 |                                     ((ds.swir1 + ds.red) + 
216 |                                      (ds.nir + ds.blue)),
217 | 
218 |                   # Automated Water Extraction Index (no shadows), Feyisa 2014
219 |                   'AWEI_ns': lambda ds: (4 * (ds.green - ds.swir1) -
220 |                                         (0.25 * ds.nir * + 2.75 * ds.swir2)),
221 | 
222 |                   # Automated Water Extraction Index (shadows), Feyisa 2014
223 |                   'AWEI_sh': lambda ds: (ds.blue + 2.5 * ds.green -
224 |                                          1.5 * (ds.nir + ds.swir1) -
225 |                                          0.25 * ds.swir2),
226 | 
227 |                   # Water Index, Fisher 2016
228 |                   'WI': lambda ds: (1.7204 + 171 * ds.green + 3 * ds.red -
229 |                                     70 * ds.nir - 45 * ds.swir1 -
230 |                                     71 * ds.swir2),
231 | 
232 |                   # Tasseled Cap Wetness, Crist 1985
233 |                   'TCW': lambda ds: (0.0315 * ds.blue + 0.2021 * ds.green +
234 |                                      0.3102 * ds.red + 0.1594 * ds.nir +
235 |                                     -0.6806 * ds.swir1 + -0.6109 * ds.swir2),
236 | 
237 |                   # Tasseled Cap Greeness, Crist 1985
238 |                   'TCG': lambda ds: (-0.1603 * ds.blue + -0.2819 * ds.green +
239 |                                      -0.4934 * ds.red + 0.7940 * ds.nir +
240 |                                      -0.0002 * ds.swir1 + -0.1446 * ds.swir2),
241 | 
242 |                   # Tasseled Cap Brightness, Crist 1985
243 |                   'TCB': lambda ds: (0.2043 * ds.blue + 0.4158 * ds.green +
244 |                                      0.5524 * ds.red + 0.5741 * ds.nir +
245 |                                      0.3124 * ds.swir1 + -0.2303 * ds.swir2),
246 | 
247 |                   # Clay Minerals Ratio, Drury 1987
248 |                   'CMR': lambda ds: (ds.swir1 / ds.swir2),
249 | 
250 |                   # Ferrous Minerals Ratio, Segal 1982
251 |                   'FMR': lambda ds: (ds.swir1 / ds.nir),
252 | 
253 |                   # Iron Oxide Ratio, Segal 1982
254 |                   'IOR': lambda ds: (ds.red / ds.blue)
255 |     }
256 |     
257 |     # If index supplied is not a list, convert to list. This allows us to
258 |     # iterate through either multiple or single indices in the loop below
259 |     indices = index if isinstance(index, list) else [index]
260 |     
261 |     #calculate for each index in the list of indices supplied (indexes)
262 |     for index in indices:
263 | 
264 |         # Select an index function from the dictionary
265 |         index_func = index_dict.get(str(index))
266 | 
267 |         # If no index is provided or if no function is returned due to an 
268 |         # invalid option being provided, raise an exception informing user to 
269 |         # choose from the list of valid options
270 |         if index is None:
271 | 
272 |             raise ValueError(f"No remote sensing `index` was provided. Please "
273 |                               "refer to the function \ndocumentation for a full "
274 |                               "list of valid options for `index` (e.g. 'NDVI')")
275 | 
276 |         elif (index in ['WI', 'BAEI', 'AWEI_ns', 'AWEI_sh', 'TCW', 
277 |                         'TCG', 'TCB', 'EVI', 'LAI', 'SAVI', 'MSAVI'] 
278 |               and not normalise):
279 | 
280 |             warnings.warn(f"\nA coefficient-based index ('{index}') normally "
281 |                            "applied to surface reflectance values in the \n"
282 |                            "0.0-1.0 range was applied to values in the 0-10000 "
283 |                            "range. This can produce unexpected results; \nif "
284 |                            "required, resolve this by setting `normalise=True`")
285 | 
286 |         elif index_func is None:
287 | 
288 |             raise ValueError(f"The selected index '{index}' is not one of the "
289 |                               "valid remote sensing index options. \nPlease "
290 |                               "refer to the function documentation for a full "
291 |                               "list of valid options for `index`")
292 | 
293 |         # Rename bands to a consistent format if depending on what collection
294 |         # is specified in `collection`. This allows the same index calculations
295 |         # to be applied to all collections. If no collection was provided, 
296 |         # raise an exception.
297 |         if collection is None:
298 | 
299 |             raise ValueError("'No `collection` was provided. Please specify "
300 |                              "either 'ga_ls_2', 'ga_ls_3' or 'ga_s2_1' \nto "
301 |                              "ensure the function calculates indices using the "
302 |                              "correct spectral bands")
303 | 
304 |         elif collection == 'ga_ls_3':
305 | 
306 |             # Dictionary mapping full data names to simpler 'red' alias names
307 |             bandnames_dict = {
308 |                 'nbart_nir': 'nir',
309 |                 'nbart_red': 'red',
310 |                 'nbart_green': 'green',
311 |                 'nbart_blue': 'blue',
312 |                 'nbart_swir_1': 'swir1',
313 |                 'nbart_swir_2': 'swir2',
314 |                 'nbar_red': 'red',
315 |                 'nbar_green': 'green',
316 |                 'nbar_blue': 'blue',
317 |                 'nbar_nir': 'nir',
318 |                 'nbar_swir_1': 'swir1',
319 |                 'nbar_swir_2': 'swir2'
320 |             }
321 | 
322 |             # Rename bands in dataset to use simple names (e.g. 'red')
323 |             bands_to_rename = {
324 |                 a: b for a, b in bandnames_dict.items() if a in ds.variables
325 |             }
326 | 
327 |         elif collection == 'ga_s2_1':
328 | 
329 |             # Dictionary mapping full data names to simpler 'red' alias names
330 |             bandnames_dict = {
331 |                 'nbart_red': 'red',
332 |                 'nbart_green': 'green',
333 |                 'nbart_blue': 'blue',
334 |                 'nbart_nir_1': 'nir',
335 |                 'nbart_red_edge_1': 'red_edge_1', 
336 |                 'nbart_red_edge_2': 'red_edge_2',    
337 |                 'nbart_swir_2': 'swir1',
338 |                 'nbart_swir_3': 'swir2',
339 |                 'nbar_red': 'red',
340 |                 'nbar_green': 'green',
341 |                 'nbar_blue': 'blue',
342 |                 'nbar_nir_1': 'nir',
343 |                 'nbar_red_edge_1': 'red_edge_1',   
344 |                 'nbar_red_edge_2': 'red_edge_2',   
345 |                 'nbar_swir_2': 'swir1',
346 |                 'nbar_swir_3': 'swir2'
347 |             }
348 | 
349 |             # Rename bands in dataset to use simple names (e.g. 'red')
350 |             bands_to_rename = {
351 |                 a: b for a, b in bandnames_dict.items() if a in ds.variables
352 |             }
353 | 
354 |         elif collection == 'ga_ls_2':
355 | 
356 |             # Pass an empty dict as no bands need renaming
357 |             bands_to_rename = {}
358 | 
359 |         # Raise error if no valid collection name is provided:
360 |         else:
361 |             raise ValueError(f"'{collection}' is not a valid option for "
362 |                               "`collection`. Please specify either \n"
363 |                               "'ga_ls_2', 'ga_ls_3' or 'ga_s2_1'")
364 | 
365 |         # Apply index function 
366 |         try:
367 |             # If normalised=True, divide data by 10,000 before applying func
368 |             mult = 10000.0 if normalise else 1.0
369 |             index_array = index_func(ds.rename(bands_to_rename) / mult)
370 |         except AttributeError:
371 |             raise ValueError(f'Please verify that all bands required to '
372 |                              f'compute {index} are present in `ds`. \n'
373 |                              f'These bands may vary depending on the `collection` '
374 |                              f'(e.g. the Landsat `nbart_nir` band \n'
375 |                              f'is equivelent to `nbart_nir_1` for Sentinel 2)')
376 | 
377 |         # Add as a new variable in dataset
378 |         output_band_name = custom_varname if custom_varname else index
379 |         ds[output_band_name] = index_array
380 |     
381 |     # Once all indexes are calculated, drop input bands if inplace=False
382 |     if drop and not inplace:
383 |         ds = ds.drop(bands_to_drop)
384 | 
385 |     # If inplace == True, delete bands in-place instead of using drop
386 |     if drop and inplace:
387 |         for band_to_drop in bands_to_drop:
388 |             del ds[band_to_drop]
389 | 
390 |     # Return input dataset with added water index variable
391 |     return ds
392 | 


--------------------------------------------------------------------------------
/notebooks/scripts/dea_bom.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Scraping some BOM data
  3 | """
  4 | import datetime
  5 | import pytz
  6 | import ciso8601
  7 | from types import SimpleNamespace
  8 | import requests
  9 | import lxml
 10 | import lxml.etree
 11 | 
 12 | __all__ = (
 13 |     'get_stations',
 14 |     'get_station_data',
 15 |     'mk_station_selector',
 16 | )
 17 | 
 18 | def get_stations(time=None,
 19 |                  observation='http://bom.gov.au/waterdata/services/parameters/Water Course Discharge',
 20 |                  url='http://www.bom.gov.au/waterdata/services'):
 21 |     """ Get list of stations
 22 | 
 23 |         :param time: tuple of datetime.datetime objects, or None to query from 1980-1-1 to Now
 24 | 
 25 |         Returns
 26 |         ========
 27 |         List of stations:
 28 |          .name -- string, human readable station name
 29 |          .pos  -- Coordinate of the station or None
 30 |          .url  -- service url identifier
 31 |     """
 32 | 
 33 |     data = tpl_get_stations.format(observation=observation,
 34 |                                    **_fmt_time(time))
 35 |     rr = requests.post(url, data=data)
 36 | 
 37 |     return _parse_station_data(rr.text)
 38 | 
 39 | 
 40 | def get_station_data(station,
 41 |                      time=None,
 42 |                      observation='http://bom.gov.au/waterdata/services/parameters/Water Course Discharge',
 43 |                      url='http://www.bom.gov.au/waterdata/services'):
 44 |     """
 45 |     Query Gauge Data.
 46 | 
 47 |     :param station: One of the stations see get_stations
 48 |     :param time: tuple of datetime.datetime objects, or None to query from 1980-1-1 to Now
 49 | 
 50 |     Returns
 51 |     ========
 52 |     Pandas dataframe with Timestamp(index), Value columns
 53 |     """
 54 | 
 55 |     data = tpl_get_obs.format(station=station.url,
 56 |                               observation=observation,
 57 |                               **_fmt_time(time))
 58 |     rr = requests.post(url, data=data)
 59 |     return _parse_get_data(rr.text)
 60 | 
 61 | 
 62 | def mk_station_selector(on_select,
 63 |                         stations=None,
 64 |                         dst_map=None,
 65 |                         **kw):
 66 |     """
 67 |     Add stations to the map and register on_click event.
 68 | 
 69 |     :param on_select: Will be called when user selects station on the map `on_select(station)`
 70 |     :param stations: List of stations as returned from get_stations
 71 |     :param dst_map: Map to add stations markers to
 72 | 
 73 |     Any other arguments are passed on  to Map(..) constructor.
 74 | 
 75 |     Returns
 76 |     =======
 77 | 
 78 |     (map, marker_cluster)
 79 | 
 80 |     Passes through map=dst_map if not None, or returns newly constructed Map object.
 81 |     """
 82 |     import ipyleaflet as L
 83 | 
 84 |     if stations is None:
 85 |         stations = get_stations()
 86 | 
 87 |     stations = [st for st in stations if st.pos is not None]
 88 |     pos2st = {st.pos: st for st in stations}
 89 | 
 90 |     def on_click(event='', type='', coordinates=None):
 91 |         pos = tuple(coordinates)
 92 |         st = pos2st.get(pos)
 93 |         if st is None:
 94 |             # should probably log warning here
 95 |             print("Can't map click to station")
 96 |             return
 97 | 
 98 |         on_select(st)
 99 | 
100 |     markers = [L.Marker(location=st.pos,
101 |                         draggable=False,
102 |                         title=st.name)
103 |                for st in stations]
104 | 
105 |     cluster = L.MarkerCluster(markers=markers)
106 | 
107 |     if dst_map is None:
108 |         dst_map = L.Map(**kw)
109 | 
110 |     dst_map.add_layer(cluster)
111 |     cluster.on_click(on_click)
112 | 
113 |     return dst_map, cluster
114 | 
115 | 
116 | def ui_select_station(stations,
117 |                       zoom=3,
118 |                       center=(-24, 138),
119 |                       **kw):
120 |     """
121 |     Returns
122 |     =======
123 | 
124 |     (gauge_data, station)
125 |     """
126 |     import ipywidgets as W
127 |     from IPython.display import display
128 |     import matplotlib.pyplot as plt
129 |     import ipyleaflet as L
130 |     from odc.ui import ui_poll
131 | 
132 |     dbg_display = W.Output()
133 |     fig_display = W.Output()
134 |     btn_done = W.Button(description='Done')
135 |     scroll_wheel_zoom = kw.pop('scroll_wheel_zoom', True)
136 |     map_widget = L.Map(zoom=zoom,
137 |                        center=center,
138 |                        scroll_wheel_zoom=scroll_wheel_zoom,
139 |                        **kw)
140 | 
141 |     state = SimpleNamespace(pos=None,
142 |                             gauge_data=None,
143 |                             finished=False,
144 |                             station=None)
145 | 
146 |     plt_interactive_state = plt.isinteractive()
147 |     plt.interactive(False)
148 | 
149 |     with fig_display:
150 |         fig, ax = plt.subplots(1, figsize=(14,4))
151 |         ax.set_visible(False)
152 |         display(fig)
153 | 
154 |     def _on_select(station):
155 |         if state.finished:
156 |             print('Please re-run the cell')
157 |             return
158 | 
159 |         state.station = station
160 |         state.pos = station.pos
161 |         state.gauge_data = None
162 | 
163 |         print('Fetching data for: {}'.format(station.name))
164 |         try:
165 |             xx = get_station_data(station).dropna()
166 |         except Exception:
167 |             print('Failed to read data')
168 |             return
169 |         print('Got {} observations'.format(xx.shape[0]))
170 | 
171 |         state.gauge_data = xx
172 | 
173 |         with fig_display:
174 |             ax.clear()
175 |             ax.set_visible(True)
176 |             xx.plot(ax=ax)
177 |             ax.set_xlabel("Date")
178 |             ax.set_ylabel("Cubic meters per second")
179 |             ax.legend([station.name])
180 | 
181 |         fig_display.clear_output(wait=True)
182 |         with fig_display:
183 |             display(fig)
184 | 
185 |     def on_select(station):
186 |         with dbg_display:
187 |             _on_select(station)
188 | 
189 |     def on_done(btn):
190 |         if state.finished:
191 |             with dbg_display:
192 |                 print('Please re-run the cell')
193 |                 return
194 | 
195 |         state.finished = True
196 |         n_obs = 0 if state.gauge_data is None else state.gauge_data.shape[0]
197 | 
198 |         with dbg_display:
199 |             print('''Finished
200 | Station: {}
201 | Number of Observations: {}'''.format(state.station.name, n_obs))
202 | 
203 |     def on_poll():
204 |         with dbg_display:
205 |             if state.finished:
206 |                 return state.gauge_data, state.station
207 |             return None
208 | 
209 |     mk_station_selector(on_select,
210 |                         stations=stations,
211 |                         dst_map=map_widget)
212 | 
213 |     ## UI:
214 |     ##
215 |     ##  MMMMMMMMMMMMM BBBBB
216 |     ##  MMMMMMMMMMMMM .....
217 |     ##  MMMMMMMMMMMMM .....
218 |     ##  MMMMMMMMMMMMM .....
219 |     ##  MMMMMMMMMMMMM .....
220 |     ##  FFFFFFFFFFFFFFFFFFF
221 |     ##  FFFFFFFFFFFFFFFFFFF
222 | 
223 |     #  M - Map     F - Figure
224 |     #  B - Button  . - Debug output
225 | 
226 |     btn_done.on_click(on_done)
227 |     r_panel = W.VBox([btn_done, dbg_display],
228 |                      layout=W.Layout(width='30%'))
229 | 
230 |     ui = W.VBox([W.HBox([map_widget, r_panel]),
231 |                  fig_display])
232 | 
233 |     display(ui)
234 | 
235 |     result = ui_poll(on_poll, 1/20)  # this will block until done is pressed
236 | 
237 |     #restore interactive state
238 |     fig_display.clear_output(wait=True)
239 |     with fig_display:
240 |         plt.interactive(plt_interactive_state)
241 |         plt.show()
242 | 
243 |     return result
244 | 
245 | 
246 | def _fmt_time(time=None):
247 |     if time is None:
248 |         time = (datetime.datetime(1980, 1, 1),
249 |                 datetime.datetime.now())
250 | 
251 |     t_start, t_end = (t.isoformat() for t in time)
252 |     return dict(t_start=t_start, t_end=t_end)
253 | 
254 | def _parse_float(x):
255 |     if x is None:
256 |         return float('nan')
257 |     try:
258 |         return float(x)
259 |     except ValueError:
260 |         return float('nan')
261 | 
262 | def _parse_time(x):
263 |     t = ciso8601.parse_datetime(x).astimezone(pytz.utc)
264 |     return t.replace(tzinfo=None)
265 | 
266 | 
267 | def _parse_get_data(text, raw=False):
268 |     root = lxml.etree.fromstring(text)
269 | 
270 |     data = [[e.text for e in root.findall('.//{http://www.opengis.net/waterml/2.0}' + t)]
271 |             for t in ['time', 'value']]
272 | 
273 |     dd = [(_parse_time(t),
274 |            _parse_float(v))
275 |           for t, v in zip(*data)]
276 | 
277 |     if raw:
278 |         return dd
279 | 
280 |     import pandas as pd
281 |     return pd.DataFrame(dd, columns=('Timestamp', 'Value')).set_index('Timestamp')
282 | 
283 | 
284 | def _parse_station_data(text):
285 |     def parse_pos(pos):
286 |         if pos is None:
287 |             return None
288 |         return tuple(_parse_float(x)
289 |                      for x in pos.split(' '))
290 | 
291 |     root = lxml.etree.fromstring(text)
292 | 
293 |     data = [[e.text for e in root.findall('.//{http://www.opengis.net/gml/3.2}' + t)]
294 |             for t in ['name', 'identifier', 'pos']]
295 | 
296 |     return [SimpleNamespace(name=name, url=url, pos=parse_pos(pos))
297 |             for name, url, pos in zip(*data)]
298 | 
299 | 
300 | 
301 | # observation = 'http://bom.gov.au/waterdata/services/parameters/Water Course Discharge'
302 | #
303 | tpl_get_stations = '''
304 | <soap12:Envelope xmlns:soap12="http://www.w3.org/2003/05/soap-envelope"
305 |   xmlns:sos="http://www.opengis.net/sos/2.0"
306 |   xmlns:wsa="http://www.w3.org/2005/08/addressing"
307 |   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
308 |   xmlns:ows="http://www.opengis.net/ows/1.1"
309 |   xmlns:fes="http://www.opengis.net/fes/2.0"
310 |   xmlns:gml="http://www.opengis.net/gml/3.2"
311 |   xmlns:swes="http://www.opengis.net/swes/2.0"
312 |   xsi:schemaLocation="http://www.w3.org/2003/05/soap-envelope http://www.w3.org/2003/05/soap-envelope/soap-envelope.xsd http://www.opengis.net/sos/2.0 http://schemas.opengis.net/sos/2.0/sos.xsd"
313 | >
314 |     <soap12:Header>
315 |         <wsa:To>http://www.ogc.org/SOS</wsa:To>
316 |         <wsa:Action>http://www.opengis.net/def/serviceOperation/sos/foiRetrieval/2.0/GetFeatureOfInterest</wsa:Action>
317 |         <wsa:ReplyTo>
318 |             <wsa:Address>http://www.w3.org/2005/08/addressing/anonymous</wsa:Address>
319 |         </wsa:ReplyTo>
320 |         <wsa:MessageID>0</wsa:MessageID>
321 |     </soap12:Header>
322 |     <soap12:Body>
323 |         <sos:GetFeatureOfInterest service="SOS" version="2.0.0">
324 |             <sos:observedProperty>{observation}</sos:observedProperty>
325 |             <sos:temporalFilter>
326 |                 <fes:During>
327 |                     <fes:ValueReference>om:phenomenonTime</fes:ValueReference>
328 |                     <gml:TimePeriod gml:id="tp1">
329 |                         <gml:beginPosition>{t_start}</gml:beginPosition>
330 |                         <gml:endPosition>{t_end}</gml:endPosition>
331 |                     </gml:TimePeriod>
332 |                 </fes:During>
333 |             </sos:temporalFilter>
334 |         </sos:GetFeatureOfInterest>
335 |     </soap12:Body>
336 | </soap12:Envelope>
337 | '''
338 | 
339 | # {station}, {observation}, {t_start}, {t_end}
340 | tpl_get_obs = '''
341 | <soap12:Envelope xmlns:soap12="http://www.w3.org/2003/05/soap-envelope"
342 |                  xmlns:sos="http://www.opengis.net/sos/2.0"
343 |                  xmlns:wsa="http://www.w3.org/2005/08/addressing"
344 |                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
345 |                  xmlns:ows="http://www.opengis.net/ows/1.1"
346 |                  xmlns:fes="http://www.opengis.net/fes/2.0"
347 |                  xmlns:gml="http://www.opengis.net/gml/3.2"
348 |                  xmlns:swes="http://www.opengis.net/swes/2.0"
349 |                  xsi:schemaLocation="http://www.w3.org/2003/05/soap-envelope http://www.w3.org/2003/05/soap-envelope/soap-envelope.xsd http://www.opengis.net/sos/2.0 http://schemas.opengis.net/sos/2.0/sos.xsd"
350 | >
351 |     <soap12:Header>
352 |         <wsa:To>http://www.ogc.org/SOS</wsa:To>
353 |         <wsa:Action>http://www.opengis.net/def/serviceOperation/sos/core/2.0/GetObservation</wsa:Action>
354 |         <wsa:ReplyTo>
355 |             <wsa:Address>http://www.w3.org/2005/08/addressing/anonymous</wsa:Address>
356 |         </wsa:ReplyTo>
357 |         <wsa:MessageID>0</wsa:MessageID>
358 |     </soap12:Header>
359 |     <soap12:Body>
360 |         <sos:GetObservation service="SOS" version="2.0.0">
361 |             <sos:procedure>http://bom.gov.au/waterdata/services/tstypes/Pat4_C_B_1_DailyMean</sos:procedure>
362 |             <sos:observedProperty>{observation}</sos:observedProperty>
363 |             <sos:featureOfInterest>{station}</sos:featureOfInterest>
364 |             <sos:temporalFilter>
365 |                 <fes:During>
366 |                     <fes:ValueReference>om:phenomenonTime</fes:ValueReference>
367 |                     <gml:TimePeriod gml:id="tp1">
368 |                         <gml:beginPosition>{t_start}</gml:beginPosition>
369 |                         <gml:endPosition>{t_end}</gml:endPosition>
370 |                     </gml:TimePeriod>
371 |                 </fes:During>
372 |             </sos:temporalFilter>
373 |         </sos:GetObservation>
374 |     </soap12:Body>
375 | </soap12:Envelope>
376 | '''
377 | 


--------------------------------------------------------------------------------
/notebooks/scripts/dea_climate.py:
--------------------------------------------------------------------------------
  1 | # dea_climate.py
  2 | '''
  3 | Description: A set of python functions to retrieve and manipulate 
  4 | gridded climate data.
  5 | 
  6 | Adapted from scripts by Andrew Cherry and Brian Killough.
  7 | 
  8 | License: The code in this notebook is licensed under the Apache License,
  9 | Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth 
 10 | Australia data is licensed under the Creative Commons by Attribution 4.0 
 11 | license (https://creativecommons.org/licenses/by/4.0/).
 12 | 
 13 | Contact: If you need assistance, please post a question on the Open Data 
 14 | Cube Slack channel (http://slack.opendatacube.org/) or on the GIS Stack 
 15 | Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube) 
 16 | using the `open-data-cube` tag (you can view previously asked questions 
 17 | here: https://gis.stackexchange.com/questions/tagged/open-data-cube). 
 18 | 
 19 | If you would like to report an issue with this script, you can file one on 
 20 | Github https://github.com/digitalearthafrica/deafrica-sandbox-notebooks/issues
 21 | 
 22 | Functions included:
 23 |     get_era5_daily
 24 |     era5_area_crop
 25 |     era5_area_nearest
 26 |     load_era5    
 27 |     
 28 | Last modified: October 2020
 29 | 
 30 | '''
 31 | 
 32 | import os
 33 | import datetime
 34 | import numpy as np
 35 | from dateutil.parser import parse
 36 | import boto3
 37 | import botocore
 38 | import xarray as xr
 39 | import warnings
 40 | 
 41 | ERA5_VARS = [
 42 |     "air_pressure_at_mean_sea_level",
 43 |     "air_temperature_at_2_metres",
 44 |     "air_temperature_at_2_metres_1hour_Maximum",
 45 |     "air_temperature_at_2_metres_1hour_Minimum",
 46 |     "dew_point_temperature_at_2_metres",
 47 |     "eastward_wind_at_100_metres",
 48 |     "eastward_wind_at_10_metres",
 49 |     "integral_wrt_time_of_surface_direct_downwelling_shortwave_flux_in_air_1hour_Accumulation",
 50 |     "lwe_thickness_of_surface_snow_amount",
 51 |     "northward_wind_at_100_metres",
 52 |     "northward_wind_at_10_metres",
 53 |     "precipitation_amount_1hour_Accumulation",
 54 |     "sea_surface_temperature",
 55 |     "sea_surface_wave_from_direction",
 56 |     "sea_surface_wave_mean_period",
 57 |     "significant_height_of_wind_and_swell_waves",
 58 |     "snow_density",
 59 |     "surface_air_pressure",
 60 | ]
 61 | 
 62 | 
 63 | def get_era5_daily(var,
 64 |                    date_from_arg,
 65 |                    date_to_arg=None,
 66 |                    reduce_func=None,
 67 |                    cache_dir='era5',
 68 |                    resample='1D'):
 69 |     """
 70 |     Download and return an variable from the European Centre for Medium 
 71 |     Range Weather Forecasts (ECMWF) global climate reanalysis product 
 72 |     (ERA5) for a defined time window.
 73 | 
 74 |     Parameters
 75 |     ----------     
 76 |     var : string
 77 |         Name of the ERA5 climate variable to download, e.g 
 78 |         "air_temperature_at_2_metres" 
 79 | 
 80 |     date_from_arg: string or datetime object
 81 |         Starting date of the time window.
 82 |         
 83 |     date_to_arg: string or datetime object
 84 |         End date of the time window. If not supplied, set to be the same
 85 |         as starting date.
 86 | 
 87 |     reduce_func: numpy function
 88 |         lets you specify a function to apply to each day's worth of data.  
 89 |         The default is np.mean, which computes daily average. To get a 
 90 |         sum, use np.sum.
 91 | 
 92 |     cache_dir: sting
 93 |         Path to save downloaded ERA5 data. The path will be created if 
 94 |         not already exists.
 95 |         The default is 'era5'.
 96 |         
 97 |     resample: string
 98 |         Temporal resampling frequency to be used for xarray's resample
 99 |         function. The default is '1D', which is daily. Since ERA5 data 
100 |         is provided as one file per month, maximum resampling period is 
101 |         '1M'.
102 | 
103 |     Returns
104 |     -------
105 |     A lazy-loaded xarray dataset containing an ERA5 variable for the 
106 |     selected time window.
107 | 
108 |     """
109 | 
110 |     # Massage input data
111 |     assert var in ERA5_VARS, "var must be one of [{}] (got {})".format(
112 |         ','.join(ERA5_VARS), var)
113 |     if not os.path.exists(cache_dir):
114 |         os.mkdir(cache_dir)
115 |     if reduce_func is None:
116 |         reduce_func = np.mean
117 |     if type(date_from_arg) == str:
118 |         date_from_arg = parse(date_from_arg)
119 |     if type(date_to_arg) == str:
120 |         date_to_arg = parse(date_to_arg)
121 |     if date_to_arg is None:
122 |         date_to_arg = date_from_arg
123 |         
124 |     # Make sure our dates are in the correct order
125 |     from_date = min(date_from_arg, date_to_arg)
126 |     to_date = max(date_from_arg, date_to_arg)
127 |     
128 |     # Download ERA5 files to local cache if they don't already exist
129 |     client = None  # Boto client (if needed)
130 |     local_files = []  # Will hold list of local filenames
131 |     Y, M = from_date.year, from_date.month  # Loop vars
132 |     loop_end = to_date.year * 12 + to_date.month  # Loop sentinel
133 |     while Y * 12 + M <= loop_end:
134 |         local_file = os.path.join(
135 |             cache_dir, "{Y:04}_{M:02}_{var}.nc".format(Y=Y, M=M, var=var))
136 |         data_key = "{Y:04}/{M:02}/data/{var}.nc".format(Y=Y, M=M, var=var)
137 |         if not os.path.isfile(
138 |                 local_file
139 |         ):  # check if file already exists (TODO: move to temp, catch failed download)
140 |             if client is None:
141 |                 client = boto3.client('s3',
142 |                                       config=botocore.client.Config(
143 |                                           signature_version=botocore.UNSIGNED))
144 |             client.download_file('era5-pds', data_key, local_file)
145 |         local_files.append(local_file)
146 |         if M == 12:
147 |             Y += 1
148 |             M = 1
149 |         else:
150 |             M += 1
151 |             
152 |     # Load and merge the locally-cached ERA5 data from the list of filenames
153 |     date_slice = slice(str(from_date.date()), str(to_date.date(
154 |     )))  # I do this to INCLUDE the whole end date, not just 00:00
155 | 
156 |     def prepro(ds):
157 |         if 'time0' in ds.dims:
158 |             ds = ds.rename({"time0": "time"})
159 |         if 'time1' in ds.dims:
160 |             ds = ds.rename({
161 |                 "time1": "time"
162 |             })  # This should INTENTIONALLY error if both times are defined
163 |         ds = ds[[var]]
164 |         output = ds.sel(time=date_slice).resample(
165 |             time=resample).reduce(reduce_func)
166 |         output.attrs = ds.attrs
167 |         for v in output.data_vars:
168 |             output[v].attrs = ds[v].attrs
169 |         return output
170 | 
171 |     return xr.open_mfdataset(local_files,
172 |                              combine='by_coords',
173 |                              compat='equals',
174 |                              preprocess=prepro,
175 |                              parallel=True)
176 | 
177 | 
178 | def era5_area_crop(ds, lat, lon):
179 |     """
180 |     Crop a dataset containing European Centre for Medium Range Weather 
181 |     Forecasts (ECMWF) global climate reanalysis product (ERA5) variables
182 |     to a location. 
183 |     
184 |     The output spatial grid will either include input grid points within 
185 |     lat/lon boundaries or the nearest point if none is within the search
186 |     location.  
187 | 
188 |     Parameters
189 |     ----------     
190 |     ds : xarray dataset
191 |         A dataset containing ERA5 variables of interest.
192 | 
193 |     lat: tuple or list
194 |         Latitude range for query.
195 | 
196 |     lon: tuple or list
197 |         Longitude range for query.
198 | 
199 |     Returns
200 |     -------
201 |     An xarray dataset containing ERA5 variables for the selected 
202 |     location.
203 | 
204 |     """
205 |     
206 |     # Handle single value lat/lon args by wrapping them in lists
207 |     try:
208 |         min(lat)
209 |     except TypeError:
210 |         lat = [lat]
211 |         
212 |     try:
213 |         min(lon)
214 |     except TypeError:
215 |         lon = [lon]
216 |         
217 |     if min(lon) < 0:
218 |         # re-order along longitude to go from -180 to 180
219 |         ds = ds.assign_coords({"lon": (((ds.lon + 180) % 360) - 180)})
220 |         ds = ds.reindex({ "lon": np.sort(ds.lon)})
221 |         
222 |     # Issue warnings if args outside range.
223 |     if min(lat) < ds.lat.min() or max(lat) > ds.lat.max():
224 |         warnings.warn("Lats must be in range {} .. {}.  Got: {}".format(
225 |             ds.lat.min().values,
226 |             ds.lat.max().values, lat))
227 |     if min(lon) < ds.lon.min() or max(lon) > ds.lon.max():
228 |         warnings.warn("Lons must be in range {} .. {}.  Got: {}".format(
229 |             ds.lon.min().values,
230 |             ds.lon.max().values, lon))
231 |         
232 |     # Find existing coords between min&max
233 |     lats = ds.lat[np.logical_and(
234 |         ds.lat >= min(lat), ds.lat <= max(lat))].values
235 |     
236 |     # If there was nothing between, just plan to grab closest
237 |     if len(lats) == 0:
238 |         lats = np.unique(ds.lat.sel(lat=np.array(lat), method="nearest"))
239 |     lons = ds.lon[np.logical_and(
240 |         ds.lon >= min(lon), ds.lon <= max(lon))].values
241 |     if len(lons) == 0:
242 |         lons = np.unique(ds.lon.sel(lon=np.array(lon), method="nearest"))
243 |         
244 |     # crop and keep attrs
245 |     output = ds.sel(lat=lats, lon=lons)
246 |     output.attrs = ds.attrs
247 |     for var in output.data_vars:
248 |         output[var].attrs = ds[var].attrs
249 |     return output
250 | 
251 | 
252 | def era5_area_nearest(ds, lat, lon):
253 |     """
254 |     Crop a dataset containing European Centre for Medium 
255 |     Range Weather Forecasts (ECMWF) global climate reanalysis product 
256 |     (ERA5) variables to a location. 
257 |     
258 |     The output spatial grid is snapped to the nearest input grid points.  
259 | 
260 |     Parameters
261 |     ----------     
262 |     ds : xarray dataset
263 |         A dataset containing ERA5 variables of interest.
264 | 
265 |     lat: tuple or list
266 |         Latitude range for query.
267 | 
268 |     lon: tuple or list
269 |         Longitude range for query.
270 | 
271 |     Returns
272 |     -------
273 |     An xarray dataset containing ERA5 variables for the selected location.
274 | 
275 |     """
276 |     
277 |     if min(lon) < 0:
278 |         # re-order along longitude to go from -180 to 180
279 |         ds = ds.assign_coords({"lon": (((ds.lon + 180) % 360) - 180)})
280 |         ds = ds.reindex({ "lon": np.sort(ds.lon)})
281 |         
282 |     # find the nearest lat lon boundary points
283 |     test = ds.sel(lat=lat, lon=lon, method='nearest')
284 |     
285 |     # define the lat/lon grid
286 |     lat_range = slice(test.lat.max().values, test.lat.min().values)
287 |     lon_range = slice(test.lon.min().values, test.lon.max().values)
288 |     
289 |     # crop and keep attrs
290 |     output = ds.sel(lat=lat_range, lon=lon_range)
291 |     output.attrs = ds.attrs
292 |     
293 |     for var in output.data_vars:
294 |         output[var].attrs = ds[var].attrs
295 |     return output
296 | 
297 | 
298 | def load_era5(var, lat, lon, time, grid='nearest', **kwargs):
299 |     """
300 |     Returns a European Centre for Medium Range Weather Forecasts (ECMWF)
301 |     global climate reanalysis product (ERA5) variable for a selected 
302 |     location and time window. 
303 | 
304 |     Parameters
305 |     ----------     
306 |     var : string
307 |         Name of the ERA5 climate variable to download, e.g 
308 |         "air_temperature_at_2_metres" 
309 | 
310 |     lat: tuple or list
311 |         Latitude range for query.
312 | 
313 |     lon: tuple or list
314 |         Longitude range for query.
315 |     
316 |     time: tuple or list
317 |         Time range for query.
318 |     
319 |     grid: string
320 |         Option for output spatial gridding.
321 |         The default is 'nearest', for which output spatial grid is 
322 |         snapped to the nearest ERA5 input grid points.
323 |         Alternatively, output spatial grid will either include input 
324 |         grid points within lat/lon boundaries or the nearest point if 
325 |         none is within the search location. 
326 |         
327 |     Returns
328 |     -------
329 |     An xarray dataset containing the variable for the selected location
330 |     and time window.
331 | 
332 |     """
333 | 
334 |     ds = get_era5_daily(var, time[0], time[1], **kwargs)
335 |     if grid == 'nearest':
336 |         return era5_area_nearest(ds, lat, lon).compute()
337 |     else:
338 |         return era5_area_crop(ds, lat, lon).compute()
339 | 


--------------------------------------------------------------------------------
/notebooks/scripts/dea_coastaltools.py:
--------------------------------------------------------------------------------
  1 | ## dea_coastaltools.py
  2 | '''
  3 | Description: This file contains a set of python functions for conducting 
  4 | coastal analyses on Digital Earth Australia data.
  5 | 
  6 | License: The code in this notebook is licensed under the Apache License, 
  7 | Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth 
  8 | Australia data is licensed under the Creative Commons by Attribution 4.0 
  9 | license (https://creativecommons.org/licenses/by/4.0/).
 10 | 
 11 | Contact: If you need assistance, post a question on the Open Data Cube 
 12 | Slack channel (http://slack.opendatacube.org/) or the GIS Stack Exchange 
 13 | (https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using 
 14 | the `open-data-cube` tag (you can view previously asked questions here: 
 15 | https://gis.stackexchange.com/questions/tagged/open-data-cube). 
 16 | 
 17 | If you would like to report an issue with this script, you can file one 
 18 | on Github (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new).
 19 | 
 20 | Functions included:
 21 |     tidal_tag
 22 |     tidal_stats
 23 | 
 24 | Last modified: February 2020
 25 | 
 26 | '''
 27 | 
 28 | # Import required packages
 29 | import numpy as np
 30 | import xarray as xr
 31 | import pandas as pd
 32 | import matplotlib.pyplot as plt
 33 | from scipy import stats
 34 | from otps import TimePoint
 35 | from otps import predict_tide
 36 | from datacube.utils.geometry import CRS
 37 | 
 38 | # Fix converters for tidal plot
 39 | from pandas.plotting import register_matplotlib_converters
 40 | register_matplotlib_converters()
 41 | 
 42 | 
 43 | def tidal_tag(ds,
 44 |               tidepost_lat=None, 
 45 |               tidepost_lon=None, 
 46 |               ebb_flow=False, 
 47 |               swap_dims=False,
 48 |               return_tideposts=False):
 49 |     """
 50 |     Takes an xarray.Dataset and returns the same dataset with a new 
 51 |     `tide_height` variable giving the height of the tide at the exact
 52 |     moment of each satellite acquisition. 
 53 |     
 54 |     By default, the function models tides for the centroid of the 
 55 |     dataset, but a custom tidal modelling location can be specified 
 56 |     using `tidepost_lat` and `tidepost_lon`.
 57 |     
 58 |     Tides are modelled using the OTPS tidal modelling software based on
 59 |     the TPXO8 tidal model: http://volkov.oce.orst.edu/tides/tpxo8_atlas.html
 60 |     
 61 |     Parameters
 62 |     ----------     
 63 |     ds : xarray.Dataset
 64 |         An xarray.Dataset object with x, y and time dimensions  
 65 |     tidepost_lat, tidepost_lon : float or int, optional
 66 |         Optional coordinates used to model tides. The default is None,
 67 |         which uses the centroid of the dataset as the tide modelling 
 68 |         location.
 69 |     ebb_flow : bool, optional
 70 |         An optional boolean indicating whether to compute if the 
 71 |         tide phase was ebbing (falling) or flowing (rising) for each 
 72 |         observation. The default is False; if set to True, a new 
 73 |         `ebb_flow` variable will be added to the dataset with each 
 74 |         observation labelled with 'Ebb' or 'Flow'.
 75 |     swap_dims : bool, optional
 76 |         An optional boolean indicating whether to swap the `time` 
 77 |         dimension in the original xarray.Dataset to the new 
 78 |         `tide_height` variable. Defaults to False.
 79 |     return_tideposts : bool, optional
 80 |         An optional boolean indicating whether to return the `tidepost_lat`
 81 |         and `tidepost_lon` location used to model tides in addition to the
 82 |         xarray.Dataset. Defaults to False.
 83 |         
 84 |     Returns
 85 |     -------
 86 |     The original xarray.Dataset with a new `tide_height` variable giving
 87 |     the height of the tide (and optionally, its ebb-flow phase) at the 
 88 |     exact moment of each satellite acquisition.  
 89 |     
 90 |     (if `return_tideposts=True`, the function will also return the 
 91 |     `tidepost_lon` and `tidepost_lat` location used in the analysis)
 92 |     
 93 |     """
 94 | 
 95 |     # If custom tide modelling locations are not provided, use the
 96 |     # dataset centroid
 97 |     if not tidepost_lat or not tidepost_lon:
 98 | 
 99 |         tidepost_lon, tidepost_lat = ds.extent.centroid.to_crs(
100 |             crs=CRS('EPSG:4326')).coords[0]
101 |         print(f'Setting tide modelling location from dataset centroid: '
102 |               f'{tidepost_lon:.2f}, {tidepost_lat:.2f}')
103 | 
104 |     else:
105 |         print(f'Using user-supplied tide modelling location: '
106 |               f'{tidepost_lon:.2f}, {tidepost_lat:.2f}')
107 | 
108 |     # Use the tidal model to compute tide heights for each observation:
109 |     obs_datetimes = ds.time.data.astype('M8[s]').astype('O').tolist()    
110 |     obs_timepoints = [TimePoint(tidepost_lon, tidepost_lat, dt) 
111 |                       for dt in obs_datetimes]
112 |     obs_predictedtides = predict_tide(obs_timepoints)   
113 | 
114 |     # If tides cannot be successfully modeled (e.g. if the centre of the 
115 |     # xarray dataset is located is over land), raise an exception
116 |     if len(obs_predictedtides) > 0:
117 | 
118 |         # Extract tide heights
119 |         obs_tideheights = [predictedtide.tide_m for predictedtide 
120 |                            in obs_predictedtides]
121 | 
122 |         # Assign tide heights to the dataset as a new variable
123 |         ds['tide_height'] = xr.DataArray(obs_tideheights, [('time', ds.time)])
124 | 
125 |         # Optionally calculate the tide phase for each observation
126 |         if ebb_flow:
127 |             
128 |             # Model tides for a time 15 minutes prior to each previously
129 |             # modelled satellite acquisition time. This allows us to compare
130 |             # tide heights to see if they are rising or falling.
131 |             print('Modelling tidal phase (e.g. ebb or flow)')
132 |             pre_times = (ds.time - pd.Timedelta('15 min'))
133 |             pre_datetimes = pre_times.data.astype('M8[s]').astype('O').tolist()   
134 |             pre_timepoints = [TimePoint(tidepost_lon, tidepost_lat, dt) 
135 |                               for dt in pre_datetimes]
136 |             pre_predictedtides = predict_tide(pre_timepoints)
137 |             
138 |             # Compare tides computed for each timestep. If the previous tide 
139 |             # was higher than the current tide, the tide is 'ebbing'. If the
140 |             # previous tide was lower, the tide is 'flowing'
141 |             tidal_phase = ['Ebb' if pre.tide_m > obs.tide_m else 'Flow'
142 |                            for pre, obs in zip(pre_predictedtides, 
143 |                                                obs_predictedtides)]
144 |             
145 |             # Assign tide phase to the dataset as a new variable
146 |             ds['ebb_flow'] = xr.DataArray(tidal_phase, [('time', ds.time)]) 
147 |             
148 |         # If swap_dims = True, make tide height the primary dimension 
149 |         # instead of time
150 |         if swap_dims:
151 | 
152 |             # Swap dimensions and sort by tide height
153 |             ds = ds.swap_dims({'time': 'tide_height'})          
154 |             ds = ds.sortby('tide_height')  
155 |             ds = ds.drop('time')
156 |             
157 |         if return_tideposts:
158 |             return ds, tidepost_lon, tidepost_lat
159 |         else:
160 |             return ds
161 |     
162 |     else:
163 |         
164 |         raise ValueError(
165 |             f'Tides could not be modelled for dataset centroid located '
166 |             f'at {tidepost_lon:.2f}, {tidepost_lat:.2f}. This can occur if '
167 |             f'this coordinate occurs over land. Please manually specify '
168 |             f'a tide modelling location located over water using the '
169 |             f'`tidepost_lat` and `tidepost_lon` parameters.'
170 |         )
171 | 
172 | 
173 | def tidal_stats(ds, 
174 |                 tidepost_lat=None,
175 |                 tidepost_lon=None,
176 |                 plain_english=True, 
177 |                 plot=True,
178 |                 modelled_freq='2h',
179 |                 round_stats=3): 
180 |     """
181 |     Takes an xarray.Dataset and statistically compares the tides 
182 |     modelled for each satellite observation against the full modelled 
183 |     tidal range. This comparison can be used to evaluate whether the 
184 |     tides observed by satellites (e.g. Landsat) are biased compared to 
185 |     the natural tidal range (e.g. fail to observe either the highest or 
186 |     lowest tides etc).    
187 |        
188 |     By default, the function models tides for the centroid of the 
189 |     dataset, but a custom tidal modelling location can be specified 
190 |     using `tidepost_lat` and `tidepost_lon`.
191 |     
192 |     Tides are modelled using the OTPS tidal modelling software based on
193 |     the TPXO8 tidal model: http://volkov.oce.orst.edu/tides/tpxo8_atlas.html
194 |     
195 |     For more information about the tidal statistics computed by this 
196 |     function, refer to Figure 8 in Bishop-Taylor et al. 2018:
197 |     https://www.sciencedirect.com/science/article/pii/S0272771418308783#fig8
198 |     
199 |     Parameters
200 |     ----------     
201 |     ds : xarray.Dataset
202 |         An xarray.Dataset object with x, y and time dimensions  
203 |     tidepost_lat, tidepost_lon : float or int, optional
204 |         Optional coordinates used to model tides. The default is None,
205 |         which uses the centroid of the dataset as the tide modelling 
206 |         location.
207 |     plain_english : bool, optional
208 |         An optional boolean indicating whether to print a plain english 
209 |         version of the tidal statistics to the screen. Defaults to True.
210 |     plot : bool, optional
211 |         An optional boolean indicating whether to plot how satellite-
212 |         observed tide heights compare against the full tidal range. 
213 |         Defaults to True.
214 |     modelled_freq : str, optional
215 |         An optional string giving the frequency at which to model tides 
216 |         when computing the full modelled tidal range. Defaults to '2h', 
217 |         which computes a tide height for every two hours across the
218 |         temporal extent of `ds`.        
219 |     round_stats : int, optional
220 |         The number of decimal places used to round the output statistics.
221 |         Defaults to 3.
222 |         
223 |     Returns
224 |     -------
225 |     A pandas.Series object containing the following statistics:
226 |     
227 |         tidepost_lat: latitude used for modelling tide heights
228 |         tidepost_lon: longitude used for modelling tide heights
229 |         observed_min_m: minimum tide height observed by the satellite
230 |         all_min_m: minimum tide height from full modelled tidal range
231 |         observed_max_m: maximum tide height observed by the satellite
232 |         all_max_m: maximum tide height from full modelled tidal range
233 |         observed_range_m: tidal range observed by the satellite
234 |         all_range_m: full modelled tidal range 
235 |         spread_m: proportion of the full modelled tidal range observed 
236 |                   by the satellite (see Bishop-Taylor et al. 2018)
237 |         low_tide_offset: proportion of the lowest tides never observed
238 |                   by the satellite (see Bishop-Taylor et al. 2018)
239 |         high_tide_offset: proportion of the highest tides never observed
240 |                   by the satellite (see Bishop-Taylor et al. 2018)
241 |         observed_slope: slope of any relationship between observed tide 
242 |                   heights and time
243 |         all_slope: slope of any relationship between all modelled tide 
244 |                   heights and time
245 |         observed_pval: significance/p-value of any relationship between 
246 |                   observed tide heights and time
247 |         all_pval: significance/p-value of any relationship between 
248 |                   all modelled tide heights and time
249 |     
250 |     """
251 |     
252 |     # Model tides for each observation in the supplied xarray object
253 |     ds_tides, tidepost_lon, tidepost_lat = tidal_tag(ds,
254 |                                                      tidepost_lat=tidepost_lat,
255 |                                                      tidepost_lon=tidepost_lon,
256 |                                                      return_tideposts=True)
257 | 
258 |     # Generate range of times covering entire period of satellite record
259 |     all_timerange = pd.date_range(start=ds_tides.time.min().item(),
260 |                                   end=ds_tides.time.max().item(),
261 |                                   freq=modelled_freq)
262 |     all_datetimes = all_timerange.values.astype('M8[s]').astype('O').tolist()  
263 | 
264 |     # Use the tidal model to compute tide heights for each observation:  
265 |     all_timepoints = [TimePoint(tidepost_lon, tidepost_lat, dt) 
266 |                       for dt in all_datetimes]
267 |     all_predictedtides = predict_tide(all_timepoints)   
268 |     all_tideheights = [predictedtide.tide_m for predictedtide 
269 |                         in all_predictedtides]
270 | 
271 |     # Get coarse statistics on all and observed tidal ranges
272 |     obs_mean = ds_tides.tide_height.mean().item()
273 |     all_mean = np.mean(all_tideheights)
274 |     obs_min, obs_max = ds_tides.tide_height.quantile([0.0, 1.0]).values
275 |     all_min, all_max = np.quantile(all_tideheights, [0.0, 1.0])
276 | 
277 |     # Calculate tidal range
278 |     obs_range = (obs_max - obs_min)
279 |     all_range = (all_max - all_min)
280 | 
281 |     # Calculate Bishop-Taylor et al. 2018 tidal metrics
282 |     spread = obs_range / all_range
283 |     low_tide_offset = abs(all_min - obs_min) / all_range
284 |     high_tide_offset = abs(all_max - obs_max) / all_range  
285 |     
286 |     # Extract x (time in decimal years) and y (distance) values
287 |     all_x = (all_timerange.year + 
288 |              ((all_timerange.dayofyear - 1) / 365) +
289 |              ((all_timerange.hour - 1) / 24))
290 |     all_y = all_tideheights
291 |     time_period = all_x.max() - all_x.min()
292 | 
293 |     # Extract x (time in decimal years) and y (distance) values
294 |     obs_x = (ds_tides.time.dt.year + 
295 |              ((ds_tides.time.dt.dayofyear - 1) / 365) + 
296 |              ((ds_tides.time.dt.hour - 1) / 24))
297 |     obs_y = ds_tides.tide_height.values.astype(np.float)           
298 | 
299 |     # Compute linear regression
300 |     obs_linreg = stats.linregress(x=obs_x, y=obs_y)  
301 |     all_linreg = stats.linregress(x=all_x, y=all_y)
302 |     
303 |     if plain_english:
304 |         
305 |         print(f'\n{spread:.0%} of the full {all_range:.2f} m modelled tidal '
306 |               f'range is observed at this location.\nThe lowest '
307 |               f'{low_tide_offset:.0%} and highest {high_tide_offset:.0%} '
308 |               f'of tides are never observed.\n')
309 |         
310 |         # Plain english
311 |         if obs_linreg.pvalue > 0.05:
312 |             print(f'Observed tides do not increase or decrease significantly '
313 |                   f'over the ~{time_period:.0f} year period.')
314 |         else:
315 |             obs_slope_desc = 'decrease' if obs_linreg.slope < 0 else 'increase'
316 |             print(f'Observed tides {obs_slope_desc} significantly '
317 |                   f'(p={obs_linreg.pvalue:.3f}) over time by '
318 |                   f'{obs_linreg.slope:.03f} m per year (i.e. a '
319 |                   f'~{time_period * obs_linreg.slope:.2f} m '
320 |                   f'{obs_slope_desc} over the ~{time_period:.0f} year period).')
321 | 
322 |         if all_linreg.pvalue > 0.05:
323 |             print(f'All tides do not increase or decrease significantly over '
324 |                   f'the ~{time_period:.0f} year period.')
325 |         else:
326 |             all_slope_desc = 'decrease' if all_linreg.slope < 0 else 'increase'
327 |             print(f'All tides {all_slope_desc} significantly '
328 |                   f'(p={all_linreg.pvalue:.3f}) over time by '
329 |                   f'{all_linreg.slope:.03f} m per year (i.e. a '
330 |                   f'~{time_period * all_linreg.slope:.2f} m '
331 |                   f'{all_slope_desc} over the ~{time_period:.0f} year period).')
332 | 
333 |     if plot:
334 |         
335 |         # Create plot and add all time and observed tide data
336 |         fig, ax = plt.subplots(figsize=(10, 5))
337 |         ax.plot(all_timerange, all_tideheights, alpha=0.4)
338 |         ds_tides.tide_height.plot.line(ax=ax, 
339 |                                        marker='o',
340 |                                        linewidth=0.0, 
341 |                                        color='black',
342 |                                        markersize=2)
343 | 
344 |         # Add horizontal lines for spread/offsets
345 |         ax.axhline(obs_min, color='black', linestyle=':', linewidth=1)
346 |         ax.axhline(obs_max, color='black', linestyle=':', linewidth=1)
347 |         ax.axhline(all_min, color='black', linestyle=':', linewidth=1)
348 |         ax.axhline(all_max, color='black', linestyle=':', linewidth=1)
349 | 
350 |         # Add text annotations for spread/offsets
351 |         ax.annotate('    High tide\n    offset', 
352 |                      xy=(all_timerange.max(), 
353 |                          np.mean([all_max, obs_max])), 
354 |                      va='center')
355 |         ax.annotate('    Spread', 
356 |                      xy=(all_timerange.max(), 
357 |                          np.mean([obs_min, obs_max])), 
358 |                      va='center')
359 |         ax.annotate('    Low tide\n    offset', 
360 |                      xy=(all_timerange.max(), 
361 |                          np.mean([all_min, obs_min])))
362 | 
363 |         # Remove top right axes and add labels
364 |         ax.spines['right'].set_visible(False)
365 |         ax.spines['top'].set_visible(False)
366 |         ax.set_ylabel('Tide height (m)')
367 |         ax.set_xlabel('');
368 |         ax.margins(x=0.015)
369 |         
370 |     # Export pandas.Series containing tidal stats
371 |     return pd.Series({'tidepost_lat': tidepost_lat,
372 |                       'tidepost_lon': tidepost_lon,
373 |                       'observed_mean_m': obs_mean,
374 |                       'all_mean_m': all_mean,
375 |                       'observed_min_m': obs_min,
376 |                       'all_min_m': all_min,
377 |                       'observed_max_m': obs_max,
378 |                       'all_max_m': all_max,
379 |                       'observed_range_m': obs_range,
380 |                       'all_range_m': all_range,
381 |                       'spread': spread,
382 |                       'low_tide_offset': low_tide_offset,
383 |                       'high_tide_offset': high_tide_offset,
384 |                       'observed_slope': obs_linreg.slope,
385 |                       'all_slope': all_linreg.slope,
386 |                       'observed_pval': obs_linreg.pvalue,
387 |                       'all_pval': all_linreg.pvalue}).round(round_stats)
388 | 


--------------------------------------------------------------------------------
/notebooks/scripts/dea_dask.py:
--------------------------------------------------------------------------------
 1 | ## dea_dask.py
 2 | '''
 3 | Description: A set of python functions for simplifying the creation of a
 4 | local dask cluster.
 5 | 
 6 | License: The code in this notebook is licensed under the Apache License,
 7 | Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth
 8 | Australia data is licensed under the Creative Commons by Attribution 4.0
 9 | license (https://creativecommons.org/licenses/by/4.0/).
10 | 
11 | Contact: If you need assistance, please post a question on the Open Data
12 | Cube Slack channel (http://slack.opendatacube.org/) or on the GIS Stack
13 | Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube)
14 | using the `open-data-cube` tag (you can view previously asked questions
15 | here: https://gis.stackexchange.com/questions/tagged/open-data-cube).
16 | 
17 | If you would like to report an issue with this script, you can file one on
18 | Github (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new).
19 | 
20 | Functions included:
21 |     create_local_dask_cluster
22 | 
23 | Last modified: March 2020
24 | 
25 | '''
26 | 
27 | 
28 | from importlib.util import find_spec
29 | import os
30 | import dask
31 | from IPython.display import display
32 | from datacube.utils.dask import start_local_dask
33 | from datacube.utils.rio import configure_s3_access
34 | 
35 | _HAVE_PROXY = bool(find_spec('jupyter_server_proxy'))
36 | _IS_AWS = ('AWS_ACCESS_KEY_ID' in os.environ or
37 |            'AWS_DEFAULT_REGION' in os.environ)
38 | 
39 | 
40 | def create_local_dask_cluster(spare_mem='3Gb', display_client=True):
41 |     """
42 |     Using the datacube utils function `start_local_dask`, generate
43 |     a local dask cluster. Automatically detects if on AWS or NCI.
44 | 
45 |     Example use :
46 | 
47 |         import sys
48 |         sys.path.append("../Scripts")
49 |         from dea_dask import create_local_dask_cluster
50 | 
51 |         create_local_dask_cluster(spare_mem='4Gb')
52 | 
53 |     Parameters
54 |     ----------
55 |     spare_mem : String, optional
56 |         The amount of memory, in Gb, to leave for the notebook to run.
57 |         This memory will not be used by the cluster. e.g '3Gb'
58 |     display_client : Bool, optional
59 |         An optional boolean indicating whether to display a summary of
60 |         the dask client, including a link to monitor progress of the
61 |         analysis. Set to False to hide this display.
62 | 
63 |     """
64 | 
65 |     if _HAVE_PROXY:
66 |         # Configure dashboard link to go over proxy
67 |         prefix = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')
68 |         dask.config.set({"distributed.dashboard.link":
69 |                          prefix + "proxy/{port}/status"})
70 | 
71 |     # Start up a local cluster
72 |     client = start_local_dask(mem_safety_margin=spare_mem)
73 | 
74 |     if _IS_AWS:
75 |         # Configure GDAL for s3 access
76 |         configure_s3_access(aws_unsigned=True,
77 |                             client=client)
78 | 
79 |     # Show the dask cluster settings
80 |     if display_client:
81 |         display(client)
82 | 


--------------------------------------------------------------------------------
/notebooks/scripts/dea_temporaltools.py:
--------------------------------------------------------------------------------
  1 | ## dea_temporaltools.py
  2 | '''
  3 | Description: This file contains a set of python functions for conducting
  4 | temporal (time-domain) analyses on Digital Earth Australia data.
  5 | 
  6 | License: The code in this notebook is licensed under the Apache License,
  7 | Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth
  8 | Australia data is licensed under the Creative Commons by Attribution 4.0
  9 | license (https://creativecommons.org/licenses/by/4.0/).
 10 | 
 11 | Contact: If you need assistance, please post a question on the Open Data
 12 | Cube Slack channel (http://slack.opendatacube.org/) or on the GIS Stack
 13 | Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube)
 14 | using the `open-data-cube` tag (you can view previously asked questions
 15 | here: https://gis.stackexchange.com/questions/tagged/open-data-cube).
 16 | 
 17 | If you would like to report an issue with this script, file one on
 18 | Github: https://github.com/GeoscienceAustralia/dea-notebooks/issues/new
 19 | 
 20 | Functions included:
 21 |     time_buffer
 22 | 
 23 | Last modified: September 2020
 24 | 
 25 | '''
 26 | 
 27 | # Import required packages
 28 | import numpy as np
 29 | import pandas as pd
 30 | import scipy.signal
 31 | 
 32 | 
 33 | def time_buffer(input_date, buffer='30 days', output_format='%Y-%m-%d'):
 34 | 
 35 |     """
 36 |     Create a buffer of a given duration (e.g. days) around a time query. 
 37 |     Output is a string in the correct format for a datacube query.
 38 | 
 39 |     Parameters
 40 |     ----------
 41 |     input_date : str, yyyy-mm-dd
 42 |         Time to buffer
 43 |     buffer : str, optional
 44 |         Default is '30 days', can be any string supported by the 
 45 |         `pandas.Timedelta` function 
 46 |     output_format : str, optional
 47 |         Optional string giving the `strftime` format used to convert
 48 |         buffered times to strings; defaults to '%Y-%m-%d' 
 49 |         (e.g. '2017-12-02')
 50 |             
 51 |     Returns
 52 |     -------
 53 |     early_buffer, late_buffer : str
 54 |         A tuple of strings to pass to the datacube query function
 55 |         e.g. `('2017-12-02', '2018-01-31')` for input 
 56 |         `input_date='2018-01-01'` and `buffer='30 days'`  
 57 |     """
 58 |     # Use assertions to check we have the correct function input
 59 |     assert isinstance(input_date, str), "Input date must be a string in quotes in 'yyyy-mm-dd' format"
 60 |     assert isinstance(buffer, str), "Buffer must be a string supported by `pandas.Timedelta`, e.g. '5 days'"
 61 |     
 62 |     # Convert inputs to pandas format
 63 |     buffer = pd.Timedelta(buffer)
 64 |     input_date = pd.to_datetime(input_date)
 65 |     
 66 |     # Apply buffer
 67 |     early_buffer = input_date - buffer
 68 |     late_buffer = input_date + buffer
 69 |     
 70 |     # Convert back to string using strftime
 71 |     early_buffer = early_buffer.strftime(output_format)
 72 |     late_buffer = late_buffer.strftime(output_format)
 73 |     
 74 |     return early_buffer, late_buffer
 75 | 
 76 | 
 77 | def calculate_vector_stat(
 78 |     vec: "data dim",
 79 |     stat: "data dim -> target dim",
 80 |     window_size=365,
 81 |     step=10,
 82 |     target_dim=365,
 83 |     progress=None,
 84 |     window="hann",
 85 | ):
 86 |     """Calculates a vector statistic over a rolling window.
 87 |     
 88 |     Parameters
 89 |     ----------
 90 |     vec : d-dimensional np.ndarray
 91 |         Vector to calculate over, e.g. a time series.
 92 |     stat : R^d -> R^t function
 93 |         Statistic function.
 94 |     window_size : int
 95 |         Sliding window size (default 365).
 96 |     step : int
 97 |         Step size (default 10).
 98 |     target_dim : int
 99 |         Dimensionality of the output of `stat` (default 365).
100 |     progress : iterator -> iterator
101 |         Optional progress decorator, e.g. tqdm.notebook.tqdm. Default None.
102 |     window : str
103 |         What kind of window function to use. Default 'hann', but you might
104 |         also want to use 'boxcar'. Any scipy window
105 |         function is allowed (see documentation for scipy.signal.get_window
106 |         for more information).
107 |         
108 |     Returns
109 |     -------
110 |     (d / step)-dimensional np.ndarray
111 |         y values (the time axis)
112 |     t-dimensional np.ndarray
113 |         x values (the statistic axis)
114 |     (d / step) x t-dimensional np.ndarray
115 |         The vector statistic array.
116 |     """
117 |     # Initialise output array.
118 |     spectrogram_values = np.zeros((vec.shape[0] // step, target_dim))
119 | 
120 |     # Apply the progress decorator, if specified.
121 |     r = range(0, vec.shape[0] - window_size, step)
122 |     if progress:
123 |         r = progress(r)
124 | 
125 |     # Convert the window str argument into a window function.
126 |     window = scipy.signal.get_window(window, window_size)
127 | 
128 |     # Iterate over the sliding window and compute the statistic.
129 |     for base in r:
130 |         win = vec[base : base + window_size] * window
131 |         sad = stat(win)
132 |         spectrogram_values[base // step, :] = sad
133 | 
134 |     return (
135 |         np.linspace(0, vec.shape[0], vec.shape[0] // step, endpoint=False),
136 |         np.arange(target_dim),
137 |         spectrogram_values,
138 |     )


--------------------------------------------------------------------------------
/notebooks/scripts/dea_waterbodies.py:
--------------------------------------------------------------------------------
  1 | ## dea_waterbodies.py
  2 | """
  3 | Description: This file contains a set of python functions for loading
  4 | and processing DEA Waterbodies.
  5 | 
  6 | License: The code in this notebook is licensed under the Apache License, 
  7 | Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth 
  8 | Australia data is licensed under the Creative Commons by Attribution 4.0 
  9 | license (https://creativecommons.org/licenses/by/4.0/).
 10 | 
 11 | Contact: If you need assistance, please post a question on the Open Data 
 12 | Cube Slack channel (http://slack.opendatacube.org/) or on the GIS Stack 
 13 | Exchange (https://gis.stackexchange.com/questions/ask?tags=open-data-cube) 
 14 | using the `open-data-cube` tag (you can view previously asked questions 
 15 | here: https://gis.stackexchange.com/questions/tagged/open-data-cube). 
 16 | 
 17 | If you would like to report an issue with this script, file one on 
 18 | Github: https://github.com/GeoscienceAustralia/dea-notebooks/issues/new
 19 | 
 20 | Functions included:
 21 |     get_waterbody
 22 |     get_waterbodies
 23 |     get_geohashes
 24 |     get_time_series
 25 | 
 26 | Last modified: November 2020
 27 | """
 28 | 
 29 | import geopandas as gpd
 30 | from owslib.wfs import WebFeatureService
 31 | from owslib.fes import PropertyIsEqualTo
 32 | from owslib.etree import etree
 33 | import pandas as pd
 34 | 
 35 | WFS_ADDRESS = "https://geoserver.dea.ga.gov.au/geoserver/wfs"
 36 | 
 37 | 
 38 | def get_waterbody(geohash: str) -> gpd.GeoDataFrame:
 39 |     """Gets a waterbody polygon and metadata by geohash.
 40 |     
 41 |     Parameters
 42 |     ----------
 43 |     geohash : str
 44 |         The geohash/UID for a waterbody in DEA Waterbodies.
 45 |     
 46 |     Returns
 47 |     -------
 48 |     gpd.GeoDataFrame
 49 |         A GeoDataFrame with the polygon.
 50 |     """
 51 |     wfs = WebFeatureService(url=WFS_ADDRESS, version="1.1.0")
 52 |     filter_ = PropertyIsEqualTo(propertyname="uid", literal=geohash)
 53 |     filterxml = etree.tostring(filter_.toXML()).decode("utf-8")
 54 |     response = wfs.getfeature(
 55 |         typename="DigitalEarthAustraliaWaterbodies",
 56 |         filter=filterxml,
 57 |         outputFormat="json",
 58 |     )
 59 |     wb_gpd = gpd.read_file(response)
 60 |     return wb_gpd
 61 | 
 62 | 
 63 | def get_waterbodies(bbox: tuple, crs="EPSG:4326") -> gpd.GeoDataFrame:
 64 |     """Gets the polygons and metadata for multiple waterbodies by bbox.
 65 |     
 66 |     Parameters
 67 |     ----------
 68 |     bbox : (xmin, ymin, xmax, ymax)
 69 |         Bounding box.
 70 |     crs : str
 71 |         Optional CRS for the bounding box.
 72 |     
 73 |     Returns
 74 |     -------
 75 |     gpd.GeoDataFrame
 76 |         A GeoDataFrame with the polygons and metadata.
 77 |     """
 78 |     wfs = WebFeatureService(url=WFS_ADDRESS, version="1.1.0")
 79 |     response = wfs.getfeature(
 80 |         typename="DigitalEarthAustraliaWaterbodies",
 81 |         bbox=tuple(bbox) + (crs,),
 82 |         outputFormat="json",
 83 |     )
 84 |     wb_gpd = gpd.read_file(response)
 85 |     return wb_gpd
 86 | 
 87 | 
 88 | def get_geohashes(bbox: tuple = None, crs: str = "EPSG:4326") -> [str]:
 89 |     """Gets all waterbody geohashes.
 90 |     
 91 |     Parameters
 92 |     ----------
 93 |     bbox : (xmin, ymin, xmax, ymax)
 94 |         Optional bounding box.
 95 |     crs : str
 96 |         Optional CRS for the bounding box.
 97 |     
 98 |     Returns
 99 |     -------
100 |     [str]
101 |         A list of geohashes.
102 |     """
103 |     wfs = WebFeatureService(url=WFS_ADDRESS, version="1.1.0")
104 |     if bbox is not None:
105 |         bbox = tuple(bbox) + (crs,)
106 |     response = wfs.getfeature(
107 |         typename="DigitalEarthAustraliaWaterbodies",
108 |         propertyname="uid",
109 |         outputFormat="json",
110 |         bbox=bbox,
111 |     )
112 |     wb_gpd = gpd.read_file(response)
113 |     return list(wb_gpd["uid"])
114 | 
115 | 
116 | def get_time_series(geohash: str = None, waterbody: pd.Series = None) -> pd.DataFrame:
117 |     """Gets the time series for a waterbody. Specify either a GeoDataFrame row or a geohash.
118 |     
119 |     Parameters
120 |     ----------
121 |     geohash : str
122 |         The geohash/UID for a waterbody in DEA Waterbodies.
123 |     waterbody : pd.Series
124 |         One row of a GeoDataFrame representing a waterbody.
125 |     
126 |     Returns
127 |     -------
128 |     pd.DataFrame
129 |         A time series for the waterbody.
130 |     """
131 |     if waterbody is not None and geohash is not None:
132 |         raise ValueError("One of waterbody and geohash must be None")
133 |     if waterbody is None and geohash is None:
134 |         raise ValueError("One of waterbody and geohash must be specified")
135 | 
136 |     if geohash is not None:
137 |         wb = get_waterbody(geohash)
138 |         url = wb.timeseries[0]
139 |     else:
140 |         url = waterbody.timeseries
141 |     wb_timeseries = pd.read_csv(url)
142 |     # Tidy up the dataframe.
143 |     wb_timeseries.dropna(inplace=True)
144 |     wb_timeseries.columns = ["date", "pc_wet", "px_wet"]
145 |     wb_timeseries = wb_timeseries.set_index("date")
146 |     wb_timeseries.index = pd.to_datetime(wb_timeseries.index)
147 |     return wb_timeseries
148 | 


--------------------------------------------------------------------------------
/parameters.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "ParameterKey": "KeyName",
 4 |     "ParameterValue": "aleith"
 5 |   },
 6 |   {
 7 |     "ParameterKey": "InstanceType",
 8 |     "ParameterValue": "t2.large"
 9 |   },
10 |   {
11 |     "ParameterKey": "SecretPassword",
12 |     "ParameterValue": "lemmein"
13 |   }
14 | ]
15 | 


--------------------------------------------------------------------------------
/scripts/data/add_azure_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | datacube metadata add eo3_landsat_ard.odc-type.yaml
 4 | datacube metadata add eo_plus.odc-type.yaml
 5 | datacube product add ga_ls7e_ard_3.odc-product.yaml
 6 | datacube product add ga_ls8c_ard_3.odc-product.yaml
 7 | datacube product add ga_s2a_ard_nbar_granule.odc-product.yaml
 8 | datacube product add ga_s2b_ard_nbar_granule.odc-product.yaml
 9 | datacube product add linescan.odc-product.yaml
10 | datacube product add esa_s1_rtc.odc-product.yaml
11 | 
12 | dc-index-from-tar --protocol https --ignore-lineage -p "ga_ls7e_ard_3" -p "ga_ls8c_ard_3" ls78.tar.gz
13 | dc-index-from-tar --protocol https --ignore-lineage -p "ga_s2a_ard_nbar_granule" -p "ga_s2b_ard_nbar_granule" s2ab.tar.gz
14 | dc-index-from-tar --protocol https --ignore-lineage -p "linescan" linescan.tar.gz
15 | dc-index-from-tar --protocol https --ignore-lineage --stac -p "s1_rtc" sentinel-1.tar.gz
16 | 


--------------------------------------------------------------------------------
/scripts/data/eo3_landsat_ard.odc-type.yaml:
--------------------------------------------------------------------------------
  1 | ---
  2 | # Metadata Type
  3 | # url: https://explorer.sandbox.dea.ga.gov.au/metadata-type/eo3_landsat_ard.odc-type.yaml
  4 | name: eo3_landsat_ard
  5 | description: EO3 for ARD Landsat Collection 3
  6 | dataset:
  7 |   id:
  8 |   - id
  9 |   label:
 10 |   - label
 11 |   format:
 12 |   - properties
 13 |   - odc:file_format
 14 |   sources:
 15 |   - lineage
 16 |   - source_datasets
 17 |   creation_dt:
 18 |   - properties
 19 |   - odc:processing_datetime
 20 |   grid_spatial:
 21 |   - grid_spatial
 22 |   - projection
 23 |   measurements:
 24 |   - measurements
 25 |   search_fields:
 26 |     gqa:
 27 |       type: double
 28 |       offset:
 29 |       - properties
 30 |       - gqa:cep90
 31 |       description: GQA Circular error probable (90%)
 32 |     lat:
 33 |       type: double-range
 34 |       max_offset:
 35 |       - - extent
 36 |         - lat
 37 |         - end
 38 |       min_offset:
 39 |       - - extent
 40 |         - lat
 41 |         - begin
 42 |       description: Latitude range
 43 |     lon:
 44 |       type: double-range
 45 |       max_offset:
 46 |       - - extent
 47 |         - lon
 48 |         - end
 49 |       min_offset:
 50 |       - - extent
 51 |         - lon
 52 |         - begin
 53 |       description: Longitude range
 54 |     time:
 55 |       type: datetime-range
 56 |       max_offset:
 57 |       - - properties
 58 |         - dtr:end_datetime
 59 |       - - properties
 60 |         - datetime
 61 |       min_offset:
 62 |       - - properties
 63 |         - dtr:start_datetime
 64 |       - - properties
 65 |         - datetime
 66 |       description: Acquisition time range
 67 |     eo_gsd:
 68 |       type: double
 69 |       offset:
 70 |       - properties
 71 |       - eo:gsd
 72 |       indexed: false
 73 |       description: Ground sample distance, meters
 74 |     platform:
 75 |       offset:
 76 |       - properties
 77 |       - eo:platform
 78 |       indexed: false
 79 |       description: Platform code
 80 |     gqa_abs_x:
 81 |       type: double
 82 |       offset:
 83 |       - properties
 84 |       - gqa:abs_x
 85 |       indexed: false
 86 |       description: 'TODO: <gqa:abs_x>'
 87 |     gqa_abs_y:
 88 |       type: double
 89 |       offset:
 90 |       - properties
 91 |       - gqa:abs_y
 92 |       indexed: false
 93 |       description: 'TODO: <gqa:abs_y>'
 94 |     gqa_cep90:
 95 |       type: double
 96 |       offset:
 97 |       - properties
 98 |       - gqa:cep90
 99 |       indexed: false
100 |       description: 'TODO: <gqa:cep90>'
101 |     fmask_snow:
102 |       type: double
103 |       offset:
104 |       - properties
105 |       - fmask:snow
106 |       indexed: false
107 |       description: 'TODO: <fmask:snow>'
108 |     gqa_abs_xy:
109 |       type: double
110 |       offset:
111 |       - properties
112 |       - gqa:abs_xy
113 |       indexed: false
114 |       description: 'TODO: <gqa:abs_xy>'
115 |     gqa_mean_x:
116 |       type: double
117 |       offset:
118 |       - properties
119 |       - gqa:mean_x
120 |       indexed: false
121 |       description: 'TODO: <gqa:mean_x>'
122 |     gqa_mean_y:
123 |       type: double
124 |       offset:
125 |       - properties
126 |       - gqa:mean_y
127 |       indexed: false
128 |       description: 'TODO: <gqa:mean_y>'
129 |     instrument:
130 |       offset:
131 |       - properties
132 |       - eo:instrument
133 |       indexed: false
134 |       description: Instrument name
135 |     cloud_cover:
136 |       type: double
137 |       offset:
138 |       - properties
139 |       - eo:cloud_cover
140 |       description: Cloud cover percentage [0, 100]
141 |     fmask_clear:
142 |       type: double
143 |       offset:
144 |       - properties
145 |       - fmask:clear
146 |       indexed: false
147 |       description: 'TODO: <fmask:clear>'
148 |     fmask_water:
149 |       type: double
150 |       offset:
151 |       - properties
152 |       - fmask:water
153 |       indexed: false
154 |       description: 'TODO: <fmask:water>'
155 |     gqa_mean_xy:
156 |       type: double
157 |       offset:
158 |       - properties
159 |       - gqa:mean_xy
160 |       indexed: false
161 |       description: 'TODO: <gqa:mean_xy>'
162 |     region_code:
163 |       offset:
164 |       - properties
165 |       - odc:region_code
166 |       description: "Spatial reference code from the provider. For Landsat region_code\
167 |         \ is a scene path row:\n        '{:03d}{:03d}.format(path,row)'\nFor Sentinel\
168 |         \ it is MGRS code. In general it is a unique string identifier that datasets\
169 |         \ covering roughly the same spatial region share.\n"
170 |     gqa_stddev_x:
171 |       type: double
172 |       offset:
173 |       - properties
174 |       - gqa:stddev_x
175 |       indexed: false
176 |       description: 'TODO: <gqa:stddev_x>'
177 |     gqa_stddev_y:
178 |       type: double
179 |       offset:
180 |       - properties
181 |       - gqa:stddev_y
182 |       indexed: false
183 |       description: 'TODO: <gqa:stddev_y>'
184 |     gqa_stddev_xy:
185 |       type: double
186 |       offset:
187 |       - properties
188 |       - gqa:stddev_xy
189 |       indexed: false
190 |       description: 'TODO: <gqa:stddev_xy>'
191 |     eo_sun_azimuth:
192 |       type: double
193 |       offset:
194 |       - properties
195 |       - eo:sun_azimuth
196 |       indexed: false
197 |       description: 'TODO: <eo:sun_azimuth>'
198 |     product_family:
199 |       offset:
200 |       - properties
201 |       - odc:product_family
202 |       indexed: false
203 |       description: Product family code
204 |     dataset_maturity:
205 |       offset:
206 |       - properties
207 |       - dea:dataset_maturity
208 |       description: One of - final|interim|nrt  (near real time)
209 |     eo_sun_elevation:
210 |       type: double
211 |       offset:
212 |       - properties
213 |       - eo:sun_elevation
214 |       indexed: false
215 |       description: 'TODO: <eo:sun_elevation>'
216 |     fmask_cloud_shadow:
217 |       type: double
218 |       offset:
219 |       - properties
220 |       - fmask:cloud_shadow
221 |       indexed: false
222 |       description: 'TODO: <fmask:cloud_shadow>'
223 |     gqa_iterative_mean_x:
224 |       type: double
225 |       offset:
226 |       - properties
227 |       - gqa:iterative_mean_x
228 |       indexed: false
229 |       description: 'TODO: <gqa:iterative_mean_x>'
230 |     gqa_iterative_mean_y:
231 |       type: double
232 |       offset:
233 |       - properties
234 |       - gqa:iterative_mean_y
235 |       indexed: false
236 |       description: 'TODO: <gqa:iterative_mean_y>'
237 |     gqa_iterative_mean_xy:
238 |       type: double
239 |       offset:
240 |       - properties
241 |       - gqa:iterative_mean_xy
242 |       indexed: false
243 |       description: 'TODO: <gqa:iterative_mean_xy>'
244 |     gqa_iterative_stddev_x:
245 |       type: double
246 |       offset:
247 |       - properties
248 |       - gqa:iterative_stddev_x
249 |       indexed: false
250 |       description: 'TODO: <gqa:iterative_stddev_x>'
251 |     gqa_iterative_stddev_y:
252 |       type: double
253 |       offset:
254 |       - properties
255 |       - gqa:iterative_stddev_y
256 |       indexed: false
257 |       description: 'TODO: <gqa:iterative_stddev_y>'
258 |     gqa_iterative_stddev_xy:
259 |       type: double
260 |       offset:
261 |       - properties
262 |       - gqa:iterative_stddev_xy
263 |       indexed: false
264 |       description: 'TODO: <gqa:iterative_stddev_xy>'
265 |     gqa_abs_iterative_mean_x:
266 |       type: double
267 |       offset:
268 |       - properties
269 |       - gqa:abs_iterative_mean_x
270 |       indexed: false
271 |       description: 'TODO: <gqa:abs_iterative_mean_x>'
272 |     gqa_abs_iterative_mean_y:
273 |       type: double
274 |       offset:
275 |       - properties
276 |       - gqa:abs_iterative_mean_y
277 |       indexed: false
278 |       description: 'TODO: <gqa:abs_iterative_mean_y>'
279 |     gqa_abs_iterative_mean_xy:
280 |       type: double
281 |       offset:
282 |       - properties
283 |       - gqa:abs_iterative_mean_xy
284 |       indexed: false
285 |       description: 'TODO: <gqa:abs_iterative_mean_xy>'
286 | ...
287 | 


--------------------------------------------------------------------------------
/scripts/data/eo_plus.odc-type.yaml:
--------------------------------------------------------------------------------
  1 | ---
  2 | # Metadata Type
  3 | # url: https://explorer.sandbox.dea.ga.gov.au/metadata-type/eo_plus.odc-type.yaml
  4 | name: eo_plus
  5 | description: EO metadata for DEA products with GQA.
  6 | dataset:
  7 |   id:
  8 |   - id
  9 |   label:
 10 |   - ga_label
 11 |   format:
 12 |   - format
 13 |   - name
 14 |   sources:
 15 |   - lineage
 16 |   - source_datasets
 17 |   creation_dt:
 18 |   - system_information
 19 |   - time_processed
 20 |   grid_spatial:
 21 |   - grid_spatial
 22 |   - projection
 23 |   measurements:
 24 |   - image
 25 |   - bands
 26 |   search_fields:
 27 |     gqa:
 28 |       type: double
 29 |       offset:
 30 |       - gqa
 31 |       - cep90
 32 |       indexed: false
 33 |       description: GQA circular error probable (90%)
 34 |     lat:
 35 |       type: double-range
 36 |       max_offset:
 37 |       - - extent
 38 |         - coord
 39 |         - ur
 40 |         - lat
 41 |       - - extent
 42 |         - coord
 43 |         - lr
 44 |         - lat
 45 |       - - extent
 46 |         - coord
 47 |         - ul
 48 |         - lat
 49 |       - - extent
 50 |         - coord
 51 |         - ll
 52 |         - lat
 53 |       min_offset:
 54 |       - - extent
 55 |         - coord
 56 |         - ur
 57 |         - lat
 58 |       - - extent
 59 |         - coord
 60 |         - lr
 61 |         - lat
 62 |       - - extent
 63 |         - coord
 64 |         - ul
 65 |         - lat
 66 |       - - extent
 67 |         - coord
 68 |         - ll
 69 |         - lat
 70 |       description: Latitude range
 71 |     lon:
 72 |       type: double-range
 73 |       max_offset:
 74 |       - - extent
 75 |         - coord
 76 |         - ul
 77 |         - lon
 78 |       - - extent
 79 |         - coord
 80 |         - ur
 81 |         - lon
 82 |       - - extent
 83 |         - coord
 84 |         - ll
 85 |         - lon
 86 |       - - extent
 87 |         - coord
 88 |         - lr
 89 |         - lon
 90 |       min_offset:
 91 |       - - extent
 92 |         - coord
 93 |         - ul
 94 |         - lon
 95 |       - - extent
 96 |         - coord
 97 |         - ur
 98 |         - lon
 99 |       - - extent
100 |         - coord
101 |         - ll
102 |         - lon
103 |       - - extent
104 |         - coord
105 |         - lr
106 |         - lon
107 |       description: Longitude range
108 |     time:
109 |       type: datetime-range
110 |       max_offset:
111 |       - - extent
112 |         - to_dt
113 |       min_offset:
114 |       - - extent
115 |         - from_dt
116 |       description: Acquisition time
117 |     format:
118 |       offset:
119 |       - format
120 |       - name
121 |       indexed: false
122 |       description: File format (GeoTIFF, NetCDF)
123 |     platform:
124 |       offset:
125 |       - platform
126 |       - code
127 |       description: Platform code
128 |     gqa_cep90:
129 |       type: double
130 |       offset:
131 |       - gqa
132 |       - residual
133 |       - cep90
134 |       indexed: false
135 |       description: Circular error probable (90%) of the values of the GCP residuals
136 |     gqa_abs_xy:
137 |       type: double
138 |       offset:
139 |       - gqa
140 |       - residual
141 |       - abs
142 |       - xy
143 |       indexed: false
144 |       description: Absolute value of the total GCP residual
145 |     instrument:
146 |       offset:
147 |       - instrument
148 |       - name
149 |       description: Instrument name
150 |     gqa_mean_xy:
151 |       type: double
152 |       offset:
153 |       - gqa
154 |       - residual
155 |       - mean
156 |       - xy
157 |       indexed: false
158 |       description: Mean of the values of the GCP residuals
159 |     region_code:
160 |       offset:
161 |       - provider
162 |       - reference_code
163 |       description: Spatial reference code from the provider
164 |     product_type:
165 |       offset:
166 |       - product_type
167 |       description: Product code
168 |     gqa_stddev_xy:
169 |       type: double
170 |       offset:
171 |       - gqa
172 |       - residual
173 |       - stddev
174 |       - xy
175 |       indexed: false
176 |       description: Standard Deviation of the values of the GCP residuals
177 |     gqa_ref_source:
178 |       offset:
179 |       - gqa
180 |       - ref_source
181 |       indexed: false
182 |       description: GQA reference imagery collection name
183 |     gqa_error_message:
184 |       offset:
185 |       - gqa
186 |       - error_message
187 |       indexed: false
188 |       description: GQA error message
189 |     gqa_final_qa_count:
190 |       type: integer
191 |       offset:
192 |       - gqa
193 |       - final_qa_count
194 |       indexed: false
195 |       description: GQA QA point count
196 |     gqa_iterative_mean_xy:
197 |       type: double
198 |       offset:
199 |       - gqa
200 |       - residual
201 |       - iterative_mean
202 |       - xy
203 |       indexed: false
204 |       description: Mean of the values of the GCP residuals after removal of outliers
205 |     gqa_iterative_stddev_xy:
206 |       type: double
207 |       offset:
208 |       - gqa
209 |       - residual
210 |       - iterative_stddev
211 |       - xy
212 |       indexed: false
213 |       description: Standard Deviation of the values of the GCP residuals after removal
214 |         of outliers
215 |     gqa_abs_iterative_mean_xy:
216 |       type: double
217 |       offset:
218 |       - gqa
219 |       - residual
220 |       - abs_iterative_mean
221 |       - xy
222 |       indexed: false
223 |       description: Mean of the absolute values of the GCP residuals after removal
224 |         of outliers
225 | ...
226 | 


--------------------------------------------------------------------------------
/scripts/data/esa_s1_rtc.odc-product.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: s1_rtc
 3 | description: Sentinel 1 Gamma0 normalised radar backscatter
 4 | metadata_type: eo3
 5 | 
 6 | license: CC-BY-4.0
 7 | 
 8 | metadata:
 9 |   product:
10 |     name: s1_rtc
11 | 
12 | storage:
13 |   crs: EPSG:4326
14 |   resolution:
15 |     longitude: 0.0002
16 |     latitude: -0.0002
17 | 
18 | measurements:
19 |   - name: "vv"
20 |     aliases: [VV]
21 |     units: "1"
22 |     dtype: float32
23 |     nodata: .nan
24 | 
25 |   - name: "vh"
26 |     aliases: [VH]
27 |     units: "1"
28 |     dtype: float32
29 |     nodata: .nan
30 | 
31 |   - name: "angle"
32 |     aliases: [ANGLE, local_incidence_angle]
33 |     units: "1"
34 |     dtype: uint8
35 |     nodata: 255
36 | 
37 |   - name: "area"
38 |     aliases: [AREA, normalised_scattering_area]
39 |     units: "1"
40 |     dtype: float32
41 |     nodata: .nan
42 | 
43 |   - name: "mask"
44 |     aliases: [MASK]
45 |     units: "1"
46 |     dtype: uint8
47 |     nodata: 0
48 |     flags_definition:
49 |       qa:
50 |         bits: [0, 1, 2, 3, 4, 5, 6, 7]
51 |         values:
52 |           0: no data
53 |           1: valid data
54 |           2: invalid data
55 | 


--------------------------------------------------------------------------------
/scripts/data/ga_ls7e_ard_3.odc-product.yaml:
--------------------------------------------------------------------------------
  1 | ---
  2 | # Product
  3 | # url: https://explorer.sandbox.dea.ga.gov.au/product/ga_ls7e_ard_3.odc-product.yaml
  4 | name: ga_ls7e_ard_3
  5 | license: CC-BY-4.0
  6 | metadata_type: eo3_landsat_ard
  7 | description: Geoscience Australia Landsat 7 Enhanced Thematic Mapper Plus Analysis
  8 |   Ready Data Collection 3
  9 | metadata:
 10 |   product:
 11 |     name: ga_ls7e_ard_3
 12 |   properties:
 13 |     eo:platform: landsat-7
 14 |     odc:producer: ga.gov.au
 15 |     eo:instrument: ETM
 16 |     odc:product_family: ard
 17 |     landsat:collection_number: 1
 18 | measurements:
 19 | - name: nbart_blue
 20 |   dtype: int16
 21 |   units: '1'
 22 |   nodata: -999
 23 |   aliases:
 24 |   - nbart_band01
 25 |   - blue
 26 | - name: nbart_green
 27 |   dtype: int16
 28 |   units: '1'
 29 |   nodata: -999
 30 |   aliases:
 31 |   - nbart_band02
 32 |   - green
 33 | - name: nbart_red
 34 |   dtype: int16
 35 |   units: '1'
 36 |   nodata: -999
 37 |   aliases:
 38 |   - nbart_band03
 39 |   - red
 40 | - name: nbart_nir
 41 |   dtype: int16
 42 |   units: '1'
 43 |   nodata: -999
 44 |   aliases:
 45 |   - nbart_band04
 46 |   - red
 47 | - name: nbart_swir_1
 48 |   dtype: int16
 49 |   units: '1'
 50 |   nodata: -999
 51 |   aliases:
 52 |   - nbart_band05
 53 |   - swir_1
 54 |   - swir1
 55 | - name: nbart_swir_2
 56 |   dtype: int16
 57 |   units: '1'
 58 |   nodata: -999
 59 |   aliases:
 60 |   - nbart_band07
 61 |   - swir_2
 62 |   - swir2
 63 | - name: nbart_panchromatic
 64 |   dtype: int16
 65 |   units: '1'
 66 |   nodata: -999
 67 |   aliases:
 68 |   - nbart_band08
 69 |   - panchromatic
 70 | - name: oa_fmask
 71 |   dtype: uint8
 72 |   units: '1'
 73 |   nodata: 0
 74 |   aliases:
 75 |   - fmask
 76 |   flags_definition:
 77 |     fmask:
 78 |       bits:
 79 |       - 0
 80 |       - 1
 81 |       - 2
 82 |       - 3
 83 |       - 4
 84 |       - 5
 85 |       - 6
 86 |       - 7
 87 |       values:
 88 |         '0': nodata
 89 |         '1': valid
 90 |         '2': cloud
 91 |         '3': shadow
 92 |         '4': snow
 93 |         '5': water
 94 |       description: Fmask
 95 | - name: oa_nbart_contiguity
 96 |   dtype: uint8
 97 |   units: '1'
 98 |   nodata: 255
 99 |   aliases:
100 |   - nbart_contiguity
101 |   flags_definition:
102 |     contiguous:
103 |       bits:
104 |       - 0
105 |       values:
106 |         '0': false
107 |         '1': true
108 | - name: oa_azimuthal_exiting
109 |   dtype: float32
110 |   units: '1'
111 |   nodata: NaN
112 |   aliases:
113 |   - azimuthal_exiting
114 | - name: oa_azimuthal_incident
115 |   dtype: float32
116 |   units: '1'
117 |   nodata: NaN
118 |   aliases:
119 |   - azimuthal_incident
120 | - name: oa_combined_terrain_shadow
121 |   dtype: uint8
122 |   units: '1'
123 |   nodata: 255
124 |   aliases:
125 |   - combined_terrain_shadow
126 | - name: oa_exiting_angle
127 |   dtype: float32
128 |   units: '1'
129 |   nodata: NaN
130 |   aliases:
131 |   - exiting_angle
132 | - name: oa_incident_angle
133 |   dtype: float32
134 |   units: '1'
135 |   nodata: NaN
136 |   aliases:
137 |   - incident_angle
138 | - name: oa_relative_azimuth
139 |   dtype: float32
140 |   units: '1'
141 |   nodata: NaN
142 |   aliases:
143 |   - relative_azimuth
144 | - name: oa_relative_slope
145 |   dtype: float32
146 |   units: '1'
147 |   nodata: NaN
148 |   aliases:
149 |   - relative_slope
150 | - name: oa_satellite_azimuth
151 |   dtype: float32
152 |   units: '1'
153 |   nodata: NaN
154 |   aliases:
155 |   - satellite_azimuth
156 | - name: oa_satellite_view
157 |   dtype: float32
158 |   units: '1'
159 |   nodata: NaN
160 |   aliases:
161 |   - satellite_view
162 | - name: oa_solar_azimuth
163 |   dtype: float32
164 |   units: '1'
165 |   nodata: NaN
166 |   aliases:
167 |   - solar_azimuth
168 | - name: oa_solar_zenith
169 |   dtype: float32
170 |   units: '1'
171 |   nodata: NaN
172 |   aliases:
173 |   - solar_zenith
174 | - name: oa_time_delta
175 |   dtype: float32
176 |   units: '1'
177 |   nodata: NaN
178 |   aliases:
179 |   - time_delta
180 | ...
181 | 


--------------------------------------------------------------------------------
/scripts/data/ga_ls8c_ard_3.odc-product.yaml:
--------------------------------------------------------------------------------
  1 | ---
  2 | # Product
  3 | # url: https://explorer.sandbox.dea.ga.gov.au/product/ga_ls8c_ard_3.odc-product.yaml
  4 | name: ga_ls8c_ard_3
  5 | license: CC-BY-4.0
  6 | metadata_type: eo3_landsat_ard
  7 | description: Geoscience Australia Landsat 8 Operational Land Imager and Thermal Infra-Red
  8 |   Scanner Analysis Ready Data Collection 3
  9 | metadata:
 10 |   product:
 11 |     name: ga_ls8c_ard_3
 12 |   properties:
 13 |     eo:platform: landsat-8
 14 |     odc:producer: ga.gov.au
 15 |     eo:instrument: OLI_TIRS
 16 |     odc:product_family: ard
 17 |     landsat:collection_number: 1
 18 | measurements:
 19 | - name: nbart_coastal_aerosol
 20 |   dtype: int16
 21 |   units: '1'
 22 |   nodata: -999
 23 |   aliases:
 24 |   - nbart_band01
 25 |   - coastal_aerosol
 26 | - name: nbart_blue
 27 |   dtype: int16
 28 |   units: '1'
 29 |   nodata: -999
 30 |   aliases:
 31 |   - nbart_band02
 32 |   - blue
 33 | - name: nbart_green
 34 |   dtype: int16
 35 |   units: '1'
 36 |   nodata: -999
 37 |   aliases:
 38 |   - nbart_band03
 39 |   - green
 40 | - name: nbart_red
 41 |   dtype: int16
 42 |   units: '1'
 43 |   nodata: -999
 44 |   aliases:
 45 |   - nbart_band04
 46 |   - red
 47 | - name: nbart_nir
 48 |   dtype: int16
 49 |   units: '1'
 50 |   nodata: -999
 51 |   aliases:
 52 |   - nbart_band05
 53 |   - nir
 54 | - name: nbart_swir_1
 55 |   dtype: int16
 56 |   units: '1'
 57 |   nodata: -999
 58 |   aliases:
 59 |   - nbart_band06
 60 |   - swir_1
 61 |   - swir1
 62 | - name: nbart_swir_2
 63 |   dtype: int16
 64 |   units: '1'
 65 |   nodata: -999
 66 |   aliases:
 67 |   - nbart_band07
 68 |   - swir_2
 69 |   - swir2
 70 | - name: nbart_panchromatic
 71 |   dtype: int16
 72 |   units: '1'
 73 |   nodata: -999
 74 |   aliases:
 75 |   - nbart_band08
 76 |   - panchromatic
 77 | - name: oa_fmask
 78 |   dtype: uint8
 79 |   units: '1'
 80 |   nodata: 0
 81 |   aliases:
 82 |   - fmask
 83 |   flags_definition:
 84 |     fmask:
 85 |       bits:
 86 |       - 0
 87 |       - 1
 88 |       - 2
 89 |       - 3
 90 |       - 4
 91 |       - 5
 92 |       - 6
 93 |       - 7
 94 |       values:
 95 |         '0': nodata
 96 |         '1': valid
 97 |         '2': cloud
 98 |         '3': shadow
 99 |         '4': snow
100 |         '5': water
101 |       description: Fmask
102 | - name: oa_nbart_contiguity
103 |   dtype: uint8
104 |   units: '1'
105 |   nodata: 255
106 |   aliases:
107 |   - nbart_contiguity
108 |   flags_definition:
109 |     contiguous:
110 |       bits:
111 |       - 0
112 |       values:
113 |         '0': false
114 |         '1': true
115 | - name: oa_azimuthal_exiting
116 |   dtype: float32
117 |   units: '1'
118 |   nodata: NaN
119 |   aliases:
120 |   - azimuthal_exiting
121 | - name: oa_azimuthal_incident
122 |   dtype: float32
123 |   units: '1'
124 |   nodata: NaN
125 |   aliases:
126 |   - azimuthal_incident
127 | - name: oa_combined_terrain_shadow
128 |   dtype: uint8
129 |   units: '1'
130 |   nodata: 255
131 |   aliases:
132 |   - combined_terrain_shadow
133 | - name: oa_exiting_angle
134 |   dtype: float32
135 |   units: '1'
136 |   nodata: NaN
137 |   aliases:
138 |   - exiting_angle
139 | - name: oa_incident_angle
140 |   dtype: float32
141 |   units: '1'
142 |   nodata: NaN
143 |   aliases:
144 |   - incident_angle
145 | - name: oa_relative_azimuth
146 |   dtype: float32
147 |   units: '1'
148 |   nodata: NaN
149 |   aliases:
150 |   - relative_azimuth
151 | - name: oa_relative_slope
152 |   dtype: float32
153 |   units: '1'
154 |   nodata: NaN
155 |   aliases:
156 |   - relative_slope
157 | - name: oa_satellite_azimuth
158 |   dtype: float32
159 |   units: '1'
160 |   nodata: NaN
161 |   aliases:
162 |   - satellite_azimuth
163 | - name: oa_satellite_view
164 |   dtype: float32
165 |   units: '1'
166 |   nodata: NaN
167 |   aliases:
168 |   - satellite_view
169 | - name: oa_solar_azimuth
170 |   dtype: float32
171 |   units: '1'
172 |   nodata: NaN
173 |   aliases:
174 |   - solar_azimuth
175 | - name: oa_solar_zenith
176 |   dtype: float32
177 |   units: '1'
178 |   nodata: NaN
179 |   aliases:
180 |   - solar_zenith
181 | - name: oa_time_delta
182 |   dtype: float32
183 |   units: '1'
184 |   nodata: NaN
185 |   aliases:
186 |   - time_delta
187 | ...
188 | 


--------------------------------------------------------------------------------
/scripts/data/linescan.odc-product.yaml:
--------------------------------------------------------------------------------
 1 | 
 2 | name: linescan
 3 | description: Auto-generated product example for linescan
 4 | metadata_type: eo3
 5 | 
 6 | metadata:
 7 |   product:
 8 |     name: linescan
 9 | 
10 | measurements:
11 | - name: 'linescan'
12 |   units: '1'
13 |   dtype: 'uint8'
14 |   nodata: 0.0
15 | ...


--------------------------------------------------------------------------------
/scripts/data/linescan.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/scripts/data/linescan.tar.gz


--------------------------------------------------------------------------------
/scripts/data/ls78.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/scripts/data/ls78.tar.gz


--------------------------------------------------------------------------------
/scripts/data/metadata.eo_plus.yaml:
--------------------------------------------------------------------------------
  1 | dataset:
  2 |     creation_dt:
  3 |     - system_information
  4 |     - time_processed
  5 |     format:
  6 |     - format
  7 |     - name
  8 |     grid_spatial:
  9 |     - grid_spatial
 10 |     - projection
 11 |     id:
 12 |     - id
 13 |     label:
 14 |     - ga_label
 15 |     measurements:
 16 |     - image
 17 |     - bands
 18 |     search_fields:
 19 |         format:
 20 |             description: File format (GeoTIFF, NetCDF)
 21 |             indexed: false
 22 |             offset:
 23 |             - format
 24 |             - name
 25 |         gqa:
 26 |             description: GQA circular error probable (90%)
 27 |             indexed: false
 28 |             offset:
 29 |             - gqa
 30 |             - cep90
 31 |             type: double
 32 |         gqa_abs_iterative_mean_xy:
 33 |             description: Mean of the absolute values of the GCP residuals after removal
 34 |                 of outliers
 35 |             indexed: false
 36 |             offset:
 37 |             - gqa
 38 |             - residual
 39 |             - abs_iterative_mean
 40 |             - xy
 41 |             type: double
 42 |         gqa_abs_xy:
 43 |             description: Absolute value of the total GCP residual
 44 |             indexed: false
 45 |             offset:
 46 |             - gqa
 47 |             - residual
 48 |             - abs
 49 |             - xy
 50 |             type: double
 51 |         gqa_cep90:
 52 |             description: Circular error probable (90%) of the values of the GCP residuals
 53 |             indexed: false
 54 |             offset:
 55 |             - gqa
 56 |             - residual
 57 |             - cep90
 58 |             type: double
 59 |         gqa_error_message:
 60 |             description: GQA error message
 61 |             indexed: false
 62 |             offset:
 63 |             - gqa
 64 |             - error_message
 65 |         gqa_final_qa_count:
 66 |             description: GQA QA point count
 67 |             indexed: false
 68 |             offset:
 69 |             - gqa
 70 |             - final_qa_count
 71 |             type: integer
 72 |         gqa_iterative_mean_xy:
 73 |             description: Mean of the values of the GCP residuals after removal of
 74 |                 outliers
 75 |             indexed: false
 76 |             offset:
 77 |             - gqa
 78 |             - residual
 79 |             - iterative_mean
 80 |             - xy
 81 |             type: double
 82 |         gqa_iterative_stddev_xy:
 83 |             description: Standard Deviation of the values of the GCP residuals after
 84 |                 removal of outliers
 85 |             indexed: false
 86 |             offset:
 87 |             - gqa
 88 |             - residual
 89 |             - iterative_stddev
 90 |             - xy
 91 |             type: double
 92 |         gqa_mean_xy:
 93 |             description: Mean of the values of the GCP residuals
 94 |             indexed: false
 95 |             offset:
 96 |             - gqa
 97 |             - residual
 98 |             - mean
 99 |             - xy
100 |             type: double
101 |         gqa_ref_source:
102 |             description: GQA reference imagery collection name
103 |             indexed: false
104 |             offset:
105 |             - gqa
106 |             - ref_source
107 |         gqa_stddev_xy:
108 |             description: Standard Deviation of the values of the GCP residuals
109 |             indexed: false
110 |             offset:
111 |             - gqa
112 |             - residual
113 |             - stddev
114 |             - xy
115 |             type: double
116 |         instrument:
117 |             description: Instrument name
118 |             offset:
119 |             - instrument
120 |             - name
121 |         lat:
122 |             description: Latitude range
123 |             max_offset:
124 |             -   - extent
125 |                 - coord
126 |                 - ur
127 |                 - lat
128 |             -   - extent
129 |                 - coord
130 |                 - lr
131 |                 - lat
132 |             -   - extent
133 |                 - coord
134 |                 - ul
135 |                 - lat
136 |             -   - extent
137 |                 - coord
138 |                 - ll
139 |                 - lat
140 |             min_offset:
141 |             -   - extent
142 |                 - coord
143 |                 - ur
144 |                 - lat
145 |             -   - extent
146 |                 - coord
147 |                 - lr
148 |                 - lat
149 |             -   - extent
150 |                 - coord
151 |                 - ul
152 |                 - lat
153 |             -   - extent
154 |                 - coord
155 |                 - ll
156 |                 - lat
157 |             type: double-range
158 |         lon:
159 |             description: Longitude range
160 |             max_offset:
161 |             -   - extent
162 |                 - coord
163 |                 - ul
164 |                 - lon
165 |             -   - extent
166 |                 - coord
167 |                 - ur
168 |                 - lon
169 |             -   - extent
170 |                 - coord
171 |                 - ll
172 |                 - lon
173 |             -   - extent
174 |                 - coord
175 |                 - lr
176 |                 - lon
177 |             min_offset:
178 |             -   - extent
179 |                 - coord
180 |                 - ul
181 |                 - lon
182 |             -   - extent
183 |                 - coord
184 |                 - ur
185 |                 - lon
186 |             -   - extent
187 |                 - coord
188 |                 - ll
189 |                 - lon
190 |             -   - extent
191 |                 - coord
192 |                 - lr
193 |                 - lon
194 |             type: double-range
195 |         platform:
196 |             description: Platform code
197 |             offset:
198 |             - platform
199 |             - code
200 |         product_type:
201 |             description: Product code
202 |             offset:
203 |             - product_type
204 |         region_code:
205 |             description: Spatial reference code from the provider
206 |             offset:
207 |             - provider
208 |             - reference_code
209 |         time:
210 |             description: Acquisition time
211 |             max_offset:
212 |             -   - extent
213 |                 - to_dt
214 |             min_offset:
215 |             -   - extent
216 |                 - from_dt
217 |             type: datetime-range
218 |     sources:
219 |     - lineage
220 |     - source_datasets
221 | description: EO metadata for DEA products with GQA.
222 | name: eo_plus


--------------------------------------------------------------------------------
/scripts/data/s2ab.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/scripts/data/s2ab.tar.gz


--------------------------------------------------------------------------------
/scripts/data/sentinel-1.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/scripts/data/sentinel-1.tar.gz


--------------------------------------------------------------------------------
/scripts/linescan.odc-product.yaml:
--------------------------------------------------------------------------------
 1 | 
 2 | name: linescan
 3 | description: Auto-generated product example for linescan
 4 | metadata_type: eo3
 5 | 
 6 | metadata:
 7 |   product:
 8 |     name: linescan
 9 | 
10 | measurements:
11 | - name: 'thermal'
12 |   units: '1'
13 |   dtype: 'uint8'
14 |   nodata: 0.0
15 | ...


--------------------------------------------------------------------------------
/scripts/vic-scenes.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EY-Data-Science-Program/2021-Better-Working-World-Data-Challenge/5860d80a3cca9f2b1458e3a0213e4317840e638c/scripts/vic-scenes.tar.gz


--------------------------------------------------------------------------------