├── README.md ├── Scalable Geospatial Analysis.pdf ├── gridded.ipynb ├── raster.ipynb └── tabular.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # scalable-geospatial-with-dask 2 | -------------------------------------------------------------------------------- /Scalable Geospatial Analysis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomAugspurger/scalable-geospatial-with-dask/2d3b045cd218b62700017f19e081bd65118916c4/Scalable Geospatial Analysis.pdf -------------------------------------------------------------------------------- /raster.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0c2bc39d-4bf2-4bfc-8dc5-1978803d407a", 6 | "metadata": {}, 7 | "source": [ 8 | "# Remote Sensing Data\n", 9 | "\n", 10 | "This notebook works with some [Landsat 8 Collection 2 Level 2](https://planetarycomputer.microsoft.com/dataset/landsat-8-c2-l2) data, hosted by the Planetary Computer. We'll use the Planetary Computer's [STAC endpoint](https://planetarycomputer.microsoft.com/docs/reference/stac/) to query for a specific date range and area of interest, and [stackstac](https://stackstac.readthedocs.io/en/latest/) to construct an [xarray](http://xarray.pydata.org/) DataArray with all of our data.\n", 11 | "\n", 12 | "We'll use [Dask](https://dask.org/) to distribute the computation." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "be3d4c49-ea8e-4828-ae44-d23b4bdd94e0", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import numpy as np\n", 23 | "import xarray as xr\n", 24 | "import rioxarray\n", 25 | "\n", 26 | "import rasterio\n", 27 | "import stackstac\n", 28 | "import pystac_client\n", 29 | "import planetary_computer\n", 30 | "import rasterio.features\n", 31 | "\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "import cartopy.crs\n", 34 | "import matplotlib.animation\n", 35 | "import pandas as pd\n", 36 | "import xrspatial.multispectral\n", 37 | "from IPython.display import GeoJSON\n", 38 | "\n", 39 | "from dask_gateway import GatewayCluster" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "5abda848-2e9f-4423-ba29-9c744850d2af", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# Start a Dask cluster through Dask Gateway and connect a client to it.\n", 50 | "cluster = GatewayCluster()\n", 51 | "client = cluster.get_client()\n", 52 | "cluster.scale(72)  # request 72 workers\n", 53 | "cluster" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "37f3cac8-631a-4d79-b2b0-9b2b5741936b", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "aoi = {  # GeoJSON FeatureCollection holding one rectangular polygon\n", 64 | "    \"type\": \"FeatureCollection\",\n", 65 | "    \"features\": [\n", 66 | "        {\n", 67 | "            \"type\": \"Feature\",\n", 68 | "            \"properties\": {},\n", 69 | "            \"geometry\": {\n", 70 | "                \"type\": \"Polygon\",\n", 71 | "                \"coordinates\": [\n", 72 | "                    [  # closed ring of (longitude, latitude) pairs\n", 73 | "                        [-93.0816650390625, 40.60144147645398],\n", 74 | "                        [-91.724853515625, 40.60144147645398],\n", 75 | "                        [-91.724853515625, 41.68111756290652],\n", 76 | "                        [-93.0816650390625, 41.68111756290652],\n", 77 | "                        [-93.0816650390625, 40.60144147645398]\n", 78 | "                    ]\n", 79 | "                ]\n", 80 | "            }\n", 81 | "        }\n", 82 | "    ]\n", 83 | "}\n", 84 | "bbox = rasterio.features.bounds(aoi)  # bounding box of the AOI\n", 85 | "GeoJSON(aoi)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "id": 
"f638ba96-40a3-4bab-bd7d-48de956b6554", 91 | "metadata": {}, 92 | "source": [ 93 | "With `pystac_client` we can translate our desired \"give me this data covering this area over this time period\" into code quite naturally." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "id": "e4d043ca-fb2c-43af-8458-a7f720ccf080", 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "%%time\n", 104 | "stac = pystac_client.Client.open(\n", 105 | "    \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n", 106 | ")\n", 107 | "search = stac.search(\n", 108 | "    collections=[\"landsat-8-c2-l2\"],  # what: the Landsat 8 C2 L2 collection\n", 109 | "    bbox=bbox, datetime=\"2016-01-01/2020-12-31\",  # where and when\n", 110 | "    limit=500,  # page size of each request\n", 111 | ")\n", 112 | "items = list(search.items())" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "id": "bd746f61-2381-40ec-9f9e-563fe1ce53e1", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "search.matched()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "bedbf262-11f6-47ac-bb3f-2406bee6a624", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "items[0].assets[\"SR_B3\"]" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "id": "2b1e1457-fe84-4db9-9087-d02fec252802", 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "items[0].properties" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "id": "fdb4d22f-e972-4185-b9d4-4c88f78208b0", 148 | "metadata": {}, 149 | "source": [ 150 | "The actual items in blob storage aren't publicly readable (we don't want people moving all of Landsat 8 out of Azure West Europe for \"free\"). You just need to \"sign\" the assets. You can sign them anonymously, you'll just be throttled. See https://planetarycomputer.microsoft.com/docs/concepts/sas/ for more." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "id": "eaa3b21e-32fb-44b2-a67c-cbebeea4ade1", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "%%time\n", 161 | "signed_items = pystac_client.ItemCollection([  # new name: `items` keeps the raw search results, so this cell can be re-run\n", 162 | "    planetary_computer.sign_assets(item)\n", 163 | "    for item in items\n", 164 | "    if item.properties[\"eo:cloud_cover\"] < 25  # keep scenes under 25% cloud cover\n", 165 | "])" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "b6dbc7e8-b689-4334-8041-ac3c7a4628b9", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "%%time\n", 176 | "ds = (\n", 177 | "    stackstac.stack(signed_items, assets=[\"SR_B2\", \"SR_B3\", \"SR_B4\", \"SR_B5\"],\n", 178 | "                    bounds_latlon=bbox, chunksize=2056)\n", 179 | "    .assign_coords(band=[\"blue\", \"green\", \"red\", \"nir\"])  # relabel the SR_B2..SR_B5 assets, in order\n", 180 | ").persist()\n", 181 | "ds" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "id": "e56aac9d-c6c9-41d8-b665-6150493dd662", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "median = ds.median(dim=\"time\").persist()  # per-pixel median across all scenes\n", 192 | "median" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "id": "09d5a571-9fde-4f90-8126-f18244706d1f", 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "img = xrspatial.multispectral.true_color(\n", 203 | "    *median.isel(x=slice(2000, 3000),\n", 204 | "                 y=slice(2000, 3000)).sel(band=[\"red\", \"green\", \"blue\"])\n", 205 | ").compute()\n", 206 | "\n", 207 | "fig, ax = plt.subplots(figsize=(16, 16),\n", 208 | "                       subplot_kw=dict(projection=cartopy.crs.AlbersEqualArea()))\n", 209 | "# NOTE(review): no `transform=` is passed below, so x/y are presumably drawn as Albers coordinates - confirm intended\n", 210 | "image = img.plot.imshow(ax=ax, add_labels=False)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "3adc7dc1-05ac-42a3-8475-c2e519457088", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "ndvi = 
xrspatial.multispectral.ndvi(median.sel(band=\"nir\"), median.sel(band=\"red\")).persist()\n", 221 | "ndvi" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "id": "09b349cb-6e76-4c8d-8ce2-42bee8231774", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "import ipyleaflet\n", 232 | "# Interactive map of the NDVI computed from the median composite.\n", 233 | "m = ipyleaflet.Map(center=[41.01447619471347, -92.48968322606777], scroll_wheel_zoom=True)\n", 234 | "m.layout.height = \"600px\"\n", 235 | "stackstac.add_to_map(ndvi, m, name=\"ndvi\", range=(-0.15, 0.5))  # values from -0.15 to 0.5 span the colour scale\n", 236 | "m" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "id": "20fff68f-9127-4f3e-8fa0-016e4d237f9d", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "ds" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "id": "37e708e7-13aa-493c-ad4a-5fcb282834a1", 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# NDVI for every scene in the stack: (NIR - red) / (NIR + red)\n", 257 | "red = ds.sel(band=\"red\")\n", 258 | "nir = ds.sel(band=\"nir\")\n", 259 | "ndvi_ts = (nir - red) / (nir + red)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "c80289ed-5f7b-4f86-88dd-d7bdc24de134", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "fig, ax = plt.subplots(figsize=(12, 6))\n", 270 | "ndvi_ts.median(dim=[\"y\", \"x\"]).plot(ax=ax)  # one spatial median per time step\n", 271 | "ax.set(title=\"Spatial median NDVI over the area of interest\", ylabel=\"NDVI\");" 272 | ] 273 | } 274 | ], 275 | "metadata": { 276 | "kernelspec": { 277 | "display_name": "Python 3", 278 | "language": "python", 279 | "name": "python3" 280 | }, 281 | "language_info": { 282 | "codemirror_mode": { 283 | "name": "ipython", 284 | "version": 3 285 | }, 286 | "file_extension": ".py", 287 | "mimetype": "text/x-python", 288 | "name": "python", 289 | "nbconvert_exporter": "python", 290 | "pygments_lexer": "ipython3", 291 | "version": "3.8.8" 292 | }, 293 | "widgets": { 294 | "application/vnd.jupyter.widget-state+json": { 295 | "state": {}, 296 | "version_major": 2, 297 | 
"version_minor": 0 298 | } 299 | } 300 | }, 301 | "nbformat": 4, 302 | "nbformat_minor": 5 303 | } 304 | --------------------------------------------------------------------------------