├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── data ├── Delmarva_PL_House_Final2_extent_epsg26918.geojson ├── Delmarva_PL_House_Final2_extent_epsg4326.geojson ├── delmarva_valid_naip_area.geojson ├── karnataka_predictions_polygons_validated_2020.geojson ├── naip_tiles_that_intersect_with_delmarva.txt ├── poultry_barn_6013_random_polygons_epsg26918.geojson ├── poultry_barn_labels.csv ├── solar_farm_labels.csv ├── solar_farms_935_random_polygons_epsg4326.geojson └── solar_farms_valid_s2_area.geojson ├── environment.yml ├── experiments ├── run_parameter_sweep.py └── run_parameter_sweep_color.py ├── images ├── data_examples.png └── sentinel2_demo.gif ├── notebooks ├── Basic library function demos.ipynb ├── Data - Create random polygons.ipynb ├── Demo - Planetary computer dataloader.ipynb ├── Experiments - Color over time.ipynb ├── Experiments - Heuristic method.ipynb ├── Experiments - Learned theta and LR methods.ipynb ├── Utils - Labeling tool.ipynb └── Utils - Make predictions by year.ipynb ├── predictions ├── poultry_barns-64-200_predictions.geojson ├── poultry_barns-64-200_predictions_2011.geojson ├── poultry_barns-64-200_predictions_2012.geojson ├── poultry_barns-64-200_predictions_2013.geojson ├── poultry_barns-64-200_predictions_2014.geojson ├── poultry_barns-64-200_predictions_2015.geojson ├── poultry_barns-64-200_predictions_2016.geojson ├── poultry_barns-64-200_predictions_2017.geojson └── poultry_barns-64-200_predictions_2018.geojson ├── results ├── color │ ├── poultry_barns-0-100 │ │ └── results.csv │ ├── poultry_barns-0-200 │ │ └── results.csv │ ├── poultry_barns-0-400 │ │ └── results.csv │ ├── poultry_barns_random-0-100 │ │ └── results.csv │ ├── poultry_barns_random-0-200 │ │ └── results.csv │ ├── poultry_barns_random-0-400 │ │ └── results.csv │ ├── solar_farms_reduced-0-0.016 │ │ └── results.csv │ ├── solar_farms_reduced-0-0.024 │ │ └── results.csv │ ├── solar_farms_reduced_random-0-0.016 │ │ └── 
results.csv │ └── solar_farms_reduced_random-0-0.024 │ │ └── results.csv ├── heuristic-theta_results.csv ├── kl │ ├── poultry_barns-128-100 │ │ └── results.csv │ ├── poultry_barns-128-200 │ │ └── results.csv │ ├── poultry_barns-128-400 │ │ └── results.csv │ ├── poultry_barns-16-100 │ │ └── results.csv │ ├── poultry_barns-16-200 │ │ └── results.csv │ ├── poultry_barns-16-400 │ │ └── results.csv │ ├── poultry_barns-16-50 │ │ └── results.csv │ ├── poultry_barns-32-100 │ │ └── results.csv │ ├── poultry_barns-32-200 │ │ └── results.csv │ ├── poultry_barns-32-400 │ │ └── results.csv │ ├── poultry_barns-64-100 │ │ └── results.csv │ ├── poultry_barns-64-200 │ │ └── results.csv │ ├── poultry_barns-64-400 │ │ └── results.csv │ ├── poultry_barns_random-16-100 │ │ └── results.csv │ ├── poultry_barns_random-16-200 │ │ └── results.csv │ ├── poultry_barns_random-16-400 │ │ └── results.csv │ ├── poultry_barns_random-32-100 │ │ └── results.csv │ ├── poultry_barns_random-32-200 │ │ └── results.csv │ ├── poultry_barns_random-32-400 │ │ └── results.csv │ ├── poultry_barns_random-64-100 │ │ └── results.csv │ ├── poultry_barns_random-64-200 │ │ └── results.csv │ ├── poultry_barns_random-64-400 │ │ └── results.csv │ ├── solar_farms_reduced-128-0.016 │ │ └── results.csv │ ├── solar_farms_reduced-128-0.024 │ │ └── results.csv │ ├── solar_farms_reduced-16-0.016 │ │ └── results.csv │ ├── solar_farms_reduced-16-0.024 │ │ └── results.csv │ ├── solar_farms_reduced-32-0.016 │ │ └── results.csv │ ├── solar_farms_reduced-32-0.024 │ │ └── results.csv │ ├── solar_farms_reduced-64-0.016 │ │ └── results.csv │ ├── solar_farms_reduced-64-0.024 │ │ └── results.csv │ ├── solar_farms_reduced_random-16-0.016 │ │ └── results.csv │ ├── solar_farms_reduced_random-16-0.024 │ │ └── results.csv │ ├── solar_farms_reduced_random-32-0.016 │ │ └── results.csv │ ├── solar_farms_reduced_random-32-0.024 │ │ └── results.csv │ ├── solar_farms_reduced_random-64-0.016 │ │ └── results.csv │ └── 
solar_farms_reduced_random-64-0.024 │ │ └── results.csv ├── learned-theta_lr_results.csv ├── poultry_barn_inter_year_color_distances.npy └── solar_farm_inter_year_color_distances.npy ├── run_algorithm.py └── temporal_cluster_matching ├── DataInterface.py ├── __init__.py ├── algorithms.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .ipynb_checkpoints 3 | __pycache__ 4 | 5 | data/tmp/ 6 | output/ 7 | 8 | data/Delmarva_PL_House_Final2_epsg26918.geojson 9 | data/Delmarva_PL_House_Final2_epsg4326.geojson -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Temporal Cluster Matching for Change Detection of Structures from Satellite Imagery 2 | 3 | **Jump to: [Setup](#setup) | [Overview of the implementation](#overview-of-the-implementation) | [Running experiments](#running-experiments) | [Results](#results) | [Data sources](#data-sources)** 4 | 5 | This repo contains an implementation of the Temporal Cluster Matching (TCM) algorithm and several baselines from the paper ["Temporal Cluster Matching for Change Detection of Structures from Satellite Imagery"](https://arxiv.org/abs/2103.09787). 
The Temporal Cluster Matching algorithm attempts to determine when a structure was constructed given that structure's footprint at a known point in time, as well as a time series of remotely sensed imagery leading up to that point in time. In the accompanying paper we use this algorithm with a dataset of _poultry barn footprints_ from the Delmarva Peninsula in the US and a dataset of _solar farm footprints_ over the Indian state of Karnataka. 6 | 7 | For a quick overview of the functionality implemented in this repo, see [notebooks/Basic library function demos.ipynb](notebooks/Basic%20library%20function%20demos.ipynb). 8 | 9 | If you make use of this implementation in your own project or want to refer to it in a scientific publication, **please consider referencing this GitHub repository and citing our [paper](https://arxiv.org/pdf/2103.09787.pdf)**: 10 | ``` 11 | @inproceedings{robinsonTemporal2021, 12 | author = {Caleb Robinson and Anthony Ortiz and Juan M. Lavista Ferres and Brandon Anderson and Daniel E. Ho}, 13 | title = {Temporal Cluster Matching for Change Detection of Structures from Satellite Imagery}, 14 | year = {2021}, 15 | booktitle={Proceedings of the 4th ACM SIGCAS Conference on Computing and Sustainable Societies}, 16 | } 17 | ``` 18 | 19 | 20 | ## Changelog 21 | 22 | - **7/30/2021** 23 | - Added a dataloader for Sentinel 2 imagery based on the [Planetary Computer](https://planetarycomputer.microsoft.com/). 24 | - Added a notebook for demonstrating the new dataloader. This notebook downloads the time series of non-cloudy Sentinel 2 scenes for an input geometry and allows, for example, more precise change dates to be calculated. 25 | 26 | 27 | - Fixed a bug in `algorithms.calculate_change_values` (if there were empty clusters then KL divergence value could be 'inf'). 28 | 29 | 30 | ## Example figure 31 | 32 | (A and B) Examples of two poultry barn footprints over 5 years of NAIP imagery. 
We observe inter-year variability of NAIP imagery and the change in the relation of color/texture between the footprint and neighborhood when a footprint is "developed". (C and D) Examples of two solar farm footprints over 5 years of Sentinel 2 imagery. Note, in A we outline the building footprint location in yellow through the entire series of imagery, but omit this outline in remaining rows. 33 | 34 |

35 | 36 |

37 | 38 | 39 | ## Setup 40 | 41 | First, run the following commands to create a conda environment, `tcm`, with the necessary dependencies for running the scripts and notebooks in this repository. 42 | ``` 43 | conda env create -f environment.yml 44 | conda activate tcm 45 | ``` 46 | 47 | Then, download the `Delmarva_PL_House_Final.zip` file from the Soroka and Duren 2020 *Poultry barn* dataset from [here](https://www.sciencebase.gov/catalog/item/5e0a3fcde4b0b207aa0d794e). From the unzipped directory run: 48 | ``` 49 | conda activate tcm 50 | ogr2ogr -of GeoJSON -t_srs epsg:4326 Delmarva_PL_House_Final2_epsg4326.geojson Delmarva_PL_House_Final2.shp 51 | ogr2ogr -of GeoJSON -t_srs epsg:26918 Delmarva_PL_House_Final2_epsg26918.geojson Delmarva_PL_House_Final2.shp 52 | ``` 53 | 54 | Finally, copy the two generated files, `Delmarva_PL_House_Final2_epsg4326.geojson` and `Delmarva_PL_House_Final2_epsg26918.geojson`, to the `data/` directory in this repository. 55 | 56 | 57 | ## Overview of the implementation 58 | 59 | Our implementation of TCM is broken up into a [data loader implementation](temporal_cluster_matching/DataInterface.py) and the [main algorithm implementation](temporal_cluster_matching/algorithms.py). 60 | For each dataset that we would like to run our algorithm on, we must implement a data loader class following the interface defined in the `AbstractDataLoader`. This class is responsible for interfacing with the remotely sensed imagery (i.e. grabbing patches of imagery over time around arbitrary footprints). 61 | For example, we have implemented a `NAIPDataLoader` that can load a series of patches of NAIP imagery over time given an input geometry. 62 | With this, the [main algorithm implementation](temporal_cluster_matching/algorithms.py) can be agnostic of the dataset we want to use. 63 | The main algorithm implementation, i.e. 
the `temporal_cluster_matching.algorithms.calculate_change_values(...)` method, will return a list of KL-divergence values for a single footprint / series of imagery patches, then it is up to the user to use these in a decision function. In the paper we describe the main TCM algorithm with a decision threshold value (theta), as well as baselines that use the KL-divergence values in a logistic regression model. For an example of how these two parts of the implementation fit together, see [notebooks/Basic library function demos.ipynb](notebooks/Basic%20library%20function%20demos.ipynb). 64 | 65 | The `run_algorithm.py` script is used to run the main algorithm implementation over a set of footprints with a given dataloader. For examples of this see the next section on [running experiments](#running-experiments). 66 | 67 | 68 | ## Running experiments 69 | 70 | To reproduce the main experiments we have included in the paper, first generate the KL-divergence values over the footprints for each dataset (as well as over random polygons sampled from the respective AOIs): 71 | ``` 72 | python experiments/run_parameter_sweep_color.py # note, this will take a long time 73 | python experiments/run_parameter_sweep.py # note, this will take even longer 74 | ``` 75 | 76 | As these take a long time to run, we have included the results of these runs in the repository already (see `results/`). You can reproduce a single run as: 77 | ``` 78 | python run_algorithm.py --dataset poultry_barns --num_clusters 32 --buffer 400 --output_dir results/kl/poultry_barns-32-400/ --algorithm kl 79 | ``` 80 | 81 | The second step is to generate predictions on when each footprint was constructed. See the following notebooks for examples that do this: 82 | - `notebooks/Experiments - Learned theta and LR methods.ipynb` 83 | - `notebooks/Experiments - Heuristic method.ipynb` 84 | 85 | 86 | ## Results 87 | 88 | As mentioned in the previous section, we have included the results that we generated for the paper. 
The following is a list of the files containing these results and a brief description of how they were generated / what they contain: 89 | - `results/poultry_barn_inter_year_color_distances.npy` 90 | - Generated by `notebooks/Experiments - Color over time.ipynb`. 91 | - This contains the differences in average footprint color between consecutive dates for each footprint in the poultry barn dataset. 92 | - `results/solar_farm_inter_year_color_distances.npy` 93 | - Generated by `notebooks/Experiments - Color over time.ipynb`. 94 | - This contains the differences in average footprint color between consecutive dates for each footprint in the solar farm dataset. 95 | - `results/heuristic-theta_results.csv` 96 | - Generated by `notebooks/Experiments - Heuristic method.ipynb` 97 | - This contains the Bhattacharyya coefficient, accuracy, and MAE results for each combination of {dataset, buffer, number of clusters} that we experiment with. 98 | - `results/learned-theta_lr_results.csv` 99 | - Generated by `notebooks/Experiments - Learned theta and LR methods.ipynb` 100 | - This contains the accuracy and MAE results for each combination of {dataset, buffer, number of clusters} that we experiment with. 101 | - `results/kl/*` 102 | - Generated by `experiments/run_parameter_sweep.py` 103 | - These are the raw output (i.e. list of KL divergences) for each combination of {dataset, buffer, number of clusters} that we experiment with using the TCM algorithm. 104 | - `results/color/*` 105 | - Generated by `experiments/run_parameter_sweep_color.py` 106 | - These are the raw output (i.e. list of Euclidean distances) for each combination of {dataset, buffer, number of clusters} that we experiment with using the "Average-color" baseline. 
107 | 108 | 109 | ## Data sources 110 | 111 | This project uses two datasets, **Poultry barns** and **Solar farms**: 112 | - The **Poultry barns** dataset is a [public dataset from the USGS](https://www.sciencebase.gov/catalog/item/5e0a3fcde4b0b207aa0d794e). See the following citation: 113 | - Soroka, A.M., and Duren, Z., 2020, Poultry feeding operations on the Delaware, Maryland, and Virginia Peninsula from 2016 to 2017: U.S. Geological Survey data release, https://doi.org/10.5066/P9MO25Z7. 114 | - The **Solar farms** dataset is a collection of model detected solar farms (polygons) over the Indian state of Karnataka, included as `data/karnataka_predictions_polygons_validated_2020.geojson`. 115 | 116 | The `data/delmarva_valid_naip_area.geojson` and `data/solar_farms_valid_s2_area.geojson` files give the spatial extent of these datasets. The `data/naip_tiles_that_intersect_with_delmarva.txt` file contains a list of NAIP tiles (with respect to the [Azure Open Dataset NAIP catalog](https://azure.microsoft.com/en-us/services/open-datasets/catalog/naip/)) that intersect with the spatial extent of the **Poultry barns**. The `temporal_cluster_matching/DataInterface.py` file contains methods for interfacing with the NAIP and Sentinel 2 imagery we use in each dataset. Finally, our manual construction date labels for these two datasets can be found in `data/poultry_barn_labels.csv` and `data/solar_farm_labels.csv`. 117 | 118 | 119 | The `data/poultry_barn_6013_random_polygons_epsg26918.geojson` and `data/solar_farms_935_random_polygons_epsg4326.geojson` files contain the random selection of polygons over the two datasets used in our proposed heuristic method. These were generated by `notebooks/Data - Create random polygons.ipynb`. 120 | 121 | 122 | # License 123 | 124 | This project is licensed under the [MIT License](LICENSE). 
125 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. 
buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 
8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /data/Delmarva_PL_House_Final2_extent_epsg26918.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "name": "gtextent", 4 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:EPSG::26918" } }, 5 | "features": [ 6 | { "type": "Feature", "properties": { "MINX": 398239.74870000035, "MINY": 4135212.1492000017, "MAXX": 489763.13830596552, "MAXY": 4377752.8646177603, "CNTX": 444001.44350298296, "CNTY": 4256482.5069088805, "AREA": 22198148392.48904, "PERIM": 668128.21004744747, "HEIGHT": 242540.71541775856, "WIDTH": 91523.389605965174 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 398239.748699384566862, 4135212.149090151302516 ], [ 489763.138305904110894, 4135212.149090138264 ], [ 489763.138305896311067, 4377752.864504520781338 ], [ 398239.748699322342873, 4377752.864504532888532 ], [ 398239.748699384566862, 4135212.149090151302516 ] ] ] } } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /data/Delmarva_PL_House_Final2_extent_epsg4326.geojson: 
-------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "name": "gtextent", 4 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 5 | "features": [ 6 | { "type": "Feature", "properties": { "MINX": 398239.74870000035, "MINY": 4135212.1492000017, "MAXX": 489763.13830596552, "MAXY": 4377752.8646177603, "CNTX": 444001.44350298296, "CNTY": 4256482.5069088805, "AREA": 22198148392.48904, "PERIM": 668128.21004744747, "HEIGHT": 242540.71541775856, "WIDTH": 91523.389605965174 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -76.149071073387091, 37.358047323260465 ], [ -75.115604587404903, 37.363572429560534 ], [ -75.119146687775839, 39.549392006228942 ], [ -76.184268592970369, 39.543419582746161 ], [ -76.149071073387091, 37.358047323260465 ] ] ] } } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /data/delmarva_valid_naip_area.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "name": "delmarva_valid_naip_area", 4 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:EPSG::26918" } }, 5 | "features": [ 6 | { "type": "Feature", "properties": { "url": "v002\/md\/2011\/md_100cm_2011\/37075\/m_3707501_ne_18_1_20110604.tif", "layer": "delmarva_testing_set_tiles delmarva_testing_set", "path": "C:\/Users\/davrob\/data\/cafo\/delmarva_testing_set_tiles.geojson" }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 439474.8, 4136354.0 ], [ 439474.8, 4129516.2 ], [ 439432.0, 4129516.2 ], [ 439432.0, 4129460.0 ], [ 439431.0, 4129460.0 ], [ 439431.0, 4129459.0 ], [ 433238.0, 4129459.0 ], [ 433238.0, 4129501.0 ], [ 427700.0, 4129501.0 ], [ 427700.0, 4129546.0 ], [ 422162.0, 4129546.0 ], [ 422162.0, 4129595.0 ], [ 416624.0, 4129595.0 ], [ 416624.0, 4129649.0 ], [ 411084.0, 4129649.0 ], [ 411084.0, 4129705.0 ], [ 405546.0, 
4129705.0 ], [ 405546.0, 4137306.0 ], [ 411158.0, 4137306.0 ], [ 411158.0, 4144180.0 ], [ 411162.0, 4144180.0 ], [ 411162.0, 4144231.8 ], [ 411232.0, 4144231.8 ], [ 411232.0, 4151114.0 ], [ 411306.0, 4151114.0 ], [ 411306.0, 4158048.0 ], [ 411309.0, 4158048.0 ], [ 411309.0, 4158100.2 ], [ 411378.0, 4158100.2 ], [ 411378.0, 4164982.0 ], [ 416968.0, 4164982.0 ], [ 416968.0, 4171860.0 ], [ 417016.2, 4171860.0 ], [ 417016.2, 4171911.6 ], [ 422548.0, 4171911.6 ], [ 422548.0, 4178134.0 ], [ 417106.0, 4178134.0 ], [ 417106.0, 4178188.0 ], [ 411600.0, 4178188.0 ], [ 411600.0, 4185178.0 ], [ 406174.0, 4185178.0 ], [ 406174.0, 4192780.0 ], [ 406252.0, 4192780.0 ], [ 406252.0, 4199716.0 ], [ 406300.2, 4199716.0 ], [ 406300.2, 4199767.2 ], [ 406322.0, 4199767.2 ], [ 406322.0, 4206658.0 ], [ 406400.0, 4206658.0 ], [ 406400.0, 4212968.0 ], [ 401000.0, 4212968.0 ], [ 401000.0, 4220596.0 ], [ 401084.0, 4220596.0 ], [ 401084.0, 4226906.0 ], [ 395690.0, 4226906.0 ], [ 395690.0, 4234534.0 ], [ 395780.0, 4234534.0 ], [ 395780.0, 4241470.0 ], [ 395838.0, 4241470.0 ], [ 395838.0, 4241510.4 ], [ 395870.0, 4241510.4 ], [ 395870.0, 4248406.0 ], [ 395960.0, 4248406.0 ], [ 395960.0, 4255342.0 ], [ 396016.2, 4255342.0 ], [ 396016.2, 4255380.6 ], [ 396050.0, 4255380.6 ], [ 396050.0, 4262272.0 ], [ 396140.0, 4262272.0 ], [ 396140.0, 4269208.0 ], [ 396195.0, 4269208.0 ], [ 396195.0, 4269251.4 ], [ 396230.0, 4269251.4 ], [ 396230.0, 4276144.0 ], [ 396320.0, 4276144.0 ], [ 396320.0, 4283080.0 ], [ 396373.8, 4283080.0 ], [ 396373.8, 4283122.8 ], [ 396404.0, 4283122.8 ], [ 396404.0, 4290016.0 ], [ 396494.0, 4290016.0 ], [ 396494.0, 4296952.0 ], [ 396553.8, 4296952.0 ], [ 396553.8, 4296994.2 ], [ 396584.0, 4296994.2 ], [ 396584.0, 4303888.0 ], [ 396680.0, 4303888.0 ], [ 396680.0, 4310824.0 ], [ 396733.8, 4310824.0 ], [ 396733.8, 4310865.6 ], [ 396770.0, 4310865.6 ], [ 396770.0, 4317760.0 ], [ 396860.0, 4317760.0 ], [ 396860.0, 4324696.0 ], [ 396914.4, 4324696.0 ], [ 396914.4, 4324738.2 ], [ 396950.0, 
4324738.2 ], [ 396950.0, 4331632.0 ], [ 397040.0, 4331632.0 ], [ 397040.0, 4338568.0 ], [ 397095.6, 4338568.0 ], [ 397095.6, 4338610.2 ], [ 397130.0, 4338610.2 ], [ 397130.0, 4345504.0 ], [ 397220.0, 4345504.0 ], [ 397220.0, 4352440.0 ], [ 397276.8, 4352440.0 ], [ 397276.8, 4352482.8 ], [ 397310.0, 4352482.8 ], [ 397310.0, 4359376.0 ], [ 397400.0, 4359376.0 ], [ 397400.0, 4365755.4 ], [ 392129.4, 4365755.4 ], [ 392129.4, 4373310.0 ], [ 392210.0, 4373310.0 ], [ 392210.0, 4380262.0 ], [ 392224.8, 4380262.0 ], [ 392224.8, 4380302.4 ], [ 398334.0, 4380302.4 ], [ 398334.0, 4380228.6 ], [ 403660.2, 4380228.6 ], [ 403660.2, 4380190.0 ], [ 403672.0, 4380190.0 ], [ 403672.0, 4380160.2 ], [ 409072.2, 4380160.2 ], [ 409072.2, 4380094.8 ], [ 414398.4, 4380094.8 ], [ 414398.4, 4380052.0 ], [ 414412.0, 4380052.0 ], [ 414412.0, 4380033.6 ], [ 419809.8, 4380033.6 ], [ 419809.8, 4379975.4 ], [ 425136.0, 4379975.4 ], [ 425136.0, 4379932.0 ], [ 425146.0, 4379932.0 ], [ 425146.0, 4379921.4 ], [ 430548.0, 4379921.4 ], [ 430548.0, 4379871.0 ], [ 435874.2, 4379871.0 ], [ 435874.2, 4379830.0 ], [ 435886.0, 4379830.0 ], [ 435886.0, 4379824.8 ], [ 441285.6, 4379824.8 ], [ 441285.6, 4379781.6 ], [ 446611.8, 4379781.6 ], [ 446611.8, 4379742.6 ], [ 452023.8, 4379742.6 ], [ 452023.8, 4379706.6 ], [ 457349.4, 4379706.6 ], [ 457349.4, 4372083.6 ], [ 457349.0, 4372083.6 ], [ 457349.0, 4372083.0 ], [ 451980.0, 4372083.0 ], [ 451980.0, 4365834.0 ], [ 457272.6, 4365834.0 ], [ 457272.6, 4358812.0 ], [ 462617.0, 4358812.0 ], [ 462617.0, 4358811.0 ], [ 462660.0, 4358811.0 ], [ 462660.0, 4351902.0 ], [ 467971.8, 4351902.0 ], [ 467971.8, 4351848.0 ], [ 467972.0, 4351848.0 ], [ 467972.0, 4344286.0 ], [ 467943.0, 4344286.0 ], [ 467943.0, 4337406.0 ], [ 467943.0, 4337350.0 ], [ 467914.2, 4337350.0 ], [ 467914.2, 4330414.8 ], [ 467886.0, 4330414.8 ], [ 467886.0, 4324134.6 ], [ 473307.6, 4324134.6 ], [ 473307.6, 4317124.0 ], [ 478653.0, 4317124.0 ], [ 478653.0, 4309570.0 ], [ 478634.0, 4309570.0 ], [ 478634.0, 
4303236.0 ], [ 484036.0, 4303236.0 ], [ 484036.0, 4303234.8 ], [ 484079.4, 4303234.8 ], [ 484079.4, 4296341.4 ], [ 489447.6, 4296341.4 ], [ 489447.6, 4296332.4 ], [ 494917.8, 4296332.4 ], [ 494917.8, 4288735.8 ], [ 494913.0, 4288735.8 ], [ 494913.0, 4281856.2 ], [ 494908.2, 4281856.2 ], [ 494908.2, 4274865.0 ], [ 494903.4, 4274865.0 ], [ 494903.4, 4267985.4 ], [ 494898.6, 4267985.4 ], [ 494898.6, 4260994.8 ], [ 494893.8, 4260994.8 ], [ 494893.8, 4254115.2 ], [ 494889.6, 4254115.2 ], [ 494889.6, 4247125.2 ], [ 494884.8, 4247125.2 ], [ 494884.8, 4240245.6 ], [ 494880.0, 4240245.6 ], [ 494880.0, 4233255.0 ], [ 494848.0, 4233255.0 ], [ 494848.0, 4233242.0 ], [ 489376.0, 4233242.0 ], [ 489376.0, 4226312.0 ], [ 489364.0, 4226312.0 ], [ 489364.0, 4219376.0 ], [ 489358.0, 4219376.0 ], [ 489358.0, 4212446.0 ], [ 483896.4, 4212446.0 ], [ 483896.4, 4205531.4 ], [ 483862.0, 4205531.4 ], [ 483862.0, 4205516.0 ], [ 478352.0, 4205516.0 ], [ 478352.0, 4198609.0 ], [ 478350.0, 4198609.0 ], [ 478334.0, 4198609.0 ], [ 478334.0, 4191674.0 ], [ 478333.0, 4191674.0 ], [ 472861.2, 4191674.0 ], [ 472861.2, 4184812.8 ], [ 472818.0, 4184812.8 ], [ 472818.0, 4184756.0 ], [ 466692.0, 4184756.0 ], [ 466692.0, 4184757.0 ], [ 466692.0, 4184758.0 ], [ 466690.0, 4184758.0 ], [ 466690.0, 4184777.0 ], [ 461190.0, 4184777.0 ], [ 461190.0, 4184778.0 ], [ 461190.0, 4184799.0 ], [ 456291.0, 4184799.0 ], [ 456291.0, 4177894.0 ], [ 456290.0, 4177894.0 ], [ 456290.0, 4177893.0 ], [ 456254.0, 4177893.0 ], [ 456254.0, 4170960.0 ], [ 456253.0, 4170960.0 ], [ 456253.0, 4170959.0 ], [ 450749.4, 4170959.0 ], [ 450749.4, 4164057.6 ], [ 450708.0, 4164057.6 ], [ 450708.0, 4157178.6 ], [ 450666.6, 4157178.6 ], [ 450666.6, 4150189.2 ], [ 450624.0, 4150189.2 ], [ 450624.0, 4150187.0 ], [ 445057.0, 4150187.0 ], [ 445057.0, 4143289.0 ], [ 445057.0, 4143288.0 ], [ 445012.0, 4143288.0 ], [ 445012.0, 4136354.0 ], [ 439474.8, 4136354.0 ] ], [ [ 428827.8, 4185677.4 ], [ 433682.0, 4185677.4 ], [ 433682.0, 4191909.0 ], [ 
428240.0, 4191909.0 ], [ 428240.0, 4191944.0 ], [ 423348.0, 4191944.0 ], [ 423348.0, 4185727.8 ], [ 428827.8, 4185727.8 ], [ 428827.8, 4185677.4 ] ] ] ] } } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /data/poultry_barn_labels.csv: -------------------------------------------------------------------------------- 1 | idx,year 2 | 3377,2011 3 | 55,2011 4 | 2642,2015 5 | 5298,2011 6 | 2081,2011 7 | 1424,2011 8 | 2617,2011 9 | 4562,2011 10 | 2518,2011 11 | 1936,2011 12 | 935,2011 13 | 137,2011 14 | 1614,2011 15 | 3182,2011 16 | 2907,2013 17 | 939,2011 18 | 4149,2011 19 | 5885,2017 20 | 5622,2011 21 | 4731,2011 22 | 3549,2011 23 | 4081,2011 24 | 1951,2011 25 | 4751,2015 26 | 537,2011 27 | 3461,2015 28 | 2936,2013 29 | 1799,2011 30 | 1387,2011 31 | 664,2011 32 | 1006,2011 33 | 3378,2011 34 | 2180,2011 35 | 3230,2011 36 | 5034,2011 37 | 4431,2011 38 | 4714,2017 39 | 3571,2011 40 | 5432,2011 41 | 608,2011 42 | 4885,2011 43 | 3109,2017 44 | 88,2011 45 | 3243,2011 46 | 3426,2011 47 | 2032,2011 48 | 2999,2017 49 | 5487,2011 50 | 4542,2011 51 | 244,2011 52 | 2461,2011 53 | 4205,2011 54 | 2408,2011 55 | 1031,2011 56 | 1026,2011 57 | 3940,2011 58 | 1649,2011 59 | 3714,2017 60 | 4820,2011 61 | 3142,2017 62 | 838,2011 63 | 2334,2011 64 | 517,2017 65 | 3869,2011 66 | 2417,2011 67 | 3712,2011 68 | 2814,2011 69 | 1381,2011 70 | 478,2011 71 | 781,2011 72 | 2060,2011 73 | 922,2011 74 | 3649,2011 75 | 770,2011 76 | 1784,2011 77 | 2880,2015 78 | 4060,2011 79 | 429,2011 80 | 4213,2017 81 | 4645,2011 82 | 264,2011 83 | 5575,2017 84 | 4239,2011 85 | 1618,2011 86 | 4321,2017 87 | 5401,2011 88 | 4989,2011 89 | 4943,2011 90 | 1459,2011 91 | 361,2011 92 | 4328,2011 93 | 1837,2011 94 | 5987,2011 95 | 4796,2015 96 | 402,2011 97 | 2991,2011 98 | 25,2011 99 | 4822,2011 100 | 702,2011 101 | 4444,2011 102 | 3227,2011 103 | 446,2011 104 | 1202,2011 105 | 2324,2015 106 | 5398,2011 107 | 2109,2011 108 | 2849,2011 109 | 1633,2011 110 | 4215,2011 111 | 
947,2011 112 | 2457,2011 113 | 4247,2011 114 | 1570,2017 115 | 4357,2011 116 | 2272,2017 117 | 1,2016 118 | 3593,2011 119 | 5930,2011 120 | 2952,2013 121 | 1644,2015 122 | 2160,2011 123 | 1478,2011 124 | 974,2011 125 | 3123,2017 126 | 4008,2011 127 | 3205,2011 128 | 2389,2011 129 | 628,2011 130 | 3148,2011 131 | 1864,2011 132 | 363,2011 133 | 636,2011 134 | 5166,2011 135 | 4293,2011 136 | 4076,2011 137 | 3673,2011 138 | 3655,2011 139 | 3920,2011 140 | 4781,2011 141 | 5915,2011 142 | 895,2011 143 | 4015,2011 144 | 4759,2011 145 | 1301,2011 146 | 2131,2011 147 | 3727,2017 148 | 4380,2011 149 | 5257,2011 150 | 780,2011 151 | 5836,2011 152 | 3015,2011 153 | 3699,2011 154 | 3744,2011 155 | 2234,2011 156 | 5447,2011 157 | 341,2011 158 | 4043,2011 159 | 146,2011 160 | 4053,2011 161 | 5131,2011 162 | 592,2011 163 | 1278,2011 164 | 3302,2011 165 | 5911,2011 166 | 5165,2011 167 | 3908,2011 168 | 5366,2011 169 | 4399,2011 170 | 570,2011 171 | 2038,2011 172 | 4769,2011 173 | 2346,2011 174 | 3237,2011 175 | 2607,2017 176 | 1509,2011 177 | 1566,2011 178 | 5481,2011 179 | 5998,2011 180 | 4806,2011 181 | 2605,2015 182 | 435,2017 183 | 5347,2011 184 | 2434,2017 185 | 2606,2015 186 | 5516,2017 187 | 5544,2011 188 | 1987,2011 189 | 1115,2011 190 | 4964,2011 191 | 2347,2011 192 | 2172,2011 193 | 2540,2017 194 | 350,2011 195 | 3163,2011 196 | 1506,2011 197 | 4686,2017 198 | 12,2011 199 | 615,2011 200 | 2067,2011 201 | 5220,2015 202 | 328,2011 203 | 732,2011 204 | 993,2011 205 | 3108,2015 206 | 2893,2011 207 | 1136,2011 208 | 3636,2011 209 | 2105,2011 210 | 609,2011 211 | 1035,2011 212 | 5238,2011 213 | 2193,2011 214 | 3813,2011 215 | 194,2011 216 | 4616,2011 217 | 4306,2011 218 | 1722,2017 219 | 3385,2011 220 | 2717,2011 221 | 3657,2011 222 | 4892,2017 223 | 1312,2015 224 | 5462,2011 225 | 2348,2011 226 | 3596,2015 227 | 2404,2011 228 | 2531,2017 229 | 5172,2011 230 | 1104,2011 231 | 6005,2011 232 | 5994,2011 233 | 5210,2011 234 | 415,2011 235 | 4660,2017 236 | 5345,2011 237 | 
5883,2011 238 | 2402,2011 239 | 541,2011 240 | 3477,2011 241 | 668,2015 242 | 3600,2011 243 | 737,2011 244 | 2195,2011 245 | 4886,2011 246 | 1181,2017 247 | 1956,2011 248 | 818,2011 249 | 1093,2011 250 | 4821,2011 251 | 4630,2011 252 | 623,2011 253 | 3969,2011 254 | 5947,2011 255 | 2593,2011 256 | 5145,2011 257 | 3925,2011 258 | 2291,2017 259 | 2830,2011 260 | 2012,2011 261 | 2929,2013 262 | 57,2011 263 | 1863,2011 264 | 683,2011 265 | 518,2011 266 | 3791,2011 267 | 2129,2011 268 | 3634,2013 269 | 4593,2011 270 | 557,2011 271 | 31,2011 272 | 1775,2011 273 | 5371,2011 274 | 4352,2011 275 | 4054,2011 276 | 3028,2011 277 | 3997,2011 278 | 4103,2011 279 | 4052,2011 280 | 4706,2011 281 | 4949,2011 282 | 3874,2011 283 | 2694,2013 284 | 4313,2011 285 | 1954,2011 286 | 4742,2011 287 | 3755,2011 288 | 2135,2011 289 | 3610,2011 290 | 377,2011 291 | 1353,2011 292 | 4427,2011 293 | 2874,2011 294 | 3882,2011 295 | 4858,2011 296 | 5664,2011 297 | 4657,2017 298 | 4386,2017 299 | 3893,2011 300 | 1469,2011 301 | 4689,2017 302 | 748,2011 303 | 4860,2011 304 | 3164,2011 305 | 5564,2011 306 | 2113,2011 307 | 445,2011 308 | 4125,2017 309 | 2772,2017 310 | 743,2011 311 | 3446,2017 312 | 5778,2017 313 | 1634,2011 314 | 4967,2011 315 | 1368,2011 316 | 168,2011 317 | 4680,2011 318 | 4107,2011 319 | 1937,2011 320 | 5639,2011 321 | 3537,2011 322 | 5554,2011 323 | 4739,2017 324 | 3189,2011 325 | 1872,2015 326 | 2974,2011 327 | 698,2011 328 | 4128,2011 329 | 3308,2011 330 | 5522,2017 331 | 806,2011 332 | 4684,2017 333 | 340,2011 334 | 3294,2011 335 | 1105,2011 336 | 331,2011 337 | 3365,2011 338 | 4927,2011 339 | 491,2017 340 | 5935,2011 341 | 6000,2011 342 | 5181,2011 343 | 1640,2011 344 | 532,2011 345 | 694,2011 346 | 917,2011 347 | 2321,2011 348 | 1597,2013 349 | 4766,2011 350 | 4703,2011 351 | 5517,2017 352 | 5060,2011 353 | 5105,2011 354 | 4879,2011 355 | 1753,2011 356 | 2283,2011 357 | 2677,2017 358 | 1018,2011 359 | 4244,2011 360 | 1103,2011 361 | 5213,2011 362 | 3094,2011 363 | 800,2011 
364 | 1479,2011 365 | 5095,2011 366 | 1126,2011 367 | 2868,2011 368 | 2372,2017 369 | 387,2011 370 | 2078,2011 371 | 3312,2011 372 | 980,2011 373 | 2669,2011 374 | 3405,2011 375 | 692,2011 376 | 1586,2011 377 | 2165,2011 378 | 3719,2017 379 | 957,2011 380 | 2332,2011 381 | 148,2011 382 | 333,2011 383 | 1727,2011 384 | 4722,2011 385 | 1335,2015 386 | 2235,2011 387 | 5837,2011 388 | 2304,2011 389 | 2230,2011 390 | 5336,2011 391 | 343,2011 392 | 4687,2017 393 | 3353,2011 394 | 3886,2011 395 | 4816,2011 396 | 2459,2015 397 | 2102,2011 398 | 3789,2011 399 | 5565,2011 400 | 2558,2011 401 | 3569,2011 402 | 4744,2011 403 | 2271,2017 404 | 813,2011 405 | 2515,2011 406 | 409,2011 407 | 2287,2011 408 | 4146,2011 409 | 10,2011 410 | 599,2011 411 | 3824,2011 412 | 1378,2015 413 | 1159,2011 414 | 3710,2015 415 | 3467,2011 416 | 2473,2011 417 | 4438,2011 418 | 3102,2011 419 | 5928,2011 420 | 3279,2011 421 | 371,2011 422 | 1765,2017 423 | 5329,2011 424 | 1520,2011 425 | 2489,2011 426 | 4859,2011 427 | 3959,2011 428 | 3829,2011 429 | 1949,2011 430 | 3935,2011 431 | 3567,2011 432 | 981,2011 433 | 4417,2011 434 | 4737,2011 435 | 966,2011 436 | 656,2011 437 | 234,2011 438 | 1488,2011 439 | 4481,2011 440 | 3315,2011 441 | 1669,2011 442 | 3837,2011 443 | 4048,2017 444 | 1579,2011 445 | 4656,2017 446 | 5635,2011 447 | 503,2011 448 | 3199,2017 449 | 2462,2011 450 | 1916,2011 451 | 3801,2011 452 | 1123,2011 453 | 4690,2017 454 | 4487,2011 455 | 3091,2011 456 | 5393,2011 457 | 441,2011 458 | 5284,2011 459 | 163,2011 460 | 5892,2011 461 | 1079,2011 462 | 863,2011 463 | 709,2011 464 | 3057,2011 465 | 3742,2011 466 | 2945,2011 467 | 248,2011 468 | 3751,2011 469 | 2563,2011 470 | 2073,2011 471 | 5052,2011 472 | 1398,2017 473 | 4373,2011 474 | 699,2011 475 | 269,2011 476 | 5813,2011 477 | 5765,2015 478 | 1676,2011 479 | 1144,2011 480 | 2232,2011 481 | 1587,2011 482 | 327,2011 483 | 896,2011 484 | 3100,2011 485 | 4842,2011 486 | 2968,2011 487 | 2224,2011 488 | 841,2011 489 | 1564,2011 490 | 
2791,2011 491 | 2813,2011 492 | 5670,2011 493 | 5155,2011 494 | 3174,2011 495 | 3475,2017 496 | 2862,2011 497 | 1788,2011 498 | 4941,2011 499 | 2817,2011 500 | 5828,2011 501 | 2693,2011 502 | 2157,2011 503 | 1315,2017 504 | 2295,2011 505 | 474,2011 506 | 969,2011 507 | 3272,2011 508 | 5216,2011 509 | 87,2011 510 | 5608,2011 511 | 440,2011 512 | 3316,2011 513 | 2721,2017 514 | 2256,2017 515 | 2935,2013 516 | 1670,2011 517 | 3892,2011 518 | 2662,2015 519 | 4545,2011 520 | 1266,2011 521 | 188,2011 522 | 335,2011 523 | 5144,2011 524 | 5184,2011 525 | 2453,2011 526 | 4947,2011 527 | 3950,2011 528 | 4929,2011 529 | 967,2011 530 | 5845,2011 531 | 630,2013 532 | 3505,2015 533 | 1324,2011 534 | 2827,2011 535 | 2051,2011 536 | 4494,2011 537 | 2543,2015 538 | 2222,2011 539 | 2553,2011 540 | 3412,2011 541 | 4400,2011 542 | 5297,2011 543 | 1862,2011 544 | 1947,2011 545 | 5317,2011 546 | 4096,2011 547 | 2706,2011 548 | 1883,2011 549 | 4260,2011 550 | 5281,2011 551 | 3563,2011 552 | 2524,2011 553 | 4672,2011 554 | 5602,2011 555 | 890,2011 556 | 2093,2011 557 | 411,2011 558 | 4229,2011 559 | 2361,2011 560 | 82,2014 561 | 2520,2011 562 | 3605,2011 563 | 3253,2015 564 | 1793,2015 565 | 5509,2011 566 | 5956,2011 567 | 4763,2011 568 | 4365,2011 569 | 4518,2011 570 | 723,2011 571 | 1497,2011 572 | 5428,2011 573 | 6007,2011 574 | 5513,2011 575 | 4,2011 576 | 4163,2011 577 | 5762,2015 578 | 1621,2011 579 | 1758,2013 580 | 1003,2011 581 | 3819,2011 582 | 5659,2011 583 | 613,2011 584 | 5301,2011 585 | 4050,2011 586 | 701,2011 587 | 4441,2011 588 | 881,2011 589 | 5182,2011 590 | 4709,2011 591 | 5992,2011 592 | 2338,2011 593 | 5089,2011 594 | 1043,2011 595 | 324,2011 596 | 4069,2011 597 | 4999,2015 598 | 4028,2011 599 | 2716,2011 600 | 5204,2017 601 | 824,2011 602 | 4876,2011 603 | 71,2011 604 | 773,2015 605 | 4097,2011 606 | 1603,2011 607 | 1454,2011 608 | 2732,2011 609 | 4939,2017 610 | 1600,2017 611 | 1108,2011 612 | 4366,2011 613 | 5616,2011 614 | 2013,2011 615 | 5443,2011 616 | 
5932,2011 617 | 4471,2011 618 | 2537,2011 619 | 2491,2011 620 | 3890,2011 621 | 2052,2011 622 | 2532,2017 623 | 4249,2011 624 | 1920,2011 625 | 3898,2011 626 | 4225,2011 627 | 5186,2013 628 | 900,2011 629 | 3379,2011 630 | 1830,2011 631 | 766,2011 632 | 410,2011 633 | 2050,2011 634 | 2342,2011 635 | 2290,2017 636 | 2194,2011 637 | 4428,2011 638 | 4585,2011 639 | 3435,2011 640 | 1402,2011 641 | 1915,2011 642 | 2174,2011 643 | 578,2011 644 | 1251,2011 645 | 2667,2011 646 | 3268,2015 647 | 3198,2017 648 | 2220,2011 649 | 5138,2011 650 | 4701,2011 651 | 2233,2011 652 | 1852,2011 653 | 5840,2011 654 | 307,2011 655 | 3098,2011 656 | 2263,2015 657 | 4582,2011 658 | 5674,2011 659 | 4308,2011 660 | 3476,2017 661 | 186,2011 662 | 1371,2011 663 | 667,2015 664 | 3415,2011 665 | 1053,2011 666 | 700,2011 667 | 961,2011 668 | 5974,2017 669 | 1340,2011 670 | 3820,2011 671 | 37,2016 672 | 4449,2011 673 | 5700,2011 674 | 4176,2011 675 | 4094,2011 676 | 4743,2011 677 | 2258,2015 678 | 309,2011 679 | 2451,2011 680 | 5283,2011 681 | 3512,2015 682 | 938,2011 683 | 3524,2017 684 | 3343,2011 685 | 2730,2011 686 | 3580,2011 687 | 598,2015 688 | 5750,2015 689 | 2166,2011 690 | 3566,2011 691 | 4711,2011 692 | 5908,2011 693 | 2125,2011 694 | 4232,2013 695 | 5035,2011 696 | 5264,2011 697 | 2689,2011 698 | 3126,2017 699 | 2370,2017 700 | 576,2011 701 | 4334,2011 702 | 1364,2011 703 | 869,2011 704 | 3106,2015 705 | 5805,2017 706 | 2668,2011 707 | 3337,2011 708 | 924,2011 709 | 3884,2011 710 | 5773,2011 711 | 1004,2011 712 | 2085,2011 713 | 572,2011 714 | 1162,2011 715 | 3785,2011 716 | 2561,2011 717 | 733,2011 718 | 316,2011 719 | 2557,2011 720 | 4777,2011 721 | 4813,2011 722 | 2679,2011 723 | 4045,2011 724 | 1607,2017 725 | 3720,2017 726 | 5642,2011 727 | 2248,2011 728 | 4800,2011 729 | 436,2017 730 | 562,2011 731 | 5502,2011 732 | 4066,2011 733 | 1423,2011 734 | 2435,2011 735 | 2718,2011 736 | 2072,2011 737 | 3629,2017 738 | 4811,2011 739 | 1661,2011 740 | 928,2011 741 | 3900,2011 742 | 
3620,2011 743 | 2812,2011 744 | 2959,2011 745 | 5294,2011 746 | 1877,2011 747 | 5749,2015 748 | 3004,2011 749 | 4789,2011 750 | 293,2016 751 | 686,2011 752 | 2567,2011 753 | 5055,2011 754 | 908,2011 755 | 875,2011 756 | 5515,2017 757 | 3808,2011 758 | 117,2016 759 | 1086,2011 760 | 1997,2011 761 | 5754,2015 762 | 4214,2011 763 | 5909,2011 764 | 3360,2017 765 | 854,2011 766 | 4189,2011 767 | 5985,2011 768 | 2996,2011 769 | 2018,2011 770 | 3761,2011 771 | 4288,2011 772 | 5918,2011 773 | 1515,2011 774 | 4425,2011 775 | 1630,2011 776 | 5146,2011 777 | 4401,2011 778 | 1487,2011 779 | 3421,2011 780 | 1306,2013 781 | 5560,2011 782 | 4164,2011 783 | 5702,2011 784 | 4884,2011 785 | 3550,2011 786 | 2303,2017 787 | 2881,2015 788 | 3967,2011 789 | 706,2011 790 | 1015,2011 791 | 3697,2011 792 | 2175,2011 793 | 1835,2011 794 | 5402,2011 795 | 828,2011 796 | 2253,2017 797 | 604,2011 798 | 904,2011 799 | 1397,2011 800 | 5324,2011 801 | 1127,2011 802 | 4768,2011 803 | 1444,2011 804 | 1318,2017 805 | 5938,2011 806 | 5877,2011 807 | 5867,2011 808 | 5625,2011 809 | 1891,2011 810 | 3361,2017 811 | 531,2011 812 | 2804,2011 813 | 5736,2011 814 | 1263,2011 815 | 1706,2011 816 | 4132,2011 817 | 4421,2011 818 | 3575,2011 819 | 3561,2011 820 | 2926,2011 821 | 2202,2011 822 | 1841,2011 823 | 1289,2017 824 | 4970,2011 825 | 837,2011 826 | 3617,2011 827 | 3113,2013 828 | 1438,2015 829 | 5025,2011 830 | 2547,2011 831 | 4901,2011 832 | 4243,2011 833 | 2430,2011 834 | 463,2011 835 | 271,2011 836 | 3208,2011 837 | 2682,2011 838 | 1707,2011 839 | 4875,2011 840 | 4275,2011 841 | 2306,2011 842 | 213,2011 843 | 4589,2011 844 | 937,2011 845 | 2842,2011 846 | 898,2011 847 | 3769,2011 848 | 5362,2017 849 | 3152,2011 850 | 5582,2011 851 | 4371,2011 852 | 1944,2011 853 | 1012,2017 854 | 502,2011 855 | 5796,2017 856 | 3608,2011 857 | 1583,2011 858 | 2344,2011 859 | 1317,2017 860 | 64,2011 861 | 4217,2011 862 | 5677,2011 863 | 3601,2011 864 | 2810,2011 865 | 4981,2011 866 | 524,2011 867 | 4046,2011 868 | 
4381,2011 869 | 5207,2011 870 | 1798,2011 871 | 990,2011 872 | 5817,2011 873 | 2058,2011 874 | 1361,2011 875 | 3310,2011 876 | 1164,2011 877 | 1224,2013 878 | 96,2011 879 | 3285,2011 880 | 2564,2011 881 | 1576,2011 882 | 847,2011 883 | 4691,2011 884 | 5203,2017 885 | 2903,2017 886 | 391,2011 887 | 4723,2011 888 | 3607,2011 889 | 4241,2011 890 | 1861,2011 891 | 4953,2011 892 | 4499,2011 893 | 154,2011 894 | 3922,2011 895 | 5652,2011 896 | 2427,2011 897 | 4240,2011 898 | 3196,2011 899 | 1935,2011 900 | 3171,2011 901 | 1742,2011 902 | 2986,2011 903 | 2275,2017 904 | 832,2011 905 | 3885,2011 906 | 1808,2011 907 | 4505,2017 908 | 2208,2011 909 | 2179,2011 910 | 4398,2011 911 | 1973,2011 912 | 3528,2011 913 | 2509,2013 914 | 53,2011 915 | 2587,2011 916 | 665,2011 917 | 4074,2011 918 | 5226,2011 919 | 4551,2017 920 | 3300,2011 921 | 4543,2011 922 | 5309,2011 923 | 3082,2011 924 | 1610,2017 925 | 2795,2017 926 | 932,2011 927 | 4254,2011 928 | 4413,2011 929 | 5037,2011 930 | 2227,2011 931 | 676,2011 932 | 2641,2015 933 | 1325,2011 934 | 5179,2011 935 | 5473,2011 936 | 5496,2011 937 | 2231,2011 938 | 5537,2017 939 | 3322,2011 940 | 3786,2011 941 | 4116,2011 942 | 36,2016 943 | 5850,2011 944 | 4697,2011 945 | 4167,2011 946 | 2226,2011 947 | 1080,2011 948 | 2281,2011 949 | 4034,2017 950 | 2648,2011 951 | 5234,2011 952 | 3632,2017 953 | 2621,2017 954 | 3373,2011 955 | 1744,2011 956 | 215,2011 957 | 4406,2011 958 | 1304,2011 959 | 2836,2011 960 | 3139,2017 961 | 3408,2011 962 | 5508,2011 963 | 3019,2011 964 | 3066,2011 965 | 3594,2011 966 | 5763,2015 967 | 5783,2011 968 | 1546,2011 969 | 3320,2011 970 | 2492,2011 971 | 1919,2011 972 | 2549,2017 973 | 1653,2011 974 | 4455,2011 975 | 2097,2011 976 | 5094,2011 977 | 5761,2015 978 | 5533,2011 979 | 4715,2011 980 | 4634,2011 981 | 5894,2011 982 | 2454,2011 983 | 2474,2011 984 | 4078,2011 985 | 3466,2011 986 | 177,2011 987 | 4930,2011 988 | 5493,2011 989 | 423,2011 990 | 2164,2011 991 | 3374,2011 992 | 5471,2011 993 | 3545,2011 994 | 
860,2011 995 | 3185,2011 996 | 1996,2011 997 | 5446,2017 998 | 2437,2011 999 | 5048,2011 1000 | 3836,2011 1001 | 1511,2011 1002 | -------------------------------------------------------------------------------- /data/solar_farm_labels.csv: -------------------------------------------------------------------------------- 1 | idx,year 2 | 0,2018 3 | 1,2018 4 | 2,-1 5 | 3,-1 6 | 4,-1 7 | 5,2018 8 | 6,-1 9 | 7,2017 10 | 8,2017 11 | 9,-1 12 | 10,2019 13 | 11,2019 14 | 12,2018 15 | 13,2018 16 | 14,-1 17 | 15,2018 18 | 16,2018 19 | 17,2017 20 | 18,2017 21 | 19,-1 22 | 20,2017 23 | 21,2018 24 | 22,2018 25 | 23,2018 26 | 24,2018 27 | 25,2018 28 | 26,2018 29 | 27,2017 30 | 28,2018 31 | 29,2018 32 | 30,2018 33 | 31,2018 34 | 32,2018 35 | 33,2018 36 | 34,2018 37 | 35,2018 38 | 36,2018 39 | 37,2017 40 | 38,2018 41 | 39,2018 42 | 40,2019 43 | 41,2016 44 | 42,2016 45 | 43,2018 46 | 44,2018 47 | 45,-1 48 | 46,-1 49 | 47,-1 50 | 48,2019 51 | 49,2019 52 | 50,2018 53 | 51,2018 54 | 52,2018 55 | 53,-1 56 | 54,2020 57 | 55,2017 58 | 56,2017 59 | 57,2017 60 | 58,2017 61 | 59,2017 62 | 60,-1 63 | 61,2020 64 | 62,-1 65 | 63,2018 66 | 64,2020 67 | 65,2018 68 | 66,2020 69 | 67,2019 70 | 68,2019 71 | 69,2019 72 | 70,-1 73 | 71,2019 74 | 72,2019 75 | 73,2018 76 | 74,2018 77 | 75,2019 78 | 76,-1 79 | 77,2018 80 | 78,2018 81 | 79,2018 82 | 80,2018 83 | 81,2018 84 | 82,2018 85 | 83,2018 86 | 84,2018 87 | 85,2018 88 | 86,2019 89 | 87,2018 90 | 88,2018 91 | 89,2018 92 | 90,2018 93 | 91,2018 94 | 92,2018 95 | 93,2018 96 | 94,2018 97 | 95,2019 98 | 96,2018 99 | 97,2017 100 | 98,-1 101 | 99,2018 102 | 100,-1 103 | 101,2020 104 | 102,2016 105 | 103,2017 106 | 104,2017 107 | 105,2020 108 | 106,2018 109 | 107,2020 110 | 108,2018 111 | 109,2018 112 | 110,2020 113 | 111,2020 114 | 112,2017 115 | 113,2018 116 | 114,-1 117 | 115,2018 118 | 116,2018 119 | 117,2018 120 | 118,2017 121 | 119,2018 122 | 120,2019 123 | 121,-1 124 | 122,2018 125 | 123,-1 126 | 124,2017 127 | 125,2019 128 | 126,-1 129 | 127,-1 130 | 
128,2018 131 | 129,2018 132 | 130,2019 133 | 131,2018 134 | 132,-1 135 | 133,2018 136 | 134,2018 137 | 135,2019 138 | 136,2020 139 | 137,2018 140 | 138,-1 141 | 139,-1 142 | 140,2019 143 | 141,2019 144 | 142,2018 145 | 143,2018 146 | 144,-1 147 | 145,2016 148 | 146,2018 149 | 147,2018 150 | 148,2017 151 | 149,2017 152 | 150,2017 153 | 151,2017 154 | 152,2018 155 | 153,2017 156 | 154,-1 157 | 155,2020 158 | 156,-1 159 | 157,2016 160 | 158,2017 161 | 159,2020 162 | 160,2020 163 | 161,2020 164 | 162,2020 165 | 163,2019 166 | 164,2019 167 | 165,2018 168 | 166,-1 169 | 167,2020 170 | 168,2017 171 | 169,2018 172 | 170,-1 173 | 171,2019 174 | 172,2018 175 | 173,2018 176 | 174,2018 177 | 175,2018 178 | 176,2018 179 | 177,2019 180 | 178,2018 181 | 179,2019 182 | 180,2018 183 | 181,2017 184 | 182,2018 185 | 183,-1 186 | 184,2018 187 | 185,2018 188 | 186,2018 189 | 187,2018 190 | 188,-1 191 | 189,2020 192 | 190,2018 193 | 191,-1 194 | 192,2018 195 | 193,2018 196 | 194,2018 197 | 195,-1 198 | 196,-1 199 | 197,2018 200 | 198,2019 201 | 199,2019 202 | 200,2019 203 | 201,2020 204 | 202,-1 205 | 203,2019 206 | 204,-1 207 | 205,2019 208 | 206,2019 209 | 207,2019 210 | 208,2018 211 | 209,2019 212 | 210,-1 213 | 211,2018 214 | 212,-1 215 | 213,2020 216 | 214,2020 217 | 215,2020 218 | 216,2020 219 | 217,2018 220 | 218,-1 221 | 219,2018 222 | 220,2018 223 | 221,2018 224 | 222,2018 225 | 223,2019 226 | 224,2019 227 | 225,2019 228 | 226,2019 229 | 227,-1 230 | 228,-1 231 | 229,2019 232 | 230,2020 233 | 231,2019 234 | 232,2020 235 | 233,2020 236 | 234,2019 237 | 235,2019 238 | 236,-1 239 | 237,2019 240 | 238,2020 241 | 239,-1 242 | 240,2018 243 | 241,2018 244 | 242,2018 245 | 243,2018 246 | 244,2019 247 | 245,2018 248 | 246,2018 249 | 247,2019 250 | 248,2019 251 | 249,2019 252 | 250,2018 253 | 251,2017 254 | 252,2017 255 | 253,2020 256 | 254,2017 257 | 255,2018 258 | 256,2020 259 | 257,2020 260 | 258,2018 261 | 259,2018 262 | 260,2018 263 | 261,2018 264 | 262,2017 265 | 263,-1 266 | 
264,2019 267 | 265,2018 268 | 266,-1 269 | 267,2020 270 | 268,2016 271 | 269,2018 272 | 270,2016 273 | 271,2018 274 | 272,2018 275 | 273,2019 276 | 274,2018 277 | 275,2018 278 | 276,-1 279 | 277,2017 280 | 278,-1 281 | 279,2017 282 | 280,-1 283 | 281,2017 284 | 282,-1 285 | 283,-1 286 | 284,2018 287 | 285,-1 288 | 286,2018 289 | 287,-1 290 | 288,2018 291 | 289,2018 292 | 290,2019 293 | 291,2018 294 | 292,2018 295 | 293,2018 296 | 294,2018 297 | 295,2020 298 | 296,2020 299 | 297,2020 300 | 298,2020 301 | 299,2016 302 | 300,-1 303 | 301,2020 304 | 302,-1 305 | 303,-1 306 | 304,2019 307 | 305,-1 308 | 306,2019 309 | 307,2019 310 | 308,-1 311 | 309,2017 312 | 310,2018 313 | 311,2017 314 | 312,2017 315 | 313,2018 316 | 314,-1 317 | 315,2018 318 | 316,2018 319 | 317,2018 320 | 318,-1 321 | 319,-1 322 | 320,2020 323 | 321,2020 324 | 322,2020 325 | 323,2020 326 | 324,2018 327 | 325,2018 328 | 326,2018 329 | 327,-1 330 | 328,2018 331 | 329,2017 332 | 330,2017 333 | 331,2017 334 | 332,2017 335 | 333,2018 336 | 334,2017 337 | 335,2019 338 | 336,2019 339 | 337,2019 340 | 338,2019 341 | 339,2019 342 | 340,-1 343 | 341,2018 344 | 342,2018 345 | 343,2018 346 | 344,-1 347 | 345,2018 348 | 346,-1 349 | 347,2018 350 | 348,2018 351 | 349,-1 352 | 350,2018 353 | 351,2018 354 | 352,2018 355 | 353,2018 356 | 354,2018 357 | 355,2019 358 | 356,2018 359 | 357,-1 360 | 358,-1 361 | 359,2018 362 | 360,2020 363 | 361,2020 364 | 362,2018 365 | 363,2020 366 | 364,2018 367 | 365,2018 368 | 366,2018 369 | 367,-1 370 | 368,2017 371 | 369,2017 372 | 370,2017 373 | 371,2017 374 | 372,2017 375 | 373,2017 376 | 374,2016 377 | 375,2016 378 | 376,2017 379 | 377,2017 380 | 378,2017 381 | 379,2017 382 | 380,2020 383 | 381,2020 384 | 382,2020 385 | 383,2017 386 | 384,2017 387 | 385,2018 388 | 386,2016 389 | 387,2018 390 | 388,2018 391 | 389,2018 392 | 390,2018 393 | 391,2019 394 | 392,2019 395 | 393,-1 396 | 394,2018 397 | 395,2018 398 | 396,2018 399 | 397,2018 400 | 398,2018 401 | 399,2018 402 | 400,2018 
403 | 401,2018 404 | 402,2018 405 | 403,2018 406 | 404,2018 407 | 405,2018 408 | 406,2018 409 | 407,-1 410 | 408,2017 411 | 409,2017 412 | 410,2018 413 | 411,2020 414 | 412,2017 415 | 413,2017 416 | 414,2017 417 | 415,2018 418 | 416,2017 419 | 417,2017 420 | 418,2018 421 | 419,2018 422 | 420,2018 423 | 421,2017 424 | 422,2017 425 | 423,2017 426 | 424,2017 427 | 425,2017 428 | 426,-1 429 | 427,2017 430 | 428,2017 431 | 429,2017 432 | 430,2017 433 | 431,2017 434 | 432,2017 435 | 433,-1 436 | 434,2017 437 | 435,-1 438 | 436,2017 439 | 437,2020 440 | 438,-1 441 | 439,2020 442 | 440,2018 443 | 441,2017 444 | 442,2019 445 | 443,2019 446 | 444,2019 447 | 445,2020 448 | 446,2019 449 | 447,2017 450 | 448,2019 451 | 449,2020 452 | 450,2019 453 | 451,2019 454 | 452,2020 455 | 453,2020 456 | 454,2020 457 | 455,-1 458 | 456,-1 459 | 457,-1 460 | 458,2019 461 | 459,2020 462 | 460,2019 463 | 461,2019 464 | 462,2019 465 | 463,2019 466 | 464,2019 467 | 465,2019 468 | 466,2020 469 | 467,2020 470 | 468,2019 471 | 469,2020 472 | 470,-1 473 | 471,-1 474 | 472,2020 475 | 473,2019 476 | 474,2020 477 | 475,2020 478 | 476,2020 479 | 477,-1 480 | 478,2020 481 | 479,2020 482 | 480,-1 483 | 481,-1 484 | 482,2020 485 | 483,2020 486 | 484,2019 487 | 485,2020 488 | 486,-1 489 | 487,2019 490 | 488,2020 491 | 489,2019 492 | 490,-1 493 | 491,2020 494 | 492,-1 495 | 493,2020 496 | 494,2020 497 | 495,2020 498 | 496,2020 499 | 497,2020 500 | 498,2020 501 | 499,2019 502 | 500,2019 503 | 501,2020 504 | 502,-1 505 | 503,2020 506 | 504,2019 507 | 505,2019 508 | 506,2020 509 | 507,2019 510 | 508,2020 511 | 509,2020 512 | 510,2020 513 | 511,-1 514 | 512,2020 515 | 513,2020 516 | 514,2020 517 | 515,2018 518 | 516,2018 519 | 517,2018 520 | 518,2017 521 | 519,2018 522 | 520,2018 523 | 521,2020 524 | 522,2018 525 | 523,2018 526 | 524,2020 527 | 525,2019 528 | 526,2018 529 | 527,2020 530 | 528,2019 531 | 529,2018 532 | 530,2019 533 | 531,2016 534 | 532,2019 535 | 533,2019 536 | 534,2018 537 | 535,2018 538 | 
536,2018 539 | 537,2018 540 | 538,2018 541 | 539,2019 542 | 540,2018 543 | 541,2020 544 | 542,2018 545 | 543,2020 546 | 544,2020 547 | 545,2020 548 | 546,2018 549 | 547,2018 550 | 548,2020 551 | 549,2018 552 | 550,2018 553 | 551,2017 554 | 552,2018 555 | 553,2018 556 | 554,2020 557 | 555,2018 558 | 556,2018 559 | 557,2020 560 | 558,2018 561 | 559,2018 562 | 560,2018 563 | 561,2020 564 | 562,2018 565 | 563,2020 566 | 564,2020 567 | 565,2020 568 | 566,2020 569 | 567,2018 570 | 568,2018 571 | 569,2020 572 | 570,2018 573 | 571,2020 574 | 572,2020 575 | 573,2018 576 | 574,2018 577 | 575,2020 578 | 576,2018 579 | 577,2020 580 | 578,2020 581 | 579,2020 582 | 580,2020 583 | 581,2018 584 | 582,2018 585 | 583,2018 586 | 584,2018 587 | 585,2017 588 | 586,2016 589 | 587,2018 590 | 588,2017 591 | 589,2017 592 | 590,2016 593 | 591,2016 594 | 592,-1 595 | 593,2017 596 | 594,2017 597 | 595,2016 598 | 596,2016 599 | 597,2018 600 | 598,2017 601 | 599,2017 602 | 600,2018 603 | 601,2017 604 | 602,2017 605 | 603,-1 606 | 604,2017 607 | 605,2017 608 | 606,2017 609 | 607,2017 610 | 608,2017 611 | 609,2017 612 | 610,-1 613 | 611,2017 614 | 612,-1 615 | 613,2017 616 | 614,2017 617 | 615,-1 618 | 616,-1 619 | 617,-1 620 | 618,-1 621 | 619,-1 622 | 620,2017 623 | 621,-1 624 | 622,-1 625 | 623,-1 626 | 624,-1 627 | 625,2017 628 | 626,2017 629 | 627,2016 630 | 628,2017 631 | 629,2016 632 | 630,-1 633 | 631,2017 634 | 632,2017 635 | 633,2017 636 | 634,2017 637 | 635,2017 638 | 636,2019 639 | 637,2018 640 | 638,2018 641 | 639,2016 642 | 640,2018 643 | 641,2017 644 | 642,2017 645 | 643,2018 646 | 644,2018 647 | 645,2018 648 | 646,2018 649 | 647,2017 650 | 648,2016 651 | 649,2016 652 | 650,2016 653 | 651,2016 654 | 652,2016 655 | 653,2018 656 | 654,2018 657 | 655,2018 658 | 656,2018 659 | 657,2018 660 | 658,-1 661 | 659,2018 662 | 660,2018 663 | 661,2018 664 | 662,2018 665 | 663,2018 666 | 664,2018 667 | 665,2018 668 | 666,2019 669 | 667,2016 670 | 668,2019 671 | 669,2016 672 | 670,2017 673 | 
671,2019 674 | 672,-1 675 | 673,-1 676 | 674,2018 677 | 675,2017 678 | 676,-1 679 | 677,2018 680 | 678,-1 681 | 679,2017 682 | 680,2017 683 | 681,-1 684 | 682,2020 685 | 683,2018 686 | 684,2018 687 | 685,2018 688 | 686,2018 689 | 687,2018 690 | 688,2018 691 | 689,2018 692 | 690,2017 693 | 691,2018 694 | 692,-1 695 | 693,2019 696 | 694,-1 697 | 695,2017 698 | 696,2018 699 | 697,2018 700 | 698,2018 701 | 699,2018 702 | 700,2018 703 | 701,2018 704 | 702,2018 705 | 703,2018 706 | 704,2018 707 | 705,2018 708 | 706,2018 709 | 707,2018 710 | 708,2018 711 | 709,2019 712 | 710,2018 713 | 711,-1 714 | 712,2016 715 | 713,2017 716 | 714,2020 717 | 715,2020 718 | 716,2019 719 | 717,2018 720 | 718,2018 721 | 719,-1 722 | 720,2018 723 | 721,2019 724 | 722,2019 725 | 723,-1 726 | 724,2020 727 | 725,2020 728 | 726,2017 729 | 727,2017 730 | 728,2017 731 | 729,-1 732 | 730,2018 733 | 731,-1 734 | 732,2019 735 | 733,2018 736 | 734,-1 737 | 735,2019 738 | 736,2019 739 | 737,2018 740 | 738,-1 741 | 739,2020 742 | 740,-1 743 | 741,2018 744 | 742,2018 745 | 743,2018 746 | 744,-1 747 | 745,2020 748 | 746,2020 749 | 747,2020 750 | 748,2020 751 | 749,2017 752 | 750,-1 753 | 751,2018 754 | 752,-1 755 | 753,-1 756 | 754,-1 757 | 755,2018 758 | 756,-1 759 | 757,-1 760 | 758,2020 761 | 759,2020 762 | 760,2020 763 | 761,2020 764 | 762,2020 765 | 763,-1 766 | 764,2019 767 | 765,-1 768 | 766,2018 769 | 767,-1 770 | 768,-1 771 | 769,2017 772 | 770,2017 773 | 771,-1 774 | 772,-1 775 | 773,2017 776 | 774,-1 777 | 775,2020 778 | 776,2020 779 | 777,2019 780 | 778,2018 781 | 779,2016 782 | 780,2018 783 | 781,-1 784 | 782,-1 785 | 783,2017 786 | 784,2017 787 | 785,2017 788 | 786,2017 789 | 787,-1 790 | 788,-1 791 | 789,2016 792 | 790,2020 793 | 791,-1 794 | 792,2017 795 | 793,-1 796 | 794,2017 797 | 795,2016 798 | 796,2017 799 | 797,-1 800 | 798,-1 801 | 799,-1 802 | 800,2016 803 | 801,2018 804 | 802,2017 805 | 803,-1 806 | 804,2019 807 | 805,-1 808 | 806,-1 809 | 807,-1 810 | 808,2019 811 | 809,2018 812 
| 810,-1 813 | 811,-1 814 | 812,-1 815 | 813,-1 816 | 814,-1 817 | 815,2016 818 | 816,-1 819 | 817,-1 820 | 818,2018 821 | 819,-1 822 | 820,2018 823 | 821,2019 824 | 822,2016 825 | 823,-1 826 | 824,2019 827 | 825,2016 828 | 826,-1 829 | 827,2020 830 | 828,2020 831 | 829,2020 832 | 830,2017 833 | 831,2018 834 | 832,2019 835 | 833,2017 836 | 834,2016 837 | 835,2018 838 | 836,2016 839 | 837,2018 840 | 838,2018 841 | 839,2018 842 | 840,2019 843 | 841,2018 844 | 842,2020 845 | 843,2018 846 | 844,-1 847 | 845,2018 848 | 846,-1 849 | 847,2020 850 | 848,2016 851 | 849,2019 852 | 850,-1 853 | 851,-1 854 | 852,2017 855 | 853,-1 856 | 854,2018 857 | 855,2018 858 | 856,-1 859 | 857,2018 860 | 858,2019 861 | 859,-1 862 | 860,-1 863 | 861,2019 864 | 862,2016 865 | 863,2018 866 | 864,2016 867 | 865,2018 868 | 866,2018 869 | 867,2019 870 | 868,2018 871 | 869,2018 872 | 870,-1 873 | 871,2019 874 | 872,2019 875 | 873,2019 876 | 874,2018 877 | 875,2019 878 | 876,2018 879 | 877,2017 880 | 878,-1 881 | 879,-1 882 | 880,2017 883 | 881,2018 884 | 882,2018 885 | 883,2018 886 | 884,2018 887 | 885,-1 888 | 886,2018 889 | 887,2018 890 | 888,2018 891 | 889,2016 892 | 890,2016 893 | 891,2016 894 | 892,2016 895 | 893,2018 896 | 894,2020 897 | 895,2018 898 | 896,2016 899 | 897,2018 900 | 898,2017 901 | 899,2019 902 | 900,-1 903 | 901,-1 904 | 902,-1 905 | 903,2019 906 | 904,2017 907 | 905,2017 908 | 906,2017 909 | 907,2018 910 | 908,2018 911 | 909,-1 912 | 910,2018 913 | 911,-1 914 | 912,2018 915 | 913,2018 916 | 914,2018 917 | 915,2020 918 | 916,2018 919 | 917,2020 920 | 918,-1 921 | 919,2018 922 | 920,2017 923 | 921,2018 924 | 922,2018 925 | 923,2018 926 | 924,2018 927 | 925,-1 928 | 926,2019 929 | 927,2020 930 | 928,2017 931 | 929,2020 932 | 930,2018 933 | 931,2018 934 | 932,-1 935 | 933,2018 936 | 934,2018 -------------------------------------------------------------------------------- /data/solar_farms_valid_s2_area.geojson: 
-------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "name": "solar_farms_valid_s2_area", 4 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 5 | "features": [ 6 | { "type": "Feature", "properties": { "STATE": "KARNATAKA" }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 77.4854, 13.67835 ], [ 77.69686, 13.71845 ], [ 77.7939, 13.80191 ], [ 77.78725, 13.89467 ], [ 77.93835, 13.90892 ], [ 77.9098, 13.81217 ], [ 78.07321, 13.82549 ], [ 78.11968, 13.72745 ], [ 78.09028, 13.61032 ], [ 78.37868, 13.52312 ], [ 78.37658, 13.28375 ], [ 78.58078, 13.22639 ], [ 78.41984, 12.92138 ], [ 78.46095, 12.85948 ], [ 78.38496, 12.90263 ], [ 78.29823, 12.85761 ], [ 78.219, 12.70877 ], [ 78.22245, 12.75051 ], [ 78.06679, 12.81896 ], [ 77.9916, 12.76554 ], [ 77.85747, 12.8479 ], [ 77.81591, 12.84106 ], [ 77.78648, 12.85193 ], [ 77.74832, 12.69954 ], [ 77.69601, 12.63974 ], [ 77.62888, 12.64374 ], [ 77.54948, 12.5614 ], [ 77.61445, 12.41005 ], [ 77.59241, 12.35516 ], [ 77.4167, 12.21439 ], [ 77.44981, 12.12581 ], [ 77.7156, 12.12564 ], [ 77.74873, 12.02035 ], [ 77.6243, 11.94325 ], [ 77.63615, 11.89243 ], [ 77.5186, 11.88201 ], [ 77.30777, 11.73069 ], [ 77.23781, 11.75859 ], [ 77.06274, 11.69724 ], [ 77.00446, 11.76223 ], [ 76.91782, 11.76852 ], [ 76.80663, 11.59483 ], [ 76.54915, 11.53373 ], [ 76.49185, 11.64394 ], [ 76.40288, 11.62404 ], [ 76.32688, 11.72188 ], [ 76.08363, 11.85114 ], [ 76.07081, 11.93149 ], [ 75.94453, 11.88849 ], [ 75.91536, 11.88796 ], [ 75.826, 11.93077 ], [ 75.72796, 12.07156 ], [ 75.53014, 12.15383 ], [ 75.39928, 12.31331 ], [ 75.342, 12.38347 ], [ 75.37367, 12.48517 ], [ 75.27187, 12.47652 ], [ 75.28344, 12.54815 ], [ 75.1095, 12.63058 ], [ 75.10938, 12.67508 ], [ 75.04484, 12.64674 ], [ 74.95995, 12.7533 ], [ 74.86989, 12.72432 ], [ 74.78017, 12.90303 ], [ 74.67084, 13.35126 ], [ 74.65189, 13.60045 ], [ 74.70454, 13.58502 ], [ 74.6949, 
13.62656 ], [ 74.64922, 13.62916 ], [ 74.56416, 13.89349 ], [ 74.48289, 14.01123 ], [ 74.3643, 14.50414 ], [ 74.31364, 14.50654 ], [ 74.25027, 14.75107 ], [ 74.1854, 14.81252 ], [ 74.22776, 14.86847 ], [ 74.12498, 14.88338 ], [ 74.09455, 14.94202 ], [ 74.21928, 14.96301 ], [ 74.26444, 15.18908 ], [ 74.19546, 15.2551 ], [ 74.29156, 15.31245 ], [ 74.28469, 15.3513 ], [ 74.21246, 15.46093 ], [ 74.23017, 15.54504 ], [ 74.2397, 15.6466 ], [ 74.08273, 15.66101 ], [ 74.10362, 15.70154 ], [ 74.14934, 15.77991 ], [ 74.19901, 15.72453 ], [ 74.31606, 15.76384 ], [ 74.41804, 16.03322 ], [ 74.34676, 16.06951 ], [ 74.4602, 16.10864 ], [ 74.43819, 16.24101 ], [ 74.3065, 16.29807 ], [ 74.32561, 16.40454 ], [ 74.23795, 16.53427 ], [ 74.36694, 16.54504 ], [ 74.43472, 16.63971 ], [ 74.54324, 16.62475 ], [ 74.54552, 16.5774 ], [ 74.65196, 16.6023 ], [ 74.67778, 16.70737 ], [ 74.89983, 16.75963 ], [ 74.91982, 16.95993 ], [ 75.06267, 16.97057 ], [ 75.19109, 16.81813 ], [ 75.31338, 16.87741 ], [ 75.26845, 16.93781 ], [ 75.35597, 16.95729 ], [ 75.68953, 16.9583 ], [ 75.63593, 17.28816 ], [ 75.60253, 17.3535 ], [ 75.6685, 17.43934 ], [ 75.8985, 17.28373 ], [ 76.02128, 17.29518 ], [ 76.06319, 17.33121 ], [ 76.36506, 17.29803 ], [ 76.39256, 17.33099 ], [ 76.32924, 17.56419 ], [ 76.40118, 17.55508 ], [ 76.44302, 17.60219 ], [ 76.54678, 17.71896 ], [ 76.71461, 17.66559 ], [ 76.8127, 17.78494 ], [ 76.76221, 17.84558 ], [ 76.8929, 17.87519 ], [ 76.96012, 18.05909 ], [ 76.92842, 18.09771 ], [ 77.12167, 18.11959 ], [ 77.21542, 18.2368 ], [ 77.18356, 18.28939 ], [ 77.28355, 18.3862 ], [ 77.39352, 18.39388 ], [ 77.38126, 18.28414 ], [ 77.5693, 18.27302 ], [ 77.58398, 18.26573 ], [ 77.62504, 18.25125 ], [ 77.58766, 18.19981 ], [ 77.60861, 18.06901 ], [ 77.55212, 17.99415 ], [ 77.57987, 17.94703 ], [ 77.64464, 17.9783 ], [ 77.67426, 17.93676 ], [ 77.52856, 17.76628 ], [ 77.4571, 17.57415 ], [ 77.51316, 17.50874 ], [ 77.70739, 17.47898 ], [ 77.57446, 17.40527 ], [ 77.38721, 17.1915 ], [ 77.48714, 
17.09124 ], [ 77.43921, 16.73761 ], [ 77.47093, 16.58925 ], [ 77.40177, 16.48631 ], [ 77.30378, 16.45737 ], [ 77.29919, 16.36532 ], [ 77.51286, 16.30623 ], [ 77.48987, 15.88969 ], [ 77.1942, 15.93109 ], [ 77.04855, 15.82806 ], [ 77.08576, 15.64174 ], [ 77.02182, 15.61494 ], [ 77.02396, 15.46537 ], [ 76.97196, 15.47959 ], [ 77.16013, 15.11399 ], [ 77.09988, 14.97114 ], [ 76.81864, 15.03257 ], [ 76.76833, 14.94549 ], [ 76.83736, 14.93435 ], [ 76.86822, 14.85488 ], [ 76.78108, 14.74039 ], [ 76.75598, 14.57379 ], [ 76.77603, 14.51185 ], [ 76.96027, 14.44279 ], [ 76.86697, 14.30322 ], [ 76.98559, 14.20912 ], [ 77.09714, 14.20401 ], [ 77.12897, 14.31489 ], [ 77.36057, 14.16712 ], [ 77.37633, 14.27963 ], [ 77.42607, 14.26438 ], [ 77.50436, 14.17141 ], [ 77.35766, 14.09468 ], [ 77.33765, 14.02019 ], [ 77.4295, 13.90144 ], [ 77.35186, 13.87276 ], [ 77.27343, 13.98049 ], [ 77.03996, 14.00278 ], [ 76.99987, 14.14339 ], [ 76.9058, 14.12825 ], [ 76.91802, 14.07456 ], [ 77.00537, 13.89189 ], [ 76.97667, 13.7059 ], [ 77.14159, 13.73763 ], [ 77.12853, 13.85906 ], [ 77.17406, 13.86796 ], [ 77.24133, 13.81583 ], [ 77.39642, 13.80734 ], [ 77.43401, 13.68616 ], [ 77.4854, 13.67835 ] ] ] ] } } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: tcm 2 | dependencies: 3 | - python 4 | - pip 5 | - rtree 6 | - shapely 7 | - fiona 8 | - rasterio>=1.1.0 9 | - pyproj 10 | - gdal>=3.0 11 | - numpy 12 | - scipy 13 | - matplotlib 14 | - scikit-learn 15 | - scikit-image 16 | - pandas 17 | - pip: 18 | - utm 19 | - pigeon-jupyter==0.1.0 20 | - planetary-computer>=0.3.0rc3 21 | - pygifsicle 22 | - imageio 23 | -------------------------------------------------------------------------------- /experiments/run_parameter_sweep.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft 
Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | import time 6 | import itertools 7 | import subprocess 8 | from multiprocessing import Process, Queue 9 | 10 | def do_work(work): 11 | while not work.empty(): 12 | experiment = work.get() 13 | print(experiment) 14 | subprocess.call(experiment.split(" ")) 15 | return True 16 | 17 | NUM_PROCESSES = 5 18 | work = Queue() 19 | 20 | ################################################ 21 | # Run the algorithm with the dataset footprints 22 | ################################################ 23 | datasets = ["poultry_barns", "solar_farms_reduced"] 24 | cluster_options = { 25 | "poultry_barns": [16, 32, 64], 26 | "solar_farms_reduced": [16, 32, 64], 27 | } 28 | buffer_options = { 29 | "poultry_barns": [400,200,100], 30 | "solar_farms_reduced": [0.024,0.016], 31 | } 32 | for dataset in datasets: 33 | for num_clusters, buffer, in itertools.product(cluster_options[dataset], buffer_options[dataset]): 34 | command = f"python run_algorithm.py --dataset {dataset} --num_clusters {num_clusters} --buffer {buffer} --output_dir results/kl/{dataset}-{num_clusters}-{buffer}/ --algorithm kl" 35 | work.put(command) 36 | 37 | ################################################ 38 | # Run the algorithm with the random polygons 39 | ################################################ 40 | datasets = ["poultry_barns_random", "solar_farms_reduced_random"] 41 | cluster_options = { 42 | "poultry_barns_random": [16, 32, 64], 43 | "solar_farms_reduced_random": [16, 32, 64], 44 | } 45 | buffer_options = { 46 | "poultry_barns_random": [400,200,100], 47 | "solar_farms_reduced_random": [0.024,0.016], 48 | } 49 | for dataset in datasets: 50 | for num_clusters, buffer, in itertools.product(cluster_options[dataset], buffer_options[dataset]): 51 | command = f"python run_algorithm.py --dataset {dataset} --num_clusters {num_clusters} --buffer {buffer} --output_dir results/kl/{dataset}-{num_clusters}-{buffer}/ --algorithm kl" 52 | 
work.put(command) 53 | 54 | 55 | ## Start experiments 56 | processes = [] 57 | start_time = time.time() 58 | for i in range(NUM_PROCESSES): 59 | p = Process(target=do_work, args=(work,)) 60 | processes.append(p) 61 | p.start() 62 | for p in processes: 63 | p.join() -------------------------------------------------------------------------------- /experiments/run_parameter_sweep_color.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (c) Microsoft Corporation. All rights reserved. 3 | Licensed under the MIT License. 4 | ''' 5 | import time 6 | import subprocess 7 | from multiprocessing import Process, Queue 8 | 9 | def do_work(work): 10 | while not work.empty(): 11 | experiment = work.get() 12 | print(experiment) 13 | subprocess.call(experiment.split(" ")) 14 | return True 15 | 16 | NUM_PROCESSES = 5 17 | work = Queue() 18 | 19 | ################################################ 20 | # Run the algorithm with the dataset footprints 21 | ################################################ 22 | datasets = ["poultry_barns", "solar_farms_reduced"] 23 | buffer_options = { 24 | "poultry_barns": [400,200,100], 25 | "solar_farms_reduced": [0.024,0.016], 26 | } 27 | for dataset in datasets: 28 | for buffer in buffer_options[dataset]: 29 | command = f"python run_algorithm.py --dataset {dataset} --buffer {buffer} --output_dir results/color/{dataset}-0-{buffer}/ --algorithm color" 30 | work.put(command) 31 | 32 | ################################################ 33 | # Run the algorithm with the random polygons 34 | ################################################ 35 | datasets = ["poultry_barns_random", "solar_farms_reduced_random"] 36 | buffer_options = { 37 | "poultry_barns_random": [400,200,100], 38 | "solar_farms_reduced_random": [0.024,0.016], 39 | } 40 | for dataset in datasets: 41 | for buffer in buffer_options[dataset]: 42 | command = f"python run_algorithm.py --dataset {dataset} --buffer {buffer} --output_dir 
results/color/{dataset}-0-{buffer}/ --algorithm color" 43 | work.put(command) 44 | 45 | 46 | ## Start experiments 47 | processes = [] 48 | start_time = time.time() 49 | for i in range(NUM_PROCESSES): 50 | p = Process(target=do_work, args=(work,)) 51 | processes.append(p) 52 | p.start() 53 | for p in processes: 54 | p.join() -------------------------------------------------------------------------------- /images/data_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/temporal-cluster-matching/490b12fbb134e755a11a0f8612036100939ab390/images/data_examples.png -------------------------------------------------------------------------------- /images/sentinel2_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/temporal-cluster-matching/490b12fbb134e755a11a0f8612036100939ab390/images/sentinel2_demo.gif -------------------------------------------------------------------------------- /notebooks/Data - Create random polygons.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "handed-logistics", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Copyright (c) Microsoft Corporation. 
All rights reserved\n", 11 | "# Licensed under the MIT License.\n", 12 | "%matplotlib inline\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2\n", 15 | "import sys\n", 16 | "sys.path.append(\"..\")\n", 17 | "import os\n", 18 | "import time\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "\n", 23 | "import rasterio\n", 24 | "from rasterio.windows import Window\n", 25 | "import shapely\n", 26 | "import shapely.geometry\n", 27 | "import fiona\n", 28 | "\n", 29 | "from temporal_cluster_matching import utils, DataInterface, algorithms" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "composed-metropolitan", 35 | "metadata": {}, 36 | "source": [ 37 | "## NAIP" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "id": "annual-prefix", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "geoms = utils.get_poultry_barn_geoms(\"../data/\")\n", 48 | "\n", 49 | "areas = []\n", 50 | "for geom in geoms:\n", 51 | " shape = shapely.geometry.shape(geom)\n", 52 | " areas.append(shape.area)\n", 53 | "\n", 54 | "lengths = np.sqrt(areas)\n", 55 | "mean_length = np.mean(lengths)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "id": "intermediate-merchant", 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "22.92778452118776" 68 | ] 69 | }, 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "lengths.max()\n", 77 | "lengths.min()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "id": "miniature-european", 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "{'properties': OrderedDict([('MINX', 'float'), ('MINY', 'float'), ('MAXX', 'float'), ('MAXY', 'float'), ('CNTX', 'float'), ('CNTY', 'float'), ('AREA', 'float'), ('PERIM', 'float'), ('HEIGHT', 
'float'), ('WIDTH', 'float')]), 'geometry': 'Polygon'}\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "with fiona.open(\"../data/Delmarva_PL_House_Final2_extent_epsg26918.geojson\") as f:\n", 96 | " extent_row = next(iter(f))\n", 97 | " print(f.schema)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "id": "utility-teddy", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "minx, miny, maxx, maxy = shapely.geometry.shape(extent_row[\"geometry\"]).bounds\n", 108 | "width = maxx-minx\n", 109 | "height = maxy-miny" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "id": "considerable-camcorder", 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "with fiona.open(\"../data/delmarva_valid_naip_area.geojson\") as f:\n", 120 | " valid_polygon = shapely.geometry.shape(next(iter(f))[\"geometry\"])" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 8, 126 | "id": "informed-crash", 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "image/svg+xml": [ 132 | "" 133 | ], 134 | "text/plain": [ 135 | "" 136 | ] 137 | }, 138 | "execution_count": 8, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "valid_polygon" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 8, 150 | "id": "chubby-techno", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "buffer = 0.05\n", 155 | "geoms = []\n", 156 | "while len(geoms) < 6013:\n", 157 | " \n", 158 | " length = np.random.choice(lengths)\n", 159 | " \n", 160 | " p_minx = (np.random.random() * width * (1 - buffer*2)) - length + minx + (width * buffer)\n", 161 | " p_miny = (np.random.random() * height * (1 - buffer*2)) - length + miny + (height * buffer)\n", 162 | " \n", 163 | " p_maxx = p_minx + length\n", 164 | " p_maxy = p_miny + length\n", 165 | " \n", 166 | " \n", 167 | " shape = 
shapely.geometry.box(p_minx, p_miny, p_maxx, p_maxy)\n", 168 | " \n", 169 | " if valid_polygon.contains(shape): \n", 170 | " geom = shapely.geometry.mapping(\n", 171 | " shape\n", 172 | " )\n", 173 | " geoms.append(geom)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "id": "recorded-clearing", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "schema = {\n", 184 | " \"properties\": {\"id\": \"int\"},\n", 185 | " \"geometry\": \"Polygon\"\n", 186 | "}\n", 187 | "with fiona.open(\"../data/poultry_barn_6013_random_polygons_epsg26918.geojson\", \"w\", driver=\"GeoJSON\", crs=\"epsg:26918\", schema=schema) as f:\n", 188 | " for i, geom in enumerate(geoms):\n", 189 | "\n", 190 | " f.write({\n", 191 | " \"type\":\"Feature\",\n", 192 | " \"properties\": {\"id\": i},\n", 193 | " \"geometry\": geom\n", 194 | " })" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "id": "presidential-minneapolis", 200 | "metadata": {}, 201 | "source": [ 202 | "## S2" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 9, 208 | "id": "cardiac-corps", 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "geoms = utils.get_solar_farm_geoms(\"../data/\")\n", 213 | "\n", 214 | "areas = []\n", 215 | "for geom in geoms:\n", 216 | " shape = shapely.geometry.shape(geom)\n", 217 | " areas.append(shape.area)\n", 218 | "\n", 219 | "lengths = np.sqrt(areas)\n", 220 | "mean_length = np.mean(lengths)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 10, 226 | "id": "sealed-packing", 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "935" 233 | ] 234 | }, 235 | "execution_count": 10, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "len(geoms)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 11, 247 | "id": "assigned-compilation", 248 | "metadata": {}, 
249 | "outputs": [], 250 | "source": [ 251 | "with rasterio.open(DataInterface.S2DataLoader.urls[0]) as f:\n", 252 | " minx, miny, maxx, maxy = f.bounds\n", 253 | " \n", 254 | "width = maxx-minx\n", 255 | "height = maxy-miny" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 12, 261 | "id": "different-chassis", 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "with fiona.open(\"../data/solar_farms_valid_s2_area.geojson\") as f:\n", 266 | " valid_polygon = shapely.geometry.shape(next(iter(f))[\"geometry\"])" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 8, 272 | "id": "bright-destruction", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "geoms = []\n", 277 | "buffer = 0.05\n", 278 | "while len(geoms) < 935:\n", 279 | " \n", 280 | " length = np.random.choice(lengths)\n", 281 | " #length = mean_length\n", 282 | " \n", 283 | " p_minx = (np.random.random() * width * (1 - buffer*2)) - length + minx + (width * buffer)\n", 284 | " p_miny = (np.random.random() * height * (1 - buffer*2)) - length + miny + (height * buffer)\n", 285 | " \n", 286 | " p_maxx = p_minx + length\n", 287 | " p_maxy = p_miny + length\n", 288 | " \n", 289 | " \n", 290 | " shape = shapely.geometry.box(p_minx, p_miny, p_maxx, p_maxy)\n", 291 | " \n", 292 | " if valid_polygon.contains(shape): \n", 293 | " geom = shapely.geometry.mapping(\n", 294 | " shape\n", 295 | " )\n", 296 | " geoms.append(geom)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 9, 302 | "id": "minute-distance", 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "schema = {\n", 307 | " \"properties\": {\"id\": \"int\"},\n", 308 | " \"geometry\": \"Polygon\"\n", 309 | "}\n", 310 | "with fiona.open(\"../data/solar_farms_935_random_polygons_epsg4326.geojson\", \"w\", driver=\"GeoJSON\", crs=\"epsg:4326\", schema=schema) as f:\n", 311 | " for i, geom in enumerate(geoms):\n", 312 | "\n", 313 | " 
f.write({\n", 314 | " \"type\":\"Feature\",\n", 315 | " \"properties\": {\"id\": i},\n", 316 | " \"geometry\": geom\n", 317 | " })" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "id": "stylish-norfolk", 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "backdating", 332 | "language": "python", 333 | "name": "conda-env-backdating-py" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | "name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.7.7" 346 | } 347 | }, 348 | "nbformat": 4, 349 | "nbformat_minor": 5 350 | } 351 | -------------------------------------------------------------------------------- /notebooks/Experiments - Color over time.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "right-rates", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Copyright (c) Microsoft Corporation. 
All rights reserved\n", 11 | "# Licensed under the MIT License.\n", 12 | "%matplotlib inline\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2\n", 15 | "import sys\n", 16 | "sys.path.append(\"..\")\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "\n", 21 | "from sklearn.metrics import accuracy_score, mean_absolute_error\n", 22 | "from sklearn.model_selection import train_test_split\n", 23 | "from sklearn.linear_model import LogisticRegression, LinearRegression\n", 24 | "from sklearn.preprocessing import StandardScaler\n", 25 | "\n", 26 | "from temporal_cluster_matching import utils, DataInterface, algorithms" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "stupid-failing", 32 | "metadata": {}, 33 | "source": [ 34 | "## NAIP / Poultry barn data" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "id": "negative-circuit", 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "6013" 47 | ] 48 | }, 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "geoms = utils.get_poultry_barn_geoms(\"../data/\")\n", 56 | "len(geoms)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "id": "organizational-truth", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "dataloader = DataInterface.NAIPDataLoader()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "divided-hawaiian", 73 | "metadata": { 74 | "scrolled": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "all_distances = []\n", 79 | "for i, geom in enumerate(geoms):\n", 80 | " if i % 100 == 0:\n", 81 | " print(i, len(geoms))\n", 82 | " data_images, masks, years = dataloader.get_data_stack_from_geom(geom, buffer=0)\n", 83 | " \n", 84 | " previous_year_footprint_color = None\n", 85 | " distances = [None]\n", 86 | " for image, mask in zip(data_images, 
masks):\n", 87 | " \n", 88 | " average_footprint_color = image[mask==1].mean(axis=0)\n", 89 | " if previous_year_footprint_color is not None:\n", 90 | " distances.append(np.linalg.norm(\n", 91 | " previous_year_footprint_color - average_footprint_color\n", 92 | " ))\n", 93 | " previous_year_footprint_color = average_footprint_color\n", 94 | " \n", 95 | " all_distances.append(distances)\n", 96 | " \n", 97 | "## One of the geoms only intersects with 4 years\n", 98 | "for i, distances in enumerate(all_distances):\n", 99 | " if len(distances) == 3:\n", 100 | " all_distances[i] = [0] + distances\n", 101 | " \n", 102 | "all_distances = np.array(all_distances)\n", 103 | "np.save(\"../results/poultry_barn_inter_year_color_distances.npy\", all_distances)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "id": "accepting-referral", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "all_distances = np.load(\"../results/poultry_barn_inter_year_color_distances.npy\")" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "id": "awful-baker", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "labeled_idxs, labeled_years = utils.get_poultry_barn_labels(\"../data/\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "id": "southwest-somerset", 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "x_all = []\n", 134 | "y_all = []\n", 135 | "\n", 136 | "for idx, year in zip(labeled_idxs, labeled_years):\n", 137 | " x_all.append(all_distances[idx])\n", 138 | " y_all.append(year)\n", 139 | " \n", 140 | "x_all = np.array(x_all)\n", 141 | "y_all = np.array(y_all)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 8, 147 | "id": "architectural-universe", 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "0.9075 0.017557049866079442\n", 
155 | "0.39289999999999997 0.08057971208685223\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "accs = []\n", 161 | "maes = []\n", 162 | "for repeat in range(50):\n", 163 | " \n", 164 | " x_train, x_test, y_train, y_test = train_test_split(\n", 165 | " x_all, y_all, test_size=0.2\n", 166 | " )\n", 167 | " \n", 168 | " scaler = StandardScaler()\n", 169 | " x_train = scaler.fit_transform(x_train)\n", 170 | " x_test = scaler.transform(x_test)\n", 171 | " \n", 172 | " model = LogisticRegression()\n", 173 | " model.fit(x_train, y_train)\n", 174 | " y_pred = model.predict(x_test)\n", 175 | " y_pred = np.round(y_pred).astype(int)\n", 176 | " \n", 177 | " acc = accuracy_score(y_test, y_pred)\n", 178 | " mae = mean_absolute_error(y_test, y_pred)\n", 179 | " \n", 180 | " accs.append(acc)\n", 181 | " maes.append(mae)\n", 182 | " \n", 183 | "print(np.mean(accs), np.std(accs))\n", 184 | "print(np.mean(maes), np.std(maes))" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "provincial-classroom", 190 | "metadata": {}, 191 | "source": [ 192 | "## S2 / Solar farm data" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 9, 198 | "id": "terminal-punishment", 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "935" 205 | ] 206 | }, 207 | "execution_count": 9, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "geoms = utils.get_solar_farm_geoms(\"../data/\")\n", 214 | "len(geoms)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 10, 220 | "id": "traditional-hepatitis", 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "dataloader = DataInterface.S2DataLoader()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "id": "opponent-theorem", 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "all_distances = []\n", 235 | "for i, geom in 
enumerate(geoms):\n", 236 | " if i % 10 == 0:\n", 237 | " print(i, len(geoms))\n", 238 | " \n", 239 | " data_images, masks, years = dataloader.get_data_stack_from_geom(geom, buffer=0.004)\n", 240 | " \n", 241 | " previous_year_footprint_color = None\n", 242 | " distances = []\n", 243 | " for image, mask in zip(data_images, masks):\n", 244 | " image = image[:,:,:12]\n", 245 | " if image.shape[0] == mask.shape[0] and image.shape[1] == mask.shape[1]: \n", 246 | " average_footprint_color = image[mask==1].mean(axis=0)\n", 247 | " if previous_year_footprint_color is not None:\n", 248 | " distances.append(np.linalg.norm(\n", 249 | " previous_year_footprint_color - average_footprint_color\n", 250 | " ))\n", 251 | " previous_year_footprint_color = average_footprint_color\n", 252 | " else:\n", 253 | " distances = [0,0,0,0]\n", 254 | " break\n", 255 | " \n", 256 | " all_distances.append(distances)\n", 257 | " \n", 258 | "all_distances = np.array(all_distances)\n", 259 | "np.save(\"../results/solar_farm_inter_year_color_distances.npy\", all_distances)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 11, 265 | "id": "greenhouse-divorce", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "all_distances = np.load(\"../results/solar_farm_inter_year_color_distances.npy\")" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 13, 275 | "id": "medium-trouble", 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "labeled_idxs, labeled_years = utils.get_solar_farm_labels(\"../data/\")" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 18, 285 | "id": "infrared-carolina", 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "x_all = []\n", 290 | "y_all = []\n", 291 | "\n", 292 | "for idx, year in zip(labeled_idxs, labeled_years):\n", 293 | " if year != -1:\n", 294 | " x_all.append(all_distances[idx])\n", 295 | " y_all.append(year)\n", 296 | " \n", 297 | "x_all = 
np.array(x_all)\n", 298 | "y_all = np.array(y_all)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 19, 304 | "id": "gross-astrology", 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "mask = y_all != -1\n", 309 | "x_all = x_all[mask]\n", 310 | "y_all = y_all[mask]\n", 311 | "\n", 312 | "x_all[np.isnan(x_all)] = 0" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 22, 318 | "id": "yellow-headline", 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "name": "stdout", 323 | "output_type": "stream", 324 | "text": [ 325 | "0.7907565789473685 0.01304345795255656\n", 326 | "0.28638157894736843 0.020847686652987744\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "accs = []\n", 332 | "maes = []\n", 333 | "for repeat in range(50):\n", 334 | " \n", 335 | " x_train, x_test, y_train, y_test = train_test_split(\n", 336 | " x_all, y_all, test_size=0.8\n", 337 | " )\n", 338 | " \n", 339 | " scaler = StandardScaler()\n", 340 | " x_train = scaler.fit_transform(x_train)\n", 341 | " x_test = scaler.transform(x_test)\n", 342 | " \n", 343 | " model = LogisticRegression()\n", 344 | " model.fit(x_train, y_train)\n", 345 | " y_pred = model.predict(x_test)\n", 346 | " y_pred = np.round(y_pred).astype(int)\n", 347 | " \n", 348 | " mask = y_test != -1\n", 349 | " acc = accuracy_score(y_test[mask], y_pred[mask])\n", 350 | " mae = mean_absolute_error(y_test[mask], y_pred[mask])\n", 351 | " \n", 352 | " accs.append(acc)\n", 353 | " maes.append(mae)\n", 354 | " \n", 355 | "print(np.mean(accs), np.std(accs))\n", 356 | "print(np.mean(maes), np.std(maes))" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": null, 362 | "id": "alone-modeling", 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [] 366 | } 367 | ], 368 | "metadata": { 369 | "kernelspec": { 370 | "display_name": "backdating", 371 | "language": "python", 372 | "name": "conda-env-backdating-py" 373 | }, 374 | 
"language_info": { 375 | "codemirror_mode": { 376 | "name": "ipython", 377 | "version": 3 378 | }, 379 | "file_extension": ".py", 380 | "mimetype": "text/x-python", 381 | "name": "python", 382 | "nbconvert_exporter": "python", 383 | "pygments_lexer": "ipython3", 384 | "version": "3.7.7" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 5 389 | } 390 | -------------------------------------------------------------------------------- /notebooks/Experiments - Heuristic method.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "serious-battle", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Copyright (c) Microsoft Corporation. All rights reserved\n", 11 | "# Licensed under the MIT License.\n", 12 | "%matplotlib inline\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2\n", 15 | "import sys\n", 16 | "sys.path.append(\"..\")\n", 17 | "import os\n", 18 | "import time\n", 19 | "import itertools\n", 20 | "\n", 21 | "import numpy as np\n", 22 | "import pandas as pd\n", 23 | "import matplotlib as mpl\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import scipy.optimize\n", 26 | "\n", 27 | "import shapely.geometry\n", 28 | "import fiona.transform\n", 29 | "\n", 30 | "from sklearn.metrics import accuracy_score, mean_absolute_error\n", 31 | "from sklearn.model_selection import train_test_split\n", 32 | "from sklearn.preprocessing import StandardScaler\n", 33 | "from sklearn.linear_model import LogisticRegression\n", 34 | "\n", 35 | "from temporal_cluster_matching import utils, DataInterface, algorithms" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "id": "statutory-candidate", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "def show_two_results(hist1, hist2, bins, label1, label2, title, color1=\"#1f77b4\", xlabel=\"KL divergence\"):\n", 46 | " \n", 47 | " bin_width = 
bins[1] - bins[0]\n", 48 | " \n", 49 | " \n", 50 | " plt.figure()\n", 51 | " plt.bar(bins[:-1], hist1, width=bin_width, alpha=0.5, label=label1, color=color1)\n", 52 | " plt.bar(bins[:-1], hist2, width=bin_width, alpha=0.5, label=label2, color=\"#ff7f0e\")\n", 53 | " plt.legend(loc=\"upper right\", fontsize=15)\n", 54 | " plt.xlabel(xlabel, fontsize=15)\n", 55 | " plt.ylabel(\"Relative frequency\", fontsize=15)\n", 56 | " plt.title(title, fontsize=15)\n", 57 | " plt.xticks(fontsize=13)\n", 58 | " plt.yticks(fontsize=13)\n", 59 | " plt.ylim([0,0.084])\n", 60 | " plt.show()\n", 61 | " plt.close()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 3, 67 | "id": "olympic-advertising", 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQ4AAACWCAYAAAAv82Y/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAW9klEQVR4nO2deZgeVZXGfy8w7LIZFZEBIpsIg+ziggmKKKiQCIOIg5CEyBIzwqABZQvbsIoRDQl72EQEIYLoQBACsgRQQZAlaEICSFhDwipoOPPHuZWuVFd3f/frTm85v+fJU+lbt6pufd9Xb90699Z5ZWYEQRDksFRPNyAIgr5HCEcQBNmEcARBkE0IRxAE2YRwBEGQTQhHEATZhHAEQZBNCEcQBNmEcARBkE0IRxAE2YRwBEGQTQhHEATZhHAEQZBNCEcQBNmEcARBkE0IRxAE2YRwBEGQTQhHEATZhHAEQZBNCEcQBNmEcARBkE0IRxAE2YRwBEGQTQhHEATZhHAEQZBNCEcQBNmEcARBkE0IRxAE2YRwBEGQTQhHEATZhHAEQZBNCEcQBNmEcARBkE0IRxAE2YRwBEGQTQhHEATZhHAEQZBNCEcQBNmEcPRjJO0oaZqk1ySZpP17uk2dRdLg/nIufZllmt1wvSNvfA74QBe2pbM8P+vUL63Z7MaSNgL+C9gZWB9YHpgBXA2MM7M3arbZGDgNGAQsC/wJOM7Mbq2p+31gK2BrYCAw28zWa6Mt26W2bA18DFgJGGZmkzLOZ3XgWuAZ4HDgTeDuRrfPRdIWwBBgkpnNWlzH6QokHQrMy/k8g0VpWjjoXaIBnW/PcGAUcD1wBfBPYEfgJGAvSdub2VtFZUnr4xfiv4DTgfnASOAmSbuY2S2V/f8vMBcXl9U6aMuuqS2PA38GPtnE+WybjjPCzK5tYvtctgCOA6YCsxbjce4AVsC/n2Y5FG/jpM43Z8mkM8LR37gGOMXM5pfKJkr6K3AUMAL4aWndKfiFubWZPQgg6VLgEWC8pI+YmZXqr29mM1O9vwArt9OWCcAZZvaGpD1pTjiK3tfcJrbtdUh6j5m9ZmbvAv/o6fYs6USMI2Fmf6iIRsFVablZUSBpJWA3YGohGmkfrwMXABvhd/zy/mdmtOX5ukejRpE0C7gk/XlbiglYaf0ASeMlPS3pnbQcL+m9NfvqsK6kscDF1eNJmpTW75/+
3knSWEmzJb0t6SFJe9e1X9JUSVtKuknSfOChtK5VjKNcJmmYpEfS/mdLGlPZtwHrAoNK7TRJ66X1n5T0W0nPSfqHpL9L+o2k7XO/h/5M9Dg6Zu20fL5UtjmwHHBPTf1pabktcN9ibFd7HArsAnwLf0R6rFghaVX8EWsD4CL80WlL4GDgs5K2M7PXMuteC3yw5ngzKu06DY/XTAAMGAZcKWn5mnjDOsCteIzpl7TfQys4CH9kvRCYh8eJTpP0jJn9LNXZF/gR8BJwcmnbF1PMagrwHPBj/DtfE/gUHmuaRgCEcLSLpKWBY/E4xs9Kq9ZKy7/XbFaUfWgxNq1dzGyypNXwC3mKmU0trR4DbAiMMrNzikJJD+KPYmOAY3LqmtlDku5p43hlBgCbFz07SRPxnsRZkq4qx5DwAPJIM7sg49TXAT5qZvPS/i8CZgOjSd+fmV0u6STgeTO7vLyxpC8AKwJfN7OeEv0+QTyqtM84YHvgWDObXipfMS3frtnmH5U6vY2hwIvAeZXyc/G78NAm6zbChPLjYPr/RGB1YHCl7lxaHn8a5eJCNNL+38R7CRs2uH3Rtt0lLZ957CWKEI42kHQi8G3gPDM7pbL6zbRcrmbT5St1ehsDgelm9q9yYfp7OvDhJus2wmM1ZY+mZXVfM8xsQeb+6+JILwOtYjdt8HPgFuAHwFxJt0o6QtK6me3o94Rw1JCCfUfjd7yDaqo8m5Z1jyNFWd1jzJKO1ZSpjbrNCG+u0CyCmb1tZp8HPo6Pmi0ATgAel5Tbu+rXhHBUkHQcPh/hUuCAypBqwcP4Y8onatYV0fc/LJ4WdpqZwMaSFolvpb83YtG7dk7dus+pykdryjYpHau7aLetZnafmZ2YRGQD4A18Pk+QCOEoIelYYCxwGT5T8926emnY9QZgsKSPlbZfGTgA+Cs9N6LSEZOB9+HtLDMylV/XZN3X03KNdo59cBqpARaO2hyEj4Dc3kjju4jXqWmnpAE1dZ/B4zztndcSR4yqJCSNAo4HnsKfc/eRFulFP29mU0p/fx/4HHCzpB8Br+IX1IeAL1V7KpL2xecPgF90y0o6Ov0928wuK9VdFx82BNg0Lb8iqRgavszMZjd5qqcD/4lPUtsKeAAfYh2Bxy1Ob7Lu/cC7wFFpuvsbwJNmdm+pzkvAvWm0Q/hw7Dp4z647Y0LTgBEpjvVYavcNwNGSdgZ+DTyZ2vgV4CMseq5LPCEcLRQTttahZfJUmdvxMX4AzOxvkj4FnAocScu7Kl+smW4OfrENqpSdWNr3ZaXygaV1BV9N/wDuxIcZszGz+andx+OT2Ibh8xUm4u/ZvNZk3ackDQeOwOdp/Bv+OZaF4whgBzzo/AG8Z/aN0hyL7uIovAcxCp/9K/wzn4zPR9krte+t1MaR+NyQIKH6R/iO6W8vuQWLjzTL82Jgx3bmeAR9iKZ7HHGRBsGSSwRHgyDIJoQjCIJsmo5xBEGw5BI9jiAIsgnhCIIgmxCOIAiyCeEIgiCbEI4gCLIJ4QiCIJsQjiAIsgnhCIIgmxCOoM8gaQtJv5P0SrI0GNvTbeosktbri+fS/Gv1Y1ftdW/HMnZ+v7CALG3zJeCwtM1yeFKZm83s223UH4/bMK6NZ2L/JvBFPFvXKrh72W+AU83s5Zrt18LTBOyC2xE8ApxmZlfX1D0Q+Exq24bAUmZWmwZQ7vPSXt7OC8xsZDvri6xjv8Rf1z8GT/7zUHvbdIbks7I/MLnsndMbSW8fr2Zm47rtmE1POR+7au+bqz52flv5KztE0qm0WEBOo8UCci/8B1pnAXkfbp0wjhYLyM2AVhaQyQiosIDcGni1PeFIKQzHAjfhF/ubeK6Qzc1sSE19AU8D15vZIZIOwr1BbsTzd7wGbIdfDHOA7czsudL2a+DpDt8PnIWL1D64KA43s0UyjicxeC+e3GcgsHY7wjGEel+UUXiqxd3N7Pq2Pou0j43w5EGHm9lZ7dXtCiQNBm4j07O3ieMIvyn8
q5oUOmMfU4H1OroRdSWRyKeFXmMBKWknXDSONbNqQp+22BbPPjY5/f17YN2yOADnS7oXOB/4bvpXcCQuALuZ2Q2pHRfiplNnSro6pUwsGAw8ZWbvSvo1LcZVrTCzydUySSvgn+ccXBg7or9aWhp90NIyYhyJ3mQBiafnfwEXJyStLKmj72oo3uu5LR3vkYpotHk+iX1wS4IbSm1eAPwEz5a1a7mymc1qKydrg+wJrApc0tGdNt1Ri5ykF9fYNq4k6RRJM5L143OSLq2zNWikbur631ZzvKlpfdlycrSkJ+R2kU9IGl3Xfrmt5YclXSNpLp5qsjbGUS6T9GVJ96f9z5F0Rjl5dOr5DQLW1aKWloPT+k0lXS23sizO97b0GNw00ePomG61gEyi9Bn8LjxCnkB5LeAtSdcD3zGz52s2HQrcaGYdubi3Oh9JH8R7K1fU1C+fzy8aPpGOGYFnG28kJd/JwF24oJ6H96bAbRuXwR/nPoX3Gn+Ix1wOBnaWtI2ZPQML4ySN1L0Dt7KsHq/6uY/Ge0Ln4o+CXwfOlrSGmR1fqbsyLn534T3Y9zdw3rsCh+CpGi8Cdsd7ia+k9oHbfZ6Cu+QdVtr2Mbm/bxFvm4inmxwAbINbQNzYQBtqCeFoB/WMBeQGwNL4s//OeLDyz3iuzu8Am6cf+MLkvpI2ATbGvWA6ovhBl/OqdqulpaQNcHG83cz+1lF9M5si6Z/4hXxP2bpR0khcCM4wszGl8lvwpMOn0JL4eVgjdc1spqQpdcersBGwSUmYxuPxpKMlXViUJ94LnGxmjXxHBZsCm5rZrLT/ibg1x2iScCS7z0OBFWosLXfDBeprZtaVoh+PKh0wju63gHxPWr4P+LaZjTWz68zsf/AExpsA+1W2GZLa8n/t7VjS4XjW8vMqIz/dbWk5Ak8Q3BUJgIfiWcoXcdszsxuBB3E7x6WaqNsIV5TFwczewQ2tl8Gzo1c5M2Pf4CM6s0r7N/wRak25FUdHFI/eu0haJfPY7RLC0QbqOQvIYuTmXRbNfA4tvYTBlfKhuNnz67SBpAOAM/DuaXU4t9ssLVMvbj98OPWaLtjlQOBZM3ulZt0juBAPaKJuI+RYWr5oJV/bBmnL0hIasLU0s9txY7H9gZck3SXpeEl1xlhZhHDUoJ61gCzuYK+YWbUHMCctVy8K5F4r29AymtIKuW3BecDNwB41cZDutLTcFbcguMLMumI0IWcIvunh+jboSUvLhs7FzPYD/gP/Pb8MHA48JKl2LlCjhHBUUA9bQKbA51PAGpKqjwdFYPOFUtkQ/AdcOw9C0jB8+PUWYEiNGGFmc3Bh2L66jq63tCxc4S7oov3NANaStFrNuo/ioxcvNVG3P1la/sXMTjez3fDf0Azg1DSHpClCOEqo91hAXobfUQ6slB+cluV5D0OBO83sxepO0rDiBfhz8e4d3OGvBNaXtPDZPD1WjMYfKxqZa9EuktbEexx/6sLZmJPx3/GRlWPtgrvOXV/6HnPqNmJp+Q21uOshaVl8ZGMBHmztLl4HVq8KgaQ1qjGb9Lj0JB6zWp4miVGVhHqRBSRuN7gHPvFqI3xU5dPAN/DhtavSPlfHRyfGUCFF1C9M7boK2KNyPq9XJmadigdOfybpLLwH8nV8GPaAsmtb2v9XgEI0N0hlxfnMM7PyZLmC/fDfXFf1NgAmpf0eIZ/XcUdqzyH48OkPmqz7KD7EeoikN3HxfKESVH4Ct7ScmOrug39eJ5rZ0114jh0xDfgy8FNJd+PCdWtqz2GSrgP+hs+GHgR8AfhFeSZ0LiEcLfQaC0gze1XSDmn97mnbZ/AhuBPTxCzwyP0y1Mc3tsLvrqvh8Y0qs8vbmdnLpfMZhc87eBTY28yuqtl+D1qP7hTnM5tFZ9kWDMeDv11m+Whm/5T0BfwZ/mu4TeY8/B2jo8sXcGbdtyTtjbvUj8MDx7fTMi8CfHLcKnivbB38pnOomf24q86vQcbhwdg98ZjcUvjrElPx
ntSX8bjSAry38V3qv5+G6cy7Kv3qJbe+SLqTDDSzLXq6LUsS6qb3WHoznXg7dsm6SHsp9+CBzyDoVuJRpQ9jZqf3dBuCJZMYVQmCIJuwgAyCIJvocQRBkE0IRxAE2YRwBEGQTQhHEATZhHAEQZBNCEcQBNmEcARBkE0IRxAE2YRw9CHKafl7ui11SFpR0tmSnpK0IKXu7/MU9gY93Y7eRNPvquh49bq3Y+0464wF5GBavDQK3sBzLlwG/KRZp60liCPwV8zPxN3vXmu/eudI2b3n9fY3VCVtgWdqm1ROPtyX6cxLbr1JNKDr2nMlnu1KuGfGN3FLxE2Ab3XRMfornwceNrPvddPxDsX9cCct5uPsTOfylW6Bp6Ocire3zxNvx7bmTxXfjnOAx4EDJB1Vl6IvWMiaeDKbPk9Km7icmb2ZbA+CEhHj6ABzl/pp+B1n/aJc0lKSjpJ0R7LVeyc9209IDlqU6jZs6VfaZndJD6R6T0s6AXdqb4WkAZLGp3rvpOX4mnbsn9rxOUnHSpot6S1J90raPtUZJOlOSW+k9h3T0WdU7Be3HxikFhvCsaU6Q1J6/tfTv7sk7d7G/jqsm463buV4ZVvIWSk2sZWkW9N+5kq6RNL7K/sqPpedJB0jaQbuJ7NXWt8qxqEWW8e1JF0p6ZX0md2U0j0W9cbi2fIBbiu1c1Jav3z6XUyX9KakeZIelnRGR597TxI9jsYoBKNseLws8D3gl8Cv8HjItniav09L2rrmTtWIpR+Shqb9zgJOwJ3khuEp4BZB0qrA3XjuzIvw9IVb4omNPytpu2q+UDw94NK4m/2yeMr8myTth+cpPQ+3g9wLOEHSk+24mYHn7twXNyN6CbdsBI9zIOkQYDzeczsJz8q9PzBZ0oFmtjC1YUbduuMBlHuEawO/wz/La/B0isOBbSRtW3bDS5yJi/P5eK7W6bTPSuncp+H5Sgfibnu/krRZSvF4LZ6271v4d1x4scxIy/GpTZem81kat6X8bAfH7lFCOFqzoqQBtMQ4DsIvxPvN7IlSvbeBD1YSvk6UJ4u9AA+GVW33OrT0S13kH+MitZ2ZvZTKzyVdiBXG4D+0UWZ2TlEo6UE8r+QYoNprWBrYvhA2SY/i4ncN8Akzuz+VX4jnDx0FtCkcyVB7pqST8KTO5Ue91fHkyzOAj5tZYbY8AXgA+KGkX5jZvJy6ZnZ53fEqrA8cZmbjSu15BI9Z/TcuoGVWALasEZS2GIDbSS5MqCTpxXQOOwE3mdlDku7BhWOKmU2t7GMo8Nvkf9JniEeV1hyP37VewC/UQ/C7xm7lSua8BX6xS1otCU6RzPbjNftuxNJva+DfgYsL0Uh15+M9lSpDU3urCYnPxe/GQ2u2mVDpDRWmytMK0UjHfAe3ediwZh+N8nn8znx2IQRp36/iyX5Xxi+y3LqN8CowoVJ2Tipv63PJMU56Fzi7UlZ8/41+ZvOBTSVtlnHcHieEozXn4T/gXfHhxbl4l7eVJ4mkvSTdi2fufgW/gAsjntWr9WnM0q+wDny8pu6jNWUDgenVoeL093RaWxG2akfJEvHJmrqv0IDdYDsMTMtHatb9JS0/3ETdRphZNaBKf89sYz9P1JS1x7M1XjUNWzQmDsV/Kw9LmiHpghTf6tXXZjyqtOavJXuD30q6E3cgnwjsXVSS9FXcr+Q+/Ln2aVxclsbNn+u++EYs/Ypljr1gLm21o732NUtvs2hs7zi5No1dYdH4qxTQ3RW3z9gJj5P9XtJOvXVEp1erWm/AzO7GJ4B9TdInS6v2xYViRzObYGa/ToIzu5OHLIJmm9SsqyubCWysyshM+nsjuteKsI7ifDatWVdYKM5soi50bNO4vtxdbSGSlsN7Nr3JonGumV1uZiPxntDpwA548LxXEsLRGCfid5cTSmUL8B/Ews9QknCzn87wR9x8aViKmRT7XoV6A+zJuDPcAZXykan8uk62p7NMwUecRkt6T1GY/j8aty+c0kRd0t/t
WTSugseoyhySyic3cS7NUmsnWcTGymUp7vVAXf3eRDyqNEBybfs57hW6g5n9Hh+B2AO4VdKl+DDeENyTszPHWiDpMHxE5j5J5+PDscPx5+d1Kpucjls3jpe0Ff6j2xLv7k5P63uMNFoyBh92vLeYv4APsW4AHJgCv1l1E9OAEZJOxIc53wVuSHNvwHswx6XA4x/xwPNwPH5UDWouTu5PbTsqjRy9gceTpgNzJF2Pf28v4L2hg/HY0g3d2MYsQjga52TcS/UE/PHk5+lOeBg+/l980UfSEiBrCjO7RtKeQGGC/QI+rfoO4OZK3fly68bj8ZGfYbgP6kTguJo5HN2OmZ0jaQ4+7+W4VPxnYGjFvzarLnAUflcehVtdCr/wCuF4Bp+Lcib+3b2Dz0/5bklcFjtm9pSk4XiwfQJ+k7kEH6Idh3sQ74SPGs0BrgdOMbNnu6uNuTRtj9DfXnIL+hdppucsMxvcw03pl4SvStAvCeFYvERwNAiCbEI4giDIJh5VgiDIJnocQRBkE8IRBEE2IRxBEGQTwhEEQTYhHEEQZBPCEQRBNiEcQRBkE8IRBEE2IRxBEGQTwhEEQTYhHEEQZBPCEQRBNiEcQRBkE8IRBEE2IRxBEGQTwhEEQTYhHEEQZBPCEQRBNiEcQRBkE8IRBEE2IRxBEGQTwhEEQTYhHEEQZBPCEQRBNiEcQRBkE8IRBEE2IRxBEGQTwhEEQTYhHEEQZBPCEQRBNiEcQRBkE8IRBEE2IRxBEGQTwhEEQTYhHEEQZBPCEQRBNiEcQRBkE8IRBEE2/w+9HNldvyoxmgAAAABJRU5ErkJggg==\n", 73 | "text/plain": [ 74 | "
" 75 | ] 76 | }, 77 | "metadata": { 78 | "needs_background": "light" 79 | }, 80 | "output_type": "display_data" 81 | } 82 | ], 83 | "source": [ 84 | "labels = ['2011 footprints', '2016/2017 footprints', 'Random footprints']\n", 85 | "colors = ['#1f77b4', '#ff7f0e', 'green']\n", 86 | "plt.figure(figsize=(4, 2.5))\n", 87 | "patches = [\n", 88 | " mpl.patches.Patch(color=color, label=label)\n", 89 | " for label, color in zip(labels, colors)\n", 90 | "]\n", 91 | "plt.axis(\"off\")\n", 92 | "plt.legend(patches, labels, loc='center', frameon=False, fontsize=18)\n", 93 | "plt.show()\n", 94 | "plt.close()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "buried-least", 100 | "metadata": {}, 101 | "source": [ 102 | "## NAIP / Poultry barns" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 4, 108 | "id": "regulated-password", 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "directories = [\n", 113 | " fn\n", 114 | " for fn in os.listdir(\"../results/kl/\")\n", 115 | " if fn.startswith(\"poultry_barns_random\") and not os.path.isfile(os.path.join(\"../results/\", fn))\n", 116 | "]" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "id": "balanced-reading", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "labeled_idxs, labeled_years = utils.get_poultry_barn_labels(\"../data/\")" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "id": "other-tribune", 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "results = {\n", 137 | " \"method\": [],\n", 138 | " \"dataset\": [],\n", 139 | " \"num_clusters\": [],\n", 140 | " \"buffer\": [],\n", 141 | " \"mae\": [],\n", 142 | " \"acc\": [],\n", 143 | " \"bc_coefficient\": [],\n", 144 | " \"theta\": []\n", 145 | "}" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "id": "scientific-secretariat", 151 | "metadata": {}, 152 | "source": [ 153 | "### Figures that 
show distributions of KL divergences" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 9, 159 | "id": "wireless-coordination", 160 | "metadata": { 161 | "scrolled": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "bins = np.linspace(0,10,num=100)\n", 166 | "for directory in directories:\n", 167 | " \n", 168 | " random_dataset, num_clusters, buffer = directory.split(\"-\")\n", 169 | " num_clusters = int(num_clusters)\n", 170 | " buffer = int(buffer)\n", 171 | " \n", 172 | " original_dataset = random_dataset.replace(\"_random\", \"\")\n", 173 | " \n", 174 | " random_fn = f\"../results/kl/{random_dataset}-{num_clusters}-{buffer}/results.csv\"\n", 175 | " original_fn = f\"../results/kl/{original_dataset}-{num_clusters}-{buffer}/results.csv\"\n", 176 | " \n", 177 | " assert os.path.exists(random_fn) and os.path.exists(original_fn)\n", 178 | " \n", 179 | " _, random_all_years, random_all_distances = utils.get_results(random_fn)\n", 180 | " _, original_all_years, original_all_distances = utils.get_results(original_fn)\n", 181 | "\n", 182 | " distances1 = []\n", 183 | " distances2 = []\n", 184 | " distances3 = []\n", 185 | " for i in range(len(random_all_distances)):\n", 186 | " distances1.extend(random_all_distances[i])\n", 187 | " distances2.append(original_all_distances[i][3])\n", 188 | " distances3.append(original_all_distances[i][0])\n", 189 | " \n", 190 | " distances1 = np.array(distances1)\n", 191 | " distances2 = np.array(distances2)\n", 192 | " distances3 = np.array(distances3)\n", 193 | " distances1 = distances1[~np.isinf(distances1)]\n", 194 | " distances2 = distances2[~np.isinf(distances2)]\n", 195 | " distances3 = distances3[~np.isinf(distances3)]\n", 196 | " \n", 197 | " hist1, _ = np.histogram(distances1, bins=bins)\n", 198 | " hist2, _ = np.histogram(distances2, bins=bins)\n", 199 | " hist3, _ = np.histogram(distances3, bins=bins)\n", 200 | " \n", 201 | " hist1 = hist1 / hist1.sum()\n", 202 | " hist2 = hist2 / 
hist2.sum()\n", 203 | " hist3 = hist3 / hist3.sum()\n", 204 | " \n", 205 | " bc_coefficient = np.sum(np.sqrt(hist1 * hist2))\n", 206 | " theta_estimate = np.percentile(distances1, 98)\n", 207 | " \n", 208 | " acc, mae = utils.loss_function(labeled_idxs, labeled_years, original_all_distances, original_all_years, theta_estimate, 2018)\n", 209 | " \n", 210 | " results[\"method\"].append(\"heuristic-theta\")\n", 211 | " results[\"dataset\"].append(original_dataset)\n", 212 | " results[\"num_clusters\"].append(num_clusters)\n", 213 | " results[\"buffer\"].append(buffer)\n", 214 | " results[\"acc\"].append(acc)\n", 215 | " results[\"mae\"].append(mae)\n", 216 | " results[\"bc_coefficient\"].append(bc_coefficient)\n", 217 | " results[\"theta\"].append(theta_estimate)\n", 218 | " \n", 219 | " #print(num_clusters, buffer, bc_coefficient, theta_estimate, acc, mae)\n", 220 | " #show_two_results(hist1, hist2, bins, \"Random footprints\", \"Poultry barn footprints\", color1=\"green\", title=None)\n", 221 | " #show_two_results(hist3, hist2, bins, \"2011 footprints\", \"2016/2017 footprints\", color1=\"#1f77b4\", title=None)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "id": "changing-oklahoma", 227 | "metadata": {}, 228 | "source": [ 229 | "## Sentinel 2 / Solar farms" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 10, 235 | "id": "brief-intermediate", 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "directories = [\n", 240 | " fn\n", 241 | " for fn in os.listdir(\"../results/kl/\")\n", 242 | " if fn.startswith(\"solar_farms_reduced_random\")\n", 243 | "]" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "id": "golden-statistics", 249 | "metadata": {}, 250 | "source": [ 251 | "### Acc results from learning theta" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 11, 257 | "id": "stable-concord", 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 |
"output_type": "stream", 263 | "text": [ 264 | "(760,)\n" 265 | ] 266 | } 267 | ], 268 | "source": [ 269 | "labeled_idxs, labeled_years = utils.get_solar_farm_labels(\"../data/\")\n", 270 | "labeled_idxs = np.array(labeled_idxs)\n", 271 | "labeled_years = np.array(labeled_years)\n", 272 | "mask = labeled_years != -1\n", 273 | "labeled_idxs = labeled_idxs[mask]\n", 274 | "labeled_years = labeled_years[mask]\n", 275 | "print(labeled_years.shape)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "id": "powerful-muscle", 281 | "metadata": {}, 282 | "source": [ 283 | "### Figures that show distributions of KL divergences" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 12, 289 | "id": "compact-rainbow", 290 | "metadata": { 291 | "scrolled": false 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "bins = np.linspace(0,10,num=50)\n", 296 | "for directory in directories:\n", 297 | " \n", 298 | " random_dataset, num_clusters, buffer = directory.split(\"-\")\n", 299 | " original_dataset = random_dataset.replace(\"_random\", \"\")\n", 300 | " \n", 301 | " random_fn = f\"../results/kl/{random_dataset}-{num_clusters}-{buffer}/results.csv\"\n", 302 | " original_fn = f\"../results/kl/{original_dataset}-{num_clusters}-{buffer}/results.csv\"\n", 303 | "\n", 304 | " assert os.path.exists(random_fn)\n", 305 | " assert os.path.exists(original_fn), original_fn\n", 306 | " \n", 307 | " _, random_all_years, random_all_distances = utils.get_results(random_fn)\n", 308 | " _, original_all_years, original_all_distances = utils.get_results(original_fn)\n", 309 | "\n", 310 | " distances1 = []\n", 311 | " distances2 = []\n", 312 | " distances3 = []\n", 313 | " for i in range(len(random_all_distances)):\n", 314 | " distances1.extend(random_all_distances[i])\n", 315 | " distances2.append(original_all_distances[i][3])\n", 316 | " distances3.append(original_all_distances[i][0])\n", 317 | " \n", 318 | " distances1 = np.array(distances1)\n", 319 | " 
distances2 = np.array(distances2)\n", 320 | " distances3 = np.array(distances3)\n", 321 | " distances1 = distances1[~np.isinf(distances1)]\n", 322 | " distances2 = distances2[~np.isinf(distances2)]\n", 323 | " distances3 = distances3[~np.isinf(distances3)]\n", 324 | " \n", 325 | " hist1, _ = np.histogram(distances1, bins=bins)\n", 326 | " hist2, _ = np.histogram(distances2, bins=bins)\n", 327 | " hist3, _ = np.histogram(distances3, bins=bins)\n", 328 | " \n", 329 | " hist1 = hist1 / hist1.sum()\n", 330 | " hist2 = hist2 / hist2.sum()\n", 331 | " hist3 = hist3 / hist3.sum()\n", 332 | " \n", 333 | " bc_coefficient = np.sum(np.sqrt(hist1 * hist2))\n", 334 | " theta_estimate = np.percentile(distances1, 98)\n", 335 | " \n", 336 | " acc, mae = utils.loss_function(labeled_idxs, labeled_years, original_all_distances, original_all_years, theta_estimate, 2020)\n", 337 | " \n", 338 | " results[\"method\"].append(\"heuristic-theta\")\n", 339 | " results[\"dataset\"].append(original_dataset)\n", 340 | " results[\"num_clusters\"].append(num_clusters)\n", 341 | " results[\"buffer\"].append(buffer)\n", 342 | " results[\"acc\"].append(acc)\n", 343 | " results[\"mae\"].append(mae)\n", 344 | " results[\"bc_coefficient\"].append(bc_coefficient)\n", 345 | " results[\"theta\"].append(theta_estimate)\n", 346 | " \n", 347 | " #print(num_clusters, buffer, bc_coefficient, theta_estimate, acc, mae)\n", 348 | " #show_two_results(hist1, hist2, bins, \"Random footprints\", \"Solar farm footprints\", color1=\"green\", title=None)\n", 349 | " #show_two_results(hist3, hist2, bins, \"2016 footprints\", \"2020 footprints\", color1=\"#1f77b4\", title=None)" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 13, 355 | "id": "wrong-chuck", 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "df = pd.DataFrame.from_dict(results)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 14, 365 | "id": "humanitarian-still", 366 | "metadata": 
{}, 367 | "outputs": [ 368 | { 369 | "data": { 370 | "text/html": [ 371 | "
\n", 372 | "\n", 385 | "\n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " 
\n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | "
methoddatasetnum_clustersbuffermaeaccbc_coefficienttheta
0heuristic-thetapoultry_barns161000.1630000.9240000.0841201.213688
1heuristic-thetapoultry_barns642000.1540000.9360000.0710001.931520
2heuristic-thetapoultry_barns322000.1700000.9300000.0804431.756256
3heuristic-thetapoultry_barns324000.2360000.8970000.0935052.230692
4heuristic-thetapoultry_barns644000.2040000.9220000.0720662.401980
5heuristic-thetapoultry_barns641000.1720000.9330000.0836321.513780
6heuristic-thetapoultry_barns321000.1720000.9280000.0797521.372300
7heuristic-thetapoultry_barns162000.2180000.9030000.0918191.570992
8heuristic-thetapoultry_barns164000.3850000.8290000.1127771.953870
9heuristic-thetasolar_farms_reduced320.0160.4986840.6894740.5203332.413244
10heuristic-thetasolar_farms_reduced640.0160.5184210.6881580.5261632.763796
11heuristic-thetasolar_farms_reduced160.0160.6263160.6276320.5577882.109168
12heuristic-thetasolar_farms_reduced640.0240.4868420.7078950.5029702.861806
13heuristic-thetasolar_farms_reduced160.0240.7473680.5657890.6035272.143412
14heuristic-thetasolar_farms_reduced320.0240.5447370.6644740.5232652.528604
\n", 567 | "
" 568 | ], 569 | "text/plain": [ 570 | " method dataset num_clusters buffer mae \\\n", 571 | "0 heuristic-theta poultry_barns 16 100 0.163000 \n", 572 | "1 heuristic-theta poultry_barns 64 200 0.154000 \n", 573 | "2 heuristic-theta poultry_barns 32 200 0.170000 \n", 574 | "3 heuristic-theta poultry_barns 32 400 0.236000 \n", 575 | "4 heuristic-theta poultry_barns 64 400 0.204000 \n", 576 | "5 heuristic-theta poultry_barns 64 100 0.172000 \n", 577 | "6 heuristic-theta poultry_barns 32 100 0.172000 \n", 578 | "7 heuristic-theta poultry_barns 16 200 0.218000 \n", 579 | "8 heuristic-theta poultry_barns 16 400 0.385000 \n", 580 | "9 heuristic-theta solar_farms_reduced 32 0.016 0.498684 \n", 581 | "10 heuristic-theta solar_farms_reduced 64 0.016 0.518421 \n", 582 | "11 heuristic-theta solar_farms_reduced 16 0.016 0.626316 \n", 583 | "12 heuristic-theta solar_farms_reduced 64 0.024 0.486842 \n", 584 | "13 heuristic-theta solar_farms_reduced 16 0.024 0.747368 \n", 585 | "14 heuristic-theta solar_farms_reduced 32 0.024 0.544737 \n", 586 | "\n", 587 | " acc bc_coefficient theta \n", 588 | "0 0.924000 0.084120 1.213688 \n", 589 | "1 0.936000 0.071000 1.931520 \n", 590 | "2 0.930000 0.080443 1.756256 \n", 591 | "3 0.897000 0.093505 2.230692 \n", 592 | "4 0.922000 0.072066 2.401980 \n", 593 | "5 0.933000 0.083632 1.513780 \n", 594 | "6 0.928000 0.079752 1.372300 \n", 595 | "7 0.903000 0.091819 1.570992 \n", 596 | "8 0.829000 0.112777 1.953870 \n", 597 | "9 0.689474 0.520333 2.413244 \n", 598 | "10 0.688158 0.526163 2.763796 \n", 599 | "11 0.627632 0.557788 2.109168 \n", 600 | "12 0.707895 0.502970 2.861806 \n", 601 | "13 0.565789 0.603527 2.143412 \n", 602 | "14 0.664474 0.523265 2.528604 " 603 | ] 604 | }, 605 | "execution_count": 14, 606 | "metadata": {}, 607 | "output_type": "execute_result" 608 | } 609 | ], 610 | "source": [ 611 | "df" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 15, 617 | "id": "instrumental-script", 618 | "metadata": {}, 
619 | "outputs": [], 620 | "source": [ 621 | "df.to_csv(\"../results/heuristic-theta_results.csv\")" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "id": "viral-stewart", 627 | "metadata": {}, 628 | "source": [ 629 | "## Rank correlations" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 13, 635 | "id": "responsible-catholic", 636 | "metadata": {}, 637 | "outputs": [], 638 | "source": [ 639 | "from scipy.stats import spearmanr" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 14, 645 | "id": "executed-telephone", 646 | "metadata": {}, 647 | "outputs": [ 648 | { 649 | "data": { 650 | "text/plain": [ 651 | "SpearmanrResult(correlation=0.7666666666666667, pvalue=0.01594401657897401)" 652 | ] 653 | }, 654 | "execution_count": 14, 655 | "metadata": {}, 656 | "output_type": "execute_result" 657 | } 658 | ], 659 | "source": [ 660 | "bc_coefficients, accs = df[df.dataset==\"poultry_barns\"][[\"bc_coefficient\", \"acc\"]].values.T\n", 661 | "spearmanr(-bc_coefficients, accs)" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": 15, 667 | "id": "verified-girlfriend", 668 | "metadata": {}, 669 | "outputs": [ 670 | { 671 | "data": { 672 | "text/plain": [ 673 | "SpearmanrResult(correlation=-0.7029350233548073, pvalue=0.03467010659432658)" 674 | ] 675 | }, 676 | "execution_count": 15, 677 | "metadata": {}, 678 | "output_type": "execute_result" 679 | } 680 | ], 681 | "source": [ 682 | "bc_coefficients, maes = df[df.dataset==\"poultry_barns\"][[\"bc_coefficient\", \"mae\"]].values.T\n", 683 | "spearmanr(-bc_coefficients, maes)" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 16, 689 | "id": "standing-learning", 690 | "metadata": {}, 691 | "outputs": [ 692 | { 693 | "data": { 694 | "text/plain": [ 695 | "SpearmanrResult(correlation=0.942857142857143, pvalue=0.004804664723032055)" 696 | ] 697 | }, 698 | "execution_count": 16, 699 | "metadata": {}, 700 | 
"output_type": "execute_result" 701 | } 702 | ], 703 | "source": [ 704 | "bc_coefficients, accs = df[df.dataset==\"solar_farms_reduced\"][[\"bc_coefficient\", \"acc\"]].values.T\n", 705 | "spearmanr(-bc_coefficients, accs)" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 17, 711 | "id": "adjacent-smoke", 712 | "metadata": {}, 713 | "outputs": [ 714 | { 715 | "data": { 716 | "text/plain": [ 717 | "SpearmanrResult(correlation=-0.942857142857143, pvalue=0.004804664723032055)" 718 | ] 719 | }, 720 | "execution_count": 17, 721 | "metadata": {}, 722 | "output_type": "execute_result" 723 | } 724 | ], 725 | "source": [ 726 | "bc_coefficients, maes = df[df.dataset==\"solar_farms_reduced\"][[\"bc_coefficient\", \"mae\"]].values.T\n", 727 | "spearmanr(-bc_coefficients, maes)" 728 | ] 729 | } 730 | ], 731 | "metadata": { 732 | "kernelspec": { 733 | "display_name": "backdating", 734 | "language": "python", 735 | "name": "conda-env-backdating-py" 736 | }, 737 | "language_info": { 738 | "codemirror_mode": { 739 | "name": "ipython", 740 | "version": 3 741 | }, 742 | "file_extension": ".py", 743 | "mimetype": "text/x-python", 744 | "name": "python", 745 | "nbconvert_exporter": "python", 746 | "pygments_lexer": "ipython3", 747 | "version": "3.7.7" 748 | } 749 | }, 750 | "nbformat": 4, 751 | "nbformat_minor": 5 752 | } 753 | -------------------------------------------------------------------------------- /notebooks/Utils - Labeling tool.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "micro-spain", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Copyright (c) Microsoft Corporation. 
All rights reserved\n", 11 | "# Licensed under the MIT License.\n", 12 | "%matplotlib inline\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2\n", 15 | "import sys\n", 16 | "sys.path.append(\"..\")\n", 17 | "import os\n", 18 | "\n", 19 | "import numpy as np\n", 20 | "\n", 21 | "import shapely.geometry\n", 22 | "\n", 23 | "from pigeon import annotate\n", 24 | "\n", 25 | "from temporal_cluster_matching import utils, DataInterface, algorithms" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "id": "compact-rapid", 31 | "metadata": {}, 32 | "source": [ 33 | "## NAIP / Poultry barn labeling" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "duplicate-herald", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "OUTPUT_FN = \"../results/poultry_barn_labels2.csv\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "id": "peaceful-paraguay", 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "geoms = utils.get_poultry_barn_geoms(\"../data/\")\n", 54 | "len(geoms)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "id": "monetary-network", 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "dataloader = DataInterface.NAIPDataLoader()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "solved-plumbing", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "used_idxs = set()\n", 75 | "if os.path.exists(OUTPUT_FN):\n", 76 | " with open(OUTPUT_FN, \"r\") as f:\n", 77 | " lines = f.read().strip().split(\"\\n\")\n", 78 | " for i in range(1, len(lines)):\n", 79 | " parts = lines[i].split(\",\")\n", 80 | " idx = int(parts[0])\n", 81 | " used_idxs.add(idx)\n", 82 | "available_idxs = list(set(range(len(geoms))) - used_idxs)\n", 83 | "\n", 84 | "print(\"Already labeled %d idxs, choosing from %d remaining\" % (len(used_idxs), len(available_idxs)))\n", 85 | "\n", 86 | "num_to_label = 
5\n", 87 | "idxs = np.random.choice(available_idxs, size=num_to_label, replace=False)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "reserved-metabolism", 94 | "metadata": { 95 | "scrolled": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "def show_idx(i):\n", 100 | " print(\"Labeling %d\" % (i))\n", 101 | " shape_area = shapely.geometry.shape(geoms[i]).area\n", 102 | " side_length = np.sqrt(shape_area)\n", 103 | "\n", 104 | " buffer_size = max(side_length * 2, 0.001)\n", 105 | "\n", 106 | " rgb_images, years = dataloader.get_rgb_stack_from_geom(geoms[i], buffer=buffer_size)\n", 107 | " years = [\n", 108 | " \"%d; %d\" % (j, year)\n", 109 | " for j, year in enumerate(years)\n", 110 | " ]\n", 111 | " utils.show_images(rgb_images, years)\n", 112 | " \n", 113 | "annotations = annotate(\n", 114 | " idxs,\n", 115 | " options=[str(i) for i in range(5)],\n", 116 | " display_fn=lambda i: show_idx(i)\n", 117 | ")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "id": "temporal-agenda", 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "if not os.path.exists(OUTPUT_FN):\n", 128 | " with open(OUTPUT_FN, \"w\") as f:\n", 129 | " f.write(\"idx,year\\n\")\n", 130 | " \n", 131 | "with open(OUTPUT_FN, \"a\") as f:\n", 132 | " for idx, label in annotations:\n", 133 | " years = dataloader.get_years_from_geom(geoms[idx])\n", 134 | " label = int(label)\n", 135 | " year = years[label]\n", 136 | " f.write(\"%d,%d\\n\" % (idx, year))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "legendary-croatia", 142 | "metadata": {}, 143 | "source": [ 144 | "## Sentinel 2 / Solar farm labeling" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "rental-feeding", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "OUTPUT_FN = \"../results/solar_farm_labels.csv\"" 155 | ] 156 | }, 157 | { 158 | "cell_type": 
"code", 159 | "execution_count": null, 160 | "id": "enabling-catering", 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "geoms = utils.get_solar_farm_geoms(\"../data/\")\n", 165 | "len(geoms)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "young-perry", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "dataloader = DataInterface.S2DataLoader()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "id": "embedded-bicycle", 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "used_idxs = set()\n", 186 | "if os.path.exists(OUTPUT_FN):\n", 187 | " with open(OUTPUT_FN, \"r\") as f:\n", 188 | " lines = f.read().strip().split(\"\\n\")\n", 189 | " for i in range(1, len(lines)):\n", 190 | " parts = lines[i].split(\",\")\n", 191 | " idx = int(parts[0])\n", 192 | " used_idxs.add(idx)\n", 193 | "available_idxs = list(set(range(len(geoms))) - used_idxs)\n", 194 | "\n", 195 | "print(\"Already labeled %d idxs, choosing from %d remaining\" % (len(used_idxs), len(available_idxs)))\n", 196 | "\n", 197 | "num_to_label = 50\n", 198 | "idxs = np.random.choice(available_idxs, size=num_to_label, replace=False)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "id": "superior-sperm", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "idxs = range(935)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "id": "treated-height", 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "images = []\n", 219 | "for i in idxs:\n", 220 | " if i % 50 == 0:\n", 221 | " print(i, len(idxs))\n", 222 | " rgb_images, years = dataloader.get_rgb_stack_from_geom(geoms[i], buffer=0.002)\n", 223 | " images.append(rgb_images)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "id": "smoking-cathedral", 230 | "metadata": {}, 231 | 
"outputs": [], 232 | "source": [ 233 | "titles = [\n", 234 | " \"%d, %d\" % (i + 2016, i)\n", 235 | " for i in range(5)\n", 236 | "]" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "id": "metric-avenue", 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "def show_idx(i):\n", 247 | " print(\"Labeling %d\" % (i))\n", 248 | " #shape_area = shapely.geometry.shape(geoms[i]).area\n", 249 | " #side_length = np.sqrt(shape_area)\n", 250 | "\n", 251 | " #buffer_size = max(side_length * 1.5, 0.001)\n", 252 | " #buffer_size = 0.002\n", 253 | "\n", 254 | " #rgb_images, years = dataloader.get_rgb_stack_from_geom(geoms[i], buffer=buffer_size)\n", 255 | " \n", 256 | " #titles = []\n", 257 | " #for i, year in enumerate(years):\n", 258 | " # titles.append(\"%d, %d\" % (year, i))\n", 259 | " \n", 260 | " utils.show_images(images[i], titles)\n", 261 | " \n", 262 | "annotations = annotate(\n", 263 | " idxs,\n", 264 | " options=[str(i) for i in range(5)],\n", 265 | " display_fn=lambda i: show_idx(i)\n", 266 | ")" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "id": "fresh-anaheim", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "if not os.path.exists(OUTPUT_FN):\n", 277 | " with open(OUTPUT_FN, \"w\") as f:\n", 278 | " f.write(\"idx,year\\n\")\n", 279 | " \n", 280 | "with open(OUTPUT_FN, \"a\") as f:\n", 281 | " for idx, label in annotations:\n", 282 | " years = dataloader.get_years_from_geom(geoms[idx])\n", 283 | " label = int(label)\n", 284 | " year = years[label]\n", 285 | " f.write(\"%d,%d\\n\" % (idx, year))" 286 | ] 287 | } 288 | ], 289 | "metadata": { 290 | "kernelspec": { 291 | "display_name": "backdating", 292 | "language": "python", 293 | "name": "conda-env-backdating-py" 294 | }, 295 | "language_info": { 296 | "codemirror_mode": { 297 | "name": "ipython", 298 | "version": 3 299 | }, 300 | "file_extension": ".py", 301 | "mimetype": "text/x-python", 302 | 
"name": "python", 303 | "nbconvert_exporter": "python", 304 | "pygments_lexer": "ipython3", 305 | "version": "3.7.7" 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 5 310 | } 311 | -------------------------------------------------------------------------------- /notebooks/Utils - Make predictions by year.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "computational-calgary", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Copyright (c) Microsoft Corporation. All rights reserved\n", 11 | "# Licensed under the MIT License.\n", 12 | "%matplotlib inline\n", 13 | "%load_ext autoreload\n", 14 | "%autoreload 2\n", 15 | "import sys\n", 16 | "sys.path.append(\"..\")\n", 17 | "import os\n", 18 | "\n", 19 | "import numpy as np\n", 20 | "\n", 21 | "import fiona\n", 22 | "\n", 23 | "from temporal_cluster_matching import utils, DataInterface, algorithms" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "ignored-beatles", 29 | "metadata": {}, 30 | "source": [ 31 | "## NAIP / Poultry barns" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "id": "blond-panama", 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "0.938\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "labeled_idxs, labeled_years = utils.get_poultry_barn_labels(\"../data/\")\n", 50 | "labeled_idxs = np.array(labeled_idxs)\n", 51 | "labeled_years = np.array(labeled_years)\n", 52 | "all_idxs, all_years, all_distances = utils.get_results(\"../results/kl/poultry_barns-64-200/results.csv\")\n", 53 | "all_idxs = np.array(all_idxs)\n", 54 | "theta = 1.931520 # we get this estimate of theta from the poultry_barns-64-200 row in ../results/heuristic-theta_results.csv\n", 55 | "\n", 56 | "distances, years = [], []\n", 57 | "for idx in labeled_idxs:\n", 58 | " 
distances.append(all_distances[idx])\n", 59 | " years.append(all_years[idx])\n", 60 | "\n", 61 | "predicted_years = utils.decision_function(distances, years, theta, 2016)\n", 62 | "predicted_years = np.array(predicted_years)\n", 63 | "\n", 64 | "print(utils.uncertain_accuracy(labeled_years, predicted_years))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "id": "future-mandate", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "all_predicted_years = utils.decision_function(all_distances, all_years, theta, 2016)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "id": "incident-final", 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "all_predicted_years = np.array(all_predicted_years)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "id": "impressive-stability", 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "years = list(range(2011, 2019))\n", 95 | "idxs_per_year = []\n", 96 | "for year in years:\n", 97 | " idxs_per_year.append(np.where(all_predicted_years <= year)[0]) " 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "id": "charitable-significance", 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "2011 5099\n", 111 | "2012 5126\n", 112 | "2013 5363\n", 113 | "2014 5377\n", 114 | "2015 5655\n", 115 | "2016 5683\n", 116 | "2017 6013\n", 117 | "2018 6013\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "for idxs, year in zip(idxs_per_year, years):\n", 123 | " print(year, len(idxs))" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "id": "seasonal-whole", 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "with fiona.open(\"../data/Delmarva_PL_House_Final2_epsg26918.geojson\", \"r\") as source:\n", 134 | "\n", 135 | " sink_schema = source.schema\n", 136 | " 
sink_schema[\"properties\"][\"first_year\"] = \"int\"\n", 137 | "\n", 138 | " with fiona.open(\n", 139 | " \"../predictions/poultry_barns-64-200_predictions.geojson\",\n", 140 | " \"w\",\n", 141 | " crs=source.crs,\n", 142 | " driver=\"GeoJSON\",\n", 143 | " schema=sink_schema,\n", 144 | " ) as sink:\n", 145 | " for i, row in enumerate(source):\n", 146 | " row[\"properties\"][\"first_year\"] = int(all_predicted_years[i])\n", 147 | " sink.write(row)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 8, 153 | "id": "declared-variety", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "for i, year in enumerate(years):\n", 158 | " with fiona.open(\"../data/Delmarva_PL_House_Final2_epsg26918.geojson\", \"r\") as source:\n", 159 | "\n", 160 | " sink_schema = source.schema\n", 161 | "\n", 162 | " with fiona.open(\n", 163 | " \"../predictions/poultry_barns-64-200_predictions_%d.geojson\" % (year),\n", 164 | " \"w\",\n", 165 | " crs=source.crs,\n", 166 | " driver=\"GeoJSON\",\n", 167 | " schema=sink_schema,\n", 168 | " ) as sink:\n", 169 | " for j, row in enumerate(source):\n", 170 | " if j in idxs_per_year[i]:\n", 171 | " sink.write(row)" 172 | ] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "backdating", 178 | "language": "python", 179 | "name": "conda-env-backdating-py" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.7.7" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 5 196 | } 197 | -------------------------------------------------------------------------------- /results/heuristic-theta_results.csv: -------------------------------------------------------------------------------- 1 | 
,method,dataset,num_clusters,buffer,mae,acc,bc_coefficient,theta 2 | 0,heuristic-theta,poultry_barns,16,100,0.163,0.924,0.0841196348685718,1.2136880000000005 3 | 1,heuristic-theta,poultry_barns,64,200,0.154,0.936,0.07100009492028429,1.931519999999999 4 | 2,heuristic-theta,poultry_barns,32,200,0.17,0.93,0.08044289504170049,1.756255999999999 5 | 3,heuristic-theta,poultry_barns,32,400,0.236,0.897,0.09350519180630164,2.2306919999999995 6 | 4,heuristic-theta,poultry_barns,64,400,0.204,0.922,0.0720659970549971,2.401979999999998 7 | 5,heuristic-theta,poultry_barns,64,100,0.172,0.933,0.08363152320935774,1.5137799999999997 8 | 6,heuristic-theta,poultry_barns,32,100,0.172,0.928,0.07975190000865506,1.3723 9 | 7,heuristic-theta,poultry_barns,16,200,0.218,0.903,0.09181922389553562,1.5709919999999984 10 | 8,heuristic-theta,poultry_barns,16,400,0.385,0.829,0.1127769878022964,1.9538699999999993 11 | 9,heuristic-theta,solar_farms_reduced,32,0.016,0.4986842105263158,0.6894736842105263,0.5203330780923132,2.4132439999999997 12 | 10,heuristic-theta,solar_farms_reduced,64,0.016,0.5184210526315789,0.6881578947368421,0.5261627623928011,2.763796 13 | 11,heuristic-theta,solar_farms_reduced,16,0.016,0.6263157894736842,0.6276315789473684,0.5577883784350387,2.1091679999999995 14 | 12,heuristic-theta,solar_farms_reduced,64,0.024,0.4868421052631579,0.7078947368421052,0.502969885699625,2.8618060000000005 15 | 13,heuristic-theta,solar_farms_reduced,16,0.024,0.7473684210526316,0.5657894736842105,0.6035269451537504,2.143412 16 | 14,heuristic-theta,solar_farms_reduced,32,0.024,0.5447368421052632,0.6644736842105263,0.5232647746460067,2.528604000000001 17 | -------------------------------------------------------------------------------- /results/learned-theta_lr_results.csv: -------------------------------------------------------------------------------- 1 | ,method,dataset,num_clusters,buffer,mae,acc 2 | 0,learned-theta,poultry_barns,64,100,0.16 +/- 0.04,0.94 +/- 0.01 3 | 
1,learned-theta,poultry_barns,128,100,0.19 +/- 0.05,0.93 +/- 0.01 4 | 2,learned-theta,poultry_barns,64,400,0.21 +/- 0.05,0.92 +/- 0.02 5 | 3,learned-theta,poultry_barns,32,100,0.20 +/- 0.06,0.93 +/- 0.02 6 | 4,learned-theta,poultry_barns,64,200,0.16 +/- 0.05,0.94 +/- 0.02 7 | 5,learned-theta,poultry_barns,128,200,0.19 +/- 0.05,0.94 +/- 0.02 8 | 6,learned-theta,poultry_barns,16,50,0.20 +/- 0.05,0.92 +/- 0.02 9 | 7,learned-theta,poultry_barns,32,200,0.18 +/- 0.05,0.93 +/- 0.02 10 | 8,learned-theta,poultry_barns,16,200,0.22 +/- 0.05,0.91 +/- 0.02 11 | 9,learned-theta,poultry_barns,16,400,0.48 +/- 0.12,0.85 +/- 0.02 12 | 10,learned-theta,poultry_barns,32,400,0.27 +/- 0.05,0.90 +/- 0.02 13 | 11,learned-theta,poultry_barns,16,100,0.21 +/- 0.05,0.92 +/- 0.02 14 | 12,learned-theta,poultry_barns,128,400,0.23 +/- 0.06,0.92 +/- 0.02 15 | 13,lr,poultry_barns,64,100,0.11 +/- 0.05,0.96 +/- 0.01 16 | 14,lr,poultry_barns,128,100,0.14 +/- 0.04,0.95 +/- 0.01 17 | 15,lr,poultry_barns,64,400,0.15 +/- 0.05,0.95 +/- 0.01 18 | 16,lr,poultry_barns,32,100,0.12 +/- 0.04,0.96 +/- 0.01 19 | 17,lr,poultry_barns,64,200,0.15 +/- 0.05,0.95 +/- 0.01 20 | 18,lr,poultry_barns,128,200,0.14 +/- 0.05,0.95 +/- 0.01 21 | 19,lr,poultry_barns,16,50,0.13 +/- 0.04,0.95 +/- 0.01 22 | 20,lr,poultry_barns,32,200,0.15 +/- 0.04,0.95 +/- 0.01 23 | 21,lr,poultry_barns,16,200,0.14 +/- 0.04,0.95 +/- 0.01 24 | 22,lr,poultry_barns,16,400,0.21 +/- 0.07,0.94 +/- 0.02 25 | 23,lr,poultry_barns,32,400,0.14 +/- 0.05,0.95 +/- 0.02 26 | 24,lr,poultry_barns,16,100,0.11 +/- 0.04,0.96 +/- 0.01 27 | 25,lr,poultry_barns,128,400,0.14 +/- 0.05,0.95 +/- 0.01 28 | 26,learned-theta,poultry_barns_color,0,400,0.47 +/- 0.08,0.87 +/- 0.02 29 | 27,learned-theta,poultry_barns_color,0,200,0.39 +/- 0.09,0.89 +/- 0.02 30 | 28,learned-theta,poultry_barns_color,0,100,0.25 +/- 0.07,0.91 +/- 0.02 31 | 29,lr,poultry_barns_color,0,400,0.15 +/- 0.05,0.94 +/- 0.02 32 | 30,lr,poultry_barns_color,0,200,0.15 +/- 0.04,0.94 +/- 0.01 33 | 
31,lr,poultry_barns_color,0,100,0.17 +/- 0.05,0.94 +/- 0.01 34 | 32,learned-theta,solar_farms_reduced,16,0.024,0.75 +/- 0.07,0.56 +/- 0.04 35 | 33,learned-theta,solar_farms_reduced,32,0.024,0.55 +/- 0.06,0.67 +/- 0.03 36 | 34,learned-theta,solar_farms_reduced,32,0.016,0.51 +/- 0.06,0.68 +/- 0.03 37 | 35,learned-theta,solar_farms_reduced,64,0.016,0.53 +/- 0.05,0.68 +/- 0.03 38 | 36,learned-theta,solar_farms_reduced,128,0.024,0.48 +/- 0.06,0.70 +/- 0.03 39 | 37,learned-theta,solar_farms_reduced,128,0.016,0.60 +/- 0.09,0.64 +/- 0.04 40 | 38,learned-theta,solar_farms_reduced,64,0.024,0.51 +/- 0.08,0.70 +/- 0.04 41 | 39,learned-theta,solar_farms_reduced,16,0.016,0.66 +/- 0.08,0.62 +/- 0.04 42 | 40,lr,solar_farms_reduced,16,0.024,0.51 +/- 0.06,0.64 +/- 0.03 43 | 41,lr,solar_farms_reduced,32,0.024,0.41 +/- 0.07,0.72 +/- 0.04 44 | 42,lr,solar_farms_reduced,32,0.016,0.32 +/- 0.04,0.75 +/- 0.03 45 | 43,lr,solar_farms_reduced,64,0.016,0.29 +/- 0.04,0.78 +/- 0.03 46 | 44,lr,solar_farms_reduced,128,0.024,0.27 +/- 0.04,0.78 +/- 0.03 47 | 45,lr,solar_farms_reduced,128,0.016,0.30 +/- 0.05,0.76 +/- 0.03 48 | 46,lr,solar_farms_reduced,64,0.024,0.30 +/- 0.04,0.78 +/- 0.03 49 | 47,lr,solar_farms_reduced,16,0.016,0.42 +/- 0.06,0.71 +/- 0.03 50 | 48,learned-theta,solar_farms_color,0,0.024,0.95 +/- 0.07,0.49 +/- 0.03 51 | 49,learned-theta,solar_farms_color,0,0.016,0.94 +/- 0.10,0.48 +/- 0.04 52 | 50,lr,solar_farms_color,0,0.024,0.49 +/- 0.06,0.65 +/- 0.04 53 | 51,lr,solar_farms_color,0,0.016,0.47 +/- 0.06,0.65 +/- 0.04 54 | -------------------------------------------------------------------------------- /results/poultry_barn_inter_year_color_distances.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/temporal-cluster-matching/490b12fbb134e755a11a0f8612036100939ab390/results/poultry_barn_inter_year_color_distances.npy -------------------------------------------------------------------------------- 
# /results/solar_farm_inter_year_color_distances.npy:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/microsoft/temporal-cluster-matching/490b12fbb134e755a11a0f8612036100939ab390/results/solar_farm_inter_year_color_distances.npy
# --------------------------------------------------------------------------------
# /run_algorithm.py:
# --------------------------------------------------------------------------------
'''
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
'''
import os
import time
import datetime
import argparse

from temporal_cluster_matching import utils, DataInterface, algorithms

parser = argparse.ArgumentParser(description='Script for running temporal cluster matching')
parser.add_argument('--dataset', required=True,
    choices=["poultry_barns", "solar_farms_reduced", "poultry_barns_random", "solar_farms_reduced_random"],
    help='Dataset to use'
)
parser.add_argument('--algorithm', default='kl',
    choices=(
        'kl',
        'color'
    ),
    help='Algorithm to use'
)

parser.add_argument('--num_clusters', type=int, required=False, help='Number of clusters to use in k-means step.')

# NOTE: a mutually exclusive group with a single member simply makes `--buffer` required.
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--buffer', type=float, help='Amount to buffer for defining a neighborhood. Note: this will be in terms of units of the dataset.')

parser.add_argument('--output_dir', type=str, required=True, help='Path to an empty directory where outputs will be saved. This directory will be created if it does not exist.')
# The flag is parsed but not read anywhere in this script.
parser.add_argument('--verbose', action="store_true", default=False, help='Enable verbose output (currently unused)')
parser.add_argument('--overwrite', action='store_true', default=False, help='Ignore checking whether the output directory has existing data')

args = parser.parse_args()


def _load_dataset(dataset, buffer):
    '''Returns the query geometries and the data loader for a dataset name.

    Also prints a warning when `buffer` looks like it was given in the wrong units: the
    NAIP-backed poultry barn datasets expect meters, the Sentinel-2-backed solar farm
    datasets expect degrees.

    Args:
        dataset: One of the values accepted by `--dataset`.
        buffer: The buffer distance passed on the command line (may be `None`).

    Returns:
        geoms: The list of geometries to process.
        dataloader: A data loader instance matching the dataset's imagery source.

    Raises:
        ValueError: If `dataset` is not a known dataset name.
    '''
    if dataset == "poultry_barns":
        geoms = utils.get_poultry_barn_geoms()
    elif dataset == "poultry_barns_random":
        geoms = utils.get_random_polygons_over_poultry_area()
    elif dataset == "solar_farms_reduced":
        geoms = utils.get_solar_farm_geoms()
    elif dataset == "solar_farms_reduced_random":
        geoms = utils.get_random_polygons_over_solar_area()
    else:
        raise ValueError("Unknown dataset: %s" % (dataset))

    if dataset in ("poultry_barns", "poultry_barns_random"):
        dataloader = DataInterface.NAIPDataLoader()
        # Both poultry barn datasets use the same loader and the same `< 1` check, so they
        # share the "meters" warning (the random variant previously said "degrees").
        if buffer is not None and buffer < 1:
            print("WARNING: your buffer distance is probably set incorrectly, this should be in units of meters.")
    else:
        dataloader = DataInterface.S2DataLoader()
        if buffer is not None and buffer > 1:
            print("WARNING: your buffer distance is probably set incorrectly, this should be in units of degrees (at equator, more/less)")

    return geoms, dataloader


def main():
    '''Runs the selected change-detection algorithm over every geometry of the selected dataset.

    One row per geometry is appended to `<output_dir>/results.csv` in the form
    `idx,year_1,...,year_k,|,value_1,...,value_k,` (note the trailing comma).
    '''
    start_time = time.time()
    print("Starting algorithm at %s" % (str(datetime.datetime.now())))

    # Fail at argument time instead of deep inside the k-means fit.
    if args.algorithm == "kl" and args.num_clusters is None:
        parser.error("--num_clusters is required when --algorithm is 'kl'")

    ##############################
    # Ensure output directory exists
    ##############################
    if os.path.exists(args.output_dir):
        if not args.overwrite:
            print("WARNING: The output directory exists, exiting...")
            return
    else:
        os.makedirs(args.output_dir, exist_ok=False)

    output_fn = os.path.join(
        args.output_dir,
        "results.csv"
    )
    # Rows are appended below, so stale results from an earlier run must be removed first.
    if os.path.exists(output_fn):
        os.remove(output_fn)

    ##############################
    # Load geoms / create dataloader
    ##############################
    geoms, dataloader = _load_dataset(args.dataset, args.buffer)

    ##############################
    # Loop through geoms and run
    ##############################
    tic = time.time()
    for i, geom in enumerate(geoms):
        if i % 10 == 0:
            print("%d/%d\t%0.2f seconds" % (i, len(geoms), time.time() - tic))
            tic = time.time()

        data_images, masks, years = dataloader.get_data_stack_from_geom(geom, buffer=args.buffer)

        if args.algorithm == "kl":
            divergence_values = algorithms.calculate_change_values(data_images, masks, n_clusters=args.num_clusters)
        elif args.algorithm == "color":
            divergence_values = algorithms.calculate_change_values_with_color(data_images, masks)

        row = "".join([
            "%d," % (i),
            "".join("%d," % (year) for year in years),
            "|,",
            "".join("%0.4f," % (divergence) for divergence in divergence_values),
            "\n",
        ])
        # Append per geometry so that partial results survive an interrupted run.
        with open(output_fn, "a") as f:
            f.write(row)

    print("Finished in %0.2f seconds" % (time.time() - start_time))


if __name__ == "__main__":
    main()
# --------------------------------------------------------------------------------
# /temporal_cluster_matching/DataInterface.py:
# --------------------------------------------------------------------------------
# 1 | '''
# 2 | Copyright (c) Microsoft Corporation. All rights reserved.
# 3 | Licensed under the MIT License.
# 4 | '''   (closing delimiter of the license header that opens on the preceding dump line)
import abc
from functools import lru_cache

import numpy as np

from skimage.segmentation import mark_boundaries

import rasterio
import rasterio.mask
import rasterio.features
import rasterio.windows
import rasterio.warp
import fiona.transform

import shapely
import shapely.geometry

from pystac_client import Client
import planetary_computer as pc

from . import utils

# Some tricks to make rasterio faster when using vsicurl -- see https://github.com/pangeo-data/cog-best-practices
RASTERIO_BEST_PRACTICES = dict(
    CURL_CA_BUNDLE='/etc/ssl/certs/ca-certificates.crt',
    GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
    AWS_NO_SIGN_REQUEST='YES',
    GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
    GDAL_SWATH_SIZE='200000000',
    VSI_CURL_CACHE_SIZE='200000000'
)

def get_mask_and_bounding_geoms(geom, buffer):
    '''Returns the two polygons needed to crop imagery with given a query geometry and buffer amount.
    The Temporal Cluster Matching algorithm will cluster all pixels in a footprint + neighborhood, then form distribution of cluster indices from the pixels within a footprint and a distribution with the pixels in the neighborhood.
    To calculate this, we need to crop the imagery from the entire buffered extent and know which of those pixels fall within the footprint. The two polygons we return here let us do that.

    Args:
        geom: A polygon in GeoJSON format describing the query footprint.
        buffer: An amount (in units of `geom`'s coordinate system) to buffer the geom by.

    Returns:
        mask_geom: A polygon in GeoJSON format that has the same extent as `bounding_geom`, but has a hole where `geom` is.
        bounding_geom: A polygon in GeoJSON format that is the extent of `geom` after being buffered by `buffer`.
    '''
    # `buffer(0.0)` is applied before any geometry arithmetic (presumably to repair invalid polygons).
    footprint_shape = shapely.geometry.shape(geom).buffer(0.0)
    bounding_shape = footprint_shape.envelope.buffer(buffer).envelope
    mask_geom = shapely.geometry.mapping(bounding_shape - footprint_shape) # full bounding area - initial footprint
    bounding_geom = shapely.geometry.mapping(bounding_shape) # full bounding area
    return mask_geom, bounding_geom


################################################################
################################################################
class AbstractDataLoader(abc.ABC):
    ''' This class facilitates loading patches of imagery from a source time-series of remotely sensed imagery in a way that can be used by the Temporal Cluster Matching algorithm.
    '''

    @abc.abstractmethod
    def get_rgb_stack_from_geom(self, geom, buffer, show_outline=True, geom_crs="epsg:4326"):
        """Returns a time-series stack of RGB image patches corresponding to a query geometry (that optionally show the outline of the query geometry).

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            buffer: An amount (in units of imagery's projection) to buffer the geom by.
            show_outline: A flag that indicates whether the RGB image patches should be rendered with the outline of `geom`.
            geom_crs: The coordinate reference system (CRS) of geom

        Returns:
            rgb_images: A list of RGB image patches (with `np.uint8` dtypes), one for each date in the source time-series. Each patch should be a crop that covers the extent of the `geom` buffered by an amount specified by `buffer`.
            dates: A list of dates corresponding to each patch in `images`.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_data_stack_from_geom(self, geom, buffer, geom_crs="epsg:4326"):
        """Returns a time-series stack of data images corresponding to a query geometry. While `get_rgb_stack_from_geom(.)` returns just the RGB component of the imagery, this method should return
        the bands to be included in processing.

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            buffer: An amount (in units of imagery's projection) to buffer the geom by.
            geom_crs: The coordinate reference system (CRS) of geom

        Returns:
            images: A list of image patches (with a `dtype` matching the source time-series), one for each date in the source time-series. Each patch should be a crop that covers the extent of the `geom` buffered by an amount specified by `buffer`.
            masks: A list of masks for each patch in `images`. These should be binary, contain a 1 where the corresponding image is covered by the `geom`, and contain a 0 elsewhere.
            dates: A list of dates corresponding to each patch in `images`.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def data_stack_to_rgb(self, images):
        """A convenience method that converts the `images` that are returned by `get_data_stack_from_geom(.)` to `rgb_images` (i.e. the kind returned by `get_rgb_stack_from_geom`).
        This is its own method because if you have `images` from `get_data_stack_from_geom(.)` already, it is likely cheaper to reprocess those into `rgb_images` instead of hitting your data source to re-download the
        RGB components of your data.

        Args:
            images: The list of image patches that are returned by `get_data_stack_from_geom(.)`.

        Returns:
            rgb_images: A list of RGB image patches (with `np.uint8` dtypes), one for each patch in `images`. These should be processed in the same way that `get_rgb_stack_from_geom(.)` processes the source imagery.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_dates_from_geom(self, geom, geom_crs="epsg:4326"):
        """A convenience method for determining what dates of data are available for a given geometry.

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            geom_crs: The coordinate reference system (CRS) of geom

        Returns:
            dates: A list of dates for which there is corresponding data for `geom`.
        """
        raise NotImplementedError()


################################################################
################################################################
class NAIPDataLoader(AbstractDataLoader):
    """Data loader that reads one NAIP tile per year for a query geometry."""

    def __init__(self):
        self.index = utils.NAIPTileIndex()

    def _get_fns_from_geom(self, geom, src_crs):
        """Returns the tile filenames covering the centroid of `geom`, one per year, ordered by year.

        The path components at index 1 and 2 of each filename are read as the state and the
        year. Only tiles from the same state as the first (alphabetically sorted) filename are
        kept, and only the first tile seen for each year.

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            src_crs: The coordinate reference system (CRS) of geom.

        Returns:
            valid_fns: A `numpy.ndarray` of filenames sorted by year.
        """
        centroid = utils.get_transformed_centroid_from_geom(geom, src_crs=src_crs, dst_crs='epsg:4326')
        fns = self.index.lookup_tile(*centroid)
        fns = sorted(fns)

        base_state = fns[0].split("/")[1]

        valid_fns = []
        years = []
        for fn in fns:
            parts = fn.split("/")
            state = parts[1]
            year = int(parts[2])

            if year in years or state != base_state:
                continue

            valid_fns.append(fn)
            years.append(year)

        valid_fns = np.array(valid_fns)
        years = np.array(years)

        idxs = np.argsort(years)
        valid_fns = valid_fns[idxs]

        return valid_fns

    def get_dates_from_geom(self, geom, geom_crs="epsg:26918"):
        """Returns the list of years for which a tile is available for `geom`."""
        fns = self._get_fns_from_geom(geom, geom_crs)
        return [int(fn.split("/")[2]) for fn in fns]

    def get_rgb_stack_from_geom(self, geom, buffer, show_outline=True, geom_crs="epsg:26918"):
        """Returns one RGB patch per available year, optionally with the footprint outline drawn.

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            buffer: An amount (in units of `geom_crs`) to buffer the geom by.
            show_outline: Whether to render the outline of `geom` on each patch.
            geom_crs: The coordinate reference system (CRS) of geom.

        Returns:
            images: A list of image patches, one per year.
            years: The list of years corresponding to `images`.
        """
        mask_geom, bounding_geom = get_mask_and_bounding_geoms(geom, buffer)
        fns = self._get_fns_from_geom(geom, geom_crs)

        years = []
        images = []
        for fn in fns:
            year = int(fn.split("/")[2])
            years.append(year)

            with rasterio.Env(**RASTERIO_BEST_PRACTICES):
                with rasterio.open(utils.NAIP_BLOB_ROOT + fn) as f:
                    dst_crs = f.crs.to_string()
                    if geom_crs != dst_crs:
                        t_mask_geom = fiona.transform.transform_geom(geom_crs, dst_crs, mask_geom)
                        t_bounding_geom = fiona.transform.transform_geom(geom_crs, dst_crs, bounding_geom)
                    else:
                        # Without this branch the names were unbound (or stale from the
                        # previous tile) whenever the CRS strings matched.
                        t_mask_geom = mask_geom
                        t_bounding_geom = bounding_geom

                    mask_image, _ = rasterio.mask.mask(f, [t_mask_geom], crop=True, invert=False, pad=False, all_touched=True)
                    mask_image = np.rollaxis(mask_image, 0, 3)

                    full_image, _ = rasterio.mask.mask(f, [t_bounding_geom], crop=True, invert=False, pad=False, all_touched=True)
                    full_image = np.rollaxis(full_image, 0, 3)[:,:,:3]

            # 1 wherever at least one of the 4 bands is non-zero after masking; the
            # mask is only used to draw a boundary.
            mask = np.zeros((mask_image.shape[0], mask_image.shape[1]), dtype=np.uint8)
            mask[np.sum(mask_image == 0, axis=2) != 4] = 1

            if show_outline:
                images.append(mark_boundaries(
                    full_image, mask
                ))
            else:
                images.append(full_image)

        return images, years

    def get_data_stack_from_geom(self, geom, buffer, geom_crs="epsg:26918"):
        """Returns one full-band patch and one footprint mask per available year.

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            buffer: An amount (in units of `geom`'s coordinate system) to buffer the geom by.
            geom_crs: The coordinate reference system (CRS) of geom.

        Returns:
            images: A list of image patches with all bands, one per year.
            masks: A list of boolean masks that are `True` where all 4 bands are zero after
                masking out the footprint.
            years: The list of years corresponding to `images`.
        """
        mask_geom, bounding_geom = get_mask_and_bounding_geoms(geom, buffer)
        fns = self._get_fns_from_geom(geom, geom_crs)

        years = []
        images = []
        masks = []
        for fn in fns:

            year = int(fn.split("/")[2])
            years.append(year)

            # NOTE(review): unlike `get_rgb_stack_from_geom`, the geometries are not reprojected
            # to the tile's CRS here -- confirm that the tiles always share `geom_crs`.
            with rasterio.Env(**RASTERIO_BEST_PRACTICES):
                with rasterio.open(utils.NAIP_BLOB_ROOT + fn) as f:
                    mask_image, _ = rasterio.mask.mask(f, [mask_geom], crop=True, invert=False, pad=False, all_touched=True)
                    mask_image = np.rollaxis(mask_image, 0, 3)

                    full_image, _ = rasterio.mask.mask(f, [bounding_geom], crop=True, invert=False, pad=False, all_touched=True)
                    full_image = np.rollaxis(full_image, 0, 3)

            # `np.bool` was removed from NumPy; the builtin `bool` is the equivalent dtype.
            mask = np.zeros((mask_image.shape[0], mask_image.shape[1]), dtype=bool)
            mask[np.sum(mask_image==0, axis=2) == 4] = 1

            images.append(full_image)
            masks.append(mask)

        return images, masks, years

    def data_stack_to_rgb(self, images):
        """Returns the first three bands of every patch in `images`."""
        return [image[:,:,:3] for image in images]


################################################################
################################################################
class S2DataLoader(AbstractDataLoader):
    """Data loader that reads from a fixed list of yearly merged Sentinel-2 rasters."""

    years = [
        2016, 2017, 2018, 2019, 2020
    ]
    urls = [
        "https://researchlabwuopendata.blob.core.windows.net/sentinel-2-imagery/karnataka_change/2016/2016_merged.tif",
        "https://researchlabwuopendata.blob.core.windows.net/sentinel-2-imagery/karnataka_change/2017/2017_merged.tif",
        "https://researchlabwuopendata.blob.core.windows.net/sentinel-2-imagery/karnataka_change/2018/2018_merged.tif",
        "https://researchlabwuopendata.blob.core.windows.net/sentinel-2-imagery/karnataka_change/2019/2019_merged.tif",
        "https://researchlabwuopendata.blob.core.windows.net/sentinel-2-imagery/karnataka_change/2020/2020_merged.tif",
    ]

    def get_dates_from_geom(self, geom, geom_crs="epsg:4326"):
        """Returns a copy of the fixed list of years; `geom` and `geom_crs` are not used."""
        return list(S2DataLoader.years)

    def get_rgb_stack_from_geom(self, geom, buffer, show_outline=True, geom_crs="epsg:4326"):
        """Returns one RGB patch per year, optionally with the footprint outline drawn.

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            buffer: An amount (in units of `geom`'s coordinate system) to buffer the geom by.
            show_outline: Whether to render the outline of `geom` on each patch.
            geom_crs: Accepted for interface compatibility; not used by this loader.

        Returns:
            images: A list of image patches, one per year.
            years: The list of years corresponding to `images`.
        """
        mask_geom, bounding_geom = get_mask_and_bounding_geoms(geom, buffer)

        years = list(S2DataLoader.years)
        images = []
        for url in S2DataLoader.urls:

            with rasterio.Env(**RASTERIO_BEST_PRACTICES):
                with rasterio.open(url) as f:
                    mask_image, _ = rasterio.mask.mask(f, [mask_geom], crop=True, invert=False, pad=False, all_touched=True)
                    mask_image = np.rollaxis(mask_image, 0, 3)
                    mask_image = mask_image[:,:,[3,2,1]]

                    full_image, _ = rasterio.mask.mask(f, [bounding_geom], crop=True, invert=False, pad=False, all_touched=True)
                    full_image = np.rollaxis(full_image, 0, 3)
                    # Band indices 1, 2, 3 are B, G, R (see `get_data_stack_from_geom`), so this is R, G, B.
                    full_image = full_image[:,:,[3,2,1]]
                    full_image = utils.scale(1.1*full_image, 0, 2500)

            # 1 wherever at least one of the 3 selected bands is non-zero after masking.
            mask = np.zeros((mask_image.shape[0], mask_image.shape[1]), dtype=np.uint8)
            mask[np.sum(mask_image == 0, axis=2) != 3] = 1

            if show_outline:
                images.append(mark_boundaries(
                    full_image, mask
                ))
            else:
                images.append(full_image)

        return images, years

    def get_data_stack_from_geom(self, geom, buffer, geom_crs="epsg:4326"):
        """Returns one B/G/R/NIR patch and one footprint mask per year.

        Args:
            geom: A polygon in GeoJSON format describing the query footprint.
            buffer: An amount (in units of `geom`'s coordinate system) to buffer the geom by.
            geom_crs: Accepted for interface compatibility; not used by this loader.

        Returns:
            images: A list of 4-band image patches, one per year.
            masks: A list of boolean masks that are `True` where the 3 selected bands are all
                zero after masking out the footprint.
            years: The list of years corresponding to `images`.
        """
        mask_geom, bounding_geom = get_mask_and_bounding_geoms(geom, buffer)

        years = list(S2DataLoader.years)
        images = []
        masks = []
        for url in S2DataLoader.urls:

            with rasterio.Env(**RASTERIO_BEST_PRACTICES):
                with rasterio.open(url) as f:
                    mask_image, _ = rasterio.mask.mask(f, [mask_geom], crop=True, invert=False, pad=False, all_touched=True)
                    mask_image = np.rollaxis(mask_image, 0, 3)
                    mask_image = mask_image[:,:,[3,2,1]]

                    full_image, _ = rasterio.mask.mask(f, [bounding_geom], crop=True, invert=False, pad=False, all_touched=True)
                    full_image = np.rollaxis(full_image, 0, 3)
                    full_image = full_image[:,:,[1,2,3,7]] # keep B, G, R, and NIR bands

            # `np.bool` was removed from NumPy; the builtin `bool` is the equivalent dtype.
            mask = np.zeros((mask_image.shape[0], mask_image.shape[1]), dtype=bool)
            mask[np.sum(mask_image == 0, axis=2) == 3] = 1

            images.append(full_image)
            masks.append(mask)

        return images, masks, years

    def data_stack_to_rgb(self, images):
        """Converts B/G/R/NIR patches to RGB patches scaled like `get_rgb_stack_from_geom(.)`."""
        rgb_images = []
        for image in images:
            image = image[:,:,[2,1,0]]
            image = utils.scale(1.1*image, 0, 2500)
            rgb_images.append(image)
        return rgb_images


################################################################
################################################################


class PlanetaryComputerS2DataLoader(AbstractDataLoader):
    """Data loader that searches the Planetary Computer STAC API for Sentinel-2 L2A scenes.

    Unlike the other loaders, the query methods take an index into `geoms` (the list given
    to the constructor) instead of a geometry.
    """

    def __init__(self, geoms, pc_subscription_key, search_start="2015-01-01", search_end="2019-12-31"):
        """
        Args:
            geoms: A list of polygons in GeoJSON format; methods refer to them by index.
            pc_subscription_key: The Planetary Computer subscription key to register.
            search_start: First day of the search window.
            search_end: Last day of the search window.
        """
        pc.settings.set_subscription_key(pc_subscription_key)
        self.geoms = geoms
        self.time_range = f"{search_start}/{search_end}"

    # NOTE(review): `lru_cache` on an instance method keys on `self` and keeps the instance
    # (and every cached result) alive for the lifetime of the cache.
    @lru_cache(maxsize=None)
    def query_geom(self, geom_idx):
        """Returns the search results for `self.geoms[geom_idx]` in reversed order.

        The search is restricted to items with less than 10% cloud cover that intersect the
        geometry within `self.time_range`.
        """
        geom = self.geoms[geom_idx]
        catalog = Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")

        search = catalog.search(
            collections=["sentinel-2-l2a"],
            intersects=geom,
            datetime=self.time_range,
            query={"eo:cloud_cover": {"lt": 10}},
        )

        items = list(search.get_items())
        return items[::-1]

    def get_dates_from_geom(self, geom_idx, geom_crs="epsg:4326"):
        """Returns the acquisition dates (formatted `%m-%d-%Y`) of the items found for a geometry.

        `geom_crs` is accepted for interface compatibility and is not used.
        """
        items = self.query_geom(geom_idx)
        return [item.datetime.strftime("%m-%d-%Y") for item in items]

    def get_rgb_stack_from_geom(self, geom_idx, buffer, show_outline=True, geom_crs="epsg:4326"):
        """Returns the image patches for a geometry, optionally with the footprint outline drawn.

        Args:
            geom_idx: Index into `self.geoms`.
            buffer: An amount (in units of the imagery's projection) to buffer the geom by.
            show_outline: Whether to render the outline of the footprint on each patch.
            geom_crs: The coordinate reference system (CRS) of the stored geometry.

        Returns:
            images: A list of image patches, one per item.
            dates: The list of dates corresponding to `images`.
        """
        images, masks, dates = self.get_data_stack_from_geom(geom_idx, buffer, geom_crs)
        if not show_outline:
            return images, dates

        outlined = [
            mark_boundaries(image, mask)
            for image, mask in zip(images, masks)
        ]
        return outlined, dates

    @lru_cache(maxsize=None)
    def get_data_stack_from_geom(self, geom_idx, buffer, geom_crs="epsg:4326"):
        """Returns the "visual" asset patch and the footprint mask for every item of a geometry.

        Args:
            geom_idx: Index into `self.geoms`.
            buffer: An amount (in units of the imagery's projection) to buffer the geom by.
            geom_crs: The coordinate reference system (CRS) of the stored geometry.

        Returns:
            images: A list of image patches, one per item.
            masks: A list of `np.uint8` masks containing a 1 where the image is covered by the
                footprint and a 0 elsewhere.
            dates: The list of dates corresponding to `images`.
        """
        geom = self.geoms[geom_idx]

        items = self.query_geom(geom_idx)
        dates = self.get_dates_from_geom(geom_idx)

        # The geometry is reprojected once, so every item must share one projection.
        crss = {item.properties["proj:epsg"] for item in items}
        assert len(crss) == 1, "Expected items in exactly one CRS, found %d" % (len(crss))
        dst_crs = "epsg:" + str(list(crss)[0])

        geom = rasterio.warp.transform_geom(geom_crs, dst_crs, geom)
        mask_geom, bounding_geom = get_mask_and_bounding_geoms(geom, buffer)

        images = []
        masks = []
        for item in items:

            href = item.assets["visual"].href
            signed_href = pc.sign(href)

            with rasterio.Env(**RASTERIO_BEST_PRACTICES):
                with rasterio.open(signed_href) as f:

                    mask_image, _ = rasterio.mask.mask(f, [mask_geom], crop=True, invert=False, pad=False, all_touched=True)
                    mask_image = np.rollaxis(mask_image, 0, 3)

                    full_image, _ = rasterio.mask.mask(f, [bounding_geom], crop=True, invert=False, pad=False, all_touched=True)
                    full_image = np.rollaxis(full_image, 0, 3)

            # `mask_geom` has a hole where the footprint is, so the footprint pixels are the ones
            # whose 3 bands are all zero after masking. Mark those with 1 to match the
            # `AbstractDataLoader` contract and the other loaders (this was previously inverted).
            mask = np.zeros((mask_image.shape[0], mask_image.shape[1]), dtype=np.uint8)
            mask[np.sum(mask_image == 0, axis=2) == 3] = 1

            images.append(full_image)
            masks.append(mask)

        return images, masks, dates

    def data_stack_to_rgb(self, images):
        """Not supported by this loader; always raises `NotImplementedError`."""
        raise NotImplementedError("This method is unecessary as the data is already RGB")
# --------------------------------------------------------------------------------
# /temporal_cluster_matching/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/microsoft/temporal-cluster-matching/490b12fbb134e755a11a0f8612036100939ab390/temporal_cluster_matching/__init__.py
# --------------------------------------------------------------------------------
# /temporal_cluster_matching/algorithms.py:
# --------------------------------------------------------------------------------
# 1 | '''
# 2 | Copyright (c) Microsoft Corporation. All rights reserved.
# 3 | Licensed under the MIT License.
'''
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
'''
import numpy as np
import scipy.stats

from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.preprocessing import StandardScaler

from skimage.feature import local_binary_pattern
from skimage.color import rgb2gray

# Added to every cluster count so that each probability stays strictly positive.
_COUNT_SMOOTHING = 1e-5


def _cluster_pixels(image, n_clusters, num_samples_for_kmeans, use_minibatch):
    '''Standardize the pixels of `image`, cluster them with k-means and return a (height, width) array of cluster indices.'''
    h, w, c = image.shape

    if use_minibatch:
        cluster_model = MiniBatchKMeans(n_clusters=n_clusters, n_init=3, batch_size=2000, compute_labels=True, init="random")
    else:
        cluster_model = KMeans(n_clusters=n_clusters, n_init=3)

    features = StandardScaler().fit_transform(image.reshape(h*w, c))

    if num_samples_for_kmeans is None or (h*w <= num_samples_for_kmeans):
        labels = cluster_model.fit_predict(features)
    else:
        # fit on a random sample of the pixels (drawn with replacement), then label every pixel
        sample_idxs = np.random.choice(features.shape[0], size=num_samples_for_kmeans)
        cluster_model.fit(features[sample_idxs])
        labels = cluster_model.predict(features)

    return labels.reshape(h, w)


def calculate_change_values(images, masks, n_clusters, num_samples_for_kmeans=10000, use_minibatch=False):
    '''
    Computes, for every image, the KL-divergence between the distribution of k-means cluster indices inside the parcel and the distribution outside of it.

    Args:
        images: A list of `numpy.ndarray` of shape (height, width, n_channels). This imagery should cover an area that is larger than the parcel of interest by some fixed distance (i.e. a buffer value).
        masks: A list of boolean `numpy.ndarray` of shape (height, width) with `1` in locations where the parcel covers and `0` everywhere else.
        n_clusters: The number of clusters to use in the k-means model.
        num_samples_for_kmeans: An integer specifying the number of samples to use to fit the k-means model. If `None` then all pixels in the neighborhood + footprint are used, however this is probably overkill.
        use_minibatch: A flag that indicates whether we should use MiniBatchKMeans over KMeans. MiniBatchKMeans should be much faster.

    Returns:
        divergences: A list of KL-divergence values, one per image. The value is `inf` when the parcel or its neighborhood contains no pixels.
    '''
    divergences = []
    for image, mask in zip(images, masks):
        h, w, _ = image.shape
        assert mask.shape[0] == h and mask.shape[1] == w

        mask = mask.astype(bool)

        # without pixels on both sides of the mask there is nothing to compare
        if mask.all() or not mask.any():
            divergences.append(float('inf'))
            continue

        # fit a k-means model and use it to cluster the image
        labels = _cluster_pixels(image, n_clusters, num_samples_for_kmeans, use_minibatch)

        # compute the (smoothed) frequency with which each cluster occurs in the parcel and outside of the parcel
        parcel_counts = np.bincount(labels[mask], minlength=n_clusters) + _COUNT_SMOOTHING
        neighborhood_counts = np.bincount(labels[~mask], minlength=n_clusters) + _COUNT_SMOOTHING

        # normalize each vector of smoothed counts into a discrete distribution
        parcel_distribution = parcel_counts / parcel_counts.sum()
        neighborhood_distribution = neighborhood_counts / neighborhood_counts.sum()

        # compute the KL divergence between the two distributions
        divergences.append(scipy.stats.entropy(parcel_distribution, neighborhood_distribution))

    return divergences


def calculate_change_values_with_color(images, masks):
    '''
    Computes, for every image, the Euclidean distance between the mean color inside the parcel and the mean color outside of it.

    Args:
        images: A list of `numpy.ndarray` of shape (height, width, n_channels). This imagery should cover an area that is larger than the parcel of interest by some fixed distance (i.e. a buffer value).
        masks: A list of boolean `numpy.ndarray` of shape (height, width) with `1` in locations where the parcel covers and `0` everywhere else.

    Returns:
        distances: A list of Euclidean distances, one per image. The value is `inf` when there are no pixels with mask value `1` or no pixels with mask value `0`.
    '''
    distances = []
    for image, mask in zip(images, masks):
        h, w, _ = image.shape
        assert mask.shape[0] == h and mask.shape[1] == w

        inside = mask == 1
        outside = mask == 0

        # the mean of an empty selection is NaN; report "no comparison possible" explicitly instead
        if not inside.any() or not outside.any():
            distances.append(float('inf'))
            continue

        colors_inside = image[inside].mean(axis=0)
        colors_outside = image[outside].mean(axis=0)

        distances.append(np.linalg.norm(colors_outside - colors_inside))

    return distances

# --------------------------------------------------------------------------------
# /temporal_cluster_matching/utils.py:
# --------------------------------------------------------------------------------
'''
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
'''
import os
import tempfile
import pickle
import urllib
import urllib.parse
import urllib.request

import numpy as np
import matplotlib.pyplot as plt

import rtree
import fiona
import fiona.transform
import shapely
import shapely.geometry

from sklearn.metrics import accuracy_score, mean_absolute_error

NAIP_BLOB_ROOT = 'https://naipblobs.blob.core.windows.net/naip/'

################################################################
# Dataset methods
################################################################
def get_all_geoms_from_file(fn):
    """Return the `"geometry"` entry of every record in the file `fn` (opened with fiona)."""
    with fiona.open(fn) as f:
        return [row["geometry"] for row in f]

## Methods for getting poultry barn geoms
def get_poultry_barn_geoms(base_dir="./data/"):
    """Return all geometries from `Delmarva_PL_House_Final2_epsg26918.geojson` in `base_dir`."""
    return get_all_geoms_from_file(os.path.join(base_dir, "Delmarva_PL_House_Final2_epsg26918.geojson"))

def get_random_polygons_over_poultry_area(base_dir="./data/"):
    """Return all geometries from `poultry_barn_6013_random_polygons_epsg26918.geojson` in `base_dir`."""
    return get_all_geoms_from_file(os.path.join(base_dir, "poultry_barn_6013_random_polygons_epsg26918.geojson"))

def get_poultry_barn_geoms_epsg4326(base_dir="./data/"):
    """Return all geometries from `Delmarva_PL_House_Final2_epsg4326.geojson` in `base_dir`."""
    return get_all_geoms_from_file(os.path.join(base_dir, "Delmarva_PL_House_Final2_epsg4326.geojson"))

## Methods for getting solar farm geoms
def get_solar_farm_geoms(base_dir="./data/"):
    """Return all geometries from `karnataka_predictions_polygons_validated_2020.geojson` in `base_dir`."""
    return get_all_geoms_from_file(os.path.join(base_dir, "karnataka_predictions_polygons_validated_2020.geojson"))

def get_random_polygons_over_solar_area(base_dir="./data/"):
    """Return all geometries from `solar_farms_935_random_polygons_epsg4326.geojson` in `base_dir`."""
    return get_all_geoms_from_file(os.path.join(base_dir, "solar_farms_935_random_polygons_epsg4326.geojson"))

def get_labels(fn):
    """
    Read a label CSV whose first non-blank line is a header and whose remaining
    lines start with two integer columns.

    Blank lines are ignored. Columns after the second one are ignored.

    Returns:
        A tuple `(idxs, years)` of two equally long lists of ints holding the
        first and second column respectively.
    """
    with open(fn, "r") as f:
        rows = [line.strip() for line in f if line.strip()]

    idxs = []
    years = []
    for row in rows[1:]:  # rows[0] is the header
        parts = row.split(",")
        idxs.append(int(parts[0]))
        years.append(int(parts[1]))
    return idxs, years

def get_poultry_barn_labels(base_dir="./data/"):
    """Return the labels stored in `poultry_barn_labels.csv` in `base_dir`."""
    return get_labels(os.path.join(base_dir, "poultry_barn_labels.csv"))

def get_solar_farm_labels(base_dir="./data/"):
    """Return the labels stored in `solar_farm_labels.csv` in `base_dir`."""
    return get_labels(os.path.join(base_dir, "solar_farm_labels.csv"))

################################################################
# Visualization methods
################################################################
def show_images(images, titles=None):
    """
    Show a list of images side by side in a single row.

    Args:
        images: A list of images accepted by `Axes.imshow`. Nothing is drawn for an empty list.
        titles: Optional list with one title per image.
    """
    num_images = len(images)
    if titles is not None:
        assert len(titles) == num_images

    if num_images == 0:
        return

    # squeeze=False keeps `axs` a 2D array even for a single image, so it can always be flattened
    fig, axs = plt.subplots(1, num_images, figsize=(num_images*4, 4), squeeze=False)
    for i, ax in enumerate(axs.flatten()):
        ax.imshow(images[i])
        if titles is not None:
            ax.set_title(titles[i])
        ax.axis("off")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    plt.show()
    plt.close()

def show_individual_images(images, border_size=1):
    """
    Show every image in its own figure, surrounded by a black border.

    Args:
        images: A list of arrays of shape (height, width, channels). `uint8` images are
            rescaled by 1/255 before plotting.
        border_size: Width of the border in pixels (a non-negative integer; `0` draws no border).
    """
    for img in images:
        h,w,c = img.shape

        if img.dtype==np.uint8:
            img = img / 255.0

        img_with_border = np.zeros((h+int(2*border_size),w+int(2*border_size),c), dtype=np.float32)
        # explicit end indices: a `-border_size` end would select nothing when border_size is 0
        img_with_border[border_size:border_size+h,border_size:border_size+w] = img

        fig = plt.figure()
        ax = fig.add_axes([0, 0, 1, 1], frameon=False)
        ax.axis('off')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.imshow(img_with_border)
        plt.show()
        plt.close()


def scale(x, min_val, max_val, a=0, b=255, output_type=np.uint8):
    """
    Linearly map `x` from `[min_val, max_val]` to `[a, b]`, clipping values outside of the input range.

    Args:
        x: A `numpy.ndarray` (the result must support `.astype`).
        min_val: Input value that maps to `a`.
        max_val: Input value that maps to `b`; NOTE(review): `max_val == min_val` divides by zero and is not handled.
        a: Lower bound of the output range.
        b: Upper bound of the output range.
        output_type: dtype the result is cast to.

    Returns:
        The rescaled array with dtype `output_type`.
    """
    y = np.clip((x - min_val) / (max_val - min_val), 0, 1)
    y = (b-a) * y + a
    y = y.astype(output_type)
    return y


################################################################
# Geometric methods
################################################################
def get_transformed_centroid_from_geom(geom, src_crs='epsg:26918', dst_crs='epsg:4326'):
    """
    Compute the centroid of `geom` and transform it from `src_crs` to `dst_crs`.

    Returns:
        A 2-tuple with the first element of each of the two coordinate sequences
        returned by `fiona.transform.transform`. This code treats them as `(lat, lon)`.
        NOTE(review): which of the two is latitude depends on the axis order used by
        the installed fiona/GDAL -- confirm against the caller.
    """
    centroid = shapely.geometry.shape(geom).centroid
    lat, lon = fiona.transform.transform(src_crs, dst_crs, xs=[centroid.x], ys=[centroid.y])
    return (lat[0], lon[0])

def _swap_xy(coord):
    """Return `coord` as a tuple with its first two entries swapped; further entries (e.g. z) are kept."""
    x, y, *rest = coord
    return (y, x, *rest)

def reverse_polygon_coordinates(geom):
    """
    Swap the first two values of every coordinate of a Polygon or MultiPolygon.

    The geometry is modified in place (its `"coordinates"` entry is replaced) and
    also returned. Coordinates with more than two values keep their remaining values.

    Raises:
        ValueError: If `geom["type"]` is neither "Polygon" nor "MultiPolygon".
    """
    geom_type = geom["type"]
    if geom_type == "MultiPolygon":
        new_coords = [
            [[_swap_xy(coord) for coord in ring] for ring in polygon]
            for polygon in geom["coordinates"]
        ]
    elif geom_type == "Polygon":
        new_coords = [[_swap_xy(coord) for coord in ring] for ring in geom["coordinates"]]
    else:
        # Fail loudly instead of replacing the coordinates with an empty list.
        raise ValueError("Unsupported geometry type: {}".format(geom_type))
    geom["coordinates"] = new_coords
    return geom

def bounds_to_geom(bounds, src_crs, dst_crs):
    """
    Build the rectangle described by `bounds` and transform it from `src_crs` to `dst_crs`.

    Args:
        bounds: An object with `left`, `right`, `top` and `bottom` attributes.
        src_crs: CRS the bounds are expressed in.
        dst_crs: CRS of the returned geometry.

    Returns:
        The result of `fiona.transform.transform_geom` applied to the closed rectangle polygon.
    """
    left, right = bounds.left, bounds.right
    top, bottom = bounds.top, bounds.bottom

    geom = {
        "type": "Polygon",
        "coordinates": [[(left, top), (right, top), (right, bottom), (left, bottom), (left, top)]]
    }
    return fiona.transform.transform_geom(src_crs, dst_crs, geom)


################################################################
# Tile index for NAIP data
################################################################
class NAIPTileIndex:
    """
    Utility class for performing NAIP tile lookups by location.
    """
    index_blob_root = 'https://naipblobs.blob.core.windows.net/naip-index/rtree/'
    index_fns = ["tile_index.dat", "tile_index.idx", "tiles.p"]

    def __init__(self, base_path=None):
        """
        Download the index files (unless already present) and load them.

        Args:
            base_path: Directory the index files are stored in; defaults to the
                system temporary directory.
        """
        if base_path is None:
            base_path = tempfile.gettempdir()

        for file_path in NAIPTileIndex.index_fns:
            download_url(NAIPTileIndex.index_blob_root + file_path, base_path)

        self.base_path = base_path
        self.tile_rtree = rtree.index.Index(os.path.join(base_path, "tile_index"))
        # NOTE(security): unpickling executes arbitrary code; this trusts the content
        # served from `index_blob_root`.
        with open(os.path.join(base_path, "tiles.p"), "rb") as f:
            self.tile_index = pickle.load(f)


    def lookup_tile(self, lat, lon):
        """
        Given a lat/lon coordinate pair, return the list of NAIP tiles that contain
        that location.

        Returns the list of file entries of all tiles whose geometry contains the
        point, or `None` (after printing a message) if there is no such tile.
        """

        point = shapely.geometry.Point(float(lon), float(lat))
        intersected_indices = list(self.tile_rtree.intersection(point.bounds))

        # The rtree query only matches bounding boxes; keep the tiles that really contain the point.
        intersected_files = []
        for idx in intersected_indices:
            intersected_file = self.tile_index[idx][0]
            intersected_geom = self.tile_index[idx][1]
            if intersected_geom.contains(point):
                intersected_files.append(intersected_file)

        if intersected_files:
            return intersected_files

        if len(intersected_indices) > 0:
            print('''Error: there are overlaps with tile index, but no tile completely contains selection''')
        else:
            print("No tile intersections")
        return None


def download_url(url, output_dir, force_download=False, verbose=False):
    """
    Download a URL into `output_dir`, named after the last component of the URL path.

    Args:
        url: The URL to download.
        output_dir: Directory the file is written to.
        force_download: If False, an already existing destination file is reused.
        verbose: If True, progress messages are printed.

    Returns:
        The path of the downloaded (or already existing) file.
    """
    # Imported explicitly instead of relying on `urllib.request` importing it as a side effect.
    from urllib.parse import urlparse

    parsed_url = urlparse(url)
    url_as_filename = os.path.basename(parsed_url.path)
    destination_filename = os.path.join(output_dir, url_as_filename)

    if (not force_download) and (os.path.isfile(destination_filename)):
        if verbose: print('Bypassing download of already-downloaded file {}'.format(os.path.basename(url)))
        return destination_filename

    if verbose: print('Downloading file {} to {}'.format(os.path.basename(url),destination_filename),end='')
    urllib.request.urlretrieve(url, destination_filename)
    assert(os.path.isfile(destination_filename))
    nBytes = os.path.getsize(destination_filename)
    if verbose: print('...done, {} bytes.'.format(nBytes))
    return destination_filename


################################################################
# Other stuff for processing results
################################################################
def decision_function(all_distances, all_years, theta, max_year):
    """
    Predict one year per series: the first year whose distance is at least `theta`
    or that is at least `max_year`; the last year of the series if there is none.

    Args:
        all_distances: A list of lists of distances, one inner list per series.
        all_years: A list of lists of years, aligned with `all_distances`.
        theta: Distance threshold.
        max_year: Year from which on a prediction is always made.

    Returns:
        A list with one predicted year per series.

    Raises:
        IndexError: If a series without any qualifying entry has no years at all.
    """
    predicted_years = []
    for distances, years in zip(all_distances, all_years):
        for distance, year in zip(distances, years):
            if distance >= theta or year >= max_year:
                predicted_years.append(year)
                break
        else:
            # no entry crossed the threshold: fall back to the last year
            predicted_years.append(years[-1])
    return predicted_years

def get_results(fn, filter_years=None):
    """
    Parse a results file with one line per geometry of the form
    `idx,year,...,year,|,distance,...,distance` (a trailing comma is allowed).

    Blank lines are skipped and NaN distances are replaced by `inf`.

    Args:
        fn: Path of the results file.
        filter_years: Optional collection of years; if given, only (year, distance)
            pairs whose year is in this collection are kept.

    Returns:
        A tuple `(all_idxs, all_years, all_distances)` of equally long lists.

    Raises:
        ValueError: If a line has no `|` separator or contains a non-numeric field.
    """
    if filter_years is not None:
        filter_years = set(filter_years)

    all_idxs = []
    all_years = []
    all_distances = []

    with open(fn) as f:
        lines = f.read().strip().split("\n")

    for line in lines:
        line = line.strip().strip(",")
        if not line:
            continue

        parts = line.split(",")
        idx = int(parts[0])
        parts = parts[1:]

        if "|" not in parts:
            raise ValueError("Missing '|' separator in results line: {!r}".format(line))
        separator = parts.index("|")

        years = [int(part) for part in parts[:separator]]
        distances = []
        for part in parts[separator+1:]:
            distance = float(part)
            distances.append(float('inf') if np.isnan(distance) else distance)

        if filter_years is not None:
            kept = [(year, distance) for distance, year in zip(distances, years) if year in filter_years]
            years = [year for year, _ in kept]
            distances = [distance for _, distance in kept]

        all_idxs.append(idx)
        all_years.append(years)
        all_distances.append(distances)

    return all_idxs, all_years, all_distances


def _certain_pairs(labeled_years, predicted_years):
    """Return both inputs as arrays, restricted to the positions whose label is not -1."""
    labeled_years = np.array(labeled_years)
    predicted_years = np.array(predicted_years)
    mask = labeled_years != -1
    return labeled_years[mask], predicted_years[mask]

def uncertain_accuracy(labeled_years, predicted_years):
    """Return `accuracy_score` over the entries whose label is not -1."""
    labels, predictions = _certain_pairs(labeled_years, predicted_years)
    return accuracy_score(labels, predictions)

def uncertain_mae(labeled_years, predicted_years):
    """Return `mean_absolute_error` over the entries whose label is not -1."""
    labels, predictions = _certain_pairs(labeled_years, predicted_years)
    return mean_absolute_error(labels, predictions)

def loss_function(labeled_idxs, labeled_years, all_distances, all_years, theta, max_year):
    """
    Evaluate the threshold `theta` on the labeled series.

    Args:
        labeled_idxs: Positions (into `all_distances` / `all_years`) of the labeled series.
        labeled_years: The label of each labeled series; `-1` entries are ignored by the metrics.
        all_distances: A list of lists of distances.
        all_years: A list of lists of years, aligned with `all_distances`.
        theta: Distance threshold passed to `decision_function`.
        max_year: Maximum year passed to `decision_function`.

    Returns:
        A tuple `(acc, mae)`.
    """
    distances = [all_distances[idx] for idx in labeled_idxs]
    years = [all_years[idx] for idx in labeled_idxs]

    predicted_years = decision_function(distances, years, theta, max_year)

    acc = uncertain_accuracy(labeled_years, predicted_years)
    mae = uncertain_mae(labeled_years, predicted_years)

    return acc, mae

# --------------------------------------------------------------------------------