├── .gitignore ├── README.md ├── data └── graphs │ ├── cloud_cover_hist.html │ ├── facilities_map.html │ ├── operation_status_hist.html │ └── operation_status_per_power_plant_ts.html ├── notebooks ├── 01_data_matching.ipynb ├── 02_satellite_imagery.ipynb ├── 03_split_data_for_ml.ipynb ├── 04_dataset_loading.ipynb ├── 05_images_download.ipynb ├── 06_model_training.ipynb └── google_cooling_tower_on_off_data.ipynb ├── requirements.txt ├── scripts └── download_images.py ├── setup.cfg ├── setup.py └── src └── coal_emissions_monitoring ├── __init__.py ├── constants.py ├── data_cleaning.py ├── data_viz.py ├── dataset.py ├── ml_utils.py ├── model.py ├── satellite_imagery.py └── transforms.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .vscode/ 131 | notebooks/lightning_logs/* 132 | data/campd/ 133 | data/google/ 134 | data/models/ 135 | .DS_Store 136 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ccai-ss23-ai-monitoring-tutorial 2 | Experiments for the Climate Change AI summer school 2023 tutorial on "AI for Monitoring, Reporting, and Verification" 3 | -------------------------------------------------------------------------------- /data/graphs/facilities_map.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 27 | 37 | 38 | 39 | 53 | 54 | 55 | 56 | 68 | 115 | 116 | 117 | 118 | 119 | 120 |
121 |
data_set
122 |
123 | 127 |
128 |
129 | 130 | 131 |
132 | 133 | 134 | 243 | -------------------------------------------------------------------------------- /notebooks/01_data_matching.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Data matching\n", 9 | "---\n", 10 | "\n", 11 | "Experimenting with matching data from:\n", 12 | "- Global Energy Monitor (GEM)'s [Global Coal Plant Tracker](https://www.globalenergymonitor.org/coal.html)\n", 13 | "- USA's [CAMPD emissions data](https://campd.epa.gov/data)\n", 14 | "- OSM's [cooling_tower](https://wiki.openstreetmap.org/wiki/Tag:man_made%3Dcooling_tower) tag" 15 | ] 16 | }, 17 | { 18 | "attachments": {}, 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Setup" 23 | ] 24 | }, 25 | { 26 | "attachments": {}, 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Imports" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import pandas as pd\n", 40 | "import geopandas as gpd\n", 41 | "import plotly.express as px" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "from coal_emissions_monitoring.data_cleaning import (\n", 51 | " load_clean_gcpt_gdf,\n", 52 | " load_clean_campd_facilities_gdf,\n", 53 | " load_clean_campd_emissions_df,\n", 54 | " load_osm_data,\n", 55 | ")" 56 | ] 57 | }, 58 | { 59 | "attachments": {}, 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### Parameters" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# show all columns in pandas\n", 73 | "pd.set_option(\"display.max_columns\", None)" 74 | ] 75 | }, 76 | { 77 | "attachments": {}, 78 | "cell_type": 
"markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Load data" 82 | ] 83 | }, 84 | { 85 | "attachments": {}, 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### GEM Global Coal Plant Tracker" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "gcpt_df = load_clean_gcpt_gdf(\"/Users/adminuser/Downloads/Global-Coal-Plant-Tracker-January-2023.xlsx\")\n", 99 | "gcpt_df" 100 | ] 101 | }, 102 | { 103 | "attachments": {}, 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### CAMPD facilities metadata" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "campd_facilities_df = load_clean_campd_facilities_gdf(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/facility_attributes.csv\")\n", 117 | "campd_facilities_df" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "campd_facilities_df.capacity_mw.describe()" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "# find distance to the nearest facility\n", 136 | "for facility_id in campd_facilities_df.facility_id:\n", 137 | " campd_facilities_df.loc[\n", 138 | " campd_facilities_df.facility_id == facility_id,\n", 139 | " \"dist_to_nearest_facility\"\n", 140 | " ] = gpd.sjoin_nearest(\n", 141 | " campd_facilities_df.loc[campd_facilities_df.facility_id == facility_id],\n", 142 | " campd_facilities_df.loc[campd_facilities_df.facility_id != facility_id],\n", 143 | " distance_col=\"dist\",\n", 144 | " ).dist.min()\n", 145 | "campd_facilities_df.groupby(\"facility_id\").dist_to_nearest_facility.min().sort_values()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | 
"execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "campd_facilities_df[campd_facilities_df.year == 2023].explore()" 155 | ] 156 | }, 157 | { 158 | "attachments": {}, 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### CAMPD emissions data" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "campd_emissions_df = load_clean_campd_emissions_df(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/daily_emissions_facility_aggregation.csv\")\n", 172 | "campd_emissions_df" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "campd_emissions_df[\"year\"] = campd_emissions_df[\"date\"].dt.year\n", 182 | "yearly_emissions = campd_emissions_df.groupby(\"year\").co2_mass_short_tons.mean()\n", 183 | "yearly_emissions" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "px.line(campd_emissions_df, x=\"date\", y=\"co2_mass_short_tons\", color=\"facility_name\")" 193 | ] 194 | }, 195 | { 196 | "attachments": {}, 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "### OSM cooling_tower tag" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "osm_gdf = load_osm_data()\n", 210 | "osm_gdf" 211 | ] 212 | }, 213 | { 214 | "attachments": {}, 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "## Match data" 219 | ] 220 | }, 221 | { 222 | "attachments": {}, 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "### CAMPD facilities metadata and emissions" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | 
"metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "campd_emissions_df[\"year\"] = pd.to_datetime(campd_emissions_df[\"date\"].dt.year, format=\"%Y\")\n", 236 | "campd_gdf = pd.merge(\n", 237 | " campd_facilities_df,\n", 238 | " campd_emissions_df,\n", 239 | " on=[\"facility_id\", \"year\"],\n", 240 | " how=\"inner\",\n", 241 | " suffixes=(\"_delete\", \"\"),\n", 242 | ")\n", 243 | "campd_gdf = campd_gdf.drop(columns=[col for col in campd_gdf.columns if \"_delete\" in col])\n", 244 | "campd_gdf" 245 | ] 246 | }, 247 | { 248 | "attachments": {}, 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "### CAMPD data and OSM cooling_tower tag" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "campd_ndt_gdf = gpd.sjoin_nearest(campd_gdf, osm_gdf, how=\"inner\", distance_col=\"distances\", max_distance=0.01)\n", 262 | "campd_ndt_gdf" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "campd_ndt_gdf.distances.describe()" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "ndt_plants = campd_ndt_gdf.facility_id.nunique()\n", 281 | "ndt_plants" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [] 290 | } 291 | ], 292 | "metadata": { 293 | "kernelspec": { 294 | "display_name": "ccai_ss23", 295 | "language": "python", 296 | "name": "python3" 297 | }, 298 | "language_info": { 299 | "codemirror_mode": { 300 | "name": "ipython", 301 | "version": 3 302 | }, 303 | "file_extension": ".py", 304 | "mimetype": "text/x-python", 305 | "name": "python", 306 | "nbconvert_exporter": "python", 307 | "pygments_lexer": "ipython3", 308 | "version": "3.10.9" 309 | }, 310 | 
"orig_nbformat": 4 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 2 314 | } 315 | -------------------------------------------------------------------------------- /notebooks/02_satellite_imagery.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Satellite imagery collection and processing\n", 9 | "---\n", 10 | "\n", 11 | "Experimenting with filtering, downloading and displaying Sentinel 2 images from the [AWS STAC of Cloud-Optimized GeoTIFFs](https://registry.opendata.aws/sentinel-2-l2a-cogs/)" 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Setup" 20 | ] 21 | }, 22 | { 23 | "attachments": {}, 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Imports" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from coal_emissions_monitoring.data_cleaning import (\n", 37 | " load_clean_campd_facilities_gdf,\n", 38 | " load_clean_campd_emissions_df,\n", 39 | " load_clean_image_metadata_df,\n", 40 | " get_final_dataset\n", 41 | ")\n", 42 | "from coal_emissions_monitoring.satellite_imagery import (\n", 43 | " create_aoi_for_plants,\n", 44 | " get_image_metadata_for_plants,\n", 45 | " get_image_from_cog\n", 46 | ")\n", 47 | "from coal_emissions_monitoring.data_viz import view_satellite_image" 48 | ] 49 | }, 50 | { 51 | "attachments": {}, 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Parameters" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "cloud_filter_percent = 25" 65 | ] 66 | }, 67 | { 68 | "attachments": {}, 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Load CAMPD 
data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "campd_facilities_gdf = load_clean_campd_facilities_gdf(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/facility_attributes.csv\")\n", 82 | "campd_facilities_gdf" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "campd_facilities_gdf = create_aoi_for_plants(campd_facilities_gdf)\n", 92 | "campd_facilities_gdf" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "campd_facilities_gdf.geometry.explore()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "campd_emissions_df = load_clean_campd_emissions_df(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/daily_emissions_facility_aggregation.csv\")\n", 111 | "campd_emissions_df" 112 | ] 113 | }, 114 | { 115 | "attachments": {}, 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Filter emissions data to days when a cloudless image is available" 120 | ] 121 | }, 122 | { 123 | "attachments": {}, 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### Get image metadata for every power plant" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "image_metadata_df = get_image_metadata_for_plants(campd_facilities_gdf, max_cloud_cover_prct=cloud_filter_percent)\n", 137 | "image_metadata_df.to_csv(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/image_metadata.csv\", index=False)\n", 138 | "image_metadata_df" 139 | ] 140 | }, 141 | { 142 | "attachments": {}, 143 | "cell_type": "markdown", 144 | "metadata": {}, 
145 | "source": [ 146 | "### Join with emissions data" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "df = get_final_dataset(\n", 156 | " image_metadata_path=\"/home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/image_metadata.csv\",\n", 157 | " campd_facilities_path=\"https://drive.google.com/file/d/1b-5BriZUiiv2r0wFLubccLQpd2xb5ysl/view?usp=share_link\",\n", 158 | " campd_emissions_path=\"https://drive.google.com/file/d/1oxZXR7GDcSXwwVoIjp66iS179cFVA5dP/view?usp=share_link\",\n", 159 | " cog_type=\"all\",\n", 160 | ")\n", 161 | "df" 162 | ] 163 | }, 164 | { 165 | "attachments": {}, 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## Download and display images" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "image = get_image_from_cog(cog_url=image_metadata_df.cog_url.iloc[0], geometry=campd_facilities_gdf.geometry.iloc[0])" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "image.shape, image.min(), image.mean(), image.max()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "view_satellite_image(image)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [] 205 | } 206 | ], 207 | "metadata": { 208 | "kernelspec": { 209 | "display_name": "ccai_ss23", 210 | "language": "python", 211 | "name": "python3" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | 
"pygments_lexer": "ipython3", 223 | "version": "3.10.9" 224 | }, 225 | "orig_nbformat": 4 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 2 229 | } -------------------------------------------------------------------------------- /notebooks/03_split_data_for_ml.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Split data for machine learning\n", 9 | "---\n", 10 | "\n", 11 | "Experimenting with splitting the data for machine learning model training." 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Setup" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Imports" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np\n", 36 | "import geopandas as gpd" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "from coal_emissions_monitoring.data_cleaning import get_final_dataset\n", 46 | "from coal_emissions_monitoring.ml_utils import get_facility_set_mapper, split_data_in_sets" 47 | ] 48 | }, 49 | { 50 | "attachments": {}, 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Parameters" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "train_val_ratio = 0.8\n", 64 | "test_data_year = 2023" 65 | ] 66 | }, 67 | { 68 | "attachments": {}, 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Load data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "df = 
get_final_dataset(\n", 82 | " image_metadata_path=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/image_metadata.csv\",\n", 83 | " campd_facilities_path=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/facility_attributes.csv\",\n", 84 | " campd_emissions_path=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/daily_emissions_facility_aggregation.csv\",\n", 85 | ")\n", 86 | "df" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "df.to_csv(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/final_dataset.csv\", index=False)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "df.co2_mass_short_tons.value_counts()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "df.isna().sum()" 114 | ] 115 | }, 116 | { 117 | "attachments": {}, 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "## Split data" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "facility_set_mapper = get_facility_set_mapper(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/facility_attributes.csv\")\n", 131 | "df[\"data_set\"] = df.apply(lambda row: split_data_in_sets(row=row, data_set_mapper=facility_set_mapper, test_year=test_data_year), axis=1)\n", 132 | "df" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "df.data_set.value_counts()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "df.data_set.value_counts() / df.shape[0]" 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "for data_set in df.data_set.unique():\n", 160 | " print(data_set)\n", 161 | " print(df[df.data_set == data_set].ts.dt.year.value_counts())\n", 162 | " print()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "ccai_ss23", 176 | "language": "python", 177 | "name": "python3" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 3 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython3", 189 | "version": "3.10.9" 190 | }, 191 | "orig_nbformat": 4 192 | }, 193 | "nbformat": 4, 194 | "nbformat_minor": 2 195 | } -------------------------------------------------------------------------------- /notebooks/04_dataset_loading.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Dataset loading\n", 9 | "---\n", 10 | "Experimenting with loading the PyTorch Lightning dataset and visualising its outputs." 
11 | ] 12 | }, 13 | { 14 | "attachments": {}, 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Setup" 19 | ] 20 | }, 21 | { 22 | "attachments": {}, 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "### Imports" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from coal_emissions_monitoring.dataset import CoalEmissionsDataModule\n", 36 | "from coal_emissions_monitoring.data_viz import view_satellite_image" 37 | ] 38 | }, 39 | { 40 | "attachments": {}, 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Create the dataset" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "data = CoalEmissionsDataModule(\n", 54 | " image_metadata_path=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/image_metadata.csv\",\n", 55 | " campd_facilities_path=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/facility_attributes.csv\",\n", 56 | " campd_emissions_path=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/daily_emissions_facility_aggregation.csv\",\n", 57 | " batch_size=2,\n", 58 | " predownload_images=True,\n", 59 | " images_dir=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/images\",\n", 60 | ")\n", 61 | "data.setup(stage=\"fit\")" 62 | ] 63 | }, 64 | { 65 | "attachments": {}, 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Load some batches" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "for batch in data.train_dataloader():\n", 79 | " break\n", 80 | "print(f\"Keys in batch: {batch.keys()}\")\n", 81 | "print(f\"Image shape: {batch['image'].shape}\")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | 
"metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "idx = 0\n", 91 | "print(f\"Target: {batch['target'][idx]}\")\n", 92 | "print(f\"Facility name: {batch['metadata']['facility_name'][idx]}\")\n", 93 | "print(f\"Timestamp: {batch['metadata']['ts'][idx]}\")\n", 94 | "view_satellite_image(batch[\"image\"][idx])" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "((batch[\"image\"][idx] <= 1) | (batch[\"image\"][idx].isnan())).sum() / batch[\"image\"][idx].numel()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [] 112 | } 113 | ], 114 | "metadata": { 115 | "kernelspec": { 116 | "display_name": "ccai_ss23", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.10.9" 131 | }, 132 | "orig_nbformat": 4 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /notebooks/05_images_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Images download\n", 9 | "---\n", 10 | "\n", 11 | "Download all images before training models." 
12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Setup" 20 | ] 21 | }, 22 | { 23 | "attachments": {}, 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Imports" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from tqdm.auto import tqdm" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "from coal_emissions_monitoring.constants import ALL_BANDS\n", 46 | "from coal_emissions_monitoring.data_cleaning import get_final_dataset\n", 47 | "from coal_emissions_monitoring.satellite_imagery import fetch_image_path_from_cog" 48 | ] 49 | }, 50 | { 51 | "attachments": {}, 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Get final datase" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "df = get_final_dataset(\n", 65 | " image_metadata_path=\"/home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/image_metadata.csv\",\n", 66 | " campd_facilities_path=\"https://drive.google.com/file/d/1b-5BriZUiiv2r0wFLubccLQpd2xb5ysl/view?usp=share_link\",\n", 67 | " campd_emissions_path=\"https://drive.google.com/file/d/1oxZXR7GDcSXwwVoIjp66iS179cFVA5dP/view?usp=share_link\",\n", 68 | " cog_type=\"all\",\n", 69 | ")\n", 70 | "df" 71 | ] 72 | }, 73 | { 74 | "attachments": {}, 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Download images" 79 | ] 80 | }, 81 | { 82 | "attachments": {}, 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### TCI (True Color Image)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "tqdm.pandas(desc=\"Downloading visual 
images\")\n", 96 | "df[\"local_image_path\"] = df.progress_apply(\n", 97 | " lambda row: fetch_image_path_from_cog(\n", 98 | " cog_url=row.cog_url,\n", 99 | " geometry=row.geometry,\n", 100 | " images_dir=\"/home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/images/visual/\",\n", 101 | " download_missing_images=True,\n", 102 | " ),\n", 103 | " axis=1,\n", 104 | ")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "# compress all images into one file\n", 114 | "!tar -czvf /home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/images/visual_images.tar.gz /home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/images/visual" 115 | ] 116 | }, 117 | { 118 | "attachments": {}, 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### All bands" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "tqdm.pandas(desc=\"Downloading all bands images\")\n", 132 | "df[\"local_image_all_bands_path\"] = df.progress_apply(\n", 133 | " lambda row: fetch_image_path_from_cog(\n", 134 | " cog_url=[row[band] for band in ALL_BANDS],\n", 135 | " geometry=row.geometry,\n", 136 | " cog_type=\"all\",\n", 137 | " images_dir=\"/home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/images/all_bands/\",\n", 138 | " download_missing_images=True,\n", 139 | " ),\n", 140 | " axis=1,\n", 141 | ")" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# compress all images into one file\n", 151 | "!tar -czvf /home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/images/all_bands_images.tar.gz /home/adminuser/ccai-ss23-ai-monitoring-tutorial/data/images/all_bands" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 
| "source": [] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "ccai_ss23", 165 | "language": "python", 166 | "name": "python3" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 3 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython3", 178 | "version": "3.10.10" 179 | }, 180 | "orig_nbformat": 4 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 2 184 | } 185 | -------------------------------------------------------------------------------- /notebooks/06_model_training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Model training\n", 9 | "---\n", 10 | "\n", 11 | "Experimenting with training some models over the dataset." 12 | ] 13 | }, 14 | { 15 | "attachments": {}, 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Setup" 20 | ] 21 | }, 22 | { 23 | "attachments": {}, 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Imports" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import timm\n", 37 | "from lightning import Trainer\n", 38 | "from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "from coal_emissions_monitoring.dataset import CoalEmissionsDataModule\n", 48 | "from coal_emissions_monitoring.model import CoalEmissionsModel, SmallCNN\n", 49 | "from coal_emissions_monitoring.transforms import efficientnet_transform" 50 | ] 51 | }, 52 | { 53 | "attachments": {}, 54 | "cell_type": "markdown", 55 | 
"metadata": {}, 56 | "source": [ 57 | "### Parameters" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "batch_size = 128\n", 67 | "crop_size = 52\n", 68 | "num_workers = 0\n", 69 | "learning_rate = 1e-3" 70 | ] 71 | }, 72 | { 73 | "attachments": {}, 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "## Create the dataset" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "data = CoalEmissionsDataModule(\n", 87 | " final_dataset_path=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/final_dataset.csv\",\n", 88 | " batch_size=batch_size,\n", 89 | " num_workers=num_workers,\n", 90 | " predownload_images=True,\n", 91 | " download_missing_images=False,\n", 92 | " images_dir=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/images/visual\",\n", 93 | " crop_size=crop_size,\n", 94 | ")" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "data.setup(\"fit\")" 104 | ] 105 | }, 106 | { 107 | "attachments": {}, 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Create the model" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# model = timm.create_model(\"efficientnet_b0\", pretrained=True, num_classes=1)\n", 121 | "model = SmallCNN(num_input_channels=3, num_classes=1)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "model = model.float().to(\"cpu\")" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "lit_model 
= CoalEmissionsModel(\n", 140 | " model=model,\n", 141 | " learning_rate=learning_rate,\n", 142 | " pos_weight=data.pos_weight,\n", 143 | ")" 144 | ] 145 | }, 146 | { 147 | "attachments": {}, 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Confirm that the model can be run on a batch of data" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "data.setup(stage=\"fit\")\n", 161 | "for batch in data.train_dataloader():\n", 162 | " break\n", 163 | "print(f\"Keys in batch: {batch.keys()}\")\n", 164 | "print(f\"Image shape: {batch['image'].shape}\")" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "y_pred = lit_model(batch[\"image\"])\n", 174 | "y_pred" 175 | ] 176 | }, 177 | { 178 | "attachments": {}, 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Check that the model can overfit a single batch" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "trainer = Trainer(\n", 192 | " max_epochs=1,\n", 193 | " callbacks=[\n", 194 | " EarlyStopping(monitor=\"val_loss\", mode=\"min\", patience=10),\n", 195 | " ModelCheckpoint(\n", 196 | " monitor=\"val_loss\",\n", 197 | " mode=\"min\",\n", 198 | " filename=\"{val_loss:2f}-{val_balanced_accuracy:.2f}-{epoch}-64crop_full_data\",\n", 199 | " save_top_k=1,\n", 200 | " dirpath=\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/models/\",\n", 201 | " )\n", 202 | " ],\n", 203 | " limit_train_batches=round(0.1 * len(data.train_dataset.gdf) / batch_size),\n", 204 | " limit_val_batches=round(0.4 * len(data.val_dataset.gdf) / batch_size),\n", 205 | " reload_dataloaders_every_n_epochs=1,\n", 206 | " precision=\"16-mixed\",\n", 207 | " accelerator=\"cpu\",\n", 208 | " 
devices=1,\n", 209 | " log_every_n_steps=5,\n", 210 | " # overfit_batches=1,\n", 211 | ")\n", 212 | "trainer.fit(lit_model, data)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "_ = trainer.test(\n", 222 | " model=lit_model,\n", 223 | " datamodule=data,\n", 224 | " ckpt_path=\"best\",\n", 225 | " verbose=True,\n", 226 | ")" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [] 235 | } 236 | ], 237 | "metadata": { 238 | "kernelspec": { 239 | "display_name": "ccai_ss23", 240 | "language": "python", 241 | "name": "python3" 242 | }, 243 | "language_info": { 244 | "codemirror_mode": { 245 | "name": "ipython", 246 | "version": 3 247 | }, 248 | "file_extension": ".py", 249 | "mimetype": "text/x-python", 250 | "name": "python", 251 | "nbconvert_exporter": "python", 252 | "pygments_lexer": "ipython3", 253 | "version": "3.10.9" 254 | }, 255 | "orig_nbformat": 4 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 2 259 | } 260 | -------------------------------------------------------------------------------- /notebooks/google_cooling_tower_on_off_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import geopandas as gpd\n", 11 | "from tqdm.auto import tqdm" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from coal_emissions_monitoring.satellite_imagery import (\n", 21 | " create_aoi_for_plants,\n", 22 | " get_image_metadata_for_plants,\n", 23 | " get_image_from_cog\n", 24 | ")\n", 25 | "from coal_emissions_monitoring.data_viz import view_satellite_image" 26 | ] 27 | }, 28 | { 29 | 
"cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "df = pd.read_csv(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/labeled_geospatial_data.csv\")\n", 35 | "df" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "df.is_powered_on.value_counts()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# get unique combinations of lat/lon\n", 54 | "unique_coords = df[[\"lat\", \"lon\"]].drop_duplicates().reset_index(drop=True)\n", 55 | "unique_coords.reset_index(inplace=True)\n", 56 | "unique_coords.set_index([\"lat\", \"lon\"], inplace=True)\n", 57 | "unique_coords = unique_coords[\"index\"].to_dict()\n", 58 | "unique_coords" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# set an epsg code for each unique lat/lon\n", 68 | "df[\"facility_id\"] = df.apply(\n", 69 | " lambda x: unique_coords[(x[\"lat\"], x[\"lon\"])], axis=1\n", 70 | ")\n", 71 | "df" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "df.facility_id.value_counts()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "df.rename(columns={\"lat\": \"latitude\", \"lon\": \"longitude\"}, inplace=True)\n", 90 | "df" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "df.rename(columns={\"timestamp\": \"ts\"}, inplace=True)\n", 100 | "df.ts = pd.to_datetime(df.ts)\n", 101 | "df.dtypes" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 
| "outputs": [], 109 | "source": [ 110 | "gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs=\"EPSG:4326\")\n", 111 | "gdf" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "gdf = create_aoi_for_plants(gdf)\n", 121 | "gdf" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "gdf.geometry.explore()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# image_metadata_df = get_image_metadata_for_plants(\n", 140 | "# gdf,\n", 141 | "# start_date=gdf.ts.min(),\n", 142 | "# end_date=gdf.ts.max(),\n", 143 | "# max_cloud_cover_prct=50,\n", 144 | "# )\n", 145 | "image_metadata_df = pd.read_csv(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/image_metadata.csv\")\n", 146 | "image_metadata_df.ts = pd.to_datetime(image_metadata_df.ts)\n", 147 | "image_metadata_df" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# filter the image metadata to match the day of each row of gdf\n", 157 | "image_metadata_df[\"date\"] = image_metadata_df.ts.dt.date\n", 158 | "gdf[\"date\"] = gdf.ts.dt.date\n", 159 | "image_metadata_df = image_metadata_df.merge(\n", 160 | " gdf[[\"facility_id\", \"date\"]], on=[\"facility_id\", \"date\"]\n", 161 | ")\n", 162 | "image_metadata_df" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "image_metadata_df.to_csv(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/image_metadata.csv\", index=False)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | 
"metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "gdf.merge(\n", 181 | " image_metadata_df.drop(columns=[\"ts\"]),\n", 182 | " on=[\"facility_id\", \"date\"]\n", 183 | ").to_csv(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/all_urls_dataset.csv\", index=False)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "gdf = gdf.merge(\n", 193 | " image_metadata_df[[\"facility_id\", \"date\", \"cloud_cover\", \"visual\"]],\n", 194 | " on=[\"facility_id\", \"date\"]\n", 195 | ")\n", 196 | "gdf.rename(columns={\"visual\": \"cog_url\"}, inplace=True)\n", 197 | "gdf.drop(columns=[\"date\"], inplace=True)\n", 198 | "gdf" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "gdf.sort_values(by=[\"facility_id\", \"ts\"], inplace=True)\n", 208 | "gdf.to_csv(\"/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/final_dataset.csv\", index=False)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "row = gdf.iloc[0]\n", 218 | "image = get_image_from_cog(\n", 219 | " cog_url=row.cog_url,\n", 220 | " geometry=row.geometry,\n", 221 | " size=64,\n", 222 | ")\n", 223 | "image.shape" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source": [ 232 | "view_satellite_image(image)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "ccai_ss23", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 
3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.10.9" 260 | }, 261 | "orig_nbformat": 4 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 2 265 | } 266 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | overpy==0.6 2 | pandas==1.5.3 3 | geopandas==0.12.2 4 | openpyxl==3.1.2 5 | requests==2.28.2 6 | folium==0.14.0 7 | mapclassify==2.5.0 8 | matplotlib==3.7.1 9 | plotly==5.14.1 10 | nbformat==5.8.0 11 | pystac-client==0.6.1 12 | rasterio==1.3.6 13 | loguru==0.6.0 14 | tqdm==4.65.0 15 | black==23.3.0 16 | flake8==6.0.0 17 | torch==2.0.0 18 | lightning==2.0.1.post0 19 | kornia==0.6.11 20 | timm==0.6.13 21 | backoff==2.2.1 -------------------------------------------------------------------------------- /scripts/download_images.py: -------------------------------------------------------------------------------- 1 | # %% [markdown] 2 | # # Images download 3 | # --- 4 | # 5 | # Download all images before training models. 
6 | 7 | # %% [markdown] 8 | # ## Setup 9 | 10 | # %% [markdown] 11 | # ### Imports 12 | 13 | # %% 14 | import os 15 | from tqdm.auto import tqdm 16 | 17 | # %% 18 | from coal_emissions_monitoring.constants import ALL_BANDS, MAIN_COLUMNS 19 | from coal_emissions_monitoring.data_cleaning import load_final_dataset 20 | from coal_emissions_monitoring.satellite_imagery import fetch_image_path_from_cog 21 | 22 | # %% [markdown] 23 | # ## Get final dataset 24 | 25 | # %% 26 | gdf = load_final_dataset( 27 | "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/all_urls_dataset.csv" 28 | ) 29 | 30 | # %% [markdown] 31 | # ## Download images 32 | 33 | # %% [markdown] 34 | # ### TCI (True Color Image) 35 | 36 | # %% 37 | tqdm.pandas(desc="Downloading visual images") 38 | gdf["local_image_path"] = gdf.progress_apply( 39 | lambda row: fetch_image_path_from_cog( 40 | cog_url=row.visual, 41 | geometry=row.geometry, 42 | cog_type="visual", 43 | images_dir="/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/images/visual/", 44 | download_missing_images=True, 45 | ), 46 | axis=1, 47 | ) 48 | 49 | # %% 50 | path = "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/" 51 | os.makedirs(path, exist_ok=True) 52 | gdf.rename(columns={"visual": "cog_url"})[MAIN_COLUMNS + ["local_image_path"]].to_csv( 53 | f"{path}final_dataset.csv", 54 | index=False, 55 | ) 56 | 57 | # %% 58 | # compress all images into one file 59 | os.system( 60 | "tar -czvf /Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/images/visual_images.tar.gz /Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/images/visual" 61 | ) 62 | 63 | # %% [markdown] 64 | # ### All bands 65 | 66 | # %% 67 | tqdm.pandas(desc="Downloading all bands images") 68 | gdf["local_image_all_bands_path"] = gdf.progress_apply( 69 | lambda row: fetch_image_path_from_cog( 70 | cog_url=[row[band] for band in ALL_BANDS], 71 | geometry=row.geometry, 72 | size=32, # 
smaller images to make the download faster 73 | cog_type="all", 74 | images_dir="/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/images/all_bands/", 75 | download_missing_images=True, 76 | ), 77 | axis=1, 78 | ) 79 | 80 | # %% 81 | # compress all images into one file 82 | os.system( 83 | "tar -czvf /Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/images/all_bands_images.tar.gz /Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/images/all_bands" 84 | ) 85 | 86 | # %% 87 | path = "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/" 88 | os.makedirs(path, exist_ok=True) 89 | gdf.rename(columns={"visual": "cog_url"})[ 90 | MAIN_COLUMNS + ["local_image_path", "local_image_all_bands_path"] 91 | ].to_csv( 92 | f"{path}final_dataset.csv", 93 | index=False, 94 | ) 95 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = coal-emissions-monitoring 3 | version = 0.0.1 4 | description = A data science project to monitor coal power emissions 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | url = https://github.com/AndreCNF/ccai-ss23-ai-monitoring-tutorial 8 | 9 | [options] 10 | package_dir = 11 | = src 12 | packages = find: 13 | python_requires = >=3.7 14 | install_requires = 15 | overpy 16 | pandas 17 | geopandas 18 | openpyxl 19 | requests 20 | folium 21 | mapclassify 22 | matplotlib 23 | plotly 24 | nbformat>=4.2.0 25 | pystac-client 26 | rasterio 27 | loguru 28 | tqdm 29 | torch 30 | lightning 31 | kornia 32 | timm 33 | backoff 34 | 35 | [options.packages.find] 36 | where=src -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | 
use_scm_version=True, 5 | setup_requires=["setuptools_scm"], 6 | ) 7 | -------------------------------------------------------------------------------- /src/coal_emissions_monitoring/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AndreCNF/ccai-ss23-ai-monitoring-tutorial/8b7ba2d2b11175c8f12f87b22a7d18acb6c8628e/src/coal_emissions_monitoring/__init__.py -------------------------------------------------------------------------------- /src/coal_emissions_monitoring/constants.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | GLOBAL_EPSG = 4326 4 | API_URL = "https://earth-search.aws.element84.com/v0" 5 | COLLECTION = "sentinel-s2-l2a-cogs" # Sentinel-2, Level 2A, COGs 6 | AOI_SIZE_METERS = 640 7 | IMAGE_SIZE_PX = 64 8 | CROP_SIZE_PX = 52 9 | START_DATE = datetime(year=2016, month=1, day=1) 10 | END_DATE = datetime(year=2019, month=12, day=31) 11 | MAX_DARK_FRAC = 0.5 12 | MAX_BRIGHT_MEAN = 250 13 | MAX_CLOUD_COVER_PRCT = 50 14 | TRAIN_VAL_RATIO = 0.8 15 | TEST_YEAR = 2020 16 | BATCH_SIZE = 32 17 | MAIN_COLUMNS = [ 18 | "facility_id", 19 | "latitude", 20 | "longitude", 21 | "ts", 22 | "is_powered_on", 23 | "cloud_cover", 24 | "cog_url", 25 | "geometry", 26 | ] 27 | ALL_BANDS = [ 28 | "b01", 29 | "b02", 30 | "b03", 31 | "b04", 32 | "b05", 33 | "b06", 34 | "b07", 35 | "b08", 36 | "b8a", 37 | "b09", 38 | "b11", 39 | "b12", 40 | ] 41 | EMISSIONS_TARGET = "is_powered_on" 42 | EMISSIONS_CATEGORIES = { 43 | 0: "no_emissions", 44 | 1: "low", 45 | 2: "medium", 46 | 3: "high", 47 | 4: "very_high", 48 | } 49 | RANDOM_TRANSFORM_PROB = 0.5 50 | POSITIVE_THRESHOLD = 0.5 51 | -------------------------------------------------------------------------------- /src/coal_emissions_monitoring/data_cleaning.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import 
warnings 3 | from typing import Callable, Optional, Union 4 | 5 | import pandas as pd 6 | import geopandas as gpd 7 | import overpy 8 | 9 | from coal_emissions_monitoring.constants import ALL_BANDS, GLOBAL_EPSG, MAIN_COLUMNS 10 | from coal_emissions_monitoring.satellite_imagery import create_aoi_for_plants 11 | 12 | OSM_API = overpy.Overpass() 13 | 14 | # suppress geopandas CRS warning as we don't need to worry too much about 15 | # the precision of distances 16 | warnings.filterwarnings("ignore", message="Geometry is in a geographic CRS*") 17 | # suppress pandas warning of setting value in copy 18 | warnings.filterwarnings("ignore", message="A value is trying to be set on a copy*") 19 | # suppress pandas warning on regex 20 | warnings.filterwarnings("ignore", message="The default value of regex will change*") 21 | 22 | 23 | def clean_column_names( 24 | df: Union[pd.DataFrame, gpd.GeoDataFrame] 25 | ) -> Union[pd.DataFrame, gpd.GeoDataFrame]: 26 | """ 27 | Clean column names in a data frame. 28 | 29 | Args: 30 | df (Union[pd.DataFrame, gpd.GeoDataFrame]): 31 | Data frame to clean 32 | 33 | Returns: 34 | df (Union[pd.DataFrame, gpd.GeoDataFrame]): 35 | Cleaned data frame 36 | """ 37 | df.columns = ( 38 | df.columns.str.lower() 39 | .str.replace(" ", "_") 40 | .str.replace("(", "") 41 | .str.replace(")", "") 42 | .str.replace("/", "_") 43 | .str.replace("-", "_") 44 | .str.replace(",", "_") 45 | ) 46 | return df 47 | 48 | 49 | def fix_google_drive_url(url: str) -> str: 50 | """ 51 | Fix a Google Drive URL. 52 | 53 | Args: 54 | url (str): 55 | URL to fix 56 | 57 | Returns: 58 | url (str): 59 | Fixed URL 60 | """ 61 | assert url.startswith( 62 | "https://drive.google.com/file/d/" 63 | ), "URL must start with https://drive.google.com/file/d/" 64 | return "https://drive.google.com/uc?id=" + url.split("/")[-2] 65 | 66 | 67 | def load_csv(path: str) -> pd.DataFrame: 68 | """ 69 | Load a CSV file. 
70 | 71 | Args: 72 | path (str): 73 | Path to CSV file 74 | 75 | Returns: 76 | df (pd.DataFrame): 77 | Data frame 78 | """ 79 | if path.startswith("https://drive.google.com/file/d/"): 80 | return pd.read_csv(fix_google_drive_url(path)) 81 | else: 82 | return pd.read_csv(path) 83 | 84 | 85 | def load_clean_data_df( 86 | data_path: Union[str, Path], 87 | load_func: Optional[Callable] = load_csv, 88 | clean_func: Optional[Callable] = clean_column_names, 89 | ) -> pd.DataFrame: 90 | """ 91 | Load and clean a data frame. 92 | 93 | Args: 94 | data_path (Union[str, Path]): 95 | Path to data 96 | load_func (Optional[Callable]): 97 | Function to load data 98 | clean_func (Optional[Callable]): 99 | Function to clean data 100 | 101 | Returns: 102 | df (pd.DataFrame): 103 | Cleaned data frame 104 | """ 105 | df = load_func(data_path) 106 | df = clean_func(df) 107 | return df 108 | 109 | 110 | def load_clean_data_gdf( 111 | data_path: Union[str, Path], 112 | load_func: Optional[Callable] = load_csv, 113 | clean_func: Optional[Callable] = clean_column_names, 114 | ) -> gpd.GeoDataFrame: 115 | """ 116 | Load and clean a data frame, outputting it as a GeoDataFrame. 117 | 118 | Args: 119 | data_path (Union[str, Path]): 120 | Path to data 121 | load_func (Optional[Callable]): 122 | Function to load data 123 | clean_func (Optional[Callable]): 124 | Function to clean data 125 | 126 | Returns: 127 | gdf (gpd.GeoDataFrame): 128 | Cleaned data frame 129 | """ 130 | df = load_clean_data_df( 131 | data_path=data_path, load_func=load_func, clean_func=clean_func 132 | ) 133 | gdf = gpd.GeoDataFrame( 134 | df, 135 | geometry=gpd.points_from_xy( 136 | df["longitude"], 137 | df["latitude"], 138 | ), 139 | crs=f"EPSG:{GLOBAL_EPSG}", 140 | ) 141 | return gdf 142 | 143 | 144 | def load_raw_gcpt_data(gcpt_path: Union[str, Path]) -> pd.DataFrame: 145 | """ 146 | Load GCPT data in its raw excel format from GCS. 
147 | 148 | Returns: 149 | df (pd.DataFrame): 150 | GCPT data frame 151 | """ 152 | df = pd.read_excel( 153 | gcpt_path, 154 | sheet_name="Units", 155 | ) 156 | return df 157 | 158 | 159 | def clean_gcpt(df: pd.DataFrame) -> pd.DataFrame: 160 | """ 161 | Clean the GCPT data frame, setting better column names. 162 | 163 | Args: 164 | df (pd.DataFrame): 165 | GCPT data frame 166 | 167 | Returns: 168 | df (pd.DataFrame): 169 | Cleaned GCPT data frame 170 | """ 171 | df = clean_column_names(df) 172 | df.rename(columns={"parentid": "parent_id"}, inplace=True) 173 | df.rename(columns={"trackerloc": "tracker_loc"}, inplace=True) 174 | return df 175 | 176 | 177 | def load_clean_gcpt_gdf(gcpt_path: Union[str, Path]) -> gpd.GeoDataFrame: 178 | """ 179 | Load and clean the GCPT data frame. 180 | 181 | Args: 182 | gcpt_path (Union[str, Path]): 183 | Path to GCPT data 184 | 185 | Returns: 186 | gdf (gpd.GeoDataFrame): 187 | Cleaned GCPT data frame 188 | """ 189 | return load_clean_data_gdf( 190 | data_path=gcpt_path, load_func=load_raw_gcpt_data, clean_func=clean_gcpt 191 | ) 192 | 193 | 194 | def clean_campd_facilities(df: pd.DataFrame) -> pd.DataFrame: 195 | """ 196 | Clean the CAMPD facilities data frame. 
197 | 198 | Args: 199 | df (pd.DataFrame): 200 | CAMPD facilities data frame 201 | 202 | Returns: 203 | df (pd.DataFrame): 204 | Cleaned CAMPD facilities data frame 205 | """ 206 | df = clean_column_names(df) 207 | # get the capacity 208 | df["capacity_mw"] = ( 209 | df["associated_generators_&_nameplate_capacity_mwe"] 210 | .str.split(" ") 211 | .str[-1] 212 | .str.replace("(", "") 213 | .str.replace(")", "") 214 | .astype(float) 215 | ) 216 | # filter to operating units 217 | df = df[(df.operating_status == "Operating") & (df.capacity_mw > 0)] 218 | # aggregate by facility 219 | df = df.groupby(["facility_id", "year"]).agg( 220 | { 221 | "capacity_mw": "sum", 222 | "facility_name": "first", 223 | "latitude": "mean", 224 | "longitude": "mean", 225 | } 226 | ) 227 | # rearrange columns 228 | df = df.reset_index()[ 229 | ["facility_id", "facility_name", "year", "capacity_mw", "latitude", "longitude"] 230 | ] 231 | # fix datetime column data type 232 | df.year = pd.to_datetime(df.year, format="%Y") 233 | return df 234 | 235 | 236 | def load_clean_campd_facilities_gdf( 237 | campd_facilities_path: Union[str, Path] 238 | ) -> gpd.GeoDataFrame: 239 | """ 240 | Load and clean the CAMPD facilities data frame. 241 | 242 | Args: 243 | campd_facilities_path (Union[str, Path]): 244 | Path to CAMPD facilities data 245 | 246 | Returns: 247 | gdf (gpd.GeoDataFrame): 248 | Cleaned CAMPD facilities data frame 249 | """ 250 | return load_clean_data_gdf( 251 | data_path=campd_facilities_path, 252 | load_func=load_csv, 253 | clean_func=clean_campd_facilities, 254 | ) 255 | 256 | 257 | def clean_campd_emissions(df: pd.DataFrame) -> pd.DataFrame: 258 | """ 259 | Clean the CAMPD emissions data frame. 
260 | 261 | Args: 262 | df (pd.DataFrame): 263 | CAMPD emissions data frame 264 | 265 | Returns: 266 | df (pd.DataFrame): 267 | Cleaned CAMPD emissions data frame 268 | """ 269 | df = clean_column_names(df) 270 | # fix datetime column data type 271 | df.date = pd.to_datetime(df.date) 272 | # fill missing values (emissions seem to be ignored if their value is 0) 273 | df = df.fillna(0) 274 | return df 275 | 276 | 277 | def load_clean_campd_emissions_df( 278 | campd_emissions_path: Union[str, Path] 279 | ) -> pd.DataFrame: 280 | """ 281 | Load and clean the CAMPD emissions data frame. 282 | 283 | Args: 284 | campd_emissions_path (Union[str, Path]): 285 | Path to CAMPD emissions data 286 | 287 | Returns: 288 | df (pd.DataFrame): 289 | Cleaned CAMPD emissions data frame 290 | """ 291 | return load_clean_data_df( 292 | data_path=campd_emissions_path, 293 | load_func=load_csv, 294 | clean_func=clean_campd_emissions, 295 | ) 296 | 297 | 298 | def load_osm_data( 299 | country: str = "United States", tag: str = "man_made", value: str = "cooling_tower" 300 | ) -> gpd.GeoDataFrame: 301 | """ 302 | Load OSM data. 
303 | 304 | Args: 305 | country (str): 306 | Country to filter to 307 | tag (str): 308 | OSM tag to filter to 309 | value (str): 310 | OSM value to filter to 311 | 312 | Returns: 313 | gdf (gpd.GeoDataFrame): 314 | OSM cooling towers data frame 315 | """ 316 | # load the data 317 | osm_results = OSM_API.query( 318 | query=f""" 319 | area[name="{country}"]->.searchArea; 320 | ( 321 | node["{tag}"="{value}"](area.searchArea); 322 | way["{tag}"="{value}"](area.searchArea); 323 | relation["{tag}"="{value}"](area.searchArea); 324 | ); 325 | out body; 326 | >; 327 | out skel qt; 328 | """ 329 | ) 330 | df = pd.DataFrame( 331 | [ 332 | { 333 | "osm_id": element.id, 334 | "latitude": element.lat, 335 | "longitude": element.lon, 336 | } 337 | for element in osm_results.nodes 338 | ] 339 | ) 340 | # convert to geodataframe 341 | gdf = gpd.GeoDataFrame( 342 | df, 343 | geometry=gpd.points_from_xy(df.longitude, df.latitude), 344 | crs="EPSG:4326", 345 | ) 346 | return gdf 347 | 348 | 349 | def filter_to_cooling_tower_plants( 350 | gdf: gpd.GeoDataFrame, 351 | campd_facilities_path: Union[str, Path], 352 | ) -> gpd.GeoDataFrame: 353 | """ 354 | Filter data to plants with cooling towers. 
355 | 356 | Args: 357 | gdf (gpd.GeoDataFrame): 358 | Data to be filtered 359 | campd_facilities_path (Union[str, Path]): 360 | Path to CAMPD facilities data 361 | 362 | Returns: 363 | gdf (gpd.GeoDataFrame): 364 | Filtered data 365 | """ 366 | # load the CAMPD facilities data 367 | campd_facilities_gdf = load_clean_campd_facilities_gdf(campd_facilities_path) 368 | # load the OSM data 369 | osm_gdf = load_osm_data() 370 | # spatial join 371 | campd_ndt_gdf = gpd.sjoin_nearest( 372 | campd_facilities_gdf, 373 | osm_gdf, 374 | how="inner", 375 | distance_col="distances", 376 | max_distance=0.01, 377 | ) 378 | # filter to plants with cooling towers 379 | gdf = gdf[gdf.facility_id.isin(campd_ndt_gdf.facility_id)] 380 | return gdf 381 | 382 | 383 | def clean_image_metadata(df: pd.DataFrame, cog_type: str = "visual") -> pd.DataFrame: 384 | """ 385 | Clean the image metadata data frame. 386 | 387 | Args: 388 | df (pd.DataFrame): 389 | Image metadata data frame 390 | cog_type (str): 391 | Type of COG to filter to. If "all", no filtering is done. 392 | 393 | Returns: 394 | df (pd.DataFrame): 395 | Cleaned image metadata data frame 396 | """ 397 | df = clean_column_names(df) 398 | # fix datetime column data type 399 | df.ts = pd.to_datetime(df.ts) 400 | # filter to most relevant columns 401 | if cog_type != "all": 402 | df.rename(columns={cog_type: "cog_url"}, inplace=True) 403 | df = df[["facility_id", "ts", "cloud_cover", "cog_url"]] 404 | else: 405 | df = df[ 406 | [ 407 | "facility_id", 408 | "ts", 409 | "cloud_cover", 410 | "visual", 411 | ] 412 | + ALL_BANDS 413 | ] 414 | return df 415 | 416 | 417 | def load_clean_image_metadata_df( 418 | image_metadata_path: Union[str, Path], cog_type: str = "visual" 419 | ) -> pd.DataFrame: 420 | """ 421 | Load and clean the image metadata data frame. 
422 | 423 | Args: 424 | image_metadata_path (Union[str, Path]): 425 | Path to image metadata data 426 | cog_type (str): 427 | Type of COG to filter to 428 | 429 | Returns: 430 | df (pd.DataFrame): 431 | Cleaned image metadata data frame 432 | """ 433 | return load_clean_data_df( 434 | data_path=image_metadata_path, 435 | load_func=load_csv, 436 | clean_func=lambda df: clean_image_metadata(df, cog_type=cog_type), 437 | ) 438 | 439 | 440 | def get_final_dataset( 441 | image_metadata_path: Union[str, Path], 442 | campd_facilities_path: Union[str, Path], 443 | campd_emissions_path: Union[str, Path], 444 | cog_type: str = "visual", 445 | ) -> gpd.GeoDataFrame: 446 | """ 447 | Get the final dataset that has the facility and image metadata, as well as 448 | the emissions data that we'll train models on. 449 | 450 | Args: 451 | image_metadata_path (Union[str, Path]): 452 | Path to image metadata data 453 | campd_facilities_path (Union[str, Path]): 454 | Path to CAMPD facilities data 455 | campd_emissions_path (Union[str, Path]): 456 | Path to CAMPD emissions data 457 | cog_type (str): 458 | Type of COG to filter to. If "all", no filtering is done. 
459 | 460 | Returns: 461 | gdf (gpd.GeoDataFrame): 462 | Final dataset that has the facility and image metadata, as well as 463 | the emissions data that we'll train models on 464 | """ 465 | # load all data 466 | image_metadata_df = load_clean_image_metadata_df( 467 | image_metadata_path=image_metadata_path, cog_type=cog_type 468 | ) 469 | campd_facilities_gdf = load_clean_campd_facilities_gdf( 470 | campd_facilities_path=campd_facilities_path 471 | ) 472 | campd_facilities_gdf = create_aoi_for_plants(campd_facilities_gdf) 473 | campd_emissions_df = load_clean_campd_emissions_df( 474 | campd_emissions_path=campd_emissions_path 475 | ) 476 | # remove the hour info from the date so as to join by day of the year 477 | image_metadata_df["date_without_time"] = image_metadata_df["ts"].dt.date 478 | campd_emissions_df["date_without_time"] = campd_emissions_df["date"].dt.date 479 | # merge the emissions with image metadata 480 | merged_df = pd.merge( 481 | left=campd_emissions_df, 482 | right=image_metadata_df, 483 | how="inner", 484 | on=["facility_id", "date_without_time"], 485 | ) 486 | # merge the facilities with the merged emissions and image metadata 487 | merged_df = pd.merge( 488 | left=merged_df, 489 | right=campd_facilities_gdf, 490 | how="inner", 491 | on="facility_id", 492 | suffixes=("", "_to_delete"), 493 | ) 494 | # filter to the columns that we care about for model training 495 | if cog_type != "all": 496 | final_columns = MAIN_COLUMNS + ["cog_url"] 497 | else: 498 | final_columns = MAIN_COLUMNS + ["visual"] + ALL_BANDS 499 | merged_df = merged_df[final_columns] 500 | merged_df.drop_duplicates(["facility_id", "ts"], inplace=True) 501 | # make sure that it's in geopandas format 502 | merged_df = gpd.GeoDataFrame( 503 | merged_df, 504 | geometry=merged_df.geometry, 505 | crs=f"EPSG:{GLOBAL_EPSG}", 506 | ) 507 | return merged_df 508 | 509 | 510 | def clean_final_dataset(df: pd.DataFrame) -> gpd.GeoDataFrame: 511 | """ 512 | Clean the final dataset that has 
def clean_final_dataset(df: pd.DataFrame) -> gpd.GeoDataFrame:
    """
    Clean the final dataset that has the facility and image metadata, as well
    as the emissions data that we'll train models on.

    Args:
        df (pd.DataFrame):
            Final dataset that has the facility and image metadata, as well as
            the emissions data that we'll train models on

    Returns:
        gdf (gpd.GeoDataFrame):
            Cleaned final dataset that has the facility and image metadata, as
            well as the emissions data that we'll train models on
    """
    # restore the column dtypes that were lost when the dataset was saved to
    # CSV: timestamps come back as strings, geometries as WKT text
    df["ts"] = pd.to_datetime(df["ts"])
    df["geometry"] = gpd.GeoSeries.from_wkt(df["geometry"])
    return gpd.GeoDataFrame(df, geometry=df["geometry"], crs=f"EPSG:{GLOBAL_EPSG}")


def load_final_dataset(final_dataset_path: Union[str, Path]) -> gpd.GeoDataFrame:
    """
    Load the final dataset that has the facility and image metadata, as well
    as the emissions data that we'll train models on.

    Args:
        final_dataset_path (Union[str, Path]):
            Path to the final dataset

    Returns:
        gdf (gpd.GeoDataFrame):
            Final dataset that has the facility and image metadata, as well as
            the emissions data that we'll train models on
    """
    # reuse the generic load-then-clean pipeline shared by the other loaders
    return load_clean_data_df(
        data_path=final_dataset_path,
        load_func=load_csv,
        clean_func=clean_final_dataset,
    )
def view_satellite_image(image: Union[np.ndarray, torch.Tensor]) -> Figure:
    """
    View a satellite image using plotly

    Args:
        image (Union[np.ndarray, torch.Tensor]):
            The satellite image, channels-first (C, H, W)

    Returns:
        Figure:
            The plotly figure
    """
    # plotly works on numpy arrays, so torch tensors are converted first
    if isinstance(image, torch.Tensor):
        image = image.numpy()
    # reorder (C, H, W) -> (H, W, C), which is what px.imshow expects
    hwc_image = image.transpose(1, 2, 0)
    fig = px.imshow(hwc_image, zmin=0, zmax=255)
    # strip the default margins so the image fills the whole figure
    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
    return fig
float = MAX_BRIGHT_MEAN, 56 | transforms: Optional[torch.nn.Module] = None, 57 | use_local_images: bool = False, 58 | ): 59 | """ 60 | Dataset that gets images of coal power plants, their emissions 61 | and metadata. 62 | 63 | Args: 64 | gdf (gpd.GeoDataFrame): 65 | A GeoDataFrame with the following columns: 66 | - facility_id 67 | - latitude 68 | - longitude 69 | - ts 70 | - is_powered_on 71 | - cloud_cover 72 | - cog_url 73 | - geometry 74 | target (str): 75 | The target column to predict 76 | image_size (int): 77 | The size of the image in pixels 78 | max_dark_frac (float): 79 | The maximum fraction of dark pixels allowed for an image; 80 | if the image has more dark pixels than this, it is skipped 81 | max_mean_val (float): 82 | The maximum mean value allowed for an image; if the image 83 | has a higher mean value than this, it is skipped 84 | transforms (Optional[torch.nn.Module]): 85 | A PyTorch module that transforms the image 86 | use_local_images (bool): 87 | Whether to use local images instead of downloading them 88 | from the cloud 89 | """ 90 | assert len(set(MAIN_COLUMNS) - set(gdf.columns)) == 0, ( 91 | "gdf must have all columns of the following list:\n" 92 | f"{MAIN_COLUMNS}\n" 93 | f"Instead, gdf has the following columns:\n" 94 | f"{gdf.columns}" 95 | ) 96 | self.gdf = gdf 97 | self.target = target 98 | self.image_size = image_size 99 | self.max_dark_frac = max_dark_frac 100 | self.max_mean_val = max_mean_val 101 | self.transforms = transforms 102 | self.use_local_images = use_local_images 103 | if self.use_local_images: 104 | assert "local_image_path" in self.gdf.columns, ( 105 | "If use_local_images is True, gdf must have a " 106 | "local_image_path column" 107 | ) 108 | 109 | def __iter__(self): 110 | if torch.utils.data.get_worker_info(): 111 | worker_total_num = torch.utils.data.get_worker_info().num_workers 112 | worker_id = torch.utils.data.get_worker_info().id 113 | else: 114 | worker_total_num = 1 115 | worker_id = 0 116 | for idx in 
range(worker_id, len(self.gdf), worker_total_num): 117 | row = self.gdf.iloc[idx] 118 | if self.use_local_images: 119 | try: 120 | image = np.load(row.local_image_path) 121 | except TypeError as e: 122 | logger.warning( 123 | f"Could not load local image at {row.local_image_path}. " 124 | f"Original error: {e}" 125 | ) 126 | continue 127 | else: 128 | image = get_image_from_cog( 129 | cog_url=row.cog_url, geometry=row.geometry, size=self.image_size 130 | ) 131 | image = torch.from_numpy(image).float() 132 | if is_image_too_dark( 133 | image, max_dark_frac=self.max_dark_frac 134 | ) or is_image_too_bright(image, max_mean_val=self.max_mean_val): 135 | continue 136 | if self.transforms is not None: 137 | try: 138 | image = self.transforms(image).squeeze(0) 139 | except AssertionError as e: 140 | logger.warning( 141 | f"Could not transform image at {row.local_image_path}. " 142 | f"Original error: {e}" 143 | ) 144 | continue 145 | 146 | target = torch.tensor(row[self.target]).float() 147 | metadata = row.drop([self.target, "geometry", "data_set"]).to_dict() 148 | metadata["ts"] = str(metadata["ts"]) 149 | yield { 150 | "image": image, 151 | "target": target, 152 | "metadata": metadata, 153 | } 154 | 155 | 156 | class CoalEmissionsDataModule(LightningDataModule): 157 | def __init__( 158 | self, 159 | final_dataset_path: Optional[Union[str, Path]] = None, 160 | image_metadata_path: Optional[Union[str, Path]] = None, 161 | campd_facilities_path: Optional[Union[str, Path]] = None, 162 | campd_emissions_path: Optional[Union[str, Path]] = None, 163 | target: str = EMISSIONS_TARGET, 164 | image_size: int = IMAGE_SIZE_PX, 165 | crop_size: int = CROP_SIZE_PX, 166 | train_val_ratio: float = TRAIN_VAL_RATIO, 167 | test_year: int = TEST_YEAR, 168 | batch_size: int = BATCH_SIZE, 169 | max_dark_frac: float = MAX_DARK_FRAC, 170 | max_mean_val: float = MAX_BRIGHT_MEAN, 171 | max_cloud_cover_prct: int = MAX_CLOUD_COVER_PRCT, 172 | predownload_images: bool = False, 173 | 
download_missing_images: bool = False, 174 | images_dir: str = "images/", 175 | num_workers: int = 0, 176 | ): 177 | """ 178 | Lightning Data Module that gets images of coal power plants, 179 | their emissions and metadata, and splits them into train, 180 | validation and test sets. 181 | 182 | Args: 183 | image_metadata_path (Union[str, Path]): 184 | Path to image metadata data 185 | campd_facilities_path (Union[str, Path]): 186 | Path to CAMPD facilities data 187 | campd_emissions_path (Union[str, Path]): 188 | Path to CAMPD emissions data 189 | target (str): 190 | The target column to predict 191 | image_size (int): 192 | The size of the image in pixels 193 | crop_size (int): 194 | The size of the crop in pixels 195 | train_val_ratio (float): 196 | The ratio of train to validation data 197 | test_year (int): 198 | The year to use for testing 199 | batch_size (int): 200 | The batch size, i.e. the number of samples to load at once 201 | max_dark_frac (float): 202 | The maximum fraction of dark pixels allowed for an image; 203 | if the image has more dark pixels than this, it is skipped 204 | max_mean_val (float): 205 | The maximum mean value allowed for an image; if the image 206 | has a higher mean value than this, it is skipped 207 | max_cloud_cover_prct (int): 208 | The maximum cloud cover percentage allowed for an image; 209 | if the image has more cloud cover than this, it is skipped 210 | predownload_images (bool): 211 | Whether to pre-download images from the cloud or load each 212 | one on the fly 213 | download_missing_images (bool): 214 | Whether to download images that are missing from the 215 | images_dir path 216 | images_dir (str): 217 | The directory to save images to if predownload_images is True 218 | num_workers (int): 219 | The number of workers to use for loading data 220 | """ 221 | super().__init__() 222 | self.final_dataset_path = final_dataset_path 223 | self.image_metadata_path = image_metadata_path 224 | self.campd_facilities_path = 
campd_facilities_path 225 | self.campd_emissions_path = campd_emissions_path 226 | self.target = target 227 | self.image_size = image_size 228 | self.crop_size = crop_size 229 | self.train_val_ratio = train_val_ratio 230 | self.test_year = test_year 231 | self.batch_size = batch_size 232 | self.max_dark_frac = max_dark_frac 233 | self.max_mean_val = max_mean_val 234 | self.max_cloud_cover_prct = max_cloud_cover_prct 235 | self.predownload_images = predownload_images 236 | self.download_missing_images = download_missing_images 237 | self.images_dir = images_dir 238 | self.num_workers = num_workers 239 | self.emissions_quantiles = None 240 | 241 | def setup(self, stage: str): 242 | """ 243 | Split the data into train, validation and test sets. 244 | 245 | Args: 246 | stage (str): 247 | The stage of the setup 248 | """ 249 | # load the final dataset 250 | if self.final_dataset_path is not None: 251 | self.gdf = load_final_dataset(self.final_dataset_path) 252 | else: 253 | self.gdf = get_final_dataset( 254 | image_metadata_path=self.image_metadata_path, 255 | campd_facilities_path=self.campd_facilities_path, 256 | campd_emissions_path=self.campd_emissions_path, 257 | ) 258 | # filter out rows with too much cloud cover 259 | self.gdf = self.gdf[self.gdf.cloud_cover <= self.max_cloud_cover_prct] 260 | if self.predownload_images: 261 | # make sure that images are already downloaded 262 | if "local_image_path" not in self.gdf.columns: 263 | tqdm.pandas(desc="Downloading images") 264 | self.gdf["local_image_path"] = self.gdf.progress_apply( 265 | lambda row: fetch_image_path_from_cog( 266 | cog_url=row.cog_url, 267 | geometry=row.geometry, 268 | size=self.image_size, 269 | images_dir=self.images_dir, 270 | download_missing_images=self.download_missing_images, 271 | ), 272 | axis=1, 273 | ) 274 | # skip rows where the image could not be downloaded 275 | self.gdf = self.gdf[~self.gdf.local_image_path.isna()] 276 | else: 277 | # make sure that the image paths are in the right 
directory 278 | current_image_path = ( 279 | self.gdf.local_image_path.str.split("/") 280 | .str[:-1] 281 | .str.join("/") 282 | .iloc[0] 283 | ) 284 | if current_image_path != self.images_dir: 285 | self.gdf.local_image_path = self.gdf.local_image_path.str.replace( 286 | current_image_path, self.images_dir 287 | ) 288 | # split the data into train, validation and test sets 289 | facility_set_mapper = get_facility_set_mapper( 290 | self.gdf, 291 | train_val_ratio=self.train_val_ratio, 292 | ) 293 | self.gdf["data_set"] = self.gdf.apply( 294 | lambda row: split_data_in_sets( 295 | row=row, data_set_mapper=facility_set_mapper, test_year=self.test_year 296 | ), 297 | axis=1, 298 | ) 299 | self.pos_weight = self.get_pos_weight(self.gdf) 300 | if stage == "fit": 301 | self.train_dataset = CoalEmissionsDataset( 302 | gdf=self.gdf[self.gdf.data_set == "train"].sample(frac=1), 303 | target=self.target, 304 | image_size=self.image_size, 305 | transforms=get_transform(data_group="train", crop_size=self.crop_size), 306 | use_local_images=self.predownload_images, 307 | max_dark_frac=self.max_dark_frac, 308 | max_mean_val=self.max_mean_val, 309 | ) 310 | self.val_dataset = CoalEmissionsDataset( 311 | gdf=self.gdf[self.gdf.data_set == "val"].sample(frac=1), 312 | target=self.target, 313 | image_size=self.image_size, 314 | transforms=get_transform(data_group="val", crop_size=self.crop_size), 315 | use_local_images=self.predownload_images, 316 | max_dark_frac=self.max_dark_frac, 317 | max_mean_val=self.max_mean_val, 318 | ) 319 | elif stage == "test": 320 | self.test_dataset = CoalEmissionsDataset( 321 | gdf=self.gdf[self.gdf.data_set == "test"].sample(frac=1), 322 | target=self.target, 323 | image_size=self.image_size, 324 | transforms=get_transform(data_group="test", crop_size=self.crop_size), 325 | use_local_images=self.predownload_images, 326 | max_dark_frac=self.max_dark_frac, 327 | max_mean_val=self.max_mean_val, 328 | ) 329 | 330 | def get_dataloader(self, data_group: str): 
331 | # reshuffle the dataset 332 | getattr(self, f"{data_group}_dataset").gdf = getattr( 333 | self, f"{data_group}_dataset" 334 | ).gdf.sample(frac=1) 335 | # reset the dataloader 336 | return DataLoader( 337 | getattr(self, f"{data_group}_dataset"), 338 | batch_size=self.batch_size, 339 | num_workers=self.num_workers, 340 | pin_memory=True if torch.cuda.is_available() else False, 341 | ) 342 | 343 | def train_dataloader(self): 344 | return self.get_dataloader("train") 345 | 346 | def val_dataloader(self): 347 | return self.get_dataloader("val") 348 | 349 | def test_dataloader(self): 350 | return self.get_dataloader("test") 351 | 352 | def get_pos_weight(self, gdf: Optional[gpd.GeoDataFrame] = None) -> float: 353 | """ 354 | Get the positive weight for the dataset, based on class imbalance. 355 | 356 | Args: 357 | gdf (Optional[gpd.GeoDataFrame]): 358 | The dataset to use for calculating the positive weight. 359 | If None, the dataset used for training will be used. 360 | 361 | Returns: 362 | float: 363 | The positive weight 364 | """ 365 | if gdf is None: 366 | gdf = self.gdf 367 | num_positives = gdf[self.target].sum() 368 | num_negatives = len(gdf) - num_positives 369 | return num_negatives / num_positives 370 | -------------------------------------------------------------------------------- /src/coal_emissions_monitoring/ml_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Tuple 2 | import geopandas as gpd 3 | import numpy as np 4 | import pandas as pd 5 | import torch 6 | 7 | from coal_emissions_monitoring.data_cleaning import load_clean_campd_facilities_gdf 8 | from coal_emissions_monitoring.constants import TEST_YEAR, TRAIN_VAL_RATIO 9 | 10 | 11 | def get_facility_set_mapper( 12 | gdf: gpd.GeoDataFrame, train_val_ratio: float = TRAIN_VAL_RATIO 13 | ) -> Dict[int, str]: 14 | """ 15 | Get a mapper from facility ID to a set of train or validation. 
def get_facility_set_mapper(
    gdf: gpd.GeoDataFrame, train_val_ratio: float = TRAIN_VAL_RATIO
) -> Dict[int, str]:
    """
    Get a mapper from facility ID to a set of train or validation.

    Args:
        gdf (gpd.GeoDataFrame):
            The gdf containing the facility IDs
        train_val_ratio (float):
            The ratio of training to validation data

    Returns:
        Dict[int, str]:
            A mapper from facility ID to a set of train or validation
    """
    assigned_facilities = set()
    for facility_id, facility_gdf in gdf.groupby("facility_id"):
        if facility_id in assigned_facilities:
            # already labelled via an earlier facility's overlapping AOI
            continue
        # randomly pick the set for this facility
        data_set = np.random.choice(
            ["train", "val"], p=[train_val_ratio, 1 - train_val_ratio]
        )
        gdf.loc[gdf.facility_id == facility_id, "data_set"] = data_set
        assigned_facilities.add(facility_id)
        # facilities whose geometries intersect this one must land in the
        # same set, to avoid leaking imagery between train and validation
        other_facilities_gdf = gdf.loc[
            gdf.facility_id != facility_id, ["facility_id", "geometry"]
        ].rename(columns={"facility_id": "intersecting_facility_id"})
        intersecting_gdf = gpd.sjoin(
            facility_gdf,
            other_facilities_gdf,
            how="inner",
            predicate="intersects",
        )
        for other_id in intersecting_gdf["intersecting_facility_id"].unique():
            gdf.loc[gdf.facility_id == other_id, "data_set"] = data_set
            assigned_facilities.add(other_id)
    # collapse per-row labels into one label per facility
    return gdf.groupby("facility_id").data_set.first().to_dict()


def split_data_in_sets(
    row: pd.DataFrame, data_set_mapper: Dict[int, str], test_year: int = TEST_YEAR
) -> str:
    """
    Split the data in sets. This function is meant to be used with
    pandas.DataFrame.apply (axis=1), so it receives one row at a time.

    Args:
        row (pd.DataFrame):
            The row of the DataFrame
        data_set_mapper (Dict[int, str]):
            A mapper from facility ID to a set of train or validation
        test_year (int):
            The year to use for testing

    Returns:
        str:
            The data set ("train", "val" or "test")
    """
    # the entire held-out year goes to the test set, regardless of facility
    if row.ts.year == test_year:
        return "test"
    return data_set_mapper[row.facility_id]
def emissions_to_category(
    emissions: float, quantiles: Dict[float, float], rescale: bool = False
) -> int:
    """
    Convert emissions to a category based on quantiles. The quantiles are
    calculated from the training data. Here's how the categories are defined:
    - 0: no emissions
    - 1: low emissions
    - 2: medium emissions
    - 3: high emissions
    - 4: very high emissions

    Args:
        emissions (float): emissions value
        quantiles (Dict[float, float]): quantiles to use for categorization
        rescale (bool): whether to rescale emissions to the original range,
            using the 99th quantile as the maximum value

    Returns:
        int: category
    """
    if rescale:
        # model outputs are normalized by the 99th quantile; undo that
        emissions = emissions * quantiles[0.99]
    # each elif already guarantees the bucket's lower bound, so only the
    # upper bound needs checking (the original repeated both comparisons)
    if emissions <= 0:
        return 0
    elif emissions <= quantiles[0.3]:
        return 1
    elif emissions <= quantiles[0.6]:
        return 2
    elif emissions <= quantiles[0.99]:
        return 3
    else:
        return 4
def preds_n_targets_to_categories(
    preds: torch.Tensor,
    targets: torch.Tensor,
    quantiles: Dict[float, float],
    rescale: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Convert emissions to a category based on quantiles. The quantiles are
    calculated from the training data. Here's how the categories are defined:
    - 0: no emissions
    - 1: low emissions
    - 2: medium emissions
    - 3: high emissions
    - 4: very high emissions

    Args:
        preds (torch.Tensor): emissions predictions
        targets (torch.Tensor): emissions targets
        quantiles (Dict[float, float]): quantiles to use for categorization
        rescale (bool): whether to rescale emissions to the original range,
            using the 99th quantile as the maximum value

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: tuple of predictions and targets
    """

    def to_categories(values: torch.Tensor) -> torch.Tensor:
        # map every scalar through the shared categorization logic, keeping
        # the result on the same device as the input tensor
        return torch.tensor(
            [emissions_to_category(v, quantiles, rescale=rescale) for v in values]
        ).to(values.device)

    return to_categories(preds), to_categories(targets)
class SmallCNN(torch.nn.Module):
    """
    A simple model with EfficientNet-like blocks, global pooling and a final
    linear layer, compatible with images of size 32x32.
    """

    def __init__(self, num_input_channels: int = 3, num_classes: int = 1):
        super().__init__()
        self.num_input_channels = num_input_channels
        self.num_classes = num_classes

        def conv_relu(in_ch: int, out_ch: int) -> list:
            # 3x3 same-padding convolution followed by a ReLU activation
            return [
                torch.nn.Conv2d(
                    in_channels=in_ch,
                    out_channels=out_ch,
                    kernel_size=3,
                    padding=1,
                ),
                torch.nn.ReLU(),
            ]

        # three stages of two conv+ReLU blocks each, downsampling between
        # stages, then global average pooling into a linear classifier head
        layers = (
            conv_relu(self.num_input_channels, 16)
            + conv_relu(16, 32)
            + [torch.nn.MaxPool2d(kernel_size=2)]
            + conv_relu(32, 64)
            + conv_relu(64, 64)
            + [torch.nn.MaxPool2d(kernel_size=2)]
            + conv_relu(64, 128)
            + conv_relu(128, 128)
            + [
                torch.nn.AdaptiveAvgPool2d(output_size=1),
                torch.nn.Flatten(),
                torch.nn.Linear(128, self.num_classes),
            ]
        )
        self.model = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Run the network; returns logits of shape (batch, num_classes)."""
        return self.model(x)
97 | 98 | Args: 99 | preds (torch.Tensor): predictions 100 | targets (torch.Tensor): targets 101 | 102 | Returns: 103 | Dict[str, float]: metrics 104 | """ 105 | metrics = dict() 106 | # calculate the cross entropy loss 107 | metrics["loss"] = self.loss(preds, targets) 108 | # apply sigmoid to the predictions to get a value between 0 and 1 109 | preds = torch.sigmoid(preds) 110 | # calculate emissions vs no-emissions accuracy 111 | metrics["accuracy"] = ( 112 | ((preds > POSITIVE_THRESHOLD) == (targets > 0)).float().mean() 113 | ) 114 | # calculate balanced accuracy, which accounts for class imbalance 115 | metrics["balanced_accuracy"] = balanced_accuracy_score( 116 | y_pred=(preds.cpu() > POSITIVE_THRESHOLD).int(), 117 | y_true=targets.cpu().int(), 118 | ) 119 | # calculate recall and precision 120 | metrics["recall"] = torchmetrics.functional.recall( 121 | preds=preds, 122 | target=targets, 123 | average="macro", 124 | task="binary", 125 | ) 126 | metrics["precision"] = torchmetrics.functional.precision( 127 | preds=preds, 128 | target=targets, 129 | average="macro", 130 | task="binary", 131 | ) 132 | return metrics 133 | 134 | def shared_step( 135 | self, 136 | batch: Dict[str, Any], 137 | batch_idx: int, 138 | stage: str, 139 | ): 140 | if len(batch["image"].shape) == 0: 141 | # avoid iteration over a 0-d array error 142 | return dict() 143 | metrics = dict() 144 | x, y = batch["image"], batch["target"] 145 | x, y = x.float().to(self.device), y.float().to(self.device) 146 | # forward pass (calculate predictions) 147 | y_pred = self(x) 148 | # calculate metrics for the current batch 149 | metrics = self.calculate_all_metrics(preds=y_pred, targets=y) 150 | metrics = { 151 | (f"{stage}_{k}" if k != "loss" or stage != "train" else k): v 152 | for k, v in metrics.items() 153 | } 154 | # log metrics 155 | for k, v in metrics.items(): 156 | if k == "loss": 157 | self.log(k, v, on_step=True, prog_bar=True) 158 | else: 159 | self.log(k, v, on_step=False, on_epoch=True, 
prog_bar=True) 160 | return metrics 161 | 162 | def training_step(self, batch: Dict[str, Any], batch_idx: int): 163 | return self.shared_step(batch, batch_idx, stage="train") 164 | 165 | def validation_step(self, batch: Dict[str, Any], batch_idx: int): 166 | return self.shared_step(batch, batch_idx, stage="val") 167 | 168 | def test_step(self, batch: Dict[str, Any], batch_idx: int): 169 | return self.shared_step(batch, batch_idx, stage="test") 170 | 171 | def configure_optimizers(self): 172 | optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate) 173 | return { 174 | "optimizer": optimizer, 175 | "lr_scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau( 176 | optimizer, mode="min", factor=0.1, patience=3 177 | ), 178 | "monitor": "val_loss", 179 | } 180 | -------------------------------------------------------------------------------- /src/coal_emissions_monitoring/satellite_imagery.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import os 3 | from typing import List, Optional, Union 4 | import backoff 5 | 6 | import geopandas as gpd 7 | import numpy as np 8 | import rasterio as rio 9 | from rasterio.errors import RasterioIOError 10 | import pandas as pd 11 | import pystac_client 12 | from loguru import logger 13 | from pyproj.aoi import AreaOfInterest 14 | from pyproj.database import query_utm_crs_info 15 | from shapely.geometry.base import BaseGeometry 16 | import torch 17 | from tqdm.auto import tqdm 18 | 19 | from coal_emissions_monitoring.constants import ( 20 | ALL_BANDS, 21 | AOI_SIZE_METERS, 22 | API_URL, 23 | COLLECTION, 24 | END_DATE, 25 | GLOBAL_EPSG, 26 | IMAGE_SIZE_PX, 27 | MAX_BRIGHT_MEAN, 28 | MAX_CLOUD_COVER_PRCT, 29 | MAX_DARK_FRAC, 30 | START_DATE, 31 | ) 32 | 33 | STAC_CLIENT = pystac_client.Client.open(API_URL) 34 | 35 | 36 | def get_epsg_from_coords(latitude: float, longitude: float) -> int: 37 | """ 38 | Get the EPSG code for a specific coordinate 39 | 
def get_epsg_from_coords(latitude: float, longitude: float) -> int:
    """
    Get the EPSG code for a specific coordinate

    Args:
        latitude (float):
            The latitude of the coordinate
        longitude (float):
            The longitude of the coordinate

    Returns:
        int:
            The EPSG code for the coordinate
    """
    # query the UTM CRS database with a point-sized area of interest
    point_aoi = AreaOfInterest(
        west_lon_degree=longitude,
        south_lat_degree=latitude,
        east_lon_degree=longitude,
        north_lat_degree=latitude,
    )
    crs_candidates = query_utm_crs_info(
        datum_name="WGS 84",
        area_of_interest=point_aoi,
    )
    return int(crs_candidates[0].code)


def create_aoi_for_plants(campd_facilities_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Create a square area of interest (AOI) for each plant in the CAMPD facilities data.
    This will later be used to query for satellite imagery.

    Args:
        campd_facilities_gdf (gpd.GeoDataFrame):
            The CAMPD facilities data frame

    Returns:
        gpd.GeoDataFrame:
            A data frame containing the AOIs for each plant
    """
    buffered_dfs = list()
    for _, plant_df in tqdm(
        campd_facilities_gdf.groupby("facility_id"),
        total=campd_facilities_gdf.facility_id.nunique(),
        desc="Creating AOIs for plants",
    ):
        # buffering distances are meaningful in a metric CRS, so identify
        # the local CRS for this facility from its latitude and longitude
        local_epsg = get_epsg_from_coords(
            plant_df.latitude.mean(), plant_df.longitude.mean()
        )
        plant_df = plant_df.to_crs(epsg=local_epsg)
        # buffer the geometry into a square (cap_style=3) whose sides are
        # roughly AOI_SIZE_METERS long
        plant_df.geometry = plant_df.geometry.buffer(
            AOI_SIZE_METERS / 2, cap_style=3
        )
        # convert back to the global CRS so all AOIs share one frame
        plant_df = plant_df.to_crs(epsg=GLOBAL_EPSG)
        buffered_dfs.append(plant_df)
    return gpd.GeoDataFrame(pd.concat(buffered_dfs, ignore_index=True))
def get_aws_cog_links_from_geom(
    geometry: BaseGeometry,
    collection: str = COLLECTION,
    start_date: Optional[datetime] = START_DATE,
    end_date: Optional[datetime] = END_DATE,
    max_cloud_cover_prct: Optional[int] = MAX_CLOUD_COVER_PRCT,
    sort_by: str = "updated",
    max_items: Optional[int] = None,
    verbose: bool = True,
) -> pd.DataFrame:
    """
    Retrieve links from AWS' Sentinel 2 L2A STAC

    Args:
        geometry (BaseGeometry):
            The geometry to query for images that
            contain it in STAC
        collection (str):
            The STAC collection to query
        start_date (Optional[datetime]):
            Optional start date to filter images on
        end_date (Optional[datetime]):
            Optional end date to filter images on
        max_cloud_cover_prct (Optional[int]):
            Optional maximum cloud cover to filter
            images that are too cloudy. Expressed
            as a percentage, e.g. 1 = 1%
        sort_by (str):
            Which property to sort the results by,
            in descending order; needs to be a valid
            property in the STAC collection
        max_items (Optional[int]):
            Optional maximum number of items to
            return
        verbose (bool):
            Whether to print the progress of the
            query

    Returns:
        pd.DataFrame:
            A dataframe containing the ID of the tile and
            the links to its COGs and metadata; None when
            no items matched the search
    """
    # get the bounding box from the geometry
    bbox = geometry.bounds
    # specify the cloud filter
    cloud_filter = None
    if max_cloud_cover_prct == 0:
        cloud_filter = "eo:cloud_cover=0"
    elif max_cloud_cover_prct is not None:
        cloud_filter = f"eo:cloud_cover<={max_cloud_cover_prct}"
    # bug fix: the dates are Optional, but strftime was called on them
    # unconditionally; use STAC's open-ended ".." range syntax when a date
    # is missing
    start_str = start_date.strftime("%Y-%m-%d") if start_date is not None else ".."
    end_str = end_date.strftime("%Y-%m-%d") if end_date is not None else ".."
    # query the STAC collection(s) in a specific bounding box and search criteria
    search = STAC_CLIENT.search(
        collections=[collection],
        bbox=bbox,
        datetime=f"{start_str}/{end_str}",
        query=[cloud_filter] if cloud_filter is not None else None,
    )
    if verbose:
        logger.info(f"Found {search.matched()} items matching the search criteria")
    items = search.get_all_items()
    if max_cloud_cover_prct is not None and collection == "sentinel-s2-l2a-cogs":
        # some items had invalid cloud cover data and turned out very cloudy;
        # only works for L2A
        items_valid_cloud_filter = [
            x for x in items if x.properties["sentinel:valid_cloud_cover"]
        ]
        if verbose:
            logger.info(
                f"Removed {len(items) - len(items_valid_cloud_filter)} items for invalid cloud filters"
            )
        items = items_valid_cloud_filter
    items = sorted(items, key=lambda x: x.properties[sort_by], reverse=True)
    if max_items is not None:
        items = items[:max_items]
    # nothing left to report; callers check for None
    if len(items) == 0:
        return None
    # create a dictionary that contains the tile ID and the links to the COGs
    # and metadata
    output = dict(tile_id=[item.id for item in items])
    asset_keys = items[0].assets.keys()
    for key in asset_keys:
        output[key] = [item.assets[key].href for item in items]
    output["cloud_cover"] = [item.properties["eo:cloud_cover"] for item in items]
    output[sort_by] = [item.properties[sort_by] for item in items]
    output["ts"] = [item.properties["datetime"] for item in items]
    output = pd.DataFrame(output)
    output["ts"] = pd.to_datetime(output["ts"])
    # a tile can appear multiple times; keep the most recently sorted copy
    output.drop_duplicates(subset="ts", keep="first", inplace=True)
    output.sort_values("ts", inplace=True)
    return output
frame containing the AOIs for each plant 203 | collection (str): 204 | The STAC collection to query 205 | start_date (Optional[datetime]): 206 | Start date to filter images on 207 | end_date (Optional[datetime]): 208 | End date to filter images on 209 | max_cloud_cover_prct (Optional[int]): 210 | Maximum cloud cover to filter 211 | images that are too cloudy. Expressed 212 | as a percentage, e.g. 1 = 1% 213 | sort_by (str): 214 | Which property to sort the results by, 215 | in descending order; needs to be a valid 216 | property in the STAC collection 217 | 218 | Returns: 219 | pd.DataFrame: 220 | A dataframe containing the ID of the tile and 221 | the links to its COGs and metadata 222 | """ 223 | image_metadata_dfs = list() 224 | for facility_id, geometry in tqdm( 225 | plant_aoi_gdf.groupby("facility_id").geometry.first().items(), 226 | total=plant_aoi_gdf.facility_id.nunique(), 227 | desc="Querying STAC API", 228 | ): 229 | stac_results_df = get_aws_cog_links_from_geom( 230 | geometry=geometry, 231 | collection=collection, 232 | start_date=start_date, 233 | end_date=end_date, 234 | max_cloud_cover_prct=max_cloud_cover_prct, 235 | sort_by=sort_by, 236 | verbose=False, 237 | ) 238 | stac_results_df["facility_id"] = facility_id 239 | image_metadata_dfs.append(stac_results_df) 240 | return pd.concat(image_metadata_dfs, ignore_index=True) 241 | 242 | 243 | def pad_or_crop_to_size(image: np.ndarray, size: int = IMAGE_SIZE_PX) -> np.ndarray: 244 | """ 245 | Pad or crop an image to a specific size 246 | 247 | Args: 248 | image (np.ndarray): 249 | The image to pad or crop, with dimensions (C, H, W), 250 | where C is the number of channels, H is the height and 251 | W is the width 252 | size (int): 253 | The size to pad or crop to 254 | 255 | Returns: 256 | np.ndarray: 257 | The padded or cropped image 258 | """ 259 | if image.shape[1] > size: 260 | # crop the image 261 | image = image[:, :size, :size] 262 | elif image.shape[1] < size: 263 | # pad the image 264 | image = 
def pad_or_crop_to_size(image: np.ndarray, size: int = IMAGE_SIZE_PX) -> np.ndarray:
    """
    Pad and/or crop an image to a square of side `size`.

    Height and width are handled independently: each axis that is too
    large is cropped (keeping the top-left corner) and each axis that is
    too small is zero-padded (on the bottom/right). The previous
    implementation only inspected the height (dim 1), so a non-square
    image whose height already matched `size` kept its wrong width.

    Args:
        image (np.ndarray):
            The image to pad or crop, with dimensions (C, H, W),
            where C is the number of channels, H is the height and
            W is the width
        size (int):
            The size to pad or crop to

    Returns:
        np.ndarray:
            The padded and/or cropped image, with shape (C, size, size)
    """
    # crop any axis that is too large (no-op on axes already <= size)
    image = image[:, :size, :size]
    # pad (with zeros, np.pad's default) any axis that is too small
    pad_h = size - image.shape[1]
    pad_w = size - image.shape[2]
    if pad_h > 0 or pad_w > 0:
        image = np.pad(image, ((0, 0), (0, pad_h), (0, pad_w)))
    return image


@backoff.on_exception(backoff.expo, RasterioIOError, max_tries=3)
def get_image_from_cog(
    cog_url: str, geometry: BaseGeometry, size: int = IMAGE_SIZE_PX
) -> np.ndarray:
    """
    Get the image from a COG, clipped to the geometry.

    Retries up to 3 times (exponential backoff) on RasterioIOError.

    Args:
        cog_url (str):
            The URL to the COG
        geometry (BaseGeometry):
            The geometry to clip the image to
        size (int):
            The size to pad or crop to

    Returns:
        np.ndarray:
            The clipped image, with shape (C, size, size)
    """
    # load only the bbox of the image
    with rio.open(cog_url) as src:
        # get the bbox converted to the right coordinate reference system (crs);
        # doing all of this because geopandas has the convenient to_crs function
        crs_bbox = (
            gpd.GeoDataFrame(geometry=[geometry], crs=GLOBAL_EPSG)
            .to_crs(src.crs)
            .total_bounds
        )
        # define window in RasterIO
        window = rio.windows.from_bounds(*crs_bbox, transform=src.transform)
        # actual HTTP range request
        image = src.read(window=window)
    # make sure that the image has the shape that we want
    image = pad_or_crop_to_size(image, size=size)
    return image
def get_all_bands_image(
    cog_urls: List[str],
    geometry: BaseGeometry,
    size: int = IMAGE_SIZE_PX,
) -> np.ndarray:
    """
    Get an image that stacks all bands for a given row,
    clipped to the geometry.

    Args:
        cog_urls (List[str]):
            The URLs to the COGs
        geometry (BaseGeometry):
            The geometry to clip the image to
        size (int):
            The size to pad or crop to

    Returns:
        np.ndarray:
            The stacked image, with one channel per COG URL
    """
    bands = [
        get_image_from_cog(cog_url=url, geometry=geometry, size=size).squeeze()
        for url in cog_urls
    ]
    return np.stack(bands, axis=0)


def fetch_image_path_from_cog(
    cog_url: Union[str, List[str]],
    geometry: BaseGeometry,
    size: int = IMAGE_SIZE_PX,
    cog_type: str = "visual",
    images_dir: str = "images/",
    download_missing_images: bool = False,
) -> Union[str, None]:
    """
    Fetch the image path from a COG; if download_missing_images is True,
    the image will be downloaded if it does not exist.

    Args:
        cog_url (Union[str, List[str]]):
            The URL to the COG (or list of URLs when cog_type is "all")
        geometry (BaseGeometry):
            The geometry to clip the image to
        size (int):
            The size to pad or crop to
        cog_type (str):
            The type of COG to download. Can be either "visual" or "all".
        images_dir (str):
            The directory to save the image to
        download_missing_images (bool):
            Whether to download the image if it does not exist

    Returns:
        Union[str, None]:
            The path to the downloaded image. If the image
            doesn't exist or could not be downloaded, None is returned.

    Raises:
        ValueError: if cog_type is not "visual" or "all"
    """
    # validate cog_type up front; previously an unknown value fell
    # through both download branches and crashed later with an
    # unrelated NameError on the unbound `image`
    if cog_type not in ("visual", "all"):
        raise ValueError(
            f"Invalid cog_type: {cog_type}. Expected one of: visual, all."
        )
    if cog_type == "all":
        assert isinstance(cog_url, list) and len(cog_url) == len(ALL_BANDS), (
            "If cog_type is 'all', cog_url must be a list "
            f"of length {len(ALL_BANDS)}"
        )
        image_name = "_".join(cog_url[0].split("/")[-2:]).replace(".tif", "")
    else:
        image_name = "_".join(cog_url.split("/")[-2:]).replace(".tif", "")
    # NOTE(review): shapely coords are (x, y), i.e. (lon, lat), so these
    # names look swapped; they only affect the cached file name, and are
    # kept as-is for backward compatibility with already-saved patches
    lat, lon = geometry.centroid.coords[0]
    patch_name = f"{image_name}_{lat}_{lon}_{size}"
    image_path = os.path.join(images_dir, f"{patch_name}.npy")
    if os.path.exists(image_path):
        # image already exists in the expected location
        return str(image_path)
    if not download_missing_images:
        # image does not exist and we don't want to download it
        return None
    # download and save the image
    os.makedirs(images_dir, exist_ok=True)
    try:
        if cog_type == "visual":
            image = get_image_from_cog(cog_url=cog_url, geometry=geometry, size=size)
        else:
            image = get_all_bands_image(cog_urls=cog_url, geometry=geometry, size=size)
    except (RasterioIOError, ValueError) as e:
        # RasterioIOError: HTTP/range request failed even after retries;
        # ValueError: band stacking failed (e.g. mismatched band shapes) -
        # previously this was only caught for the "all" branch
        logger.warning(f"Failed to download image {cog_url}. Original error:\n{e}")
        return None
    np.save(image_path, image)
    return str(image_path)
def is_image_too_dark(
    image: torch.Tensor, max_dark_frac: float = MAX_DARK_FRAC
) -> bool:
    """
    Check if an image is too dark, based on the fraction of pixels that are
    black or NaN

    Args:
        image (torch.Tensor):
            The image to check, with dimensions (C, H, W),
            where C is the number of channels, H is the height and
            W is the width
        max_dark_frac (float):
            The maximum fraction of pixels that can be black or NaN

    Returns:
        bool:
            Whether the image is too dark
    """
    # a pixel counts as "dark" when its value is <= 1 or NaN
    dark_frac = ((image <= 1) | (image.isnan())).sum() / image.numel()
    return dark_frac > max_dark_frac


def is_image_too_bright(
    image: torch.Tensor, max_mean_val: float = MAX_BRIGHT_MEAN
) -> bool:
    """
    Check if the image is too bright, such as because of clouds or snow, based
    on the mean value of the image

    Args:
        image (torch.Tensor):
            The image to check, with dimensions (C, H, W),
            where C is the number of channels, H is the height and
            W is the width
        max_mean_val (float):
            The maximum mean value of the image
            (defaults to MAX_BRIGHT_MEAN)

    Returns:
        bool:
            Whether the image is too bright
    """
    # the original signature annotated the parameter with the *value*
    # MAX_BRIGHT_MEAN instead of a type, and provided no default even
    # though the docstring and the sibling is_image_too_dark imply one;
    # fixed to `float = MAX_BRIGHT_MEAN` (backward compatible)
    return image.mean() > max_mean_val
def get_transform(
    data_group: str, crop_size: int = CROP_SIZE_PX
) -> K.AugmentationSequential:
    """
    Get the transform for the given data group, i.e. train, val, or test.

    Args:
        data_group (str): data group; one of "train", "val" or "test"
        crop_size (int): crop size in pixels

    Returns:
        K.AugmentationSequential: transforms

    Raises:
        ValueError: if data_group is not one of "train", "val", "test"
    """
    if data_group == "train":
        # random crop + flips/rotations for augmentation during training
        return K.AugmentationSequential(
            K.RandomCrop(size=(crop_size, crop_size)),
            K.RandomHorizontalFlip(p=RANDOM_TRANSFORM_PROB),
            K.RandomRotation(p=RANDOM_TRANSFORM_PROB, degrees=90),
            # TODO this contrast transform is sometimes making the image too dark
            # consider fixing it if needing more regularization
            # K.RandomContrast(p=RANDOM_TRANSFORM_PROB, contrast=(0.9, 1.1)),
            data_keys=["image"],
            same_on_batch=False,
            keepdim=True,
        )
    elif data_group in ("val", "test"):
        # val and test share the same deterministic center-crop pipeline
        # (previously duplicated in two identical branches)
        return K.AugmentationSequential(
            K.CenterCrop(size=(crop_size, crop_size)),
            data_keys=["image"],
            same_on_batch=False,
            keepdim=True,
        )
    else:
        # fixed: the two adjacent literals used to concatenate without a
        # separating space, rendering as e.g. "foo.Expected one of..."
        raise ValueError(
            f"Invalid data group: {data_group}. "
            "Expected one of: train, val, test."
        )


# Resize -> center-crop 224 -> normalize with the standard ImageNet
# statistics, i.e. the usual preprocessing for ImageNet-pretrained
# EfficientNet backbones
efficientnet_transform = K.AugmentationSequential(
    K.Resize(size=(256, 256)),
    K.CenterCrop(size=(224, 224)),
    K.Normalize(
        mean=torch.tensor([0.485, 0.456, 0.406]),
        std=torch.tensor([0.229, 0.224, 0.225]),
    ),
    data_keys=["image"],
    same_on_batch=False,
    keepdim=True,
)