├── WaterControlZone_HK_shp.zip ├── MonitoringStation_wgs84_76_shp.zip ├── README.md ├── GEE_TimeSeriesApp.js ├── GEE_6S_AtmosphericParameter.js ├── Part1_ImagePreprocessing.ipynb ├── LocalProcessingPipeline_Part2_NewlyAcquiredImage.ipynb ├── LocalProcessingPipeline_Part1_ArchivedImageDatabase.ipynb └── Part2_ModelDevelopmentAndPrediction.ipynb /WaterControlZone_HK_shp.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanhykwong/Marine-Water-Quality-Time-Series-HK/HEAD/WaterControlZone_HK_shp.zip -------------------------------------------------------------------------------- /MonitoringStation_wgs84_76_shp.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanhykwong/Marine-Water-Quality-Time-Series-HK/HEAD/MonitoringStation_wgs84_76_shp.zip -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Marine Water Quality Monitoring in Hong Kong - Time Series Estimated from Satellite Images (2015-2021) 2 | 3 | Supplementary materials used in the following projects: 4 | 5 | Journal article: 6 | 7 | **Kwong, I. H. Y., Wong, F. K. K., & Fung, T. (2022). Automatic mapping and monitoring of marine water quality parameters in Hong Kong using Sentinel-2 image time-series and Google Earth Engine cloud computing. *Frontiers in Marine Science, 9*, 871470. doi: 10.3389/fmars.2022.871470** 8 | 9 | - Link to the article: https://www.frontiersin.org/articles/10.3389/fmars.2022.871470/ 10 | 11 | Award-winning ArcGIS Dashboard: 12 | 13 | **Water Quality Monitoring From Satellite Imagery: A Case Study in Hong Kong. 
*Common Spatial Data Infrastructure (CSDI) Awards 2024 - Merit (Open Category).* https://csdigeolab.gov.hk/en/past-events/csdi-awards-2024** 14 | 15 | - Project presentation slides: https://csdigeolab.gov.hk/images/CSDI_Awards_2024/brief/C4-23.pdf 16 | - **Interactive ArcGIS Dashboard: https://www.arcgis.com/apps/dashboards/3b1a7e3a7ea640a1a2b2338cd774520a** 17 | - Video introduction: https://youtu.be/ghYus8dmrP0 (English) / https://youtu.be/mNEPP1INqXE (Cantonese) 18 | 19 | --- 20 | 21 | **Online application developed using Google Earth Engine:** https://khoyinivan.users.earthengine.app/view/marine-water-quality-hk 22 | * Source code of the web application: GEE_TimeSeriesApp.js 23 | 24 | **Time-series video of the estimated water quality:** https://youtu.be/b2zwPFGDKY8 25 | 26 | --- 27 | 28 | **Additional tool to obtain the parameters related to atmospheric constituents required for 6S atmospheric correction, including Water Vapour, Ozone and Aerosol Optical Thickness:** https://khoyinivan.users.earthengine.app/view/atmospheric-constituents-for-6s 29 | * Source code of the web application: GEE_6S_AtmosphericParameter.js 30 | 31 | --- 32 | 33 | **Analysis codes:** (Python API for GEE processing; used to produce the results in the journal article) 34 | 35 | * Part1_ImagePreprocessing.ipynb 36 | 37 | * Part2_ModelDevelopmentAndPrediction.ipynb 38 | 39 | **Analysis codes:** (Python processing in local PC; used to produce the results in the ArcGIS Dashboard) 40 | 41 | * LocalProcessingPipeline_Part1_ArchivedImageDatabase.ipynb 42 | 43 | * LocalProcessingPipeline_Part2_NewlyAcquiredImage.ipynb 44 | 45 | **GEE application:** 46 | ![khoyinivan users earthengine app_view_marine-water-quality-hk_Apr2022](https://user-images.githubusercontent.com/68047356/161700888-ca8e0ee7-b962-48e5-96da-e224ada1982a.png) 47 | 48 | **ArcGIS Dashboard:** 49 | ![DashboardScreenshot](https://github.com/user-attachments/assets/04ee70e7-fe94-4495-9bad-4e19b949967f) 50 | 51 | Other files: 52 | 53 
| * **MarineQuality_2015-2020.csv**: Marine Water Quality data measured at different Marine Water Quality Monitoring stations in Hong Kong from 2015 to 2020; Gathered from DATA.GOV.HK (https://data.gov.hk/en-data/dataset/hk-epd-marineteam-marine-water-quality-historical-data-en) 54 | 55 | * **MonitoringStation_wgs84_76_shp.zip**: Locations of the 76 water quality monitoring stations in the open waters of Hong Kong (ESRI shapefile format); Equivalent to the Feature Collection ("users/khoyinivan/MonitoringStation_wgs84_76") in GEE; Extracted from Appendix A of the Annual Marine Water Quality Report 2019 (https://www.epd.gov.hk/epd/english/environmentinhk/water/hkwqrc/waterquality/marine-2.html) 56 | 57 | * **WaterControlZone_HK_shp.zip**: Polygons of the water control zones in Hong Kong (ESRI shapefile format); It was manually digitized from Appendix A of the Annual Marine Water Quality Report 2019 (https://www.epd.gov.hk/epd/english/environmentinhk/water/hkwqrc/waterquality/marine-2.html); Only boundaries in the open water were extracted 58 | 59 | *Last updated in July 2025* 60 | -------------------------------------------------------------------------------- /GEE_TimeSeriesApp.js: -------------------------------------------------------------------------------- 1 | var aoi = ee.Geometry.Polygon([[[113.800, 22.570],[113.800, 22.120],[114.514, 22.120],[114.514, 22.570]]]); 2 | 3 | var Chla = ee.ImageCollection('users/khoyinivan/S2_Chla_ANN'); 4 | var vis = {palette: ['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#d7191c'], 5 | min: 0.2, max: 30.0}; 6 | 7 | var SS = ee.ImageCollection('users/khoyinivan/S2_SS_ANN'); 8 | var Tur = ee.ImageCollection('users/khoyinivan/S2_Tur_ANN'); 9 | 10 | var utils = require('users/gena/packages:utils'); 11 | var text = require('users/gena/packages:text'); 12 | 13 | Map.centerObject(aoi, 11); 14 | var bounds = aoi.bounds(); 15 | 16 | //make the data 8-bit which is necessary for making a video 17 | var Chla_video = 
Chla.map(function(image){ 18 | var label = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd'); 19 | return image.visualize({ 20 | forceRgbOutput: true, 21 | palette: ['#2b83ba', '#abdda4', '#ffffbf', '#fdae61', '#d7191c'], 22 | min: 0.2, max: 30.0 23 | }).set({label: label}); 24 | }); 25 | 26 | // annotate 27 | var annotations = [{position: 'left', offset: '1%', margin: '1%', property: 'label', scale: Map.getScale() * 5}]; 28 | 29 | Chla_video = Chla_video.map(function(image) { 30 | return text.annotateImage(image, {}, bounds, annotations); 31 | }); 32 | 33 | var Chla_list = Chla_video.toList(Chla_video.size()); 34 | 35 | 36 | /* 37 | * Map layer configuration 38 | */ 39 | 40 | // Create the main map 41 | var mapPanel = ui.Map(); 42 | 43 | 44 | /* 45 | * Panel setup 46 | */ 47 | 48 | // Create a panel to hold title, intro text, chart and legend components. 49 | var inspectorPanel = ui.Panel({style: {width: '30%'}}); 50 | 51 | // Create an intro panel with labels. 52 | var intro = ui.Panel([ 53 | ui.Label({ 54 | value: 'Marine Water Quality Inspector - Time Series Estimated From Satellite Image (2015-2021)', 55 | style: {fontSize: '20px', fontWeight: 'bold'} 56 | }), 57 | ui.Label('Refresh the browser if the charts cannot be shown.'), 58 | ui.Label('Background of this app can be found in:'), 59 | ui.Label('https://github.com/ivanhykwong/Marine-Water-Quality-Time-Series-HK').setUrl('https://github.com/ivanhykwong/Marine-Water-Quality-Time-Series-HK'), 60 | ui.Label('Click a location to see its time series of Chlorophyll-a (μg/L).') 61 | ]); 62 | inspectorPanel.add(intro); 63 | 64 | // Create panels to hold lon/lat values. 65 | var lon = ui.Label(); 66 | var lat = ui.Label(); 67 | inspectorPanel.add(ui.Panel([lon, lat], ui.Panel.Layout.flow('horizontal'))); 68 | 69 | // Add placeholders for the chart and legend. 
70 | inspectorPanel.add(ui.Label('[Chart]')); 71 | inspectorPanel.add(ui.Label('[Legend]')); 72 | inspectorPanel.add(ui.Label('Click a location to see its time series of other indicators.')); 73 | inspectorPanel.add(ui.Label('[Chart-SS]')); 74 | inspectorPanel.add(ui.Label('[Chart-TUR]')); 75 | 76 | 77 | /* 78 | * Chart setup 79 | */ 80 | 81 | // Generates a new time series chart of Chlorophyll-a for the given coordinates. 82 | var generateChart = function (coords) { 83 | // Update the lon/lat panel with values from the click event. 84 | lon.setValue('lon: ' + coords.lon.toFixed(2)); 85 | lat.setValue('lat: ' + coords.lat.toFixed(2)); 86 | 87 | // Add a dot for the point clicked on. 88 | var point = ee.Geometry.Point(coords.lon, coords.lat); 89 | var dot = ui.Map.Layer(point, {color: '000000'}, 'clicked location'); 90 | // Add the dot as the second layer, so it shows up on top of the composite. 91 | mapPanel.layers().set(1, dot); 92 | 93 | // Make a chart from the time series. 94 | var sstChart = ui.Chart.image.series(Chla, point, ee.Reducer.mean(), 50); 95 | 96 | // Customize the chart. 97 | sstChart.setOptions({ 98 | title: 'Chlorophyll-a: time series', 99 | titleTextStyle: {fontSize: 16}, 100 | vAxis: {title: 'Chlorophyll-a (μg/L)'}, //, viewWindow: { min: 0, max: 30 } 101 | hAxis: {title: 'Date', format: 'MM-yyyy', gridlines: {count: 7}}, 102 | pointSize: 5, 103 | legend: {position: 'none'} 104 | }); 105 | // Add the chart at a fixed position, so that new charts overwrite older ones. 106 | inspectorPanel.widgets().set(2, sstChart); 107 | }; 108 | 109 | 110 | /* 111 | * Legend setup 112 | */ 113 | 114 | // Creates a color bar thumbnail image for use in legend from the given color 115 | // palette. 116 | function makeColorBarParams(palette) { 117 | return { 118 | bbox: [0, 0, 1, 0.1], 119 | dimensions: '100x10', 120 | format: 'png', 121 | min: 0, 122 | max: 1, 123 | palette: palette, 124 | }; 125 | } 126 | 127 | // Create the color bar for the legend. 
128 | var colorBar = ui.Thumbnail({ 129 | image: ee.Image.pixelLonLat().select(0), 130 | params: makeColorBarParams(vis.palette), 131 | style: {stretch: 'horizontal', margin: '0px 8px', maxHeight: '24px'}, 132 | }); 133 | 134 | // Create a panel with three numbers for the legend. 135 | var legendLabels = ui.Panel({ 136 | widgets: [ 137 | ui.Label(vis.min, {margin: '4px 8px'}), 138 | ui.Label( 139 | (vis.max / 2), 140 | {margin: '4px 8px', textAlign: 'center', stretch: 'horizontal'}), 141 | ui.Label(vis.max, {margin: '4px 8px'}) 142 | ], 143 | layout: ui.Panel.Layout.flow('horizontal') 144 | }); 145 | 146 | var legendTitle = ui.Label({ 147 | value: 'Map Legend: Chlorophyll-a (μg/L)', 148 | style: {fontWeight: 'bold'} 149 | }); 150 | 151 | var legendPanel = ui.Panel([legendTitle, colorBar, legendLabels]); 152 | inspectorPanel.widgets().set(3, legendPanel); 153 | 154 | 155 | // Generates a new time series chart of Suspended Solids for the given coordinates. 156 | var generateChart_SS = function (coords) { 157 | 158 | // Add a dot for the point clicked on. 159 | var point = ee.Geometry.Point(coords.lon, coords.lat); 160 | 161 | // Make a chart from the time series. 162 | var sstChart = ui.Chart.image.series(SS, point, ee.Reducer.mean(), 50); 163 | 164 | // Customize the chart. 165 | sstChart.setOptions({ 166 | title: 'Suspended Solids: time series', 167 | titleTextStyle: {fontSize: 16}, 168 | vAxis: {title: 'Suspended Solids (mg/L)'}, //,viewWindow: { min: 0, max: 30 } 169 | hAxis: {title: 'Date', format: 'MM-yyyy', gridlines: {count: 7}}, 170 | pointSize: 5, 171 | legend: {position: 'none'} 172 | }); 173 | // Add the chart at a fixed position, so that new charts overwrite older ones. 174 | inspectorPanel.widgets().set(5, sstChart); 175 | }; 176 | 177 | 178 | // Generates a new time series chart of Turbidity for the given coordinates. 179 | var generateChart_TUR = function (coords) { 180 | 181 | // Add a dot for the point clicked on. 
182 | var point = ee.Geometry.Point(coords.lon, coords.lat); 183 | 184 | // Make a chart from the time series. 185 | var sstChart = ui.Chart.image.series(Tur, point, ee.Reducer.mean(), 50); 186 | 187 | // Customize the chart. 188 | sstChart.setOptions({ 189 | title: 'Turbidity: time series', 190 | titleTextStyle: {fontSize: 16}, 191 | vAxis: {title: 'Turbidity (NTU)'}, //, viewWindow: { min: 0, max: 25 } 192 | hAxis: {title: 'Date', format: 'MM-yyyy', gridlines: {count: 7}}, 193 | pointSize: 5, 194 | legend: {position: 'none'} 195 | }); 196 | // Add the chart at a fixed position, so that new charts overwrite older ones. 197 | inspectorPanel.widgets().set(6, sstChart); 198 | }; 199 | 200 | 201 | /* 202 | * Map setup 203 | */ 204 | 205 | 206 | // Create a panel that contains both the slider and the label. 207 | var uilabel = ui.Label('Chlorophyll-a time series (1=earliest)'); 208 | var DateSlider = ui.Slider({min: 1, max: 120, step: 1, 209 | style: {stretch: 'horizontal', width:'500px', fontWeight: 'bold'}, 210 | onChange: (function(value) { 211 | mapPanel.layers().reset(); 212 | mapPanel.layers().add(ee.Image(Chla_list.get(value - 1)), 'Chl-a'); 213 | }) 214 | }); 215 | DateSlider.setValue(120); // Set a default value. 216 | mapPanel.layers().add(ee.Image(Chla_list.get(0)), 'Chl-a'); 217 | 218 | var uipanel = ui.Panel({ 219 | widgets: [uilabel, DateSlider], 220 | layout: ui.Panel.Layout.flow('horizontal') 221 | }); 222 | 223 | // Add the panel to the map. 224 | mapPanel.add(uipanel); 225 | 226 | 227 | // Register a callback on the default map to be invoked when the map is clicked. 228 | mapPanel.onClick(generateChart); 229 | mapPanel.onClick(generateChart_SS); 230 | mapPanel.onClick(generateChart_TUR); 231 | 232 | 233 | // Configure the map. 234 | mapPanel.style().set('cursor', 'crosshair'); 235 | 236 | // Initialize with a test point. 
237 | var initialPoint = ee.Geometry.Point(114.10, 22.30); 238 | mapPanel.centerObject(aoi, 11); 239 | 240 | /* 241 | * Initialize the app 242 | */ 243 | 244 | // Replace the root with a SplitPanel that contains the inspector and map. 245 | ui.root.clear(); 246 | ui.root.add(ui.SplitPanel(inspectorPanel, mapPanel)); 247 | 248 | generateChart({ 249 | lon: initialPoint.coordinates().get(0).getInfo(), 250 | lat: initialPoint.coordinates().get(1).getInfo() 251 | }); 252 | generateChart_SS({ 253 | lon: initialPoint.coordinates().get(0).getInfo(), 254 | lat: initialPoint.coordinates().get(1).getInfo() 255 | }); 256 | generateChart_TUR({ 257 | lon: initialPoint.coordinates().get(0).getInfo(), 258 | lat: initialPoint.coordinates().get(1).getInfo() 259 | }); 260 | 261 | -------------------------------------------------------------------------------- /GEE_6S_AtmosphericParameter.js: -------------------------------------------------------------------------------- 1 | // atmospheric.py, Sam Murphy (2016-10-26) 2 | 3 | // Atmospheric water vapour, ozone and AOT from GEE 4 | 5 | // Usage 6 | // H2O = Atmospheric.water(geom,date) 7 | // O3 = Atmospheric.ozone(geom,date) 8 | // AOT = Atmospheric.aerosol(geom,date) 9 | 10 | function round_date(date,xhour){ 11 | // rounds a date to the closest 'x' hours 12 | var y = date.get('year'); 13 | var m = date.get('month'); 14 | var d = date.get('day'); 15 | var H = date.get('hour'); 16 | var HH = H.divide(xhour).round().multiply(xhour); 17 | return ee.Date.fromYMD(y,m,d).advance(HH,'hour'); 18 | } 19 | 20 | function round_month(date){ 21 | // round date to closest month 22 | // start of THIS month 23 | var m1 = ee.Date.fromYMD(date.get('year'),date.get('month'),ee.Number(1)); 24 | // start of NEXT month 25 | var m2 = m1.advance(1,'month'); 26 | // difference from date 27 | var d1 = ee.Number(date.difference(m1,'day')).abs(); 28 | var d2 = ee.Number(date.difference(m2,'day')).abs(); 29 | // return closest start of month 30 | return 
ee.Date(ee.Algorithms.If(d2.gt(d1),m1,m2)); 31 | } 32 | 33 | function water(geom,date){ 34 | // Water vapour column above target at time of image aquisition. 35 | // (Kalnay et al., 1996, The NCEP/NCAR 40-Year Reanalysis Project. Bull. 36 | // Amer. Meteor. Soc., 77, 437-471) 37 | // Point geometry required 38 | var centroid = geom.centroid(); 39 | // H2O datetime is in 6 hour intervals 40 | var H2O_date = round_date(date,6); 41 | // filtered water collection 42 | var water_ic = ee.ImageCollection('NCEP_RE/surface_wv').filterDate(H2O_date, H2O_date.advance(1,'month')); 43 | // water image 44 | var water_Py6S_units; 45 | if (water_ic.size().getInfo() === 0) { 46 | water_Py6S_units = 'Out of scope'; 47 | } else { 48 | var water_img = ee.Image(water_ic.first()); 49 | // water_vapour at target 50 | var water_target = water_img.reduceRegion({reducer:ee.Reducer.mean(), geometry:centroid}).get('pr_wtr'); 51 | // convert to Py6S units (Google = kg/m^2, Py6S = g/cm^2) 52 | water_Py6S_units = ee.Number(water_target).divide(10); 53 | } 54 | return water_Py6S_units; 55 | } 56 | 57 | 58 | function ozone(geom,date){ 59 | // returns ozone measurement from merged TOMS/OMI dataset 60 | // OR 61 | // uses our fill value (which is mean value for that latlon and day-of-year) 62 | // Point geometry required 63 | var centroid = geom.centroid(); 64 | 65 | function ozone_fill(centroid,O3_date){ 66 | // Gets our ozone fill value (i.e. mean value for that doy and latlon) 67 | // you can see it 68 | // 1) compared to LEDAPS: https://code.earthengine.google.com/8e62a5a66e4920e701813e43c0ecb83e 69 | // 2) as a video: https://www.youtube.com/watch?v=rgqwvMRVguI&feature=youtu.be 70 | 71 | // ozone fills (i.e. one band per doy) 72 | var ozone_fills = ee.ImageCollection('users/samsammurphy/public/ozone_fill').toList(366); 73 | // day of year index 74 | var jan01 = ee.Date.fromYMD(O3_date.get('year'),1,1); 75 | var doy_index = date.difference(jan01,'day').toInt(); // (NB. 
index is one less than doy, so no need to +1) 76 | // day of year image 77 | var fill_image = ee.Image(ozone_fills.get(doy_index)); 78 | // return scalar fill value 79 | return fill_image.reduceRegion({reducer:ee.Reducer.mean(), geometry:centroid}).get('ozone'); 80 | } 81 | 82 | function ozone_measurement(centroid,O3_date){ 83 | // filtered ozone collection 84 | var ozone_ic = ee.ImageCollection('TOMS/MERGED').filterDate(O3_date, O3_date.advance(1,'month')); 85 | // ozone image 86 | var ozone_img = ee.Image(ozone_ic.first()); 87 | // ozone value IF TOMS/OMI image exists ELSE use fill value 88 | var ozone_target = ee.Algorithms.If(ozone_img, ozone_img.reduceRegion({reducer:ee.Reducer.mean(), geometry:centroid}).get('ozone'), ozone_fill(centroid,O3_date)); 89 | return ozone_target; 90 | } 91 | 92 | // O3 datetime in 24 hour intervals 93 | var O3_date = round_date(date,24); 94 | // TOMS temporal gap 95 | var TOMS_gap = ee.DateRange('1994-11-01','1996-08-01'); 96 | // avoid TOMS gap entirely 97 | var ozone_target = ee.Algorithms.If(TOMS_gap.contains(O3_date),ozone_fill(centroid,O3_date),ozone_measurement(centroid,O3_date)); 98 | // fix other data gaps (e.g. spatial, missing images, etc..) 99 | ozone_target = ee.Algorithms.If(ozone_target,ozone_target,ozone_fill(centroid,O3_date)); 100 | // convert to Py6S units 101 | var ozone_Py6S_units = ee.Number(ozone_target).divide(1000); // (i.e. Dobson units are milli-atm-cm ) 102 | return ozone_Py6S_units; 103 | } 104 | 105 | function aerosol(geom,date){ 106 | // Aerosol Optical Thickness. 107 | // try: MODIS Aerosol Product (monthly) 108 | // except: fill value 109 | 110 | function aerosol_fill(date){ 111 | // MODIS AOT fill value for this month (i.e. no data gaps) 112 | return ee.Image('users/samsammurphy/public/AOT_stack').select([ee.String('AOT_').cat(date.format('M'))]).rename(['AOT_550']); 113 | } 114 | 115 | function aerosol_this_month(date){ 116 | // MODIS AOT original data product for this month (i.e. 
some data gaps) 117 | // image for this month 118 | var img = ee.Image(ee.ImageCollection('MODIS/061/MOD08_M3').filterDate(round_month(date)).first()); 119 | 120 | // fill missing month (?) 121 | img = ee.Algorithms.If(img, img.select(['Aerosol_Optical_Depth_Land_Mean_Mean_550']).divide(1000).rename(['AOT_550']), aerosol_fill(date)); 122 | return img; 123 | } 124 | 125 | function get_AOT(AOT_band,geom){ 126 | // AOT scalar value for target 127 | return ee.Image(AOT_band).reduceRegion({reducer:ee.Reducer.mean(), geometry:geom.centroid()}).get('AOT_550'); 128 | } 129 | 130 | var after_modis_start = date.difference(ee.Date('2000-03-01'),'month').gt(0); 131 | var AOT_band = ee.Algorithms.If(after_modis_start, aerosol_this_month(date), aerosol_fill(date)); 132 | var AOT = get_AOT(AOT_band,geom); 133 | AOT = ee.Algorithms.If(AOT,AOT,get_AOT(aerosol_fill(date),geom)); // check reduce region worked (else force fill value) 134 | 135 | return AOT; 136 | } 137 | 138 | 139 | var mainPanel = ui.Panel({style: {width: '40%'}}); 140 | 141 | // Add the app title to the side panel 142 | var titleLabel = ui.Label('Atmospheric Constituent and Parameters for 6S Atmospheric Correction', {fontSize: '32px'}); 143 | mainPanel.add(titleLabel); 144 | 145 | // Add the app description to the main panel 146 | var descriptionText = 147 | 'This app allows you to obtain the parameters related to atmospheric constituents required for 6S atmospheric correction, '+ 148 | 'including Water Vapour (g/cm^2), Ozone (cm-atm) and Aerosol Optical Thickness. 
'+ 149 | 'Modified from functions created by Sam Murphy.'; 150 | mainPanel.add(ui.Label(descriptionText)); 151 | 152 | var descriptionText2 = 153 | 'Reference: https://github.com/samsammurphy/gee-atmcorr-S2/blob/master/bin/atmospheric.py'; 154 | mainPanel.add(ui.Label(descriptionText2, {}, 155 | 'https://github.com/samsammurphy/gee-atmcorr-S2/blob/master/bin/atmospheric.py')); 156 | 157 | var descriptionText3 = 158 | 'Enter the latitude and longitude of the target location in the following textboxes, or click on the map to obtain the coordinates. '+ 159 | 'Then enter the year, month, day and hour in the corresponding textboxes. '+ 160 | 'Click "Calculate" to obtain the result.'; 161 | mainPanel.add(ui.Label(descriptionText3)); 162 | 163 | var Lat_textbox = ui.Textbox({placeholder: 'Latitude'}).setValue('0'); 164 | mainPanel.add(ui.Panel([ui.Label('Latitude (-90 to 90):'), Lat_textbox], ui.Panel.Layout.flow('horizontal'))); 165 | 166 | var Lon_textbox = ui.Textbox({placeholder: 'Longitude'}).setValue('0'); 167 | mainPanel.add(ui.Panel([ui.Label('Longitude (-180 to 180):'), Lon_textbox], ui.Panel.Layout.flow('horizontal'))); 168 | 169 | var Year_textbox = ui.Textbox({placeholder: 'Year'}).setValue('2000'); 170 | mainPanel.add(ui.Panel([ui.Label('Year:'), Year_textbox], ui.Panel.Layout.flow('horizontal'))); 171 | 172 | var Month_textbox = ui.Textbox({placeholder: 'Month'}).setValue('01'); 173 | mainPanel.add(ui.Panel([ui.Label('Month:'), Month_textbox], ui.Panel.Layout.flow('horizontal'))); 174 | 175 | var Day_textbox = ui.Textbox({placeholder: 'Day'}).setValue('01'); 176 | mainPanel.add(ui.Panel([ui.Label('Day:'), Day_textbox], ui.Panel.Layout.flow('horizontal'))); 177 | 178 | var Hour_textbox = ui.Textbox({placeholder: 'Hour'}).setValue('00'); 179 | mainPanel.add(ui.Panel([ui.Label('Hour:'), Hour_textbox], ui.Panel.Layout.flow('horizontal'))); 180 | 181 | function compute(){ 182 | var Lat = ee.Number.parse(Lat_textbox.getValue()); 183 | var Lon = 
ee.Number.parse(Lon_textbox.getValue()); 184 | var Year = ee.String(Year_textbox.getValue()); 185 | var Month = ee.String(Month_textbox.getValue()); 186 | var Day = ee.String(Day_textbox.getValue()); 187 | var Hour = ee.String(Hour_textbox.getValue()); 188 | var Date = Year.cat('-').cat(Month).cat('-').cat(Day).cat('T').cat(Hour).cat(':00:00'); 189 | var geom = ee.Geometry.Point([Lon, Lat]); 190 | var H2O = water(geom,ee.Date(Date)); 191 | var O3 = ozone(geom,ee.Date(Date)); 192 | var AOT = aerosol(geom,ee.Date(Date)); 193 | Lon_display.setValue('Lon: ' + Lon.getInfo()); 194 | Lat_display.setValue('Lat: ' + Lat.getInfo()); 195 | Date_display.setValue('Date: ' + Date.getInfo()); 196 | H2O_display.setValue('Water Vapour (g/cm^2): ' + ee.String(H2O).getInfo()); 197 | O3_display.setValue('Ozone (cm-atm): ' + O3.getInfo()); 198 | AOT_display.setValue('Aerosol Optical Thickness: ' + AOT.getInfo()); 199 | mapPanel.centerObject(geom,11); 200 | var dot = ui.Map.Layer(geom, {color: '000000'}, 'location'); 201 | mapPanel.layers().set(0, dot); 202 | } 203 | 204 | var button = ui.Button({ 205 | label: 'Calculate', 206 | onClick: compute 207 | }); 208 | mainPanel.add(button); 209 | 210 | var Lon_display = ui.Label(); 211 | var Lat_display = ui.Label(); 212 | mainPanel.add(ui.Panel([Lon_display, Lat_display], ui.Panel.Layout.flow('horizontal'))); 213 | var Date_display = ui.Label(); 214 | var H2O_display = ui.Label(); 215 | var O3_display = ui.Label(); 216 | var AOT_display = ui.Label(); 217 | mainPanel.add(Date_display); 218 | mainPanel.add(H2O_display); 219 | mainPanel.add(O3_display); 220 | mainPanel.add(AOT_display); 221 | 222 | 223 | var mapPanel = ui.Map(); 224 | mapPanel.style().set('cursor', 'crosshair'); 225 | var clickmap = function (coords) { 226 | // Update the lon/lat textbox with values from the click event. 
227 | Lat_textbox.setValue(coords.lat.toFixed(2)); 228 | Lon_textbox.setValue(coords.lon.toFixed(2)); 229 | }; 230 | mapPanel.onClick(clickmap); 231 | 232 | mainPanel.add(ui.Label('------------------')); 233 | mainPanel.add(ui.Label('Created by Ivan Kwong, in December 2022')); 234 | mainPanel.add(ui.Label('GitHub page', {}, 235 | 'https://github.com/ivanhykwong/Marine-Water-Quality-Time-Series-HK')); 236 | 237 | ui.root.clear(); 238 | ui.root.add(ui.SplitPanel(mainPanel, mapPanel)); 239 | -------------------------------------------------------------------------------- /Part1_ImagePreprocessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# **Atmospheric correction of Sentinel 2 image using Py6S in Google Colab environment**\n", 21 | "\n", 22 | "This is the first part of python codes used in the article. 
The codes are tested inside Google Colab environment using Hong Kong water as the study area.\n", 23 | "\n", 24 | "Guidance and reference provided at the following websites are appreciated.\n", 25 | "\n", 26 | "* https://github.com/samsammurphy/gee-atmcorr-S2\n", 27 | "* https://github.com/ndminhhus/geeguide/blob/master/02.Atm-correction.md\n", 28 | "* https://blog.csdn.net/qq_45110581/article/details/108629636\n", 29 | "\n", 30 | "\n", 31 | "\n", 32 | "\n", 33 | "\n", 34 | "\n" 35 | ], 36 | "metadata": { 37 | "id": "_Uqq-ql9K-JU" 38 | } 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "source": [ 43 | "# Step 1 - Set up Py6S in Google Colab" 44 | ], 45 | "metadata": { 46 | "id": "OCD-euOFNEcC" 47 | } 48 | }, 49 | { 50 | "cell_type": "code", 51 | "metadata": { 52 | "id": "PinuZF07l__N" 53 | }, 54 | "source": [ 55 | "!gfortran -v\n", 56 | "!wget http://rtwilson.com/downloads/6SV-1.1.tar\n", 57 | "!tar xvf 6SV-1.1.tar\n", 58 | "!cd 6SV1.1" 59 | ], 60 | "execution_count": null, 61 | "outputs": [] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "source": [ 66 | "**Manual work required before executing the subsequent code**\n", 67 | "\n", 68 | "Refer to comments below" 69 | ], 70 | "metadata": { 71 | "id": "NOYRtE-TMZ2E" 72 | } 73 | }, 74 | { 75 | "cell_type": "code", 76 | "metadata": { 77 | "id": "sBstCWxeizie" 78 | }, 79 | "source": [ 80 | "# modify \"makefile\" from \"FC = g77 $(FFLAGS)\" to \"FC = gfortran -std=legacy -ffixed-line-length-none -ffpe-summary=none $(FFLAGS)\"\n", 81 | "# upload modified \"makefile\" to /content/6SV1.1\n", 82 | "\n", 83 | "import os\n", 84 | "os.chdir(\"/content/6SV1.1\")\n", 85 | "!ls\n", 86 | "!make\n", 87 | "os.environ[\"PATH\"]=\"/content/6SV1.1:\"+os.environ[\"PATH\"]\n", 88 | "# test\n", 89 | "!sixsV1.1 < /content/Examples/Example_In_1.txt\n", 90 | "!pip install Py6S\n", 91 | "from Py6S import *\n", 92 | "SixS.test()" 93 | ], 94 | "execution_count": null, 95 | "outputs": [] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | 
"source": [ 100 | "# Step 2 - Define functions required in atmospheric correction" 101 | ], 102 | "metadata": { 103 | "id": "Tcyi0J-jM1mL" 104 | } 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "source": [ 109 | "**Functions created by Sam Murphy**\n", 110 | "\n", 111 | "Modified from https://github.com/samsammurphy/gee-atmcorr-S2" 112 | ], 113 | "metadata": { 114 | "id": "cTHmiwFmNrez" 115 | } 116 | }, 117 | { 118 | "cell_type": "code", 119 | "metadata": { 120 | "id": "FeO5C5TcfV3G" 121 | }, 122 | "source": [ 123 | "\"\"\"\n", 124 | "atmospheric.py, Sam Murphy (2016-10-26)\n", 125 | "\n", 126 | "Atmospheric water vapour, ozone and AOT from GEE\n", 127 | "\n", 128 | "Usage\n", 129 | "H2O = Atmospheric.water(geom,date)\n", 130 | "O3 = Atmospheric.ozone(geom,date)\n", 131 | "AOT = Atmospheric.aerosol(geom,date)\n", 132 | "\n", 133 | "\"\"\"\n", 134 | "\n", 135 | "\n", 136 | "import ee\n", 137 | "\n", 138 | "class Atmospheric():\n", 139 | "\n", 140 | " def round_date(date,xhour):\n", 141 | " \"\"\"\n", 142 | " rounds a date of to the closest 'x' hours\n", 143 | " \"\"\"\n", 144 | " y = date.get('year')\n", 145 | " m = date.get('month')\n", 146 | " d = date.get('day')\n", 147 | " H = date.get('hour')\n", 148 | " HH = H.divide(xhour).round().multiply(xhour)\n", 149 | " return date.fromYMD(y,m,d).advance(HH,'hour')\n", 150 | "\n", 151 | " def round_month(date):\n", 152 | " \"\"\"\n", 153 | " round date to closest month\n", 154 | " \"\"\"\n", 155 | " # start of THIS month\n", 156 | " m1 = date.fromYMD(date.get('year'),date.get('month'),ee.Number(1))\n", 157 | "\n", 158 | " # start of NEXT month\n", 159 | " m2 = m1.advance(1,'month')\n", 160 | "\n", 161 | " # difference from date\n", 162 | " d1 = ee.Number(date.difference(m1,'day')).abs()\n", 163 | " d2 = ee.Number(date.difference(m2,'day')).abs()\n", 164 | "\n", 165 | " # return closest start of month\n", 166 | " return ee.Date(ee.Algorithms.If(d2.gt(d1),m1,m2))\n", 167 | "\n", 168 | "\n", 169 | "\n", 170 | " def 
water(geom,date):\n", 171 | " \"\"\"\n", 172 | " Water vapour column above target at time of image aquisition.\n", 173 | "\n", 174 | " (Kalnay et al., 1996, The NCEP/NCAR 40-Year Reanalysis Project. Bull.\n", 175 | " Amer. Meteor. Soc., 77, 437-471)\n", 176 | " \"\"\"\n", 177 | "\n", 178 | " # Point geometry required\n", 179 | " centroid = geom.centroid()\n", 180 | "\n", 181 | " # H2O datetime is in 6 hour intervals\n", 182 | " H2O_date = Atmospheric.round_date(date,6)\n", 183 | "\n", 184 | " # filtered water collection\n", 185 | " water_ic = ee.ImageCollection('NCEP_RE/surface_wv').filterDate(H2O_date, H2O_date.advance(1,'month'))\n", 186 | "\n", 187 | " # water image\n", 188 | " water_img = ee.Image(water_ic.first())\n", 189 | "\n", 190 | " # water_vapour at target\n", 191 | " water = water_img.reduceRegion(reducer=ee.Reducer.mean(), geometry=centroid).get('pr_wtr')\n", 192 | "\n", 193 | " # convert to Py6S units (Google = kg/m^2, Py6S = g/cm^2)\n", 194 | " water_Py6S_units = ee.Number(water).divide(10)\n", 195 | "\n", 196 | " return water_Py6S_units\n", 197 | "\n", 198 | "\n", 199 | "\n", 200 | " def ozone(geom,date):\n", 201 | " \"\"\"\n", 202 | " returns ozone measurement from merged TOMS/OMI dataset\n", 203 | "\n", 204 | " OR\n", 205 | "\n", 206 | " uses our fill value (which is mean value for that latlon and day-of-year)\n", 207 | "\n", 208 | " \"\"\"\n", 209 | "\n", 210 | " # Point geometry required\n", 211 | " centroid = geom.centroid()\n", 212 | "\n", 213 | " def ozone_measurement(centroid,O3_date):\n", 214 | "\n", 215 | " # filtered ozone collection\n", 216 | " ozone_ic = ee.ImageCollection('TOMS/MERGED').filterDate(O3_date, O3_date.advance(1,'month'))\n", 217 | "\n", 218 | " # ozone image\n", 219 | " ozone_img = ee.Image(ozone_ic.first())\n", 220 | "\n", 221 | " # ozone value IF TOMS/OMI image exists ELSE use fill value\n", 222 | " ozone = ee.Algorithms.If(ozone_img,\\\n", 223 | " ozone_img.reduceRegion(reducer=ee.Reducer.mean(), 
geometry=centroid).get('ozone'),\\\n", 224 | " ozone_fill(centroid,O3_date))\n", 225 | "\n", 226 | " return ozone\n", 227 | "\n", 228 | " def ozone_fill(centroid,O3_date):\n", 229 | " \"\"\"\n", 230 | " Gets our ozone fill value (i.e. mean value for that doy and latlon)\n", 231 | "\n", 232 | " you can see it\n", 233 | " 1) compared to LEDAPS: https://code.earthengine.google.com/8e62a5a66e4920e701813e43c0ecb83e\n", 234 | " 2) as a video: https://www.youtube.com/watch?v=rgqwvMRVguI&feature=youtu.be\n", 235 | "\n", 236 | " \"\"\"\n", 237 | "\n", 238 | " # ozone fills (i.e. one band per doy)\n", 239 | " ozone_fills = ee.ImageCollection('users/samsammurphy/public/ozone_fill').toList(366)\n", 240 | "\n", 241 | " # day of year index\n", 242 | " jan01 = ee.Date.fromYMD(O3_date.get('year'),1,1)\n", 243 | " doy_index = date.difference(jan01,'day').toInt()# (NB. index is one less than doy, so no need to +1)\n", 244 | "\n", 245 | " # day of year image\n", 246 | " fill_image = ee.Image(ozone_fills.get(doy_index))\n", 247 | "\n", 248 | " # return scalar fill value\n", 249 | " return fill_image.reduceRegion(reducer=ee.Reducer.mean(), geometry=centroid).get('ozone')\n", 250 | "\n", 251 | " # O3 datetime in 24 hour intervals\n", 252 | " O3_date = Atmospheric.round_date(date,24)\n", 253 | "\n", 254 | " # TOMS temporal gap\n", 255 | " TOMS_gap = ee.DateRange('1994-11-01','1996-08-01')\n", 256 | "\n", 257 | " # avoid TOMS gap entirely\n", 258 | " ozone = ee.Algorithms.If(TOMS_gap.contains(O3_date),ozone_fill(centroid,O3_date),ozone_measurement(centroid,O3_date))\n", 259 | "\n", 260 | " # fix other data gaps (e.g. spatial, missing images, etc..)\n", 261 | " ozone = ee.Algorithms.If(ozone,ozone,ozone_fill(centroid,O3_date))\n", 262 | "\n", 263 | " #convert to Py6S units\n", 264 | " ozone_Py6S_units = ee.Number(ozone).divide(1000)# (i.e. 
Dobson units are milli-atm-cm )\n", 265 | "\n", 266 | " return ozone_Py6S_units\n", 267 | "\n", 268 | "\n", 269 | " def aerosol(geom,date):\n", 270 | " \"\"\"\n", 271 | " Aerosol Optical Thickness.\n", 272 | "\n", 273 | " try:\n", 274 | " MODIS Aerosol Product (monthly)\n", 275 | " except:\n", 276 | " fill value\n", 277 | " \"\"\"\n", 278 | "\n", 279 | " def aerosol_fill(date):\n", 280 | " \"\"\"\n", 281 | " MODIS AOT fill value for this month (i.e. no data gaps)\n", 282 | " \"\"\"\n", 283 | " return ee.Image('users/samsammurphy/public/AOT_stack')\\\n", 284 | " .select([ee.String('AOT_').cat(date.format('M'))])\\\n", 285 | " .rename(['AOT_550'])\n", 286 | "\n", 287 | "\n", 288 | " def aerosol_this_month(date):\n", 289 | " \"\"\"\n", 290 | " MODIS AOT original data product for this month (i.e. some data gaps)\n", 291 | " \"\"\"\n", 292 | " # image for this month\n", 293 | " img = ee.Image(\\\n", 294 | " ee.ImageCollection('MODIS/006/MOD08_M3')\\\n", 295 | " .filterDate(Atmospheric.round_month(date))\\\n", 296 | " .first()\\\n", 297 | " )\n", 298 | "\n", 299 | " # fill missing month (?)\n", 300 | " img = ee.Algorithms.If(img,\\\n", 301 | " # all good\n", 302 | " img\\\n", 303 | " .select(['Aerosol_Optical_Depth_Land_Mean_Mean_550'])\\\n", 304 | " .divide(1000)\\\n", 305 | " .rename(['AOT_550']),\\\n", 306 | " # missing month\n", 307 | " aerosol_fill(date))\n", 308 | "\n", 309 | " return img\n", 310 | "\n", 311 | "\n", 312 | " def get_AOT(AOT_band,geom):\n", 313 | " \"\"\"\n", 314 | " AOT scalar value for target\n", 315 | " \"\"\"\n", 316 | " return ee.Image(AOT_band).reduceRegion(reducer=ee.Reducer.mean(),\\\n", 317 | " geometry=geom.centroid())\\\n", 318 | " .get('AOT_550')\n", 319 | "\n", 320 | "\n", 321 | " after_modis_start = date.difference(ee.Date('2000-03-01'),'month').gt(0)\n", 322 | "\n", 323 | " AOT_band = ee.Algorithms.If(after_modis_start, aerosol_this_month(date), aerosol_fill(date))\n", 324 | "\n", 325 | " AOT = get_AOT(AOT_band,geom)\n", 326 | "\n", 
327 | " AOT = ee.Algorithms.If(AOT,AOT,get_AOT(aerosol_fill(date),geom))\n", 328 | " # i.e. check reduce region worked (else force fill value)\n", 329 | "\n", 330 | " return AOT" 331 | ], 332 | "execution_count": null, 333 | "outputs": [] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "source": [ 338 | "Import required libraries" 339 | ], 340 | "metadata": { 341 | "id": "3zDQifBXNzpy" 342 | } 343 | }, 344 | { 345 | "cell_type": "code", 346 | "metadata": { 347 | "id": "x2tuCpkef5Xt" 348 | }, 349 | "source": [ 350 | "import ee\n", 351 | "from Py6S import *\n", 352 | "from datetime import datetime\n", 353 | "import math\n", 354 | "import os\n", 355 | "import sys" 356 | ], 357 | "execution_count": null, 358 | "outputs": [] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "source": [ 363 | "**Initialize Google Earth Engine session**\n", 364 | "\n", 365 | "Need enter verification using GEE account" 366 | ], 367 | "metadata": { 368 | "id": "juwUBcecN4Ay" 369 | } 370 | }, 371 | { 372 | "cell_type": "code", 373 | "metadata": { 374 | "id": "MaH1Ub1zgbJl" 375 | }, 376 | "source": [ 377 | "ee.Authenticate()\n", 378 | "ee.Initialize()" 379 | ], 380 | "execution_count": null, 381 | "outputs": [] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "source": [ 386 | "**Py6S function**" 387 | ], 388 | "metadata": { 389 | "id": "WnvRgEOuODrR" 390 | } 391 | }, 392 | { 393 | "cell_type": "code", 394 | "metadata": { 395 | "id": "JdcVyK0Agox-" 396 | }, 397 | "source": [ 398 | "# Define Py6S function\n", 399 | "# Modified from https://github.com/ndminhhus/geeguide/blob/master/02.Atm-correction.md\n", 400 | "\n", 401 | "def func1(img):\n", 402 | " S2 = ee.Image(img)\n", 403 | " date = S2.date()\n", 404 | " # top of atmosphere reflectance\n", 405 | " toa = S2.divide(10000)\n", 406 | "\n", 407 | " info = S2.getInfo()['properties']\n", 408 | " scene_date = datetime.utcfromtimestamp(info['system:time_start']/1000)# i.e. 
Python uses seconds, EE uses milliseconds\n", 409 | " solar_z = info['MEAN_SOLAR_ZENITH_ANGLE']\n", 410 | "\n", 411 | " h2o = Atmospheric.water(geom,date).getInfo()\n", 412 | " o3 = Atmospheric.ozone(geom,date).getInfo()\n", 413 | " aot = Atmospheric.aerosol(geom,date).getInfo()\n", 414 | "\n", 415 | " SRTM = ee.Image('CGIAR/SRTM90_V4')# Shuttle Radar Topography mission covers *most* of the Earth\n", 416 | " alt = SRTM.reduceRegion(reducer = ee.Reducer.mean(),geometry = geom.centroid()).get('elevation').getInfo()\n", 417 | " km = alt/1000 # i.e. Py6S uses units of kilometers\n", 418 | "\n", 419 | " # Instantiate\n", 420 | " s = SixS()\n", 421 | "\n", 422 | " # Atmospheric constituents\n", 423 | " s.atmos_profile = AtmosProfile.UserWaterAndOzone(h2o,o3)\n", 424 | " s.aero_profile = AeroProfile.Maritime # https://github.com/robintw/Py6S/blob/master/Py6S/Params/aeroprofile.py\n", 425 | " s.aot550 = aot\n", 426 | "\n", 427 | " # Earth-Sun-satellite geometry\n", 428 | " s.geometry = Geometry.User()\n", 429 | " s.geometry.view_z = 0 # always NADIR\n", 430 | " s.geometry.solar_z = solar_z # solar zenith angle\n", 431 | " s.geometry.month = scene_date.month # month and day used for Earth-Sun distance\n", 432 | " s.geometry.day = scene_date.day # month and day used for Earth-Sun distance\n", 433 | " s.altitudes.set_sensor_satellite_level()\n", 434 | " s.altitudes.set_target_custom_altitude(km)\n", 435 | "\n", 436 | " def spectralResponseFunction(bandname):\n", 437 | " \"\"\"\n", 438 | " Extract spectral response function for given band name\n", 439 | " \"\"\"\n", 440 | " bandSelect = {\n", 441 | " 'B1':PredefinedWavelengths.S2A_MSI_01,\n", 442 | " 'B2':PredefinedWavelengths.S2A_MSI_02,\n", 443 | " 'B3':PredefinedWavelengths.S2A_MSI_03,\n", 444 | " 'B4':PredefinedWavelengths.S2A_MSI_04,\n", 445 | " 'B5':PredefinedWavelengths.S2A_MSI_05,\n", 446 | " 'B6':PredefinedWavelengths.S2A_MSI_06,\n", 447 | " 'B7':PredefinedWavelengths.S2A_MSI_07,\n", 448 | " 
'B8':PredefinedWavelengths.S2A_MSI_08,\n", 449 | " 'B8A':PredefinedWavelengths.S2A_MSI_8A,\n", 450 | " 'B9':PredefinedWavelengths.S2A_MSI_09,\n", 451 | " 'B10':PredefinedWavelengths.S2A_MSI_10,\n", 452 | " 'B11':PredefinedWavelengths.S2A_MSI_11,\n", 453 | " 'B12':PredefinedWavelengths.S2A_MSI_12,\n", 454 | " }\n", 455 | " return Wavelength(bandSelect[bandname])\n", 456 | "\n", 457 | " def toa_to_rad(bandname):\n", 458 | " \"\"\"\n", 459 | " Converts top of atmosphere reflectance to at-sensor radiance\n", 460 | " \"\"\"\n", 461 | " # solar exoatmospheric spectral irradiance\n", 462 | " ESUN = info['SOLAR_IRRADIANCE_'+bandname]\n", 463 | " solar_angle_correction = math.cos(math.radians(solar_z))\n", 464 | " # Earth-Sun distance (from day of year)\n", 465 | " doy = scene_date.timetuple().tm_yday\n", 466 | " d = 1 - 0.01672 * math.cos(0.9856 * (doy-4))# http://physics.stackexchange.com/questions/177949/earth-sun-distance-on-a-given-day-of-the-year\n", 467 | " # conversion factor\n", 468 | " multiplier = ESUN*solar_angle_correction/(math.pi*d**2)\n", 469 | " # at-sensor radiance\n", 470 | " rad = toa.select(bandname).multiply(multiplier)\n", 471 | " return rad\n", 472 | "\n", 473 | " def surface_reflectance(bandname):\n", 474 | " \"\"\"\n", 475 | " Calculate surface reflectance from at-sensor radiance given waveband name\n", 476 | " \"\"\"\n", 477 | " # run 6S for this waveband\n", 478 | " s.wavelength = spectralResponseFunction(bandname)\n", 479 | " s.run()\n", 480 | " # extract 6S outputs\n", 481 | " Edir = s.outputs.direct_solar_irradiance #direct solar irradiance\n", 482 | " Edif = s.outputs.diffuse_solar_irradiance #diffuse solar irradiance\n", 483 | " Lp = s.outputs.atmospheric_intrinsic_radiance #path radiance\n", 484 | " absorb = s.outputs.trans['global_gas'].upward #absorption transmissivity\n", 485 | " scatter = s.outputs.trans['total_scattering'].upward #scattering transmissivity\n", 486 | " tau2 = absorb*scatter #total transmissivity\n", 487 | " # radiance 
to surface reflectance\n", 488 | " rad = toa_to_rad(bandname)\n", 489 | " ref = rad.subtract(Lp).multiply(math.pi).divide(tau2*(Edir+Edif))\n", 490 | " return ref\n", 491 | "\n", 492 | " # all wavebands\n", 493 | " output = S2.select('QA60')\n", 494 | " for band in ['B1','B2','B3','B4','B5','B6','B7','B8','B8A','B9','B10','B11','B12']:\n", 495 | " print(band)\n", 496 | " output = output.addBands(surface_reflectance(band))\n", 497 | "\n", 498 | " return output\n" 499 | ], 500 | "execution_count": null, 501 | "outputs": [] 502 | }, 503 | { 504 | "cell_type": "markdown", 505 | "source": [ 506 | "# Step 3 - Remove clouds using cloud mask" 507 | ], 508 | "metadata": { 509 | "id": "qcBG4lRpOMEp" 510 | } 511 | }, 512 | { 513 | "cell_type": "code", 514 | "metadata": { 515 | "id": "cdThxtTHsgXz" 516 | }, 517 | "source": [ 518 | "# Remove cloud and cloud shadow\n", 519 | "# Modified from https://developers.google.com/earth-engine/tutorials/community/sentinel-2-s2cloudless\n", 520 | "\n", 521 | "AOI_point_right = ee.Geometry.Point(114.05, 22.40) # Define AOI location\n", 522 | "AOI_point_left = ee.Geometry.Point(113.80, 22.40) # Mosaic with another tile is needed to cover the study area\n", 523 | "START_DATE = '2015-01-01' # Define start date\n", 524 | "END_DATE = '2021-12-31' # Define end date\n", 525 | "CLD_PRB_THRESH = 60 #\tCloud probability (%); values greater than are considered cloud\n", 526 | "NIR_DRK_THRESH = 0.15 # Near-infrared reflectance; values less than are considered potential cloud shadow\n", 527 | "CLD_PRJ_DIST = 1 # Maximum distance (km) to search for cloud shadows from cloud edges\n", 528 | "BUFFER = 100 # Distance (m) to dilate the edge of cloud-identified objects\n", 529 | "\n", 530 | "def get_s2_sr_cld_col(aoi, start_date, end_date):\n", 531 | " # Import and filter S2 SR.\n", 532 | " s2_sr_col = (ee.ImageCollection('COPERNICUS/S2')\n", 533 | " .filterBounds(aoi)\n", 534 | " .filterDate(start_date, end_date)\n", 535 | " 
.filter(ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', 20)))\n", 536 | "\n", 537 | " # Import and filter s2cloudless.\n", 538 | " s2_cloudless_col = (ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY')\n", 539 | " .filterBounds(aoi)\n", 540 | " .filterDate(start_date, end_date))\n", 541 | "\n", 542 | " # Join the filtered s2cloudless collection to the SR collection by the 'system:index' property.\n", 543 | " return ee.ImageCollection(ee.Join.saveFirst('s2cloudless').apply(**{\n", 544 | " 'primary': s2_sr_col,\n", 545 | " 'secondary': s2_cloudless_col,\n", 546 | " 'condition': ee.Filter.equals(**{\n", 547 | " 'leftField': 'system:index',\n", 548 | " 'rightField': 'system:index'\n", 549 | " })\n", 550 | " }))\n", 551 | "\n", 552 | "def get_s2_sr_cld_col_left(aoi, start_date, end_date):\n", 553 | " # Import and filter S2 SR.\n", 554 | " s2_sr_col = (ee.ImageCollection('COPERNICUS/S2')\n", 555 | " .filterBounds(aoi)\n", 556 | " .filterDate(start_date, end_date))\n", 557 | "\n", 558 | " # Import and filter s2cloudless.\n", 559 | " s2_cloudless_col = (ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY')\n", 560 | " .filterBounds(aoi)\n", 561 | " .filterDate(start_date, end_date))\n", 562 | "\n", 563 | " # Join the filtered s2cloudless collection to the SR collection by the 'system:index' property.\n", 564 | " return ee.ImageCollection(ee.Join.saveFirst('s2cloudless').apply(**{\n", 565 | " 'primary': s2_sr_col,\n", 566 | " 'secondary': s2_cloudless_col,\n", 567 | " 'condition': ee.Filter.equals(**{\n", 568 | " 'leftField': 'system:index',\n", 569 | " 'rightField': 'system:index'\n", 570 | " })\n", 571 | " }))\n", 572 | "\n", 573 | "s2_right = get_s2_sr_cld_col(AOI_point_right, START_DATE, END_DATE)\n", 574 | "s2_left = get_s2_sr_cld_col_left(AOI_point_left, START_DATE, END_DATE)\n", 575 | "\n", 576 | "def add_cloud_bands(img):\n", 577 | " # Get s2cloudless image, subset the probability band.\n", 578 | " cld_prb = ee.Image(img.get('s2cloudless')).select('probability')\n", 579 
| "\n", 580 | " # Condition s2cloudless by the probability threshold value.\n", 581 | " is_cloud = cld_prb.gt(CLD_PRB_THRESH).rename('clouds')\n", 582 | "\n", 583 | " # Add the cloud probability layer and cloud mask as image bands.\n", 584 | " return img.addBands(ee.Image([cld_prb, is_cloud]))\n", 585 | "\n", 586 | "def add_shadow_bands(img):\n", 587 | "\n", 588 | " # Identify dark NIR pixels that are not water (potential cloud shadow pixels).\n", 589 | " SR_BAND_SCALE = 1e4\n", 590 | " dark_pixels = img.select('B8').lt(NIR_DRK_THRESH*SR_BAND_SCALE).rename('dark_pixels')\n", 591 | "\n", 592 | " # Determine the direction to project cloud shadow from clouds (assumes UTM projection).\n", 593 | " shadow_azimuth = ee.Number(90).subtract(ee.Number(img.get('MEAN_SOLAR_AZIMUTH_ANGLE')));\n", 594 | "\n", 595 | " # Project shadows from clouds for the distance specified by the CLD_PRJ_DIST input.\n", 596 | " cld_proj = (img.select('clouds').directionalDistanceTransform(shadow_azimuth, CLD_PRJ_DIST*10)\n", 597 | " .reproject(**{'crs': img.select(0).projection(), 'scale': 100})\n", 598 | " .select('distance')\n", 599 | " .mask()\n", 600 | " .rename('cloud_transform'))\n", 601 | "\n", 602 | " # Identify the intersection of dark pixels with cloud shadow projection.\n", 603 | " shadows = cld_proj.multiply(dark_pixels).rename('shadows')\n", 604 | "\n", 605 | " # Add dark pixels, cloud projection, and identified shadows as image bands.\n", 606 | " return img.addBands(ee.Image([dark_pixels, cld_proj, shadows]))\n", 607 | "\n", 608 | "def add_cld_shdw_mask(img):\n", 609 | " # Add cloud component bands.\n", 610 | " img_cloud = add_cloud_bands(img)\n", 611 | "\n", 612 | " # Add cloud shadow component bands.\n", 613 | " img_cloud_shadow = add_shadow_bands(img_cloud)\n", 614 | "\n", 615 | " # Combine cloud and shadow mask, set cloud and shadow as value 1, else 0.\n", 616 | " is_cld_shdw = img_cloud_shadow.select('clouds').add(img_cloud_shadow.select('shadows')).gt(0)\n", 617 | "\n", 618 | 
" # Remove small cloud-shadow patches and dilate remaining pixels by BUFFER input.\n", 619 | " # 20 m scale is for speed, and assumes clouds don't require 10 m precision.\n", 620 | " is_cld_shdw = (is_cld_shdw.focal_min(2).focal_max(BUFFER*2/20)\n", 621 | " .reproject(**{'crs': img.select([0]).projection(), 'scale': 20})\n", 622 | " .rename('cloudmask'))\n", 623 | "\n", 624 | " # Add the final cloud-shadow mask to the image.\n", 625 | " return img.addBands(is_cld_shdw)\n", 626 | "\n", 627 | "def apply_cld_shdw_mask(img):\n", 628 | " # Subset the cloudmask band and invert it so clouds/shadow are 0, else 1.\n", 629 | " not_cld_shdw = img.select('cloudmask').Not()\n", 630 | "\n", 631 | " # Subset reflectance bands and update their masks, return the result.\n", 632 | " # return img.select('B.*').updateMask(not_cld_shdw)\n", 633 | " return img.updateMask(not_cld_shdw)\n", 634 | "\n", 635 | "s2_cloudless_right = (s2_right.map(add_cld_shdw_mask)\n", 636 | " .map(apply_cld_shdw_mask))\n", 637 | "s2_cloudless_left = (s2_left.map(add_cld_shdw_mask)\n", 638 | " .map(apply_cld_shdw_mask))\n" 639 | ], 640 | "execution_count": null, 641 | "outputs": [] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "source": [ 646 | "# Step 4 - Execute the image processing\n", 647 | "\n", 648 | "Results are saved as GEE assets" 649 | ], 650 | "metadata": { 651 | "id": "7MVP8tg7Omaq" 652 | } 653 | }, 654 | { 655 | "cell_type": "code", 656 | "source": [ 657 | "# Define function to chain the above process\n", 658 | "\n", 659 | "def preprocess(image):\n", 660 | " s2_boa = func1(image)\n", 661 | " d1 = s2_boa.clip(aoi)\n", 662 | "\n", 663 | " # export to asset\n", 664 | " fname = ee.String(d1.get('system:index')).getInfo()\n", 665 | " export = ee.batch.Export.image.toAsset(\n", 666 | " image = d1,\n", 667 | " description= 'S2_BOA_' + fname,\n", 668 | " assetId = 'users/khoyinivan/S2_Py6S_mask/' +'S2_BOA_' + fname, # Manually create image collection in GEE asset first\n", 669 | " region = 
aoi,\n", 670 | " scale = 10)\n", 671 | " export.start()\n", 672 | " print('exporting ' +fname + '--->done')\n", 673 | "\n", 674 | " # find adjacent S2 tile\n", 675 | " d1_date = d1.date().format('yyyy-MM-dd')\n", 676 | " s2_left = s2_cloudless_left.filterDate(d1.date().advance(-1,'day').format('yyyy-MM-dd'), d1.date().advance(1,'day').format('yyyy-MM-dd')).first()\n", 677 | " s2_left_boa = func1(s2_left)\n", 678 | " d2 = s2_left_boa.clip(aoi)\n", 679 | "\n", 680 | " # export to asset\n", 681 | " fname = ee.String(d2.get('system:index')).getInfo()\n", 682 | " export = ee.batch.Export.image.toAsset(\n", 683 | " image = d2,\n", 684 | " description= 'S2_BOA_' + fname,\n", 685 | " assetId = 'users/khoyinivan/S2_Py6S_mask/' +'S2_BOA_' + fname,\n", 686 | " region = aoi,\n", 687 | " scale = 10)\n", 688 | " export.start()\n", 689 | " print('exporting ' +fname + '--->done')" 690 | ], 691 | "metadata": { 692 | "id": "rBmoyVhVO39i" 693 | }, 694 | "execution_count": null, 695 | "outputs": [] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "metadata": { 700 | "id": "WVIc6cbqvBQw" 701 | }, 702 | "source": [ 703 | "# Run the preprocessing & export to asset\n", 704 | "\n", 705 | "aoi = ee.Geometry.Polygon([[[113.800, 22.570],[113.800, 22.120],[114.514, 22.120],[114.514, 22.570]]]) # Define output AOI\n", 706 | "geom = aoi\n", 707 | "dates = []\n", 708 | "\n", 709 | "s2_col = s2_cloudless_right\n", 710 | "\n", 711 | "col_length = s2_col.size().getInfo()\n", 712 | "print(col_length)\n", 713 | "\n", 714 | "for i in range(0,col_length):\n", 715 | " print(i)\n", 716 | " s2_list = s2_col.toList(col_length)\n", 717 | " img = ee.Image(s2_list.get(i))\n", 718 | " d1_date_info = img.date().format('yyyy-MM-dd').getInfo()\n", 719 | " if d1_date_info in dates:\n", 720 | " continue\n", 721 | " dates.append(d1_date_info)\n", 722 | " preprocess(img)\n", 723 | "\n", 724 | "print(dates)\n" 725 | ], 726 | "execution_count": null, 727 | "outputs": [] 728 | }, 729 | { 730 | "cell_type": "markdown", 
731 | "source": [ 732 | "# Step 5 - Mosaic the Sentinel-2 tiles\n", 733 | "\n", 734 | "The study area is divided into 2 tiles & require mosaicking step" 735 | ], 736 | "metadata": { 737 | "id": "f96ckM5MO8iB" 738 | } 739 | }, 740 | { 741 | "cell_type": "code", 742 | "metadata": { 743 | "id": "7x6k9fBfKK0l" 744 | }, 745 | "source": [ 746 | "# Mosaic the two tiles created above into one mosaic\n", 747 | "\n", 748 | "aoi = ee.Geometry.Polygon([[[113.800, 22.570],[113.800, 22.120],[114.514, 22.120],[114.514, 22.570]]])\n", 749 | "\n", 750 | "s2_boa_col = ee.ImageCollection(\"users/khoyinivan/S2_Py6S_mask\")\n", 751 | "print(s2_boa_col.size().getInfo())\n", 752 | "days = ee.Dictionary(s2_boa_col.aggregate_histogram('system:time_start')).keys().getInfo()\n", 753 | "days = [datetime.fromtimestamp(float(s)/1000.0).strftime('%Y-%m-%d') for s in days]\n", 754 | "days = list(dict.fromkeys(days))\n", 755 | "print(len(days))\n", 756 | "\n", 757 | "for i in range(0,len(days)):\n", 758 | " print(i)\n", 759 | " print(days[i])\n", 760 | " d = ee.Date(days[i])\n", 761 | " t = s2_boa_col.filterDate(d,d.advance(1,'day'))\n", 762 | " f = ee.Image(t.first())\n", 763 | " t = t.mosaic().select(['B1','B2','B3','B4','B5','B6','B7','B8','B8A','B11','B12'])\n", 764 | " t = t.set('system:time_start',d.millis())\n", 765 | " t = t.copyProperties(f, f.propertyNames())\n", 766 | " t = ee.Image(t)\n", 767 | " fname = ee.String(t.get('system:index')).getInfo()\n", 768 | " export = ee.batch.Export.image.toAsset(\n", 769 | " image = t,\n", 770 | " description = fname,\n", 771 | " assetId = 'users/khoyinivan/S2_Py6S_mask_m/' + fname,\n", 772 | " region = aoi,\n", 773 | " scale = 10)\n", 774 | " export.start()\n", 775 | " print('exporting ' +fname + '--->done')" 776 | ], 777 | "execution_count": null, 778 | "outputs": [] 779 | } 780 | ] 781 | } -------------------------------------------------------------------------------- /LocalProcessingPipeline_Part2_NewlyAcquiredImage.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Install and load libraries for image processing steps\n", 10 | "# pip install --user landsatxplore-master.zip\n", 11 | "from landsatxplore.api import API\n", 12 | "from landsatxplore.earthexplorer import EarthExplorer\n", 13 | "import os\n", 14 | "import shutil\n", 15 | "from datetime import date, datetime, timedelta\n", 16 | "import zipfile\n", 17 | "import tarfile \n", 18 | "import glob\n", 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "from simpledbf import Dbf5\n", 22 | "import requests\n", 23 | "import arcpy\n", 24 | "from arcpy import env\n", 25 | "from arcpy.sa import *\n", 26 | "arcpy.CheckOutExtension(\"spatial\")" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# Search and download Landsat satellite images\n", 36 | "def downloadlandsat(startdate, enddate):\n", 37 | " # Initialize a new API instance and get an access key\n", 38 | " username = \"username\" # change your EarthExplorer username and password\n", 39 | " password = \"password\"\n", 40 | " api = API(username, password)\n", 41 | " # 22.13,113.81,22.59,114.52\n", 42 | " # https://github.com/yannforget/landsatxplore/blob/master/landsatxplore/api.py\n", 43 | " # Search for Landsat TM scenes\n", 44 | " scenes = api.search(\n", 45 | " dataset='landsat_ot_c2_l1', bbox=(113.81, 22.13, 114.52, 22.59),\n", 46 | " start_date=startdate, # start_date='2014-01-01',\n", 47 | " end_date=enddate, # end_date='2015-12-31',\n", 48 | " max_cloud_cover=20, max_results=1000\n", 49 | " )\n", 50 | " print(f\"{len(scenes)} Landsat scenes found.\")\n", 51 | " print(scenes)\n", 52 | " # Log out\n", 53 | " api.logout()\n", 54 | " # Downloading scenes\n", 55 | " if len(scenes) > 0:\n", 56 | " ee = 
EarthExplorer(username, password)\n", 57 | " df = pd.read_csv(\"D:/WaterQuality/ImageInfo.csv\")\n", 58 | " for s in scenes:\n", 59 | " print(s['landsat_product_id'])\n", 60 | " ee.download(s['entity_id'], output_dir='D:/WaterQuality/datadownload')\n", 61 | " df.loc[len(df.index)] = [s['landsat_product_id'], s['start_time'].isoformat()]\n", 62 | " df.to_csv(\"D:/WaterQuality/ImageInfo.csv\", index=False)\n", 63 | " ee.logout()\n", 64 | "\n", 65 | "# Search and download Sentinel satellite images\n", 66 | "def downloadsentinel(startdate, enddate):\n", 67 | " # WKT Representation of BBOX of AOI\n", 68 | " ft = \"POLYGON((113.81 22.13, 114.52 22.13, 114.52 22.59, 113.81 22.59, 113.81 22.13))\" \n", 69 | " data_collection = \"SENTINEL-2\"\n", 70 | "\n", 71 | " def get_keycloak():\n", 72 | " data = {\n", 73 | " \"client_id\": \"cdse-public\",\n", 74 | " \"username\": \"username\", # change your copernicus dataspace username and password\n", 75 | " \"password\": \"password\",\n", 76 | " \"grant_type\": \"password\",\n", 77 | " }\n", 78 | " try:\n", 79 | " r = requests.post(\n", 80 | " \"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token\",\n", 81 | " data=data,\n", 82 | " )\n", 83 | " r.raise_for_status()\n", 84 | " except Exception as e:\n", 85 | " raise Exception(\n", 86 | " f\"Keycloak token creation failed. 
Reponse from the server was: {r.json()}\"\n", 87 | " )\n", 88 | " return r.json()[\"access_token\"]\n", 89 | " \n", 90 | " json_ = requests.get( # cloud le 20\n", 91 | " f\"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq '{data_collection}' and OData.CSC.Intersects(area=geography'SRID=4326;{ft}') and Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover' and att/OData.CSC.DoubleAttribute/Value le 20.00) and ContentDate/Start gt {startdate}T00:00:00.000Z and ContentDate/Start lt {enddate}T23:59:00.000Z&$count=True&$top=1000\"\n", 92 | " ).json()\n", 93 | " p = pd.DataFrame.from_dict(json_[\"value\"]) # Fetch available dataset\n", 94 | " print(f\" total Sentinel tiles found {len(p)}\")\n", 95 | " if len(p)>0:\n", 96 | " # Remove L2A and UTM50 dataset\n", 97 | " p = p[~p[\"Name\"].str.contains(\"L2A\")] \n", 98 | " p = p[~p[\"Name\"].str.contains(\"T50Q\")] \n", 99 | " df = pd.read_csv(\"D:/WaterQuality/ImageInfo.csv\")\n", 100 | " for i in range(len(p)):\n", 101 | " url_id = p[\"Id\"].iloc[i]\n", 102 | " download_name = p[\"Name\"].iloc[i].split(\".\")[0]\n", 103 | " contentdate = p[\"ContentDate\"].iloc[i]\n", 104 | " print(\"Start download: \"+str(i+1)+\"/\"+str(len(p))+\"; \"+download_name)\n", 105 | " keycloak_token = get_keycloak()\n", 106 | " url = f\"https://zipper.dataspace.copernicus.eu/odata/v1/Products(\"+url_id+\")/$value\"\n", 107 | " headers = {\"Authorization\": f\"Bearer {keycloak_token}\"}\n", 108 | " session = requests.Session()\n", 109 | " session.headers.update(headers)\n", 110 | " response = session.get(url, headers=headers, stream=True)\n", 111 | " with open(\"D:/WaterQuality/datadownload/\"+download_name+\".zip\", \"wb\") as file:\n", 112 | " for chunk in response.iter_content(chunk_size=8192):\n", 113 | " if chunk:\n", 114 | " file.write(chunk)\n", 115 | " df.loc[len(df.index)] = [download_name, contentdate[\"Start\"]]\n", 116 | " print(\"Finish download: \"+download_name)\n", 117 | " 
df.to_csv(\"D:/WaterQuality/ImageInfo.csv\", index=False)\n", 118 | "\n", 119 | "# Function to preprocess a single Landsat image\n", 120 | "def preprocessLandsat(tar):\n", 121 | " # extract tar\n", 122 | " datadir = 'D:/WaterQuality/datadownload'\n", 123 | " os.chdir(datadir)\n", 124 | " file = tarfile.open(tar)\n", 125 | " file.extractall('extract')\n", 126 | " file.close()\n", 127 | " # run acolite\n", 128 | " acolitepath = \"D:/WaterQuality/acolite/acolite_py_win/dist/acolite/acolite.exe\"\n", 129 | " settingpath = \"D:/WaterQuality/acolite/setting_landsat.txt\"\n", 130 | " os.system(acolitepath+\" --cli --settings=\"+settingpath)\n", 131 | " def merge_and_mask():\n", 132 | " # merge 7 bands\n", 133 | " os.chdir('atmocor')\n", 134 | " tiflist = glob.glob('*L2R_rhos_*.tif')\n", 135 | " bandorder = [2, 3, 4, 7, 8, 0, 1]\n", 136 | " tiflist = [tiflist[i] for i in bandorder]\n", 137 | " env.workspace = 'D:/WaterQuality/datadownload/atmocor'\n", 138 | " arcpy.CompositeBands_management(tiflist, \"compbands.tif\")\n", 139 | " # mask land and cloud\n", 140 | " ras = Raster(\"compbands.tif\")\n", 141 | " qaband = Raster(glob.glob(datadir+'/extract/*QA_PIXEL.TIF')[0])\n", 142 | " qaband_m = SetNull(qaband>22200,1)\n", 143 | " qaband_m = FocalStatistics(qaband_m, NbrCircle(3,\"CELL\"), \"MEAN\", \"NODATA\") # expand radius 3\n", 144 | " ras_m = ExtractByMask(ras, qaband_m)\n", 145 | " swir = Raster(\"compbands.tif\\Band_6\")\n", 146 | " green = Raster(\"compbands.tif\\Band_3\")\n", 147 | " nir = Raster(\"compbands.tif\\Band_5\")\n", 148 | " red = Raster(\"compbands.tif\\Band_4\")\n", 149 | " ndvi1 = arcpy.sa.Float((red-nir)/(red+nir))\n", 150 | " ndvi1_m = SetNull(ndvi1<0,1)\n", 151 | " ndwi = arcpy.sa.Float((green-swir)/(green+swir))\n", 152 | " ndwi_m = SetNull(ndwi<0,1)\n", 153 | " swir_m = SetNull(swir>0.15,1)\n", 154 | " ras_m = ExtractByMask(ras_m, ndvi1_m)\n", 155 | " ras_m = ExtractByMask(ras_m, ndwi_m)\n", 156 | " ras_m = ExtractByMask(ras_m, swir_m)\n", 157 | " # 
reproject\n", 158 | " aoi = \"D:/WaterQuality/aoi/aoi.shp\"\n", 159 | " outfilename = \"D:/WaterQuality/reflectance/\"+tar.replace(\".tar\",\".tif\")\n", 160 | " arcpy.management.ProjectRaster(ras_m, \"compbands_p.tif\", aoi) \n", 161 | " arcpy.management.Clip(\"compbands_p.tif\", aoi, \"compbands_p_c.tif\", \n", 162 | " \"#\", \"#\", \"NONE\",\"MAINTAIN_EXTENT\")\n", 163 | " arcpy.management.Resample(\"compbands_p_c.tif\", outfilename, 0.00028571429)\n", 164 | " merge_and_mask()\n", 165 | " # empty extract and atmocor\n", 166 | " def emptyfolder(folder):\n", 167 | " for filename in os.listdir(folder):\n", 168 | " file_path = os.path.join(folder, filename) \n", 169 | " if os.path.isfile(file_path) or os.path.islink(file_path):\n", 170 | " os.unlink(file_path)\n", 171 | " emptyfolder(\"D:/WaterQuality/datadownload/extract\")\n", 172 | " emptyfolder(\"D:/WaterQuality/datadownload/atmocor\")\n", 173 | " # delete tarfile\n", 174 | " os.chdir(datadir)\n", 175 | " os.unlink(tar)\n", 176 | "\n", 177 | "# Function to preprocess a single Sentinel image\n", 178 | "def preprocessSentinel(zipf):\n", 179 | " datadir = 'D:/WaterQuality/datadownload'\n", 180 | " os.chdir(datadir)\n", 181 | " with zipfile.ZipFile(zipf, 'r') as zip_ref:\n", 182 | " zip_ref.extractall()\n", 183 | " safefolder = glob.glob('*.SAFE')[0]\n", 184 | " # run acolite\n", 185 | " settingtemp = \"D:/WaterQuality/acolite/setting_sentinel.txt\"\n", 186 | " settingpath = \"D:/WaterQuality/acolite/setting_sentinel2.txt\"\n", 187 | " # Read in the file\n", 188 | " with open(settingtemp, 'r') as file:\n", 189 | " filedata = file.read()\n", 190 | " filedata = filedata.replace('inputfile=', 'inputfile='+os.path.join(datadir,safefolder))\n", 191 | " # Write the file out again\n", 192 | " with open(settingpath, 'w') as file:\n", 193 | " file.write(filedata)\n", 194 | " acolitepath = \"D:/WaterQuality/acolite/acolite_py_win/dist/acolite/acolite.exe\"\n", 195 | " os.system(acolitepath+\" --cli 
--settings=\"+settingpath)\n", 196 | " def merge_and_mask():\n", 197 | " # merge 7 bands\n", 198 | " os.chdir('atmocor')\n", 199 | " tiflist = glob.glob('*L2R_rhos_*.tif')\n", 200 | " if len(tiflist)==0: # if acolite does not produce any files\n", 201 | " return\n", 202 | " bandorder = [2, 3, 4, 5, 10, 0, 1]\n", 203 | " tiflist = [tiflist[i] for i in bandorder]\n", 204 | " env.workspace = 'D:/WaterQuality/datadownload/atmocor'\n", 205 | " arcpy.CompositeBands_management(tiflist, \"compbands.tif\")\n", 206 | " arcpy.management.Resample(\"compbands.tif\", \"compbands_r.tif\", 30)\n", 207 | " # mask land and cloud\n", 208 | " ras = Raster(\"compbands_r.tif\")\n", 209 | " swir = Raster(\"compbands_r.tif\\Band_6\")\n", 210 | " green = Raster(\"compbands_r.tif\\Band_3\")\n", 211 | " nir = Raster(\"compbands_r.tif\\Band_5\")\n", 212 | " red = Raster(\"compbands_r.tif\\Band_4\")\n", 213 | " cloud_m = SetNull((red>0.2)&(nir>0.2),1)\n", 214 | " cloud_m = FocalStatistics(cloud_m, NbrCircle(3,\"CELL\"), \"MEAN\", \"NODATA\") # expand radius 3\n", 215 | " ndvi1 = arcpy.sa.Float((red-nir)/(red+nir))\n", 216 | " ndvi1_m = SetNull(ndvi1<0,1)\n", 217 | " ndwi2 = arcpy.sa.Float((green-swir)/(green+swir))\n", 218 | " ndwi2_m = SetNull(ndwi2<0,1)\n", 219 | " swir_m = SetNull(swir>0.15,1)\n", 220 | " nir_m = SetNull((nir>0.03)&(red>0.08)&(ndwi2_m==1)&(swir_m==1)&(cloud_m==1),1) # remaining haze\n", 221 | " nir_m = FocalStatistics(nir_m, NbrCircle(1,\"CELL\"), \"MEAN\", \"NODATA\") # expand radius 1\n", 222 | " ras_m = ExtractByMask(ras, cloud_m)\n", 223 | " ras_m = ExtractByMask(ras_m, ndvi1_m)\n", 224 | " ras_m = ExtractByMask(ras_m, ndwi2_m)\n", 225 | " ras_m = ExtractByMask(ras_m, swir_m)\n", 226 | " ras_m = ExtractByMask(ras_m, nir_m)\n", 227 | " # reproject\n", 228 | " aoi = \"D:/WaterQuality/aoi/aoi.shp\"\n", 229 | " outfilename = \"D:/WaterQuality/reflectance/\"+safefolder.replace(\".SAFE\",\".tif\")\n", 230 | " arcpy.management.ProjectRaster(ras_m, \"compbands_p.tif\", aoi)\n", 
231 | " arcpy.management.Clip(\"compbands_p.tif\", aoi, \"compbands_p_c.tif\", \n", 232 | " \"#\", \"#\", \"NONE\",\"MAINTAIN_EXTENT\")\n", 233 | " arcpy.management.Resample(\"compbands_p_c.tif\", outfilename, 0.00028571429)\n", 234 | " merge_and_mask()\n", 235 | " # empty extract and atmocor\n", 236 | " def emptyfolder(folder):\n", 237 | " for filename in os.listdir(folder):\n", 238 | " file_path = os.path.join(folder, filename) \n", 239 | " if os.path.isfile(file_path) or os.path.islink(file_path):\n", 240 | " os.unlink(file_path)\n", 241 | " emptyfolder(\"D:/WaterQuality/datadownload/atmocor\")\n", 242 | " # delete whole safefolder\n", 243 | " os.chdir(datadir)\n", 244 | " shutil.rmtree(safefolder)\n", 245 | " os.unlink(zipf)\n", 246 | "\n", 247 | "# Function to preprocess all Landsat images\n", 248 | "def preprocessLandsat_all():\n", 249 | " datadir = 'D:/WaterQuality/datadownload'\n", 250 | " os.chdir(datadir)\n", 251 | " tarlist = glob.glob('*.tar')\n", 252 | " if len(tarlist)>0:\n", 253 | " for tar in tarlist:\n", 254 | " preprocessLandsat(tar)\n", 255 | "\n", 256 | "# Function to preprocess all Sentinel images\n", 257 | "def preprocessSentinel_all():\n", 258 | " datadir = 'D:/WaterQuality/datadownload'\n", 259 | " os.chdir(datadir)\n", 260 | " ziplist = glob.glob('*.zip')\n", 261 | " if len(ziplist)>0:\n", 262 | " for zipf in ziplist:\n", 263 | " preprocessSentinel(zipf)\n", 264 | "\n", 265 | "# Function to get dates in each month\n", 266 | "def monthstart(year, month):\n", 267 | " from datetime import date, datetime, timedelta\n", 268 | " first_date = datetime(year, month, 1)\n", 269 | " return first_date.strftime(\"%Y-%m-%d\")\n", 270 | "def monthmid1(year, month):\n", 271 | " from datetime import date, datetime, timedelta\n", 272 | " mid_date = datetime(year, month, 15)\n", 273 | " return mid_date.strftime(\"%Y-%m-%d\")\n", 274 | "def monthmid2(year, month):\n", 275 | " from datetime import date, datetime, timedelta\n", 276 | " mid_date = datetime(year, 
month, 16)\n", 277 | " return mid_date.strftime(\"%Y-%m-%d\")\n", 278 | "def monthend(year, month):\n", 279 | " from datetime import date, datetime, timedelta\n", 280 | " if month == 12:\n", 281 | " last_date = datetime(year, month, 31)\n", 282 | " else:\n", 283 | " last_date = datetime(year, month + 1, 1) + timedelta(days=-1)\n", 284 | " return last_date.strftime(\"%Y-%m-%d\")\n", 285 | "\n", 286 | "# Remove Tier 2 Landsat imagery\n", 287 | "def removeLandsatT2():\n", 288 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 289 | " Tier2list = glob.glob('LC*T2.*')\n", 290 | " if len(Tier2list)>0:\n", 291 | " for T2file in Tier2list:\n", 292 | " os.unlink(T2file)\n", 293 | "\n", 294 | "# Rename all Landsat imagery\n", 295 | "def renameLandsat_all():\n", 296 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 297 | " Landsatlist = glob.glob('LC*')\n", 298 | " for Landsatfile in Landsatlist:\n", 299 | " nfilename = Landsatfile[0:25]+Landsatfile[40:] # first 25 characters & from 40 to end\n", 300 | " os.rename(Landsatfile, nfilename)\n", 301 | "\n", 302 | "# Rename all Sentinel imagery\n", 303 | "def renameSentinel_all():\n", 304 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 305 | " Sentinellist = glob.glob('S2*')\n", 306 | " for Sentinelfile in Sentinellist:\n", 307 | " nfilename = Sentinelfile[0:19]+Sentinelfile[37:44]+Sentinelfile[60:]\n", 308 | " if os.path.isfile(nfilename) == True:\n", 309 | " nfilename = Sentinelfile[0:19]+Sentinelfile[37:44]+'a'+Sentinelfile[60:]\n", 310 | " os.rename(Sentinelfile, nfilename)\n", 311 | "\n", 312 | "# Mosaic tiles acquired on the same day\n", 313 | "def mosaictiles(): \n", 314 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 315 | " env.workspace = \"D:/WaterQuality/reflectance\"\n", 316 | " Landsatlist = glob.glob('LC*')\n", 317 | " Landsatdatelist = [i[17:25] for i in Landsatlist]\n", 318 | " Sentinellist = glob.glob('S2*')\n", 319 | " Sentineldatelist = [i[11:19] for i in Sentinellist]\n", 320 | " datelist = 
sorted(list(set(Landsatdatelist+Sentineldatelist))) # get unique date\n", 321 | " imglist = glob.glob('*.tif')\n", 322 | " for d in datelist:\n", 323 | " img_match = [img for img in imglist if d in img]\n", 324 | " outfolder = \"D:/WaterQuality/preprocess_finish\"\n", 325 | " outfilename = \"LandsatSentinel_\"+d+\".tif\"\n", 326 | " if len(img_match)==1:\n", 327 | " arcpy.management.CopyRaster(img_match[0], os.path.join(outfolder, outfilename))\n", 328 | " arcpy.management.Delete(img_match[0]) # delete original file in reflectance folder\n", 329 | " if len(img_match)>1:\n", 330 | " arcpy.MosaicToNewRaster_management(img_match,outfolder,outfilename,\"\",\"32_BIT_FLOAT\",\"\",\"7\",\"MEAN\",\"\")\n", 331 | " arcpy.management.Delete(img_match) # delete original file in reflectance folder\n", 332 | " # deleteimage_lowvalid\n", 333 | " img1 = os.path.join(outfolder, outfilename)\n", 334 | " ras_np = arcpy.RasterToNumPyArray(img1,\"\",\"\",\"\",-9999)[0]\n", 335 | " if (ras_np != -9999).sum() < (2390000*0.1): # largest valid count = 2390000\n", 336 | " arcpy.management.Delete(img1)\n", 337 | "\n", 338 | "# Connect all functions\n", 339 | "def download_preprocess_allimagery(startdate, enddate): # From search download to mosaic\n", 340 | " print(\"Start download Landsat\")\n", 341 | " downloadlandsat(startdate, enddate)\n", 342 | " print(\"Start download Sentinel\")\n", 343 | " downloadsentinel(startdate, enddate)\n", 344 | " print(\"Start preprocess Landsat\")\n", 345 | " preprocessLandsat_all()\n", 346 | " print(\"Start preprocess Sentinel\")\n", 347 | " preprocessSentinel_all()\n", 348 | " removeLandsatT2()\n", 349 | " renameLandsat_all()\n", 350 | " renameSentinel_all()\n", 351 | " print(\"Start mosaic\")\n", 352 | " mosaictiles()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "# Run the function from search download to mosaic\n", 362 | "# 
download_preprocess_allimagery(startdate, enddate)\n", 363 | "download_preprocess_allimagery(\"2024-07-10\", \"2024-07-10\")" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 14, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "# predict Chla\n", 373 | "def predictChla(imgname): # name with full path\n", 374 | "\toutfolder = \"D:/WaterQuality/predict\"\n", 375 | "\tos.chdir(outfolder)\n", 376 | "\tenv.workspace = outfolder\n", 377 | "\t# ANN layers: 14, 6, 3, 1\n", 378 | "\tp = pd.read_json(\"D:/WaterQuality/extract/Chla_modelweight.json\", typ=\"series\")\n", 379 | "\t# Chla ['NR_B2B4', 'NR_B2B6', 'NR_B3B6', 'NR_B3B4', 'TB_B1B2B3', \n", 380 | "\t# 'TB_B4B5B6', 'B2_3', 'B5', 'B4_2', 'B3_3', 'TB_B2B3B4', 'B6_3', 'NR_B1B6', 'B6_2']\n", 381 | "\toutname = imgname.replace(\"preprocess_finish\", \"predict\").replace(\"LandsatSentinel_20\", \"Chla_20\")\n", 382 | "\tb1 = Raster(imgname+\"/Band_1\")*10\n", 383 | "\tb2 = Raster(imgname+\"/Band_2\")*10\n", 384 | "\tb3 = Raster(imgname+\"/Band_3\")*10\n", 385 | "\tb4 = Raster(imgname+\"/Band_4\")*10\n", 386 | "\tb5 = Raster(imgname+\"/Band_5\")*10\n", 387 | "\tb6 = Raster(imgname+\"/Band_6\")*10\n", 388 | "\tras_0 = b1*0\n", 389 | "\tras_neg1 = ras_0 - 1\n", 390 | "\tras_1 = ras_0 + 1\n", 391 | "\tv1 = CellStatistics([CellStatistics([(b2-b4)/(b2+b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 392 | "\tv2 = CellStatistics([CellStatistics([(b2-b6)/(b2+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 393 | "\tv3 = CellStatistics([CellStatistics([(b3-b6)/(b3+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 394 | "\tv4 = CellStatistics([CellStatistics([(b3-b4)/(b3+b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 395 | "\tv5 = CellStatistics([CellStatistics([(((1/b1)-(1/b2))*b3),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 396 | "\tv6 = CellStatistics([CellStatistics([(((1/b4)-(1/b5))*b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 397 | "\tv7 = b2 ** 3\n", 398 | 
"\tv8 = b5\n", 399 | "\tv9 = b4 ** 2\n", 400 | "\tv10 = b3 ** 3\n", 401 | "\tv11 = CellStatistics([CellStatistics([(((1/b2)-(1/b3))*b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 402 | "\tv12 = b6 ** 3\n", 403 | "\tv13 = CellStatistics([CellStatistics([(b1-b6)/(b1+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 404 | "\tv14 = b6 ** 2\n", 405 | "\n", 406 | "\th1n1 = (Exp((p[1][0]+v1*p[0][0][0]+v2*p[0][1][0]+v3*p[0][2][0]+v4*p[0][3][0]+v5*p[0][4][0]+\n", 407 | "\t\t\tv6*p[0][5][0]+v7*p[0][6][0]+v8*p[0][7][0]+v9*p[0][8][0]+v10*p[0][9][0]+\n", 408 | "\t\t\tv11*p[0][10][0]+v12*p[0][11][0]+v13*p[0][12][0]+v14*p[0][13][0])*(-1))+1)**-1\n", 409 | "\th1n2 = (Exp((p[1][1]+v1*p[0][0][1]+v2*p[0][1][1]+v3*p[0][2][1]+v4*p[0][3][1]+v5*p[0][4][1]+\n", 410 | "\t\t\tv6*p[0][5][1]+v7*p[0][6][1]+v8*p[0][7][1]+v9*p[0][8][1]+v10*p[0][9][1]+\n", 411 | "\t\t\tv11*p[0][10][1]+v12*p[0][11][1]+v13*p[0][12][1]+v14*p[0][13][1])*(-1))+1)**-1\n", 412 | "\th1n3 = (Exp((p[1][2]+v1*p[0][0][2]+v2*p[0][1][2]+v3*p[0][2][2]+v4*p[0][3][2]+v5*p[0][4][2]+\n", 413 | "\t\t\tv6*p[0][5][2]+v7*p[0][6][2]+v8*p[0][7][2]+v9*p[0][8][2]+v10*p[0][9][2]+\n", 414 | "\t\t\tv11*p[0][10][2]+v12*p[0][11][2]+v13*p[0][12][2]+v14*p[0][13][2])*(-1))+1)**-1\n", 415 | "\th1n4 = (Exp((p[1][3]+v1*p[0][0][3]+v2*p[0][1][3]+v3*p[0][2][3]+v4*p[0][3][3]+v5*p[0][4][3]+\n", 416 | "\t\t\tv6*p[0][5][3]+v7*p[0][6][3]+v8*p[0][7][3]+v9*p[0][8][3]+v10*p[0][9][3]+\n", 417 | "\t\t\tv11*p[0][10][3]+v12*p[0][11][3]+v13*p[0][12][3]+v14*p[0][13][3])*(-1))+1)**-1\n", 418 | "\th1n5 = (Exp((p[1][4]+v1*p[0][0][4]+v2*p[0][1][4]+v3*p[0][2][4]+v4*p[0][3][4]+v5*p[0][4][4]+\n", 419 | "\t\t\tv6*p[0][5][4]+v7*p[0][6][4]+v8*p[0][7][4]+v9*p[0][8][4]+v10*p[0][9][4]+\n", 420 | "\t\t\tv11*p[0][10][4]+v12*p[0][11][4]+v13*p[0][12][4]+v14*p[0][13][4])*(-1))+1)**-1\n", 421 | "\th1n6 = (Exp((p[1][5]+v1*p[0][0][5]+v2*p[0][1][5]+v3*p[0][2][5]+v4*p[0][3][5]+v5*p[0][4][5]+\n", 422 | "\t\t\tv6*p[0][5][5]+v7*p[0][6][5]+v8*p[0][7][5]+v9*p[0][8][5]+v10*p[0][9][5]+\n", 
423 | "\t\t\tv11*p[0][10][5]+v12*p[0][11][5]+v13*p[0][12][5]+v14*p[0][13][5])*(-1))+1)**-1\n", 424 | "\n", 425 | "\th2n1 = (Exp((p[3][0]+h1n1*p[2][0][0]+h1n2*p[2][1][0]+h1n3*p[2][2][0]+\n", 426 | "\t\t\th1n4*p[2][3][0]+h1n5*p[2][4][0]+h1n6*p[2][5][0])*(-1))+1)**-1\n", 427 | "\th2n2 = (Exp((p[3][1]+h1n1*p[2][0][1]+h1n2*p[2][1][1]+h1n3*p[2][2][1]+\n", 428 | "\t\t\th1n4*p[2][3][1]+h1n5*p[2][4][1]+h1n6*p[2][5][1])*(-1))+1)**-1\n", 429 | "\th2n3 = (Exp((p[3][2]+h1n1*p[2][0][2]+h1n2*p[2][1][2]+h1n3*p[2][2][2]+\n", 430 | "\t\t\th1n4*p[2][3][2]+h1n5*p[2][4][2]+h1n6*p[2][5][2])*(-1))+1)**-1\n", 431 | "\n", 432 | "\tpred = CellStatistics([p[5][0]+h2n1*p[4][0][0]+h2n2*p[4][1][0]+h2n3*p[4][2][0],ras_0], \"MAXIMUM\")\n", 433 | "\tarcpy.management.CopyRaster(pred, outname)\n", 434 | "\n", 435 | "# predict SS\n", 436 | "def predictSS(imgname):\n", 437 | "\toutfolder = \"D:/WaterQuality/predict\"\n", 438 | "\tos.chdir(outfolder)\n", 439 | "\tenv.workspace = outfolder\n", 440 | "\t# ANN layers: 9, 6, 3, 1\n", 441 | "\tp = pd.read_json(\"D:/WaterQuality/extract/SS_modelweight.json\", typ=\"series\")\n", 442 | "\t# SS ['TB_B2B3B4', 'LH_B4B5B6', 'B3_3', 'B4_2', 'LH_B5B6B7', 'TB_B3B4B5', 'NR_B5B6', 'NR_B1B4', 'B2_3']\n", 443 | "\toutname = imgname.replace(\"preprocess_finish\", \"predict\").replace(\"LandsatSentinel_20\", \"SuSo_20\")\n", 444 | "\tb1 = Raster(imgname+\"/Band_1\")*10\n", 445 | "\tb2 = Raster(imgname+\"/Band_2\")*10\n", 446 | "\tb3 = Raster(imgname+\"/Band_3\")*10\n", 447 | "\tb4 = Raster(imgname+\"/Band_4\")*10\n", 448 | "\tb5 = Raster(imgname+\"/Band_5\")*10\n", 449 | "\tb6 = Raster(imgname+\"/Band_6\")*10\n", 450 | "\tb7 = Raster(imgname+\"/Band_7\")*10\n", 451 | "\tras_0 = b1*0\n", 452 | "\tras_neg1 = ras_0 - 1\n", 453 | "\tras_1 = ras_0 + 1\n", 454 | "\tv1 = CellStatistics([CellStatistics([(((1/b2)-(1/b3))*b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 455 | "\tv2 = b5-b4-((b6-b4)*((865-660)/(1610-660)))\n", 456 | "\tv3 = b3 ** 3\n", 457 | "\tv4 = b4 ** 2\n", 
458 | "\tv5 = b6-b5-((b7-b5)*((1610-865)/(2195-865)))\n", 459 | "\tv6 = CellStatistics([CellStatistics([(((1/b3)-(1/b4))*b5),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 460 | "\tv7 = CellStatistics([CellStatistics([(b5-b6)/(b5+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 461 | "\tv8 = CellStatistics([CellStatistics([(b1-b4)/(b1+b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 462 | "\tv9 = b2 ** 3\n", 463 | "\n", 464 | "\th1n1 = (Exp((p[1][0]+v1*p[0][0][0]+v2*p[0][1][0]+v3*p[0][2][0]+v4*p[0][3][0]+v5*p[0][4][0]+\n", 465 | "\t\t\tv6*p[0][5][0]+v7*p[0][6][0]+v8*p[0][7][0]+v9*p[0][8][0])*(-1))+1)**-1\n", 466 | "\th1n2 = (Exp((p[1][1]+v1*p[0][0][1]+v2*p[0][1][1]+v3*p[0][2][1]+v4*p[0][3][1]+v5*p[0][4][1]+\n", 467 | "\t\t\tv6*p[0][5][1]+v7*p[0][6][1]+v8*p[0][7][1]+v9*p[0][8][1])*(-1))+1)**-1\n", 468 | "\th1n3 = (Exp((p[1][2]+v1*p[0][0][2]+v2*p[0][1][2]+v3*p[0][2][2]+v4*p[0][3][2]+v5*p[0][4][2]+\n", 469 | "\t\t\tv6*p[0][5][2]+v7*p[0][6][2]+v8*p[0][7][2]+v9*p[0][8][2])*(-1))+1)**-1\n", 470 | "\th1n4 = (Exp((p[1][3]+v1*p[0][0][3]+v2*p[0][1][3]+v3*p[0][2][3]+v4*p[0][3][3]+v5*p[0][4][3]+\n", 471 | "\t\t\tv6*p[0][5][3]+v7*p[0][6][3]+v8*p[0][7][3]+v9*p[0][8][3])*(-1))+1)**-1\n", 472 | "\th1n5 = (Exp((p[1][4]+v1*p[0][0][4]+v2*p[0][1][4]+v3*p[0][2][4]+v4*p[0][3][4]+v5*p[0][4][4]+\n", 473 | "\t\t\tv6*p[0][5][4]+v7*p[0][6][4]+v8*p[0][7][4]+v9*p[0][8][4])*(-1))+1)**-1\n", 474 | "\th1n6 = (Exp((p[1][5]+v1*p[0][0][5]+v2*p[0][1][5]+v3*p[0][2][5]+v4*p[0][3][5]+v5*p[0][4][5]+\n", 475 | "\t\t\tv6*p[0][5][5]+v7*p[0][6][5]+v8*p[0][7][5]+v9*p[0][8][5])*(-1))+1)**-1\n", 476 | "\n", 477 | "\th2n1 = (Exp((p[3][0]+h1n1*p[2][0][0]+h1n2*p[2][1][0]+h1n3*p[2][2][0]+\n", 478 | "\t\t\th1n4*p[2][3][0]+h1n5*p[2][4][0]+h1n6*p[2][5][0])*(-1))+1)**-1\n", 479 | "\th2n2 = (Exp((p[3][1]+h1n1*p[2][0][1]+h1n2*p[2][1][1]+h1n3*p[2][2][1]+\n", 480 | "\t\t\th1n4*p[2][3][1]+h1n5*p[2][4][1]+h1n6*p[2][5][1])*(-1))+1)**-1\n", 481 | "\th2n3 = 
(Exp((p[3][2]+h1n1*p[2][0][2]+h1n2*p[2][1][2]+h1n3*p[2][2][2]+\n", 482 | "\t\t\th1n4*p[2][3][2]+h1n5*p[2][4][2]+h1n6*p[2][5][2])*(-1))+1)**-1\n", 483 | "\n", 484 | "\tpred = CellStatistics([p[5][0]+h2n1*p[4][0][0]+h2n2*p[4][1][0]+h2n3*p[4][2][0],ras_0], \"MAXIMUM\")\n", 485 | "\tarcpy.management.CopyRaster(pred, outname)\n", 486 | "\n", 487 | "def predict_ChlaSS_all():\n", 488 | " imglist = glob.glob('D:/WaterQuality/preprocess_finish/*.tif')\n", 489 | " for i in imglist:\n", 490 | " predictChla(i)\n", 491 | " predictSS(i)\n", 492 | " # move img to finish folder\n", 493 | " imgname_move = i.replace(\"preprocess_finish\", \"preprocess_finish/finish\")\n", 494 | " arcpy.management.CopyRaster(i, imgname_move)\n", 495 | " arcpy.management.Delete(i)\n", 496 | "\n", 497 | "# update predicted latestimg\n", 498 | "def update_latestimg():\n", 499 | " aoi_water = \"D:/WaterQuality/aoi/aoi_water.shp\"\n", 500 | " # Chla\n", 501 | " oldimg = glob.glob(\"D:/WaterQuality/predict_display/merge_Chla_*.tif\")[0]\n", 502 | " chlalist = glob.glob(\"D:/WaterQuality/predict/Chla_*.tif\")\n", 503 | " latestimg = chlalist[len(chlalist)-1]\n", 504 | " latestimg = latestimg[len(latestimg)-17:len(latestimg)]\n", 505 | " mergeras = arcpy.management.MosaicToNewRaster([oldimg]+chlalist, \"D:/WaterQuality/predict_display\", \"merge_\"+latestimg, \"\", \"32_BIT_FLOAT\", \"\", 1, \"LAST\")\n", 506 | " mergeras_focal = FocalStatistics(mergeras, NbrRectangle(3,3,\"CELL\"), \"MEDIAN\")\n", 507 | " outname = (\"D:/WaterQuality/predict_display/merge_\"+latestimg).replace(\".tif\", \"_smoothclip.tif\")\n", 508 | " arcpy.management.Clip(mergeras_focal, \"\", outname, aoi_water, \"\", \"ClippingGeometry\")\n", 509 | " arcpy.management.Delete(oldimg)\n", 510 | " arcpy.management.Delete(oldimg.replace(\".tif\", \"_smoothclip.tif\"))\n", 511 | " # SS\n", 512 | " oldimg = glob.glob(\"D:/WaterQuality/predict_display/merge_SuSo_*.tif\")[0]\n", 513 | " sslist = 
glob.glob(\"D:/WaterQuality/predict/SuSo_*.tif\")\n", 514 | " latestimg = sslist[len(sslist)-1]\n", 515 | " latestimg = latestimg[len(latestimg)-17:len(latestimg)]\n", 516 | " mergeras = arcpy.management.MosaicToNewRaster([oldimg]+sslist, \"D:/WaterQuality/predict_display\", \"merge_\"+latestimg, \"\", \"32_BIT_FLOAT\", \"\", 1, \"LAST\")\n", 517 | " mergeras_focal = FocalStatistics(mergeras, NbrRectangle(3,3,\"CELL\"), \"MEDIAN\")\n", 518 | " outname = (\"D:/WaterQuality/predict_display/merge_\"+latestimg).replace(\".tif\", \"_smoothclip.tif\")\n", 519 | " arcpy.management.Clip(mergeras_focal, \"\", outname, aoi_water, \"\", \"ClippingGeometry\")\n", 520 | " arcpy.management.Delete(oldimg)\n", 521 | " arcpy.management.Delete(oldimg.replace(\".tif\", \"_smoothclip.tif\"))\n", 522 | "\n", 523 | "# update datapoint to latest date\n", 524 | "def update_datapoint():\n", 525 | " datapoint = \"D:/WaterQuality/ArcGISPro/DataPoint.shp\"\n", 526 | " datapoint_d1 = \"D:/WaterQuality/ArcGISPro/DataPoint_d1.shp\"\n", 527 | " datapoint_d2 = \"D:/WaterQuality/ArcGISPro/DataPoint_d2.shp\"\n", 528 | " datapoint_all = \"D:/WaterQuality/ArcGISPro/DataPoint_all.shp\"\n", 529 | " chlalist = glob.glob(\"D:/WaterQuality/predict/Chla_*.tif\")\n", 530 | " sslist = glob.glob(\"D:/WaterQuality/predict/SuSo_*.tif\")\n", 531 | " for i in range(0,len(chlalist)):\n", 532 | " chla_d1 = chlalist[i]\n", 533 | " ss_d1 = sslist[i]\n", 534 | " arcpy.management.Copy(datapoint, datapoint_d1)\n", 535 | " arcpy.management.Copy(datapoint, datapoint_d2)\n", 536 | " arcpy.sa.ExtractMultiValuesToPoints(datapoint_d1, chla_d1+\" value\", \"BILINEAR\")\n", 537 | " arcpy.sa.ExtractMultiValuesToPoints(datapoint_d2, ss_d1+\" value\", \"BILINEAR\")\n", 538 | " # remove points with no data, else add date and extract list of valid points\n", 539 | " newdatapt = []\n", 540 | " d1 = int(chla_d1[len(chla_d1)-12:len(chla_d1)-4])\n", 541 | " d1_month = round(d1/100)*100+15\n", 542 | " with 
arcpy.da.UpdateCursor(datapoint_d1, [\"value\",\"pt\",\"Date\",\"parameter\",\"latest\",\"DateRange\"]) as cursor:\n", 543 | " for row in cursor:\n", 544 | " if row[0] < 0:\n", 545 | " cursor.deleteRow()\n", 546 | " else:\n", 547 | " newdatapt.append(row[1])\n", 548 | " row[2] = d1 # Date\n", 549 | " row[3] = \"Chla\"\n", 550 | " row[4] = 1 # latest\n", 551 | " row[5] = \"Day\"\n", 552 | " cursor.updateRow(row)\n", 553 | " # modify values for SS\n", 554 | " with arcpy.da.UpdateCursor(datapoint_d2, [\"value\",\"pt\",\"Date\",\"parameter\",\"latest\",\"DateRange\"]) as cursor:\n", 555 | " for row in cursor:\n", 556 | " if row[0] < 0:\n", 557 | " cursor.deleteRow()\n", 558 | " else:\n", 559 | " row[2] = d1 # Date\n", 560 | " row[3] = \"SS\"\n", 561 | " row[4] = 1 # latest\n", 562 | " row[5] = \"Day\"\n", 563 | " cursor.updateRow(row) \n", 564 | " # modify latest in datapoint_all\n", 565 | " with arcpy.da.UpdateCursor(datapoint_all, [\"pt\",\"latest\",\"Date\",\"DateRange\"], \"latest = 1\") as cursor: # where clause\n", 566 | " for row in cursor:\n", 567 | " if row[0] in newdatapt: # if latest image provides obs on this pt\n", 568 | " row[1] = 0\n", 569 | " if row[3]==\"Month\" and row[2]==d1_month: # if same month as latest image\n", 570 | " row[1] = 1\n", 571 | " cursor.updateRow(row)\n", 572 | " arcpy.management.Append(datapoint_d1, datapoint_all)\n", 573 | " arcpy.management.Append(datapoint_d2, datapoint_all)\n", 574 | " # add monthly average data\n", 575 | " with arcpy.da.UpdateCursor(datapoint_d1, [\"Date\",\"DateRange\"]) as cursor:\n", 576 | " for row in cursor:\n", 577 | " row[0] = d1_month\n", 578 | " row[1] = \"Month\"\n", 579 | " cursor.updateRow(row)\n", 580 | " with arcpy.da.UpdateCursor(datapoint_d2, [\"Date\",\"DateRange\"]) as cursor:\n", 581 | " for row in cursor:\n", 582 | " row[0] = d1_month\n", 583 | " row[1] = \"Month\"\n", 584 | " cursor.updateRow(row)\n", 585 | " arcpy.management.Append(datapoint_d1, datapoint_all)\n", 586 | " 
arcpy.management.Append(datapoint_d2, datapoint_all)\n", 587 | " arcpy.management.Delete(datapoint_d1)\n", 588 | " arcpy.management.Delete(datapoint_d2)\n", 589 | " # move chla and SS tif to finish\n", 590 | " arcpy.management.CopyRaster(chla_d1, chla_d1.replace(\"predict\", \"predict/finish\"))\n", 591 | " arcpy.management.Delete(chla_d1)\n", 592 | " arcpy.management.CopyRaster(ss_d1, ss_d1.replace(\"predict\", \"predict/finish\"))\n", 593 | " arcpy.management.Delete(ss_d1)\n", 594 | " print(\"Finish: \"+ str(i+1)+\"/\"+str(len(chlalist)))\n", 595 | " # save shp as zip file\n", 596 | " os.chdir(\"D:/WaterQuality/ArcGISPro\")\n", 597 | " datapoint_all_new = glob.glob(\"DataPoint_all.*\")\n", 598 | " zipname = 'DataPoint_all_shp_to'+str(d1)+'.zip'\n", 599 | " with zipfile.ZipFile(zipname, 'w') as zip_object:\n", 600 | " for f in datapoint_all_new:\n", 601 | " zip_object.write(f, compress_type=zipfile.ZIP_DEFLATED)\n", 602 | " return zipname\n", 603 | "# zipname = update_datapoint()\n", 604 | "\n", 605 | "# Function to create kmz file for ArcGIS Online display\n", 606 | "def createkmz():\n", 607 | " chlalayer = glob.glob(\"D:/WaterQuality/predict_display/merge_Chla_*_smoothclip.tif\")[0]\n", 608 | " chlasym = \"D:/WaterQuality/ArcGISPro/chla_lyr.lyrx\"\n", 609 | " chlalayer_sym = arcpy.management.ApplySymbologyFromLayer(chlalayer, chlasym)\n", 610 | " os.unlink(\"D:/WaterQuality/ArcGISPro/chla_kmz.kmz\")\n", 611 | " arcpy.conversion.LayerToKML(chlalayer_sym, \n", 612 | " \"D:/WaterQuality/ArcGISPro/chla_kmz.kmz\", 0, \"NO_COMPOSITE\", \n", 613 | " '113.81800000012 22.1377142789301 114.50171429609 22.5711428568601 GEOGCS[\"GCS_WGS_1984\",DATUM[\"D_WGS_1984\",SPHEROID[\"WGS_1984\",6378137.0,298.257223563]],PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]]', \n", 614 | " 4096, 96, \"CLAMPED_TO_GROUND\")\n", 615 | " sslayer = glob.glob(\"D:/WaterQuality/predict_display/merge_SuSo_*_smoothclip.tif\")[0]\n", 616 | " sssym = 
\"D:/WaterQuality/ArcGISPro/ss_lyr.lyrx\"\n", 617 | "    sslayer_sym = arcpy.management.ApplySymbologyFromLayer(sslayer, sssym)\n", 618 | "    os.unlink(\"D:/WaterQuality/ArcGISPro/ss_kmz.kmz\")\n", 619 | "    arcpy.conversion.LayerToKML(sslayer_sym, \n", 620 | "        \"D:/WaterQuality/ArcGISPro/ss_kmz.kmz\", 0, \"NO_COMPOSITE\", \n", 621 | "        '113.81800000012 22.1377142789301 114.50171429609 22.5711428568601 GEOGCS[\"GCS_WGS_1984\",DATUM[\"D_WGS_1984\",SPHEROID[\"WGS_1984\",6378137.0,298.257223563]],PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]]', \n", 622 | "        4096, 96, \"CLAMPED_TO_GROUND\")" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "# Run the functions to predict and update datapoints\n", 632 | "predict_ChlaSS_all()\n", 633 | "update_latestimg()\n", 634 | "zipname = update_datapoint()\n", 635 | "print(zipname)\n", 636 | "createkmz()" 637 | ] 638 | }, 639 | { 640 | "cell_type": "code", 641 | "execution_count": null, 642 | "metadata": {}, 643 | "outputs": [], 644 | "source": [ 645 | "# Upload results to ArcGIS Online\n", 646 | "import os\n", 647 | "from arcgis.gis import GIS\n", 648 | "from arcgis.features import FeatureLayerCollection\n", 649 | "gis = GIS(url=\"https://www.arcgis.com\", username=\"username\", password=\"password\") # Change to your ArcGIS Online account with publisher role\n", 650 | "# Data point FeatureLayer and shp\n", 651 | "DataPoint_all_shp_flc = gis.content.get(\"item id\") # Item id of the datapoint feature layer collection on AGOL\n", 652 | "flc = FeatureLayerCollection.fromitem(DataPoint_all_shp_flc)\n", 653 | "flc.manager.overwrite(os.path.join(\"D:/WaterQuality/ArcGISPro\",zipname))\n", 654 | "DataPoint_all_shp_s = gis.content.get(\"item id\") # Item id of the datapoint service on AGOL\n", 655 | "DataPoint_all_shp_s.update(data=os.path.join(\"D:/WaterQuality/ArcGISPro\",zipname))\n", 656 | "# kmz\n", 657 | "chla_kmz = 
gis.content.get(\"item id\") # Item id of the chla kmz on AGOL\n", 658 | "chla_kmz.update(data=\"D:/WaterQuality/ArcGISPro/chla_kmz.kmz\")\n", 659 | "ss_kmz = gis.content.get(\"item id\") # Item id of the SS kmz on AGOL\n", 660 | "ss_kmz.update(data=\"D:/WaterQuality/ArcGISPro/ss_kmz.kmz\")" 661 | ] 662 | } 663 | ], 664 | "metadata": { 665 | "kernelspec": { 666 | "display_name": "Python 3", 667 | "language": "python", 668 | "name": "python3" 669 | }, 670 | "language_info": { 671 | "codemirror_mode": { 672 | "name": "ipython", 673 | "version": 3 674 | }, 675 | "file_extension": ".py", 676 | "mimetype": "text/x-python", 677 | "name": "python", 678 | "nbconvert_exporter": "python", 679 | "pygments_lexer": "ipython3", 680 | "version": "3.9.11" 681 | }, 682 | "orig_nbformat": 4 683 | }, 684 | "nbformat": 4, 685 | "nbformat_minor": 2 686 | } 687 | -------------------------------------------------------------------------------- /LocalProcessingPipeline_Part1_ArchivedImageDatabase.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Install and load libraries for image processing steps\n", 10 | "# pip install --user sentinel2tools-master.zip\n", 11 | "# pip install --user landsatxplore-master.zip\n", 12 | "from landsatxplore.api import API\n", 13 | "from landsatxplore.earthexplorer import EarthExplorer\n", 14 | "from sentinel2download.overlap import Sentinel2Overlap\n", 15 | "from sentinel2download.downloader import Sentinel2Downloader\n", 16 | "import os\n", 17 | "import shutil\n", 18 | "from datetime import date, datetime, timedelta\n", 19 | "import tarfile \n", 20 | "import glob\n", 21 | "import numpy as np\n", 22 | "import pandas as pd\n", 23 | "from simpledbf import Dbf5\n", 24 | "import requests\n", 25 | "import arcpy\n", 26 | "from arcpy import env\n", 27 | "from arcpy.sa import *\n", 28 | 
"arcpy.CheckOutExtension(\"spatial\")" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# Search and download Landsat satellite images\n", 38 | "def downloadlandsat(startdate, enddate):\n", 39 | " # Initialize a new API instance and get an access key\n", 40 | " username = \"username\" # change your EarthExplorer username and password\n", 41 | " password = \"password\"\n", 42 | " api = API(username, password)\n", 43 | " # 22.13,113.81,22.59,114.52\n", 44 | " # https://github.com/yannforget/landsatxplore/blob/master/landsatxplore/api.py\n", 45 | " # Search for Landsat TM scenes\n", 46 | " scenes = api.search(\n", 47 | " dataset='landsat_ot_c2_l1', bbox=(113.81, 22.13, 114.52, 22.59),\n", 48 | " start_date=startdate, # start_date='2014-01-01',\n", 49 | " end_date=enddate, # end_date='2015-12-31',\n", 50 | " max_cloud_cover=20, max_results=1000\n", 51 | " )\n", 52 | " # print(f\"{len(scenes)} scenes found.\")\n", 53 | " # Log out\n", 54 | " api.logout()\n", 55 | " # Downloading scenes\n", 56 | " if len(scenes) > 0:\n", 57 | " ee = EarthExplorer(username, password)\n", 58 | " for s in scenes:\n", 59 | " ee.download(s['entity_id'], output_dir='D:/WaterQuality/datadownload')\n", 60 | " ee.logout()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# Find Sentinel overlap tiles\n", 70 | "aoi_path = \"D:/WaterQuality/aoi/aoi_geojson.json\"\n", 71 | "overlap = Sentinel2Overlap(aoi_path)\n", 72 | "tiles = overlap.overlap()\n", 73 | "print(f\"Overlapped tiles: {tiles}\")" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# Search and download Sentinel satellite images from Google Cloud\n", 83 | "def downloadsentinel(startdate, enddate):\n", 84 | " CONSTRAINTS = {'CLOUDY_PIXEL_PERCENTAGE': 20.0, }\n", 85 | " 
loader = Sentinel2Downloader(api_key=\"D:/WaterQuality/xxx.json\") # change your Google Cloud Sentinel API key\n", 86 | " loaded = loader.download(product_type=\"L1C\", tiles=['49QGE','49QGF','49QHE','49QHF'], \n", 87 | " start_date=startdate, # \"2016-01-01\"\n", 88 | " end_date=enddate, # \"2016-01-05\"\n", 89 | " output_dir=\"D:/WaterQuality/datadownload\",\n", 90 | " cores=2, constraints=CONSTRAINTS, full_download=True)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# Download and extract acolite_py_win_20221114.0.tar.gz\n", 100 | "# Save the following text as a txt file \"acolite/setting_landsat.txt\"\n", 101 | "## ACOLITE settings\n", 102 | "limit=22.13,113.81,22.59,114.52\n", 103 | "inputfile=D:/WaterQuality/datadownload/extract\n", 104 | "output=D:/WaterQuality/datadownload/atmocor\n", 105 | "dsf_interface_reflectance=False\n", 106 | "dsf_residual_glint_correction=True\n", 107 | "glint_mask_rhos_threshold=0.15\n", 108 | "l2w_parameters=None\n", 109 | "l2r_export_geotiff=True" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Function to preprocess a single Landsat image\n", 119 | "def preprocessLandsat(tar):\n", 120 | " # extract tar\n", 121 | " datadir = 'D:/WaterQuality/datadownload'\n", 122 | " os.chdir(datadir)\n", 123 | " file = tarfile.open(tar)\n", 124 | " file.extractall('extract')\n", 125 | " file.close()\n", 126 | " # run acolite\n", 127 | " acolitepath = \"D:/WaterQuality/acolite/acolite_py_win/dist/acolite/acolite.exe\"\n", 128 | " settingpath = \"D:/WaterQuality/acolite/setting_landsat.txt\"\n", 129 | " os.system(acolitepath+\" --cli --settings=\"+settingpath)\n", 130 | " def merge_and_mask():\n", 131 | " # merge 7 bands\n", 132 | " os.chdir('atmocor')\n", 133 | " tiflist = glob.glob('*L2R_rhos_*.tif')\n", 134 | " bandorder = [2, 3, 4, 7, 8, 0, 
1]\n", 135 | " tiflist = [tiflist[i] for i in bandorder]\n", 136 | " env.workspace = 'D:/WaterQuality/datadownload/atmocor'\n", 137 | " arcpy.CompositeBands_management(tiflist, \"compbands.tif\")\n", 138 | " # mask land and cloud\n", 139 | " ras = Raster(\"compbands.tif\")\n", 140 | " qaband = Raster(glob.glob(datadir+'/extract/*QA_PIXEL.TIF')[0])\n", 141 | " qaband_m = SetNull(qaband>22200,1)\n", 142 | " qaband_m = FocalStatistics(qaband_m, NbrCircle(3,\"CELL\"), \"MEAN\", \"NODATA\") # expand radius 3\n", 143 | " ras_m = ExtractByMask(ras, qaband_m)\n", 144 | " swir = Raster(\"compbands.tif\\Band_6\")\n", 145 | " green = Raster(\"compbands.tif\\Band_3\")\n", 146 | " nir = Raster(\"compbands.tif\\Band_5\")\n", 147 | " red = Raster(\"compbands.tif\\Band_4\")\n", 148 | " ndvi1 = arcpy.sa.Float((red-nir)/(red+nir))\n", 149 | " ndvi1_m = SetNull(ndvi1<0,1)\n", 150 | " ndwi = arcpy.sa.Float((green-swir)/(green+swir))\n", 151 | " ndwi_m = SetNull(ndwi<0,1)\n", 152 | " swir_m = SetNull(swir>0.15,1)\n", 153 | " ras_m = ExtractByMask(ras_m, ndvi1_m)\n", 154 | " ras_m = ExtractByMask(ras_m, ndwi_m)\n", 155 | " ras_m = ExtractByMask(ras_m, swir_m)\n", 156 | " # reproject\n", 157 | " aoi = \"D:/WaterQuality/aoi/aoi.shp\"\n", 158 | " outfilename = \"D:/WaterQuality/reflectance/\"+tar.replace(\".tar\",\".tif\")\n", 159 | " arcpy.management.ProjectRaster(ras_m, \"compbands_p.tif\", aoi) \n", 160 | " arcpy.management.Clip(\"compbands_p.tif\", aoi, \"compbands_p_c.tif\", \n", 161 | " \"#\", \"#\", \"NONE\",\"MAINTAIN_EXTENT\")\n", 162 | " arcpy.management.Resample(\"compbands_p_c.tif\", outfilename, 0.00028571429)\n", 163 | " merge_and_mask()\n", 164 | " # empty extract and atmocor\n", 165 | " def emptyfolder(folder):\n", 166 | " for filename in os.listdir(folder):\n", 167 | " file_path = os.path.join(folder, filename) \n", 168 | " if os.path.isfile(file_path) or os.path.islink(file_path):\n", 169 | " os.unlink(file_path)\n", 170 | " 
emptyfolder(\"D:/WaterQuality/datadownload/extract\")\n", 171 | " emptyfolder(\"D:/WaterQuality/datadownload/atmocor\")\n", 172 | " # delete tarfile\n", 173 | " os.chdir(datadir)\n", 174 | " os.unlink(tar)\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# Function to preprocess all Landsat images\n", 184 | "def preprocessLandsat_all():\n", 185 | " datadir = 'D:/WaterQuality/datadownload'\n", 186 | " os.chdir(datadir)\n", 187 | " tarlist = glob.glob('*.tar')\n", 188 | " if len(tarlist)>0:\n", 189 | " for tar in tarlist:\n", 190 | " preprocessLandsat(tar)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# Save the following text as a txt file \"acolite/setting_sentinel.txt\"\n", 200 | "## ACOLITE settings\n", 201 | "limit=22.13,113.81,22.59,114.52\n", 202 | "inputfile=\n", 203 | "output=D:/WaterQuality/datadownload/atmocor\n", 204 | "s2_target_res=20\n", 205 | "dsf_interface_reflectance=False\n", 206 | "dsf_residual_glint_correction=True\n", 207 | "glint_mask_rhos_threshold=0.15\n", 208 | "l2w_parameters=None\n", 209 | "l2r_export_geotiff=True\n", 210 | "\n", 211 | "# Save the following text as a txt file \"acolite/setting_sentinel2.txt\"\n", 212 | "## ACOLITE settings\n", 213 | "limit=22.13,113.81,22.59,114.52\n", 214 | "inputfile=D:/WaterQuality/datadownload\\S2A_MSIL1C_20240717T025551_N0510_R032_T49QHE_20240717T053551.SAFE\n", 215 | "output=D:/WaterQuality_LandsatSentinel/datadownload/atmocor\n", 216 | "s2_target_res=20\n", 217 | "dsf_interface_reflectance=False\n", 218 | "dsf_residual_glint_correction=True\n", 219 | "glint_mask_rhos_threshold=0.15\n", 220 | "l2w_parameters=None\n", 221 | "l2r_export_geotiff=True" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 
| "# Function to preprocess a single Sentinel image\n", 231 | "def preprocessSentinel(safefolder):\n", 232 | " datadir = 'D:/WaterQuality/datadownload'\n", 233 | " os.chdir(datadir)\n", 234 | " # run acolite\n", 235 | " settingtemp = \"D:/WaterQuality/acolite/setting_sentinel.txt\"\n", 236 | " settingpath = \"D:/WaterQuality/acolite/setting_sentinel2.txt\"\n", 237 | " # Read in the file\n", 238 | " with open(settingtemp, 'r') as file:\n", 239 | " filedata = file.read()\n", 240 | " filedata = filedata.replace('inputfile=', 'inputfile='+os.path.join(datadir,safefolder))\n", 241 | " # Write the file out again\n", 242 | " with open(settingpath, 'w') as file:\n", 243 | " file.write(filedata)\n", 244 | " acolitepath = \"D:/WaterQuality/acolite/acolite_py_win/dist/acolite/acolite.exe\"\n", 245 | " os.system(acolitepath+\" --cli --settings=\"+settingpath)\n", 246 | " def merge_and_mask():\n", 247 | " # merge 7 bands\n", 248 | " os.chdir('atmocor')\n", 249 | " tiflist = glob.glob('*L2R_rhos_*.tif')\n", 250 | " if len(tiflist)==0: # if acolite does not produce any files\n", 251 | " return\n", 252 | " bandorder = [2, 3, 4, 5, 10, 0, 1]\n", 253 | " tiflist = [tiflist[i] for i in bandorder]\n", 254 | " env.workspace = 'D:/WaterQuality/datadownload/atmocor'\n", 255 | " arcpy.CompositeBands_management(tiflist, \"compbands.tif\")\n", 256 | " arcpy.management.Resample(\"compbands.tif\", \"compbands_r.tif\", 30)\n", 257 | " # mask land and cloud\n", 258 | " ras = Raster(\"compbands_r.tif\")\n", 259 | " swir = Raster(\"compbands_r.tif\\Band_6\")\n", 260 | " green = Raster(\"compbands_r.tif\\Band_3\")\n", 261 | " nir = Raster(\"compbands_r.tif\\Band_5\")\n", 262 | " red = Raster(\"compbands_r.tif\\Band_4\")\n", 263 | " cloud_m = SetNull((red>0.2)&(nir>0.2),1)\n", 264 | " cloud_m = FocalStatistics(cloud_m, NbrCircle(3,\"CELL\"), \"MEAN\", \"NODATA\") # expand radius 3\n", 265 | " ndvi1 = arcpy.sa.Float((red-nir)/(red+nir))\n", 266 | " ndvi1_m = SetNull(ndvi1<0,1)\n", 267 | " ndwi2 = 
arcpy.sa.Float((green-swir)/(green+swir))\n", 268 | " ndwi2_m = SetNull(ndwi2<0,1)\n", 269 | " swir_m = SetNull(swir>0.15,1)\n", 270 | " nir_m = SetNull((nir>0.03)&(red>0.08)&(ndwi2_m==1)&(swir_m==1)&(cloud_m==1),1) # remaining haze\n", 271 | " nir_m = FocalStatistics(nir_m, NbrCircle(1,\"CELL\"), \"MEAN\", \"NODATA\") # expand radius 1\n", 272 | " ras_m = ExtractByMask(ras, cloud_m)\n", 273 | " ras_m = ExtractByMask(ras_m, ndvi1_m)\n", 274 | " ras_m = ExtractByMask(ras_m, ndwi2_m)\n", 275 | " ras_m = ExtractByMask(ras_m, swir_m)\n", 276 | " ras_m = ExtractByMask(ras_m, nir_m)\n", 277 | " # reproject\n", 278 | " aoi = \"D:/WaterQuality/aoi/aoi.shp\"\n", 279 | " outfilename = \"D:/WaterQuality/reflectance/\"+safefolder.replace(\".SAFE\",\".tif\")\n", 280 | " arcpy.management.ProjectRaster(ras_m, \"compbands_p.tif\", aoi)\n", 281 | " arcpy.management.Clip(\"compbands_p.tif\", aoi, \"compbands_p_c.tif\", \n", 282 | " \"#\", \"#\", \"NONE\",\"MAINTAIN_EXTENT\")\n", 283 | " arcpy.management.Resample(\"compbands_p_c.tif\", outfilename, 0.00028571429)\n", 284 | " merge_and_mask()\n", 285 | " # empty extract and atmocor\n", 286 | " def emptyfolder(folder):\n", 287 | " for filename in os.listdir(folder):\n", 288 | " file_path = os.path.join(folder, filename) \n", 289 | " if os.path.isfile(file_path) or os.path.islink(file_path):\n", 290 | " os.unlink(file_path)\n", 291 | " emptyfolder(\"D:/WaterQuality/datadownload/atmocor\")\n", 292 | " # delete whole safefolder\n", 293 | " os.chdir(datadir)\n", 294 | " shutil.rmtree(safefolder)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "# Function to preprocess all Sentinel images\n", 304 | "def preprocessSentinel_all():\n", 305 | " datadir = 'D:/WaterQuality/datadownload'\n", 306 | " os.chdir(datadir)\n", 307 | " safelist = glob.glob('*.safe')\n", 308 | " if len(safelist)>0:\n", 309 | " for safefolder in safelist:\n", 310 | " 
preprocessSentinel(safefolder)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "# Function to get dates in each month\n", 320 | "from datetime import date, datetime, timedelta\n", 321 | "def monthstart(year, month):\n", 322 | " first_date = datetime(year, month, 1)\n", 323 | " return first_date.strftime(\"%Y-%m-%d\")\n", 324 | "def monthmid1(year, month):\n", 325 | " mid_date = datetime(year, month, 15)\n", 326 | " return mid_date.strftime(\"%Y-%m-%d\")\n", 327 | "def monthmid2(year, month):\n", 328 | " mid_date = datetime(year, month, 16)\n", 329 | " return mid_date.strftime(\"%Y-%m-%d\")\n", 330 | "def monthend(year, month):\n", 331 | " if month == 12:\n", 332 | " last_date = datetime(year, month, 31)\n", 333 | " else:\n", 334 | " last_date = datetime(year, month + 1, 1) + timedelta(days=-1)\n", 335 | " return last_date.strftime(\"%Y-%m-%d\")" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "# Run the functions to download and preprocess all Landsat and Sentinel imagery\n", 345 | "for year in [2020,2021,2022]:\n", 346 | " for month in range(1,13):\n", 347 | " downloadlandsat(monthstart(year, month), monthend(year, month))\n", 348 | " preprocessLandsat_all()\n", 349 | " downloadsentinel(monthstart(year, month), monthend(year, month))\n", 350 | " preprocessSentinel_all()" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "# Remove Tier 2 Landsat imagery\n", 360 | "def removeLandsatT2():\n", 361 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 362 | " Tier2list = glob.glob('LC*T2.*')\n", 363 | " if len(Tier2list)>0:\n", 364 | " for T2file in Tier2list:\n", 365 | " os.unlink(T2file)\n", 366 | "# removeLandsatT2()" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 
371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "# Rename all Landsat imagery\n", 376 | "def renameLandsat_all():\n", 377 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 378 | " Landsatlist = glob.glob('LC*')\n", 379 | " for Landsatfile in Landsatlist:\n", 380 | " nfilename = Landsatfile[0:25]+Landsatfile[40:] # first 25 characters & from 40 to end\n", 381 | " os.rename(Landsatfile, nfilename)\n", 382 | "# renameLandsat_all()" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "# Rename all Sentinel imagery\n", 392 | "def renameSentinel_all():\n", 393 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 394 | " Sentinellist = glob.glob('S2*')\n", 395 | " for Sentinelfile in Sentinellist:\n", 396 | " nfilename = Sentinelfile[0:19]+Sentinelfile[37:44]+Sentinelfile[60:]\n", 397 | " if os.path.isfile(nfilename) == True:\n", 398 | " nfilename = Sentinelfile[0:19]+Sentinelfile[37:44]+'a'+Sentinelfile[60:]\n", 399 | " os.rename(Sentinelfile, nfilename)\n", 400 | "# renameSentinel_all()" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "# Mosaic tiles acquired on the same day\n", 410 | "def mosaictiles(): \n", 411 | " os.chdir(\"D:/WaterQuality/reflectance\")\n", 412 | " env.workspace = \"D:/WaterQuality/reflectance\"\n", 413 | " Landsatlist = glob.glob('LC*')\n", 414 | " Landsatdatelist = [i[17:25] for i in Landsatlist]\n", 415 | " Sentinellist = glob.glob('S2*')\n", 416 | " Sentineldatelist = [i[11:19] for i in Sentinellist]\n", 417 | " datelist = sorted(list(set(Landsatdatelist+Sentineldatelist))) # get unique date\n", 418 | " imglist = glob.glob('*.tif')\n", 419 | " for d in datelist:\n", 420 | " img_match = [img for img in imglist if d in img]\n", 421 | " outfolder = \"D:/WaterQuality/preprocess_finish\"\n", 422 | " 
outfilename = \"LandsatSentinel_\"+d+\".tif\"\n", 423 | " if len(img_match)==1:\n", 424 | " arcpy.management.CopyRaster(img_match[0], os.path.join(outfolder, outfilename))\n", 425 | " if len(img_match)>1:\n", 426 | " arcpy.MosaicToNewRaster_management(img_match,outfolder,outfilename,\"\",\"32_BIT_FLOAT\",\"\",\"7\",\"MEAN\",\"\")\n", 427 | "# mosaictiles()" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "# Remove images that cover too few valid pixels\n", 437 | "def deleteimage_lowvalid():\n", 438 | " os.chdir(\"D:/WaterQuality/preprocess_finish\")\n", 439 | " env.workspace = \"D:/WaterQuality/preprocess_finish\"\n", 440 | " imglist = glob.glob('*.tif')\n", 441 | " for img in imglist:\n", 442 | " ras_np = arcpy.RasterToNumPyArray(img,\"\",\"\",\"\",-9999)[0]\n", 443 | " if (ras_np != -9999).sum() < (2390000*0.2): # largest valid count = 2390000\n", 444 | " arcpy.management.Delete(img)\n", 445 | "# deleteimage_lowvalid()" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "# Extract pixel values based on the monitoring station locations\n", 455 | "def extractpixel(imgname):\n", 456 | " os.chdir(\"D:/WaterQuality/preprocess_finish\")\n", 457 | " env.workspace = \"D:/WaterQuality/preprocess_finish\"\n", 458 | " imgdate = imgname[16:24]\n", 459 | " station_shp = \"D:/WaterQuality/stationdata/MonitoringStation_wgs84_76.shp\"\n", 460 | " extract_dbf = \"D:/WaterQuality/extract/extract.dbf\"\n", 461 | " arcpy.sa.Sample(imgname, station_shp, extract_dbf, \"BILINEAR\", \"FID\")\n", 462 | " extract_df = Dbf5(extract_dbf).to_dataframe()\n", 463 | " extract_df[\"monitoring\"] = range(0,len(extract_df))\n", 464 | " arcpy.management.Delete(extract_dbf)\n", 465 | " extract_df.columns = ['Monitoring','X','Y','B1','B2','B3','B4','B5','B6','B7','monitoring']\n", 466 | " station_df = 
Dbf5(\"D:/WaterQuality/stationdata/MonitoringStation_wgs84_76.dbf\").to_dataframe()\n", 467 | " station_df[\"monitoring\"] = range(0,len(station_df))\n", 468 | " station_df = station_df[[\"monitoring\",\"WaterStati\"]]\n", 469 | " extract_df = extract_df.merge(station_df, on=\"monitoring\")\n", 470 | " extract_df = extract_df.drop(columns=['Monitoring','X','Y','monitoring'])\n", 471 | " extract_df = extract_df[extract_df[\"B1\"] > -1] # remove no data (-9999) rows\n", 472 | " extract_df[\"Date\"] = imgdate\n", 473 | " return extract_df" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "# Extract and save the pixel values as a csv file\n", 483 | "os.chdir(\"D:/WaterQuality/preprocess_finish\")\n", 484 | "imglist = glob.glob('*.tif')\n", 485 | "dflist = [extractpixel(img) for img in imglist]\n", 486 | "extract_df_all = pd.concat(dflist)\n", 487 | "extract_df_all.to_csv(\"D:/WaterQuality/extract/extract_df_all.csv\")" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 7, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "# Combine station data different years\n", 497 | "os.chdir(\"D:/WaterQuality/stationdata\")\n", 498 | "csvlist = glob.glob('marine-historical-*.csv')\n", 499 | "csvlist = [pd.read_csv(c) for c in csvlist]\n", 500 | "df = pd.concat(csvlist)\n", 501 | "df.to_csv(\"marine-historical-2010-2022.csv\")\n", 502 | "# Additional manual step: subset only surface water and replace <0.2 to 0.1, <0.5 to 0.25" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "# Merge image pixel value and station data that are same day\n", 512 | "stationdata_df = pd.read_csv(\"D:/WaterQuality/stationdata/marine-historical-2010-2022.csv\")\n", 513 | "extract_df_all = pd.read_csv(\"D:/WaterQuality/extract/extract_df_all.csv\", index_col=0)\n", 
514 | "stationdata_df = stationdata_df[[\"Station\",\"Dates\",\"Chlorophyll-a (µg/L)\",\"Suspended Solids (mg/L)\",\"Turbidity (NTU)\"]]\n", 515 | "stationdata_df.columns = [\"WaterStati\", \"Date\", \"Chla\", \"SS\", \"Tur\"]\n", 516 | "stationdata_df[\"Date\"] = pd.to_datetime(stationdata_df[\"Date\"], format='%d/%m/%Y').dt.strftime('%Y%m%d').astype(int)\n", 517 | "img_stationdata_merge = stationdata_df.merge(extract_df_all, on=[\"WaterStati\",\"Date\"])\n", 518 | "img_stationdata_merge.to_csv(\"D:/WaterQuality/extract/img_stationdata_merge.csv\")\n" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 194, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "# Create independent variables\n", 528 | "df = pd.read_csv(\"D:/WaterQuality/extract/img_stationdata_merge.csv\", index_col=0)\n", 529 | "bands = ['B' + str(b) for b in [*range(1,8)]]\n", 530 | "wl = [443,490,560,660,865,1610,2195] #wavelength in nm\n", 531 | "\n", 532 | "# Multiply 10\n", 533 | "for i in bands:\n", 534 | " df[i] = df[i]*10\n", 535 | "# Square and cubic\n", 536 | "for i in bands:\n", 537 | " df[i+'_2'] = df[i]**2\n", 538 | " df[i+'_3'] = df[i]**3\n", 539 | "# Two-band ratio\n", 540 | "for i in bands:\n", 541 | " for j in bands:\n", 542 | " if (i != j) & (i < j):\n", 543 | " df['NR_'+i+j] = ((df[i] - df[j]) / (df[i] + df[j])).clip(lower=-1.0, upper=1.0)\n", 544 | "# Three-band ratio\n", 545 | "for i in range(0,7):\n", 546 | " for j in range(0,7):\n", 547 | " for k in range(0,7):\n", 548 | " if (j == i+1) & (k == j+1):\n", 549 | " df['TB_'+bands[i]+bands[j]+bands[k]] = (((1/df[bands[i]])-(1/df[bands[j]]))*df[bands[k]]).clip(lower=-1.0, upper=1.0)\n", 550 | "# Line height algorithm\n", 551 | "for i in range(0,7):\n", 552 | " for j in range(0,7):\n", 553 | " for k in range(0,7):\n", 554 | " if (j == i+1) & (k == j+1):\n", 555 | " df['LH_'+bands[i]+bands[j]+bands[k]] = df[bands[j]] - df[bands[i]] - ((df[bands[k]] - df[bands[i]]) * 
((wl[j]-wl[i])/(wl[k]-wl[i])))\n", 556 | "\n", 557 | "df.to_csv('D:/WaterQuality/extract/df_variable.csv')" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 211, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "df = pd.read_csv(\"D:/WaterQuality/extract/df_variable.csv\", index_col=0)\n", 567 | "df = df.drop(columns = [\"WaterStati\",\"Date\"])\n", 568 | "df_var = df.iloc[:,3:]" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": null, 574 | "metadata": {}, 575 | "outputs": [], 576 | "source": [ 577 | "# Install and load libraries for neural network steps\n", 578 | "import tensorflow as tf\n", 579 | "from keras import backend as K\n", 580 | "from sklearn import linear_model\n", 581 | "from sklearn.model_selection import KFold\n", 582 | "from sklearn.metrics import mean_squared_error, mean_absolute_error\n", 583 | "import statsmodels.api as sm" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 3, 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [ 592 | "# Function for stepwise variable selection\n", 593 | "# Modified from https://datascience.stackexchange.com/questions/24405/how-to-do-stepwise-regression-using-sklearn/24447#24447\n", 594 | "def stepwise_selection(X, y, \n", 595 | " initial_list=[], \n", 596 | " threshold_in=0.05, \n", 597 | " threshold_out = 0.1, \n", 598 | " verbose=True):\n", 599 | " \"\"\" Perform a forward-backward feature selection \n", 600 | " based on p-value from statsmodels.api.OLS\n", 601 | " Arguments:\n", 602 | " X - pandas.DataFrame with candidate features\n", 603 | " y - pandas.DataFrame with the target column\n", 604 | " initial_list - list of features to start with (column names of X)\n", 605 | " threshold_in - include a feature if its p-value < threshold_in\n", 606 | " threshold_out - exclude a feature if its p-value > threshold_out\n", 607 | " verbose - whether to print the sequence of inclusions and exclusions\n", 608 | " 
Returns: list of selected features \n", 609 | " Always set threshold_in < threshold_out to avoid infinite looping.\n", 610 | " See https://en.wikipedia.org/wiki/Stepwise_regression for the details\n", 611 | " \"\"\"\n", 612 | " y = y.to_numpy()\n", 613 | " included = list(initial_list)\n", 614 | " while True:\n", 615 | " changed=False\n", 616 | " # forward step\n", 617 | " excluded = list(set(X.columns)-set(included))\n", 618 | " new_pval = pd.Series(index=excluded, dtype='float64')\n", 619 | " for new_column in excluded:\n", 620 | " model = sm.OLS(y, sm.add_constant(pd.DataFrame(X[included+[new_column]]))).fit()\n", 621 | " new_pval[new_column] = model.pvalues[new_column]\n", 622 | " best_pval = new_pval.min()\n", 623 | " if best_pval < threshold_in:\n", 624 | " best_feature = new_pval.index[new_pval.argmin()]\n", 625 | " included.append(best_feature)\n", 626 | " changed=True\n", 627 | " if verbose:\n", 628 | " print('Add {:30} with p-value {:.6}'.format(best_feature, best_pval))\n", 629 | "\n", 630 | " # backward step\n", 631 | " model = sm.OLS(y, sm.add_constant(pd.DataFrame(X[included]))).fit()\n", 632 | " # use all coefs except intercept\n", 633 | " pvalues = model.pvalues.iloc[1:]\n", 634 | " worst_pval = pvalues.max() # null if pvalues is empty\n", 635 | " if worst_pval > threshold_out:\n", 636 | " changed=True\n", 637 | " worst_feature = pvalues.index[pvalues.argmax()]\n", 638 | " included.remove(worst_feature)\n", 639 | " if verbose:\n", 640 | " print('Drop {:30} with p-value {:.6}'.format(worst_feature, worst_pval))\n", 641 | " if not changed:\n", 642 | " break\n", 643 | " return included" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": 4, 649 | "metadata": {}, 650 | "outputs": [], 651 | "source": [ 652 | "# Function to obtain model performance based on cross validation\n", 653 | "wq = ['Chla','SS','Tur']\n", 654 | "def model_cv(wq_name):\n", 655 | " y = df[wq_name]\n", 656 | " step_var = stepwise_selection(df_var, y, 
verbose=False)\n", 657 | " X = df[step_var]\n", 658 | " kfold = KFold(n_splits=10, shuffle=True, random_state=0)\n", 659 | " df_cv = pd.DataFrame()\n", 660 | " for train, test in kfold.split(X, y):\n", 661 | " model = tf.keras.Sequential([\n", 662 | " tf.keras.layers.Dense(6, activation='sigmoid'),\n", 663 | " tf.keras.layers.Dense(3, activation='sigmoid'),\n", 664 | " tf.keras.layers.Dense(1)\n", 665 | " ])\n", 666 | " model.compile(loss='mean_absolute_error', optimizer=tf.keras.optimizers.Adam())\n", 667 | " model.fit(X.iloc[train,], y[train], epochs=2000)\n", 668 | " y_predict = model.predict(X.iloc[test,]).flatten()\n", 669 | " y_test = y[test]\n", 670 | " df_cv1 = pd.DataFrame({\"y_predict\":y_predict, \"y_test\":y_test})\n", 671 | " df_cv = pd.concat([df_cv,df_cv1])\n", 672 | " corr_test = np.corrcoef(df_cv[\"y_test\"], df_cv[\"y_predict\"])[0, 1]\n", 673 | " rmse_test = mean_squared_error(df_cv[\"y_test\"], df_cv[\"y_predict\"], squared=False)\n", 674 | " mae_test = mean_absolute_error(df_cv[\"y_test\"], df_cv[\"y_predict\"])\n", 675 | " model_cv_df = pd.DataFrame({'WQ': [wq_name], 'var':[step_var], 'corr_test': [corr_test], \n", 676 | " 'rmse_test': [rmse_test], 'mae_test': [mae_test]})\n", 677 | " return(model_cv_df)" 678 | ] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": null, 683 | "metadata": {}, 684 | "outputs": [], 685 | "source": [ 686 | "# Obtain model performance\n", 687 | "model_cv_Chla = model_cv('Chla')\n", 688 | "model_cv_SS = model_cv('SS')" 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": null, 694 | "metadata": {}, 695 | "outputs": [], 696 | "source": [ 697 | "# Chla model\n", 698 | "wq_name = \"Chla\"\n", 699 | "y = df[wq_name]\n", 700 | "step_var = stepwise_selection(df_var, y, verbose=False)\n", 701 | "X = df[step_var]\n", 702 | "model = tf.keras.Sequential([\n", 703 | " tf.keras.layers.Dense(6, activation='sigmoid'),\n", 704 | " tf.keras.layers.Dense(3, activation='sigmoid'),\n", 705 | " 
tf.keras.layers.Dense(1)\n", 706 | "])\n", 707 | "model.compile(loss='mean_absolute_error', optimizer=tf.keras.optimizers.Adam())\n", 708 | "model.fit(X, y, epochs=2000)\n", 709 | "model.save(\"D:/WaterQuality/extract/Chla_model.keras\")\n", 710 | "\n", 711 | "# SS model\n", 712 | "wq_name = \"SS\"\n", 713 | "y = df[wq_name]\n", 714 | "step_var = stepwise_selection(df_var, y, verbose=False)\n", 715 | "X = df[step_var]\n", 716 | "model = tf.keras.Sequential([\n", 717 | " tf.keras.layers.Dense(6, activation='sigmoid'),\n", 718 | " tf.keras.layers.Dense(3, activation='sigmoid'),\n", 719 | " tf.keras.layers.Dense(1)\n", 720 | "])\n", 721 | "model.compile(loss='mean_absolute_error', optimizer=tf.keras.optimizers.Adam())\n", 722 | "model.fit(X, y, epochs=2000)\n", 723 | "model.save(\"D:/WaterQuality/extract/SS_model.keras\")" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": 315, 729 | "metadata": {}, 730 | "outputs": [], 731 | "source": [ 732 | "# print model weights from the trained models\n", 733 | "Chla_model = tf.keras.models.load_model(\"D:/WaterQuality/extract/Chla_model.keras\")\n", 734 | "pd.Series(Chla_model.get_weights()).to_json(\"D:/WaterQuality/extract/Chla_modelweight.json\")\n", 735 | "SS_model = tf.keras.models.load_model(\"D:/WaterQuality/extract/SS_model.keras\")\n", 736 | "pd.Series(SS_model.get_weights()).to_json(\"D:/WaterQuality/extract/SS_modelweight.json\")\n", 737 | "# OR pd.read_json(\"D:/WaterQuality/extract/Chla_modelweight.json\", typ=\"series\")" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": null, 743 | "metadata": {}, 744 | "outputs": [], 745 | "source": [ 746 | "# Apply model to predict all imagery\n", 747 | "import os\n", 748 | "import glob\n", 749 | "import numpy as np\n", 750 | "import pandas as pd\n", 751 | "import arcpy\n", 752 | "from arcpy import env\n", 753 | "from arcpy.sa import *\n", 754 | "arcpy.CheckOutExtension(\"spatial\")" 755 | ] 756 | }, 757 | { 758 | "cell_type": 
"code", 759 | "execution_count": 7, 760 | "metadata": {}, 761 | "outputs": [], 762 | "source": [ 763 | "# Function to predict Chla\n", 764 | "def predictChla(imgname):\n", 765 | " outfolder = \"D:/WaterQuality/predict\"\n", 766 | " os.chdir(outfolder)\n", 767 | " env.workspace = outfolder\n", 768 | " # ANN layers: 14, 6, 3, 1\n", 769 | " p = pd.read_json(\"D:/WaterQuality/extract/Chla_modelweight.json\", typ=\"series\")\n", 770 | " # Chla ['NR_B2B4', 'NR_B2B6', 'NR_B3B6', 'NR_B3B4', 'TB_B1B2B3', \n", 771 | " # 'TB_B4B5B6', 'B2_3', 'B5', 'B4_2', 'B3_3', 'TB_B2B3B4', 'B6_3', 'NR_B1B6', 'B6_2']\n", 772 | " outname = imgname.replace(\"preprocess_finish\", \"predict\").replace(\"LandsatSentinel_20\", \"Chla_20\")\n", 773 | " b1 = Raster(imgname+\"/Band_1\")*10\n", 774 | " b2 = Raster(imgname+\"/Band_2\")*10\n", 775 | " b3 = Raster(imgname+\"/Band_3\")*10\n", 776 | " b4 = Raster(imgname+\"/Band_4\")*10\n", 777 | " b5 = Raster(imgname+\"/Band_5\")*10\n", 778 | " b6 = Raster(imgname+\"/Band_6\")*10\n", 779 | " ras_0 = b1*0\n", 780 | " ras_neg1 = ras_0 - 1\n", 781 | " ras_1 = ras_0 + 1\n", 782 | " v1 = CellStatistics([CellStatistics([(b2-b4)/(b2+b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 783 | " v2 = CellStatistics([CellStatistics([(b2-b6)/(b2+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 784 | " v3 = CellStatistics([CellStatistics([(b3-b6)/(b3+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 785 | " v4 = CellStatistics([CellStatistics([(b3-b4)/(b3+b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 786 | " v5 = CellStatistics([CellStatistics([(((1/b1)-(1/b2))*b3),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 787 | " v6 = CellStatistics([CellStatistics([(((1/b4)-(1/b5))*b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 788 | " v7 = b2 ** 3\n", 789 | " v8 = b5\n", 790 | " v9 = b4 ** 2\n", 791 | " v10 = b3 ** 3\n", 792 | " v11 = CellStatistics([CellStatistics([(((1/b2)-(1/b3))*b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 793 | " v12 = b6 ** 3\n", 
794 | " v13 = CellStatistics([CellStatistics([(b1-b6)/(b1+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 795 | " v14 = b6 ** 2\n", 796 | "\n", 797 | " h1n1 = (Exp((p[1][0]+v1*p[0][0][0]+v2*p[0][1][0]+v3*p[0][2][0]+v4*p[0][3][0]+v5*p[0][4][0]+\n", 798 | " v6*p[0][5][0]+v7*p[0][6][0]+v8*p[0][7][0]+v9*p[0][8][0]+v10*p[0][9][0]+\n", 799 | " v11*p[0][10][0]+v12*p[0][11][0]+v13*p[0][12][0]+v14*p[0][13][0])*(-1))+1)**-1\n", 800 | " h1n2 = (Exp((p[1][1]+v1*p[0][0][1]+v2*p[0][1][1]+v3*p[0][2][1]+v4*p[0][3][1]+v5*p[0][4][1]+\n", 801 | " v6*p[0][5][1]+v7*p[0][6][1]+v8*p[0][7][1]+v9*p[0][8][1]+v10*p[0][9][1]+\n", 802 | " v11*p[0][10][1]+v12*p[0][11][1]+v13*p[0][12][1]+v14*p[0][13][1])*(-1))+1)**-1\n", 803 | " h1n3 = (Exp((p[1][2]+v1*p[0][0][2]+v2*p[0][1][2]+v3*p[0][2][2]+v4*p[0][3][2]+v5*p[0][4][2]+\n", 804 | " v6*p[0][5][2]+v7*p[0][6][2]+v8*p[0][7][2]+v9*p[0][8][2]+v10*p[0][9][2]+\n", 805 | " v11*p[0][10][2]+v12*p[0][11][2]+v13*p[0][12][2]+v14*p[0][13][2])*(-1))+1)**-1\n", 806 | " h1n4 = (Exp((p[1][3]+v1*p[0][0][3]+v2*p[0][1][3]+v3*p[0][2][3]+v4*p[0][3][3]+v5*p[0][4][3]+\n", 807 | " v6*p[0][5][3]+v7*p[0][6][3]+v8*p[0][7][3]+v9*p[0][8][3]+v10*p[0][9][3]+\n", 808 | " v11*p[0][10][3]+v12*p[0][11][3]+v13*p[0][12][3]+v14*p[0][13][3])*(-1))+1)**-1\n", 809 | " h1n5 = (Exp((p[1][4]+v1*p[0][0][4]+v2*p[0][1][4]+v3*p[0][2][4]+v4*p[0][3][4]+v5*p[0][4][4]+\n", 810 | " v6*p[0][5][4]+v7*p[0][6][4]+v8*p[0][7][4]+v9*p[0][8][4]+v10*p[0][9][4]+\n", 811 | " v11*p[0][10][4]+v12*p[0][11][4]+v13*p[0][12][4]+v14*p[0][13][4])*(-1))+1)**-1\n", 812 | " h1n6 = (Exp((p[1][5]+v1*p[0][0][5]+v2*p[0][1][5]+v3*p[0][2][5]+v4*p[0][3][5]+v5*p[0][4][5]+\n", 813 | " v6*p[0][5][5]+v7*p[0][6][5]+v8*p[0][7][5]+v9*p[0][8][5]+v10*p[0][9][5]+\n", 814 | " v11*p[0][10][5]+v12*p[0][11][5]+v13*p[0][12][5]+v14*p[0][13][5])*(-1))+1)**-1\n", 815 | "\n", 816 | " h2n1 = (Exp((p[3][0]+h1n1*p[2][0][0]+h1n2*p[2][1][0]+h1n3*p[2][2][0]+\n", 817 | " h1n4*p[2][3][0]+h1n5*p[2][4][0]+h1n6*p[2][5][0])*(-1))+1)**-1\n", 818 | " h2n2 = 
(Exp((p[3][1]+h1n1*p[2][0][1]+h1n2*p[2][1][1]+h1n3*p[2][2][1]+\n", 819 | " h1n4*p[2][3][1]+h1n5*p[2][4][1]+h1n6*p[2][5][1])*(-1))+1)**-1\n", 820 | " h2n3 = (Exp((p[3][2]+h1n1*p[2][0][2]+h1n2*p[2][1][2]+h1n3*p[2][2][2]+\n", 821 | " h1n4*p[2][3][2]+h1n5*p[2][4][2]+h1n6*p[2][5][2])*(-1))+1)**-1\n", 822 | "\n", 823 | " pred = CellStatistics([p[5][0]+h2n1*p[4][0][0]+h2n2*p[4][1][0]+h2n3*p[4][2][0],ras_0], \"MAXIMUM\")\n", 824 | " arcpy.management.CopyRaster(pred, outname)" 825 | ] 826 | }, 827 | { 828 | "cell_type": "code", 829 | "execution_count": 8, 830 | "metadata": {}, 831 | "outputs": [], 832 | "source": [ 833 | "# Function to predict SS\n", 834 | "def predictSS(imgname):\n", 835 | " outfolder = \"D:/WaterQuality/predict\"\n", 836 | " os.chdir(outfolder)\n", 837 | " env.workspace = outfolder\n", 838 | " # ANN layers: 9, 6, 3, 1\n", 839 | " p = pd.read_json(\"D:/WaterQuality/extract/SS_modelweight.json\", typ=\"series\")\n", 840 | " # SS ['TB_B2B3B4', 'LH_B4B5B6', 'B3_3', 'B4_2', 'LH_B5B6B7', 'TB_B3B4B5', 'NR_B5B6', 'NR_B1B4', 'B2_3']\n", 841 | " outname = imgname.replace(\"preprocess_finish\", \"predict\").replace(\"LandsatSentinel_20\", \"SuSo_20\")\n", 842 | " b1 = Raster(imgname+\"/Band_1\")*10\n", 843 | " b2 = Raster(imgname+\"/Band_2\")*10\n", 844 | " b3 = Raster(imgname+\"/Band_3\")*10\n", 845 | " b4 = Raster(imgname+\"/Band_4\")*10\n", 846 | " b5 = Raster(imgname+\"/Band_5\")*10\n", 847 | " b6 = Raster(imgname+\"/Band_6\")*10\n", 848 | " b7 = Raster(imgname+\"/Band_7\")*10\n", 849 | " ras_0 = b1*0\n", 850 | " ras_neg1 = ras_0 - 1\n", 851 | " ras_1 = ras_0 + 1\n", 852 | " v1 = CellStatistics([CellStatistics([(((1/b2)-(1/b3))*b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 853 | " v2 = b5-b4-((b6-b4)*((865-660)/(1610-660)))\n", 854 | " v3 = b3 ** 3\n", 855 | " v4 = b4 ** 2\n", 856 | " v5 = b6-b5-((b7-b5)*((1610-865)/(2195-865)))\n", 857 | " v6 = CellStatistics([CellStatistics([(((1/b3)-(1/b4))*b5),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 858 | " v7 
= CellStatistics([CellStatistics([(b5-b6)/(b5+b6),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 859 | " v8 = CellStatistics([CellStatistics([(b1-b4)/(b1+b4),ras_neg1], \"MAXIMUM\"),ras_1],\"MINIMUM\")\n", 860 | " v9 = b2 ** 3\n", 861 | "\n", 862 | " h1n1 = (Exp((p[1][0]+v1*p[0][0][0]+v2*p[0][1][0]+v3*p[0][2][0]+v4*p[0][3][0]+v5*p[0][4][0]+\n", 863 | " v6*p[0][5][0]+v7*p[0][6][0]+v8*p[0][7][0]+v9*p[0][8][0])*(-1))+1)**-1\n", 864 | " h1n2 = (Exp((p[1][1]+v1*p[0][0][1]+v2*p[0][1][1]+v3*p[0][2][1]+v4*p[0][3][1]+v5*p[0][4][1]+\n", 865 | " v6*p[0][5][1]+v7*p[0][6][1]+v8*p[0][7][1]+v9*p[0][8][1])*(-1))+1)**-1\n", 866 | " h1n3 = (Exp((p[1][2]+v1*p[0][0][2]+v2*p[0][1][2]+v3*p[0][2][2]+v4*p[0][3][2]+v5*p[0][4][2]+\n", 867 | " v6*p[0][5][2]+v7*p[0][6][2]+v8*p[0][7][2]+v9*p[0][8][2])*(-1))+1)**-1\n", 868 | " h1n4 = (Exp((p[1][3]+v1*p[0][0][3]+v2*p[0][1][3]+v3*p[0][2][3]+v4*p[0][3][3]+v5*p[0][4][3]+\n", 869 | " v6*p[0][5][3]+v7*p[0][6][3]+v8*p[0][7][3]+v9*p[0][8][3])*(-1))+1)**-1\n", 870 | " h1n5 = (Exp((p[1][4]+v1*p[0][0][4]+v2*p[0][1][4]+v3*p[0][2][4]+v4*p[0][3][4]+v5*p[0][4][4]+\n", 871 | " v6*p[0][5][4]+v7*p[0][6][4]+v8*p[0][7][4]+v9*p[0][8][4])*(-1))+1)**-1\n", 872 | " h1n6 = (Exp((p[1][5]+v1*p[0][0][5]+v2*p[0][1][5]+v3*p[0][2][5]+v4*p[0][3][5]+v5*p[0][4][5]+\n", 873 | " v6*p[0][5][5]+v7*p[0][6][5]+v8*p[0][7][5]+v9*p[0][8][5])*(-1))+1)**-1\n", 874 | "\n", 875 | " h2n1 = (Exp((p[3][0]+h1n1*p[2][0][0]+h1n2*p[2][1][0]+h1n3*p[2][2][0]+\n", 876 | " h1n4*p[2][3][0]+h1n5*p[2][4][0]+h1n6*p[2][5][0])*(-1))+1)**-1\n", 877 | " h2n2 = (Exp((p[3][1]+h1n1*p[2][0][1]+h1n2*p[2][1][1]+h1n3*p[2][2][1]+\n", 878 | " h1n4*p[2][3][1]+h1n5*p[2][4][1]+h1n6*p[2][5][1])*(-1))+1)**-1\n", 879 | " h2n3 = (Exp((p[3][2]+h1n1*p[2][0][2]+h1n2*p[2][1][2]+h1n3*p[2][2][2]+\n", 880 | " h1n4*p[2][3][2]+h1n5*p[2][4][2]+h1n6*p[2][5][2])*(-1))+1)**-1\n", 881 | "\n", 882 | " pred = CellStatistics([p[5][0]+h2n1*p[4][0][0]+h2n2*p[4][1][0]+h2n3*p[4][2][0],ras_0], \"MAXIMUM\")\n", 883 | " 
arcpy.management.CopyRaster(pred, outname)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": 7, 889 | "metadata": {}, 890 | "outputs": [], 891 | "source": [ 892 | "# Apply function to all imagery\n", 893 | "imglist = glob.glob('D:/WaterQuality/preprocess_finish/*.tif')\n", 894 | "for i in imglist:\n", 895 | " predictChla(i)\n", 896 | " predictSS(i)" 897 | ] 898 | }, 899 | { 900 | "cell_type": "code", 901 | "execution_count": null, 902 | "metadata": {}, 903 | "outputs": [], 904 | "source": [ 905 | "# Create Chla and SS raster for the latest date\n", 906 | "aoi_water = \"D:/WaterQuality/aoi/aoi_water.shp\"\n", 907 | "# Chla\n", 908 | "chlalist = glob.glob(\"D:/WaterQuality/predict/Chla_*.tif\")\n", 909 | "latestimg = chlalist[len(chlalist)-1]\n", 910 | "latestimg = latestimg[len(latestimg)-17:len(latestimg)]\n", 911 | "mergeras = arcpy.management.MosaicToNewRaster(chlalist, \"D:/WaterQuality/predict_display\", \"merge_\"+latestimg, \"\", \"32_BIT_FLOAT\", \"\", 1, \"LAST\")\n", 912 | "mergeras_focal = FocalStatistics(mergeras, NbrRectangle(3,3,\"CELL\"), \"MEDIAN\")\n", 913 | "outname = (\"D:/WaterQuality/predict_display/merge_\"+latestimg).replace(\".tif\", \"_smoothclip.tif\")\n", 914 | "arcpy.management.Clip(mergeras_focal, \"\", outname, aoi_water, \"\", \"ClippingGeometry\")\n", 915 | "# SS\n", 916 | "sslist = glob.glob(\"D:/WaterQuality/predict/SuSo_*.tif\")\n", 917 | "latestimg = sslist[len(sslist)-1]\n", 918 | "latestimg = latestimg[len(latestimg)-17:len(latestimg)]\n", 919 | "mergeras = arcpy.management.MosaicToNewRaster(sslist, \"D:/WaterQuality/predict_display\", \"merge_\"+latestimg, \"\", \"32_BIT_FLOAT\", \"\", 1, \"LAST\")\n", 920 | "mergeras_focal = FocalStatistics(mergeras, NbrRectangle(3,3,\"CELL\"), \"MEDIAN\")\n", 921 | "outname = (\"D:/WaterQuality/predict_display/merge_\"+latestimg).replace(\".tif\", \"_smoothclip.tif\")\n", 922 | "arcpy.management.Clip(mergeras_focal, \"\", outname, aoi_water, \"\", 
\"ClippingGeometry\")\n" 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "execution_count": null, 928 | "metadata": {}, 929 | "outputs": [], 930 | "source": [ 931 | "# Create datapoints using Fishnet tool in ArcGIS\n", 932 | "# Datapoints are used to plot charts in Dashboard\n", 933 | "# Extract and append values for the first date\n", 934 | "datapoint = \"D:/WaterQuality/ArcGISPro/DataPoint.shp\"\n", 935 | "datapoint_d1 = \"D:/WaterQuality/ArcGISPro/DataPoint_d1.shp\"\n", 936 | "datapoint_d2 = \"D:/WaterQuality/ArcGISPro/DataPoint_d2.shp\"\n", 937 | "datapoint_all = \"D:/WaterQuality/ArcGISPro/DataPoint_all.shp\"\n", 938 | "arcpy.management.Copy(datapoint, datapoint_d1)\n", 939 | "arcpy.management.Copy(datapoint, datapoint_d2)\n", 940 | "chla_d1 = \"D:/WaterQuality/predict/Chla_20130708.tif\"\n", 941 | "ss_d1 = \"D:/WaterQuality/predict/SuSo_20130708.tif\"\n", 942 | "arcpy.sa.ExtractMultiValuesToPoints(datapoint_d1, chla_d1+\" value\", \"BILINEAR\")\n", 943 | "arcpy.sa.ExtractMultiValuesToPoints(datapoint_d2, ss_d1+\" value\", \"BILINEAR\")\n", 944 | "with arcpy.da.UpdateCursor(datapoint_d1, [\"value\",\"Date\",\"parameter\",\"latest\",\"DateRange\"]) as cursor:\n", 945 | " for row in cursor:\n", 946 | " if row[0] < 0:\n", 947 | " cursor.deleteRow()\n", 948 | " else:\n", 949 | " row[1] = 20130708\n", 950 | " row[2] = \"Chla\"\n", 951 | " row[3] = 1\n", 952 | " row[4] = \"Day\"\n", 953 | " cursor.updateRow(row)\n", 954 | "with arcpy.da.UpdateCursor(datapoint_d2, [\"value\",\"Date\",\"parameter\",\"latest\",\"DateRange\"]) as cursor:\n", 955 | " for row in cursor:\n", 956 | " if row[0] < 0:\n", 957 | " cursor.deleteRow()\n", 958 | " else:\n", 959 | " row[1] = 20130708\n", 960 | " row[2] = \"SS\"\n", 961 | " row[3] = 1\n", 962 | " row[4] = \"Day\"\n", 963 | " cursor.updateRow(row)\n", 964 | "arcpy.management.Copy(datapoint_d1, datapoint_all)\n", 965 | "arcpy.management.Append(datapoint_d2, datapoint_all)\n", 966 | "with arcpy.da.UpdateCursor(datapoint_d1, 
[\"Date\",\"DateRange\"]) as cursor:\n", 967 | " for row in cursor:\n", 968 | " row[0] = round(20130708/100)*100+15\n", 969 | " row[1] = \"Month\"\n", 970 | " cursor.updateRow(row)\n", 971 | "with arcpy.da.UpdateCursor(datapoint_d2, [\"Date\",\"DateRange\"]) as cursor:\n", 972 | " for row in cursor:\n", 973 | " row[0] = round(20130708/100)*100+15\n", 974 | " row[1] = \"Month\"\n", 975 | " cursor.updateRow(row)\n", 976 | "arcpy.management.Append(datapoint_d1, datapoint_all)\n", 977 | "arcpy.management.Append(datapoint_d2, datapoint_all)\n", 978 | "arcpy.management.Delete(datapoint_d1)\n", 979 | "arcpy.management.Delete(datapoint_d2)" 980 | ] 981 | }, 982 | { 983 | "cell_type": "code", 984 | "execution_count": null, 985 | "metadata": {}, 986 | "outputs": [], 987 | "source": [ 988 | "# Append datapoint from 2nd to latest dates\n", 989 | "datapoint = \"D:/WaterQuality/ArcGISPro/DataPoint.shp\"\n", 990 | "datapoint_d1 = \"D:/WaterQuality/ArcGISPro/DataPoint_d1.shp\"\n", 991 | "datapoint_d2 = \"D:/WaterQuality/ArcGISPro/DataPoint_d2.shp\"\n", 992 | "datapoint_all = \"D:/WaterQuality/ArcGISPro/DataPoint_all.shp\"\n", 993 | "chlalist = glob.glob(\"D:/WaterQuality/predict/Chla_*.tif\")\n", 994 | "sslist = glob.glob(\"D:/WaterQuality/predict/SuSo_*.tif\")\n", 995 | "for i in range(1,len(chlalist)):\n", 996 | " chla_d1 = chlalist[i]\n", 997 | " ss_d1 = sslist[i]\n", 998 | " arcpy.management.Copy(datapoint, datapoint_d1)\n", 999 | " arcpy.management.Copy(datapoint, datapoint_d2)\n", 1000 | " arcpy.sa.ExtractMultiValuesToPoints(datapoint_d1, chla_d1+\" value\", \"BILINEAR\")\n", 1001 | " arcpy.sa.ExtractMultiValuesToPoints(datapoint_d2, ss_d1+\" value\", \"BILINEAR\")\n", 1002 | " # remove points with no data, else add date and extract list of valid points\n", 1003 | " newdatapt = []\n", 1004 | " d1 = int(chla_d1[len(chla_d1)-12:len(chla_d1)-4])\n", 1005 | " d1_month = round(d1/100)*100+15\n", 1006 | " with arcpy.da.UpdateCursor(datapoint_d1, 
[\"value\",\"pt\",\"Date\",\"parameter\",\"latest\",\"DateRange\"]) as cursor:\n", 1007 | " for row in cursor:\n", 1008 | " if row[0] < 0:\n", 1009 | " cursor.deleteRow()\n", 1010 | " else:\n", 1011 | " newdatapt.append(row[1])\n", 1012 | " row[2] = d1 # Date\n", 1013 | " row[3] = \"Chla\"\n", 1014 | " row[4] = 1 # latest\n", 1015 | " row[5] = \"Day\"\n", 1016 | " cursor.updateRow(row)\n", 1017 | " # modify values for SS\n", 1018 | " with arcpy.da.UpdateCursor(datapoint_d2, [\"value\",\"pt\",\"Date\",\"parameter\",\"latest\",\"DateRange\"]) as cursor:\n", 1019 | " for row in cursor:\n", 1020 | " if row[0] < 0:\n", 1021 | " cursor.deleteRow()\n", 1022 | " else:\n", 1023 | " row[2] = d1 # Date\n", 1024 | " row[3] = \"SS\"\n", 1025 | " row[4] = 1 # latest\n", 1026 | " row[5] = \"Day\"\n", 1027 | " cursor.updateRow(row) \n", 1028 | " # modify latest in datapoint_all\n", 1029 | " with arcpy.da.UpdateCursor(datapoint_all, [\"pt\",\"latest\",\"Date\",\"DateRange\"], \"latest = 1\") as cursor: # where clause\n", 1030 | " for row in cursor:\n", 1031 | " if row[0] in newdatapt: # if latest image provides obs on this pt\n", 1032 | " row[1] = 0\n", 1033 | " if row[3]==\"Month\" and row[2]==d1_month: # if same month as latest image\n", 1034 | " row[1] = 1\n", 1035 | " cursor.updateRow(row)\n", 1036 | " arcpy.management.Append(datapoint_d1, datapoint_all)\n", 1037 | " arcpy.management.Append(datapoint_d2, datapoint_all)\n", 1038 | " # add monthly average data\n", 1039 | " with arcpy.da.UpdateCursor(datapoint_d1, [\"Date\",\"DateRange\"]) as cursor:\n", 1040 | " for row in cursor:\n", 1041 | " row[0] = d1_month\n", 1042 | " row[1] = \"Month\"\n", 1043 | " cursor.updateRow(row)\n", 1044 | " with arcpy.da.UpdateCursor(datapoint_d2, [\"Date\",\"DateRange\"]) as cursor:\n", 1045 | " for row in cursor:\n", 1046 | " row[0] = d1_month\n", 1047 | " row[1] = \"Month\"\n", 1048 | " cursor.updateRow(row)\n", 1049 | " arcpy.management.Append(datapoint_d1, datapoint_all)\n", 1050 | " 
arcpy.management.Append(datapoint_d2, datapoint_all)\n", 1051 | " arcpy.management.Delete(datapoint_d1)\n", 1052 | " arcpy.management.Delete(datapoint_d2)\n", 1053 | " print(\"Finish: \"+ str(i)+\"/\"+str(len(chlalist)))" 1054 | ] 1055 | } 1056 | ], 1057 | "metadata": { 1058 | "kernelspec": { 1059 | "display_name": "Python 3", 1060 | "language": "python", 1061 | "name": "python3" 1062 | }, 1063 | "language_info": { 1064 | "codemirror_mode": { 1065 | "name": "ipython", 1066 | "version": 3 1067 | }, 1068 | "file_extension": ".py", 1069 | "mimetype": "text/x-python", 1070 | "name": "python", 1071 | "nbconvert_exporter": "python", 1072 | "pygments_lexer": "ipython3", 1073 | "version": "3.11.10" 1074 | }, 1075 | "orig_nbformat": 4 1076 | }, 1077 | "nbformat": 4, 1078 | "nbformat_minor": 2 1079 | } 1080 | -------------------------------------------------------------------------------- /Part2_ModelDevelopmentAndPrediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# **Estimation of water quality in Hong Kong using Sentinel-2 images in GEE and artificial neural network (ANN) in Google Colab environment**\n", 21 | "\n", 22 | "This is the second part of python codes used in the article. The codes are tested inside Google Colab environment using Hong Kong water as the study area." 
23 | ], 24 | "metadata": { 25 | "id": "bl3AKmzrS9Ea" 26 | } 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "source": [ 31 | "**Import required libraries & Initialize Google Earth Engine session**" 32 | ], 33 | "metadata": { 34 | "id": "5S6-DoMKTCdf" 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "metadata": { 40 | "id": "-YTf5EwSJAbR" 41 | }, 42 | "source": [ 43 | "import ee\n", 44 | "import numpy as np\n", 45 | "import pandas as pd\n", 46 | "from datetime import datetime\n", 47 | "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n", 48 | "from sklearn.neural_network import MLPRegressor\n", 49 | "from sklearn.model_selection import GridSearchCV\n", 50 | "ee.Authenticate()\n", 51 | "ee.Initialize()" 52 | ], 53 | "execution_count": null, 54 | "outputs": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "source": [ 59 | "# Step 1 - Match image data & in-situ station data\n", 60 | "\n", 61 | "Sun glint correction & water mask are performed to each image in this step" 62 | ], 63 | "metadata": { 64 | "id": "270h9bUtTImO" 65 | } 66 | }, 67 | { 68 | "cell_type": "code", 69 | "metadata": { 70 | "id": "ZZtgGOLYdssE" 71 | }, 72 | "source": [ 73 | "# Load image data & in-situ station data\n", 74 | "assetList = ee.data.getList({'id':\"users/khoyinivan/S2_Py6S_mask_m\"})\n", 75 | "url = 'https://raw.githubusercontent.com/ivanhykwong/Marine-Water-Quality-Time-Series-HK/main/MarineQuality_2015-2020.csv'\n", 76 | "station_list = ['TM2','TM3','TM4','TM5','TM6','TM7','TM8','SM1','SM2','SM3','SM4','SM5','SM6','SM7','SM9','SM10','SM11',\n", 77 | " 'SM12','SM13','SM17','SM18','SM19','SM20','PM1','PM2','PM3','PM4','PM6','PM7','PM8','PM9','PM11','JM3',\n", 78 | " 'JM4','DM1','DM2','DM3','DM4','DM5','NM1','NM2','NM3','NM5','NM6','NM8','MM1','MM2','MM3','MM4','MM5',\n", 79 | " 'MM6','MM7','MM8','MM13','MM14','MM15','MM16','MM17','MM19','WM1','WM2','WM3','WM4','EM1','EM2','EM3',\n", 80 | " 
'VM1','VM2','VM4','VM5','VM6','VM7','VM8','VM12','VM14','VM15']\n", 81 | "df_url = pd.read_csv(url)\n", 82 | "df_url = df_url[df_url['Station'].isin(station_list)]\n", 83 | "print(assetList)\n", 84 | "print(len(assetList))\n", 85 | "aoi = ee.Geometry.Polygon([[[113.800, 22.570],[113.800, 22.120],[114.514, 22.120],[114.514, 22.570]]])\n", 86 | "df_data = pd.DataFrame()\n", 87 | "\n", 88 | "for i in range(len(assetList)):\n", 89 | " # Extract image date\n", 90 | " assetid = assetList[i]['id']\n", 91 | " print(assetid)\n", 92 | " d1 = ee.Image(assetid)\n", 93 | " d1_date = d1.date().format('yyyy-MM-dd')\n", 94 | " print(d1_date.getInfo())\n", 95 | "\n", 96 | " # sun glint correction by subtracting half of B11 from all bands\n", 97 | " # https://www.mdpi.com/2072-4292/1/4/697/htm\n", 98 | " # https://eatlas.org.au/data/uuid/2932dc63-9c9b-465f-80bf-09073aacaf1c\n", 99 | " swir_half = d1.select('B11').multiply(0.5)\n", 100 | " d1 = d1.subtract(swir_half)\n", 101 | "\n", 102 | " # water mask using MNDWI\n", 103 | " green = d1.select('B3')\n", 104 | " ndwi = d1.expression('(GREEN - SWIR) / (GREEN + SWIR)', {'GREEN': d1.select('B3'), 'SWIR': d1.select('B11')})\n", 105 | " mask = ndwi.gte(0.0).bitwiseAnd(green.gte(0.0)) # MNDWI >= 0\n", 106 | " d1 = d1.updateMask(mask)\n", 107 | " d1 = ee.Image(d1)\n", 108 | "\n", 109 | " # Find nearest date between image & station data\n", 110 | " df = df_url.copy()\n", 111 | " df['Dates'] = pd.to_datetime(df['Dates'], format='%Y-%m-%d')\n", 112 | " imagedate = datetime.strptime(d1_date.getInfo(), '%Y-%m-%d')\n", 113 | " df['Image_date'] = imagedate\n", 114 | " df['Date_compare'] = abs(df['Dates'] - imagedate)\n", 115 | " df = df.sort_values(by=['Date_compare'])\n", 116 | " df = df.drop_duplicates(subset=['Station'])\n", 117 | "\n", 118 | " if imagedate.year > 2020:\n", 119 | " continue\n", 120 | "\n", 121 | " # Match image & station data, extract values to dataframe\n", 122 | " pts = 
ee.FeatureCollection(\"users/khoyinivan/MonitoringStation_wgs84_76\")\n", 123 | " pt_list = pts.toList(pts.size())\n", 124 | " df[['B1','B2','B3','B4','B5','B6','B7','B8','B8A','B11','B12']] = np.nan\n", 125 | " for pt in range(pt_list.length().getInfo()):\n", 126 | " pt1 = ee.Feature(pt_list.get(pt))\n", 127 | " pt1_buf = pt1.buffer(20)\n", 128 | " s2_dict = d1.reduceRegion(ee.Reducer.mean(), pt1_buf.geometry()).getInfo()\n", 129 | " n = pt1_buf.getInfo()['properties']['WaterStati']\n", 130 | " for b in ['B1','B2','B3','B4','B5','B6','B7','B8','B8A','B11','B12']:\n", 131 | " df.loc[df['Station'] == n, b] = s2_dict[b]\n", 132 | " df = df.dropna(subset = ['B2'])\n", 133 | " df['n'] = df.shape[0]\n", 134 | " print(df.shape)\n", 135 | "\n", 136 | " # Combine all image dates\n", 137 | " df_data = pd.concat([df_data, df])\n", 138 | "\n", 139 | "# Export tables\n", 140 | "df_data.to_csv('df_data.csv')" 141 | ], 142 | "execution_count": null, 143 | "outputs": [] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "source": [ 148 | "This block is written for inputing csv file as the data, skip this block if the dataframe is already loaded" 149 | ], 150 | "metadata": { 151 | "id": "lCerveSYTTfy" 152 | } 153 | }, 154 | { 155 | "cell_type": "code", 156 | "metadata": { 157 | "id": "Iu7U3QGzs3S5" 158 | }, 159 | "source": [ 160 | "# for inputing csv file as the data, skip this block if the dataframe is already loaded\n", 161 | "df_data = pd.read_csv('df_data.csv')\n", 162 | "df_data['Image_date'] = pd.to_datetime(df_data['Image_date'], format='%Y-%m-%d')\n", 163 | "df_data['Date_compare'] = pd.to_timedelta(df_data['Date_compare'])\n", 164 | "df_data = df_data.drop(columns=['Unnamed: 0'])" 165 | ], 166 | "execution_count": null, 167 | "outputs": [] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "source": [ 172 | "# Step 2 - Extract observations & create variables\n", 173 | "\n", 174 | "Extract observations with ≤1 day difference; remove outliers & compute band 
combinations" 175 | ], 176 | "metadata": { 177 | "id": "YUHga0c3T6QN" 178 | } 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "id": "-qM8M5UUWB_7" 184 | }, 185 | "source": [ 186 | "# Extract observations with ≤1 day difference\n", 187 | "\n", 188 | "max_day_diff = 1\n", 189 | "\n", 190 | "df = df_data[['Image_date', 'Dates', 'Date_compare', 'n',\n", 191 | " '5-day Biochemical Oxygen Demand mg_L', 'Ammonia Nitrogen mg_L', 'Chlorophyll-a ug_L', 'Dissolved Oxygen mg_L',\n", 192 | " 'E. coli cfu_100mL', 'Faecal Coliforms cfu_100mL', 'Nitrate Nitrogen mg_L', 'Nitrite Nitrogen mg_L',\n", 193 | " 'Orthophosphate Phosphorus mg_L', 'pH', 'Salinity psu', 'Secchi Disc Depth M', 'Silica mg_L',\n", 194 | " 'Suspended Solids mg_L', 'Temperature C', 'Total Inorganic Nitrogen mg_L', 'Total Kjeldahl Nitrogen mg_L',\n", 195 | " 'Total Nitrogen mg_L', 'Total Phosphorus mg_L', 'Turbidity NTU', 'Unionised Ammonia mg_L', 'Volatile Suspended Solids mg_L',\n", 196 | " 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8A', 'B11', 'B12']].copy()\n", 197 | "\n", 198 | "df = df.rename(columns={'Image_date': 'Image_Date', 'Dates': 'Station_Date',\n", 199 | " '5-day Biochemical Oxygen Demand mg_L': 'BOD', 'Ammonia Nitrogen mg_L': 'AmNi', 'Chlorophyll-a ug_L': 'Chla', 'Dissolved Oxygen mg_L': 'DO',\n", 200 | " 'E. 
coli cfu_100mL': 'Ecoli', 'Faecal Coliforms cfu_100mL': 'FC', 'Nitrate Nitrogen mg_L': 'NitraNi', 'Nitrite Nitrogen mg_L': 'NitriNi',\n", 201 | " 'Orthophosphate Phosphorus mg_L': 'OrPh', 'pH': 'pH', 'Salinity psu': 'Sal', 'Secchi Disc Depth M': 'SDD', 'Silica mg_L': 'Si',\n", 202 | " 'Suspended Solids mg_L': 'SS', 'Temperature C': 'Temp', 'Total Inorganic Nitrogen mg_L': 'TIN', 'Total Kjeldahl Nitrogen mg_L': 'TKN',\n", 203 | " 'Total Nitrogen mg_L': 'ToNi', 'Total Phosphorus mg_L': 'ToPh', 'Turbidity NTU': 'Tur', 'Unionised Ammonia mg_L': 'UnAm', 'Volatile Suspended Solids mg_L': 'VSS'})\n", 204 | "\n", 205 | "df['Date_compare'] = pd.to_numeric(df['Date_compare'].dt.days)\n", 206 | "df['Image_Year'] = pd.DatetimeIndex(df['Image_Date']).year\n", 207 | "\n", 208 | "df = df[(df['Date_compare'] <= max_day_diff) & (df['n'] >= 10)].copy().drop(columns=['Image_Date', 'Station_Date', 'Date_compare', 'n'])\n", 209 | "\n", 210 | "# Remove outlier using Tukey’s fences method\n", 211 | "\n", 212 | "Q1 = df.quantile(0.25)\n", 213 | "Q3 = df.quantile(0.75)\n", 214 | "IQR = Q3 - Q1\n", 215 | "df = df[~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR)))[['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8A']].any(axis=1)]\n", 216 | "\n", 217 | "# Replace 0 to min/2 (avoid inf errors during evaluation)\n", 218 | "\n", 219 | "wq = ['BOD', 'AmNi', 'Chla', 'DO', 'Ecoli', 'FC', 'NitraNi', 'NitriNi', 'OrPh', 'pH', 'Sal', 'SDD', 'Si', 'SS', 'Temp', 'TIN', 'TKN', 'ToNi', 'ToPh', 'Tur', 'UnAm', 'VSS']\n", 220 | "for a in wq:\n", 221 | " df[a]=df[a].replace(0, np.NaN)\n", 222 | " df[a]=df[a].replace(np.NaN,df[a].min()/2)\n", 223 | "df" 224 | ], 225 | "execution_count": null, 226 | "outputs": [] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "metadata": { 231 | "id": "Sldxw9LW4fSD" 232 | }, 233 | "source": [ 234 | "# Create independent variables\n", 235 | "\n", 236 | "bands = ['B' + str(b) for b in [*range(1,8),'8A',11,12]]\n", 237 | "wl = [443,490,560,665,705,740,783,865,1610,2190] 
#wavelength in nm\n", 238 | "\n", 239 | "# Multiply 10\n", 240 | "for i in bands:\n", 241 | " df[i] = df[i]*10\n", 242 | "\n", 243 | "# Square and cubic\n", 244 | "for i in bands:\n", 245 | " df[i+'_2'] = df[i]**2\n", 246 | " df[i+'_3'] = df[i]**3\n", 247 | "\n", 248 | "# Two-band ratio\n", 249 | "for i in bands:\n", 250 | " for j in bands:\n", 251 | " if (i != j) & (i < j):\n", 252 | " df['NR_'+i+j] = ((df[i] - df[j]) / (df[i] + df[j])).clip(lower=-1.0, upper=1.0)\n", 253 | "\n", 254 | "# Three-band ratio\n", 255 | "for i in range(0,10):\n", 256 | " for j in range(0,10):\n", 257 | " for k in range(0,10):\n", 258 | " if (j == i+1) & (k == j+1):\n", 259 | " df['TB_'+bands[i]+bands[j]+bands[k]] = (((1/df[bands[i]]) - (1/df[bands[j]])) * df[bands[k]]).clip(lower=-1.0, upper=1.0)\n", 260 | "\n", 261 | "# Line height algorithm\n", 262 | "for i in range(0,10):\n", 263 | " for j in range(0,10):\n", 264 | " for k in range(0,10):\n", 265 | " if (j == i+1) & (k == j+1):\n", 266 | " df['LH_'+bands[i]+bands[j]+bands[k]] = df[bands[j]] - df[bands[i]] - ((df[bands[k]] - df[bands[i]]) * ((wl[j]-wl[i])/(wl[k]-wl[i])))\n", 267 | "\n", 268 | "df.to_csv('df_data_filter.csv')" 269 | ], 270 | "execution_count": null, 271 | "outputs": [] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "source": [ 276 | "This block is written for inputing csv file as the data, skip this block if the dataframe is already loaded" 277 | ], 278 | "metadata": { 279 | "id": "ZIBT5XBwUVh5" 280 | } 281 | }, 282 | { 283 | "cell_type": "code", 284 | "metadata": { 285 | "id": "3MxIbH4UofAj" 286 | }, 287 | "source": [ 288 | "# for inputing csv file as the data, skip this block if the dataframe is already loaded\n", 289 | "df = pd.read_csv('df_data_filter.csv')\n", 290 | "df = df.drop(columns=['Unnamed: 0'])\n", 291 | "wq = ['BOD', 'AmNi', 'Chla', 'DO', 'Ecoli', 'FC', 'NitraNi', 'NitriNi', 'OrPh', 'pH', 'Sal', 'SDD', 'Si', 'SS', 'Temp', 'TIN', 'TKN', 'ToNi', 'ToPh', 'Tur', 'UnAm', 'VSS']" 292 | ], 293 | 
"execution_count": null, 294 | "outputs": [] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "source": [ 299 | "# Step 3 - Train artificial neural network (ANN) models\n", 300 | "\n", 301 | "Include selection of optimal variables through cross-validations; Based on GridSearchCV and MLPRegressor function in Scikit-learn" 302 | ], 303 | "metadata": { 304 | "id": "7LkflG_dUMZQ" 305 | } 306 | }, 307 | { 308 | "cell_type": "code", 309 | "source": [ 310 | "# Define train and test datasets\n", 311 | "\n", 312 | "df_train = df[df['Image_Year'] <= 2019].drop(columns=['Image_Year']).copy()\n", 313 | "df_test = df[df['Image_Year'] == 2020].drop(columns=['Image_Year']).copy()\n", 314 | "X_train = df_train.drop(columns = wq)\n", 315 | "X_test = df_test.drop(columns = wq)" 316 | ], 317 | "metadata": { 318 | "id": "FHQz6nx-Bz_5" 319 | }, 320 | "execution_count": null, 321 | "outputs": [] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "source": [ 326 | "# Define ANN function based on MLPRegressor\n", 327 | "# https://scikit-learn.org/stable/modules/neural_networks_supervised.html#neural-networks-supervised\n", 328 | "\n", 329 | "def ANN(df_train, X_train, wq_name, first, seed):\n", 330 | " print('seed:' + str(seed))\n", 331 | " print('var:' + str(first))\n", 332 | " print(wq_name)\n", 333 | " c = df_train.corr().copy()\n", 334 | " c = c[wq_name][22:113]\n", 335 | " c = abs(c).sort_values(ascending=False)[0:first]\n", 336 | " var = c.index.tolist()\n", 337 | "\n", 338 | " X_train2 = X_train[var]\n", 339 | " X_test2 = X_test[var]\n", 340 | " Y_train = df_train[wq_name]\n", 341 | " Y_test = df_test[wq_name]\n", 342 | "\n", 343 | " hidden_layer_sizes = [2,4,5,6,8,10,(2,2),(4,4),(5,5),(6,6),(8,8),(10,10)]\n", 344 | " tuned_parameters = {'hidden_layer_sizes': hidden_layer_sizes, 'alpha': 10.0 ** -np.arange(1, 7)}\n", 345 | " clf = GridSearchCV(MLPRegressor(random_state=seed,activation='logistic', solver='lbfgs', max_iter=10000, early_stopping=True),\n", 346 | " 
param_grid=tuned_parameters, scoring='r2', verbose=1, cv=5)\n", 347 | " clf.fit(X_train2, Y_train)\n", 348 | " nvar = len(var)\n", 349 | " best_layer = clf.best_estimator_.hidden_layer_sizes\n", 350 | " best_alpha = clf.best_estimator_.alpha\n", 351 | "\n", 352 | " regr = MLPRegressor(random_state=seed, hidden_layer_sizes=best_layer, alpha=best_alpha, activation='logistic', solver='lbfgs', max_iter=10000, early_stopping=True).fit(X_train2, Y_train)\n", 353 | "\n", 354 | " r_squared = regr.score(X_train2, Y_train)\n", 355 | " adjusted_r_squared = 1 - (1-r_squared)*(len(Y_train)-1)/(len(Y_train)-X_train2.shape[1]-1)\n", 356 | "\n", 357 | " # Evaluate model\n", 358 | " Y_train_pred = regr.predict(X_train2)\n", 359 | " Y_train_pred[Y_train_pred<0]=0.0\n", 360 | " corr_model = np.corrcoef(Y_train, Y_train_pred)[0, 1]\n", 361 | " rmse = mean_squared_error(Y_train, Y_train_pred, squared=False)\n", 362 | " mae = mean_absolute_error(Y_train, Y_train_pred)\n", 363 | " smape = np.mean(2*(np.abs(Y_train_pred - Y_train))/(np.abs(Y_train)+np.abs(Y_train_pred)))\n", 364 | " print(wq_name + ': ' + str(best_layer) + ', alpha: ' + str(best_alpha) + ', best_score: ' + str(clf.best_score_) + ', R2: ' + str(r_squared) + ', RMSE: ' + str(rmse))\n", 365 | "\n", 366 | " # Test model\n", 367 | " Y_test_pred = regr.predict(X_test2)\n", 368 | " Y_test_pred[Y_test_pred<0]=0.0\n", 369 | " r_squared_test = regr.score(X_test2, Y_test)\n", 370 | " corr_test = np.corrcoef(Y_test, Y_test_pred)[0, 1]\n", 371 | " rmse_test = mean_squared_error(Y_test, Y_test_pred, squared=False)\n", 372 | " mae_test = mean_absolute_error(Y_test, Y_test_pred)\n", 373 | " smape_test = np.mean(2*(np.abs(Y_test_pred - Y_test))/(np.abs(Y_test)+np.abs(Y_test_pred)))\n", 374 | " print(wq_name + ': r_squared_test: ' + str(r_squared_test) + ', rmse_test: ' + str(rmse_test) + ', smape_test: ' + str(smape_test))\n", 375 | "\n", 376 | " ANN_df = pd.DataFrame({'WQ': [wq_name], 'nvar':[nvar], 'var':[var], 
'random_state':[seed],\n", 377 | " 'best_layer':[best_layer], 'best_alpha':[best_alpha], 'best_score':[clf.best_score_],\n", 378 | " 'r2': [r_squared], 'adjusted_r2': [adjusted_r_squared], 'corr_model': [corr_model],\n", 379 | " 'rmse': [rmse], 'mae': [mae], 'smape': [smape],\n", 380 | " 'r2_test': [r_squared_test], 'corr_test': [corr_test],\n", 381 | " 'rmse_test': [rmse_test], 'mae_test': [mae_test], 'smape_test': [smape_test]})\n", 382 | " return(ANN_df)" 383 | ], 384 | "metadata": { 385 | "id": "pDjp8meoSA5-" 386 | }, 387 | "execution_count": null, 388 | "outputs": [] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "source": [ 393 | "# Apply ANN function to all water quality parameters\n", 394 | "# this process is very time-consuming, reduce the number of wq parameters, reduce the cross-validation variables, or use parallel sessions\n", 395 | "\n", 396 | "wq = ['BOD', 'AmNi', 'Chla', 'DO', 'Ecoli', 'FC', 'NitraNi', 'NitriNi', 'OrPh', 'pH', 'Sal', 'SDD', 'Si', 'SS', 'Temp', 'TIN', 'TKN', 'ToNi', 'ToPh', 'Tur', 'UnAm', 'VSS']\n", 397 | "ANN_result_list = [ANN(df_train, X_train, wq_name=value, first=f, seed=seed) for value in wq for f in range(4,13) for seed in range(1,11)]\n", 398 | "ANN_result = pd.concat(ANN_result_list)" 399 | ], 400 | "metadata": { 401 | "id": "BYXzlU0xcUog" 402 | }, 403 | "execution_count": null, 404 | "outputs": [] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "source": [ 409 | "Example output\n", 410 | 
"![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABQEAAABVCAYAAAD5XSIqAAAe5klEQVR4nO2dW5LbuA5ArVvZTfa/E3s9vh9TmlGr+QBAgC+dU5VK2xRJvElRSud4v9/fFwAAAAAAAAAAAGzLn9fr9fr79+9oOYbx+XzQH/1HizEM9Ed/9Ef/p4L+6I/+6P9U0B/90R/9nwr6f17/Gy0EAAAAAAAAAAAAxMIhIAAAAAAAAAAAwOZwCAgAAAAAAAAAALA5HAICAAAAAAAAAABsDoeAAAAAAAAAAAAAm/PjEPA4jn//5L5LXVPqD/BkVsqHEbLOZJ+VZfGQeyb9Z4J17T9Ka/9oGXqPMTO76wcAfWD9AwDYkx+HgN/v98ffqe9S17xe/ywU3+/33z8sGAC/82RmRsg6k31WlcWr1s6k/yywrv0kFyM9Y6d1rqtPd2Vn3QCgD6x/AAD74vLPgc+F4goLBsBc9H5b58n01H/GG/5d/c+65g/2BACYH83611LXn7YmYKufYI88O9pmR50i8bTXr0NAbnIAAADmPGCFNvApAEAdaiUAwL40vwmYegvwZLUDxfvvPby3pX5XYu5zbbwZeYKOJXK6WPW2tKXGtdg2pcP17/vPUtmkuVEaXyt7am7J7yq12q+3/hL5WuJtVf0139dyVyLLTrXMSks9u19jjZWSzyyxrO3XGnfWOVPjeK8tWh9qZZPU7lmJ0F96/Sy2afX19ftZdNKgyfHWteneZ7S9PNc/TV1v3T9407v+z2KrWer/KHusUP+jbTOi/s8S/6nx73PMUP+97VU9BDyO48dh3vl5R049U7//IvXPnU9Kfa5ts/MEHUukdJHqnUq+nE1K/VLjan8fS6rP/Xd5lvI4Nd91zFQ9KM2loZZ/qb9zslljs6f+Uv9KYqEmtzR+ZtBf+30tdyX6S+Jl5/WvpZ6dXNvv12rqUs33UrQ1wCPuLHVHU/ctfpLkQk5uqWyp77U5OJII/aXXz2KbFl3v38+ikxTNXq+lRsxqL8/1T1rXvfePrYyo/7PYapb6P8oeK9T/aNuMqP+zxP8q9d/bXslDwNGL0WqMWKx68wQdU1j0Lm2U7m3RuZaTQ7o50cjmtRGZhR76W/XVLhiRc3jMde2fypHc9z3XKekN0KpIa5a0n2WuHD1sP2vcXSnJMlN8ziLHKErr7g576939G1nTVo2B6PoyOqZWqP9XGWamt3yz2WOm+h9hm9H2jp5/t/pfkq35nwOXFJppU+pBacN9HONf5/fgCTpqWU3v04c1mbU6nWOmnnJ42ic3niQ2I/0UpX8vub3G8dQ/In6ueMXFbmtZjkhfaOpSj5q7Uk33INquOf/28udoLLVsZdvk5F5Vp1Fr+Qr28lj/eu0fd2BFW0XW/xXsMar+j7JNZP0fodPK9b/FXi7/O/DTORfInW8Sn6BjilX1rr1ebNnUXV85vj8Z9Xq6YbV3TjZPIvSPji/P8aP87xk/ubFbbJDKlVk2np70qHUSX/fI5acc6l7pWSPvm9HoeWdBW8tWtU2pVqyq0+s1Zi2f3V5e61+P/eMurGqrqPq/ij1G1P8Rtomu/6P8vWr9b7FX9hDw7HgOeP+cuvbKrpvskh124Qk6RlJKuntbhJ0lb9pIx8nJdo7hNZcUSWx6zTmj/tdxo/Ozh/6p9lSO5L7X2KBlYa99tzNemy5LLOR83yqHZB/jFXeelGTR2CjqxmDEvLMg1b9WU3diJZ16ybpSDHisf6W6Pmr/lGKG+r+KrazzW96Syn03mz161/8VbLOSTivWfw97ub0JeBbK88+KN0mpBUDilKvu1wVDM8bsPEHHlC41vbX9Um0pGSRz5KiNX3s6kOqby2/JXCXuepXsJtH3Oqc1Nnvqr82r1FylOLHYc6T+1u9rOWiJxbs9zj+7oqlZJTvkrtXUJYnvpbF8/fv6faq/R9xZ6o6m7pfsKLF9LhdKtpLIlpJrpX1hhP7X
a6U1dSRWXTVxNiu1HM/9LF1rZ4+BiPWvVtdb9w/ejKr/5/UjbTVb/e9tj5Xqf5RtRtb/0fG/Wv33stefmlFKn3PXa4vfLNydI7mu1HelDdCdJ+h4J6dLTW9NP2sfrZ1r/js/fz4fkVylNslcmjFLeucKZYsM0j4R+t+vreWV1l6S9tr4tTZP/b3na5Vjp/omQVqzNHGkic/a2NqaWJvvXv9K4/aqO9q1pXUszVxaX0rmnYUo/S3fj8Kqa8teZSZa9Wzd843EO3db9B9pk9H130MGCzPV/xH2WKX+R9pmVP2fIf4l489S/z3tVTwEtPL9/n6LAAD8KB20z5ZvHrJen7ashqf+o98WsLBSrIKOGfJyhfhaQUbwZ+W6DT7sGgPUNDnY6idPsYcl93e0zY46SZk9BkIOAV+v/R0LMJKV8stD1pX0vYP+68oOZWbw7Qwy1FhBRvAHv8OuMbCrXhFgq588xR4eb3ntwI46SZk9Bo73+/1c7wAAAAAAAAAAADyAP6/X6/X379/Rcgzj8/mgP/qPFmMY6I/+6I/+TwX90R/90f+poD/6oz/6PxX0//j978AAAAAAAAAAAAAwJxwCAgAAAAAAAAAAbA6HgAAAAAAAAAAAAJvDISAAAAAAAAAAAMDmcAgIAAAAAAAAAACwOX9yDcdx/Pvz9/t9Hcfx+n6//7adP6f6pNpgHBq/3P1+fnf9OccZJ6nvtbLW+qfk1PQ/r32/32LZViVnK0m/nN9bfHqvJ6BHW2s97O01Z20cS79cW2s9ks6vvbam471vLYctttHSU3+tH1P9cmvVSP+X9LDoWGrz1v8c07qGSL8/ydlO288idw7NOBIdX692/0tjanT+S+Kxtd+1bWT+X/tY4l9aN2s69l7/rPJIfVXLv1S7Zw5Y8Fo3W/c/rXVfO442VlPXSPdA1nVDg/U++t5HYktN3WjNKQ3S9a91Hbf4tzZmrl2K1P8Re7yWveHZnmtLHgKWktdyCATjkBze3qldUzocTB3waDbOkv6lz9L5z+8+n49IrlWx+sISN5L5TzgItKP1jUdN9pqzNo6lX21M75t/zY2IVcfrZ03909pGSm/9NX4s9ZPOXcNT/1Jfi4499Lf0Lfk5971EL8v+wAPNOFId71j8n+vXsg/Tzi+9thSPHv3ujMj/2lzea2Ptut7rn1WeWr+a71LtnjlgwStvLG1eulvGscRqadwIe2iw+lGjx/W7XN8c0fVP09cjPj1yQfJZijaOeq3xkn41v/3658C1yXsWUGgjtZFq3RBLi829rWXeWn9L+1PiuEfBk4yTk83rBu0pWHK6Nda95pSMo+0XUeMscmuutdTQ+3US/b1s00N/q6wr+t9z/h76X8eOHEd643TXsWVDrkEzjiTGpXj5eJb8j+wXgXeOea6NPehZY0pY9B+5z/fKG6/67+W3iD2n5F5B2zbD/kcqq4TR96ya/WmKHnGs3RtImG2Pqe1Xs8GPQ8BSAI8OQIATYrE/x3GYC19LX4BWVo8/6t0/aP14t1uPAwMJJT08Y3Wk/rPYekYi/J/qN5v9rfEo6SfRf8f4t9pmNnlafUW9+c2Ihx+acUbHaiQtddz6oGim+udJ1KFqJL3WeA/M/zGIVKBdkvrJtCZaa/EZ3X8XJHbILczf79f0BKTWd9RTZXgGZ+ytFmeaG+SrXtd+pbbVWNWPd0p65Np28qMUaxzPbpvSemj1/0y5YY3Vlhhv2Z/MQlSOzxQbo+WZuTaMyJvZkObx7PXfQw8Ns+X4lZniuOeDGe0a3zJmK+ZDQE2QzxicEMt5+Nv69pjllVhp/6fEZOtBfO1146i+AJ6sEH+SXE0dApz97nWw1LYqLQ8jZqKkR+4gqOVNgRXegsrFf03/VR42S9dDq/9LY/a8CarJaq1VuX73MST9orDOFZ3jvdc/qzxa/WvXr1QbLP6X9vPKAY9xLHlc8uNMPrbq4WHXGerfOZ/WHx5xXDoLGIFmjfcY04L5EFDCbJvup5E6
Xe85d8vptbR/riBI+j8lPmd+UgTQwsgal6JVnlqu5jY5uT69H8T19sdu/s+R8+Ns+reSi/9aHO++xrXm8YhDMK2svWvVbDxd/yhWqQ3WGi+Jm5kOAK2U/Jhqm3VtXCUeW7D4ozWOJWcBu9nbI8ZDDwFfr7lO6J9I69NFbzk8+2t02rEAAMA8NS5antIB4H3uWlskvf2xu/9rfpxJ/2vsXX9uHXNEHM9Cq/4jDgBPpLLu4mNr/O+if28i6s0IrDVeEjc7HABamWltLLFLHNeIiuPZ/RtJa4z/SQ3mZczrWDsG9CpcfbBLokhuiAHgGcxW47zlydW7Wentj9J8I2LiqfrPYHv4j9HxkMI6f+9+FkbZerRP7/Ty1WyxHYllTfHKf8860ttHs+0NU0TG8Wx6e8fxjGtcb1pi/NebgLknUDPfcECd0UnROv/9QFmb9KP1nwGJDXJPIHNjlN70rfWVygT/IfHHrHNK4qGlX00ua6xZ9W+JbWm9K8lmtfedHvqX5pD6MaqWeOlf0qPUZo3/XkTUHGscj7CBR/2z+r8WUx7rrSb+o2u8Vz8NPdbcCD1Gr39R8lgZcfjknTf372u1occBYO/4t7S1+D6q/s2U454y3Nsi41gT4z3qX8QeL3r/+yf1ZeogMCXQ+fPVIdfP1+9yDoM47sUpYuxcQuTiRzNuqX/pxqvW/9r2fr9Fcq2I1Qd37nncqy+k0do0l6sj5qyNo+1XqnH3dawl/qT6S+ZL6WitdzXZvPIvWv/SHCU/Rq5xVzz0L+lRi1VL/HvjUUdy49TiX6J/ql+03NprLTHe0s/jBr00f+u1I/tp8JzDe22UyNwqt4f+rfJo9ffa/7bgkTc1m5Xm8Mp/7TiWmmup8dK2Vt97+9EiU8qmnjlumT93TUqeqDiunQWU5pQi9X/EHq91/1vzW/IQsDTgXbhan9L1EEv0TVHEnNLNjVf/z+cjE2xBWjdrkty9FyiNHDwUsKPJPy8be83p2SbZkHlhWROl43jWNEt/CZH6a+aQjqW5RoKH/i1+sub2iPwvtXnqMav+njFu7ee9ts5Y/1vbNETGf1Rb7/WvVR7PdXCWvWWr3SJrowTvdWmGfLHQw4+W/r32wK37n95741H+997jWb7XXJM9BITnEv1UtfZWC4wndRCYwnqQxwEgAAAAAAAAQF84BIQf9DiY4fBnDSLfsiAGAAAAAAAAAPpyvN9v7sYBAAAAAAAAAAA25s/r9Xr9/ft3tBzD+Hw+6I/+o8UYBvqjP/qj/1NBf/RHf/R/KuiP/uiP/k8F/T+v/40WAgAAAAAAAAAAAGLhEBAAAAAAAAAAAGBzOAQEAAAAAAAAAADYHA4BAQAAAAAAAAAANodDQAAAAAAAAAAAgM35k/ryOI5/f/5+v6/jOF7f71fcDmMo+UHqo7tvr31TbZL+pe9rY5T65K6T9n8ST83RlWPhlF2Tt635b0Uqa6kOSGrHlVT/0vytNpDoKJG1NI7EPqV+2jYNlng8kehfWyMsY977R/tfcq2nj1vjTYp0/S7JI83jcxzt96nxNPsOCZoYksh6lUuz75HGjdf6p7WjNP6v15RkrekhqQ2pNg3W+mfJ8ZZ+ub49639pXqse1nUjJ4cWj/VPGuPaPO6x/l/H064Bknhs0d86rhTrGKWzk9R4kj2QdY/jtf/JyZa7NnV9bW3UtGnOAXJjS/De/9faNHXzbLfk/69DwFrQSoIa+lPyg9ZHuWCRHPjmrtEcFucOnlN9c99J+h/H8Xq/31V5VufpObrKod+Va7xqFv7aJigCqaylnKzla02nmo6tNtD4oySrRufctVY7tmwCrfEoHUdjC4tsPf3f28fWeJOi8UVNHkltyvmq9H1KPq3cNTQxJJX1jvbm6j5mbu/VgtaOmvi/Y2mLvPFNjaOtfx41r8WmHuu/pY5oYzVi3cjJocVr/Xu9ZIc7mjzusf5fx5OgjccW/VvHrWEdI2cry/64NJ6k3XP/k/pculYqS0sc
5+bzqgUR+//SHKU5NTVOIvf/ch1SCkVsMMCHlo3lCpybeq/+XpvDFXiKnruQusmX3lSmro30v0bWO6Vrtfk+q46acaSbg2s/a5un3KVrreOU9LCOacUzH7197CW3hggbS+fVfJ+6rnUP0XJtxJ6jR/zfscZ/D/2lbS1z1GxsrVUetUEjj5Qe60jEunH9rgWv9U+DR40fURskSOzjuVZ5E2XH6DxuQTq3tR7NHMdR9e9OS/3T2vSK6HcCzhyc8GyO45hiYYN5IUbmwPqgIvXUS7Pm9Dzwb5V1F7Q5J7GRNY939kGPeGs5/LrKI5HVU/5ZfV6K41qbRafW9S/i4M6qv3fbLsymY69Y3SXHI9c/LzQ+7b3+R67x1jguHepEP6iUtkXRO1YlOdXDDtI9zky1+sch4HlSWHvKMpMCEM8MC20qmb7f779/ajGZS0bYG02MAMzCygdXp+yeORcxZgR3+UZuzjWy7MLsOpbiOGKtmm39s+pfaiuNObpu3A/BI2rDaB29eIIe1ny09BtV/2etYzOsDVI9ZpA1R002Sx63xOpsa1wJqW08/C+16a83Aa8CpoSstQO8Xv+ddl//tIxRI7cBrB1qw3NYYZF4EpabIO3iOHIzZZl7h3X1qrM15+62s445wv/X/VEuhiN8nHtQVpLFax7tdbmHclYZJTad4cZKGscRN/Be61+LLFb9S22lMT1qkYXS/ZN3bRilYw5rfESsGyPwiPGSHpbDlVa7tPjU24/a2ui93koPwWqU7lm95PFe96X34tefe8Zqbr4RdSG3H7v+7OH/EhKbJv934LPzKVSqc60dnk1ug9MyhjbWPG8yAMCPXhuX1bjqfX9qeF1zr9db2kYglUf7FHgmHU8ZSvsjrY8jZWkZbyZSNr0yq9yt9I7/Xe3oTS4eLbUBZOxiL8/1z7v+98BbTs+cirbhzPkfLduKseqN995QYtPq7wSsneTO8OQJnkFrrBGrAG2cT5Va8qjnk8vz7+vPK1J6omdtG4HkTRitnLkxR/j/LsOItzY8ZJGM50mkr2aJfSs12/TK8dXtOBrvfJyRUWvuLrHpvf55xFtPn2r1n0223vFfmm+l/e8TamMLljVeatM/pU61QQFOiA2A/Yl4k+j8rvYGj7bGjHwbKGIuyRsk2rYR5OTJ+d865opvg1l9NeIAyHqYX4pRSX8NWrlnRCJ/dI7vYEeIZ0SM7BKbLXpE5n8v21r0n0223nFYmm+HnPBghN699/8t/HoT8L7pzj1dz7XD8+gVA/cDAa0M9wLJk4a9WbVW3WOzVINr10ajkbXlAHAkGh1bxrmSa/M8eJESEY/SA8Dv9/cvUbbYpoWofJTqYc1xr9ogOZyfhdpDg4gY8ch/j7Uqaq+u8X9JR6v+pTbrfBpa6r9Vnt46luix/rXETXSOe9X/WoyX9ka5fqXve+0N73NIZE2N0aq/ZQ5rP8v6p9Fjlr1vioj7kYh1w4tV9v/W+X79TsB7x3unWjuM4fRJrmDl2qxzaOXQyJCbp7TA5ebK9T8/fz6foiw74OX/1Vi5Vl1lr8lduzba/xpZJbXDwkw6WsYp1apSDFvbtHjEY02ekv+tY3rhrX+pLTW+Nf684rYlN71I5XhtjfeU27qHye1bpPsZyTypPp7rn8aOpZiz6K+5J9G0aZCOI9lzamtDbf4e638PO1rbarHZW39rjuf0kNzr5OTy8psUSc2p3SNaxkxRyykNHuuI1I/3tvs1ve/jtHa01iPvdcOTWff/92s0Nj35cQh4XpS7uNYO4yj5xMtfknFqseM1j9dcT+DJ9lhZd01898j/EhJZW2WM7C8h0h9W3Xr6vVX/CP9FrCva/iP19xrbOm+uvZevV4p/yzi16yy+0dLL/y2+HLn/1fgjd83M+V8bZ+Ra12Pdr42jkcua+y01I7oO1j5L5PGqY9pc1I6pbZfaxsuPEXWg5/ofsW7OlP9e13v2z/7vwPBcejw5qr31AQAAAAAAAAAAfnAICD/o
dQDHQR8AAAAAAAAAQD+O9/vNaQwAAAAAAAAAAMDG/Hm9Xq+/f/+OlmMYn88H/dF/tBjDQH/0R3/0fyroj/7oj/5PBf3RH/3R/6mg/+f1v9FCAAAAAAAAAAAAQCwcAgIAAAAAAAAAAGwOh4AAAAAAAAAAAACbwyEgAAAAAAAAAADA5nAICAAAAAAAAAAAsDl/7l8cx5G9+Pv9hgoDMZw+lfjv6v/z+uM4Xt/vN9km6V/6vjaGps9VXkn/4zhe7/e7OO4uXO3zJDRxtxKanD6vj9TfUmPu19by1lpbtLbKYRmnZPdUW0kXaVuufQb9W3zVGjc99S/5w6PNMzakWGpptP9z+9WI+NeMk9tHpZD4X1MbNf2kePheMo4kxzX1VNNew6v+S2L8JOXHWj9NvHnhsf5f273WBq1sJSTjaHI8usZF+F9b/7RrdW2elppSukbCKP+nxpbYZgb/p+bU1r9Wm16vmbn+a/Zw92ta9P91CHgdqHawAvNz96EkgHPXnAlZGiN3jaRv6drazXRL/915au7uGgPanI72v0ae2rXSG4Pzc83HlvpnkTvXR9OmkV1r81Y89G/xlUfc9PJ/ba3yaPOKDSmWWtrL/7UNs1f9k45Tkr0kq2RvJZlP2k+Kh+8l4+R8LJm/5pvWGPCq/1q9JNdZ480Lz/U/J5+1Noyo/9Ic71HjPP1vqX+atbo2T2tNaWG0/yWftbJosa5/uTaNr6w21cqeI7r+S2zRssct6f/rnwNLnQDzk1pIIjYBPWiVfWXdWyF3/2GHGLDkdKT/NfK01COpDtcxvepfrzoacQBQG1dClP5SX1nnH+F/6QZY03bOqW3zugGqzd8yZqT/r9d7yRs1n9VXnj6W4rXmWH2captljbOOo/Hjff6R+zvv9V9yCNqrbmjkbh1nlhrXOkfkXqm1pliZwf/RezwJFv+X2jx81WNvPqKO9Nzj8jsBYWlGbIJhDYiL9TiOY7rNr4WWAx/rfDMdcGueZHrNN5v+pZthS1uELFH08v99zJn3AxJZI+Km1f+jD5K9H2L0oOWm1uuAc6Xc2IUIm1vHnNX/tVqlkVF6MNVr/ZvJ5iNlsdpcUv+0eqxQ/yPjuCaL+hDwKshdqNz3MIb7qbA1GWZZOFrk6H1TCgA6vt/vv39K68csm9lINIeI5+eIp6C9aFmrcnHjtf5pKfnD2pa6TtIWERtPyD8vNLYq+apUG639YC48cnXH3CzVcWsbjKF3PVplb6SJ1dnj2GrzVXw1gsg9bvJ3AtaEOSfNCTRzgD6Nu7964ZHEtTFqsUYhgSvUpnVIrSWSfJ7Bxy0buNrhT4rUPwcYaQPp/PfrPNaqu/4j1r+SP6xtr9f42JCup1b/tzI67q9Y9i5SX2njRtKmwcvOLW/LaQ9We+LxsL3kq9T3rXvl2SnVcWtbNBE275EzvbjHeERtGrk3srwNVorVlvvZnnpbbS7pp9VjpfovHUOzx5XIoj4EhLXwLrJSPIpYawFo7Q/7gO/X534gUsvv1CZzdko61vRfiZr/Ig4aVrfZ6NjwnEPTf7c8XpmVDgBnwyOOSwe8tWtWplTHrW3gz8havbp/a7G6e47DP7QcdFrigt8JuDGlNy1W5Cr/9e/7d5L+8BxYMP048222PKq9/dIrBkp1yVKzVkOio+YAt1WWnda/FbD6v0bvPIbfrHYAOKLe1uZsieMZYn/E+l+q49Y2iMMS4625OkNutECs+jNj/Zf0b3nT0XJGwpuA4ELvIsxbfiBBcuAAcma0W83HPd8EkMiRatuFmo45X8EeRPq/lMcr5dJKsp542dzqf8v8I2JEMqdlPWqxv6feq8XtKCJirdfhuQfaGG/J1Rn3FDPV+JlkaUGrx6z1P4f3vkY6Fm8Cbsz9aUJUEvR8y8brnyCNXiQgntVvFlPUcrp3XGvkKV2reVvo3k/i49bFtLfNpfr3qPFe+pf8UprDGjdetvGKcWvbndGx0dv/tTFGU9JfcxPsHeOamlqiVk+j
/T96HY+o//cxvPTvnR9etdE6h1dNtc5/fm4dp9e9WgQ5/Wt28tDRY02pMZv/Z4uN3vufKyvtf601PnqPm30T8Bw4Neh14vPn63ezBemTufuqhat/JdekEkUSG7l5pPJL+p92eb/fojFX5sl5admgz442p6P9r5End+19MSstsqnFMkVuPAuedTRFScea/jXZPPzvpX8pH0tzWOLGU26PGLe2RcaGBOs6LBnrPm5Nf+sh1vm3h+y1G7Qrmvm8Y7zWT4N2Ha3tA1OUYrU2/0xrXI6aLyz6e+amFa/a+Hrl/ehdU7X0GKdljh57fG39K+mXarfOb60pGkb6X5Ljs/j/lK9nrkbvzT3nsNT46D1u9hBQUszvPz/tgGEVvPwiGSd3jTaptaTiUdLn8/mo51qNp+blznpr8qyHHTzksdQOa5uFSPla9ehhh1b9o/TQ7FVa8Mo5bVtkbEhosa+H/1eK/9li3EN3y9yefvSK8ag8GBn/PfSWMKo2trZpWFXHmeqf5t7M+p11Tus4I9f/1vk0rBr/mmss/T382Zo3LfrzOwHhF9Gn6tc5Uux8eAMAAAAAAAAAMAIOAeEHvQ7gOOgDAAAAAAAAAOjH8X6/OY0BAAAAAAAAAADYmP8DSH72yPbqafMAAAAASUVORK5CYII=)" 411 | ], 412 | "metadata": { 413 | "id": "Yw2d3aB96lhy" 414 | } 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "source": [ 419 | "# Step 4 - Apply ANN models to all images\n", 420 | "\n", 421 | "Output images are exported to image collections in GEE" 422 | ], 423 | "metadata": { 424 | "id": "Mn33CDTqUhqE" 425 | } 426 | }, 427 | { 428 | "cell_type": "code", 429 | "source": [ 430 | "# Choose the optimal model according to the training results in previous step\n", 431 | "# Obtain model parameters (first, seed, best_layer and best_alpha) that were used to create the optimal model (see Example Output above)\n", 432 | "# Then obtain weights of each neuron (regr_int and regr_coef) from the model parameters\n", 433 | "\n", 434 | "# Chla\n", 435 | "wq_name='Chla'\n", 436 | "first=9\n", 437 | "seed=3\n", 438 | "best_layer=(6,6)\n", 439 | "best_alpha=0.1\n", 440 | "activation='logistic'\n", 441 | "c = df_train.corr().copy()\n", 442 | "c = c[wq_name][22:113]\n", 443 | "c = abs(c).sort_values(ascending=False)[0:first]\n", 444 | "var = c.index.tolist()\n", 445 | "X_train2 = X_train[var]\n", 446 | "X_test2 = X_test[var]\n", 447 | "Y_train = df_train[wq_name]\n", 448 | "Y_test = df_test[wq_name]\n", 449 | "\n", 450 | "regr = MLPRegressor(random_state=seed, hidden_layer_sizes=best_layer, alpha=best_alpha, 
activation=activation, solver='lbfgs', max_iter=10000, early_stopping=True).fit(X_train2, Y_train)\n", 451 | "regr_int = regr.intercepts_\n", 452 | "print(regr_int)\n", 453 | "regr_coef = regr.coefs_\n", 454 | "print(regr_coef)\n", 455 | "\n", 456 | "# SS\n", 457 | "wq_name='SS'\n", 458 | "first=11\n", 459 | "seed=1\n", 460 | "best_layer=5\n", 461 | "best_alpha=0.01\n", 462 | "activation='logistic'\n", 463 | "c = df_train.corr().copy()\n", 464 | "c = c[wq_name][22:113]\n", 465 | "c = abs(c).sort_values(ascending=False)[0:first]\n", 466 | "var = c.index.tolist()\n", 467 | "X_train2 = X_train[var]\n", 468 | "X_test2 = X_test[var]\n", 469 | "Y_train = df_train[wq_name]\n", 470 | "Y_test = df_test[wq_name]\n", 471 | "\n", 472 | "regr = MLPRegressor(random_state=seed, hidden_layer_sizes=best_layer, alpha=best_alpha, activation=activation, solver='lbfgs', max_iter=10000, early_stopping=True).fit(X_train2, Y_train)\n", 473 | "regr_int = regr.intercepts_\n", 474 | "print(regr_int)\n", 475 | "regr_coef = regr.coefs_\n", 476 | "print(regr_coef)\n", 477 | "\n", 478 | "# Tur\n", 479 | "wq_name='Tur'\n", 480 | "first=8\n", 481 | "seed=1\n", 482 | "best_layer=2\n", 483 | "best_alpha=0.1\n", 484 | "activation='logistic'\n", 485 | "c = df_train.corr().copy()\n", 486 | "c = c[wq_name][22:113]\n", 487 | "c = abs(c).sort_values(ascending=False)[0:first]\n", 488 | "var = c.index.tolist()\n", 489 | "X_train2 = X_train[var]\n", 490 | "X_test2 = X_test[var]\n", 491 | "Y_train = df_train[wq_name]\n", 492 | "Y_test = df_test[wq_name]\n", 493 | "\n", 494 | "regr = MLPRegressor(random_state=seed, hidden_layer_sizes=best_layer, alpha=best_alpha, activation=activation, solver='lbfgs', max_iter=10000, early_stopping=True).fit(X_train2, Y_train)\n", 495 | "regr_int = regr.intercepts_\n", 496 | "print(regr_int)\n", 497 | "regr_coef = regr.coefs_\n", 498 | "print(regr_coef)" 499 | ], 500 | "metadata": { 501 | "id": "V6BdGP8U0HRU" 502 | }, 503 | "execution_count": null, 504 | "outputs": [] 505 | }, 
506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "-uU2nHFJvUck" 510 | }, 511 | "source": [ 512 | "# Apply ANN model to entire image collection to estimate Chla\n", 513 | "\n", 514 | "assetList = ee.data.getList({'id':\"users/khoyinivan/S2_Py6S_mask_m\"})\n", 515 | "print(assetList)\n", 516 | "print(len(assetList))\n", 517 | "aoi = ee.Geometry.Polygon([[[113.800, 22.570],[113.800, 22.120],[114.514, 22.120],[114.514, 22.570]]])\n", 518 | "\n", 519 | "for i in range(len(assetList)):\n", 520 | " assetid = assetList[i]['id']\n", 521 | " print(assetid)\n", 522 | " d1 = ee.Image(assetid)\n", 523 | " d1_date = d1.date().format('yyyy-MM-dd')\n", 524 | " print(d1_date.getInfo())\n", 525 | " imagedate = datetime.strptime(d1_date.getInfo(), '%Y-%m-%d')\n", 526 | "\n", 527 | " # sun glint correction & water mask\n", 528 | " swir_half = d1.select('B11').multiply(0.5)\n", 529 | " d1 = d1.subtract(swir_half)\n", 530 | " ndwi = d1.expression('(GREEN - NIR) / (GREEN + NIR)', {'GREEN': d1.select('B3'), 'NIR': d1.select('B8')})\n", 531 | " green = d1.select('B3')\n", 532 | " mask = ndwi.gte(0.0).bitwiseAnd(green.gte(0.0)) # NDWI >= 0\n", 533 | " d1 = d1.updateMask(mask)\n", 534 | " d1 = ee.Image(d1)\n", 535 | "\n", 536 | " # Chla\n", 537 | " name = ('Chla' + d1_date.getInfo()).replace('-','')\n", 538 | " regr_int = [([ 3.20462862, -4.79607408, 3.72196477, 4.01253827, 2.75393548, 0.16435048]),\n", 539 | " ([-8.74146412, 1.66061145, 0.35839572, -4.36972238, 2.15743342, -1.37527732]),\n", 540 | " ([-0.08353174])]\n", 541 | " # Var: ['TB_B2B3B4','LH_B3B4B5','LH_B1B2B3','LH_B7B8AB11','B2','NR_B2B3','B2_2','B8A_3','B1']\n", 542 | " # wl = [443,490,560,665,705,740,783,865,1610,2190]\n", 543 | " regr_coef = [([[-6.84163335e+00, 9.11557386e-06, 1.19411541e+00,1.21073081e+00, 1.17434541e+00, 8.92068972e+00],\n", 544 | " [ 1.06607504e-01, -1.74092425e-05, 2.85270757e-01, 3.47071654e+00, 6.96241402e+00, -3.52038843e+00],\n", 545 | " [ 4.29869179e+00, 1.02256279e-05, 
2.65460603e+00, 1.74870671e+00, -3.90495929e+00, -4.54783495e-01],\n", 546 | " [ 1.17519063e+01, 4.75687333e-06, 6.04921374e-02, -5.21168117e+00, -2.80664876e+00, 1.13555655e+00],\n", 547 | " [-8.89242522e-01, -5.21662319e-08, -1.27341490e+00, -4.42972054e+00, -1.33156806e+01, -4.00857609e+00],\n", 548 | " [ 6.44798755e+00, -2.83891110e-05, -2.00038534e+00, 1.36985042e+01, -3.79913833e+00, 3.92121192e+00],\n", 549 | " [ 5.14241155e+00, -6.06040494e-06, -2.24315388e+00, -5.67994435e+00, -2.52787495e+00, -3.26207134e+00],\n", 550 | " [ 6.30920542e+00, 1.23064247e-05, -3.37502754e-02, 2.83332915e-01, 3.75643313e+00, -4.06294587e-01],\n", 551 | " [-8.35038133e+00, -3.68571701e-05, -5.79237965e+00, 1.48052989e+00, -9.32440766e+00, -3.83986207e+00]]),\n", 552 | " ([[-2.94657990e+00, -1.42551128e+01, -3.45316647e+00, -1.09401098e+01, -7.62867142e+00, -1.62820122e+00],\n", 553 | " [-6.15294982e-04, -2.50226568e-04, 2.28071787e-04, 6.44446416e-05, 1.58399756e-04, 1.03145638e-04],\n", 554 | " [ 2.12536696e-01, -6.21840984e+00, 2.45608299e+00, 2.26263141e+00, 1.26472416e+00, 2.88959971e-01],\n", 555 | " [ 5.46308778e-03, 6.95531564e+00, -1.77089020e+00, 1.00952176e+01, -2.91392188e+00, -3.57717559e+00],\n", 556 | " [ 8.38737781e+00, -6.48407937e-02, -1.47897332e+00, 1.24298506e+01, 9.11814110e+00, 3.07254104e+00],\n", 557 | " [ 6.86841847e+00, 7.39998611e+00, 2.06500733e+00, -5.27889971e+00, 5.32904476e+00, 3.87425016e+00]]),\n", 558 | " ([[13.20989264], [20.96326861], [ 7.10314308], [12.19847729], [-8.33632165], [ 8.97286496]])]\n", 559 | " B1 = d1.select('B1').multiply(10).rename('0')\n", 560 | " B2 = d1.select('B2').multiply(10).rename('0')\n", 561 | " B3 = d1.select('B3').multiply(10).rename('0')\n", 562 | " B4 = d1.select('B4').multiply(10).rename('0')\n", 563 | " B5 = d1.select('B5').multiply(10).rename('0')\n", 564 | " B6 = d1.select('B6').multiply(10).rename('0')\n", 565 | " B7 = d1.select('B7').multiply(10).rename('0')\n", 566 | " B8A = 
d1.select('B8A').multiply(10).rename('0')\n", 567 | " B11 = d1.select('B11').multiply(10).rename('0')\n", 568 | " B12 = d1.select('B12').multiply(10).rename('0')\n", 569 | "\n", 570 | " V1 = d1.expression('((1/B2)-(1/B3))*B4',{'B2': B2, 'B3': B3, 'B4': B4}).rename('0')\n", 571 | " V1 = V1.gte(-1.0).multiply(V1).subtract(V1.lt(-1.0))\n", 572 | " V1 = V1.lte(1.0).multiply(V1).add(V1.gt(1.0)).rename('0')\n", 573 | " V2 = d1.expression('B4-B3-(B5-B3)*((665-560)/(705-560))',{'B3': B3, 'B4': B4, 'B5': B5}).rename('0')\n", 574 | " V3 = d1.expression('B2-B1-(B3-B1)*((490-443)/(560-443))',{'B1':B1, 'B2': B2, 'B3': B3}).rename('0')\n", 575 | " V4 = d1.expression('B8A-B7-(B11-B7)*((865-783)/(1610-783))',{'B7': B7, 'B8A': B8A, 'B11': B11}).rename('0')\n", 576 | " V5 = B2\n", 577 | " V6 = d1.expression('(B2-B3)/(B2+B3)',{'B2': B2, 'B3': B3}).rename('0')\n", 578 | " V6 = V6.gte(-1.0).multiply(V6).subtract(V6.lt(-1.0))\n", 579 | " V6 = V6.lte(1.0).multiply(V6).add(V6.gt(1.0)).rename('0')\n", 580 | " V7 = B2.multiply(B2).rename('0')\n", 581 | " V8 = B8A.multiply(B8A).multiply(B8A).rename('0')\n", 582 | " V9 = B1\n", 583 | "\n", 584 | " N1 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9',{\n", 585 | " 'Int': regr_int[0][0], 'C1': regr_coef[0][0][0], 'C2': regr_coef[0][1][0], 'C3': regr_coef[0][2][0],\n", 586 | " 'C4': regr_coef[0][3][0], 'C5': regr_coef[0][4][0], 'C6': regr_coef[0][5][0],\n", 587 | " 'C7': regr_coef[0][6][0], 'C8': regr_coef[0][7][0], 'C9': regr_coef[0][8][0],\n", 588 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9\n", 589 | " }).multiply(-1.0).exp().add(1).pow(-1) #sigmoid\n", 590 | " # N1 = N1.gte(0.0).multiply(N1) #relu\n", 591 | "\n", 592 | " N2 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9',{\n", 593 | " 'Int': regr_int[0][1], 'C1': regr_coef[0][0][1], 'C2': regr_coef[0][1][1], 'C3': regr_coef[0][2][1],\n", 594 | " 'C4': regr_coef[0][3][1], 'C5': 
regr_coef[0][4][1], 'C6': regr_coef[0][5][1],\n", 595 | " 'C7': regr_coef[0][6][1], 'C8': regr_coef[0][7][1], 'C9': regr_coef[0][8][1],\n", 596 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9\n", 597 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 598 | "\n", 599 | " N3 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9',{\n", 600 | " 'Int': regr_int[0][2], 'C1': regr_coef[0][0][2], 'C2': regr_coef[0][1][2], 'C3': regr_coef[0][2][2],\n", 601 | " 'C4': regr_coef[0][3][2], 'C5': regr_coef[0][4][2], 'C6': regr_coef[0][5][2],\n", 602 | " 'C7': regr_coef[0][6][2], 'C8': regr_coef[0][7][2], 'C9': regr_coef[0][8][2],\n", 603 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9\n", 604 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 605 | "\n", 606 | " N4 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9',{\n", 607 | " 'Int': regr_int[0][3], 'C1': regr_coef[0][0][3], 'C2': regr_coef[0][1][3], 'C3': regr_coef[0][2][3],\n", 608 | " 'C4': regr_coef[0][3][3], 'C5': regr_coef[0][4][3], 'C6': regr_coef[0][5][3],\n", 609 | " 'C7': regr_coef[0][6][3], 'C8': regr_coef[0][7][3], 'C9': regr_coef[0][8][3],\n", 610 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9\n", 611 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 612 | "\n", 613 | " N5 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9',{\n", 614 | " 'Int': regr_int[0][4], 'C1': regr_coef[0][0][4], 'C2': regr_coef[0][1][4], 'C3': regr_coef[0][2][4],\n", 615 | " 'C4': regr_coef[0][3][4], 'C5': regr_coef[0][4][4], 'C6': regr_coef[0][5][4],\n", 616 | " 'C7': regr_coef[0][6][4], 'C8': regr_coef[0][7][4], 'C9': regr_coef[0][8][4],\n", 617 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9\n", 618 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 619 | "\n", 620 | " N6 = 
d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9',{\n", 621 | " 'Int': regr_int[0][5], 'C1': regr_coef[0][0][5], 'C2': regr_coef[0][1][5], 'C3': regr_coef[0][2][5],\n", 622 | " 'C4': regr_coef[0][3][5], 'C5': regr_coef[0][4][5], 'C6': regr_coef[0][5][5],\n", 623 | " 'C7': regr_coef[0][6][5], 'C8': regr_coef[0][7][5], 'C9': regr_coef[0][8][5],\n", 624 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9\n", 625 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 626 | "\n", 627 | " N21 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6',{\n", 628 | " 'Int': regr_int[1][0], 'C1': regr_coef[1][0][0], 'C2': regr_coef[1][1][0], 'C3': regr_coef[1][2][0],\n", 629 | " 'C4': regr_coef[1][3][0], 'C5': regr_coef[1][4][0], 'C6': regr_coef[1][5][0],\n", 630 | " 'V1': N1, 'V2': N2, 'V3': N3, 'V4': N4, 'V5': N5, 'V6': N6\n", 631 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 632 | "\n", 633 | " N22 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6',{\n", 634 | " 'Int': regr_int[1][1], 'C1': regr_coef[1][0][1], 'C2': regr_coef[1][1][1], 'C3': regr_coef[1][2][1],\n", 635 | " 'C4': regr_coef[1][3][1], 'C5': regr_coef[1][4][1], 'C6': regr_coef[1][5][1],\n", 636 | " 'V1': N1, 'V2': N2, 'V3': N3, 'V4': N4, 'V5': N5, 'V6': N6\n", 637 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 638 | "\n", 639 | " N23 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6',{\n", 640 | " 'Int': regr_int[1][2], 'C1': regr_coef[1][0][2], 'C2': regr_coef[1][1][2], 'C3': regr_coef[1][2][2],\n", 641 | " 'C4': regr_coef[1][3][2], 'C5': regr_coef[1][4][2], 'C6': regr_coef[1][5][2],\n", 642 | " 'V1': N1, 'V2': N2, 'V3': N3, 'V4': N4, 'V5': N5, 'V6': N6\n", 643 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 644 | "\n", 645 | " N24 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6',{\n", 646 | " 'Int': regr_int[1][3], 'C1': regr_coef[1][0][3], 'C2': regr_coef[1][1][3], 'C3': regr_coef[1][2][3],\n", 647 | " 'C4': regr_coef[1][3][3], 
'C5': regr_coef[1][4][3], 'C6': regr_coef[1][5][3],\n", 648 | " 'V1': N1, 'V2': N2, 'V3': N3, 'V4': N4, 'V5': N5, 'V6': N6\n", 649 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 650 | "\n", 651 | " N25 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6',{\n", 652 | " 'Int': regr_int[1][4], 'C1': regr_coef[1][0][4], 'C2': regr_coef[1][1][4], 'C3': regr_coef[1][2][4],\n", 653 | " 'C4': regr_coef[1][3][4], 'C5': regr_coef[1][4][4], 'C6': regr_coef[1][5][4],\n", 654 | " 'V1': N1, 'V2': N2, 'V3': N3, 'V4': N4, 'V5': N5, 'V6': N6\n", 655 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 656 | "\n", 657 | " N26 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6',{\n", 658 | " 'Int': regr_int[1][5], 'C1': regr_coef[1][0][5], 'C2': regr_coef[1][1][5], 'C3': regr_coef[1][2][5],\n", 659 | " 'C4': regr_coef[1][3][5], 'C5': regr_coef[1][4][5], 'C6': regr_coef[1][5][5],\n", 660 | " 'V1': N1, 'V2': N2, 'V3': N3, 'V4': N4, 'V5': N5, 'V6': N6\n", 661 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 662 | "\n", 663 | " d1_predict = d1.expression('Int+C1*N21+C2*N22+C3*N23+C4*N24+C5*N25+C6*N26',{\n", 664 | " 'Int': regr_int[2][0], 'C1': regr_coef[2][0][0], 'C2': regr_coef[2][1][0], 'C3': regr_coef[2][2][0],\n", 665 | " 'C4': regr_coef[2][3][0], 'C5': regr_coef[2][4][0], 'C6': regr_coef[2][5][0],\n", 666 | " 'N21': N21, 'N22': N22, 'N23': N23, 'N24': N24, 'N25': N25, 'N26': N26\n", 667 | " })\n", 668 | "\n", 669 | " d1_predict = d1_predict.rename(name).set('system:time_start', ee.Date(d1_date).millis())\n", 670 | "\n", 671 | " task = ee.batch.Export.image.toAsset(image=d1_predict, description=name, assetId = 'users/khoyinivan/S2_Chla_ANN/' + name, scale = 10, region = aoi)\n", 672 | " task.start()" 673 | ], 674 | "execution_count": null, 675 | "outputs": [] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "source": [ 680 | "# Apply ANN model to entire image collection to estimate Suspended Solids\n", 681 | "\n", 682 | "assetList = 
ee.data.getList({'id':\"users/khoyinivan/S2_Py6S_mask_m\"})\n", 683 | "print(assetList)\n", 684 | "print(len(assetList))\n", 685 | "aoi = ee.Geometry.Polygon([[[113.800, 22.570],[113.800, 22.120],[114.514, 22.120],[114.514, 22.570]]])\n", 686 | "\n", 687 | "for i in range(len(assetList)):\n", 688 | " assetid = assetList[i]['id']\n", 689 | " print(assetid)\n", 690 | " d1 = ee.Image(assetid)\n", 691 | " d1_date = d1.date().format('yyyy-MM-dd')\n", 692 | " print(d1_date.getInfo())\n", 693 | " imagedate = datetime.strptime(d1_date.getInfo(), '%Y-%m-%d')\n", 694 | "\n", 695 | " # sun glint correction & water mask\n", 696 | " swir_half = d1.select('B11').multiply(0.5)\n", 697 | " d1 = d1.subtract(swir_half)\n", 698 | " ndwi = d1.expression('(GREEN - NIR) / (GREEN + NIR)', {'GREEN': d1.select('B3'), 'NIR': d1.select('B8')})\n", 699 | " green = d1.select('B3')\n", 700 | " mask = ndwi.gte(0.0).bitwiseAnd(green.gte(0.0)) # NDWI >= 0\n", 701 | " d1 = d1.updateMask(mask)\n", 702 | " d1 = ee.Image(d1)\n", 703 | "\n", 704 | " # SS\n", 705 | " name = ('SS' + d1_date.getInfo()).replace('-','')\n", 706 | " regr_int = [[12.55921704, -13.73353476, -1.56213489, -15.96103343, -2.74424462],\n", 707 | " [5.20100899]]\n", 708 | " # Var: ['LH_B5B6B7', 'LH_B4B5B6', 'B5_3', 'B5_2', 'B4_2', 'B4_3', 'B4', 'B5', 'B3_2', 'B3_3', 'B3']\n", 709 | " # wl = [443,490,560,665,705,740,783,865,1610,2190]\n", 710 | " regr_coef = [[[0.00315948251, -32.9780628, -5.07213985, 7.38647208, -28.4785664],\n", 711 | " [-0.000983638194, 25.9594867, 9.85539378, -15.2927115, -13.2072681],\n", 712 | " [-0.00164435665, 36.9788436, 3.02589409, -2.5088956, -1.45870294],\n", 713 | " [-0.00471352417, 20.1816497, 3.80586779, -7.21854322, -1.17430456],\n", 714 | " [-0.00635952648, -2.15180342, -5.37556561, 5.24388819, 10.5224534],\n", 715 | " [-0.00342851065, -8.5595385, 2.34126206, 4.644868, 1.65118437],\n", 716 | " [-0.010480811, 5.77984149, 36.2489087, 3.7447754, 28.9145655],\n", 717 | " [-0.00881720748, -35.9948876, 
12.793081, -15.1404911, 15.8537979],\n", 718 | " [-0.0146337387, 7.69747295, -17.8781654, 13.9578202, -9.13328681],\n", 719 | " [-0.0106495051, -13.5599014, -0.717967751, 15.7539373, -14.516977],\n", 720 | " [-0.0179451399, 18.4680285, -1.31164633, 9.56437974, -0.594566828]],\n", 721 | " [[0.00693088504], [60.2560856], [-22.1701438], [2.36782265], [23.713127]]]\n", 722 | " B1 = d1.select('B1').multiply(10).rename('0')\n", 723 | " B2 = d1.select('B2').multiply(10).rename('0')\n", 724 | " B3 = d1.select('B3').multiply(10).rename('0')\n", 725 | " B4 = d1.select('B4').multiply(10).rename('0')\n", 726 | " B5 = d1.select('B5').multiply(10).rename('0')\n", 727 | " B6 = d1.select('B6').multiply(10).rename('0')\n", 728 | " B7 = d1.select('B7').multiply(10).rename('0')\n", 729 | " B8A = d1.select('B8A').multiply(10).rename('0')\n", 730 | " B11 = d1.select('B11').multiply(10).rename('0')\n", 731 | " B12 = d1.select('B12').multiply(10).rename('0')\n", 732 | "\n", 733 | " V1 = d1.expression('B6-B5-(B7-B5)*((740-705)/(783-705))',{'B5': B5, 'B6': B6, 'B7': B7}).rename('0')\n", 734 | " V2 = d1.expression('B5-B4-(B6-B4)*((705-665)/(740-665))',{'B4': B4, 'B5': B5, 'B6': B6}).rename('0')\n", 735 | " V3 = B5.multiply(B5).multiply(B5).rename('0')\n", 736 | " V4 = B5.multiply(B5).rename('0')\n", 737 | " V5 = B4.multiply(B4).rename('0')\n", 738 | " V6 = B4.multiply(B4).multiply(B4).rename('0')\n", 739 | " V7 = B4\n", 740 | " V8 = B5\n", 741 | " V9 = B3.multiply(B3).rename('0')\n", 742 | " V10 = B3.multiply(B3).multiply(B3).rename('0')\n", 743 | " V11 = B3\n", 744 | "\n", 745 | " N1 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9+C10*V10+C11*V11',{\n", 746 | " 'Int': regr_int[0][0], 'C1': regr_coef[0][0][0], 'C2': regr_coef[0][1][0], 'C3': regr_coef[0][2][0],\n", 747 | " 'C4': regr_coef[0][3][0], 'C5': regr_coef[0][4][0], 'C6': regr_coef[0][5][0],\n", 748 | " 'C7': regr_coef[0][6][0], 'C8': regr_coef[0][7][0], 'C9': regr_coef[0][8][0],\n", 749 | " 'C10': 
regr_coef[0][9][0], 'C11': regr_coef[0][10][0],\n", 750 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9, 'V10': V10, 'V11': V11\n", 751 | " }).multiply(-1.0).exp().add(1).pow(-1) #sigmoid\n", 752 | "\n", 753 | " N2 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9+C10*V10+C11*V11',{\n", 754 | " 'Int': regr_int[0][1], 'C1': regr_coef[0][0][1], 'C2': regr_coef[0][1][1], 'C3': regr_coef[0][2][1],\n", 755 | " 'C4': regr_coef[0][3][1], 'C5': regr_coef[0][4][1], 'C6': regr_coef[0][5][1],\n", 756 | " 'C7': regr_coef[0][6][1], 'C8': regr_coef[0][7][1], 'C9': regr_coef[0][8][1],\n", 757 | " 'C10': regr_coef[0][9][1], 'C11': regr_coef[0][10][1],\n", 758 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9, 'V10': V10, 'V11': V11\n", 759 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 760 | "\n", 761 | " N3 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9+C10*V10+C11*V11',{\n", 762 | " 'Int': regr_int[0][2], 'C1': regr_coef[0][0][2], 'C2': regr_coef[0][1][2], 'C3': regr_coef[0][2][2],\n", 763 | " 'C4': regr_coef[0][3][2], 'C5': regr_coef[0][4][2], 'C6': regr_coef[0][5][2],\n", 764 | " 'C7': regr_coef[0][6][2], 'C8': regr_coef[0][7][2], 'C9': regr_coef[0][8][2],\n", 765 | " 'C10': regr_coef[0][9][2], 'C11': regr_coef[0][10][2],\n", 766 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9, 'V10': V10, 'V11': V11\n", 767 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 768 | "\n", 769 | " N4 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9+C10*V10+C11*V11',{\n", 770 | " 'Int': regr_int[0][3], 'C1': regr_coef[0][0][3], 'C2': regr_coef[0][1][3], 'C3': regr_coef[0][2][3],\n", 771 | " 'C4': regr_coef[0][3][3], 'C5': regr_coef[0][4][3], 'C6': regr_coef[0][5][3],\n", 772 | " 'C7': regr_coef[0][6][3], 'C8': regr_coef[0][7][3], 'C9': regr_coef[0][8][3],\n", 773 | " 'C10': regr_coef[0][9][3], 
'C11': regr_coef[0][10][3],\n", 774 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9, 'V10': V10, 'V11': V11\n", 775 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 776 | "\n", 777 | " N5 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8+C9*V9+C10*V10+C11*V11',{\n", 778 | " 'Int': regr_int[0][4], 'C1': regr_coef[0][0][4], 'C2': regr_coef[0][1][4], 'C3': regr_coef[0][2][4],\n", 779 | " 'C4': regr_coef[0][3][4], 'C5': regr_coef[0][4][4], 'C6': regr_coef[0][5][4],\n", 780 | " 'C7': regr_coef[0][6][4], 'C8': regr_coef[0][7][4], 'C9': regr_coef[0][8][4],\n", 781 | " 'C10': regr_coef[0][9][4], 'C11': regr_coef[0][10][4],\n", 782 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8, 'V9': V9, 'V10': V10, 'V11': V11\n", 783 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 784 | "\n", 785 | " d1_predict = d1.expression('Int+C1*N1+C2*N2+C3*N3+C4*N4+C5*N5',{\n", 786 | " 'Int': regr_int[1][0], 'C1': regr_coef[1][0][0], 'C2': regr_coef[1][1][0], 'C3': regr_coef[1][2][0],\n", 787 | " 'C4': regr_coef[1][3][0], 'C5': regr_coef[1][4][0],\n", 788 | " 'N1': N1, 'N2': N2, 'N3': N3, 'N4': N4, 'N5': N5\n", 789 | " })\n", 790 | "\n", 791 | " d1_predict = d1_predict.rename(name).set('system:time_start', ee.Date(d1_date).millis())\n", 792 | "\n", 793 | " task = ee.batch.Export.image.toAsset(image=d1_predict, description=name, assetId = 'users/khoyinivan/S2_SS_ANN/' + name, scale = 10, region = aoi)\n", 794 | " task.start()" 795 | ], 796 | "metadata": { 797 | "id": "wXEJ7rPQjQ3a" 798 | }, 799 | "execution_count": null, 800 | "outputs": [] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "source": [ 805 | "# Apply ANN model to entire image collection to estimate Turbidity\n", 806 | "\n", 807 | "assetList = ee.data.getList({'id':\"users/khoyinivan/S2_Py6S_mask_m\"})\n", 808 | "print(assetList)\n", 809 | "print(len(assetList))\n", 810 | "aoi = ee.Geometry.Polygon([[[113.800, 22.570],[113.800, 
22.120],[114.514, 22.120],[114.514, 22.570]]])\n", 811 | "\n", 812 | "for i in range(len(assetList)):\n", 813 | " assetid = assetList[i]['id']\n", 814 | " print(assetid)\n", 815 | " d1 = ee.Image(assetid)\n", 816 | " d1_date = d1.date().format('yyyy-MM-dd')\n", 817 | " print(d1_date.getInfo())\n", 818 | " imagedate = datetime.strptime(d1_date.getInfo(), '%Y-%m-%d')\n", 819 | "\n", 820 | " # sun glint correction & water mask\n", 821 | " swir_half = d1.select('B11').multiply(0.5)\n", 822 | " d1 = d1.subtract(swir_half)\n", 823 | " ndwi = d1.expression('(GREEN - NIR) / (GREEN + NIR)', {'GREEN': d1.select('B3'), 'NIR': d1.select('B8')})\n", 824 | " green = d1.select('B3')\n", 825 | " mask = ndwi.gte(0.0).bitwiseAnd(green.gte(0.0)) # NDWI >= 0\n", 826 | " d1 = d1.updateMask(mask)\n", 827 | " d1 = ee.Image(d1)\n", 828 | "\n", 829 | " # Tur\n", 830 | " name = ('Tur' + d1_date.getInfo()).replace('-','')\n", 831 | " regr_int = [[1.03043919, -5.1108822], [26.60795985]]\n", 832 | " # Var: ['LH_B4B5B6', 'LH_B5B6B7', 'LH_B2B3B4', 'B3_3', 'B3_2', 'B5_3', 'B3', 'B5_2']\n", 833 | " # wl = [443,490,560,665,705,740,783,865,1610,2190]\n", 834 | " regr_coef = [[[-24.70594813, 14.37269736],\n", 835 | " [1.01369778, 0.71021701],\n", 836 | " [5.39600925, 7.8477396],\n", 837 | " [-5.40222666, -13.46987698],\n", 838 | " [-3.67650209, 1.77537138],\n", 839 | " [-1.75628292, 6.8203711],\n", 840 | " [5.18825675, 7.52539217],\n", 841 | " [10.51626892, 8.31789011]],\n", 842 | " [[-27.16748834], [16.79998435]]]\n", 843 | " B1 = d1.select('B1').multiply(10).rename('0')\n", 844 | " B2 = d1.select('B2').multiply(10).rename('0')\n", 845 | " B3 = d1.select('B3').multiply(10).rename('0')\n", 846 | " B4 = d1.select('B4').multiply(10).rename('0')\n", 847 | " B5 = d1.select('B5').multiply(10).rename('0')\n", 848 | " B6 = d1.select('B6').multiply(10).rename('0')\n", 849 | " B7 = d1.select('B7').multiply(10).rename('0')\n", 850 | " B8A = d1.select('B8A').multiply(10).rename('0')\n", 851 | " B11 = 
d1.select('B11').multiply(10).rename('0')\n", 852 | " B12 = d1.select('B12').multiply(10).rename('0')\n", 853 | "\n", 854 | " V1 = d1.expression('B5-B4-(B6-B4)*((705-665)/(740-665))',{'B4': B4, 'B5': B5, 'B6': B6}).rename('0')\n", 855 | " V2 = d1.expression('B6-B5-(B7-B5)*((740-705)/(783-705))',{'B5': B5, 'B6': B6, 'B7': B7}).rename('0')\n", 856 | " V3 = d1.expression('B3-B2-(B4-B2)*((560-490)/(665-490))',{'B2': B2, 'B3': B3, 'B4': B4}).rename('0')\n", 857 | " V4 = B3.multiply(B3).multiply(B3).rename('0')\n", 858 | " V5 = B3.multiply(B3).rename('0')\n", 859 | " V6 = B5.multiply(B5).multiply(B5).rename('0')\n", 860 | " V7 = B3\n", 861 | " V8 = B5.multiply(B5).rename('0')\n", 862 | "\n", 863 | " N1 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8',{\n", 864 | " 'Int': regr_int[0][0], 'C1': regr_coef[0][0][0], 'C2': regr_coef[0][1][0], 'C3': regr_coef[0][2][0],\n", 865 | " 'C4': regr_coef[0][3][0], 'C5': regr_coef[0][4][0], 'C6': regr_coef[0][5][0],\n", 866 | " 'C7': regr_coef[0][6][0], 'C8': regr_coef[0][7][0],\n", 867 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8\n", 868 | " }).multiply(-1.0).exp().add(1).pow(-1) #sigmoid\n", 869 | "\n", 870 | " N2 = d1.expression('Int+C1*V1+C2*V2+C3*V3+C4*V4+C5*V5+C6*V6+C7*V7+C8*V8',{\n", 871 | " 'Int': regr_int[0][1], 'C1': regr_coef[0][0][1], 'C2': regr_coef[0][1][1], 'C3': regr_coef[0][2][1],\n", 872 | " 'C4': regr_coef[0][3][1], 'C5': regr_coef[0][4][1], 'C6': regr_coef[0][5][1],\n", 873 | " 'C7': regr_coef[0][6][1], 'C8': regr_coef[0][7][1],\n", 874 | " 'V1': V1, 'V2': V2, 'V3': V3, 'V4': V4, 'V5': V5, 'V6': V6, 'V7': V7, 'V8': V8\n", 875 | " }).multiply(-1.0).exp().add(1).pow(-1)\n", 876 | "\n", 877 | " d1_predict = d1.expression('Int+C1*N1+C2*N2',{\n", 878 | " 'Int': regr_int[1][0], 'C1': regr_coef[1][0][0], 'C2': regr_coef[1][1][0],\n", 879 | " 'N1': N1, 'N2': N2\n", 880 | " })\n", 881 | "\n", 882 | " d1_predict = d1_predict.rename(name).set('system:time_start', 
ee.Date(d1_date).millis())\n", 883 | "\n", 884 | " task = ee.batch.Export.image.toAsset(image=d1_predict, description=name, assetId = 'users/khoyinivan/S2_Tur_ANN/' + name, scale = 10, region = aoi)\n", 885 | " task.start()" 886 | ], 887 | "metadata": { 888 | "id": "EHgPr0ROtlEq" 889 | }, 890 | "execution_count": null, 891 | "outputs": [] 892 | } 893 | ] 894 | } --------------------------------------------------------------------------------