├── LICENSE
├── README.md
├── code
│   ├── 1_export_harmonics_tfrecord.js
│   ├── 2_tfrecord_to_tiff.py
│   ├── 3_sam_inference.py
│   ├── 4_vectorise_preds.py
│   ├── environment.yml
│   ├── readme.md
│   └── utils.py
└── ukfields.jpeg
/LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi.
database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | ![ukfieldslogo](ukfields.jpeg) 3 |

4 | 5 | ### [View UKFields in your browser here](https://spiruel.users.earthengine.app/view/uk-fields) 6 | 7 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11110206.svg)](https://doi.org/10.5281/zenodo.11110206) 8 | 9 | # UK Fields 10 | 11 | The ukfields dataset is a publicly accessible Earth Engine asset of automatically delineated field boundaries spanning England, Wales, Scotland, and Northern Ireland. 12 | 13 | ### Credits 14 | This dataset was produced by [Samuel Bancroft](https://github.com/Spiruel) (University of Leeds) and [Jake Wilkins](https://github.com/jakenotjay). 15 | 16 | ### Dataset Details 17 | The ukfields dataset contains field boundaries for the United Kingdom, derived from harmonic composites of Sentinel-2 imagery captured in 2021. The delineation process leveraged the Segment Anything Model (SAM) from Meta, enabling efficient field segmentation at scale. The segmented fields have been masked to a 2021 Dynamic World composite of cropland. 18 | 19 | ### Why Open Data Matters 20 | Accurate field boundary data is highly valuable, especially in regions where such information is scarce. Releasing the ukfields dataset and the associated code is a step towards democratising access to parcel information over large geographic areas, and it serves as a blueprint for replicating similar efforts in other regions worldwide. 21 | 22 | ### Getting Started 23 | To access the ukfields dataset in Google Earth Engine, use the following asset path: 24 | 25 | `var ukfields = ee.FeatureCollection('users/spiruel/ukfields');` 26 | 27 | You can also download a [fiboa](https://fiboa.org/) GeoParquet from [Zenodo](https://zenodo.org/records/11110206). 28 | 29 | For more information on accessing and working with the dataset, feel free to contact the authors or raise an issue in this GitHub repository. 30 | -------------------------------------------------------------------------------- /code/1_export_harmonics_tfrecord.js: -------------------------------------------------------------------------------- 1 | var START_DATE = '2022-10-01' 2 | var END_DATE = '2023-06-02' 3 | var CLOUD_FILTER = 60; 4 | var CLD_PRB_THRESH = 50; 5 | var NIR_DRK_THRESH = 0.15; 6 | var CLD_PRJ_DIST = 1; 7 | var BUFFER = 50; 8 | 9 | var aoi = ee.FeatureCollection('FAO/GAUL/2015/level0').filterMetadata('ADM0_CODE','equals',256); 10 | 11 | var dw = ee.ImageCollection('GOOGLE/DYNAMICWORLD/V1') 12 | .filterDate(START_DATE, END_DATE) 13 | .filterBounds(aoi); 14 | 15 | var classification = dw.select('label'); 16 | var dwComposite = classification.reduce(ee.Reducer.mode()); 17 | 18 | // Clip the composite and add it to the Map. 19 | var croplands = dwComposite.updateMask(dwComposite.clip(aoi).eq(4)); 20 | var grass = dwComposite.updateMask(dwComposite.clip(aoi).eq(2)); 21 | var combinedMask = croplands.blend(grass); 22 | 23 | var get_s2_sr_cld_col = function(aoi, start_date, end_date) { 24 | // Import and filter S2 SR. 25 | var s2_sr_col = (ee.ImageCollection('COPERNICUS/S2_SR') 26 | .filterBounds(aoi) 27 | .filterDate(start_date, end_date) 28 | .filter(ee.Filter.lte('CLOUDY_PIXEL_PERCENTAGE', CLOUD_FILTER))) 29 | 30 | // Import and filter s2cloudless. 31 | var s2_cloudless_col = (ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY') 32 | .filterBounds(aoi) 33 | .filterDate(start_date, end_date)) 34 | 35 | // Join the filtered s2cloudless collection to the SR collection by the 'system:index' property.
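  // (Note: ee.Join.saveFirst('s2cloudless') attaches the matching cloud-probability image to each SR image as a property named 's2cloudless'; add_cloud_bands() below reads it back with img.get('s2cloudless') to build the cloud mask.)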
36 | return ee.ImageCollection(ee.Join.saveFirst('s2cloudless').apply({ 37 | 'primary': s2_sr_col, 38 | 'secondary': s2_cloudless_col, 39 | 'condition': ee.Filter.equals({ 40 | 'leftField': 'system:index', 41 | 'rightField': 'system:index' 42 | }) 43 | })) 44 | }; 45 | 46 | var add_cloud_bands = function(img) { 47 | // Get s2cloudless image, subset the probability band. 48 | var cld_prb = ee.Image(img.get('s2cloudless')).select('probability'); 49 | 50 | // Condition s2cloudless by the probability threshold value. 51 | var is_cloud = cld_prb.gt(CLD_PRB_THRESH).rename('clouds'); 52 | 53 | // Add the cloud probability layer and cloud mask as image bands. 54 | return img.addBands(ee.Image([cld_prb, is_cloud])); 55 | } 56 | 57 | var add_shadow_bands = function(img) { 58 | // Identify water pixels from the SCL band. 59 | var not_water = img.select('SCL').neq(6) 60 | 61 | // Identify dark NIR pixels that are not water (potential cloud shadow pixels). 62 | var SR_BAND_SCALE = 1e4 63 | var dark_pixels = img.select('B8').lt(NIR_DRK_THRESH*SR_BAND_SCALE).multiply(not_water).rename('dark_pixels') 64 | 65 | // Determine the direction to project cloud shadow from clouds (assumes UTM projection). 66 | var shadow_azimuth = ee.Number(90).subtract(ee.Number(img.get('MEAN_SOLAR_AZIMUTH_ANGLE'))); 67 | 68 | // Project shadows from clouds for the distance specified by the CLD_PRJ_DIST input. 69 | var cld_proj = (img.select('clouds').directionalDistanceTransform(shadow_azimuth, CLD_PRJ_DIST*10) 70 | .reproject({'crs': img.select(0).projection(), 'scale': 100}) 71 | .select('distance') 72 | .mask() 73 | .rename('cloud_transform')) 74 | 75 | // Identify the intersection of dark pixels with cloud shadow projection. 76 | var shadows = cld_proj.multiply(dark_pixels).rename('shadows') 77 | 78 | // Add dark pixels, cloud projection, and identified shadows as image bands. 79 | return img.addBands(ee.Image([dark_pixels, cld_proj, shadows])) 80 | } 81 | 82 | var add_cld_shdw_mask = function(img) { 83 | // Add cloud component bands. 84 | var img_cloud = add_cloud_bands(img) 85 | 86 | // Add cloud shadow component bands. 87 | var img_cloud_shadow = add_shadow_bands(img_cloud) 88 | 89 | // Combine cloud and shadow mask, set cloud and shadow as value 1, else 0. 90 | var is_cld_shdw = img_cloud_shadow.select('clouds').add(img_cloud_shadow.select('shadows')).gt(0) 91 | 92 | // Remove small cloud-shadow patches and dilate remaining pixels by BUFFER input. 93 | // 20 m scale is for speed, and assumes clouds don't require 10 m precision. 94 | is_cld_shdw = (is_cld_shdw.focalMin(2).focalMax(BUFFER*2/20) 95 | .reproject({'crs': img.select([0]).projection(), 'scale': 20}) 96 | .rename('cloudmask')) 97 | 98 | // Add the final cloud-shadow mask to the image. 99 | return img_cloud_shadow.addBands(is_cld_shdw) 100 | } 101 | 102 | var apply_cld_shdw_mask = function(img) { 103 | // Subset the cloudmask band and invert it so clouds/shadow are 0, else 1. 104 | var not_cld_shdw = img.select('cloudmask').not() 105 | 106 | // Subset reflectance bands and update their masks, return the result. 
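  // ('B.*' below is a regular-expression band selector: it keeps every band whose name starts with 'B', i.e. the optical reflectance bands.)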
107 | return img.select('B.*').updateMask(not_cld_shdw) 108 | } 109 | 110 | var s2_sr_cld_col_eval = get_s2_sr_cld_col(geometry, START_DATE, END_DATE) 111 | var l8toa = s2_sr_cld_col_eval.map(add_cld_shdw_mask).map(apply_cld_shdw_mask) 112 | 113 | //var roi = ee.Geometry.Point(-6.173543904111879, 8.062628227578857); 114 | var roi = geometry2 //ee.Geometry.Point([0.334626864725478,52.444419628380246]).buffer(10); 115 | 116 | Map.centerObject(roi, 10); 117 | 118 | // This field contains UNIX time in milliseconds. 119 | var timeField = 'system:time_start'; 120 | 121 | // Use this function to mask clouds in Landsat 8 imagery. 122 | var maskClouds = function(image) { 123 | var quality = image.select('BQA'); 124 | var cloud01 = quality.eq(61440); 125 | var cloud02 = quality.eq(53248); 126 | var cloud03 = quality.eq(28672); 127 | var mask = cloud01.or(cloud02).or(cloud03).not(); 128 | return image.updateMask(mask); 129 | }; 130 | 131 | // Use this function to add variables for NDVI, time and a constant 132 | // to Landsat 8 imagery. 133 | var addVariables = function(image) { 134 | // Compute time in fractional years since the epoch. 135 | var date = ee.Date(image.get(timeField)); 136 | var years = date.difference(ee.Date('1970-01-01'), 'year'); 137 | // Return the image with the added bands. 138 | return image 139 | // Add an NDVI band. 140 | .addBands(image.normalizedDifference(['B8', 'B4']).rename('NDVI')).float() 141 | // Add a time band. 142 | .addBands(ee.Image(years).rename('t').float()) 143 | // Add a constant band. 144 | .addBands(ee.Image.constant(1)); 145 | }; 146 | 147 | // Remove clouds, add variables and filter to the area of interest. 148 | var filteredLandsat = l8toa 149 | .filterBounds(geometry) 150 | // .map(maskClouds) 151 | .map(addVariables); 152 | 153 | // Plot a time series of NDVI at a single location. 154 | var l8Chart = ui.Chart.image.series(filteredLandsat.select('NDVI'), roi) 155 | .setChartType('ScatterChart') 156 | .setOptions({ 157 | title: 'Landsat 8 NDVI time series at ROI', 158 | trendlines: {0: { 159 | color: 'CC0000' 160 | }}, 161 | lineWidth: 1, 162 | pointSize: 3, 163 | }); 164 | print(l8Chart); 165 | 166 | // Linear trend ---------------------------------------------------------------- 167 | // List of the independent variable names 168 | var independents = ee.List(['constant', 't']); 169 | 170 | // Name of the dependent variable. 171 | var dependent = ee.String('NDVI'); 172 | 173 | // Compute a linear trend. This will have two bands: 'residuals' and 174 | // a 2x1 band called coefficients (columns are for dependent variables). 175 | var trend = filteredLandsat.select(independents.add(dependent)) 176 | .reduce(ee.Reducer.linearRegression(independents.length(), 1)); 177 | // Map.addLayer(trend, {}, 'trend array image'); 178 | 179 | // Flatten the coefficients into a 2-band image 180 | var coefficients = trend.select('coefficients') 181 | .arrayProject([0]) 182 | .arrayFlatten([independents]); 183 | 184 | // Compute a de-trended series. 185 | var detrended = filteredLandsat.map(function(image) { 186 | return image.select(dependent).subtract( 187 | image.select(independents).multiply(coefficients).reduce('sum')) 188 | .rename(dependent) 189 | .copyProperties(image, [timeField]); 190 | }); 191 | 192 | // Plot the detrended results. 
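// (Note: the 'Landsat 8' wording in the chart titles and variable names here appears to be a leftover from an Earth Engine Landsat harmonic-modelling example; the collection actually plotted is the cloud-masked Sentinel-2 series built above. Detrending itself subtracts the fitted line c0 + c1*t from each NDVI value, leaving the seasonal signal that the harmonic regression below is designed to capture.)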
193 | var detrendedChart = ui.Chart.image.series(detrended, roi, null, 30) 194 | .setOptions({ 195 | title: 'Detrended Landsat time series at ROI', 196 | lineWidth: 1, 197 | pointSize: 3, 198 | }); 199 | print(detrendedChart); 200 | 201 | // Harmonic trend ---------------------------------------------------------------- 202 | // Use these independent variables in the harmonic regression. 203 | var harmonicIndependents = ee.List(['constant', 't', 'cos', 'sin']); 204 | 205 | // Add harmonic terms as new image bands. 206 | var harmonicLandsat = filteredLandsat.map(function(image) { 207 | var timeRadians = image.select('t').multiply(2 * Math.PI); 208 | return image 209 | .addBands(timeRadians.cos().rename('cos')) 210 | .addBands(timeRadians.sin().rename('sin')); 211 | }); 212 | 213 | // The output of the regression reduction is a 4x1 array image. 214 | var harmonicTrend = harmonicLandsat 215 | .select(harmonicIndependents.add(dependent)) 216 | .reduce(ee.Reducer.linearRegression(harmonicIndependents.length(), 1)); 217 | 218 | // Turn the array image into a multi-band image of coefficients. 219 | var harmonicTrendCoefficients = harmonicTrend.select('coefficients') 220 | .arrayProject([0]) 221 | .arrayFlatten([harmonicIndependents]); 222 | 223 | // Compute fitted values. 224 | var fittedHarmonic = harmonicLandsat.map(function(image) { 225 | return image.addBands( 226 | image.select(harmonicIndependents) 227 | .multiply(harmonicTrendCoefficients) 228 | .reduce('sum') 229 | .rename('fitted')); 230 | }); 231 | 232 | // Plot the fitted model and the original data at the ROI. 233 | print(ui.Chart.image.series( 234 | fittedHarmonic.select(['fitted','NDVI']), roi, ee.Reducer.mean(), 30) 235 | .setSeriesNames(['NDVI', 'fitted']) 236 | .setOptions({ 237 | title: 'Harmonic model: original and fitted values', 238 | lineWidth: 1, 239 | pointSize: 3, 240 | })); 241 | 242 | // Compute phase and amplitude. 
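// (Maths note: a fitted term a*cos(2*pi*t) + b*sin(2*pi*t) equals A*cos(2*pi*t - phi), with amplitude A = sqrt(a^2 + b^2), hence the hypot() call, and phase phi obtained via atan2 on the cos and sin coefficients; the next two statements compute exactly these quantities.)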
243 | var phase = harmonicTrendCoefficients.select('cos').atan2( 244 | harmonicTrendCoefficients.select('sin')); 245 | 246 | var amplitude = harmonicTrendCoefficients.select('cos').hypot( 247 | harmonicTrendCoefficients.select('sin')); 248 | 249 | // Use the HSV to RGB transform to display phase and amplitude 250 | var rgb = phase.unitScale(-Math.PI, Math.PI).addBands( 251 | amplitude.multiply(2.5)).addBands( 252 | filteredLandsat.select('NDVI').median()).hsvToRgb(); 253 | var rgb = rgb.updateMask(combinedMask) 254 | 255 | Map.addLayer(l8toa.filterDate('2023-05-01','2023-06-01').median(), {'bands':['B4','B3','B2'], min:0, max:3000}, 'Sentinel 2', false) 256 | Map.addLayer(l8toa.filterDate('2019-05-01','2019-06-01').median(), {'bands':['B4','B3','B2'], min:0, max:0.3}, 'Landsat 8', false) 257 | Map.addLayer(rgb, {}, 'phase (hue), amplitude (saturation)'); 258 | 259 | // use unit scale to normalize the pixel values 260 | var rgb = rgb.unitScale(0, 1); 261 | 262 | var eightbitRGB = rgb.multiply(255).toByte(); 263 | 264 | print(eightbitRGB) 265 | 266 | Map.addLayer(eightbitRGB, {min:0, max:255}, 'rgb_') 267 | 268 | var image_export_options = { 269 | 'patchDimensions': [768, 768], 270 | 'maxFileSize': 104857600, 271 | 'kernelSize': [256, 256], 272 | 'compressed': true 273 | } 274 | var geometry = ee.FeatureCollection('FAO/GAUL/2015/level0').filterMetadata('ADM0_CODE','equals',256); 275 | Export.image.toDrive({ 276 | image: eightbitRGB.clip(geometry), 277 | description: "Export_Task", 278 | folder: 'extra_county', 279 | fileNamePrefix: "extra_county", 280 | region: geometry, 281 | scale: 10, 282 | crs: "EPSG:27700", 283 | maxPixels: 29073057936, 284 | formatOptions: image_export_options, 285 | fileFormat: 'TFRecord', 286 | }) -------------------------------------------------------------------------------- /code/2_tfrecord_to_tiff.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import rasterio 3 | from rasterio.transform import Affine, from_origin 4 | from rasterio.crs import CRS 5 | import numpy as np 6 | import json 7 | import gzip 8 | 9 | # Path to the mixer.json file and TFRecords 10 | mixer_json_path = 'tfrecords/ukfields-mixer.json' 11 | tfrecords_path = 'tfrecords/ukfields-00000.tfrecord.gz' 12 | 13 | # Read mixer.json for scaling and offset 14 | with open(mixer_json_path, 'r') as mixer_file: 15 | mixer_data = json.load(mixer_file) 16 | 17 | crs = CRS.from_string(mixer_data['projection']['crs']) 18 | affine = Affine(*mixer_data['projection']['affine']['doubleMatrix']) 19 | 20 | patch_width = mixer_data['patchDimensions'][0] 21 | patch_height = mixer_data['patchDimensions'][1] 22 | patches_per_row = mixer_data['patchesPerRow'] 23 | 24 | 25 | for n in range(129): 26 | tfrecords_path = f'tfrecords/ukfields-00{str(n).zfill(3)}.tfrecord.gz' 27 | # Create a TensorFlow dataset for reading compressed TFRecords 28 | dataset = tf.data.TFRecordDataset(tfrecords_path, compression_type='GZIP') 29 | 30 | # Loop through each TFRecord 31 | for num, string_record in enumerate(dataset): 32 | num+=33*n 33 | example = tf.train.Example() 34 | example.ParseFromString(string_record.numpy()) 35 | 36 | # Extract image data 37 | r_data = example.features.feature['red'].bytes_list.value[0] 38 | g_data = example.features.feature['green'].bytes_list.value[0] 39 | b_data = example.features.feature['blue'].bytes_list.value[0] 40 | 41 | # Get image dimensions 42 | overlap = 256//2 43 | height, width = np.array(mixer_data['patchDimensions'])+2*overlap 44 
| 45 | # Convert to numpy arrays (note: the bytes were exported from Earth Engine as uint8; reading them as int8 preserves the bit pattern, and they are cast back to uint8 downstream) 46 | r_array = np.frombuffer(r_data, dtype=np.int8).reshape(height, width) 47 | g_array = np.frombuffer(g_data, dtype=np.int8).reshape(height, width) 48 | b_array = np.frombuffer(b_data, dtype=np.int8).reshape(height, width) 49 | 50 | image_array = np.stack([r_array, g_array, b_array], axis=0).squeeze() 51 | 52 | row = num // patches_per_row 53 | col = num % patches_per_row 54 | 55 | x = col * mixer_data['patchDimensions'][0] 56 | y = row * mixer_data['patchDimensions'][1] 57 | 58 | transform = affine * Affine.translation(x, y) * Affine.translation(-overlap, -overlap) 59 | 60 | # Create a GeoTIFF file using rasterio 61 | output_path = f'tfrecords/ukfields_export/{row}_{col}_{num}.tif' 62 | with rasterio.open(output_path, 'w', driver='GTiff', height=height, width=width, 63 | count=3, dtype='int8', crs=crs, transform=transform) as dst: 64 | dst.write(image_array) 65 | 66 | print('Conversion complete.', num+1) -------------------------------------------------------------------------------- /code/3_sam_inference.py: -------------------------------------------------------------------------------- 1 | # This script is for prototype purposes only; it is not optimized for speed or memory usage. 2 | # For large projects, consider using https://github.com/Prindle19/segment-everything 3 | 4 | import os 5 | import matplotlib.pyplot as plt 6 | # https://github.com/aliaksandr960/segment-anything-eo/tree/main 7 | from sameo import SamEO 8 | 9 | import os 10 | import numpy as np 11 | import rasterio 12 | from tqdm import tqdm 13 | import glob 14 | 15 | file_paths = glob.glob('ukfields/*.tif*') 16 | 17 | # Available SamEO arguments: 18 | # checkpoint="sam_vit_h_4b8939.pth", 19 | # model_type='vit_h', 20 | # device='cpu', 21 | # erosion_kernel=(3, 3), 22 | # mask_multiplier=255, 23 | # sam_kwargs=None 24 | 25 | # Available sam_kwargs: 26 | # points_per_side: Optional[int] = 32, 27 | # points_per_batch: int = 64, 28 | # pred_iou_thresh: float = 0.88, 29 | # stability_score_thresh: float = 0.95, 30 | # stability_score_offset: float = 1.0, 31 | # box_nms_thresh: float = 0.7, 32 | # crop_n_layers: int = 0, 33 | # crop_nms_thresh: float = 0.7, 34 | # crop_overlap_ratio: float = 512 / 1500, 35 | # crop_n_points_downscale_factor: int = 1, 36 | # point_grids: Optional[List[np.ndarray]] = None, 37 | # min_mask_region_area: int = 0, 38 | # output_mode: str = "binary_mask", 39 | 40 | device = 'cuda:0' 41 | 42 | sam_kwargs = { 43 | "points_per_side": 64, 44 | "pred_iou_thresh": 0.86, 45 | "stability_score_thresh": 0.92, 46 | "crop_n_layers": 1, 47 | "crop_n_points_downscale_factor": 2, 48 | "min_mask_region_area": 100, 49 | } 50 | #{'points_per_side':64, 'min_mask_region_area':0, 'pred_iou_thresh': 0.86, 'stability_score_thresh':0.92} 51 | sam_eo = SamEO(checkpoint="sam_vit_h_4b8939.pth", #"/extra/demo2D/sam_model_best.pth", 52 | model_type='vit_h', 53 | device=device, 54 | erosion_kernel=(3,3), 55 | mask_multiplier=255, 56 | sam_kwargs=sam_kwargs) 57 | 58 | def predict(patch): 59 | # Perform prediction on the patch using sam_eo 60 | pred = sam_eo(patch) 61 | return pred 62 | 63 | for file_path in tqdm(file_paths): 64 | # Reorder so the rasterio open doesn't bottleneck!
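    # (Loop overview: skip tiles that already have a prediction, delete all-zero (empty) tiles, run SAM automatic mask generation on the uint8 RGB patch, then write the mask out with the source tile's CRS and transform so the predictions stay georeferenced.)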
65 | patch_filename = f"./predictions/patch_{os.path.basename(file_path).split('.')[-2]}.tif" 66 | if not os.path.exists(patch_filename): 67 | with rasterio.open(file_path, dtype=np.uint8) as dataset: 68 | height, width = dataset.shape 69 | # Read the entire raster data 70 | raster_data = dataset.read() 71 | raster_data = np.moveaxis(raster_data, 0, -1)[:, :, :3] 72 | if np.count_nonzero(raster_data) == 0: 73 | os.remove(file_path) 74 | print('empty patch') 75 | continue 76 | 77 | patch = raster_data.astype(np.uint8) 78 | 79 | #print(patch.shape, file_path) 80 | # Apply the prediction function to the patch 81 | try: 82 | prediction = predict(patch) 83 | except Exception as e: 84 | print(e, file_path, patch_filename) 85 | continue 86 | 87 | # Save the prediction as a geotiff 88 | with rasterio.open(patch_filename, 'w', driver='GTiff', count=1, 89 | width=width, height=height, 90 | dtype=prediction.dtype, 91 | crs=dataset.crs, transform=dataset.transform) as dst: 92 | dst.write(prediction, 1) 93 | 94 | print(f"Processed patch and saved results to {patch_filename}.") 95 | else: 96 | print(f"Patch already exists, skipping.") -------------------------------------------------------------------------------- /code/4_vectorise_preds.py: -------------------------------------------------------------------------------- 1 | from osgeo import gdal 2 | gdal.SetConfigOption('GDAL_VRT_ENABLE_PYTHON', 'YES') 3 | gdal.UseExceptions() 4 | 5 | from utils import setup_environment, get_vrt_metadata, generate_contours, merge_contours 6 | 7 | if __name__ == "__main__": 8 | out_name = "demo.vrt" 9 | pred_dir = "/home/eesjb/Documents/segment-anything/segment-anything-eo/predictions/utm27700" 10 | weight_file = 'weights.tif' 11 | shape = (1024, 1024) 12 | buffer = 128 13 | contours_dir = 'contours' 14 | output_file = 'merged.gpkg' 15 | 16 | setup_environment(out_name, pred_dir, weight_file, shape, buffer) 17 | meta, vrt_dim, transform = get_vrt_metadata(out_name) 18 | #if large vrt file, run on high memory machine 19 | generate_contours(out_name, vrt_dim, buffer, contours_dir) 20 | merge_contours(vrt_dim, buffer, contours_dir, output_file) 21 | -------------------------------------------------------------------------------- /code/environment.yml: -------------------------------------------------------------------------------- 1 | name: ukfields 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=conda_forge 7 | - _openmp_mutex=4.5=2_gnu 8 | - affine=2.4.0=pyhd8ed1ab_0 9 | - attrs=24.1.0=pyh71513ae_0 10 | - aws-c-auth=0.7.22=hbd3ac97_10 11 | - aws-c-cal=0.7.1=h87b94db_1 12 | - aws-c-common=0.9.23=h4ab18f5_0 13 | - aws-c-compression=0.2.18=he027950_7 14 | - aws-c-event-stream=0.4.2=h7671281_15 15 | - aws-c-http=0.8.2=he17ee6b_6 16 | - aws-c-io=0.14.10=h826b7d6_1 17 | - aws-c-mqtt=0.10.4=hcd6a914_8 18 | - aws-c-s3=0.6.0=h365ddd8_2 19 | - aws-c-sdkutils=0.1.16=he027950_3 20 | - aws-checksums=0.1.18=he027950_7 21 | - aws-crt-cpp=0.27.3=hda66527_2 22 | - aws-sdk-cpp=1.11.329=h46c3b66_9 23 | - azure-core-cpp=1.13.0=h935415a_0 24 | - azure-identity-cpp=1.8.0=hd126650_2 25 | - azure-storage-blobs-cpp=12.12.0=hd2e3451_0 26 | - azure-storage-common-cpp=12.7.0=h10ac4d7_1 27 | - blosc=1.21.6=hef167b5_0 28 | - branca=0.7.2=pyhd8ed1ab_0 29 | - brotli=1.1.0=hd590300_1 30 | - brotli-bin=1.1.0=hd590300_1 31 | - brotli-python=1.1.0=py312h30efb56_1 32 | - bzip2=1.0.8=h4bc722e_7 33 | - c-ares=1.32.3=h4bc722e_0 34 | - ca-certificates=2024.7.4=hbcca054_0 35 | - cairo=1.18.0=hebfffa5_3 36 | - 
certifi=2024.7.4=pyhd8ed1ab_0 37 | - cffi=1.16.0=py312hf06ca03_0 38 | - cfitsio=4.4.1=hf8ad068_0 39 | - charset-normalizer=3.3.2=pyhd8ed1ab_0 40 | - click=8.1.7=unix_pyh707e725_0 41 | - click-plugins=1.1.1=py_0 42 | - cligj=0.7.2=pyhd8ed1ab_1 43 | - colorama=0.4.6=pyhd8ed1ab_0 44 | - contourpy=1.2.1=py312h8572e83_0 45 | - cycler=0.12.1=pyhd8ed1ab_0 46 | - expat=2.6.2=h59595ed_0 47 | - fmt=11.0.1=h434a139_0 48 | - folium=0.17.0=pyhd8ed1ab_0 49 | - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 50 | - font-ttf-inconsolata=3.000=h77eed37_0 51 | - font-ttf-source-code-pro=2.038=h77eed37_0 52 | - font-ttf-ubuntu=0.83=h77eed37_2 53 | - fontconfig=2.14.2=h14ed4e7_0 54 | - fonts-conda-ecosystem=1=0 55 | - fonts-conda-forge=1=0 56 | - fonttools=4.53.1=py312h41a817b_0 57 | - freetype=2.12.1=h267a509_2 58 | - freexl=2.0.0=h743c826_0 59 | - gdal=3.9.1=py312h7eda2e2_10 60 | - geopandas=1.0.1=pyhd8ed1ab_0 61 | - geopandas-base=1.0.1=pyha770c72_0 62 | - geos=3.12.2=he02047a_1 63 | - geotiff=1.7.3=hf7fa9e8_2 64 | - giflib=5.2.2=hd590300_0 65 | - h2=4.1.0=pyhd8ed1ab_0 66 | - hdf4=4.2.15=h2a13503_7 67 | - hdf5=1.14.3=nompi_hdf9ad27_105 68 | - hpack=4.0.0=pyh9f0ad1d_0 69 | - hyperframe=6.0.1=pyhd8ed1ab_0 70 | - icu=75.1=he02047a_0 71 | - idna=3.7=pyhd8ed1ab_0 72 | - jinja2=3.1.4=pyhd8ed1ab_0 73 | - joblib=1.4.2=pyhd8ed1ab_0 74 | - json-c=0.17=h1220068_1 75 | - kealib=1.5.3=hee9dde6_1 76 | - keyutils=1.6.1=h166bdaf_0 77 | - kiwisolver=1.4.5=py312h8572e83_1 78 | - krb5=1.21.3=h659f571_0 79 | - lcms2=2.16=hb7c19ff_0 80 | - ld_impl_linux-64=2.40=hf3520f5_7 81 | - lerc=4.0.0=h27087fc_0 82 | - libabseil=20240116.2=cxx17_he02047a_1 83 | - libaec=1.1.3=h59595ed_0 84 | - libarchive=3.7.4=hfca40fe_0 85 | - libblas=3.9.0=23_linux64_openblas 86 | - libbrotlicommon=1.1.0=hd590300_1 87 | - libbrotlidec=1.1.0=hd590300_1 88 | - libbrotlienc=1.1.0=hd590300_1 89 | - libcblas=3.9.0=23_linux64_openblas 90 | - libcrc32c=1.1.2=h9c3ff4c_0 91 | - libcurl=8.9.1=hdb1bdb2_0 92 | - libdeflate=1.20=hd590300_0 93 | - libedit=3.1.20191231=he28a2e2_2 94 | - libev=4.33=hd590300_2 95 | - libexpat=2.6.2=h59595ed_0 96 | - libffi=3.4.2=h7f98852_5 97 | - libgcc-ng=14.1.0=h77fa898_0 98 | - libgdal=3.9.1=ha770c72_10 99 | - libgdal-core=3.9.1=h8f9377d_10 100 | - libgdal-fits=3.9.1=hdd6600c_10 101 | - libgdal-grib=3.9.1=h5f34788_10 102 | - libgdal-hdf4=3.9.1=ha39a594_10 103 | - libgdal-hdf5=3.9.1=ha2ed5f0_10 104 | - libgdal-jp2openjpeg=3.9.1=h2ebfdf0_10 105 | - libgdal-kea=3.9.1=h2b45729_10 106 | - libgdal-netcdf=3.9.1=h94e7027_10 107 | - libgdal-pdf=3.9.1=h562c687_10 108 | - libgdal-pg=3.9.1=he047751_10 109 | - libgdal-postgisraster=3.9.1=he047751_10 110 | - libgdal-tiledb=3.9.1=h9d8aadb_10 111 | - libgdal-xls=3.9.1=h062f1c4_10 112 | - libgfortran-ng=14.1.0=h69a702a_0 113 | - libgfortran5=14.1.0=hc5f4f2c_0 114 | - libglib=2.80.3=h8a4344b_1 115 | - libgomp=14.1.0=h77fa898_0 116 | - libgoogle-cloud=2.26.0=h26d7fe4_0 117 | - libgoogle-cloud-storage=2.26.0=ha262f82_0 118 | - libgrpc=1.62.2=h15f2491_0 119 | - libiconv=1.17=hd590300_2 120 | - libjpeg-turbo=3.0.0=hd590300_1 121 | - libkml=1.3.0=hbbc8833_1020 122 | - liblapack=3.9.0=23_linux64_openblas 123 | - libnetcdf=4.9.2=nompi_h135f659_114 124 | - libnghttp2=1.58.0=h47da74e_1 125 | - libnsl=2.0.1=hd590300_0 126 | - libopenblas=0.3.27=pthreads_hac2b453_1 127 | - libpng=1.6.43=h2797004_0 128 | - libpq=16.3=ha72fbe1_0 129 | - libprotobuf=4.25.3=h08a7969_0 130 | - libre2-11=2023.09.01=h5a48ba9_2 131 | - librttopo=1.1.0=hc670b87_16 132 | - libspatialite=5.1.0=h15fa968_9 133 | - libsqlite=3.46.0=hde9e2c9_0 134 | 
- libssh2=1.11.0=h0841786_0 135 | - libstdcxx-ng=14.1.0=hc0a3c3a_0 136 | - libtiff=4.6.0=h1dd3fc0_3 137 | - libuuid=2.38.1=h0b41bf4_0 138 | - libwebp-base=1.4.0=hd590300_0 139 | - libxcb=1.16=hd590300_0 140 | - libxcrypt=4.4.36=hd590300_1 141 | - libxml2=2.12.7=he7c6b58_4 142 | - libxslt=1.1.39=h76b75d6_0 143 | - libzip=1.10.1=h2629f0a_3 144 | - libzlib=1.3.1=h4ab18f5_1 145 | - lxml=5.2.2=py312hb90d8a5_0 146 | - lz4-c=1.9.4=hcb278e6_0 147 | - lzo=2.10=hd590300_1001 148 | - mapclassify=2.7.0=pyhd8ed1ab_0 149 | - markupsafe=2.1.5=py312h98912ed_0 150 | - matplotlib-base=3.9.1=py312h854627b_2 151 | - minizip=4.0.7=h401b404_0 152 | - munkres=1.1.4=pyh9f0ad1d_0 153 | - ncurses=6.5=h59595ed_0 154 | - networkx=3.3=pyhd8ed1ab_1 155 | - nspr=4.35=h27087fc_0 156 | - nss=3.103=h593d115_0 157 | - numpy=2.0.1=py312h1103770_0 158 | - openjpeg=2.5.2=h488ebb8_0 159 | - openssl=3.3.1=h4bc722e_2 160 | - packaging=24.1=pyhd8ed1ab_0 161 | - pandas=2.2.2=py312h1d6d2e6_1 162 | - pcre2=10.44=h0f59acf_0 163 | - pillow=10.4.0=py312h287a98d_0 164 | - pip=24.2=pyhd8ed1ab_0 165 | - pixman=0.43.2=h59595ed_0 166 | - poppler=24.07.0=hb0d391f_0 167 | - poppler-data=0.4.12=hd8ed1ab_0 168 | - postgresql=16.3=h8e811e2_0 169 | - proj=9.4.1=h54d7996_1 170 | - pthread-stubs=0.4=h36c2ea0_1001 171 | - pycparser=2.22=pyhd8ed1ab_0 172 | - pyogrio=0.9.0=py312h8ad7a51_0 173 | - pyparsing=3.1.2=pyhd8ed1ab_0 174 | - pyproj=3.6.1=py312h01329cd_8 175 | - pysocks=1.7.1=pyha2e5f31_6 176 | - python=3.12.4=h194c7f8_0_cpython 177 | - python-dateutil=2.9.0=pyhd8ed1ab_0 178 | - python-tzdata=2024.1=pyhd8ed1ab_0 179 | - python_abi=3.12=4_cp312 180 | - pytz=2024.1=pyhd8ed1ab_0 181 | - qhull=2020.2=h434a139_5 182 | - rasterio=1.3.10=py312hff7f44f_5 183 | - re2=2023.09.01=h7f4b329_2 184 | - readline=8.2=h8228510_1 185 | - requests=2.32.3=pyhd8ed1ab_0 186 | - s2n=1.4.17=he19d79f_0 187 | - scikit-learn=1.5.1=py312h775a589_0 188 | - scipy=1.14.0=py312hc2bc53b_1 189 | - setuptools=72.1.0=pyhd8ed1ab_0 190 | - shapely=2.0.5=py312h8413631_0 191 | - six=1.16.0=pyh6c4a22f_0 192 | - snappy=1.2.1=ha2e4443_0 193 | - snuggs=1.4.7=pyhd8ed1ab_1 194 | - spdlog=1.14.1=hed91bc2_1 195 | - sqlite=3.46.0=h6d4b2fc_0 196 | - threadpoolctl=3.5.0=pyhc1e730c_0 197 | - tiledb=2.25.0=h7d57ca9_4 198 | - tk=8.6.13=noxft_h4845f30_101 199 | - tqdm=4.66.5=pyhd8ed1ab_0 200 | - tzcode=2024a=h3f72095_0 201 | - tzdata=2024a=h0c530f3_0 202 | - uriparser=0.9.8=hac33072_0 203 | - urllib3=2.2.2=pyhd8ed1ab_1 204 | - wheel=0.44.0=pyhd8ed1ab_0 205 | - xerces-c=3.2.5=h666cd97_1 206 | - xorg-kbproto=1.0.7=h7f98852_1002 207 | - xorg-libice=1.1.1=hd590300_0 208 | - xorg-libsm=1.2.4=h7391055_0 209 | - xorg-libx11=1.8.9=hb711507_1 210 | - xorg-libxau=1.0.11=hd590300_0 211 | - xorg-libxdmcp=1.1.3=h7f98852_0 212 | - xorg-libxext=1.3.4=h0b41bf4_2 213 | - xorg-libxrender=0.9.11=hd590300_0 214 | - xorg-renderproto=0.11.1=h7f98852_1002 215 | - xorg-xextproto=7.3.0=h0b41bf4_1003 216 | - xorg-xproto=7.0.31=h7f98852_1007 217 | - xyzservices=2024.6.0=pyhd8ed1ab_0 218 | - xz=5.2.6=h166bdaf_0 219 | - zlib=1.3.1=h4ab18f5_1 220 | - zstandard=0.23.0=py312h3483029_0 221 | - zstd=1.5.6=ha6fb4c9_0 222 | -------------------------------------------------------------------------------- /code/readme.md: -------------------------------------------------------------------------------- 1 | # UKFields code 2 | 3 | This document provides a brief description of the purposes of the scripts in the current directory. 4 | 5 | ## 1_export_harmonics_tfrecord.js 6 | 7 | Produces harmonic composites over the UK, masked to arable areas.
Exports to Google Drive as overlapping patches. 8 | 9 | ## 2_tfrecord_to_tiff.py 10 | 11 | Converts the TFRecords to more accessible GeoTIFFs for processing. 12 | 13 | ## 3_sam_inference.py 14 | 15 | Uses Segment Anything (SAM) to produce prediction rasters. 16 | 17 | ## 4_vectorise_preds.py 18 | 19 | Vectorises and merges the overlapping patches using radial weighting. -------------------------------------------------------------------------------- /code/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from lxml import etree 4 | import copy 5 | from typing import Tuple 6 | 7 | import glob 8 | import os 9 | import rasterio 10 | import numpy as np 11 | import multiprocessing 12 | from functools import partial 13 | from tqdm import tqdm 14 | import geopandas as gpd 15 | import pandas as pd 16 | 17 | #vectorisation code from https://github.com/sentinel-hub/field-delineation/blob/main/fd/vectorisation.py 18 | 19 | def setup_environment(vrt_filename, pred_directory, weight_file, shape, buffer): 20 | """ 21 | Sets up the environment by creating a weight file and writing the VRT file. 22 | 23 | Parameters: 24 | - vrt_filename: Path to the output VRT file. 25 | - pred_directory: Directory containing the prediction .tif files. 26 | - weight_file: Path to the weight file to be created. 27 | - shape: Tuple representing the shape (width, height) of the weight file. 28 | - buffer: Buffer width in pixels, used to taper the weights towards the patch edges. 29 | """ 30 | if os.path.exists(vrt_filename): 31 | os.remove(vrt_filename) 32 | 33 | tifs = sorted(glob.glob(pred_directory + '/*.tif'))#[:500] 34 | 35 | with rasterio.open(weight_file, 'w', driver='GTiff', width=shape[0], height=shape[1], count=1, dtype=np.float32) as dst: 36 | dst.write_band(1, get_weights(shape, buffer)) 37 | 38 | write_vrt(tifs, weight_file, vrt_filename) 39 | 40 | 41 | def get_vrt_metadata(vrt_file): 42 | """ 43 | Gets metadata from the VRT file. 44 | 45 | Parameters: 46 | - vrt_file: Path to the VRT file. 47 | 48 | Returns: 49 | - meta: Metadata of the VRT file. 50 | - vrt_dim: Dimensions of the VRT file. 51 | - transform: Transform of the VRT file. 52 | """ 53 | with rasterio.open(vrt_file) as src: 54 | meta = src.meta 55 | vrt_dim = meta['width'], meta['height'] 56 | transform = meta['transform'] 57 | return meta, vrt_dim, transform 58 | 59 | 60 | def generate_contours(vrt_file, vrt_dim, buffer, contours_dir, multiprocessing_contour=True, pool_size=2): 61 | """ 62 | Generates contours from the VRT file. 63 | 64 | Parameters: 65 | - vrt_file: Path to the VRT file. 66 | - vrt_dim: Dimensions of the VRT file. 67 | - buffer: Buffer size for contour generation. 68 | - contours_dir: Directory to save the contour files. 69 | - multiprocessing_contour: Boolean to enable or disable multiprocessing. 70 | - pool_size: Number of processes for multiprocessing.
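    Note: windows are generated on a (1024 - buffer) stride with a window size of 1024, so neighbouring contour tiles overlap by `buffer` pixels and can be dissolved cleanly when merged.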
71 | """ 72 | if multiprocessing_contour: 73 | pool = multiprocessing.Pool(pool_size) 74 | run_contour_partial = partial(run_contour, size=1024, vrt_file=vrt_file, threshold=255 * 0.6, contours_dir=contours_dir) 75 | pool.map(run_contour_partial, [(i, j) for i in range(0, vrt_dim[0], 1024 - buffer) for j in range(0, vrt_dim[1], 1024 - buffer)]) 76 | pool.close() 77 | pool.join() 78 | else: 79 | for i in tqdm(range(0, vrt_dim[0], 1024 - buffer), total=vrt_dim[0] // (1024 - buffer)): 80 | for j in tqdm(range(0, vrt_dim[1], 1024 - buffer), total=vrt_dim[1] // (1024 - buffer)): 81 | run_contour((i, j), 1024, vrt_file, threshold=255 * 0.6, contours_dir=contours_dir) 82 | 83 | 84 | def merge_contours(vrt_dim, buffer, contours_dir, output_file): 85 | """ 86 | Merges the contour files into a single GeoPackage file. 87 | 88 | Parameters: 89 | - vrt_dim: Dimensions of the VRT file. 90 | - buffer: Buffer size used for contour generation. 91 | - contours_dir: Directory containing the contour files. 92 | - output_file: Path to the output GeoPackage file. 93 | """ 94 | for col_num in tqdm(range(0, vrt_dim[0], 1024 - buffer)): 95 | try: 96 | contours = glob.glob(contours_dir + f'/merged_{col_num}_*.gpkg') 97 | col_gdf = gpd.GeoDataFrame(pd.concat([gpd.read_file(contour).explode(index_parts=False) for contour in contours], ignore_index=True)) 98 | 99 | col_gdf['geometry'] = col_gdf['geometry'].buffer(0) 100 | col_gdf = col_gdf.dissolve() 101 | 102 | if len(col_gdf) > 0: 103 | col_gdf.to_file(f'col_{col_num}.gpkg', driver='GPKG') 104 | except Exception as e: 105 | print(e) 106 | continue 107 | 108 | col_files = glob.glob('col_*.gpkg') 109 | col_gdfs = [gpd.read_file(col_file).explode(index_parts=False) for col_file in col_files] 110 | 111 | all_gdf = gpd.GeoDataFrame(pd.concat(col_gdfs, ignore_index=True)) 112 | all_gdf['geometry'] = all_gdf['geometry'].buffer(0) 113 | all_gdf = all_gdf.dissolve().explode() 114 | all_gdf.to_file(output_file, driver='GPKG') 115 | 116 | def run_contour(row_col: tuple, size: int, vrt_file: str, threshold: float = 0.6, 117 | contours_dir: str = '.', cleanup: bool = True, skip_existing: bool = True, suppress_output: bool = True) -> Tuple[str, bool, str]: 118 | """ Will create a (small) tiff file over a srcwin (row, col, size, size) and run gdal_contour on it. """ 119 | 120 | row, col = row_col 121 | 122 | file = f'merged_{row}_{col}_{size}_{size}' 123 | if skip_existing and os.path.exists(f'{contours_dir}/{file}.gpkg'): 124 | return file, True, 'Loaded existing file ...' 
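    # (The two shell calls below first cut a size x size window out of the VRT with gdal_translate -srcwin, passing --config GDAL_VRT_ENABLE_PYTHON YES because the VRT band uses a Python pixel function, then polygonise it with gdal_contour -p, using the threshold as the contour interval so pixels split into below/above-threshold polygons.)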
125 | try: 126 | gdal_str = f'gdal_translate --config GDAL_VRT_ENABLE_PYTHON YES -srcwin {col} {row} {size} {size} {vrt_file} {file}.tiff' 127 | if suppress_output: 128 | gdal_str += ' > /dev/null' 129 | os.system(gdal_str) 130 | gdal_str = f'gdal_contour -of gpkg {file}.tiff {contours_dir}/{file}.gpkg -i {threshold} -amin amin -amax amax -p' 131 | if suppress_output: 132 | gdal_str += ' > /dev/null' 133 | os.system(gdal_str) 134 | if cleanup: 135 | os.remove(f'{file}.tiff') 136 | return f'{contours_dir}/{file}.gpkg', True, None 137 | except Exception as exc: 138 | return f'{contours_dir}/{file}.gpkg', False, exc 139 | 140 | def write_vrt(files, weights_file, out_vrt, function = None): 141 | """ Write virtual raster 142 | 143 | Function that will first build a temp.vrt for the input files, and then modify it for purposes of spatial merging 144 | of overlaps using the provided function 145 | """ 146 | 147 | if not function: 148 | function = average_function() 149 | 150 | # build a vrt from list of input files 151 | gdal_str = f'gdalbuildvrt temp.vrt -b 1 {" ".join(files)}' 152 | #save gdal_str to a tmp file then execute the file 153 | with open('gdalbuildvrt.sh', 'w') as f: 154 | f.write(gdal_str) 155 | os.system('bash gdalbuildvrt.sh') 156 | os.remove('gdalbuildvrt.sh') 157 | 158 | # fix the vrt 159 | root = etree.parse('temp.vrt').getroot() 160 | vrtrasterband = root.find('VRTRasterBand') 161 | rasterbandchildren = list(vrtrasterband) 162 | root.remove(vrtrasterband) 163 | 164 | dict_attr = {'dataType': 'Float32', 'band': '1', 'subClass': 'VRTDerivedRasterBand'} 165 | raster_band_tag = etree.SubElement(root, 'VRTRasterBand', dict_attr) 166 | 167 | # Add childern tags to derivedRasterBand tag 168 | pix_func_tag = etree.SubElement(raster_band_tag, 'PixelFunctionType') 169 | pix_func_tag.text = 'average' 170 | 171 | pix_func_tag2 = etree.SubElement(raster_band_tag, 'PixelFunctionLanguage') 172 | pix_func_tag2.text = 'Python' 173 | 174 | pix_func_code = etree.SubElement(raster_band_tag, 'PixelFunctionCode') 175 | pix_func_code.text = etree.CDATA(function) 176 | 177 | new_sources = [] 178 | for child in rasterbandchildren: 179 | if child.tag == 'NoDataValue': 180 | pass 181 | else: 182 | raster_band_tag.append(child) 183 | if child.tag == 'SimpleSource': 184 | new_source = copy.deepcopy(child) 185 | new_source.find('SourceFilename').text = weights_file 186 | new_source.find('SourceProperties').attrib['DataType'] = 'Float32' 187 | for nodata in new_source.xpath('//NODATA'): 188 | nodata.getparent().remove(nodata) 189 | new_sources.append(new_source) 190 | 191 | for new_source in new_sources: 192 | raster_band_tag.append(new_source) 193 | 194 | os.remove('temp.vrt') 195 | 196 | with open(out_vrt, 'w') as out: 197 | out.writelines(etree.tounicode(root, pretty_print=True)) 198 | 199 | def get_weights(shape: Tuple[int, int], buffer: int, low: float = 0, high: float = 1) -> np.ndarray: 200 | """ Create weights array based on linear gradient from low to high from edges to 2*buffer, and 1 elsewhere. """ 201 | weight = np.ones(shape, dtype=np.float32) 202 | weight[..., :2 * buffer] = np.tile(np.linspace(low, high, 2 * buffer), shape[0]).reshape((shape[0], 2 * buffer)) 203 | weight[..., -2 * buffer:] = np.tile(np.linspace(high, low, 2 * buffer), shape[0]).reshape((shape[0], 2 * buffer)) 204 | weight[:2 * buffer, ...] *= np.repeat(np.linspace(low, high, shape[1]), 2 * buffer).reshape((2 * buffer, shape[1])) 205 | weight[-2 * buffer:, ...] 
*= np.repeat(np.linspace(high, low, 2 * buffer), shape[1]).reshape((2 * buffer, shape[1])) 206 | return weight 207 | 208 | def average_function(no_data = 0, round_output: bool =False) -> str: 209 | """ A Python function that will be added to VRT and used to calculate weighted average over overlaps 210 | 211 | :param no_data: no data pixel value (default = 0) 212 | :param round_output: flag to round the output (to 0 decimals). Useful when the final result will be in Int. 213 | :return: Function (as a string) 214 | """ 215 | rounding = 'out = np.round(out, 0)' if round_output else '' 216 | return f""" 217 | import numpy as np 218 | 219 | def average(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs): 220 | p, w = np.split(np.array(in_ar), 2, axis=0) 221 | n_overlaps = np.sum(p!={no_data}, axis=0) 222 | w_sum = np.sum(w, axis=0, dtype=np.float32) 223 | p_sum = np.sum(p, axis=0, dtype=np.float32) 224 | weighted = np.sum(p*w, axis=0, dtype=np.float32) 225 | out = np.where((n_overlaps>1) & (w_sum>0) , weighted/w_sum, p_sum/n_overlaps) 226 | {rounding} 227 | out_ar[:] = out 228 | return out_ar 229 | """ 230 | -------------------------------------------------------------------------------- /ukfields.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Spiruel/UKFields/73547f245314bdf26cd3768cc85a482d42db458e/ukfields.jpeg --------------------------------------------------------------------------------