├── models
│   ├── __init__.py
│   ├── SFANet.py
│   └── VGG.py
├── utils
│   ├── __init__.py
│   ├── preprocess.py
│   ├── inference.py
│   └── evaluate.py
├── scripts
│   ├── __init__.py
│   ├── calculate_ap.py
│   ├── inference.py
│   ├── tune.py
│   ├── test.py
│   ├── train.py
│   └── prepare.py
├── environment.yml
├── LICENSE
└── README.md
/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: urban-tree-detection
2 | dependencies:
3 |   - python=3.8
4 |   - tensorflow-gpu=2.4.1
5 |   - numpy
6 |   - imageio
7 |   - rasterio
8 |   - geopandas
9 |   - h5py
10 |   - scipy
11 |   - tqdm
12 |   - scikit-image
13 |   - scikit-learn
14 |   - pip
15 |   - pip:
16 |     - optuna
17 | 
--------------------------------------------------------------------------------
/utils/preprocess.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def preprocess_RGBN(images):
4 |     R = images[...,0:1]
5 |     N = images[...,3:4]
6 |     ndvi = tf.math.divide_no_nan((N-R),(N+R))
7 |     ndvi *= 127.5
8 | 
9 |     bgr = tf.keras.applications.vgg16.preprocess_input(images[:,:,:,:3])
10 | 
11 |     nir = (images[:,:,:,3:4]-127.5)
12 | 
13 |     images_out = tf.concat([bgr,nir,ndvi],axis=-1)
14 | 
15 |     return images_out
16 | 
17 | def preprocess_RGB(images):
18 |     bgr = tf.keras.applications.vgg16.preprocess_input(images[:,:,:,:3])
19 | 
20 |     return bgr
21 | 
22 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Jonathan Ventura
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/scripts/calculate_ap.py:
--------------------------------------------------------------------------------
1 | """ Compute average precision on test set. 
""" 2 | import numpy as np 3 | import argparse 4 | import os 5 | import h5py as h5 6 | import yaml 7 | from utils.evaluate import test_all_thresholds, calculate_ap 8 | from models import SFANet 9 | from utils.preprocess import * 10 | import imageio 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser() 14 | 15 | parser.add_argument('data', help='path to data hdf5 file') 16 | parser.add_argument('log', help='path to log directory') 17 | parser.add_argument('--max_distance', type=float, default=10, help='max distance from gt to pred tree (in pixels)') 18 | 19 | args = parser.parse_args() 20 | 21 | f = h5.File(args.data,'r') 22 | images = f[f'test/images'][:] 23 | gts = f[f'test/gt'][:] 24 | 25 | preds_path = os.path.join(args.log,'test_preds.npy') 26 | if os.path.exists(preds_path): 27 | preds = np.load(preds_path) 28 | else: 29 | bands = f.attrs['bands'] 30 | 31 | preprocess = eval(f'preprocess_{bands}') 32 | training_model, model = SFANet.build_model( 33 | images.shape[1:], 34 | preprocess_fn=preprocess) 35 | 36 | weights_path = os.path.join(args.log,'weights.best.h5') 37 | training_model.load_weights(weights_path) 38 | 39 | print('----- getting predictions from trained model -----') 40 | preds = model.predict(images,verbose=True,batch_size=1)[...,0] 41 | 42 | np.save(preds_path,preds) 43 | 44 | print('----- calculating metrics -----') 45 | thresholds, precisions, recalls = test_all_thresholds( 46 | gts=gts, 47 | preds=preds, 48 | max_distance=args.max_distance) 49 | ap = calculate_ap(precisions,recalls) 50 | 51 | with open(os.path.join(args.log,'ap_results.txt'),'w') as f: 52 | f.write('average precision: '+str(ap)) 53 | 54 | print('------- results for: ' + args.log + ' ---------') 55 | print('average precision: ',ap) 56 | 57 | if __name__ == '__main__': 58 | main() 59 | -------------------------------------------------------------------------------- /scripts/inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from models import SFANet 4 | from utils.preprocess import * 5 | from utils.inference import run_tiled_inference 6 | 7 | import argparse 8 | import os 9 | import sys 10 | import yaml 11 | 12 | import rasterio 13 | 14 | import tqdm 15 | from tqdm import trange 16 | 17 | import glob 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | 22 | parser.add_argument('input', help='path to input tiff file or directory') 23 | parser.add_argument('output', help='path to output json file or directory') 24 | parser.add_argument('log', help='path to log directory') 25 | parser.add_argument('--bands', default='RGBN', help='input bands') 26 | parser.add_argument('--tile_size', type=int, default=2048, help='tile size') 27 | parser.add_argument('--overlap', type=int, default=32, help='overlap between tiles') 28 | 29 | args = parser.parse_args() 30 | 31 | params_path = os.path.join(args.log,'params.yaml') 32 | if os.path.exists(params_path): 33 | with open(params_path,'r') as f: 34 | params = yaml.safe_load(f) 35 | mode = params['mode'] 36 | min_distance = params['min_distance'] 37 | threshold_abs = params['threshold_abs'] if mode == 'abs' else None 38 | threshold_rel = params['threshold_rel'] if mode == 'rel' else None 39 | else: 40 | print(f'warning: params.yaml missing -- using default params') 41 | min_distance = 1 42 | threshold_abs = None 43 | threshold_rel = 0.2 44 | 45 | weights_path = os.path.join(args.log,'weights.best.h5') 46 | padded_size = args.tile_size + args.overlap*2 47 | preprocess = 
eval(f'preprocess_{args.bands}')
48 |     training_model, model = SFANet.build_model((padded_size,padded_size,len(args.bands)),preprocess_fn=preprocess)
49 |     training_model.load_weights(weights_path)
50 | 
51 |     if os.path.isdir(args.input):
52 |         os.makedirs(args.output,exist_ok=True)
53 |         paths = sorted(glob.glob(os.path.join(args.input,'*.tif')) + glob.glob(os.path.join(args.input,'*.tiff')))
54 |         pbar = tqdm.tqdm(total=len(paths))
55 |         for input_path in paths:
56 |             output_path = os.path.join(args.output,os.path.basename(input_path).split('.')[0]+'.json')
57 |             if not os.path.exists(output_path):
58 |                 run_tiled_inference(model,input_path,output_path,tile_size=args.tile_size,overlap=args.overlap,min_distance=min_distance,threshold_abs=threshold_abs,threshold_rel=threshold_rel)
59 |             pbar.update(1)
60 |     else:
61 |         run_tiled_inference(model,args.input,args.output,tile_size=args.tile_size,overlap=args.overlap,min_distance=min_distance,threshold_abs=threshold_abs,threshold_rel=threshold_rel)
62 | 
63 | if __name__ == '__main__':
64 |     main()
65 | 
--------------------------------------------------------------------------------
/scripts/tune.py:
--------------------------------------------------------------------------------
1 | """ Run hyperparameter tuning on validation set to determine optimal detection parameters. """
2 | 
3 | from utils.evaluate import evaluate
4 | import argparse
5 | import os
6 | import h5py as h5
7 | from models import SFANet
8 | from utils.preprocess import *
9 | import optuna
10 | import yaml
11 | import numpy as np
12 | 
13 | def main():
14 |     parser = argparse.ArgumentParser()
15 |     parser.add_argument('data', help='path to data hdf5 file')
16 |     parser.add_argument('log', help='path to log directory')
17 |     parser.add_argument('--ntrials', type=int, default=200, help='number of trials')
18 |     parser.add_argument('--max_distance', type=float, default=10, help='max distance from gt to pred tree (in pixels)')
19 | 
20 |     args = parser.parse_args()
21 | 
22 |     f = h5.File(args.data,'r')
23 |     images = f['val/images'][:]
24 |     gts = f['val/gt'][:]
25 | 
26 |     preds_path = os.path.join(args.log,'val_preds.npy')
27 |     if os.path.exists(preds_path):
28 |         print('----- loading predictions from file -----')
29 |         preds = np.load(preds_path)
30 |     else:
31 |         bands = f.attrs['bands']
32 |         preprocess = eval(f'preprocess_{bands}')
33 |         training_model, model = SFANet.build_model(
34 |             images.shape[1:],
35 |             preprocess_fn=preprocess)
36 | 
37 |         weights_path = os.path.join(args.log,'weights.best.h5')
38 |         training_model.load_weights(weights_path)
39 | 
40 |         print('----- getting predictions from trained model -----')
41 |         preds = model.predict(images,verbose=True,batch_size=1)[...,0]
42 | 
43 |         np.save(preds_path,preds)
44 | 
45 |     def objective(trial):
46 |         min_distance = trial.suggest_int('min_distance',1,10)
47 |         mode = trial.suggest_categorical('mode',['abs','rel'])
48 |         threshold_abs = trial.suggest_float('threshold_abs',-10,10)
49 |         threshold_rel = trial.suggest_float('threshold_rel',0,1)
50 |         results = evaluate(
51 |             gts=gts,
52 |             preds=preds,
53 |             min_distance=min_distance,
54 |             threshold_rel=threshold_rel if mode=='rel' else None,
55 |             threshold_abs=threshold_abs if mode=='abs' else None,
56 |             max_distance=args.max_distance)
57 |         return 1 - results['fscore']
58 | 
59 |     print('----- running hyperparameter tuning -----')
60 |     study = optuna.create_study()
61 |     study.optimize(objective, n_trials=args.ntrials)
62 | 
63 |     print('----- best params: -----')
64 |     print(study.best_params)
65 | 
66 |     output_path = os.path.join(args.log,'params.yaml')
67 |     with open(output_path,'w') as f:
68 | 
yaml.dump(study.best_params,f) 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /scripts/test.py: -------------------------------------------------------------------------------- 1 | """ Compute metrics on test set. """ 2 | import numpy as np 3 | import argparse 4 | import os 5 | import h5py as h5 6 | import yaml 7 | from utils.evaluate import evaluate, make_figure 8 | from models import SFANet 9 | from utils.preprocess import * 10 | import imageio 11 | import matplotlib as mpl 12 | mpl.use('Agg') 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser() 16 | 17 | parser.add_argument('data', help='path to data hdf5 file') 18 | parser.add_argument('log', help='path to log directory') 19 | parser.add_argument('--max_distance', type=float, default=10, help='max distance from gt to pred tree (in pixels)') 20 | 21 | args = parser.parse_args() 22 | 23 | params_path = os.path.join(args.log,'params.yaml') 24 | if os.path.exists(params_path): 25 | with open(params_path,'r') as f: 26 | params = yaml.safe_load(f) 27 | mode = params['mode'] 28 | min_distance = params['min_distance'] 29 | threshold_abs = params['threshold_abs'] if mode == 'abs' else None 30 | threshold_rel = params['threshold_rel'] if mode == 'rel' else None 31 | else: 32 | print(f'warning: params.yaml missing -- using default params') 33 | min_distance = 1 34 | threshold_abs = None 35 | threshold_rel = 0.2 36 | 37 | f = h5.File(args.data,'r') 38 | images = f[f'test/images'][:] 39 | gts = f[f'test/gt'][:] 40 | 41 | bands = f.attrs['bands'] 42 | 43 | preprocess = eval(f'preprocess_{bands}') 44 | training_model, model = SFANet.build_model( 45 | images.shape[1:], 46 | preprocess_fn=preprocess) 47 | 48 | weights_path = os.path.join(args.log,'weights.best.h5') 49 | training_model.load_weights(weights_path) 50 | 51 | print('----- getting predictions from trained model -----') 52 | preds = model.predict(images,verbose=True,batch_size=1)[...,0] 53 | 54 | print('----- calculating metrics -----') 55 | results = evaluate( 56 | gts=gts, 57 | preds=preds, 58 | min_distance=min_distance, 59 | threshold_rel=threshold_rel, 60 | threshold_abs=threshold_abs, 61 | max_distance=args.max_distance, 62 | return_locs=True) 63 | 64 | with open(os.path.join(args.log,'results.txt'),'w') as f: 65 | f.write('precision: '+str(results['precision'])+'\n') 66 | f.write('recall: '+str(results['recall'])+'\n') 67 | f.write('fscore: '+str(results['fscore'])+'\n') 68 | f.write('rmse [px]: '+str(results['rmse'])+'\n') 69 | 70 | print('------- results for: ' + args.log + ' ---------') 71 | print('precision: ',results['precision']) 72 | print('recall: ',results['recall']) 73 | print('fscore: ',results['fscore']) 74 | print('rmse [px]: ',results['rmse']) 75 | 76 | fig = make_figure(images,results) 77 | fig.savefig(os.path.join(args.log,'figure.pdf')) 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /models/SFANet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow.keras import Model, layers, initializers, losses 3 | from .VGG import VGG, BaseConv 4 | from tensorflow.keras import backend as K 5 | import tensorflow as tf 6 | 7 | class BackEnd(Model): 8 | def __init__(self,half_res=False): 9 | super(BackEnd,self).__init__() 10 | self.half_res = half_res 11 | 12 | self.upsample = layers.UpSampling2D(2,interpolation='bilinear') 13 | 
self.conv1 = BaseConv(256, 1, 1, activation='relu', use_bn=True) 14 | self.conv2 = BaseConv(256, 3, 1, activation='relu', use_bn=True) 15 | 16 | self.conv3 = BaseConv(128, 1, 1, activation='relu', use_bn=True) 17 | self.conv4 = BaseConv(128, 3, 1, activation='relu', use_bn=True) 18 | 19 | self.conv5 = BaseConv(64, 1, 1, activation='relu', use_bn=True) 20 | self.conv6 = BaseConv(64, 3, 1, activation='relu', use_bn=True) 21 | self.conv7 = BaseConv(32, 3, 1, activation='relu', use_bn=True) 22 | 23 | if not self.half_res: 24 | self.conv8 = BaseConv(32, 1, 1, activation='relu', use_bn=True) 25 | self.conv9 = BaseConv(32, 3, 1, activation='relu', use_bn=True) 26 | self.conv10 = BaseConv(32, 3, 1, activation='relu', use_bn=True) 27 | 28 | def call(self,inputs): 29 | if self.half_res: 30 | conv2_2, conv3_3, conv4_3, conv5_3 = inputs 31 | else: 32 | conv1_2, conv2_2, conv3_3, conv4_3, conv5_3 = inputs 33 | 34 | x = self.upsample(conv5_3) 35 | 36 | x = tf.concat([x, conv4_3], axis=-1) 37 | x = self.conv1(x) 38 | x = self.conv2(x) 39 | x = self.upsample(x) 40 | 41 | x = tf.concat([x, conv3_3], axis=-1) 42 | x = self.conv3(x) 43 | x = self.conv4(x) 44 | x = self.upsample(x) 45 | 46 | x = tf.concat([x, conv2_2], axis=-1) 47 | x = self.conv5(x) 48 | x = self.conv6(x) 49 | x = self.conv7(x) 50 | 51 | if not self.half_res: 52 | x = self.upsample(x) 53 | x = tf.concat([x, conv1_2], axis=-1) 54 | x = self.conv8(x) 55 | x = self.conv9(x) 56 | x = self.conv10(x) 57 | 58 | return x 59 | 60 | class SFANet(Model): 61 | def __init__(self,half_res=True): 62 | super(SFANet,self).__init__() 63 | output_layers = [3,6,9,12] if half_res else [1,3,6,9,12] 64 | self.vgg = VGG(output_layers=output_layers) 65 | self.amp = BackEnd(half_res=half_res) 66 | self.dmp = BackEnd(half_res=half_res) 67 | 68 | self.conv_att = BaseConv(1, 1, 1, activation='sigmoid', use_bn=True) 69 | self.conv_out = BaseConv(1, 1, 1, activation=None, use_bn=False) 70 | 71 | def call(self,inputs): 72 | x = inputs 73 | x = self.vgg(x) 74 | amp_out = self.amp(x) 75 | dmp_out = self.dmp(x) 76 | 77 | amp_out = self.conv_att(amp_out) 78 | dmp_out = amp_out * dmp_out 79 | dmp_out = self.conv_out(dmp_out) 80 | 81 | return dmp_out, amp_out 82 | 83 | def build_model(input_shape,preprocess_fn=None,bce_loss_weight=0.1,half_res=False): 84 | image = layers.Input(input_shape) 85 | 86 | image_preprocessed = preprocess_fn(image) 87 | 88 | sfanet = SFANet(half_res=half_res) 89 | dmp, amp = sfanet(image_preprocessed) 90 | outputs = [dmp,amp] 91 | sfanet.vgg.load_pretrained_vgg(image_preprocessed.shape[1:]) 92 | 93 | training_model = Model(inputs=image,outputs=outputs) 94 | testing_model = Model(inputs=image,outputs=dmp) 95 | 96 | return training_model, testing_model 97 | 98 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.optimizers import Adam 3 | from tensorflow.keras.callbacks import ModelCheckpoint 4 | 5 | import glob 6 | import numpy as np 7 | 8 | from models import SFANet 9 | from utils.preprocess import * 10 | 11 | import argparse 12 | import os 13 | import sys 14 | 15 | import h5py as h5 16 | 17 | def generator(f,batch_size): 18 | train_images = f['train/images'] 19 | train_confidence = f['train/confidence'] 20 | train_attention = f['train/attention'] 21 | 22 | inds = np.arange(len(train_images)) 23 | np.random.shuffle(inds) 24 | idx = 0 25 | while True: 26 | batch_inds = 
inds[idx:idx+batch_size] 27 | batch_images = np.stack([train_images[i] for i in batch_inds]) 28 | batch_confidence = np.stack([train_confidence[i] for i in batch_inds]) 29 | batch_attention = np.stack([train_attention[i] for i in batch_inds]) 30 | yield batch_images, (batch_confidence, batch_attention) 31 | idx += batch_size 32 | if idx >= len(inds): 33 | np.random.shuffle(inds) 34 | idx = 0 35 | 36 | def main(): 37 | parser = argparse.ArgumentParser() 38 | 39 | parser.add_argument('data', help='path to training data hdf5 file') 40 | parser.add_argument('log', help='path to log directory') 41 | 42 | parser.add_argument('--lr', type=float, default=1e-4, help='learning rate') 43 | parser.add_argument('--epochs', type=int, default=500, help='num epochs') 44 | parser.add_argument('--batch_size', type=int, default=8, help='batch size') 45 | 46 | args = parser.parse_args() 47 | 48 | physical_devices = tf.config.list_physical_devices('GPU') 49 | for device in physical_devices: 50 | try: 51 | tf.config.experimental.set_memory_growth(device, True) 52 | except: 53 | pass 54 | 55 | f = h5.File(args.data,'r') 56 | bands = f.attrs['bands'] 57 | val_images = f['val/images'][:] 58 | val_confidence = f['val/confidence'][:] 59 | val_attention = f['val/attention'][:] 60 | 61 | preprocess_fn = eval(f'preprocess_{bands}') 62 | 63 | model, testing_model = SFANet.build_model( 64 | val_images.shape[1:], 65 | preprocess_fn=preprocess_fn) 66 | opt = Adam(args.lr) 67 | model.compile(optimizer=opt, loss=['mse','binary_crossentropy'], loss_weights=[1,0.1]) 68 | 69 | print(model.summary()) 70 | 71 | os.makedirs(args.log,exist_ok=True) 72 | 73 | callbacks = [] 74 | 75 | weights_path = os.path.join(args.log, 'weights.best.h5') 76 | callbacks.append(ModelCheckpoint( 77 | filepath=weights_path, 78 | monitor='val_loss', 79 | verbose=True, 80 | save_best_only=True, 81 | save_weights_only=True, 82 | )) 83 | weights_path = os.path.join(args.log, 'weights.latest.h5') 84 | callbacks.append(ModelCheckpoint( 85 | filepath=weights_path, 86 | monitor='val_loss', 87 | verbose=True, 88 | save_best_only=False, 89 | save_weights_only=True, 90 | )) 91 | tensorboard_path = os.path.join(args.log,'tensorboard') 92 | os.system("rm -rf " + tensorboard_path) 93 | callbacks.append(tf.keras.callbacks.TensorBoard(tensorboard_path)) 94 | 95 | gen = generator(f,args.batch_size) 96 | y_val = (val_confidence, val_attention) 97 | 98 | model.fit( 99 | gen, 100 | validation_data=(val_images,y_val), 101 | batch_size=args.batch_size, 102 | epochs=args.epochs, 103 | steps_per_epoch=len(f['train/images'])//args.batch_size+1, 104 | verbose=True, 105 | callbacks=callbacks, 106 | use_multiprocessing=True) 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /utils/inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import argparse 4 | import os 5 | import sys 6 | 7 | import rasterio 8 | import rasterio.transform 9 | 10 | from skimage.feature import peak_local_max 11 | 12 | import tempfile 13 | 14 | import geopandas as gpd 15 | 16 | def _tiled_inference(model,input_path,output_path,tile_size,overlap): 17 | nbands = model.input_shape[-1] 18 | with rasterio.open(input_path,'r') as src: 19 | meta = src.meta 20 | height = meta['height'] 21 | width = meta['width'] 22 | nodata = meta['nodata'] 23 | 24 | padded_size = tile_size+overlap*2 25 | 26 | meta['count'] = 1 27 | meta['dtype'] = 'float32' 28 | with 
rasterio.open(output_path,'w',**meta) as dest:
29 | 
30 |         for row in range(overlap,height-overlap,tile_size):
31 |             for col in range(overlap,width-overlap,tile_size):
32 |                 window = rasterio.windows.Window(col-overlap,row-overlap,padded_size,padded_size)
33 |                 image = src.read(range(1,nbands+1),window=window)
34 |                 image = np.expand_dims(np.transpose(image,[1,2,0]),axis=0)
35 | 
36 |                 down_pad = max(0,padded_size-image.shape[1])
37 |                 right_pad = max(0,padded_size-image.shape[2])
38 |                 image = np.pad(image,((0,0),(0,down_pad),(0,right_pad),(0,0)))
39 | 
40 |                 output = model.predict(image,verbose=False)
41 | 
42 |                 # zero out "no data" pixels
43 |                 mask = np.all(image==nodata,axis=-1)
44 |                 output[mask] = 0
45 | 
46 |                 output_crop = output[0,overlap:-overlap,overlap:-overlap,0]
47 | 
48 |                 h = min(height-row,output_crop.shape[0])
49 |                 w = min(width-col,output_crop.shape[1])
50 |                 window = rasterio.windows.Window(col,row,w,h)
51 |                 dest.write(output_crop[None,:h,:w],window=window)
52 | 
53 | def _tiled_peak_finding(path,input_size,overlap,min_distance,threshold_abs,threshold_rel):
54 |     with rasterio.open(path,'r') as f:
55 |         meta = f.meta
56 |         height = meta['height']
57 |         width = meta['width']
58 | 
59 |         padded_size = input_size+overlap*2
60 | 
61 |         all_indices = []
62 | 
63 |         for row in range(overlap,height-overlap,input_size):
64 |             for col in range(overlap,width-overlap,input_size):
65 |                 window = rasterio.windows.Window(col-overlap,row-overlap,padded_size,padded_size)
66 |                 image = np.squeeze(f.read(1,window=window))
67 | 
68 |                 indices = peak_local_max(image,min_distance=min_distance,threshold_abs=threshold_abs,threshold_rel=threshold_rel)
69 | 
70 |                 good = np.all(np.stack([
71 |                     indices[:,0] >= overlap,
72 |                     indices[:,0] < overlap+input_size,
73 |                     indices[:,1] >= overlap,
74 |                     indices[:,1] < overlap+input_size],
75 |                     axis=-1),axis=-1)
76 |                 indices = indices[good]
77 |                 indices[:,0] += row-overlap
78 |                 indices[:,1] += col-overlap
79 | 
80 |                 all_indices.append(indices)
81 |         all_indices = np.concatenate(all_indices,axis=0)
82 |         return all_indices
83 | 
84 | def run_tiled_inference(model,input_path,output_path,min_distance,threshold_abs,threshold_rel,tile_size=2048,overlap=32):
85 |     temp_path = tempfile.NamedTemporaryFile(suffix='.tif').name
86 |     _tiled_inference(
87 |         model=model,
88 |         input_path=input_path,
89 |         output_path=temp_path,
90 |         tile_size=tile_size,
91 |         overlap=overlap)
92 | 
93 |     with rasterio.open(temp_path,'r') as f:
94 |         meta = f.meta
95 |         epsg = meta['crs'].to_epsg()
96 |         crs = f'EPSG:{epsg}'
97 |         transform = meta['transform']
98 | 
99 |     indices = _tiled_peak_finding(temp_path,input_size=256,overlap=32,min_distance=min_distance,threshold_abs=threshold_abs,threshold_rel=threshold_rel)
100 | 
101 |     x,y = rasterio.transform.xy(transform,indices[:,0],indices[:,1])
102 | 
103 |     gdf = gpd.GeoDataFrame(geometry=gpd.points_from_xy(x,y),crs=crs)
104 |     gdf.to_file(output_path,driver='GeoJSON')
105 | 
106 |     os.remove(temp_path)
107 | 
108 | 
--------------------------------------------------------------------------------
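A minimal sketch of driving `run_tiled_inference` directly from Python (the weight and raster paths below are hypothetical; the detection parameters shown are the fallback defaults that `scripts/inference.py` uses when no tuned `params.yaml` is found):

    from models import SFANet
    from utils.preprocess import preprocess_RGBN
    from utils.inference import run_tiled_inference

    tile_size, overlap = 2048, 32
    padded = tile_size + 2*overlap  # the model is built at the padded tile size
    training_model, model = SFANet.build_model((padded,padded,4), preprocess_fn=preprocess_RGBN)
    training_model.load_weights('logs/rgbn/weights.best.h5')  # hypothetical log directory

    # writes the geo-referenced tree locations to a GeoJSON file
    run_tiled_inference(model, 'naip_tile.tif', 'naip_tile.json',
                        min_distance=1, threshold_abs=None, threshold_rel=0.2)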
/scripts/prepare.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 | import imageio
5 | import glob
6 | import h5py
7 | import numpy as np
8 | from scipy.ndimage import distance_transform_edt
9 | import tqdm
10 | 
11 | parser = argparse.ArgumentParser()
12 | parser.add_argument('dataset',help='path to dataset')
13 | parser.add_argument('output',help='output path for .h5 file')
14 | parser.add_argument('--train',default='train.txt')
15 | parser.add_argument('--val',default='val.txt')
16 | parser.add_argument('--test',default='test.txt')
17 | parser.add_argument('--augment',action='store_true')
18 | parser.add_argument('--sigma',type=float,default=3,help='Gaussian kernel size in pixels')
19 | parser.add_argument('--bands',default='RGBN',help='description of bands in input raster (RGB or RGBN)')
20 | args = parser.parse_args()
21 | 
22 | images = []
23 | transforms = []
24 | counts = []
25 | gts = []
26 | densities = []
27 | attentions = []
28 | 
29 | def load_data(dataset_path,names,sigma):
30 |     data = []
31 | 
32 |     pbar = tqdm.tqdm(total=len(names))
33 |     for name in names:
34 |         image = None
35 |         for suffix in ['.tif','.tiff','.png']:
36 |             image_path = os.path.join(dataset_path,'images',name + suffix)
37 |             if os.path.exists(image_path):
38 |                 image = imageio.imread(image_path)
39 |                 if suffix == '.png' or args.bands == 'RGB':
40 |                     image = image[...,:3]
41 |                 break
42 |         if image is None:
43 |             raise RuntimeError(f'could not find image for {name}')
44 | 
45 |         csv_path = os.path.join(dataset_path,'csv',name + '.csv')
46 |         if os.path.exists(csv_path):
47 |             points = np.loadtxt(csv_path,delimiter=',',skiprows=1).astype('int')
48 |             if len(points.shape)==1:
49 |                 points = points[None,:]
50 | 
51 |             gt = np.zeros(image.shape[:2],dtype='float32')
52 |             gt[points[:,1],points[:,0]] = 1
53 | 
54 |             distance = distance_transform_edt(1-gt).astype('float32')
55 |             confidence = np.exp(-distance**2/(2*sigma**2))
56 |         else:
57 |             gt = np.zeros(image.shape[:2],dtype='float32')
58 |             confidence = np.zeros(image.shape[:2],dtype='float32')
59 | 
60 |         confidence = confidence[...,None]
61 | 
62 |         attention = confidence>0.001
63 |         attention = attention.astype('float32')
64 | 
65 |         data.append({
66 |             'name':name,
67 |             'image':image,
68 |             'gt':gt,
69 |             'confidence':confidence,
70 |             'attention':attention
71 |         })
72 | 
73 |         pbar.update(1)
74 | 
75 |     return data
76 | 
77 | def augment_images(images):
78 |     """ Augment by rotating and flipping.
79 |         Adapted from https://github.com/juglab/n2v/blob/master/n2v/internals/N2V_DataGenerator.py """
80 |     augmented = np.concatenate((images,
81 |                                 np.rot90(images, k=1, axes=(1, 2)),
82 |                                 np.rot90(images, k=2, axes=(1, 2)),
83 |                                 np.rot90(images, k=3, axes=(1, 2))))
84 |     augmented = np.concatenate((augmented, np.flip(augmented, axis=-2)))
85 |     return augmented
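# A quick sanity check of the augmentation above (the chip shape here is
# hypothetical): each chip yields the 8 symmetries of the square,
# i.e. 4 rotations, each with and without a flip.
def _check_augmentation():
    x = np.zeros((2,4,4,1),dtype='float32')  # two 4x4 single-channel chips
    assert augment_images(x).shape == (16,4,4,1)  # 2 chips x 8 variants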
86 | 
87 | def read_names(filename):
88 |     return [name.rstrip() for name in open(os.path.join(args.dataset,filename),'r')]
89 | train_names,val_names,test_names = [read_names(split) for split in [args.train,args.val,args.test]]
90 | 
91 | train_data,val_data,test_data = [load_data(args.dataset,names,args.sigma) for names in [train_names,val_names,test_names]]
92 | 
93 | def add_data_to_h5(f,data,split,augment=False):
94 |     if len(data)==0: return
95 |     names = np.array([d['name'] for d in data],dtype='S')  # byte strings for h5py
96 |     images = np.stack([d['image'] for d in data],axis=0)
97 |     gt = np.stack([d['gt'] for d in data],axis=0)
98 |     confidence = np.stack([d['confidence'] for d in data],axis=0)
99 |     attention = np.stack([d['attention'] for d in data],axis=0)
100 | 
101 |     if augment:
102 |         names = np.repeat(names,8)
103 |         images = augment_images(images)
104 |         gt = augment_images(gt)
105 |         confidence = augment_images(confidence)
106 |         attention = augment_images(attention)
107 | 
108 |     f.create_dataset(f'{split}/names',data=names)
109 |     f.create_dataset(f'{split}/images',data=images)
110 |     f.create_dataset(f'{split}/gt',data=gt)
111 |     f.create_dataset(f'{split}/confidence',data=confidence)
112 |     f.create_dataset(f'{split}/attention',data=attention)
113 | 
114 | with h5py.File(args.output,'w') as f:
115 |     add_data_to_h5(f,train_data,'train',augment=args.augment)
116 |     add_data_to_h5(f,val_data,'val')
117 |     add_data_to_h5(f,test_data,'test')
118 |     f.attrs['bands'] = args.bands
119 | 
120 | 
--------------------------------------------------------------------------------
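The targets written above pair each image chip with a Gaussian confidence map and a binary attention mask: each pixel receives confidence exp(-d^2/(2*sigma^2)), where d is the distance to the nearest annotated tree, and the attention mask thresholds that confidence at 0.001. A minimal sketch of the computation on a toy chip:

    import numpy as np
    from scipy.ndimage import distance_transform_edt

    sigma = 3.0
    gt = np.zeros((9,9),dtype='float32')
    gt[4,4] = 1  # a single annotated tree at the chip center
    distance = distance_transform_edt(1-gt)          # pixels to the nearest tree
    confidence = np.exp(-distance**2/(2*sigma**2))   # 1.0 at the tree, decaying outward
    attention = (confidence>0.001).astype('float32')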
/models/VGG.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tensorflow.keras import Model, layers, initializers, losses
3 | from tensorflow.keras.applications import VGG16
4 | from tensorflow.keras import backend as K
5 | import tensorflow as tf
6 | 
7 | class BaseConv(layers.Layer):
8 |     def __init__(self, out_channels, kernel, stride=1, activation=None, use_bn=False):
9 |         super(BaseConv,self).__init__()
10 |         self.use_bn = use_bn
11 |         self.conv = layers.Conv2D(out_channels, kernel, strides=stride, padding='same',
12 |                                   kernel_initializer=initializers.RandomNormal(stddev=0.01))
13 |         self.bn = layers.BatchNormalization()
14 |         if activation is not None:
15 |             self.activation = layers.Activation(activation)
16 |         else:
17 |             self.activation = None
18 | 
19 |     def call(self,inputs):
20 |         x = self.conv(inputs)
21 |         if self.use_bn:
22 |             x = self.bn(x)
23 |         if self.activation:
24 |             x = self.activation(x)
25 |         return x
26 | 
27 | class VGG(Model):
28 |     def __init__(self,output_layers):
29 |         """ Initializes a custom VGG model.
30 |             Arguments:
31 |                 output_layers: list of layers to output (0 for first layer, 1 for second layer, etc.)
32 |         """
33 |         super(VGG,self).__init__()
34 |         self.output_layers = output_layers
35 |         self.pool = layers.MaxPooling2D(2, 2)
36 |         self.conv1_1 = BaseConv(64, 3, 1, activation='relu', use_bn=True)
37 |         self.conv1_2 = BaseConv(64, 3, 1, activation='relu', use_bn=True)
38 |         self.conv2_1 = BaseConv(128, 3, 1, activation='relu', use_bn=True)
39 |         self.conv2_2 = BaseConv(128, 3, 1, activation='relu', use_bn=True)
40 |         self.conv3_1 = BaseConv(256, 3, 1, activation='relu', use_bn=True)
41 |         self.conv3_2 = BaseConv(256, 3, 1, activation='relu', use_bn=True)
42 |         self.conv3_3 = BaseConv(256, 3, 1, activation='relu', use_bn=True)
43 |         self.conv4_1 = BaseConv(512, 3, 1, activation='relu', use_bn=True)
44 |         self.conv4_2 = BaseConv(512, 3, 1, activation='relu', use_bn=True)
45 |         self.conv4_3 = BaseConv(512, 3, 1, activation='relu', use_bn=True)
46 |         self.conv5_1 = BaseConv(512, 3, 1, activation='relu', use_bn=True)
47 |         self.conv5_2 = BaseConv(512, 3, 1, activation='relu', use_bn=True)
48 |         self.conv5_3 = BaseConv(512, 3, 1, activation='relu', use_bn=True)
49 | 
50 |     def load_pretrained_vgg(self,input_shape):
51 |         """ Load weights from the pre-trained VGG16 model.
52 |             This can only be called after the model has been built.
53 |             Arguments:
54 |                 input_shape: input shape [H,W,C] (without the batch dimension)
55 |         """
56 |         channels_in = input_shape[2]
57 | 
58 |         # get pre-trained VGG for BGR input
59 |         vgg_bgr = VGG16(include_top=False, input_shape=(input_shape[0], input_shape[1], 3))
60 | 
61 |         # get weights in initial layer
62 |         w_bgr,b_bgr = vgg_bgr.layers[1].get_weights()
63 | 
64 |         # make new VGG with correct input shape
65 |         vgg = VGG16(include_top=False, input_shape=input_shape, weights=None)
66 | 
67 |         # copy in pre-trained weights to first layer
68 |         w,b = vgg.layers[1].get_weights()
69 |         w[:,:,:3,:] = w_bgr
70 |         b = b_bgr
71 |         vgg.layers[1].set_weights([w,b])
72 | 
73 |         # copy in pre-trained weights to remaining layers
74 |         for i in range(2,len(vgg.layers)):
75 |             vgg.layers[i].set_weights(vgg_bgr.layers[i].get_weights())
76 | 
77 |         # copy weights to our layers
78 |         def set_weights(layer,layer_in):
79 |             weights = layer.get_weights()
80 |             weights_in = layer_in.get_weights()
81 |             weights[0] = weights_in[0]
82 |             weights[1] = weights_in[1]
83 |             layer.set_weights(weights)
84 | 
85 |         set_weights(self.conv1_1,vgg.layers[1])
86 |         set_weights(self.conv1_2,vgg.layers[2])
87 |         set_weights(self.conv2_1,vgg.layers[4])
88 |         set_weights(self.conv2_2,vgg.layers[5])
89 |         set_weights(self.conv3_1,vgg.layers[7])
90 |         set_weights(self.conv3_2,vgg.layers[8])
91 |         set_weights(self.conv3_3,vgg.layers[9])
92 |         set_weights(self.conv4_1,vgg.layers[11])
93 |         set_weights(self.conv4_2,vgg.layers[12])
94 |         set_weights(self.conv4_3,vgg.layers[13])
95 |         set_weights(self.conv5_1,vgg.layers[15])
96 |         set_weights(self.conv5_2,vgg.layers[16])
97 |         set_weights(self.conv5_3,vgg.layers[17])
98 | 
99 |     def call(self,inputs):
100 |         x = inputs
101 |         l = []
102 | 
103 |         x = self.conv1_1(x) # 0
104 |         l.append(x)
105 |         x = self.conv1_2(x) # 1
106 |         l.append(x)
107 |         x = self.pool(x)
108 | 
109 |         x = self.conv2_1(x) # 2
110 |         l.append(x)
111 |         x = self.conv2_2(x) # 3
112 |         l.append(x)
113 |         x = self.pool(x)
114 | 
115 |         x = self.conv3_1(x) # 4
116 |         l.append(x)
117 |         x = self.conv3_2(x) # 5
118 |         l.append(x)
119 |         x = self.conv3_3(x) # 6
120 |         l.append(x)
121 |         x = self.pool(x)
122 | 
123 |         x = self.conv4_1(x) # 7
124 |         l.append(x)
125 |         x = self.conv4_2(x) # 8
126 |         l.append(x)
127 |         x = self.conv4_3(x) # 9
128 |         l.append(x)
129 |         x = self.pool(x)
130 | 
131 |         x = self.conv5_1(x) # 10
132 |         l.append(x)
133 |         x = self.conv5_2(x) # 11
134 |         l.append(x)
135 |         x = self.conv5_3(x) # 12
136 |         l.append(x)
137 | 
138 |         return tuple(l[i] for i in self.output_layers)
139 | 
140 | 
--------------------------------------------------------------------------------
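As a standalone sketch, the first-layer weight surgery in `load_pretrained_vgg` amounts to copying the pre-trained 3-channel kernels into the first three input channels of a wider convolution, leaving the extra channels (here the NIR and NDVI channels produced by `preprocess_RGBN`) at their fresh initialization; the shapes below follow VGG16's first 3x3 convolution with 64 filters:

    import numpy as np
    w_bgr = np.random.randn(3,3,3,64).astype('float32')    # stand-in for the pre-trained kernels
    w = np.random.randn(3,3,5,64).astype('float32')*0.01   # new first layer with 5 input channels
    w[:,:,:3,:] = w_bgr  # reuse the pre-trained filters for the first three channels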
/README.md:
--------------------------------------------------------------------------------
1 | ## Urban Tree Detection ##
2 | 
3 | This repository provides code for training and evaluating a convolutional neural network (CNN) to detect trees in urban environments with aerial imagery. The CNN takes multispectral imagery as input and outputs a confidence map indicating the locations of trees. The individual tree locations are found by local peak finding. In our study site in Southern California, we determined that, using our trained model, 73.6% of the detected trees matched actual trees, and 73.3% of the trees in the study area were detected.
4 | 
5 | ### Installation ###
6 | 
7 | The model is implemented with TensorFlow 2.4.1. We have provided an `environment.yml` file so that you can easily create a conda environment with the dependencies installed:
8 | 
9 |     conda env create
10 |     conda activate urban-tree-detection
11 | 
12 | ### Dataset ###
13 | 
14 | The data used in our paper can be found in [a separate Github repository](https://github.com/jonathanventura/urban-tree-detection-data/).
15 | 
16 | To prepare a dataset for training and testing, run the `prepare.py` script. You can specify the bands in the input raster using the `--bands` flag (currently `RGB` and `RGBN` are supported).
17 | 
18 |     python3 -m scripts.prepare <path to dataset> <path to output .h5 file> --bands <RGB or RGBN>
19 | 
20 | ### Training ###
21 | 
22 | To train the model, run the `train.py` script.
23 | 
24 |     python3 -m scripts.train <path to .h5 file> <path to log directory>
25 | 
26 | ### Hyperparameter tuning ###
27 | 
28 | The model outputs a confidence map, and we use local peak finding to isolate individual trees. We use the Optuna package to determine the optimal parameters of the peak finding algorithm, searching for the hyperparameters that maximize the F-score on the validation set.
29 | 
30 |     python3 -m scripts.tune <path to .h5 file> <path to log directory>
31 | 
32 | ### Evaluation on test set ###
33 | 
34 | Once hyperparameter tuning finishes, use the `test.py` script to compute evaluation metrics on the test set.
35 | 
36 |     python3 -m scripts.test <path to .h5 file> <path to log directory>
37 | 
38 | ### Inference on a large raster ###
39 | 
40 | To detect trees in rasters and produce GeoJSONs containing the geo-referenced trees, use the `inference.py` script. The script can process a single raster or a directory of rasters.
41 | 
42 |     python3 -m scripts.inference \
43 |         <path to input raster or directory> \
44 |         <path to output JSON file or directory> \
45 |         <path to log directory> --bands <RGB or RGBN>
46 | 
47 | ### Pre-trained weights ###
48 | 
49 | The following pre-trained models are available:
50 | 
51 | | Imagery   | Years     | Bands | Region                         | Log Directory Archive |
52 | |-----------|-----------|-------|--------------------------------|-----------------------|
53 | | 60cm NAIP | 2016-2020 | RGBN  | Northern & Southern California | [OneDrive](https://cpslo-my.sharepoint.com/:u:/g/personal/jventu09_calpoly_edu/ES31TXWdeGRFj_hn3O4qZpoBfhye_ssuULyaC2WB7yaJTw?e=cYkjMf) |
54 | | 60cm NAIP | 2016-2020 | RGB   | Northern & Southern California | [OneDrive](https://cpslo-my.sharepoint.com/:u:/g/personal/jventu09_calpoly_edu/Eay6v76obwpIqJmeK23_4zUBNb5EwM6R36wcSqh_BWKj_g?e=JrOwkO) |
55 | | 60cm NAIP | 2020      | RGBN  | Southern California            | [OneDrive](https://cpslo-my.sharepoint.com/:u:/g/personal/jventu09_calpoly_edu/EQMSOBZjuDFCjj_PNgSDXZ0BMQUcGQKUO_SlJ5SGH2Bl9Q?e=9RhhpN) |
56 | 
57 | We also provide an [example NAIP 2020 tile from Los Angeles](https://cpslo-my.sharepoint.com/:i:/g/personal/jventu09_calpoly_edu/EU1xfporUiBDvT2ZOpW0raEBOqJcJQpqcOv1lKNMCgbCdQ?e=zsgxXs) and an [example GeoJSON predictions file from the RGBN 2016-2020 model](https://cpslo-my.sharepoint.com/:u:/g/personal/jventu09_calpoly_edu/EUHYGnWdqL5FvYc1wm9hSl8BBdL2JEgMSlqS1FiTdB0EWA?e=uZMIBc).
58 | 
59 | You can explore a [map of predictions for the entire urban reserve of California](https://jventu09.users.earthengine.app/view/urban-tree-detector) (based on NAIP 2020 imagery) created using this pre-trained model.
60 | 
61 | ### Using your own data ###
62 | 
63 | To train on your own data, you will need to organize the data into the format expected by `prepare.py`.
64 | 
65 | * The image crops (or "chips") should all be the same size and the side length should be a multiple of 32.
66 | * The code is currently designed for three-band (RGB) or four-band (red, green, blue, near-IR) imagery. To handle more bands, you would need to add an appropriate preprocessing function in `utils/preprocess.py`. If the input does not contain RGB bands, then `models/VGG.py` would need to be modified, as the code expects the first three bands to be RGB to match the pre-trained weights.
67 | * Store the images as TIFF or PNG files in a subdirectory called `images`.
68 | * For each image, store a csv file containing x,y coordinates for the tree locations in a file `<name>.csv` where `<name>.tif`, `<name>.tiff`, or `<name>.png` is the corresponding image. The csv file should have a single header line (see the example below).
69 | * Create the files `train.txt`, `val.txt`, and `test.txt` to specify the names of the files in each split.
70 | 
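For example, a minimal dataset would be laid out as follows (the chip names here are purely illustrative):

    <dataset path>/
    ├── images/
    │   ├── chip_0001.tif
    │   └── chip_0002.tif
    ├── csv/
    │   ├── chip_0001.csv
    │   └── chip_0002.csv
    ├── train.txt
    ├── val.txt
    └── test.txt

and each csv file would list one tree per row in pixel coordinates, after a single header line:

    x,y
    120,45
    300,210
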
71 | ### Citation ###
72 | 
73 | If you use or build upon this repository, please cite our paper:
74 | 
75 | J. Ventura, C. Pawlak, M. Honsberger, C. Gonsalves, J. Rice, N.L.R. Love, S. Han, V. Nguyen, K. Sugano, J. Doremus, G.A. Fricker, J. Yost, and M. Ritter (2024). [Individual Tree Detection in Large-Scale Urban Environments using High-Resolution Multispectral Imagery.](https://www.sciencedirect.com/science/article/pii/S1569843224002024) International Journal of Applied Earth Observation and Geoinformation, 130, 103848.
76 | 
77 | ### Acknowledgments ###
78 | 
79 | This project was funded by CAL FIRE (award number: 8GB18415), the US Forest Service (award number: 21-CS-11052021-201), and an incubation grant from the Data Science Strategic Research Initiative at California Polytechnic State University.
80 | 
--------------------------------------------------------------------------------
/utils/evaluate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from skimage.feature import peak_local_max
4 | from sklearn.metrics import pairwise_distances
5 | from sklearn.neighbors import NearestNeighbors
6 | from scipy.optimize import linear_sum_assignment
7 | 
8 | from matplotlib import pyplot as plt
9 | 
10 | import tqdm
11 | 
12 | def find_matching(gt_indices,pred_indices,max_distance):
13 |     if len(gt_indices)==0 or len(pred_indices)==0:
14 |         dists = np.ones((len(gt_indices),len(pred_indices)),dtype='float32')*np.inf
15 |     else:
16 |         # calculate pairwise distances
17 |         dists = pairwise_distances(gt_indices,pred_indices)
18 | 
19 |         # associate each gt tree with all pred trees within radius
20 |         dists[dists>max_distance] = np.inf
21 | 
22 |         # find optimal assignment
23 |         maxval = 1e9
24 |         cost_matrix = np.copy(dists)
25 |         cost_matrix[np.isinf(cost_matrix)] = maxval
26 |         row_ind, col_ind = linear_sum_assignment(cost_matrix)
27 |         dists[:] = np.inf
28 |         dists[row_ind,col_ind] = cost_matrix[row_ind,col_ind]
29 |         dists[dists>=maxval] = np.inf
30 | 
31 |     # associated pred trees = true positives
32 |     #assert(np.max(np.sum(~np.isinf(dists),axis=0))<=1)
33 |     #assert(np.max(np.sum(~np.isinf(dists),axis=1))<=1)
34 |     assoc = np.where(~np.isinf(dists))
35 |     tp_gt_inds = assoc[0]
36 |     tp_inds = assoc[1]
37 |     tp = len(tp_inds)
38 | 
39 |     # un-associated pred trees = false positives
40 |     fp_inds = np.where(np.all(np.isinf(dists),axis=0))[0]
41 |     fp = len(fp_inds)
42 | 
43 |     # un-associated gt trees = false negatives
44 |     fn_inds = np.where(np.all(np.isinf(dists),axis=1))[0]
45 |     fn = len(fn_inds)
46 | 
47 |     if dists[:,tp_inds].size>0:
48 |         tp_dists = np.min(dists[:,tp_inds],axis=0)
49 |     else:
50 |         tp_dists = []
51 | 
52 |     return tp, fp, fn, tp_dists
53 | 
54 | def test_all_thresholds(gts, preds, max_distance):
55 |     all_gt_indices = []
56 |     all_pred_indices = []
57 |     all_pred_abs = []
58 | 
all_pred_rel = [] 59 | 60 | for i in range(len(preds)): 61 | gt = gts[i] 62 | pred = preds[i] 63 | 64 | gt_rows, gt_cols = np.where(gt>0) 65 | gt_indices = np.stack([gt_rows,gt_cols],axis=-1) 66 | pred_indices = peak_local_max(pred,min_distance=1,threshold_abs=0,threshold_rel=None) 67 | pred_abs = pred[pred_indices[:,0],pred_indices[:,1]] 68 | pred_rel = pred_abs/pred.max() 69 | 70 | all_gt_indices.append(gt_indices) 71 | all_pred_indices.append(pred_indices) 72 | all_pred_abs.append(pred_abs) 73 | all_pred_rel.append(pred_rel) 74 | 75 | pred_abs_sorted = sorted(np.concatenate(all_pred_abs,axis=0).flatten(),reverse=True) 76 | pred_rel_sorted = sorted(np.concatenate(all_pred_rel,axis=0).flatten(),reverse=True) 77 | 78 | thresholds = [] 79 | precisions = [] 80 | recalls = [] 81 | 82 | pbar = tqdm.tqdm(total=len(pred_abs_sorted)) 83 | for i,thresh in enumerate(pred_abs_sorted): 84 | my_pred_indices = [pred_indices[pred_abs>=thresh] for pred_indices,pred_abs in zip(all_pred_indices,all_pred_abs)] 85 | #pbar = tqdm.tqdm(total=len(pred_rel_sorted)) 86 | #for i,thresh in enumerate(pred_rel_sorted): 87 | #my_pred_indices = [pred_indices[pred_rel>=thresh] for pred_indices,pred_rel in zip(all_pred_indices,all_pred_rel)] 88 | 89 | all_tp = 0 90 | all_fp = 0 91 | all_fn = 0 92 | 93 | for gt_indices, pred_indices in zip(all_gt_indices,my_pred_indices): 94 | tp, fp, fn, _ = find_matching(gt_indices,pred_indices,max_distance) 95 | all_tp += tp 96 | all_fp += fp 97 | all_fn += fn 98 | 99 | precision = all_tp/(all_tp+all_fp) if all_tp+all_fp>0 else 0 100 | recall = all_tp/(all_tp+all_fn) if all_tp+all_fn>0 else 0 101 | 102 | thresholds.append(thresh) 103 | precisions.append(precision) 104 | recalls.append(recall) 105 | 106 | pbar.update(1) 107 | 108 | thresholds = np.array(thresholds) 109 | precisions = np.array(precisions) 110 | recalls = np.array(recalls) 111 | 112 | return thresholds, precisions, recalls 113 | 114 | def calculate_ap(precisions,recalls): 115 | return np.sum((recalls[1:]-recalls[:-1])*precisions[1:]) 116 | 117 | def evaluate(gts, preds, min_distance, threshold_rel, threshold_abs, max_distance, return_locs=False): 118 | """ Evaluate precision/recall metrics on prediction. 119 | Arguments: 120 | gts: ground truth annotation (0 = non-tree, 1 = tree) [N,H,W] 121 | preds: predicted confidence maps [N,H,W] 122 | min_distance: minimum distance between detections 123 | threshold_rel: relative threshold for local peak finding (None to disable) 124 | threshold_abs: absolute threshold for local peak finding (None to disable) 125 | max_distance: maximum distance from detection to gt point 126 | return_locs: whether to return the locations of true positives, false positives, etc. 127 | Returns: 128 | Result dictionary containing precision, recall, F-score, and RMSE metrics. 
129 | If return_locs = True, the following extra information will be included in the dictionary: 130 | tp_locs: x,y locations of true positives 131 | tp_gt_locs: x,y locations of ground truth points associated with true positives 132 | fp_locs: x,y locations of false positives 133 | fn_locs: x,y locations of false negatives 134 | gt_locs: x,y locations of ground truth points 135 | """ 136 | all_tp = 0 137 | all_fp = 0 138 | all_fn = 0 139 | all_tp_dists = [] 140 | 141 | if return_locs: 142 | all_tp_locs = [] 143 | all_tp_gt_locs = [] 144 | all_fp_locs = [] 145 | all_fn_locs = [] 146 | all_gt_locs = [] 147 | 148 | for gt,pred in zip(gts,preds): 149 | gt_rows, gt_cols = np.where(gt>0) 150 | gt_indices = np.stack([gt_rows,gt_cols],axis=-1) 151 | pred_indices = peak_local_max(pred,min_distance=min_distance,threshold_abs=threshold_abs,threshold_rel=threshold_rel) 152 | 153 | if len(gt_indices)==0 or len(pred_indices)==0: 154 | dists = np.ones((len(gt_indices),len(pred_indices)),dtype='float32')*np.inf 155 | else: 156 | # calculate pairwise distances 157 | dists = pairwise_distances(gt_indices,pred_indices) 158 | 159 | # associate each gt tree with all pred trees within radius 160 | dists[dists>max_distance] = np.inf 161 | 162 | # find optimal assignment 163 | maxval = 1e9 164 | cost_matrix = np.copy(dists) 165 | cost_matrix[np.isinf(cost_matrix)] = maxval 166 | row_ind, col_ind = linear_sum_assignment(cost_matrix) 167 | dists[:] = np.inf 168 | dists[row_ind,col_ind] = cost_matrix[row_ind,col_ind] 169 | dists[dists>=maxval] = np.inf 170 | 171 | # associated pred trees = true positives 172 | assoc = np.where(~np.isinf(dists)) 173 | tp_gt_inds = assoc[0] 174 | tp_inds = assoc[1] 175 | tp = len(tp_inds) 176 | 177 | # un-associated pred trees = false positives 178 | fp_inds = np.where(np.all(np.isinf(dists),axis=0))[0] 179 | fp = len(fp_inds) 180 | 181 | # un-associated gt trees = false negatives 182 | fn_inds = np.where(np.all(np.isinf(dists),axis=1))[0] 183 | fn = len(fn_inds) 184 | 185 | if dists[:,tp_inds].size>0: 186 | tp_dists = np.min(dists[:,tp_inds],axis=0) 187 | else: 188 | tp_dists = [] 189 | 190 | all_tp += tp 191 | all_fp += fp 192 | all_fn += fn 193 | all_tp_dists.append(tp_dists) 194 | 195 | if return_locs: 196 | tp_locs = [] 197 | tp_gt_locs = [] 198 | fp_locs = [] 199 | fn_locs = [] 200 | gt_locs = [] 201 | 202 | for y,x in gt_indices: 203 | gt_locs.append([x,y]) 204 | for y,x in gt_indices[fn_inds]: 205 | fn_locs.append([x,y]) 206 | for (y,x),(gty,gtx) in zip(pred_indices[tp_inds], 207 | gt_indices[tp_gt_inds]): 208 | tp_locs.append([x,y]) 209 | tp_gt_locs.append([gtx,gty]) 210 | for y,x in pred_indices[fp_inds]: 211 | fp_locs.append([x,y]) 212 | 213 | tp_locs = np.array(tp_locs) 214 | tp_gt_locs = np.array(tp_gt_locs) 215 | fp_locs = np.array(fp_locs) 216 | fn_locs = np.array(fn_locs) 217 | gt_locs = np.array(gt_locs) 218 | 219 | all_tp_locs.append(tp_locs) 220 | all_tp_gt_locs.append(tp_gt_locs) 221 | all_fp_locs.append(fp_locs) 222 | all_fn_locs.append(fn_locs) 223 | all_gt_locs.append(gt_locs) 224 | 225 | all_tp_dists = np.concatenate(all_tp_dists) 226 | 227 | precision = all_tp/(all_tp+all_fp) if all_tp+all_fp>0 else 0 228 | recall = all_tp/(all_tp+all_fn) if all_tp+all_fn>0 else 0 229 | fscore = 2*(precision*recall)/(precision+recall) if precision+recall>0 else 0 230 | rmse = np.sqrt(np.mean(all_tp_dists**2)) if len(all_tp_dists)>0 else np.inf 231 | 232 | results = { 233 | 'precision':precision, 234 | 'recall':recall, 235 | 'fscore':fscore, 236 | 'rmse':rmse, 237 | } 238 | if 
return_locs: 239 | results.update({ 240 | 'tp_locs':all_tp_locs, 241 | 'tp_gt_locs':all_tp_gt_locs, 242 | 'fp_locs':all_fp_locs, 243 | 'fn_locs':all_fn_locs, 244 | 'gt_locs':all_gt_locs, 245 | }) 246 | return results 247 | 248 | def make_figure(images,results,num_cols=5): 249 | num_rows = len(images)//num_cols+1 250 | fig,ax = plt.subplots(num_rows,num_cols,figsize=(8.5,11),tight_layout=True) 251 | for a in ax.flatten(): a.axis('off') 252 | tp_locs = results['tp_locs'] 253 | tp_gt_locs = results['tp_gt_locs'] 254 | fp_locs = results['fp_locs'] 255 | fn_locs = results['fn_locs'] 256 | gt_locs = results['gt_locs'] 257 | for a,im,tp,tp_gt,fp,fn,gt in zip(ax.flatten(),images, 258 | tp_locs,tp_gt_locs,fp_locs,fn_locs,gt_locs): 259 | a.imshow(im) 260 | 261 | if len(gt)>0: 262 | if len(gt.shape)==1: gt = gt[None,:] 263 | a.plot(gt[:,0],gt[:,1],'m.') 264 | 265 | if len(tp)>0: 266 | if len(tp.shape)==1: tp = tp[None,:] 267 | a.plot(tp[:,0],tp[:,1],'g+') 268 | 269 | if len(fp)>0: 270 | if len(fp.shape)==1: fp = fp[None,:] 271 | a.plot(fp[:,0],fp[:,1],'y^') 272 | 273 | if len(fn)>0: 274 | if len(fn.shape)==1: fn = fn[None,:] 275 | a.plot(fn[:,0],fn[:,1],'m.',markeredgecolor='k',markeredgewidth=1) 276 | 277 | if len(tp_gt)>0: 278 | if len(tp_gt.shape)==1: tp_gt= tp_gt[None,:] 279 | for t,g in zip(tp,tp_gt): 280 | a.plot((t[0],g[0]),(t[1],g[1]),'y-') 281 | 282 | return fig 283 | 284 | --------------------------------------------------------------------------------
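As a sanity check of the matching logic, a minimal sketch of calling `evaluate` on a toy pair of maps (the shapes are hypothetical): a single predicted peak one pixel diagonal from the only ground-truth tree yields precision = recall = 1 and an RMSE of sqrt(2):

    import numpy as np
    from utils.evaluate import evaluate

    gts = np.zeros((1,64,64),dtype='float32')
    gts[0,10,20] = 1                       # one ground-truth tree
    preds = np.zeros((1,64,64),dtype='float32')
    preds[0,11,21] = 0.9                   # one detection, one pixel away diagonally
    results = evaluate(gts, preds, min_distance=1, threshold_rel=0.2,
                       threshold_abs=None, max_distance=10)
    print(results)  # precision = recall = fscore = 1.0, rmse ~ 1.414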