├── README.md
├── functions_for_pascal_to_spacenet.py
├── mnc_config.py
└── pascal_to_spacenet.py

/README.md:
--------------------------------------------------------------------------------
This repository contains Python scripts that convert Pascal VOC SBD labels to GeoJSON labels for the SpaceNet competition. Note that this code targets the first SpaceNet competition.

Run the command:

    python pascal_to_spacenet.py jpg_dir tif_dir geojson_dir updated_geojson_dir

from a directory containing a subdirectory of raster tif files, a subdirectory of raster jpg files, an empty subdirectory for geojson outputs, and another empty subdirectory for the further-processed geojson outputs.

The resulting geojson labels will be written to updated_geojson_dir.
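
For example, with a layout like the following (the directory names are placeholders; any four directories can be passed as the positional arguments):

    working_dir/
    ├── jpg/               # raster jpg files, named ..._imgN.jpg
    ├── tif/               # raster tif files, named RGB-PanSharpen_..._imgN.tif
    ├── geojson/           # empty; receives the raw geojson outputs
    └── updated_geojson/   # empty; receives the filtered geojson outputs

run, from working_dir:

    python pascal_to_spacenet.py jpg tif geojson updated_geojson

Before running, fill in the `...` placeholders near the bottom of pascal_to_spacenet.py with the AOI-specific parts of your tif and geojson filenames.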

--------------------------------------------------------------------------------
/functions_for_pascal_to_spacenet.py:
--------------------------------------------------------------------------------
import os
import re
import json
import argparse
import time
import cv2
from osgeo import gdal, gdalnumeric
import ogr
import numpy as np
# User-defined module
import _init_paths
import caffe
from mnc_config import cfg
from transform.bbox_transform import clip_boxes
from utils.blob import prep_im_for_blob, im_list_to_blob
from transform.mask_transform import gpu_mask_voting
import matplotlib.pyplot as plt
from utils.vis_seg import _convert_pred_to_image, _get_voc_color_map
from PIL import Image
from area import area


def prepare_mnc_args(im, net):
    # Prepare image data blob
    blobs = {'data': None}
    processed_ims = []
    im, im_scale_factors = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.TEST.SCALES[0], cfg.TRAIN.MAX_SIZE)
    processed_ims.append(im)
    blobs['data'] = im_list_to_blob(processed_ims)
    # Prepare image info blob
    im_scales = [np.array(im_scale_factors)]
    assert len(im_scales) == 1, 'Only single-image batch implemented'
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)
    # Reshape network inputs and do forward
    net.blobs['data'].reshape(*blobs['data'].shape)
    net.blobs['im_info'].reshape(*blobs['im_info'].shape)
    forward_kwargs = {
        'data': blobs['data'].astype(np.float32, copy=False),
        'im_info': blobs['im_info'].astype(np.float32, copy=False)
    }
    return forward_kwargs, im_scales


def im_detect(im, net):
    forward_kwargs, im_scales = prepare_mnc_args(im, net)
    blobs_out = net.forward(**forward_kwargs)
    # Outputs we need to collect:
    # 1. output from phase 1
    rois_phase1 = net.blobs['rois'].data.copy()
    masks_phase1 = net.blobs['mask_proposal'].data[...]
    scores_phase1 = net.blobs['seg_cls_prob'].data[...]
    # 2. output from phase 2
    rois_phase2 = net.blobs['rois_ext'].data[...]
    masks_phase2 = net.blobs['mask_proposal_ext'].data[...]
    scores_phase2 = net.blobs['seg_cls_prob_ext'].data[...]
    # Boxes are in resized space, so un-scale them back
    rois_phase1 = rois_phase1[:, 1:5] / im_scales[0]
    rois_phase2 = rois_phase2[:, 1:5] / im_scales[0]
    rois_phase1, _ = clip_boxes(rois_phase1, im.shape)
    rois_phase2, _ = clip_boxes(rois_phase2, im.shape)
    # Concatenate the two stages to get the final network output
    masks = np.concatenate((masks_phase1, masks_phase2), axis=0)
    boxes = np.concatenate((rois_phase1, rois_phase2), axis=0)
    scores = np.concatenate((scores_phase1, scores_phase2), axis=0)
    return boxes, masks, scores


def get_vis_dict(result_box, result_mask, img_name, cls_names, vis_thresh):
    box_for_img = []
    mask_for_img = []
    cls_for_img = []
    for cls_ind, cls_name in enumerate(cls_names):
        det_for_img = result_box[cls_ind]
        seg_for_img = result_mask[cls_ind]
        keep_inds = np.where(det_for_img[:, -1] >= vis_thresh)[0]
        for keep in keep_inds:
            box_for_img.append(det_for_img[keep])
            mask_for_img.append(seg_for_img[keep][0])
            cls_for_img.append(cls_ind + 1)
    res_dict = {'image_name': img_name,
                'cls_name': cls_for_img,
                'boxes': box_for_img,
                'masks': mask_for_img}
    return res_dict


def process_jpg(CLASSES, net, im_name, jpg_dir, tif_name, tif_dir, geojson_name, geojson_dir, updated_geojson_name, updated_geojson_dir, vis_threshold):
    print "\n"
    print "Processing {}".format(jpg_dir+'/'+im_name)
    # Create the initial geojson (has extra DN:0 polygons)

    gt_image = os.path.join(jpg_dir, im_name)
    im = cv2.imread(gt_image)

    img_width = im.shape[1]
    img_height = im.shape[0]

    boxes, masks, seg_scores = im_detect(im, net)

    result_mask, result_box = gpu_mask_voting(masks, boxes, seg_scores, len(CLASSES) + 1, 300, im.shape[1], im.shape[0])
    pred_dict = get_vis_dict(result_box, result_mask, jpg_dir + im_name, CLASSES, vis_threshold)

    num_inst = len(pred_dict['boxes'])

    # Get image number
    image_number_search = re.search(r'(?<=img)\w+', im_name)
    image_number = image_number_search.group(0)

    # Open tif file
    print "Opening {}".format(tif_dir+'/'+tif_name)
    srcRas_ds = gdal.Open(tif_dir+'/'+tif_name)

    # Stuff to create before entering the instance loop
    geom = srcRas_ds.GetGeoTransform()
    proj = srcRas_ds.GetProjection()
    memdrv = gdal.GetDriverByName('MEM')
    inst_img = np.zeros((img_height, img_width))
    src_ds = memdrv.Create('', inst_img.shape[1], inst_img.shape[0], num_inst)
    src_ds.SetGeoTransform(geom)
    src_ds.SetProjection(proj)

    # Create geojson data source
    drv = ogr.GetDriverByName("geojson")
    dst_ds = drv.CreateDataSource(geojson_dir+'/'+geojson_name)

    # Create layer
    dst_layername = "building_layer_name"
    dst_layer = dst_ds.CreateLayer(dst_layername, srs=None)
    dst_layer.CreateField(ogr.FieldDefn("DN", ogr.OFTInteger))
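
    # For each detected instance below: paste its binarized mask into a
    # full-size instance image, write that image to band 1 of the in-memory
    # raster, and run gdal.Polygonize to append the resulting polygons (pixel
    # values go into the DN field) to the GeoJSON layer.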
    for inst_index in xrange(num_inst):
        box = np.round(pred_dict['boxes'][inst_index]).astype(int)
        mask = pred_dict['masks'][inst_index]
        cls_num = pred_dict['cls_name'][inst_index]

        box[0] = min(max(box[0], 0), img_width - 1)
        box[1] = min(max(box[1], 0), img_height - 1)
        box[2] = min(max(box[2], 0), img_width - 1)
        box[3] = min(max(box[3], 0), img_height - 1)

        mask = cv2.resize(mask.astype(np.float32), (box[2]-box[0]+1, box[3]-box[1]+1))
        mask = mask >= cfg.BINARIZE_THRESH
        mask = mask.astype(int)

        part1 = 1 * mask.astype(np.float32)
        part2 = np.multiply(np.logical_not(mask), inst_img[box[1]:box[3]+1, box[0]:box[2]+1])

        # Reset instance image to 0's
        inst_img = np.zeros((img_height, img_width))
        inst_img[box[1]:box[3]+1, box[0]:box[2]+1] = part1 + part2
        inst_img = inst_img.astype(int)

        band = src_ds.GetRasterBand(1)
        band.WriteArray(inst_img)

        gdal.Polygonize(band, None, dst_layer, 0, [], callback=None)
    dst_ds = None

    # Now reformat the geojson we just created
    print 'Reformat geojson {}'.format(geojson_name)

    # Set index to 0
    i = 0
    geojson_full_name = geojson_dir+'/'+geojson_name
    with open(geojson_full_name, 'r') as f:
        data = json.load(f)
    while i < len(data['features']):
        if data['features'][i]['properties']['DN'] == 0:
            data['features'].pop(i)
        elif area(data['features'][i]['geometry']) <= 50:
            data['features'].pop(i)
        else:
            i = i + 1

    # Write geojson
    updated_geojson_full_name = updated_geojson_dir + '/' + updated_geojson_name
    with open(updated_geojson_full_name, 'w') as outfile:
        json.dump(data, outfile, sort_keys=True)
    print "Geojson reformatted!"
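
# Note on the output: each feature written by gdal.Polygonize carries a single
# integer property "DN" holding the rasterized pixel value (1 inside a predicted
# building mask, 0 for background). The reformatting pass above drops DN == 0
# polygons and any polygon whose area() is at most 50 (approximately square
# meters for lon/lat geometries) before writing the updated GeoJSON.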

--------------------------------------------------------------------------------
/mnc_config.py:
--------------------------------------------------------------------------------
"""MNC config system
"""
import numpy as np
import os.path
from easydict import EasyDict as edict

__C = edict()
cfg = __C

# MNC/CFM mode
__C.MNC_MODE = True
__C.CFM_MODE = False

__C.EXP_DIR = 'default'
__C.USE_GPU_NMS = True
__C.GPU_ID = 0
__C.RNG_SEED = 3
__C.EPS = 1e-14
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
# Root directory of project
__C.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# Data directory
__C.DATA_DIR = os.path.abspath(os.path.join(__C.ROOT_DIR, 'data'))
# Related to mask resizing and binarizing predicted masks
__C.BINARIZE_THRESH = 0.4
# Mask estimation (if any) size (may be different from CFM input size)
__C.MASK_SIZE = 21

# Training options
__C.TRAIN = edict()

# ------- General setting ----
__C.TRAIN.IMS_PER_BATCH = 1
# Batch size for training Region CNN (not RPN)
__C.TRAIN.BATCH_SIZE = 16
# Make minibatches from images that have similar aspect ratios (i.e. both
# tall and thin or both short and wide) in order to avoid wasting computation
# on zero-padding.
__C.TRAIN.ASPECT_GROUPING = True
# Use flipped images for augmentation
__C.TRAIN.USE_FLIPPED = False
# Resize shortest side to 600
__C.TRAIN.SCALES = (600,)
__C.TRAIN.MAX_SIZE = 1000
__C.TRAIN.SNAPSHOT_ITERS = 5000
__C.TRAIN.SNAPSHOT_INFIX = ''
# Sample FG
__C.TRAIN.FG_FRACTION = [0.3]
__C.TRAIN.FG_THRESH_HI = [1.0]
__C.TRAIN.FG_THRESH_LO = [0.5]
# Sample BG according to remaining samples
__C.TRAIN.BG_FRACTION = [0.85, 0.15]
__C.TRAIN.BG_THRESH_HI = [0.5, 0.1]
__C.TRAIN.BG_THRESH_LO = [0.1, 0.0]

# ------- Proposal -------
__C.TRAIN.PROPOSAL_METHOD = 'gt'

# ------- BBOX Regression ---------
# Train bounding-box regressors
__C.TRAIN.BBOX_REG = True
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
__C.TRAIN.BBOX_THRESH = 0.5
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
# Weight of smooth L1 loss
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

# -------- RPN ----------
# Use RPN to detect objects
__C.TRAIN.HAS_RPN = True
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
# Note this is the class-agnostic anchors' FG_FRACTION
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 16
# Deprecated (outside weights)
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Give the positive RPN examples a weight of p * 1 / {num positives}
# and give negatives a weight of (1 - p)
# Set to -1.0 to use uniform example weighting
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
# Mix anchors used for RPN and later layers
__C.TRAIN.MIX_INDEX = True

# -------- CFM ----------
__C.TRAIN.CFM_INPUT_MASK_SIZE = 14
__C.TRAIN.FG_DET_THRESH = 0.5
__C.TRAIN.FG_SEG_THRESH = 0.5
__C.TRAIN.FRACTION_SAMPLE = [0.3, 0.5, 0.2]
__C.TRAIN.THRESH_LO_SAMPLE = [0.5, 0.1, 0.0]
__C.TRAIN.THRESH_HI_SAMPLE = [1.0, 0.5, 0.1]

# Test options

__C.TEST = edict()
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
__C.TEST.MAX_SIZE = 1000

# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Set this to true in the yml file to use RPN-generated proposals
__C.TEST.HAS_RPN = True
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 16
__C.TEST.BBOX_REG = True

# Aggregate nearby masks inside a box; the box IoU threshold
__C.TEST.MASK_MERGE_IOU_THRESH = 0.5
__C.TEST.MASK_MERGE_NMS_THRESH = 0.2
__C.TEST.CFM_INPUT_MASK_SIZE = 14

# Used for multi-scale testing. A naive implementation would waste a lot of
# computation on zero-padding, so we group every $GROUP_SCALE scales to feed
# to the GPU; the max number of ROIs for each group is given by MAX_ROIS_GPU
__C.TEST.MAX_ROIS_GPU = [2000]
__C.TEST.GROUP_SCALE = 1

# 0 means use all the MCG proposals
__C.TEST.USE_TOP_K_MCG = 0

# Whether to merge masks (and whether to do the merging on the GPU)
__C.TEST.USE_MASK_MERGE = True
__C.TEST.USE_GPU_MASK_MERGE = True


def get_output_dir(imdb, net):
    """ Return the directory where experimental artifacts are placed.
    A canonical path is built using the name from an imdb and a network
    (if not None).
    """
    path = os.path.abspath(os.path.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
    if net is None:
        return path
    else:
        return os.path.join(path, net.name)


def _merge_two_config(user_cfg, default_cfg):
    """ Merge the user's config into the default config dictionary, clobbering
    the default options whenever they are also specified in the user config.
    The two values under the same key must have the same type, and the merge
    recurses when it encounters a hierarchical dictionary.
    """
    if type(user_cfg) is not edict:
        return
    for key, val in user_cfg.iteritems():
        # user_cfg must be a subset of default_cfg
        if not default_cfg.has_key(key):
            raise KeyError('{} is not a valid config key'.format(key))

        if type(default_cfg[key]) is not type(val):
            if isinstance(default_cfg[key], np.ndarray):
                val = np.array(val, dtype=default_cfg[key].dtype)
            else:
                raise ValueError(
                    'Type mismatch ({} vs. {}) '
                    'for config key: {}'.format(type(default_cfg[key]),
                                                type(val), key))
        # Recursively merge configs
        if type(val) is edict:
            try:
                _merge_two_config(user_cfg[key], default_cfg[key])
            except:
                print 'Error under config key: {}'.format(key)
                raise
        else:
            default_cfg[key] = val


def cfg_from_file(file_name):
    """ Load a config file and merge it into the default options.
    """
    import yaml
    with open(file_name, 'r') as f:
        yaml_cfg = edict(yaml.load(f))

    _merge_two_config(yaml_cfg, __C)
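
A minimal usage sketch for the config loader above (the file name experiment.yml is hypothetical; _merge_two_config only accepts keys that already exist in the defaults, with matching types):

    # Hypothetical experiment.yml:
    #   EXP_DIR: spacenet
    #   BINARIZE_THRESH: 0.5
    #   TEST:
    #     RPN_POST_NMS_TOP_N: 300
    from mnc_config import cfg, cfg_from_file

    cfg_from_file('experiment.yml')
    print cfg.EXP_DIR                  # 'spacenet'
    print cfg.TEST.RPN_POST_NMS_TOP_N  # 300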

--------------------------------------------------------------------------------
/pascal_to_spacenet.py:
--------------------------------------------------------------------------------
import os
import re
import json
from os import listdir
import argparse
import time
import cv2
from osgeo import gdal, gdalnumeric
import ogr
import numpy as np

# User-defined module
import _init_paths
import caffe
from mnc_config import cfg
from transform.bbox_transform import clip_boxes
from utils.blob import prep_im_for_blob, im_list_to_blob
from transform.mask_transform import gpu_mask_voting
import matplotlib.pyplot as plt
from utils.vis_seg import _convert_pred_to_image, _get_voc_color_map
from PIL import Image

from functions_for_pascal_to_spacenet import prepare_mnc_args, im_detect, get_vis_dict, process_jpg

parser = argparse.ArgumentParser()
parser.add_argument("jpg_dir", help="directory for raster jpegs")
parser.add_argument("tif_dir", help="directory for raster tifs")
parser.add_argument("geojson_dir", help="directory for geojson outputs")
parser.add_argument("updated_geojson_dir", help="directory for updated geojson outputs")

args = parser.parse_args()
my_jpg_dir = args.jpg_dir
my_tif_dir = args.tif_dir
my_geojson_dir = args.geojson_dir
my_updated_geojson_dir = args.updated_geojson_dir


# Set some parameters
CLASSES = ['building']
test_prototxt = 'MNC/models/VGG16/mnc_5stage/test.prototxt'
test_model = 'MNC/output/mnc_5stage/voc_2012_train/vgg16_mnc_5stage_iter_25000.caffemodel.h5'
caffe.set_mode_gpu()
caffe.set_device(0)
cfg.GPU_ID = 0
net = caffe.Net(test_prototxt, test_model, caffe.TEST)


# Warm up the network on a dummy image (two passes)
im = 128 * np.ones((300, 500, 3), dtype=np.float32)
for i in xrange(2):
    _, _, _ = im_detect(im, net)


# Loop through the jpg directory
for im_name in listdir(my_jpg_dir):

    # Get image number
    image_number_search = re.search(r'(?<=img)\w+', im_name)
    image_number = image_number_search.group(0)

    # Fill in ... with the specifics of your filenames
    my_tif_name = 'RGB-PanSharpen_..._img'+str(image_number)+'.tif'
    my_geojson_name = 'First_AOI_..._img'+str(image_number)+'.geojson'
    my_updated_geojson_name = 'Updated_AOI_..._img'+str(image_number)+'.geojson'

    process_jpg(CLASSES, net, im_name, my_jpg_dir, my_tif_name, my_tif_dir, my_geojson_name, my_geojson_dir, my_updated_geojson_name, my_updated_geojson_dir, 0.3)

print "Done processing!"
--------------------------------------------------------------------------------