├── README.md
├── functions_for_pascal_to_spacenet.py
├── mnc_config.py
└── pascal_to_spacenet.py

/README.md:
--------------------------------------------------------------------------------
This repository contains Python scripts that convert Pascal VOC SBD labels to GeoJSON labels for the SpaceNet competition. Note that this code targets the first SpaceNet competition.

Run the command:

    python pascal_to_spacenet.py jpg_dir tif_dir geojson_dir updated_geojson_dir

from a directory containing a subdirectory of raster tif files, a subdirectory of raster jpg files, an empty subdirectory for geojson outputs, and another empty subdirectory for the further-processed geojson outputs.

The resulting geojson labels will be written to updated_geojson_dir.
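
For example, with a layout like the following (the directory names are placeholders; any four directories can be passed as the positional arguments):

    working_dir/
    ├── jpg/               # raster jpg files, named ..._imgN.jpg
    ├── tif/               # raster tif files, named RGB-PanSharpen_..._imgN.tif
    ├── geojson/           # empty; receives the raw geojson outputs
    └── updated_geojson/   # empty; receives the filtered geojson outputs

run, from working_dir:

    python pascal_to_spacenet.py jpg tif geojson updated_geojson

Before running, fill in the `...` placeholders near the bottom of pascal_to_spacenet.py with the AOI-specific parts of your tif and geojson filenames.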

--------------------------------------------------------------------------------
/functions_for_pascal_to_spacenet.py:
--------------------------------------------------------------------------------
import os
import re
import json
import argparse
import time
import cv2
from osgeo import gdal, gdalnumeric
import ogr
import numpy as np
# User-defined module
import _init_paths
import caffe
from mnc_config import cfg
from transform.bbox_transform import clip_boxes
from utils.blob import prep_im_for_blob, im_list_to_blob
from transform.mask_transform import gpu_mask_voting
import matplotlib.pyplot as plt
from utils.vis_seg import _convert_pred_to_image, _get_voc_color_map
from PIL import Image
from area import area


def prepare_mnc_args(im, net):
    # Prepare image data blob
    blobs = {'data': None}
    processed_ims = []
    im, im_scale_factors = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.TEST.SCALES[0], cfg.TRAIN.MAX_SIZE)
    processed_ims.append(im)
    blobs['data'] = im_list_to_blob(processed_ims)
    # Prepare image info blob
    im_scales = [np.array(im_scale_factors)]
    assert len(im_scales) == 1, 'Only single-image batch implemented'
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)
    # Reshape network inputs and do forward
    net.blobs['data'].reshape(*blobs['data'].shape)
    net.blobs['im_info'].reshape(*blobs['im_info'].shape)
    forward_kwargs = {
        'data': blobs['data'].astype(np.float32, copy=False),
        'im_info': blobs['im_info'].astype(np.float32, copy=False)
    }
    return forward_kwargs, im_scales


def im_detect(im, net):
    forward_kwargs, im_scales = prepare_mnc_args(im, net)
    blobs_out = net.forward(**forward_kwargs)
    # Outputs we need to collect:
    # 1. output from phase 1
    rois_phase1 = net.blobs['rois'].data.copy()
    masks_phase1 = net.blobs['mask_proposal'].data[...]
    scores_phase1 = net.blobs['seg_cls_prob'].data[...]
    # 2. output from phase 2
    rois_phase2 = net.blobs['rois_ext'].data[...]
    masks_phase2 = net.blobs['mask_proposal_ext'].data[...]
    scores_phase2 = net.blobs['seg_cls_prob_ext'].data[...]
    # Boxes are in resized space, so un-scale them back
    rois_phase1 = rois_phase1[:, 1:5] / im_scales[0]
    rois_phase2 = rois_phase2[:, 1:5] / im_scales[0]
    rois_phase1, _ = clip_boxes(rois_phase1, im.shape)
    rois_phase2, _ = clip_boxes(rois_phase2, im.shape)
    # Concatenate the two stages to get the final network output
    masks = np.concatenate((masks_phase1, masks_phase2), axis=0)
    boxes = np.concatenate((rois_phase1, rois_phase2), axis=0)
    scores = np.concatenate((scores_phase1, scores_phase2), axis=0)
    return boxes, masks, scores


def get_vis_dict(result_box, result_mask, img_name, cls_names, vis_thresh):
    box_for_img = []
    mask_for_img = []
    cls_for_img = []
    for cls_ind, cls_name in enumerate(cls_names):
        det_for_img = result_box[cls_ind]
        seg_for_img = result_mask[cls_ind]
        keep_inds = np.where(det_for_img[:, -1] >= vis_thresh)[0]
        for keep in keep_inds:
            box_for_img.append(det_for_img[keep])
            mask_for_img.append(seg_for_img[keep][0])
            cls_for_img.append(cls_ind + 1)
    res_dict = {'image_name': img_name,
                'cls_name': cls_for_img,
                'boxes': box_for_img,
                'masks': mask_for_img}
    return res_dict


def process_jpg(CLASSES, net, im_name, jpg_dir, tif_name, tif_dir, geojson_name, geojson_dir, updated_geojson_name, updated_geojson_dir, vis_threshold):
    print "\n"
    print "Processing {}".format(jpg_dir+'/'+im_name)
    # Create the initial geojson (has extra DN:0 polygons)

    gt_image = os.path.join(jpg_dir, im_name)
    im = cv2.imread(gt_image)

    img_width = im.shape[1]
    img_height = im.shape[0]

    boxes, masks, seg_scores = im_detect(im, net)

    result_mask, result_box = gpu_mask_voting(masks, boxes, seg_scores, len(CLASSES) + 1, 300, im.shape[1], im.shape[0])
    pred_dict = get_vis_dict(result_box, result_mask, jpg_dir + im_name, CLASSES, vis_threshold)

    num_inst = len(pred_dict['boxes'])

    # Get image number
    image_number_search = re.search(r'(?<=img)\w+', im_name)
    image_number = image_number_search.group(0)

    # Open tif file
    print "Opening {}".format(tif_dir+'/'+tif_name)
    srcRas_ds = gdal.Open(tif_dir+'/'+tif_name)

    # Stuff to create before entering the instance loop
    geom = srcRas_ds.GetGeoTransform()
    proj = srcRas_ds.GetProjection()
    memdrv = gdal.GetDriverByName('MEM')
    inst_img = np.zeros((img_height, img_width))
    src_ds = memdrv.Create('', inst_img.shape[1], inst_img.shape[0], num_inst)
    src_ds.SetGeoTransform(geom)
    src_ds.SetProjection(proj)

    # Create geojson data source
    drv = ogr.GetDriverByName("geojson")
    dst_ds = drv.CreateDataSource(geojson_dir+'/'+geojson_name)

    # Create layer
    dst_layername = "building_layer_name"
    dst_layer = dst_ds.CreateLayer(dst_layername, srs=None)
    dst_layer.CreateField(ogr.FieldDefn("DN", ogr.OFTInteger))
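
    # For each detected instance below: paste its binarized mask into a
    # full-size instance image, write that image to band 1 of the in-memory
    # raster, and run gdal.Polygonize to append the resulting polygons (pixel
    # values go into the DN field) to the GeoJSON layer.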
    for inst_index in xrange(num_inst):
        box = np.round(pred_dict['boxes'][inst_index]).astype(int)
        mask = pred_dict['masks'][inst_index]
        cls_num = pred_dict['cls_name'][inst_index]

        box[0] = min(max(box[0], 0), img_width - 1)
        box[1] = min(max(box[1], 0), img_height - 1)
        box[2] = min(max(box[2], 0), img_width - 1)
        box[3] = min(max(box[3], 0), img_height - 1)

        mask = cv2.resize(mask.astype(np.float32), (box[2]-box[0]+1, box[3]-box[1]+1))
        mask = mask >= cfg.BINARIZE_THRESH
        mask = mask.astype(int)

        part1 = 1 * mask.astype(np.float32)
        part2 = np.multiply(np.logical_not(mask), inst_img[box[1]:box[3]+1, box[0]:box[2]+1])

        # Reset instance image to 0's
        inst_img = np.zeros((img_height, img_width))
        inst_img[box[1]:box[3]+1, box[0]:box[2]+1] = part1 + part2
        inst_img = inst_img.astype(int)

        band = src_ds.GetRasterBand(1)
        band.WriteArray(inst_img)

        gdal.Polygonize(band, None, dst_layer, 0, [], callback=None)
    dst_ds = None

    # Now reformat the geojson we just created
    print 'Reformat geojson {}'.format(geojson_name)

    # Set index to 0
    i = 0
    geojson_full_name = geojson_dir+'/'+geojson_name
    with open(geojson_full_name, 'r') as f:
        data = json.load(f)
    while i < len(data['features']):
        if data['features'][i]['properties']['DN'] == 0:
            data['features'].pop(i)
        elif area(data['features'][i]['geometry']) <= 50:
            data['features'].pop(i)
        else:
            i = i + 1

    # Write geojson
    updated_geojson_full_name = updated_geojson_dir + '/' + updated_geojson_name
    with open(updated_geojson_full_name, 'w') as outfile:
        json.dump(data, outfile, sort_keys=True)
    print "Geojson reformatted!"
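
# Note on the output: each feature written by gdal.Polygonize carries a single
# integer property "DN" holding the rasterized pixel value (1 inside a predicted
# building mask, 0 for background). The reformatting pass above drops DN == 0
# polygons and any polygon whose area() is at most 50 (approximately square
# meters for lon/lat geometries) before writing the updated GeoJSON.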

--------------------------------------------------------------------------------
/mnc_config.py:
--------------------------------------------------------------------------------
"""MNC config system
"""
import numpy as np
import os.path
from easydict import EasyDict as edict

__C = edict()
cfg = __C

# MNC/CFM mode
__C.MNC_MODE = True
__C.CFM_MODE = False

__C.EXP_DIR = 'default'
__C.USE_GPU_NMS = True
__C.GPU_ID = 0
__C.RNG_SEED = 3
__C.EPS = 1e-14
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
# Root directory of project
__C.ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# Data directory
__C.DATA_DIR = os.path.abspath(os.path.join(__C.ROOT_DIR, 'data'))
# Related to mask resizing and binarizing predicted masks
__C.BINARIZE_THRESH = 0.4
# Mask estimation (if any) size (may be different from CFM input size)
__C.MASK_SIZE = 21

# Training options
__C.TRAIN = edict()

# ------- General setting ----
__C.TRAIN.IMS_PER_BATCH = 1
# Batch size for training Region CNN (not RPN)
__C.TRAIN.BATCH_SIZE = 16
# Make minibatches from images that have similar aspect ratios (i.e. both
# tall and thin or both short and wide) in order to avoid wasting computation
# on zero-padding.
__C.TRAIN.ASPECT_GROUPING = True
# Use flipped images for augmentation
__C.TRAIN.USE_FLIPPED = False
# Resize shortest side to 600
__C.TRAIN.SCALES = (600,)
__C.TRAIN.MAX_SIZE = 1000
__C.TRAIN.SNAPSHOT_ITERS = 5000
__C.TRAIN.SNAPSHOT_INFIX = ''
# Sample FG
__C.TRAIN.FG_FRACTION = [0.3]
__C.TRAIN.FG_THRESH_HI = [1.0]
__C.TRAIN.FG_THRESH_LO = [0.5]
# Sample BG according to remaining samples
__C.TRAIN.BG_FRACTION = [0.85, 0.15]
__C.TRAIN.BG_THRESH_HI = [0.5, 0.1]
__C.TRAIN.BG_THRESH_LO = [0.1, 0.0]

# ------- Proposal -------
__C.TRAIN.PROPOSAL_METHOD = 'gt'

# ------- BBOX Regression ---------
# Train bounding-box regressors
__C.TRAIN.BBOX_REG = True
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
__C.TRAIN.BBOX_THRESH = 0.5
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
# Weight of smooth L1 loss
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

# -------- RPN ----------
# Use RPN to detect objects
__C.TRAIN.HAS_RPN = True
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor satisfies both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
# Note this is the class-agnostic anchors' FG_FRACTION
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 16
# Deprecated (outside weights)
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Give the positive RPN examples a weight of p * 1 / {num positives}
# and give negatives a weight of (1 - p)
# Set to -1.0 to use uniform example weighting
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
# Mix anchors used for RPN and later layers
__C.TRAIN.MIX_INDEX = True

# -------- CFM ----------
__C.TRAIN.CFM_INPUT_MASK_SIZE = 14
__C.TRAIN.FG_DET_THRESH = 0.5
__C.TRAIN.FG_SEG_THRESH = 0.5
__C.TRAIN.FRACTION_SAMPLE = [0.3, 0.5, 0.2]
__C.TRAIN.THRESH_LO_SAMPLE = [0.5, 0.1, 0.0]
__C.TRAIN.THRESH_HI_SAMPLE = [1.0, 0.5, 0.1]

# Test options

__C.TEST = edict()
# Scales to use during testing (can list multiple scales)
# Each scale is the pixel size of an image's shortest side
__C.TEST.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image
__C.TEST.MAX_SIZE = 1000

# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3
# Set this to true in the yml file to use RPN-generated proposals
__C.TEST.HAS_RPN = True
# NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 16
__C.TEST.BBOX_REG = True

# Aggregate nearby masks inside a box; the box IoU threshold
__C.TEST.MASK_MERGE_IOU_THRESH = 0.5
__C.TEST.MASK_MERGE_NMS_THRESH = 0.2
__C.TEST.CFM_INPUT_MASK_SIZE = 14

# Used for multi-scale testing. A naive implementation would waste a lot of
# computation on zero-padding, so we group every $GROUP_SCALE scales to feed
# to the GPU; the max number of ROIs for each group is given by MAX_ROIS_GPU
__C.TEST.MAX_ROIS_GPU = [2000]
__C.TEST.GROUP_SCALE = 1

# 0 means use all the MCG proposals
__C.TEST.USE_TOP_K_MCG = 0

# Whether to merge masks (and whether to do the merging on the GPU)
__C.TEST.USE_MASK_MERGE = True
__C.TEST.USE_GPU_MASK_MERGE = True


def get_output_dir(imdb, net):
    """ Return the directory where experimental artifacts are placed.
    A canonical path is built using the name from an imdb and a network
    (if not None).
    """
    path = os.path.abspath(os.path.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
    if net is None:
        return path
    else:
        return os.path.join(path, net.name)


def _merge_two_config(user_cfg, default_cfg):
    """ Merge the user's config into the default config dictionary, clobbering
    the default options whenever they are also specified in the user config.
    The two values under the same key must have the same type, and the merge
    recurses when it encounters a hierarchical dictionary.
    """
    if type(user_cfg) is not edict:
        return
    for key, val in user_cfg.iteritems():
        # user_cfg must be a subset of default_cfg
        if not default_cfg.has_key(key):
            raise KeyError('{} is not a valid config key'.format(key))

        if type(default_cfg[key]) is not type(val):
            if isinstance(default_cfg[key], np.ndarray):
                val = np.array(val, dtype=default_cfg[key].dtype)
            else:
                raise ValueError(
                    'Type mismatch ({} vs. {}) '
                    'for config key: {}'.format(type(default_cfg[key]),
                                                type(val), key))
        # Recursively merge configs
        if type(val) is edict:
            try:
                _merge_two_config(user_cfg[key], default_cfg[key])
            except:
                print 'Error under config key: {}'.format(key)
                raise
        else:
            default_cfg[key] = val


def cfg_from_file(file_name):
    """ Load a config file and merge it into the default options.
    """
    import yaml
    with open(file_name, 'r') as f:
        yaml_cfg = edict(yaml.load(f))

    _merge_two_config(yaml_cfg, __C)
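
A minimal usage sketch for the config loader above (the file name experiment.yml is hypothetical; _merge_two_config only accepts keys that already exist in the defaults, with matching types):

    # Hypothetical experiment.yml:
    #   EXP_DIR: spacenet
    #   BINARIZE_THRESH: 0.5
    #   TEST:
    #     RPN_POST_NMS_TOP_N: 300
    from mnc_config import cfg, cfg_from_file

    cfg_from_file('experiment.yml')
    print cfg.EXP_DIR                  # 'spacenet'
    print cfg.TEST.RPN_POST_NMS_TOP_N  # 300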

--------------------------------------------------------------------------------
/pascal_to_spacenet.py:
--------------------------------------------------------------------------------
import os
import re
import json
from os import listdir
import argparse
import time
import cv2
from osgeo import gdal, gdalnumeric
import ogr
import numpy as np

# User-defined module
import _init_paths
import caffe
from mnc_config import cfg
from transform.bbox_transform import clip_boxes
from utils.blob import prep_im_for_blob, im_list_to_blob
from transform.mask_transform import gpu_mask_voting
import matplotlib.pyplot as plt
from utils.vis_seg import _convert_pred_to_image, _get_voc_color_map
from PIL import Image

from functions_for_pascal_to_spacenet import prepare_mnc_args, im_detect, get_vis_dict, process_jpg

parser = argparse.ArgumentParser()
parser.add_argument("jpg_dir", help="directory for raster jpegs")
parser.add_argument("tif_dir", help="directory for raster tifs")
parser.add_argument("geojson_dir", help="directory for geojson outputs")
parser.add_argument("updated_geojson_dir", help="directory for updated geojson outputs")

args = parser.parse_args()
my_jpg_dir = args.jpg_dir
my_tif_dir = args.tif_dir
my_geojson_dir = args.geojson_dir
my_updated_geojson_dir = args.updated_geojson_dir


# Set some parameters
CLASSES = ['building']
test_prototxt = 'MNC/models/VGG16/mnc_5stage/test.prototxt'
test_model = 'MNC/output/mnc_5stage/voc_2012_train/vgg16_mnc_5stage_iter_25000.caffemodel.h5'
caffe.set_mode_gpu()
caffe.set_device(0)
cfg.GPU_ID = 0
net = caffe.Net(test_prototxt, test_model, caffe.TEST)


# Warm up the network on a dummy image (two passes)
im = 128 * np.ones((300, 500, 3), dtype=np.float32)
for i in xrange(2):
    _, _, _ = im_detect(im, net)


# Loop through the jpg directory
for im_name in listdir(my_jpg_dir):

    # Get image number
    image_number_search = re.search(r'(?<=img)\w+', im_name)
    image_number = image_number_search.group(0)

    # Fill in ... with the specifics of your filenames
    my_tif_name = 'RGB-PanSharpen_..._img'+str(image_number)+'.tif'
    my_geojson_name = 'First_AOI_..._img'+str(image_number)+'.geojson'
    my_updated_geojson_name = 'Updated_AOI_..._img'+str(image_number)+'.geojson'

    process_jpg(CLASSES, net, im_name, my_jpg_dir, my_tif_name, my_tif_dir, my_geojson_name, my_geojson_dir, my_updated_geojson_name, my_updated_geojson_dir, 0.3)

print "Done processing!"
--------------------------------------------------------------------------------