├── Fashion_Test.py ├── Fashion_Train.py ├── KerasRFCN ├── Config.py ├── Data_generator.py ├── Losses.py ├── Model │ ├── BaseModel.py │ ├── Model.py │ ├── ResNet.py │ └── ResNet_dilated.py ├── Utils.py └── __init__.py ├── LICENSE ├── README.md ├── ReadmeImages ├── 1.png ├── result_1.jpg └── result_2.jpg └── data.pk /Fashion_Test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | ''' 9 | This is a demo to Eval a RFCN model with DeepFashion Dataset 10 | http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html 11 | ''' 12 | 13 | from KerasRFCN.Model.Model import RFCN_Model 14 | from KerasRFCN.Config import Config 15 | import KerasRFCN.Utils 16 | import os 17 | from keras.preprocessing import image 18 | import pickle 19 | import numpy as np 20 | import argparse 21 | import matplotlib.pyplot as plt 22 | import matplotlib.patches as patches 23 | 24 | class RFCNNConfig(Config): 25 | """Configuration for training on the toy shapes dataset. 26 | Derives from the base Config class and overrides values specific 27 | to the toy shapes dataset. 28 | """ 29 | # Give the configuration a recognizable name 30 | NAME = "Fashion" 31 | 32 | # Backbone model 33 | # choose one from ['resnet50', 'resnet101', 'resnet50_dilated', 'resnet101_dilated'] 34 | BACKBONE = "resnet101" 35 | 36 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 37 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 38 | GPU_COUNT = 1 39 | IMAGES_PER_GPU = 1 40 | 41 | # Number of classes (including background) 42 | C = 1 + 46 # background + 2 tags 43 | NUM_CLASSES = C 44 | # Use small images for faster training. Set the limits of the small side 45 | # the large side, and that determines the image shape. 46 | IMAGE_MIN_DIM = 640 47 | IMAGE_MAX_DIM = 768 48 | 49 | # Use smaller anchors because our image and objects are small 50 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) # anchor side in pixels 51 | # Use same strides on stage 4-6 if use dilated resnet of DetNet 52 | # Like BACKBONE_STRIDES = [4, 8, 16, 16, 16] 53 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 54 | # Reduce training ROIs per image because the images are small and have 55 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 56 | TRAIN_ROIS_PER_IMAGE = 200 57 | 58 | # Use a small epoch since the data is simple 59 | STEPS_PER_EPOCH = 100 60 | 61 | # use small validation steps since the epoch is small 62 | VALIDATION_STEPS = 5 63 | 64 | RPN_NMS_THRESHOLD = 0.7 65 | 66 | DETECTION_MIN_CONFIDENCE = 0.4 67 | POOL_SIZE = 7 68 | 69 | 70 | def Test(model, loadpath, savepath): 71 | assert not loadpath == savepath, "loadpath should'n same with savepath" 72 | 73 | model_path = model.find_last()[1] 74 | # Load trained weights (fill in path to trained weights here) 75 | 76 | model.load_weights(model_path, by_name=True) 77 | print("Loading weights from ", model_path) 78 | 79 | if os.path.isdir(loadpath): 80 | for idx, imgname in enumerate(os.listdir(loadpath)): 81 | if not imgname.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 82 | continue 83 | print(imgname) 84 | imageoriChannel = np.array(plt.imread( os.path.join(loadpath, imgname) )) / 255.0 85 | img = image.img_to_array( image.load_img(os.path.join(loadpath, imgname)) ) 86 | TestSinglePic(img, imageoriChannel, model, savepath=savepath, imgname=imgname) 87 | 88 | elif os.path.isfile(loadpath): 89 | if not loadpath.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 90 | print("not image file!") 91 | return 92 | print(loadpath) 93 | imageoriChannel = np.array(plt.imread( loadpath )) / 255.0 94 | img = image.img_to_array( image.load_img(loadpath) ) 95 | (filename,extension) = os.path.splitext(loadpath) 96 | TestSinglePic(img, imageoriChannel, model, savepath=savepath, imgname=filename) 97 | 98 | def TestSinglePic(image, image_ori, model, savepath, imgname): 99 | r = model.detect([image], verbose=1)[0] 100 | print(r) 101 | def get_ax(rows=1, cols=1, size=8): 102 | _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows)) 103 | return ax 104 | 105 | ax = get_ax(1) 106 | 107 | assert not savepath == "", "empty save path" 108 | assert not imgname == "", "empty image file name" 109 | 110 | for box in r['rois']: 111 | y1, x1, y2, x2 = box 112 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 113 | alpha=0.7, linestyle="dashed", 114 | edgecolor="red", facecolor='none') 115 | ax.add_patch(p) 116 | ax.imshow(image_ori) 117 | 118 | plt.savefig(os.path.join(savepath, imgname),bbox_inches='tight') 119 | plt.clf() 120 | 121 | if __name__ == '__main__': 122 | ROOT_DIR = os.getcwd() 123 | parser = argparse.ArgumentParser() 124 | 125 | parser.add_argument('--loadpath', required=False, 126 | default="images/", 127 | metavar="evaluate images loadpath", 128 | help="evaluate images loadpath") 129 | parser.add_argument('--savepath', required=False, 130 | default="result/", 131 | metavar="evaluate images savepath", 132 | help="evaluate images savepath") 133 | 134 | config = RFCNNConfig() 135 | args = parser.parse_args() 136 | 137 | model = RFCN_Model(mode="inference", config=config, 138 | model_dir=os.path.join(ROOT_DIR, "logs") ) 139 | 140 | Test(model, args.loadpath, args.savepath) -------------------------------------------------------------------------------- /Fashion_Train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | ''' 9 | This is a demo to TRAIN a RFCN model with DeepFashion Dataset 10 | http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html 11 | ''' 12 | 13 | from KerasRFCN.Model.Model import RFCN_Model 14 | from KerasRFCN.Config import Config 15 | from KerasRFCN.Utils import Dataset 16 | import os 17 | import pickle 18 | import numpy as np 19 | from PIL import Image 20 | 21 | ############################################################ 22 | # Config 23 | ############################################################ 24 | 25 | class RFCNNConfig(Config): 26 | """Configuration for training on the toy shapes dataset. 27 | Derives from the base Config class and overrides values specific 28 | to the toy shapes dataset. 29 | """ 30 | # Give the configuration a recognizable name 31 | NAME = "Fashion" 32 | 33 | # Backbone model 34 | # choose one from ['resnet50', 'resnet101', 'resnet50_dilated', 'resnet101_dilated'] 35 | BACKBONE = "resnet101" 36 | 37 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 38 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 39 | GPU_COUNT = 1 40 | IMAGES_PER_GPU = 1 41 | 42 | # Number of classes (including background) 43 | C = 1 + 46 # background + 2 tags 44 | NUM_CLASSES = C 45 | # Use small images for faster training. Set the limits of the small side 46 | # the large side, and that determines the image shape. 47 | IMAGE_MIN_DIM = 640 48 | IMAGE_MAX_DIM = 768 49 | 50 | # Use smaller anchors because our image and objects are small 51 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) # anchor side in pixels 52 | # Use same strides on stage 4-6 if use dilated resnet of DetNet 53 | # Like BACKBONE_STRIDES = [4, 8, 16, 16, 16] 54 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 55 | # Reduce training ROIs per image because the images are small and have 56 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 57 | TRAIN_ROIS_PER_IMAGE = 200 58 | 59 | # Use a small epoch since the data is simple 60 | STEPS_PER_EPOCH = 1000 61 | 62 | # use small validation steps since the epoch is small 63 | VALIDATION_STEPS = 200 64 | 65 | RPN_NMS_THRESHOLD = 0.6 66 | POOL_SIZE = 7 67 | 68 | ############################################################ 69 | # Dataset 70 | ############################################################ 71 | 72 | class FashionDataset(Dataset): 73 | # count - int, images in the dataset 74 | def initDB(self, count, start = 0): 75 | self.start = start 76 | 77 | all_images, classes_count, class_mapping = pickle.load(open("data.pk", "rb")) 78 | self.classes = {} 79 | # Add classes 80 | for k,c in class_mapping.items(): 81 | self.add_class("Fashion",c,k) 82 | self.classes[c] = k 83 | 84 | for k, item in enumerate(all_images[start:count+start]): 85 | self.add_image(source="Fashion",image_id=k, path=item['filepath'], width=item['width'], height=item['height'], bboxes=item['bboxes']) 86 | 87 | self.rootpath = '/content/' 88 | 89 | # read image from file and get the 90 | def load_image(self, image_id): 91 | info = self.image_info[image_id] 92 | # tempImg = image.img_to_array( image.load_img(info['path']) ) 93 | tempImg = np.array(Image.open( os.path.join(self.rootpath, info['path']) )) 94 | return tempImg 95 | 96 | def get_keys(self, d, value): 97 | return [k for k,v in d.items() if v == value] 98 | 99 | def load_bbox(self, image_id): 100 | info = self.image_info[image_id] 101 | bboxes = [] 102 | labels = [] 103 | for item in info['bboxes']: 104 | bboxes.append((item['y1'], item['x1'], item['y2'], item['x2'])) 105 | label_key = self.get_keys(self.classes, item['class']) 106 | if len(label_key) == 0: 107 | continue 108 | labels.extend( label_key ) 109 | return np.array(bboxes), np.array(labels) 110 | 111 | if __name__ == '__main__': 112 | ROOT_DIR = os.getcwd() 113 | 114 | config = RFCNNConfig() 115 | dataset_train = FashionDataset() 116 | dataset_train.initDB(100000) 117 | dataset_train.prepare() 118 | 119 | # Validation dataset 120 | dataset_val = FashionDataset() 121 | dataset_val.initDB(5000, start=100000) 122 | dataset_val.prepare() 123 | 124 | model = RFCN_Model(mode="training", config=config, model_dir=os.path.join(ROOT_DIR, "logs") ) 125 | 126 | # This is a hack, bacause the pre-train weights are not fit with dilated ResNet 127 | model.keras_model.load_weights("resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5", by_name=True, skip_mismatch=True) 128 | 129 | try: 130 | model_path = model.find_last()[1] 131 | if model_path is not None: 132 | model.load_weights(model_path, by_name=True) 133 | except Exception as e: 134 | print(e) 135 | print("No checkpoint founded") 136 | 137 | # *** This training schedule is an example. Update to your needs *** 138 | 139 | # Training - Stage 1 140 | model.train(dataset_train, dataset_val, 141 | learning_rate=config.LEARNING_RATE, 142 | epochs=20, 143 | layers='heads') 144 | 145 | # Training - Stage 2 146 | # Finetune layers from ResNet stage 4 and up 147 | print("Fine tune Resnet stage 4 and up") 148 | model.train(dataset_train, dataset_val, 149 | learning_rate=config.LEARNING_RATE, 150 | epochs=40, 151 | layers='4+') 152 | 153 | # Training - Stage 3 154 | # Fine tune all layers 155 | print("Fine tune all layers") 156 | model.train(dataset_train, dataset_val, 157 | learning_rate=config.LEARNING_RATE, 158 | epochs=80, 159 | layers='all') 160 | 161 | # Training - Stage 3 162 | # Fine tune all layers 163 | print("Fine tune all layers") 164 | model.train(dataset_train, dataset_val, 165 | learning_rate=config.LEARNING_RATE, 166 | epochs=240, 167 | layers='all') -------------------------------------------------------------------------------- /KerasRFCN/Config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | import math 9 | import numpy as np 10 | 11 | 12 | # Base Configuration Class 13 | # Don't use this class directly. Instead, sub-class it and override 14 | # the configurations you need to change. 15 | 16 | class Config(object): 17 | """Base configuration class. For custom configurations, create a 18 | sub-class that inherits from this one and override properties 19 | that need to be changed. 20 | """ 21 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 22 | # Useful if your code needs to do things differently depending on which 23 | # experiment is running. 24 | NAME = None # Override in sub-classes 25 | 26 | # Backbone model 27 | # choose one from ['resnet50', 'resnet101', 'resnet50_dilated', 'resnet101_dilated'] 28 | BACKBONE = "resnet101" 29 | 30 | # NUMBER OF GPUs to use. For CPU training, use 1 31 | GPU_COUNT = 1 32 | 33 | # Number of images to train with on each GPU. A 12GB GPU can typically 34 | # handle 2 images of 1024x1024px. 35 | # Adjust based on your GPU memory and image sizes. Use the highest 36 | # number that your GPU can handle for best performance. 37 | IMAGES_PER_GPU = 2 38 | 39 | # Number of training steps per epoch 40 | # This doesn't need to match the size of the training set. Tensorboard 41 | # updates are saved at the end of each epoch, so setting this to a 42 | # smaller number means getting more frequent TensorBoard updates. 43 | # Validation stats are also calculated at each epoch end and they 44 | # might take a while, so don't set this too small to avoid spending 45 | # a lot of time on validation stats. 46 | STEPS_PER_EPOCH = 1000 47 | 48 | # Number of validation steps to run at the end of every training epoch. 49 | # A bigger number improves accuracy of validation stats, but slows 50 | # down the training. 51 | VALIDATION_STEPS = 50 52 | 53 | # The strides of each layer of the FPN Pyramid. These values 54 | # are based on a Resnet101 backbone. 55 | # Use same strides on stage 4-6 if use dilated resnet of DetNet 56 | # Like BACKBONE_STRIDES = [4, 8, 16, 16, 16] 57 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 58 | 59 | # Number of classification classes (including background) 60 | NUM_CLASSES = 1 # Override in sub-classes 61 | 62 | # Length of square anchor side in pixels 63 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 64 | 65 | # Ratios of anchors at each cell (width/height) 66 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 67 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 68 | 69 | # Anchor stride 70 | # If 1 then anchors are created for each cell in the backbone feature map. 71 | # If 2, then anchors are created for every other cell, and so on. 72 | RPN_ANCHOR_STRIDE = 1 73 | 74 | # Non-max suppression threshold to filter RPN proposals. 75 | # You can reduce this during training to generate more propsals. 76 | RPN_NMS_THRESHOLD = 0.7 77 | 78 | # How many anchors per image to use for RPN training 79 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 80 | 81 | # ROIs kept after non-maximum supression (training and inference) 82 | POST_NMS_ROIS_TRAINING = 2000 83 | POST_NMS_ROIS_INFERENCE = 1000 84 | 85 | # Input image resing 86 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 87 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 88 | # be satisfied together the IMAGE_MAX_DIM is enforced. 89 | IMAGE_MIN_DIM = 800 90 | IMAGE_MAX_DIM = 1024 91 | # If True, pad images with zeros such that they're (max_dim by max_dim) 92 | IMAGE_PADDING = True # currently, the False option is not supported 93 | 94 | # Image mean (RGB) 95 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 96 | 97 | # Number of ROIs per image to feed to classifier/mask heads 98 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 99 | # enough positive proposals to fill this and keep a positive:negative 100 | # ratio of 1:3. You can increase the number of proposals by adjusting 101 | # the RPN NMS threshold. 102 | TRAIN_ROIS_PER_IMAGE = 200 103 | 104 | # Percent of positive ROIs used to train classifier/mask heads 105 | ROI_POSITIVE_RATIO = 0.33 106 | 107 | # Pooled ROIs 108 | POOL_SIZE = 3 109 | 110 | # Maximum number of ground truth instances to use in one image 111 | MAX_GT_INSTANCES = 100 112 | 113 | # Bounding box refinement standard deviation for RPN and final detections. 114 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 115 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 116 | 117 | # Max number of final detections 118 | DETECTION_MAX_INSTANCES = 100 119 | 120 | # Minimum probability value to accept a detected instance 121 | # ROIs below this threshold are skipped 122 | DETECTION_MIN_CONFIDENCE = 0.8 123 | 124 | # Non-maximum suppression threshold for detection 125 | DETECTION_NMS_THRESHOLD = 0.3 126 | 127 | # Learning rate and momentum 128 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 129 | # weights to explode. Likely due to differences in optimzer 130 | # implementation. 131 | LEARNING_RATE = 0.001 132 | LEARNING_MOMENTUM = 0.9 133 | 134 | # Weight decay regularization 135 | WEIGHT_DECAY = 0.0005 136 | 137 | # Use RPN ROIs or externally generated ROIs for training 138 | # Keep this True for most situations. Set to False if you want to train 139 | # the head branches on ROI generated by code rather than the ROIs from 140 | # the RPN. For example, to debug the classifier head without having to 141 | # train the RPN. 142 | USE_RPN_ROIS = True 143 | 144 | K = 3 145 | 146 | def __init__(self): 147 | """Set values of computed attributes.""" 148 | # Effective batch size 149 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 150 | 151 | # Input image size 152 | self.IMAGE_SHAPE = np.array( 153 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 154 | 155 | # Compute backbone size from input image size 156 | self.BACKBONE_SHAPES = np.array( 157 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 158 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 159 | for stride in self.BACKBONE_STRIDES]) 160 | 161 | def display(self): 162 | """Display Configuration values.""" 163 | print("\nConfigurations:") 164 | for a in dir(self): 165 | if not a.startswith("__") and not callable(getattr(self, a)): 166 | print("{:30} {}".format(a, getattr(self, a))) 167 | print("\n") 168 | -------------------------------------------------------------------------------- /KerasRFCN/Data_generator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | import numpy as np 9 | import KerasRFCN.Utils 10 | import logging 11 | ############################################################ 12 | # Data Generator 13 | ############################################################ 14 | 15 | def load_image_gt(dataset, config, image_id, augment=False): 16 | """Load and return ground truth data for an image (image, mask, bounding boxes). 17 | 18 | augment: If true, apply random image augmentation. Currently, only 19 | horizontal flipping is offered. 20 | 21 | Returns: 22 | image: [height, width, 3] 23 | shape: the original shape of the image before resizing and cropping. 24 | class_ids: [instance_count] Integer class IDs 25 | bbox: [instance_count, (y1, x1, y2, x2)] 26 | """ 27 | # Load image and mask 28 | image = dataset.load_image(image_id) 29 | # bbox: [num_instances, (y1, x1, y2, x2)] 30 | bboxes, class_ids = dataset.load_bbox(image_id) 31 | shape = image.shape 32 | image, window, scale, padding = KerasRFCN.Utils.resize_image( 33 | image, 34 | min_dim=config.IMAGE_MIN_DIM, 35 | max_dim=config.IMAGE_MAX_DIM, 36 | padding=config.IMAGE_PADDING) 37 | bboxes = KerasRFCN.Utils.resize_bbox(bboxes, scale, padding) 38 | # img_h, img_w, img_c = image.shape 39 | 40 | # Random horizontal flips. 41 | # TODO: data-augment:fliplr the bbox coordinate 42 | # if augment: 43 | # if random.randint(0, 1): 44 | # image = np.fliplr(image) 45 | # bboxes = bbox_fliplr(bboxes, height, width) 46 | 47 | # Bounding boxes. Note that some boxes might be all zeros 48 | 49 | # Active classes 50 | # Different datasets have different classes, so track the 51 | # classes supported in the dataset of this image. 52 | active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32) 53 | source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] 54 | active_class_ids[source_class_ids] = 1 55 | 56 | # Image meta data 57 | image_meta = compose_image_meta(image_id, shape, window, active_class_ids) 58 | 59 | return image, image_meta, class_ids, bboxes 60 | 61 | 62 | def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, config): 63 | """Generate targets for training Stage 2 classifier. 64 | This is not used in normal training. It's useful for debugging or to train 65 | the Mask RCNN heads without using the RPN head. 66 | 67 | Inputs: 68 | rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes. 69 | gt_class_ids: [instance count] Integer class IDs 70 | gt_boxes: [instance count, (y1, x1, y2, x2)] 71 | 72 | Returns: 73 | rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] 74 | class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. 75 | bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific 76 | bbox refinments. 77 | """ 78 | assert rpn_rois.shape[0] > 0 79 | assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format( 80 | gt_class_ids.dtype) 81 | assert gt_boxes.dtype == np.int32, "Expected int but got {}".format( 82 | gt_boxes.dtype) 83 | 84 | # It's common to add GT Boxes to ROIs but we don't do that here because 85 | # according to XinLei Chen's paper, it doesn't help. 86 | 87 | # Trim empty padding in gt_boxes and gt_masks parts 88 | instance_ids = np.where(gt_class_ids > 0)[0] 89 | assert instance_ids.shape[0] > 0, "Image must contain instances." 90 | gt_class_ids = gt_class_ids[instance_ids] 91 | gt_boxes = gt_boxes[instance_ids] 92 | 93 | # Compute areas of ROIs and ground truth boxes. 94 | rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * \ 95 | (rpn_rois[:, 3] - rpn_rois[:, 1]) 96 | gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * \ 97 | (gt_boxes[:, 3] - gt_boxes[:, 1]) 98 | 99 | # Compute overlaps [rpn_rois, gt_boxes] 100 | overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0])) 101 | for i in range(overlaps.shape[1]): 102 | gt = gt_boxes[i] 103 | overlaps[:, i] = KerasRFCN.Utils.compute_iou( 104 | gt, rpn_rois, gt_box_area[i], rpn_roi_area) 105 | 106 | # Assign ROIs to GT boxes 107 | rpn_roi_iou_argmax = np.argmax(overlaps, axis=1) 108 | rpn_roi_iou_max = overlaps[np.arange( 109 | overlaps.shape[0]), rpn_roi_iou_argmax] 110 | # GT box assigned to each ROI 111 | rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax] 112 | rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax] 113 | 114 | # Positive ROIs are those with >= 0.5 IoU with a GT box. 115 | fg_ids = np.where(rpn_roi_iou_max > 0.5)[0] 116 | 117 | # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining) 118 | # TODO: To hard example mine or not to hard example mine, that's the question 119 | # bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0] 120 | bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] 121 | 122 | # Subsample ROIs. Aim for 33% foreground. 123 | # FG 124 | fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) 125 | if fg_ids.shape[0] > fg_roi_count: 126 | keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False) 127 | else: 128 | keep_fg_ids = fg_ids 129 | # BG 130 | remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0] 131 | if bg_ids.shape[0] > remaining: 132 | keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) 133 | else: 134 | keep_bg_ids = bg_ids 135 | # Combine indicies of ROIs to keep 136 | keep = np.concatenate([keep_fg_ids, keep_bg_ids]) 137 | # Need more? 138 | remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0] 139 | if remaining > 0: 140 | # Looks like we don't have enough samples to maintain the desired 141 | # balance. Reduce requirements and fill in the rest. This is 142 | # likely different from the Mask RCNN paper. 143 | 144 | # There is a small chance we have neither fg nor bg samples. 145 | if keep.shape[0] == 0: 146 | # Pick bg regions with easier IoU threshold 147 | bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] 148 | assert bg_ids.shape[0] >= remaining 149 | keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) 150 | assert keep_bg_ids.shape[0] == remaining 151 | keep = np.concatenate([keep, keep_bg_ids]) 152 | else: 153 | # Fill the rest with repeated bg rois. 154 | keep_extra_ids = np.random.choice( 155 | keep_bg_ids, remaining, replace=True) 156 | keep = np.concatenate([keep, keep_extra_ids]) 157 | assert keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE, \ 158 | "keep doesn't match ROI batch size {}, {}".format( 159 | keep.shape[0], config.TRAIN_ROIS_PER_IMAGE) 160 | 161 | # Reset the gt boxes assigned to BG ROIs. 162 | rpn_roi_gt_boxes[keep_bg_ids, :] = 0 163 | rpn_roi_gt_class_ids[keep_bg_ids] = 0 164 | 165 | # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement. 166 | rois = rpn_rois[keep] 167 | roi_gt_boxes = rpn_roi_gt_boxes[keep] 168 | roi_gt_class_ids = rpn_roi_gt_class_ids[keep] 169 | roi_gt_assignment = rpn_roi_iou_argmax[keep] 170 | 171 | # Class-aware bbox deltas. [y, x, log(h), log(w)] 172 | bboxes = np.zeros((config.TRAIN_ROIS_PER_IMAGE, 173 | config.NUM_CLASSES, 4), dtype=np.float32) 174 | pos_ids = np.where(roi_gt_class_ids > 0)[0] 175 | bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = KerasRFCN.Utils.box_refinement( 176 | rois[pos_ids], roi_gt_boxes[pos_ids, :4]) 177 | # Normalize bbox refinments 178 | bboxes /= config.BBOX_STD_DEV 179 | 180 | return rois, roi_gt_class_ids, bboxes 181 | 182 | 183 | def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config): 184 | """Given the anchors and GT boxes, compute overlaps and identify positive 185 | anchors and deltas to refine them to match their corresponding GT boxes. 186 | 187 | anchors: [num_anchors, (y1, x1, y2, x2)] 188 | gt_class_ids: [num_gt_boxes] Integer class IDs. 189 | gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)] 190 | 191 | Returns: 192 | rpn_match: [N] (int32) matches between anchors and GT boxes. 193 | 1 = positive anchor, -1 = negative anchor, 0 = neutral 194 | rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. 195 | """ 196 | # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral 197 | rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32) 198 | # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))] 199 | rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4)) 200 | 201 | # Handle COCO crowds 202 | # A crowd box in COCO is a bounding box around several instances. Exclude 203 | # them from training. A crowd box is given a negative class ID. 204 | crowd_ix = np.where(gt_class_ids < 0)[0] 205 | if crowd_ix.shape[0] > 0: 206 | # Filter out crowds from ground truth class IDs and boxes 207 | non_crowd_ix = np.where(gt_class_ids > 0)[0] 208 | crowd_boxes = gt_boxes[crowd_ix] 209 | gt_class_ids = gt_class_ids[non_crowd_ix] 210 | gt_boxes = gt_boxes[non_crowd_ix] 211 | # Compute overlaps with crowd boxes [anchors, crowds] 212 | crowd_overlaps = KerasRFCN.Utils.compute_overlaps(anchors, crowd_boxes) 213 | crowd_iou_max = np.amax(crowd_overlaps, axis=1) 214 | no_crowd_bool = (crowd_iou_max < 0.001) 215 | else: 216 | # All anchors don't intersect a crowd 217 | no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool) 218 | 219 | # Compute overlaps [num_anchors, num_gt_boxes] 220 | overlaps = KerasRFCN.Utils.compute_overlaps(anchors, gt_boxes) 221 | 222 | # Match anchors to GT Boxes 223 | # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive. 224 | # If an anchor overlaps a GT box with IoU < 0.3 then it's negative. 225 | # Neutral anchors are those that don't match the conditions above, 226 | # and they don't influence the loss function. 227 | # However, don't keep any GT box unmatched (rare, but happens). Instead, 228 | # match it to the closest anchor (even if its max IoU is < 0.3). 229 | # 230 | # 1. Set negative anchors first. They get overwritten below if a GT box is 231 | # matched to them. Skip boxes in crowd areas. 232 | anchor_iou_argmax = np.argmax(overlaps, axis=1) 233 | anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax] 234 | rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 235 | # 2. Set an anchor for each GT box (regardless of IoU value). 236 | # TODO: If multiple anchors have the same IoU match all of them 237 | gt_iou_argmax = np.argmax(overlaps, axis=0) 238 | rpn_match[gt_iou_argmax] = 1 239 | # 3. Set anchors with high overlap as positive. 240 | rpn_match[anchor_iou_max >= 0.7] = 1 241 | 242 | # Subsample to balance positive and negative anchors 243 | # Don't let positives be more than half the anchors 244 | ids = np.where(rpn_match == 1)[0] 245 | extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) 246 | if extra > 0: 247 | # Reset the extra ones to neutral 248 | ids = np.random.choice(ids, extra, replace=False) 249 | rpn_match[ids] = 0 250 | # Same for negative proposals 251 | ids = np.where(rpn_match == -1)[0] 252 | extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE - 253 | np.sum(rpn_match == 1)) 254 | if extra > 0: 255 | # Rest the extra ones to neutral 256 | ids = np.random.choice(ids, extra, replace=False) 257 | rpn_match[ids] = 0 258 | 259 | # For positive anchors, compute shift and scale needed to transform them 260 | # to match the corresponding GT boxes. 261 | ids = np.where(rpn_match == 1)[0] 262 | ix = 0 # index into rpn_bbox 263 | # TODO: use box_refinment() rather than duplicating the code here 264 | for i, a in zip(ids, anchors[ids]): 265 | # Closest gt box (it might have IoU < 0.7) 266 | gt = gt_boxes[anchor_iou_argmax[i]] 267 | 268 | # Convert coordinates to center plus width/height. 269 | # GT Box 270 | gt_h = gt[2] - gt[0] 271 | gt_w = gt[3] - gt[1] 272 | gt_center_y = gt[0] + 0.5 * gt_h 273 | gt_center_x = gt[1] + 0.5 * gt_w 274 | # Anchor 275 | a_h = a[2] - a[0] 276 | a_w = a[3] - a[1] 277 | a_center_y = a[0] + 0.5 * a_h 278 | a_center_x = a[1] + 0.5 * a_w 279 | 280 | # Compute the bbox refinement that the RPN should predict. 281 | rpn_bbox[ix] = [ 282 | (gt_center_y - a_center_y) / a_h, 283 | (gt_center_x - a_center_x) / a_w, 284 | np.log(gt_h / a_h), 285 | np.log(gt_w / a_w), 286 | ] 287 | # Normalize 288 | rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV 289 | ix += 1 290 | 291 | return rpn_match, rpn_bbox 292 | 293 | 294 | def generate_random_rois(image_shape, count, gt_class_ids, gt_boxes): 295 | """Generates ROI proposals similar to what a region proposal network 296 | would generate. 297 | 298 | image_shape: [Height, Width, Depth] 299 | count: Number of ROIs to generate 300 | gt_class_ids: [N] Integer ground truth class IDs 301 | gt_boxes: [N, (y1, x1, y2, x2)] Ground truth boxes in pixels. 302 | 303 | Returns: [count, (y1, x1, y2, x2)] ROI boxes in pixels. 304 | """ 305 | # placeholder 306 | rois = np.zeros((count, 4), dtype=np.int32) 307 | 308 | # Generate random ROIs around GT boxes (90% of count) 309 | rois_per_box = int(0.9 * count / gt_boxes.shape[0]) 310 | for i in range(gt_boxes.shape[0]): 311 | gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[i] 312 | h = gt_y2 - gt_y1 313 | w = gt_x2 - gt_x1 314 | # random boundaries 315 | r_y1 = max(gt_y1 - h, 0) 316 | r_y2 = min(gt_y2 + h, image_shape[0]) 317 | r_x1 = max(gt_x1 - w, 0) 318 | r_x2 = min(gt_x2 + w, image_shape[1]) 319 | 320 | # To avoid generating boxes with zero area, we generate double what 321 | # we need and filter out the extra. If we get fewer valid boxes 322 | # than we need, we loop and try again. 323 | while True: 324 | y1y2 = np.random.randint(r_y1, r_y2, (rois_per_box * 2, 2)) 325 | x1x2 = np.random.randint(r_x1, r_x2, (rois_per_box * 2, 2)) 326 | # Filter out zero area boxes 327 | threshold = 1 328 | y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= 329 | threshold][:rois_per_box] 330 | x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= 331 | threshold][:rois_per_box] 332 | if y1y2.shape[0] == rois_per_box and x1x2.shape[0] == rois_per_box: 333 | break 334 | 335 | # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape 336 | # into x1, y1, x2, y2 order 337 | x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) 338 | y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) 339 | box_rois = np.hstack([y1, x1, y2, x2]) 340 | rois[rois_per_box * i:rois_per_box * (i + 1)] = box_rois 341 | 342 | # Generate random ROIs anywhere in the image (10% of count) 343 | remaining_count = count - (rois_per_box * gt_boxes.shape[0]) 344 | # To avoid generating boxes with zero area, we generate double what 345 | # we need and filter out the extra. If we get fewer valid boxes 346 | # than we need, we loop and try again. 347 | while True: 348 | y1y2 = np.random.randint(0, image_shape[0], (remaining_count * 2, 2)) 349 | x1x2 = np.random.randint(0, image_shape[1], (remaining_count * 2, 2)) 350 | # Filter out zero area boxes 351 | threshold = 1 352 | y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= 353 | threshold][:remaining_count] 354 | x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= 355 | threshold][:remaining_count] 356 | if y1y2.shape[0] == remaining_count and x1x2.shape[0] == remaining_count: 357 | break 358 | 359 | # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape 360 | # into x1, y1, x2, y2 order 361 | x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) 362 | y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) 363 | global_rois = np.hstack([y1, x1, y2, x2]) 364 | rois[-remaining_count:] = global_rois 365 | return rois 366 | 367 | 368 | def data_generator(dataset, config, shuffle=True, augment=True, random_rois=0, 369 | batch_size=1, detection_targets=False): 370 | """A generator that returns images and corresponding target class ids, 371 | bounding box deltas. 372 | 373 | dataset: The Dataset object to pick data from 374 | config: The model config object 375 | shuffle: If True, shuffles the samples before every epoch 376 | augment: If True, applies image augmentation to images (currently only 377 | horizontal flips are supported) 378 | random_rois: If > 0 then generate proposals to be used to train the 379 | network classifier. Useful if training 380 | the Mask RCNN part without the RPN. 381 | batch_size: How many images to return in each call 382 | detection_targets: If True, generate detection targets (class IDs, bbox 383 | deltas). Typically for debugging or visualizations because 384 | in trainig detection targets are generated by DetectionTargetLayer. 385 | 386 | Returns a Python generator. Upon calling next() on it, the 387 | generator returns two lists, inputs and outputs. The containtes 388 | of the lists differs depending on the received arguments: 389 | inputs list: 390 | - images: [batch, H, W, C] 391 | - image_meta: [batch, size of image meta] 392 | - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral) 393 | - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. 394 | - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs 395 | - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] 396 | 397 | outputs list: Usually empty in regular training. But if detection_targets 398 | is True then the outputs list contains target class_ids, bbox deltas. 399 | """ 400 | b = 0 # batch item index 401 | image_index = -1 402 | image_ids = np.copy(dataset.image_ids) 403 | error_count = 0 404 | 405 | # Anchors 406 | # [anchor_count, (y1, x1, y2, x2)] 407 | anchors = KerasRFCN.Utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, 408 | config.RPN_ANCHOR_RATIOS, 409 | config.BACKBONE_SHAPES, 410 | config.BACKBONE_STRIDES, 411 | config.RPN_ANCHOR_STRIDE) 412 | 413 | # Keras requires a generator to run indefinately. 414 | while True: 415 | try: 416 | # Increment index to pick next image. Shuffle if at the start of an epoch. 417 | image_index = (image_index + 1) % len(image_ids) 418 | if shuffle and image_index == 0: 419 | np.random.shuffle(image_ids) 420 | 421 | # Get GT bounding boxes for image. 422 | image_id = image_ids[image_index] 423 | image, image_meta, gt_class_ids, gt_boxes = \ 424 | load_image_gt(dataset, config, image_id, augment=augment) 425 | 426 | # Skip images that have no instances. This can happen in cases 427 | # where we train on a subset of classes and the image doesn't 428 | # have any of the classes we care about. 429 | if not np.any(gt_class_ids > 0): 430 | continue 431 | 432 | # RPN Targets 433 | rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors, 434 | gt_class_ids, gt_boxes, config) 435 | 436 | # Mask R-CNN Targets 437 | if random_rois: 438 | rpn_rois = generate_random_rois( 439 | image.shape, random_rois, gt_class_ids, gt_boxes) 440 | if detection_targets: 441 | rois, mrcnn_class_ids, mrcnn_bbox =\ 442 | build_detection_targets( 443 | rpn_rois, gt_class_ids, gt_boxes, config) 444 | 445 | # Init batch arrays 446 | if b == 0: 447 | batch_image_meta = np.zeros( 448 | (batch_size,) + image_meta.shape, dtype=image_meta.dtype) 449 | batch_rpn_match = np.zeros( 450 | [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype) 451 | batch_rpn_bbox = np.zeros( 452 | [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype) 453 | batch_images = np.zeros( 454 | (batch_size,) + image.shape, dtype=np.float32) 455 | batch_gt_class_ids = np.zeros( 456 | (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32) 457 | batch_gt_boxes = np.zeros( 458 | (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32) 459 | 460 | if random_rois: 461 | batch_rpn_rois = np.zeros( 462 | (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype) 463 | if detection_targets: 464 | batch_rois = np.zeros( 465 | (batch_size,) + rois.shape, dtype=rois.dtype) 466 | batch_mrcnn_class_ids = np.zeros( 467 | (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype) 468 | batch_mrcnn_bbox = np.zeros( 469 | (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype) 470 | 471 | # If more instances than fits in the array, sub-sample from them. 472 | if gt_boxes.shape[0] > config.MAX_GT_INSTANCES: 473 | ids = np.random.choice( 474 | np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False) 475 | gt_class_ids = gt_class_ids[ids] 476 | gt_boxes = gt_boxes[ids] 477 | 478 | # Add to batch 479 | batch_image_meta[b] = image_meta 480 | batch_rpn_match[b] = rpn_match[:, np.newaxis] 481 | batch_rpn_bbox[b] = rpn_bbox 482 | batch_images[b] = mold_image(image.astype(np.float32), config) 483 | batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids 484 | batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes 485 | if random_rois: 486 | batch_rpn_rois[b] = rpn_rois 487 | if detection_targets: 488 | batch_rois[b] = rois 489 | batch_mrcnn_class_ids[b] = mrcnn_class_ids 490 | batch_mrcnn_bbox[b] = mrcnn_bbox 491 | b += 1 492 | 493 | # Batch full? 494 | if b >= batch_size: 495 | inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, 496 | batch_gt_class_ids, batch_gt_boxes] 497 | outputs = [] 498 | 499 | if random_rois: 500 | inputs.extend([batch_rpn_rois]) 501 | if detection_targets: 502 | inputs.extend([batch_rois]) 503 | # Keras requires that output and targets have the same number of dimensions 504 | batch_mrcnn_class_ids = np.expand_dims( 505 | batch_mrcnn_class_ids, -1) 506 | outputs.extend( 507 | [batch_mrcnn_class_ids, batch_mrcnn_bbox]) 508 | 509 | yield inputs, outputs 510 | 511 | # start a new batch 512 | b = 0 513 | except (GeneratorExit, KeyboardInterrupt): 514 | raise 515 | except: 516 | # Log it and skip the image 517 | logging.exception("Error processing image {}".format( 518 | dataset.image_info[image_id])) 519 | error_count += 1 520 | if error_count > 5: 521 | raise 522 | 523 | def compose_image_meta(image_id, image_shape, window, active_class_ids): 524 | """Takes attributes of an image and puts them in one 1D array. Use 525 | parse_image_meta() to parse the values back. 526 | 527 | image_id: An int ID of the image. Useful for debugging. 528 | image_shape: [height, width, channels] 529 | window: (y1, x1, y2, x2) in pixels. The area of the image where the real 530 | image is (excluding the padding) 531 | active_class_ids: List of class_ids available in the dataset from which 532 | the image came. Useful if training on images from multiple datasets 533 | where not all classes are present in all datasets. 534 | """ 535 | meta = np.array( 536 | [image_id] + # size=1 537 | list(image_shape) + # size=3 538 | list(window) + # size=4 (y1, x1, y2, x2) in image cooredinates 539 | list(active_class_ids) # size=num_classes 540 | ) 541 | return meta 542 | 543 | def mold_image(images, config): 544 | """Takes RGB images with 0-255 values and subtraces 545 | the mean pixel and converts it to float. Expects image 546 | colors in RGB order. 547 | """ 548 | return images.astype(np.float32) - config.MEAN_PIXEL -------------------------------------------------------------------------------- /KerasRFCN/Losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | import tensorflow as tf 9 | import keras.backend as K 10 | ############################################################ 11 | # Loss Functions 12 | ############################################################ 13 | 14 | def smooth_l1_loss(y_true, y_pred): 15 | """Implements Smooth-L1 loss. 16 | y_true and y_pred are typicallly: [N, 4], but could be any shape. 17 | """ 18 | diff = K.abs(y_true - y_pred) 19 | less_than_one = K.cast(K.less(diff, 1.0), "float32") 20 | loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) 21 | return loss 22 | 23 | 24 | def rpn_class_loss_graph(rpn_match, rpn_class_logits): 25 | """RPN anchor classifier loss. 26 | 27 | rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, 28 | -1=negative, 0=neutral anchor. 29 | rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG. 30 | """ 31 | # Squeeze last dim to simplify 32 | rpn_match = tf.squeeze(rpn_match, -1) 33 | # Get anchor classes. Convert the -1/+1 match to 0/1 values. 34 | anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) 35 | # Positive and Negative anchors contribute to the loss, 36 | # but neutral anchors (match value = 0) don't. 37 | indices = tf.where(K.not_equal(rpn_match, 0)) 38 | # Pick rows that contribute to the loss and filter out the rest. 39 | rpn_class_logits = tf.gather_nd(rpn_class_logits, indices) 40 | anchor_class = tf.gather_nd(anchor_class, indices) 41 | # Crossentropy loss 42 | loss = K.sparse_categorical_crossentropy(target=anchor_class, 43 | output=rpn_class_logits, 44 | from_logits=True) 45 | loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) 46 | return loss 47 | 48 | 49 | def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox): 50 | """Return the RPN bounding box loss graph. 51 | 52 | config: the model config object. 53 | target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))]. 54 | Uses 0 padding to fill in unsed bbox deltas. 55 | rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, 56 | -1=negative, 0=neutral anchor. 57 | rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] 58 | """ 59 | # Positive anchors contribute to the loss, but negative and 60 | # neutral anchors (match value of 0 or -1) don't. 61 | rpn_match = K.squeeze(rpn_match, -1) 62 | indices = tf.where(K.equal(rpn_match, 1)) 63 | 64 | # Pick bbox deltas that contribute to the loss 65 | rpn_bbox = tf.gather_nd(rpn_bbox, indices) 66 | 67 | # Trim target bounding box deltas to the same length as rpn_bbox. 68 | batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1) 69 | target_bbox = batch_pack_graph(target_bbox, batch_counts, 70 | config.IMAGES_PER_GPU) 71 | 72 | # TODO: use smooth_l1_loss() rather than reimplementing here 73 | # to reduce code duplication 74 | diff = K.abs(target_bbox - rpn_bbox) 75 | less_than_one = K.cast(K.less(diff, 1.0), "float32") 76 | loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) 77 | 78 | loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) 79 | return loss 80 | 81 | 82 | def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, 83 | active_class_ids): 84 | """Loss for the classifier head of Mask RCNN. 85 | 86 | target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero 87 | padding to fill in the array. 88 | pred_class_logits: [batch, num_rois, num_classes] 89 | active_class_ids: [batch, num_classes]. Has a value of 1 for 90 | classes that are in the dataset of the image, and 0 91 | for classes that are not in the dataset. 92 | """ 93 | target_class_ids = tf.cast(target_class_ids, 'int64') 94 | 95 | # Find predictions of classes that are not in the dataset. 96 | pred_class_ids = tf.argmax(pred_class_logits, axis=2) 97 | # TODO: Update this line to work with batch > 1. Right now it assumes all 98 | # images in a batch have the same active_class_ids 99 | pred_active = tf.gather(active_class_ids[0], pred_class_ids) 100 | 101 | # Loss 102 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 103 | labels=target_class_ids, logits=pred_class_logits) 104 | 105 | # Erase losses of predictions of classes that are not in the active 106 | # classes of the image. 107 | loss = loss * pred_active 108 | 109 | # Computer loss mean. Use only predictions that contribute 110 | # to the loss to get a correct mean. 111 | loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active) 112 | return loss 113 | 114 | 115 | def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): 116 | """Loss for Mask R-CNN bounding box refinement. 117 | 118 | target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))] 119 | target_class_ids: [batch, num_rois]. Integer class IDs. 120 | pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))] 121 | """ 122 | # Reshape to merge batch and roi dimensions for simplicity. 123 | target_class_ids = K.reshape(target_class_ids, (-1,)) 124 | target_bbox = K.reshape(target_bbox, (-1, 4)) 125 | pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) 126 | 127 | # Only positive ROIs contribute to the loss. And only 128 | # the right class_id of each ROI. Get their indicies. 129 | positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] 130 | positive_roi_class_ids = tf.cast( 131 | tf.gather(target_class_ids, positive_roi_ix), tf.int64) 132 | indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) 133 | 134 | # Gather the deltas (predicted and true) that contribute to loss 135 | target_bbox = tf.gather(target_bbox, positive_roi_ix) 136 | pred_bbox = tf.gather_nd(pred_bbox, indices) 137 | 138 | # Smooth-L1 Loss 139 | loss = K.switch(tf.size(target_bbox) > 0, 140 | smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), 141 | tf.constant(0.0)) 142 | loss = K.mean(loss) 143 | loss = K.reshape(loss, [1, 1]) 144 | return loss 145 | 146 | def batch_pack_graph(x, counts, num_rows): 147 | """Picks different number of values from each row 148 | in x depending on the values in counts. 149 | """ 150 | outputs = [] 151 | for i in range(num_rows): 152 | outputs.append(x[i, :counts[i]]) 153 | return tf.concat(outputs, axis=0) -------------------------------------------------------------------------------- /KerasRFCN/Model/BaseModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | ''' 9 | This is base class of RFCN Model 10 | Contain some functions like load_weights、find_last...etc 11 | ''' 12 | 13 | import re 14 | import keras 15 | import tensorflow as tf 16 | import datetime 17 | from KerasRFCN.Data_generator import data_generator 18 | import os 19 | import KerasRFCN.Utils 20 | import numpy as np 21 | 22 | class BaseModel(object): 23 | """docstring for BaseModel""" 24 | def __init__(self, arg): 25 | super(BaseModel, self).__init__() 26 | self.arg = arg 27 | 28 | def find_last(self): 29 | """Finds the last checkpoint file of the last trained model in the 30 | model directory. 31 | Returns: 32 | log_dir: The directory where events and weights are saved 33 | checkpoint_path: the path to the last checkpoint file 34 | """ 35 | # Get directory names. Each directory corresponds to a model 36 | dir_names = next(os.walk(self.model_dir))[1] 37 | key = self.config.NAME.lower() 38 | dir_names = filter(lambda f: f.startswith(key), dir_names) 39 | dir_names = sorted(dir_names) 40 | if not dir_names: 41 | return None, None 42 | # Pick last directory 43 | dir_name = os.path.join(self.model_dir, dir_names[-1]) 44 | # Find the last checkpoint 45 | checkpoints = next(os.walk(dir_name))[2] 46 | checkpoints = filter(lambda f: f.startswith("Keras-RFCN"), checkpoints) 47 | checkpoints = sorted(checkpoints) 48 | if not checkpoints: 49 | return dir_name, None 50 | checkpoint = os.path.join(dir_name, checkpoints[-1]) 51 | return dir_name, checkpoint 52 | 53 | def load_weights(self, filepath, by_name=False, exclude=None): 54 | """Modified version of the correspoding Keras function with 55 | the addition of multi-GPU support and the ability to exclude 56 | some layers from loading. 57 | exlude: list of layer names to excluce 58 | """ 59 | import h5py 60 | # Keras 2.2 use saving 61 | try: 62 | from keras.engine import saving 63 | except ImportError: 64 | # Keras before 2.2 used the 'topology' namespace. 65 | from keras.engine import topology as saving 66 | 67 | if exclude: 68 | by_name = True 69 | 70 | if h5py is None: 71 | raise ImportError('`load_weights` requires h5py.') 72 | f = h5py.File(filepath, mode='r') 73 | if 'layer_names' not in f.attrs and 'model_weights' in f: 74 | f = f['model_weights'] 75 | 76 | # In multi-GPU training, we wrap the model. Get layers 77 | # of the inner model because they have the weights. 78 | keras_model = self.keras_model 79 | layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ 80 | else keras_model.layers 81 | 82 | # Exclude some layers 83 | if exclude: 84 | layers = filter(lambda l: l.name not in exclude, layers) 85 | 86 | if by_name: 87 | saving.load_weights_from_hdf5_group_by_name(f, layers) 88 | else: 89 | saving.load_weights_from_hdf5_group(f, layers) 90 | if hasattr(f, 'close'): 91 | f.close() 92 | 93 | # Update the log directory 94 | self.set_log_dir(filepath) 95 | 96 | def get_imagenet_weights(self): 97 | """Downloads ImageNet trained weights from Keras. 98 | Returns path to weights file. 99 | """ 100 | from keras.utils.data_utils import get_file 101 | TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\ 102 | 'releases/download/v0.2/'\ 103 | 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 104 | weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', 105 | TF_WEIGHTS_PATH_NO_TOP, 106 | cache_subdir='models', 107 | md5_hash='a268eb855778b3df3c7506639542a6af') 108 | return weights_path 109 | 110 | def compile(self, learning_rate, momentum): 111 | """Gets the model ready for training. Adds losses, regularization, and 112 | metrics. Then calls the Keras compile() function. 113 | """ 114 | # Optimizer object 115 | optimizer = keras.optimizers.SGD(lr=learning_rate, momentum=momentum, clipnorm=5.0) 116 | # Add Losses 117 | # First, clear previously set losses to avoid duplication 118 | self.keras_model._losses = [] 119 | self.keras_model._per_input_losses = {} 120 | loss_names = ["rpn_class_loss", "rpn_bbox_loss", 121 | "mrcnn_class_loss", "mrcnn_bbox_loss"] 122 | for name in loss_names: 123 | layer = self.keras_model.get_layer(name) 124 | if layer.output in self.keras_model.losses: 125 | continue 126 | self.keras_model.add_loss( 127 | tf.reduce_mean(layer.output, keepdims=True)) 128 | 129 | # Add L2 Regularization 130 | # Skip gamma and beta weights of batch normalization layers. 131 | reg_losses = [keras.regularizers.l2(self.config.WEIGHT_DECAY)(w) / tf.cast(tf.size(w), tf.float32) 132 | for w in self.keras_model.trainable_weights 133 | if 'gamma' not in w.name and 'beta' not in w.name] 134 | self.keras_model.add_loss(tf.add_n(reg_losses)) 135 | 136 | # Compile 137 | self.keras_model.compile(optimizer=optimizer, loss=[ 138 | None] * len(self.keras_model.outputs)) 139 | 140 | # Add metrics for losses 141 | for name in loss_names: 142 | if name in self.keras_model.metrics_names: 143 | continue 144 | layer = self.keras_model.get_layer(name) 145 | self.keras_model.metrics_names.append(name) 146 | self.keras_model.metrics_tensors.append(tf.reduce_mean( 147 | layer.output, keepdims=True)) 148 | 149 | def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1): 150 | """Sets model layers as trainable if their names match 151 | the given regular expression. 152 | """ 153 | # Print message on the first call (but not on recursive calls) 154 | if verbose > 0 and keras_model is None: 155 | print("Selecting layers to train") 156 | 157 | keras_model = keras_model or self.keras_model 158 | 159 | # In multi-GPU training, we wrap the model. Get layers 160 | # of the inner model because they have the weights. 161 | layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ 162 | else keras_model.layers 163 | 164 | for layer in layers: 165 | # Is the layer a model? 166 | if layer.__class__.__name__ == 'Model': 167 | print("In model: ", layer.name) 168 | self.set_trainable( 169 | layer_regex, keras_model=layer, indent=indent + 4) 170 | continue 171 | 172 | if not layer.weights: 173 | continue 174 | # Is it trainable? 175 | trainable = bool(re.fullmatch(layer_regex, layer.name)) 176 | # Update layer. If layer is a container, update inner layer. 177 | if layer.__class__.__name__ == 'TimeDistributed': 178 | layer.layer.trainable = trainable 179 | else: 180 | layer.trainable = trainable 181 | # Print trainble layer names 182 | if trainable and verbose > 0: 183 | print("{}{:20} ({})".format(" " * indent, layer.name, 184 | layer.__class__.__name__)) 185 | 186 | def set_log_dir(self, model_path=None): 187 | """Sets the model log directory and epoch counter. 188 | 189 | model_path: If None, or a format different from what this code uses 190 | then set a new log directory and start epochs from 0. Otherwise, 191 | extract the log directory and the epoch counter from the file 192 | name. 193 | """ 194 | # Set date and epoch counter as if starting a new model 195 | self.epoch = 0 196 | now = datetime.datetime.now() 197 | 198 | # If we have a model path with date and epochs use them 199 | if model_path: 200 | # Continue from we left of. Get epoch and date from the file name 201 | # A sample model path might look like: 202 | regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/Keras-RFCN\_\w+(\d{4})\.h5" 203 | m = re.match(regex, model_path) 204 | if m: 205 | now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)), 206 | int(m.group(4)), int(m.group(5))) 207 | self.epoch = int(m.group(6)) + 1 208 | 209 | # Directory for training logs 210 | self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format( 211 | self.config.NAME.lower(), now)) 212 | 213 | # Path to save after each epoch. Include placeholders that get filled by Keras. 214 | self.checkpoint_path = os.path.join(self.log_dir, "Keras-RFCN_{}_*epoch*.h5".format( 215 | self.config.NAME.lower())) 216 | self.checkpoint_path = self.checkpoint_path.replace( 217 | "*epoch*", "{epoch:04d}") 218 | 219 | def train(self, train_dataset, val_dataset, learning_rate, epochs, layers): 220 | """Train the model. 221 | train_dataset, val_dataset: Training and validation Dataset objects. 222 | learning_rate: The learning rate to train with 223 | epochs: Number of training epochs. Note that previous training epochs 224 | are considered to be done alreay, so this actually determines 225 | the epochs to train in total rather than in this particaular 226 | call. 227 | layers: Allows selecting wich layers to train. It can be: 228 | - A regular expression to match layer names to train 229 | - One of these predefined values: 230 | heaads: The RPN, classifier and mask heads of the network 231 | all: All the layers 232 | 3+: Train Resnet stage 3 and up 233 | 4+: Train Resnet stage 4 and up 234 | 5+: Train Resnet stage 5 and up 235 | """ 236 | assert self.mode == "training", "Create model in training mode." 237 | 238 | # Pre-defined layer regular expressions 239 | layer_regex = { 240 | # all layers but the backbone 241 | "heads": r"(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", 242 | # From a specific Resnet stage and up 243 | "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(res6.*)|(bn6.*)|(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", 244 | "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(res6.*)|(bn6.*)|(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", 245 | "5+": r"(res5.*)|(bn5.*)|(res6.*)|(bn6.*)|(mrcnn\_.*)|(rpn\_.*)|(score_map\_.*)|(regr\_.*)|(classify\_.*)", 246 | # All layers 247 | "all": ".*", 248 | } 249 | if layers in layer_regex.keys(): 250 | layers = layer_regex[layers] 251 | 252 | # Data generators 253 | train_generator = data_generator(train_dataset, self.config, shuffle=True, 254 | batch_size=self.config.BATCH_SIZE) 255 | val_generator = data_generator(val_dataset, self.config, shuffle=True, 256 | batch_size=self.config.BATCH_SIZE, 257 | augment=False) 258 | 259 | # Callbacks 260 | callbacks = [ 261 | keras.callbacks.TensorBoard(log_dir=self.log_dir, 262 | histogram_freq=0, write_graph=True, write_images=False), 263 | keras.callbacks.ModelCheckpoint(self.checkpoint_path, 264 | verbose=0, save_weights_only=True, save_best_only=True), 265 | keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=10, verbose=1, mode='auto', min_delta=0.001, min_lr=0) 266 | ] 267 | 268 | # Train 269 | print("\nStarting at epoch {}. LR={}\n".format(self.epoch, learning_rate)) 270 | print("Checkpoint Path: {}".format(self.checkpoint_path)) 271 | self.set_trainable(layers, verbose=0) 272 | self.compile(learning_rate, self.config.LEARNING_MOMENTUM) 273 | 274 | # Work-around for Windows: Keras fails on Windows when using 275 | # multiprocessing workers. See discussion here: 276 | # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009 277 | if os.name is 'nt': 278 | workers = 0 279 | else: 280 | workers = max(self.config.BATCH_SIZE // 2, 2) 281 | 282 | self.keras_model.fit_generator( 283 | train_generator, 284 | initial_epoch=self.epoch, 285 | epochs=epochs, 286 | steps_per_epoch=self.config.STEPS_PER_EPOCH, 287 | callbacks=callbacks, 288 | validation_data=next(val_generator), 289 | validation_steps=self.config.VALIDATION_STEPS, 290 | max_queue_size=100, 291 | workers=4, 292 | use_multiprocessing=True, 293 | ) 294 | self.epoch = max(self.epoch, epochs) 295 | 296 | def detect(self, images, verbose=0): 297 | """Runs the detection pipeline. 298 | 299 | images: List of images, potentially of different sizes. 300 | 301 | Returns a list of dicts, one dict per image. The dict contains: 302 | rois: [N, (y1, x1, y2, x2)] detection bounding boxes 303 | class_ids: [N] int class IDs 304 | scores: [N] float probability scores for the class IDs 305 | """ 306 | assert self.mode == "inference", "Create model in inference mode." 307 | assert len( 308 | images) == self.config.BATCH_SIZE, "len(images) must be equal to BATCH_SIZE" 309 | 310 | if verbose: 311 | print("Processing {} images".format(len(images))) 312 | 313 | # Mold inputs to format expected by the neural network 314 | molded_images, image_metas, windows = self.mold_inputs(images) 315 | 316 | # Run object detection 317 | detections, mrcnn_class, mrcnn_bbox, \ 318 | rois, rpn_class, rpn_bbox =\ 319 | self.keras_model.predict([molded_images, image_metas], verbose=0) 320 | 321 | # Process detections 322 | results = [] 323 | for i, image in enumerate(images): 324 | final_rois, final_class_ids, final_scores =\ 325 | self.unmold_detections(detections[i], image.shape, windows[i]) 326 | results.append({ 327 | "rois": final_rois, 328 | "class_ids": final_class_ids, 329 | "scores": final_scores 330 | }) 331 | return results 332 | 333 | def mold_inputs(self, images): 334 | """Takes a list of images and modifies them to the format expected 335 | as an input to the neural network. 336 | images: List of image matricies [height,width,depth]. Images can have 337 | different sizes. 338 | 339 | Returns 3 Numpy matricies: 340 | molded_images: [N, h, w, 3]. Images resized and normalized. 341 | image_metas: [N, length of meta data]. Details about each image. 342 | windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the 343 | original image (padding excluded). 344 | """ 345 | molded_images = [] 346 | image_metas = [] 347 | windows = [] 348 | for image in images: 349 | # Resize image to fit the model expected size 350 | # TODO: move resizing to mold_image() 351 | molded_image, window, scale, padding = KerasRFCN.Utils.resize_image( 352 | image, 353 | min_dim=self.config.IMAGE_MIN_DIM, 354 | max_dim=self.config.IMAGE_MAX_DIM, 355 | padding=self.config.IMAGE_PADDING) 356 | molded_image = KerasRFCN.Utils.mold_image(molded_image, self.config) 357 | # Build image_meta 358 | image_meta = KerasRFCN.Utils.compose_image_meta( 359 | 0, image.shape, window, 360 | np.zeros([self.config.NUM_CLASSES], dtype=np.int32)) 361 | # Append 362 | molded_images.append(molded_image) 363 | windows.append(window) 364 | image_metas.append(image_meta) 365 | # Pack into arrays 366 | molded_images = np.stack(molded_images) 367 | image_metas = np.stack(image_metas) 368 | windows = np.stack(windows) 369 | return molded_images, image_metas, windows 370 | 371 | def unmold_detections(self, detections, image_shape, window): 372 | """Reformats the detections of one image from the format of the neural 373 | network output to a format suitable for use in the rest of the 374 | application. 375 | 376 | detections: [N, (y1, x1, y2, x2, class_id, score)] 377 | image_shape: [height, width, depth] Original size of the image before resizing 378 | window: [y1, x1, y2, x2] Box in the image where the real image is 379 | excluding the padding. 380 | 381 | Returns: 382 | boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels 383 | class_ids: [N] Integer class IDs for each bounding box 384 | scores: [N] Float probability scores of the class_id 385 | """ 386 | # How many detections do we have? 387 | # Detections array is padded with zeros. Find the first class_id == 0. 388 | zero_ix = np.where(detections[:, 4] == 0)[0] 389 | N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] 390 | 391 | # Extract boxes, class_ids, scores 392 | boxes = detections[:N, :4] 393 | class_ids = detections[:N, 4].astype(np.int32) 394 | scores = detections[:N, 5] 395 | 396 | # Compute scale and shift to translate coordinates to image domain. 397 | h_scale = image_shape[0] / (window[2] - window[0]) 398 | w_scale = image_shape[1] / (window[3] - window[1]) 399 | scale = min(h_scale, w_scale) 400 | shift = window[:2] # y, x 401 | scales = np.array([scale, scale, scale, scale]) 402 | shifts = np.array([shift[0], shift[1], shift[0], shift[1]]) 403 | 404 | # Translate bounding boxes to image domain 405 | boxes = np.multiply(boxes - shifts, scales).astype(np.int32) 406 | 407 | # Filter out detections with zero area. Often only happens in early 408 | # stages of training when the network weights are still a bit random. 409 | exclude_ix = np.where( 410 | (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] 411 | if exclude_ix.shape[0] > 0: 412 | boxes = np.delete(boxes, exclude_ix, axis=0) 413 | class_ids = np.delete(class_ids, exclude_ix, axis=0) 414 | scores = np.delete(scores, exclude_ix, axis=0) 415 | N = class_ids.shape[0] 416 | 417 | return boxes, class_ids, scores 418 | -------------------------------------------------------------------------------- /KerasRFCN/Model/Model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | ''' 9 | This is Main class of RFCN Model 10 | Contain the model's framework and call the backbone 11 | ''' 12 | 13 | from KerasRFCN.Model.ResNet import ResNet 14 | from KerasRFCN.Model.ResNet_dilated import ResNet_dilated 15 | from KerasRFCN.Model.BaseModel import BaseModel 16 | import KerasRFCN.Utils 17 | import KerasRFCN.Losses 18 | 19 | import keras.layers as KL 20 | import keras.engine as KE 21 | import tensorflow as tf 22 | import numpy as np 23 | import keras 24 | import keras.backend as K 25 | import keras.models as KM 26 | 27 | class RFCN_Model(BaseModel): 28 | """docstring for RFCN_Model""" 29 | def __init__(self, mode, config, model_dir): 30 | """ 31 | mode: Either "training" or "inference" 32 | config: A Sub-class of the Config class 33 | model_dir: Directory to save training logs and trained weights 34 | """ 35 | assert mode in ['training', 'inference'] 36 | assert config.BACKBONE in ['resnet50', 'resnet101', 'resnet50_dilated', 'resnet101_dilated'] 37 | 38 | self.mode = mode 39 | self.config = config 40 | self.model_dir = model_dir 41 | self.set_log_dir() 42 | self.keras_model = self.build(mode=mode, config=config) 43 | 44 | def build(self, mode, config): 45 | assert mode in ['training', 'inference'] 46 | 47 | h, w = config.IMAGE_SHAPE[:2] 48 | if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): 49 | raise Exception("Image size must be dividable by 2 at least 6 times " 50 | "to avoid fractions when downscaling and upscaling." 51 | "For example, use 256, 320, 384, 448, 512, ... etc. ") 52 | # Inputs 53 | input_image = KL.Input( 54 | shape=config.IMAGE_SHAPE.tolist(), name="input_image") 55 | input_image_meta = KL.Input(shape=[None], name="input_image_meta") 56 | if mode == "training": 57 | # RPN GT 58 | input_rpn_match = KL.Input( 59 | shape=[None, 1], name="input_rpn_match", dtype=tf.int32) 60 | input_rpn_bbox = KL.Input( 61 | shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32) 62 | 63 | # Detection GT (class IDs, bounding boxes) 64 | # 1. GT Class IDs (zero padded) 65 | input_gt_class_ids = KL.Input( 66 | shape=[None], name="input_gt_class_ids", dtype=tf.int32) 67 | # 2. GT Boxes in pixels (zero padded) 68 | # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates 69 | input_gt_boxes = KL.Input( 70 | shape=[None, 4], name="input_gt_boxes", dtype=tf.float32) 71 | # Normalize coordinates 72 | h, w = K.shape(input_image)[1], K.shape(input_image)[2] 73 | image_scale = K.cast(K.stack([h, w, h, w], axis=0), tf.float32) 74 | gt_boxes = KL.Lambda(lambda x: x / image_scale)(input_gt_boxes) 75 | 76 | if config.BACKBONE in ['resnet50', 'resnet101']: 77 | P2, P3, P4, P5, P6 = ResNet(input_image, architecture=config.BACKBONE).output_layers 78 | else: 79 | P2, P3, P4, P5, P6 = ResNet_dilated(input_image, architecture=config.BACKBONE).output_layers 80 | 81 | # Note that P6 is used in RPN, but not in the classifier heads. 82 | rpn_feature_maps = [P2, P3, P4, P5, P6] 83 | mrcnn_feature_maps = [P2, P3, P4, P5] 84 | 85 | ### RPN ### 86 | rpn = self.build_rpn_model(config.RPN_ANCHOR_STRIDE, 87 | len(config.RPN_ANCHOR_RATIOS), 256) 88 | # Loop through pyramid layers 89 | layer_outputs = [] # list of lists 90 | for p in rpn_feature_maps: 91 | layer_outputs.append(rpn([p])) 92 | # Concatenate layer outputs 93 | # Convert from list of lists of level outputs to list of lists 94 | # of outputs across levels. 95 | # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] 96 | output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] 97 | outputs = list(zip(*layer_outputs)) 98 | outputs = [KL.Concatenate(axis=1, name=n)(list(o)) 99 | for o, n in zip(outputs, output_names)] 100 | 101 | rpn_class_logits, rpn_class, rpn_bbox = outputs 102 | 103 | self.anchors = KerasRFCN.Utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, 104 | config.RPN_ANCHOR_RATIOS, 105 | config.BACKBONE_SHAPES, 106 | config.BACKBONE_STRIDES, 107 | config.RPN_ANCHOR_STRIDE) 108 | # window size K and total classed num C 109 | # Example: For coco, C = 80+1 110 | scoreMapSize = config.K * config.K 111 | ScoreMaps_classify = [] 112 | for feature_map_count, feature_map in enumerate(mrcnn_feature_maps): 113 | # [W * H * class_num] * k^2 114 | ScoreMap = KL.Conv2D(config.C * scoreMapSize, kernel_size=(1,1), name="score_map_class_{}".format(feature_map_count), padding='valid')(feature_map) 115 | ScoreMaps_classify.append(ScoreMap) 116 | 117 | ScoreMaps_regr = [] 118 | for feature_map_count, feature_map in enumerate(mrcnn_feature_maps): 119 | # [W * H * 4] * k^2 ==> 4 = (x,y,w,h) 120 | ScoreMap = KL.Conv2D(4 * scoreMapSize, kernel_size=(1,1), name="score_map_regr_{}".format(feature_map_count), padding='valid')(feature_map) 121 | ScoreMaps_regr.append(ScoreMap) 122 | 123 | # Generate proposals 124 | # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates 125 | # and zero padded. 126 | proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\ 127 | else config.POST_NMS_ROIS_INFERENCE 128 | rpn_rois = ProposalLayer(proposal_count=proposal_count, 129 | nms_threshold=config.RPN_NMS_THRESHOLD, 130 | name="ROI", 131 | anchors=self.anchors, 132 | config=config)([rpn_class, rpn_bbox]) 133 | 134 | if mode == "training": 135 | # Class ID mask to mark class IDs supported by the dataset the image 136 | # came from. 137 | _, _, _, active_class_ids = KL.Lambda(lambda x: parse_image_meta_graph(x))(input_image_meta) 138 | 139 | # Generate detection targets 140 | # Subsamples proposals and generates target outputs for training 141 | # Note that proposal class IDs, gt_boxes, and gt_masks are zero 142 | # padded. Equally, returned rois and targets are zero padded. 143 | rois, target_class_ids, target_bbox =\ 144 | DetectionTargetLayer(config, name="proposal_targets")([ 145 | rpn_rois, input_gt_class_ids, gt_boxes]) 146 | 147 | # size = [batch, num_rois, class_num] 148 | classify_vote = VotePooling(config.TRAIN_ROIS_PER_IMAGE, config.C, config.K, config.POOL_SIZE, config.BATCH_SIZE, config.IMAGE_SHAPE, name="classify_vote")([rois] + ScoreMaps_classify) 149 | classify_output = KL.TimeDistributed(KL.Activation('softmax'),name="classify_output")(classify_vote) 150 | 151 | # 4 k^2 rather than 4k^2*C 152 | regr_vote = VotePooling(config.TRAIN_ROIS_PER_IMAGE, 4, config.K, config.POOL_SIZE, config.BATCH_SIZE, config.IMAGE_SHAPE, name="regr_vote")([rois] + ScoreMaps_regr) 153 | regr_output = KL.TimeDistributed(KL.Activation('linear'),name="regr_output")(regr_vote) 154 | 155 | rpn_class_loss = KL.Lambda(lambda x: KerasRFCN.Losses.rpn_class_loss_graph(*x), name="rpn_class_loss")( 156 | [input_rpn_match, rpn_class_logits]) 157 | rpn_bbox_loss = KL.Lambda(lambda x: KerasRFCN.Losses.rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")( 158 | [input_rpn_bbox, input_rpn_match, rpn_bbox]) 159 | class_loss = KL.Lambda(lambda x: KerasRFCN.Losses.mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")( 160 | [target_class_ids, classify_vote, active_class_ids]) 161 | bbox_loss = KL.Lambda(lambda x: KerasRFCN.Losses.mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")( 162 | [target_bbox, target_class_ids, regr_output]) 163 | 164 | inputs = [input_image, input_image_meta, 165 | input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes] 166 | 167 | outputs = [rpn_class_logits, rpn_class, rpn_bbox, 168 | classify_vote, classify_output, regr_output, 169 | rpn_rois, rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss] 170 | 171 | keras_model = KM.Model(inputs, outputs, name='rfcn_train') 172 | else: # inference 173 | 174 | # Network Heads 175 | # Proposal classifier and BBox regressor heads 176 | # size = [batch, num_rois, class_num] 177 | classify_vote = VotePooling(proposal_count, config.C, config.K, config.POOL_SIZE, config.BATCH_SIZE, config.IMAGE_SHAPE, name="classify_vote")([rpn_rois] + ScoreMaps_classify) 178 | classify_output = KL.TimeDistributed(KL.Activation('softmax'),name="classify_output")(classify_vote) 179 | 180 | # 4 k^2 rather than 4k^2*C 181 | regr_vote = VotePooling(proposal_count, 4, config.K, config.POOL_SIZE, config.BATCH_SIZE, config.IMAGE_SHAPE, name="regr_vote")([rpn_rois] + ScoreMaps_regr) 182 | regr_output = KL.TimeDistributed(KL.Activation('linear'),name="regr_output")(regr_vote) 183 | 184 | # Detections 185 | # output is [batch, num_detections, (y1, x1, y2, x2, score)] in image coordinates 186 | detections = DetectionLayer(config, name="mrcnn_detection")( 187 | [rpn_rois, classify_output, regr_output, input_image_meta]) 188 | 189 | keras_model = KM.Model([input_image, input_image_meta], 190 | [detections, classify_output, regr_output, rpn_rois, rpn_class, rpn_bbox], 191 | name='rfcn_inference') 192 | return keras_model 193 | 194 | def build_rpn_model(self, anchor_stride, anchors_per_location, depth): 195 | """Builds a Keras model of the Region Proposal Network. 196 | It wraps the RPN graph so it can be used multiple times with shared 197 | weights. 198 | 199 | anchors_per_location: number of anchors per pixel in the feature map 200 | anchor_stride: Controls the density of anchors. Typically 1 (anchors for 201 | every pixel in the feature map), or 2 (every other pixel). 202 | depth: Depth of the backbone feature map. 203 | 204 | Returns a Keras Model object. The model outputs, when called, are: 205 | rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax) 206 | rpn_probs: [batch, W, W, 2] Anchor classifier probabilities. 207 | rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be 208 | applied to anchors. 209 | """ 210 | input_feature_map = KL.Input(shape=[None, None, depth], 211 | name="input_rpn_feature_map") 212 | outputs = self.rpn(input_feature_map, anchors_per_location, anchor_stride) 213 | return KM.Model([input_feature_map], outputs, name="rpn_model") 214 | 215 | def rpn(self, feature_map, anchors_per_location, anchor_stride): 216 | """Builds a Keras model of the Region Proposal Network. 217 | It wraps the RPN graph so it can be used multiple times with shared 218 | weights. 219 | 220 | anchors_per_location: number of anchors per pixel in the feature map 221 | anchor_stride: Controls the density of anchors. Typically 1 (anchors for 222 | every pixel in the feature map), or 2 (every other pixel). 223 | depth: Depth of the backbone feature map. 224 | 225 | Returns a Keras Model object. The model outputs, when called, are: 226 | rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax) 227 | rpn_probs: [batch, W, W, 2] Anchor classifier probabilities. 228 | rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be 229 | applied to anchors. 230 | """ 231 | 232 | shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu', 233 | strides=anchor_stride, 234 | name='rpn_conv_shared')(feature_map) 235 | 236 | # Anchor Score. [batch, height, width, anchors per location * 2]. 237 | x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid', 238 | activation='linear', name='rpn_class_raw')(shared) 239 | 240 | # Reshape to [batch, anchors, 2] 241 | rpn_class_logits = KL.Lambda( 242 | lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(x) 243 | 244 | # Softmax on last dimension of BG/FG. 245 | rpn_probs = KL.Activation( 246 | "softmax", name="rpn_class_xxx")(rpn_class_logits) 247 | 248 | # Bounding box refinement. [batch, H, W, anchors per location, depth] 249 | # where depth is [x, y, log(w), log(h)] 250 | x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid", 251 | activation='linear', name='rpn_bbox_pred')(shared) 252 | 253 | # Reshape to [batch, anchors, 4] 254 | rpn_bbox = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(x) 255 | 256 | return rpn_class_logits, rpn_probs, rpn_bbox 257 | 258 | 259 | ############################################################ 260 | # Proposal Layer 261 | ############################################################ 262 | 263 | def apply_box_deltas_graph(boxes, deltas): 264 | """Applies the given deltas to the given boxes. 265 | boxes: [N, 4] where each row is y1, x1, y2, x2 266 | deltas: [N, 4] where each row is [dy, dx, log(dh), log(dw)] 267 | """ 268 | # Convert to y, x, h, w 269 | height = boxes[:, 2] - boxes[:, 0] 270 | width = boxes[:, 3] - boxes[:, 1] 271 | center_y = boxes[:, 0] + 0.5 * height 272 | center_x = boxes[:, 1] + 0.5 * width 273 | # Apply deltas 274 | center_y += deltas[:, 0] * height 275 | center_x += deltas[:, 1] * width 276 | height *= tf.exp(deltas[:, 2]) 277 | width *= tf.exp(deltas[:, 3]) 278 | # Convert back to y1, x1, y2, x2 279 | y1 = center_y - 0.5 * height 280 | x1 = center_x - 0.5 * width 281 | y2 = y1 + height 282 | x2 = x1 + width 283 | result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out") 284 | return result 285 | 286 | 287 | def clip_boxes_graph(boxes, window): 288 | """ 289 | boxes: [N, 4] each row is y1, x1, y2, x2 290 | window: [4] in the form y1, x1, y2, x2 291 | """ 292 | # Split corners 293 | wy1, wx1, wy2, wx2 = tf.split(window, 4) 294 | y1, x1, y2, x2 = tf.split(boxes, 4, axis=1) 295 | # Clip 296 | y1 = tf.maximum(tf.minimum(y1, wy2), wy1) 297 | x1 = tf.maximum(tf.minimum(x1, wx2), wx1) 298 | y2 = tf.maximum(tf.minimum(y2, wy2), wy1) 299 | x2 = tf.maximum(tf.minimum(x2, wx2), wx1) 300 | clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes") 301 | return clipped 302 | 303 | 304 | class ProposalLayer(KE.Layer): 305 | """Receives anchor scores and selects a subset to pass as proposals 306 | to the second stage. Filtering is done based on anchor scores and 307 | non-max suppression to remove overlaps. It also applies bounding 308 | box refinment detals to anchors. 309 | 310 | Inputs: 311 | rpn_probs: [batch, anchors, (bg prob, fg prob)] 312 | rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))] 313 | 314 | Returns: 315 | Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)] 316 | """ 317 | 318 | def __init__(self, proposal_count, nms_threshold, anchors, 319 | config=None, **kwargs): 320 | """ 321 | anchors: [N, (y1, x1, y2, x2)] anchors defined in image coordinates 322 | """ 323 | super(ProposalLayer, self).__init__(**kwargs) 324 | self.config = config 325 | self.proposal_count = proposal_count 326 | self.nms_threshold = nms_threshold 327 | self.anchors = anchors.astype(np.float32) 328 | 329 | def call(self, inputs): 330 | # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1] 331 | scores = inputs[0][:, :, 1] 332 | # Box deltas [batch, num_rois, 4] 333 | deltas = inputs[1] 334 | deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) 335 | # Base anchors 336 | anchors = self.anchors 337 | 338 | # Improve performance by trimming to top anchors by score 339 | # and doing the rest on the smaller subset. 340 | pre_nms_limit = min(6000, self.anchors.shape[0]) 341 | ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, 342 | name="top_anchors").indices 343 | scores = KerasRFCN.Utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y), 344 | self.config.IMAGES_PER_GPU) 345 | deltas = KerasRFCN.Utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y), 346 | self.config.IMAGES_PER_GPU) 347 | anchors = KerasRFCN.Utils.batch_slice(ix, lambda x: tf.gather(anchors, x), 348 | self.config.IMAGES_PER_GPU, 349 | names=["pre_nms_anchors"]) 350 | 351 | # Apply deltas to anchors to get refined anchors. 352 | # [batch, N, (y1, x1, y2, x2)] 353 | boxes = KerasRFCN.Utils.batch_slice([anchors, deltas], 354 | lambda x, y: apply_box_deltas_graph(x, y), 355 | self.config.IMAGES_PER_GPU, 356 | names=["refined_anchors"]) 357 | 358 | # Clip to image boundaries. [batch, N, (y1, x1, y2, x2)] 359 | height, width = self.config.IMAGE_SHAPE[:2] 360 | window = np.array([0, 0, height, width]).astype(np.float32) 361 | boxes = KerasRFCN.Utils.batch_slice(boxes, 362 | lambda x: clip_boxes_graph(x, window), 363 | self.config.IMAGES_PER_GPU, 364 | names=["refined_anchors_clipped"]) 365 | 366 | # Filter out small boxes 367 | # According to Xinlei Chen's paper, this reduces detection accuracy 368 | # for small objects, so we're skipping it. 369 | 370 | # Normalize dimensions to range of 0 to 1. 371 | normalized_boxes = boxes / np.array([[height, width, height, width]]) 372 | 373 | # Non-max suppression 374 | def nms(normalized_boxes, scores): 375 | indices = tf.image.non_max_suppression( 376 | normalized_boxes, scores, self.proposal_count, 377 | self.nms_threshold, name="rpn_non_max_suppression") 378 | proposals = tf.gather(normalized_boxes, indices) 379 | # Pad if needed 380 | padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0) 381 | proposals = tf.pad(proposals, [(0, padding), (0, 0)]) 382 | return proposals 383 | proposals = KerasRFCN.Utils.batch_slice([normalized_boxes, scores], nms, 384 | self.config.IMAGES_PER_GPU) 385 | return proposals 386 | 387 | def compute_output_shape(self, input_shape): 388 | return (None, self.proposal_count, 4) 389 | 390 | ############################################################ 391 | # Detection Target Layer 392 | ############################################################ 393 | 394 | def overlaps_graph(boxes1, boxes2): 395 | """Computes IoU overlaps between two sets of boxes. 396 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 397 | """ 398 | # 1. Tile boxes2 and repeate boxes1. This allows us to compare 399 | # every boxes1 against every boxes2 without loops. 400 | # TF doesn't have an equivalent to np.repeate() so simulate it 401 | # using tf.tile() and tf.reshape. 402 | b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), 403 | [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) 404 | b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) 405 | # 2. Compute intersections 406 | b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1) 407 | b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1) 408 | y1 = tf.maximum(b1_y1, b2_y1) 409 | x1 = tf.maximum(b1_x1, b2_x1) 410 | y2 = tf.minimum(b1_y2, b2_y2) 411 | x2 = tf.minimum(b1_x2, b2_x2) 412 | intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0) 413 | # 3. Compute unions 414 | b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1) 415 | b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1) 416 | union = b1_area + b2_area - intersection 417 | # 4. Compute IoU and reshape to [boxes1, boxes2] 418 | iou = intersection / union 419 | overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) 420 | return overlaps 421 | 422 | 423 | def detection_targets_graph(proposals, gt_class_ids, gt_boxes, config): 424 | """Generates detection targets for one image. Subsamples proposals and 425 | generates target class IDs, bounding box deltas for each. 426 | 427 | Inputs: 428 | proposals: [N, (y1, x1, y2, x2)] in normalized coordinates. Might 429 | be zero padded if there are not enough proposals. 430 | gt_class_ids: [MAX_GT_INSTANCES] int class IDs 431 | gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. 432 | 433 | Returns: Target ROIs and corresponding class IDs, bounding box shifts 434 | rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates 435 | class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded. 436 | deltas: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (dy, dx, log(dh), log(dw))] 437 | Class-specific bbox refinments. 438 | 439 | Note: Returned arrays might be zero padded if not enough target ROIs. 440 | """ 441 | # Assertions 442 | asserts = [ 443 | tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], 444 | name="roi_assertion"), 445 | ] 446 | with tf.control_dependencies(asserts): 447 | proposals = tf.identity(proposals) 448 | 449 | # Remove zero padding 450 | proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") 451 | gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") 452 | gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, 453 | name="trim_gt_class_ids") 454 | 455 | # Handle COCO crowds 456 | # A crowd box in COCO is a bounding box around several instances. Exclude 457 | # them from training. A crowd box is given a negative class ID. 458 | crowd_ix = tf.where(gt_class_ids < 0)[:, 0] 459 | non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0] 460 | crowd_boxes = tf.gather(gt_boxes, crowd_ix) 461 | 462 | gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) 463 | gt_boxes = tf.gather(gt_boxes, non_crowd_ix) 464 | 465 | # Compute overlaps matrix [proposals, gt_boxes] 466 | overlaps = overlaps_graph(proposals, gt_boxes) 467 | 468 | # Compute overlaps with crowd boxes [anchors, crowds] 469 | crowd_overlaps = overlaps_graph(proposals, crowd_boxes) 470 | crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) 471 | no_crowd_bool = (crowd_iou_max < 0.001) 472 | 473 | # Determine postive and negative ROIs 474 | roi_iou_max = tf.reduce_max(overlaps, axis=1) 475 | # 1. Positive ROIs are those with >= 0.5 IoU with a GT box 476 | positive_roi_bool = (roi_iou_max >= 0.5) 477 | positive_indices = tf.where(positive_roi_bool)[:, 0] 478 | # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds. 479 | negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0] 480 | 481 | # Subsample ROIs. Aim for 33% positive 482 | # Positive ROIs 483 | positive_count = int(config.TRAIN_ROIS_PER_IMAGE * 484 | config.ROI_POSITIVE_RATIO) 485 | positive_indices = tf.random_shuffle(positive_indices)[:positive_count] 486 | positive_count = tf.shape(positive_indices)[0] 487 | # Negative ROIs. Add enough to maintain positive:negative ratio. 488 | r = 1.0 / config.ROI_POSITIVE_RATIO 489 | negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count 490 | negative_indices = tf.random_shuffle(negative_indices)[:negative_count] 491 | # Gather selected ROIs 492 | positive_rois = tf.gather(proposals, positive_indices) 493 | negative_rois = tf.gather(proposals, negative_indices) 494 | 495 | # Assign positive ROIs to GT boxes. 496 | positive_overlaps = tf.gather(overlaps, positive_indices) 497 | roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1) 498 | roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) 499 | roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) 500 | 501 | # Compute bbox refinement for positive ROIs 502 | deltas = KerasRFCN.Utils.box_refinement_graph(positive_rois, roi_gt_boxes) 503 | deltas /= config.BBOX_STD_DEV 504 | 505 | # Append negative ROIs and pad bbox deltas and masks that 506 | # are not used for negative ROIs with zeros. 507 | rois = tf.concat([positive_rois, negative_rois], axis=0) 508 | N = tf.shape(negative_rois)[0] 509 | P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) 510 | rois = tf.pad(rois, [(0, P), (0, 0)]) 511 | roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) 512 | roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) 513 | deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) 514 | 515 | return rois, roi_gt_class_ids, deltas 516 | 517 | def trim_zeros_graph(boxes, name=None): 518 | """Often boxes are represented with matricies of shape [N, 4] and 519 | are padded with zeros. This removes zero boxes. 520 | 521 | boxes: [N, 4] matrix of boxes. 522 | non_zeros: [N] a 1D boolean mask identifying the rows to keep 523 | """ 524 | non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool) 525 | boxes = tf.boolean_mask(boxes, non_zeros, name=name) 526 | return boxes, non_zeros 527 | 528 | class DetectionTargetLayer(KE.Layer): 529 | """Subsamples proposals and generates target box refinment, class_ids for each. 530 | 531 | Inputs: 532 | proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might 533 | be zero padded if there are not enough proposals. 534 | gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs. 535 | gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized 536 | coordinates. 537 | 538 | Returns: Target ROIs and corresponding class IDs, bounding box shifts 539 | rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized 540 | coordinates 541 | target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs. 542 | target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, 543 | (dy, dx, log(dh), log(dw), class_id)] 544 | Class-specific bbox refinments. 545 | 546 | Note: Returned arrays might be zero padded if not enough target ROIs. 547 | """ 548 | 549 | def __init__(self, config, **kwargs): 550 | super(DetectionTargetLayer, self).__init__(**kwargs) 551 | self.config = config 552 | 553 | def call(self, inputs): 554 | proposals = inputs[0] 555 | gt_class_ids = inputs[1] 556 | gt_boxes = inputs[2] 557 | 558 | # Slice the batch and run a graph for each slice 559 | # TODO: Rename target_bbox to target_deltas for clarity 560 | names = ["rois", "target_class_ids", "target_bbox"] 561 | outputs = KerasRFCN.Utils.batch_slice( 562 | [proposals, gt_class_ids, gt_boxes], 563 | lambda w, x, y: detection_targets_graph( 564 | w, x, y, self.config), 565 | self.config.IMAGES_PER_GPU, names=names) 566 | return outputs 567 | 568 | def compute_output_shape(self, input_shape): 569 | return [ 570 | (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois 571 | (None, 1), # class_ids 572 | (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas 573 | ] 574 | 575 | ############################################################ 576 | # ROI pooling on Muti Bins 577 | ############################################################ 578 | 579 | def log2_graph(x): 580 | """Implementatin of Log2. TF doesn't have a native implemenation.""" 581 | return tf.log(x) / tf.log(2.0) 582 | 583 | class VotePooling(KE.Layer): 584 | def __init__(self, num_rois, channel_num, k, pool_shape, batch_size, image_shape, **kwargs): 585 | super(VotePooling, self).__init__(**kwargs) 586 | self.channel_num = channel_num 587 | self.k = k 588 | self.num_rois = num_rois 589 | self.pool_shape = pool_shape 590 | self.batch_size = batch_size 591 | self.image_shape = image_shape 592 | 593 | def call(self, inputs): 594 | boxes = inputs[0] 595 | 596 | # Feature Maps. List of feature maps from different level of the 597 | # feature pyramid. Each is [batch, height, width, channels] 598 | score_maps = inputs[1:] 599 | 600 | # Assign each ROI to a level in the pyramid based on the ROI area. 601 | y1, x1, y2, x2 = tf.split(boxes, 4, axis=2) 602 | h = y2 - y1 603 | w = x2 - x1 604 | # Equation 1 in the Feature Pyramid Networks paper. Account for 605 | # the fact that our coordinates are normalized here. 606 | # e.g. a 224x224 ROI (in pixels) maps to P4 607 | image_area = tf.cast( 608 | self.image_shape[0] * self.image_shape[1], tf.float32) 609 | roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area))) 610 | roi_level = tf.minimum(5, tf.maximum( 611 | 2, 4 + tf.cast(tf.round(roi_level), tf.int32))) 612 | roi_level = tf.squeeze(roi_level, 2) 613 | 614 | # Loop through levels and apply ROI pooling to each. P2 to P5. 615 | pooled = [] 616 | box_to_level = [] 617 | for i, level in enumerate(range(2, 6)): 618 | ix = tf.where(tf.equal(roi_level, level)) 619 | level_boxes = tf.gather_nd(boxes, ix) 620 | 621 | # Box indicies for crop_and_resize. 622 | box_indices = tf.cast(ix[:, 0], tf.int32) 623 | 624 | # Keep track of which box is mapped to which level 625 | box_to_level.append(ix) 626 | 627 | # Stop gradient propogation to ROI proposals 628 | level_boxes = tf.stop_gradient(level_boxes) 629 | box_indices = tf.stop_gradient(box_indices) 630 | 631 | # Here we use the simplified approach of a single value per bin, 632 | # which is how it's done in tf.crop_and_resize() 633 | # Result: [batch * num_boxes, pool_height, pool_width, channels] 634 | pooled.append(tf.image.crop_and_resize( 635 | score_maps[i], level_boxes, box_indices, [self.pool_shape * self.k, self.pool_shape * self.k], 636 | method="bilinear")) 637 | 638 | # Pack pooled features into one tensor 639 | pooled = tf.concat(pooled, axis=0) 640 | 641 | # position-sensitive ROI pooling + classify 642 | score_map_bins = [] 643 | for channel_step in range(self.k*self.k): 644 | bin_x = K.variable( int(channel_step % self.k) * self.pool_shape, dtype='int32') 645 | bin_y = K.variable( int(channel_step / self.k) * self.pool_shape, dtype='int32') 646 | channel_indices = K.variable(list(range(channel_step*self.channel_num, (channel_step+1)*self.channel_num)), dtype='int32') 647 | croped = tf.image.crop_to_bounding_box( 648 | tf.gather( pooled, indices=channel_indices, axis=-1), bin_y, bin_x, self.pool_shape, self.pool_shape) 649 | # [pool_shape, pool_shape, channel_num] ==> [1,1,channel_num] ==> [1, channel_num] 650 | croped_mean = K.pool2d(croped, (self.pool_shape, self.pool_shape), strides=(1, 1), padding='valid', data_format="channels_last", pool_mode='avg') 651 | # [batch * num_rois, 1,1,channel_num] ==> [batch * num_rois, 1, channel_num] 652 | croped_mean = K.squeeze(croped_mean, axis=1) 653 | score_map_bins.append(croped_mean) 654 | # [batch * num_rois, k^2, channel_num] 655 | score_map_bins = tf.concat(score_map_bins, axis=1) 656 | # [batch * num_rois, k*k, channel_num] ==> [batch * num_rois,channel_num] 657 | # because "keepdims=False", the axis 1 will not keep. else will be [batch * num_rois,1,channel_num] 658 | pooled = K.sum(score_map_bins, axis=1) 659 | 660 | # Pack box_to_level mapping into one array and add another 661 | # column representing the order of pooled boxes 662 | box_to_level = tf.concat(box_to_level, axis=0) 663 | box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1) 664 | box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range], 665 | axis=1) 666 | 667 | # Rearrange pooled features to match the order of the original boxes 668 | # Sort box_to_level by batch then box index 669 | # TF doesn't have a way to sort by two columns, so merge them and sort. 670 | sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1] 671 | ix = tf.nn.top_k(sorting_tensor, k=tf.shape( 672 | box_to_level)[0]).indices[::-1] 673 | ix = tf.gather(box_to_level[:, 2], ix) 674 | pooled = tf.gather(pooled, ix) 675 | 676 | # Re-add the batch dimension 677 | pooled = tf.expand_dims(pooled, 0) 678 | 679 | return pooled 680 | 681 | def compute_output_shape(self, input_shape): 682 | return None, self.num_rois, self.channel_num 683 | 684 | ############################################################ 685 | # Detection Layer 686 | ############################################################ 687 | 688 | def clip_to_window(window, boxes): 689 | """ 690 | window: (y1, x1, y2, x2). The window in the image we want to clip to. 691 | boxes: [N, (y1, x1, y2, x2)] 692 | """ 693 | boxes[:, 0] = np.maximum(np.minimum(boxes[:, 0], window[2]), window[0]) 694 | boxes[:, 1] = np.maximum(np.minimum(boxes[:, 1], window[3]), window[1]) 695 | boxes[:, 2] = np.maximum(np.minimum(boxes[:, 2], window[2]), window[0]) 696 | boxes[:, 3] = np.maximum(np.minimum(boxes[:, 3], window[3]), window[1]) 697 | return boxes 698 | 699 | 700 | def refine_detections_graph(rois, probs, deltas, window, config): 701 | """Refine classified proposals and filter overlaps and return final 702 | detections. 703 | 704 | Inputs: 705 | rois: [N, (y1, x1, y2, x2)] in normalized coordinates 706 | probs: [N, num_classes]. Class probabilities. 707 | deltas: [N, (dy, dx, log(dh), log(dw))]. Class-specific 708 | bounding box deltas. 709 | window: (y1, x1, y2, x2) in image coordinates. The part of the image 710 | that contains the image excluding the padding. 711 | 712 | Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)] where 713 | coordinates are in image domain. 714 | """ 715 | 716 | # Class IDs per ROI 717 | class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) 718 | # Class probability of the top class of each ROI 719 | indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) 720 | class_scores = tf.gather_nd(probs, indices) 721 | # Class-specific bounding box deltas 722 | # deltas_specific = tf.gather_nd(deltas, indices) 723 | # Apply bounding box deltas 724 | # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates 725 | refined_rois = apply_box_deltas_graph( 726 | rois, deltas * config.BBOX_STD_DEV) 727 | # Convert coordiates to image domain 728 | # TODO: better to keep them normalized until later 729 | height, width = config.IMAGE_SHAPE[:2] 730 | refined_rois *= tf.constant([height, width, height, width], dtype=tf.float32) 731 | # Clip boxes to image window 732 | refined_rois = clip_boxes_graph(refined_rois, window) 733 | # Round and cast to int since we're deadling with pixels now 734 | refined_rois = tf.to_int32(tf.rint(refined_rois)) 735 | 736 | # TODO: Filter out boxes with zero area 737 | 738 | # Filter out background boxes 739 | keep = tf.where(class_ids > 0)[:, 0] 740 | # Filter out low confidence boxes 741 | if config.DETECTION_MIN_CONFIDENCE: 742 | conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] 743 | keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), 744 | tf.expand_dims(conf_keep, 0)) 745 | keep = tf.sparse_tensor_to_dense(keep)[0] 746 | 747 | # Apply per-class NMS 748 | # 1. Prepare variables 749 | pre_nms_class_ids = tf.gather(class_ids, keep) 750 | pre_nms_scores = tf.gather(class_scores, keep) 751 | pre_nms_rois = tf.gather(refined_rois, keep) 752 | unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] 753 | 754 | def nms_keep_map(class_id): 755 | """Apply Non-Maximum Suppression on ROIs of the given class.""" 756 | # Indices of ROIs of the given class 757 | ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] 758 | # Apply NMS 759 | class_keep = tf.image.non_max_suppression( 760 | tf.to_float(tf.gather(pre_nms_rois, ixs)), 761 | tf.gather(pre_nms_scores, ixs), 762 | max_output_size=config.DETECTION_MAX_INSTANCES, 763 | iou_threshold=config.DETECTION_NMS_THRESHOLD) 764 | # Map indicies 765 | class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) 766 | # Pad with -1 so returned tensors have the same shape 767 | gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] 768 | class_keep = tf.pad(class_keep, [(0, gap)], 769 | mode='CONSTANT', constant_values=-1) 770 | # Set shape so map_fn() can infer result shape 771 | class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) 772 | return class_keep 773 | 774 | # 2. Map over class IDs 775 | nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, 776 | dtype=tf.int64) 777 | # 3. Merge results into one list, and remove -1 padding 778 | nms_keep = tf.reshape(nms_keep, [-1]) 779 | nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) 780 | # 4. Compute intersection between keep and nms_keep 781 | keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), 782 | tf.expand_dims(nms_keep, 0)) 783 | keep = tf.sparse_tensor_to_dense(keep)[0] 784 | # Keep top detections 785 | roi_count = config.DETECTION_MAX_INSTANCES 786 | class_scores_keep = tf.gather(class_scores, keep) 787 | num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) 788 | top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] 789 | keep = tf.gather(keep, top_ids) 790 | 791 | # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] 792 | # Coordinates are in image domain. 793 | detections = tf.concat([ 794 | tf.to_float(tf.gather(refined_rois, keep)), 795 | tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], 796 | tf.gather(class_scores, keep)[..., tf.newaxis] 797 | ], axis=1) 798 | 799 | # Pad with zeros if detections < DETECTION_MAX_INSTANCES 800 | gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] 801 | detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT") 802 | return detections 803 | 804 | def parse_image_meta_graph(meta): 805 | """Parses a tensor that contains image attributes to its components. 806 | See compose_image_meta() for more details. 807 | 808 | meta: [batch, meta length] where meta length depends on NUM_CLASSES 809 | """ 810 | image_id = meta[:, 0] 811 | image_shape = meta[:, 1:4] 812 | window = meta[:, 4:8] 813 | active_class_ids = meta[:, 8:] 814 | return [image_id, image_shape, window, active_class_ids] 815 | 816 | class DetectionLayer(KE.Layer): 817 | """Takes classified proposal boxes and their bounding box deltas and 818 | returns the final detection boxes. 819 | 820 | Returns: 821 | [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where 822 | coordinates are in image domain 823 | """ 824 | 825 | def __init__(self, config=None, **kwargs): 826 | super(DetectionLayer, self).__init__(**kwargs) 827 | self.config = config 828 | 829 | def call(self, inputs): 830 | rois = inputs[0] 831 | mrcnn_class = inputs[1] 832 | mrcnn_bbox = inputs[2] 833 | image_meta = inputs[3] 834 | 835 | # Run detection refinement graph on each item in the batch 836 | _, _, window, _ = parse_image_meta_graph(image_meta) 837 | detections_batch = KerasRFCN.Utils.batch_slice( 838 | [rois, mrcnn_class, mrcnn_bbox, window], 839 | lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config), 840 | self.config.IMAGES_PER_GPU) 841 | 842 | # Reshape output 843 | # [batch, num_detections, (y1, x1, y2, x2, class_score)] in pixels 844 | return tf.reshape( 845 | detections_batch, 846 | [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6]) 847 | 848 | def compute_output_shape(self, input_shape): 849 | return (None, self.config.DETECTION_MAX_INSTANCES, 6) 850 | -------------------------------------------------------------------------------- /KerasRFCN/Model/ResNet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | ''' 9 | This is Backbone of RFCN Model 10 | ResNet50 or 101 11 | ''' 12 | 13 | import keras.layers as KL 14 | 15 | class ResNet(object): 16 | """docstring for ResNet101""" 17 | def __init__(self, input_tensor, architecture='resnet50'): 18 | self.keras_model = "" 19 | self.input_tensor = input_tensor 20 | self.output_layers = "" 21 | assert architecture in ['resnet50', 'resnet101'], 'architecture must be resnet50 or resnet101!' 22 | self.architecture = architecture 23 | self.construct_graph(input_tensor) 24 | 25 | def construct_graph(self, input_tensor, stage5=True): 26 | assert self.input_tensor is not None, "input_tensor can not be none!" 27 | # Stage 1 28 | x = KL.ZeroPadding2D((3, 3))(input_tensor) 29 | x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x) 30 | x = BatchNorm(axis=3, name='bn_conv1')(x) 31 | x = KL.Activation('relu')(x) 32 | C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) 33 | # Stage 2 34 | x = self.conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) 35 | x = self.identity_block(x, 3, [64, 64, 256], stage=2, block='b') 36 | C2 = x = self.identity_block(x, 3, [64, 64, 256], stage=2, block='c') 37 | # Stage 3 38 | x = self.conv_block(x, 3, [128, 128, 512], stage=3, block='a') 39 | x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='b') 40 | x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='c') 41 | C3 = x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='d') 42 | # Stage 4 43 | x = self.conv_block(x, 3, [256, 256, 1024], stage=4, block='a') 44 | block_count = {"resnet50": 5, "resnet101": 22}[self.architecture] 45 | for i in range(block_count): 46 | x = self.identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i)) 47 | C4 = x 48 | # Stage 5 49 | if stage5: 50 | x = self.conv_block(x, 3, [512, 512, 2048], stage=5, block='a') 51 | x = self.identity_block(x, 3, [512, 512, 2048], stage=5, block='b') 52 | C5 = x = self.identity_block(x, 3, [512, 512, 2048], stage=5, block='c') 53 | else: 54 | C5 = None 55 | 56 | P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5) 57 | P4 = KL.Add(name="fpn_p4add")([ 58 | KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), 59 | KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)]) 60 | P3 = KL.Add(name="fpn_p3add")([ 61 | KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), 62 | KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)]) 63 | P2 = KL.Add(name="fpn_p2add")([ 64 | KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), 65 | KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)]) 66 | 67 | # Attach 3x3 conv to all P layers to get the final feature maps. 68 | P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2) 69 | P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3) 70 | P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4) 71 | P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5) 72 | # P6 is used for the 5th anchor scale in RPN. Generated by 73 | # subsampling from P5 with stride of 2. 74 | P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) 75 | 76 | self.output_layers = [P2, P3, P4, P5, P6] 77 | 78 | def conv_block(self, input_tensor, kernel_size, filters, stage, block, 79 | strides=(2, 2), use_bias=True): 80 | """conv_block is the block that has a conv layer at shortcut 81 | # Arguments 82 | input_tensor: input tensor 83 | kernel_size: defualt 3, the kernel size of middle conv layer at main path 84 | filters: list of integers, the nb_filters of 3 conv layer at main path 85 | stage: integer, current stage label, used for generating layer names 86 | block: 'a','b'..., current block label, used for generating layer names 87 | Note that from stage 3, the first conv layer at main path is with subsample=(2,2) 88 | And the shortcut should have subsample=(2,2) as well 89 | """ 90 | nb_filter1, nb_filter2, nb_filter3 = filters 91 | conv_name_base = 'res' + str(stage) + block + '_branch' 92 | bn_name_base = 'bn' + str(stage) + block + '_branch' 93 | 94 | x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, 95 | name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) 96 | x = BatchNorm(axis=3, name=bn_name_base + '2a')(x) 97 | x = KL.Activation('relu')(x) 98 | 99 | x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', 100 | name=conv_name_base + '2b', use_bias=use_bias)(x) 101 | x = BatchNorm(axis=3, name=bn_name_base + '2b')(x) 102 | x = KL.Activation('relu')(x) 103 | 104 | x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + 105 | '2c', use_bias=use_bias)(x) 106 | x = BatchNorm(axis=3, name=bn_name_base + '2c')(x) 107 | 108 | shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, 109 | name=conv_name_base + '1', use_bias=use_bias)(input_tensor) 110 | shortcut = BatchNorm(axis=3, name=bn_name_base + '1')(shortcut) 111 | 112 | x = KL.Add()([x, shortcut]) 113 | x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) 114 | return x 115 | 116 | def identity_block(self, input_tensor, kernel_size, filters, stage, block, 117 | use_bias=True): 118 | """The identity_block is the block that has no conv layer at shortcut 119 | # Arguments 120 | input_tensor: input tensor 121 | kernel_size: defualt 3, the kernel size of middle conv layer at main path 122 | filters: list of integers, the nb_filters of 3 conv layer at main path 123 | stage: integer, current stage label, used for generating layer names 124 | block: 'a','b'..., current block label, used for generating layer names 125 | """ 126 | nb_filter1, nb_filter2, nb_filter3 = filters 127 | conv_name_base = 'res' + str(stage) + block + '_branch' 128 | bn_name_base = 'bn' + str(stage) + block + '_branch' 129 | 130 | x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', 131 | use_bias=use_bias)(input_tensor) 132 | x = BatchNorm(axis=3, name=bn_name_base + '2a')(x) 133 | x = KL.Activation('relu')(x) 134 | 135 | x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', 136 | name=conv_name_base + '2b', use_bias=use_bias)(x) 137 | x = BatchNorm(axis=3, name=bn_name_base + '2b')(x) 138 | x = KL.Activation('relu')(x) 139 | 140 | x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', 141 | use_bias=use_bias)(x) 142 | x = BatchNorm(axis=3, name=bn_name_base + '2c')(x) 143 | 144 | x = KL.Add()([x, input_tensor]) 145 | x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) 146 | return x 147 | 148 | class BatchNorm(KL.BatchNormalization): 149 | """Batch Normalization class. Subclasses the Keras BN class and 150 | hardcodes training=False so the BN layer doesn't update 151 | during training. 152 | 153 | Batch normalization has a negative effect on training if batches are small 154 | so we disable it here. 155 | """ 156 | 157 | def call(self, inputs, training=None): 158 | return super(self.__class__, self).call(inputs, training=False) -------------------------------------------------------------------------------- /KerasRFCN/Model/ResNet_dilated.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | ''' 9 | This is Backbone of RFCN Model 10 | Dilated ResNet50 or 101 11 | Paper: DetNet: A Backbone network for Object Detection 12 | https://arxiv.org/abs/1804.06215 13 | ''' 14 | 15 | import keras.layers as KL 16 | 17 | class ResNet_dilated(object): 18 | """docstring for ResNet101""" 19 | def __init__(self, input_tensor, architecture='resnet50'): 20 | self.keras_model = "" 21 | self.input_tensor = input_tensor 22 | self.output_layers = "" 23 | assert architecture in ['resnet50', 'resnet101'], 'architecture must be resnet50 or resnet101!' 24 | self.architecture = architecture 25 | self.construct_graph(input_tensor) 26 | 27 | def construct_graph(self, input_tensor, stage5=True): 28 | assert self.input_tensor is not None, "input_tensor can not be none!" 29 | # Stage 1 30 | x = KL.ZeroPadding2D((3, 3))(input_tensor) 31 | x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x) 32 | x = BatchNorm(axis=3, name='bn_conv1')(x) 33 | x = KL.Activation('relu')(x) 34 | C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) 35 | # Stage 2 36 | x = self.conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) 37 | x = self.identity_block(x, 3, [64, 64, 256], stage=2, block='b') 38 | C2 = x = self.identity_block(x, 3, [64, 64, 256], stage=2, block='c') 39 | # Stage 3 40 | x = self.conv_block(x, 3, [128, 128, 512], stage=3, block='a') 41 | x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='b') 42 | x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='c') 43 | C3 = x = self.identity_block(x, 3, [128, 128, 512], stage=3, block='d') 44 | # Stage 4 45 | x = self.conv_block(x, 3, [256, 256, 1024], stage=4, block='a') 46 | block_count = {"resnet50": 5, "resnet101": 22}[self.architecture] 47 | for i in range(block_count): 48 | x = self.identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i)) 49 | C4 = x 50 | # Stage 5 51 | x = self.conv_block(x, 3, [256, 256, 256], stage=5, block='a', dilated=2, strides=(1, 1)) 52 | x = self.identity_block(x, 3, [256, 256, 256], stage=5, block='b', dilated=2) 53 | C5 = x = self.identity_block(x, 3, [256, 256, 256], stage=5, block='c', dilated=2) 54 | # Stage 6 55 | x = self.conv_block(x, 3, [256, 256, 256], stage=6, block='a', dilated=2, strides=(1, 1)) 56 | x = self.identity_block(x, 3, [256, 256, 256], stage=6, block='b', dilated=2) 57 | C6 = x = self.identity_block(x, 3, [256, 256, 256], stage=6, block='c', dilated=2) 58 | 59 | P6 = KL.Conv2D(256, (1, 1), name='fpn_c6p6')(C6) 60 | P5 = KL.Add(name="fpn_p5add")([P6, KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)]) 61 | P4 = KL.Add(name="fpn_p4add")([P5, KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)]) 62 | P3 = KL.Add(name="fpn_p3add")([ 63 | KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), 64 | KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)]) 65 | P2 = KL.Add(name="fpn_p2add")([ 66 | KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), 67 | KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)]) 68 | 69 | # Attach 3x3 conv to all P layers to get the final feature maps. 70 | P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2) 71 | P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3) 72 | P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4) 73 | P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5) 74 | # P6 is used for the 5th anchor scale in RPN. Generated by 75 | # subsampling from P5 with stride of 2. 76 | P6 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p6")(P6) 77 | 78 | self.output_layers = [P2, P3, P4, P5, P6] 79 | 80 | def conv_block(self, input_tensor, kernel_size, filters, stage, block, 81 | strides=(2, 2), use_bias=True, dilated=1): 82 | """conv_block is the block that has a conv layer at shortcut 83 | # Arguments 84 | input_tensor: input tensor 85 | kernel_size: defualt 3, the kernel size of middle conv layer at main path 86 | filters: list of integers, the nb_filters of 3 conv layer at main path 87 | stage: integer, current stage label, used for generating layer names 88 | block: 'a','b'..., current block label, used for generating layer names 89 | Note that from stage 3, the first conv layer at main path is with subsample=(2,2) 90 | And the shortcut should have subsample=(2,2) as well 91 | """ 92 | nb_filter1, nb_filter2, nb_filter3 = filters 93 | conv_name_base = 'res' + str(stage) + block + '_branch' 94 | bn_name_base = 'bn' + str(stage) + block + '_branch' 95 | 96 | x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, 97 | name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) 98 | x = BatchNorm(axis=3, name=bn_name_base + '2a')(x) 99 | x = KL.Activation('relu')(x) 100 | 101 | x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', 102 | name=conv_name_base + '2b', use_bias=use_bias, dilation_rate=dilated)(x) 103 | x = BatchNorm(axis=3, name=bn_name_base + '2b')(x) 104 | x = KL.Activation('relu')(x) 105 | 106 | x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + 107 | '2c', use_bias=use_bias)(x) 108 | x = BatchNorm(axis=3, name=bn_name_base + '2c')(x) 109 | 110 | shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, 111 | name=conv_name_base + '1', use_bias=use_bias)(input_tensor) 112 | shortcut = BatchNorm(axis=3, name=bn_name_base + '1')(shortcut) 113 | 114 | x = KL.Add()([x, shortcut]) 115 | x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) 116 | return x 117 | 118 | def identity_block(self, input_tensor, kernel_size, filters, stage, block, 119 | use_bias=True, dilated=1): 120 | """The identity_block is the block that has no conv layer at shortcut 121 | # Arguments 122 | input_tensor: input tensor 123 | kernel_size: defualt 3, the kernel size of middle conv layer at main path 124 | filters: list of integers, the nb_filters of 3 conv layer at main path 125 | stage: integer, current stage label, used for generating layer names 126 | block: 'a','b'..., current block label, used for generating layer names 127 | """ 128 | nb_filter1, nb_filter2, nb_filter3 = filters 129 | conv_name_base = 'res' + str(stage) + block + '_branch' 130 | bn_name_base = 'bn' + str(stage) + block + '_branch' 131 | 132 | x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', 133 | use_bias=use_bias)(input_tensor) 134 | x = BatchNorm(axis=3, name=bn_name_base + '2a')(x) 135 | x = KL.Activation('relu')(x) 136 | 137 | x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', 138 | name=conv_name_base + '2b', use_bias=use_bias, dilation_rate=dilated)(x) 139 | x = BatchNorm(axis=3, name=bn_name_base + '2b')(x) 140 | x = KL.Activation('relu')(x) 141 | 142 | x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', 143 | use_bias=use_bias)(x) 144 | x = BatchNorm(axis=3, name=bn_name_base + '2c')(x) 145 | 146 | x = KL.Add()([x, input_tensor]) 147 | x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) 148 | return x 149 | 150 | class BatchNorm(KL.BatchNormalization): 151 | """Batch Normalization class. Subclasses the Keras BN class and 152 | hardcodes training=False so the BN layer doesn't update 153 | during training. 154 | 155 | Batch normalization has a negative effect on training if batches are small 156 | so we disable it here. 157 | """ 158 | 159 | def call(self, inputs, training=None): 160 | return super(self.__class__, self).call(inputs, training=False) -------------------------------------------------------------------------------- /KerasRFCN/Utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Keras RFCN 3 | Copyright (c) 2018 4 | Licensed under the MIT License (see LICENSE for details) 5 | Written by parap1uie-s@github.com 6 | """ 7 | 8 | import sys 9 | import os 10 | import math 11 | import random 12 | import numpy as np 13 | import tensorflow as tf 14 | import scipy.misc 15 | import skimage.color 16 | import skimage.io 17 | import urllib.request 18 | import shutil 19 | 20 | ############################################################ 21 | # Bounding Boxes 22 | ############################################################ 23 | 24 | # def extract_bboxes(mask): 25 | # """Compute bounding boxes. 26 | # mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 27 | 28 | # Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 29 | # """ 30 | # boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 31 | # for i in range(mask.shape[-1]): 32 | # m = mask[:, :, i] 33 | # # Bounding box. 34 | # horizontal_indicies = np.where(np.any(m, axis=0))[0] 35 | # vertical_indicies = np.where(np.any(m, axis=1))[0] 36 | # if horizontal_indicies.shape[0]: 37 | # x1, x2 = horizontal_indicies[[0, -1]] 38 | # y1, y2 = vertical_indicies[[0, -1]] 39 | # # x2 and y2 should not be part of the box. Increment by 1. 40 | # x2 += 1 41 | # y2 += 1 42 | # else: 43 | # # No mask for this instance. Might happen due to 44 | # # resizing or cropping. Set bbox to zeros 45 | # x1, x2, y1, y2 = 0, 0, 0, 0 46 | # boxes[i] = np.array([y1, x1, y2, x2]) 47 | # return boxes.astype(np.int32) 48 | 49 | 50 | def compute_iou(box, boxes, box_area, boxes_area): 51 | """Calculates IoU of the given box with the array of the given boxes. 52 | box: 1D vector [y1, x1, y2, x2] 53 | boxes: [boxes_count, (y1, x1, y2, x2)] 54 | box_area: float. the area of 'box' 55 | boxes_area: array of length boxes_count. 56 | 57 | Note: the areas are passed in rather than calculated here for 58 | efficency. Calculate once in the caller to avoid duplicate work. 59 | """ 60 | # Calculate intersection areas 61 | y1 = np.maximum(box[0], boxes[:, 0]) 62 | y2 = np.minimum(box[2], boxes[:, 2]) 63 | x1 = np.maximum(box[1], boxes[:, 1]) 64 | x2 = np.minimum(box[3], boxes[:, 3]) 65 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 66 | union = box_area + boxes_area[:] - intersection[:] 67 | iou = intersection / union 68 | return iou 69 | 70 | 71 | def compute_overlaps(boxes1, boxes2): 72 | """Computes IoU overlaps between two sets of boxes. 73 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 74 | 75 | For better performance, pass the largest set first and the smaller second. 76 | """ 77 | # Areas of anchors and GT boxes 78 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 79 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 80 | 81 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 82 | # Each cell contains the IoU value. 83 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 84 | for i in range(overlaps.shape[1]): 85 | box2 = boxes2[i] 86 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 87 | return overlaps 88 | 89 | 90 | def non_max_suppression(boxes, scores, threshold): 91 | """Performs non-maximum supression and returns indicies of kept boxes. 92 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 93 | scores: 1-D array of box scores. 94 | threshold: Float. IoU threshold to use for filtering. 95 | """ 96 | assert boxes.shape[0] > 0 97 | if boxes.dtype.kind != "f": 98 | boxes = boxes.astype(np.float32) 99 | 100 | # Compute box areas 101 | y1 = boxes[:, 0] 102 | x1 = boxes[:, 1] 103 | y2 = boxes[:, 2] 104 | x2 = boxes[:, 3] 105 | area = (y2 - y1) * (x2 - x1) 106 | 107 | # Get indicies of boxes sorted by scores (highest first) 108 | ixs = scores.argsort()[::-1] 109 | 110 | pick = [] 111 | while len(ixs) > 0: 112 | # Pick top box and add its index to the list 113 | i = ixs[0] 114 | pick.append(i) 115 | # Compute IoU of the picked box with the rest 116 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 117 | # Identify boxes with IoU over the threshold. This 118 | # returns indicies into ixs[1:], so add 1 to get 119 | # indicies into ixs. 120 | remove_ixs = np.where(iou > threshold)[0] + 1 121 | # Remove indicies of the picked and overlapped boxes. 122 | ixs = np.delete(ixs, remove_ixs) 123 | ixs = np.delete(ixs, 0) 124 | return np.array(pick, dtype=np.int32) 125 | 126 | 127 | def apply_box_deltas(boxes, deltas): 128 | """Applies the given deltas to the given boxes. 129 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 130 | deltas: [N, (dy, dx, log(dh), log(dw))] 131 | """ 132 | boxes = boxes.astype(np.float32) 133 | # Convert to y, x, h, w 134 | height = boxes[:, 2] - boxes[:, 0] 135 | width = boxes[:, 3] - boxes[:, 1] 136 | center_y = boxes[:, 0] + 0.5 * height 137 | center_x = boxes[:, 1] + 0.5 * width 138 | # Apply deltas 139 | center_y += deltas[:, 0] * height 140 | center_x += deltas[:, 1] * width 141 | height *= np.exp(deltas[:, 2]) 142 | width *= np.exp(deltas[:, 3]) 143 | # Convert back to y1, x1, y2, x2 144 | y1 = center_y - 0.5 * height 145 | x1 = center_x - 0.5 * width 146 | y2 = y1 + height 147 | x2 = x1 + width 148 | return np.stack([y1, x1, y2, x2], axis=1) 149 | 150 | 151 | def box_refinement_graph(box, gt_box): 152 | """Compute refinement needed to transform box to gt_box. 153 | box and gt_box are [N, (y1, x1, y2, x2)] 154 | """ 155 | box = tf.cast(box, tf.float32) 156 | gt_box = tf.cast(gt_box, tf.float32) 157 | 158 | height = box[:, 2] - box[:, 0] 159 | width = box[:, 3] - box[:, 1] 160 | center_y = box[:, 0] + 0.5 * height 161 | center_x = box[:, 1] + 0.5 * width 162 | 163 | gt_height = gt_box[:, 2] - gt_box[:, 0] 164 | gt_width = gt_box[:, 3] - gt_box[:, 1] 165 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 166 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 167 | 168 | dy = (gt_center_y - center_y) / height 169 | dx = (gt_center_x - center_x) / width 170 | dh = tf.log(gt_height / height) 171 | dw = tf.log(gt_width / width) 172 | 173 | result = tf.stack([dy, dx, dh, dw], axis=1) 174 | return result 175 | 176 | 177 | def box_refinement(box, gt_box): 178 | """Compute refinement needed to transform box to gt_box. 179 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 180 | assumed to be outside the box. 181 | """ 182 | box = box.astype(np.float32) 183 | gt_box = gt_box.astype(np.float32) 184 | 185 | height = box[:, 2] - box[:, 0] 186 | width = box[:, 3] - box[:, 1] 187 | center_y = box[:, 0] + 0.5 * height 188 | center_x = box[:, 1] + 0.5 * width 189 | 190 | gt_height = gt_box[:, 2] - gt_box[:, 0] 191 | gt_width = gt_box[:, 3] - gt_box[:, 1] 192 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 193 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 194 | 195 | dy = (gt_center_y - center_y) / height 196 | dx = (gt_center_x - center_x) / width 197 | dh = np.log(gt_height / height) 198 | dw = np.log(gt_width / width) 199 | 200 | return np.stack([dy, dx, dh, dw], axis=1) 201 | 202 | 203 | ############################################################ 204 | # Dataset 205 | ############################################################ 206 | 207 | class Dataset(object): 208 | """The base class for dataset classes. 209 | To use it, create a new class that adds functions specific to the dataset 210 | you want to use. For example: 211 | 212 | class CatsAndDogsDataset(Dataset): 213 | def load_cats_and_dogs(self): 214 | ... 215 | def load_bbox(self, image_id): 216 | ... 217 | def image_reference(self, image_id): 218 | ... 219 | 220 | See COCODataset and ShapesDataset as examples. 221 | """ 222 | 223 | def __init__(self, class_map=None): 224 | self._image_ids = [] 225 | self.image_info = [] 226 | # Background is always the first class 227 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 228 | self.source_class_ids = {} 229 | 230 | def add_class(self, source, class_id, class_name): 231 | assert "." not in source, "Source name cannot contain a dot" 232 | # Does the class exist already? 233 | for info in self.class_info: 234 | if info['source'] == source and info["id"] == class_id: 235 | # source.class_id combination already available, skip 236 | return 237 | # Add the class 238 | self.class_info.append({ 239 | "source": source, 240 | "id": class_id, 241 | "name": class_name, 242 | }) 243 | 244 | def add_image(self, source, image_id, path, **kwargs): 245 | image_info = { 246 | "id": image_id, 247 | "source": source, 248 | "path": path, 249 | } 250 | image_info.update(kwargs) 251 | self.image_info.append(image_info) 252 | 253 | def image_reference(self, image_id): 254 | """Return a link to the image in its source Website or details about 255 | the image that help looking it up or debugging it. 256 | 257 | Override for your dataset, but pass to this function 258 | if you encounter images not in your dataset. 259 | """ 260 | return "" 261 | 262 | def prepare(self, class_map=None): 263 | """Prepares the Dataset class for use. 264 | 265 | TODO: class map is not supported yet. When done, it should handle mapping 266 | classes from different datasets to the same class ID. 267 | """ 268 | def clean_name(name): 269 | """Returns a shorter version of object names for cleaner display.""" 270 | return ",".join(name.split(",")[:1]) 271 | 272 | # Build (or rebuild) everything else from the info dicts. 273 | self.num_classes = len(self.class_info) 274 | self.class_ids = np.arange(self.num_classes) 275 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 276 | self.num_images = len(self.image_info) 277 | self._image_ids = np.arange(self.num_images) 278 | 279 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 280 | for info, id in zip(self.class_info, self.class_ids)} 281 | 282 | # Map sources to class_ids they support 283 | self.sources = list(set([i['source'] for i in self.class_info])) 284 | self.source_class_ids = {} 285 | # Loop over datasets 286 | for source in self.sources: 287 | self.source_class_ids[source] = [] 288 | # Find classes that belong to this dataset 289 | for i, info in enumerate(self.class_info): 290 | # Include BG class in all datasets 291 | if i == 0 or source == info['source']: 292 | self.source_class_ids[source].append(i) 293 | 294 | def map_source_class_id(self, source_class_id): 295 | """Takes a source class ID and returns the int class ID assigned to it. 296 | 297 | For example: 298 | dataset.map_source_class_id("coco.12") -> 23 299 | """ 300 | return self.class_from_source_map[source_class_id] 301 | 302 | def get_source_class_id(self, class_id, source): 303 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 304 | info = self.class_info[class_id] 305 | assert info['source'] == source 306 | return info['id'] 307 | 308 | def append_data(self, class_info, image_info): 309 | self.external_to_class_id = {} 310 | for i, c in enumerate(self.class_info): 311 | for ds, id in c["map"]: 312 | self.external_to_class_id[ds + str(id)] = i 313 | 314 | # Map external image IDs to internal ones. 315 | self.external_to_image_id = {} 316 | for i, info in enumerate(self.image_info): 317 | self.external_to_image_id[info["ds"] + str(info["id"])] = i 318 | 319 | @property 320 | def image_ids(self): 321 | return self._image_ids 322 | 323 | def source_image_link(self, image_id): 324 | """Returns the path or URL to the image. 325 | Override this to return a URL to the image if it's availble online for easy 326 | debugging. 327 | """ 328 | return self.image_info[image_id]["path"] 329 | 330 | def load_image(self, image_id): 331 | """Load the specified image and return a [H,W,3] Numpy array. 332 | """ 333 | # Load image 334 | image = skimage.io.imread(self.image_info[image_id]['path']) 335 | # If grayscale. Convert to RGB for consistency. 336 | if image.ndim != 3: 337 | image = skimage.color.gray2rgb(image) 338 | return image 339 | 340 | def load_bbox(self, image_id): 341 | """Load instance bbox for the given image. 342 | 343 | Different datasets use different ways to store bbox. Override this 344 | method to load instance bbox and return them in the form of am 345 | array of binary bbox of shape [height, width, instances]. 346 | 347 | Returns: 348 | bbox: A bool array of shape [height, width, instance count] with 349 | a binary bbox per instance. 350 | class_ids: a 1D array of class IDs of the instance bbox. 351 | """ 352 | # Override this function to load a bbox from your dataset. 353 | # Otherwise, it returns an empty bbox. 354 | bbox = np.empty([0, 0, 0]) 355 | class_ids = np.empty([0], np.int32) 356 | return bbox, class_ids 357 | 358 | 359 | def resize_image(image, min_dim=None, max_dim=None, padding=False): 360 | """ 361 | Resizes an image keeping the aspect ratio. 362 | 363 | min_dim: if provided, resizes the image such that it's smaller 364 | dimension == min_dim 365 | max_dim: if provided, ensures that the image longest side doesn't 366 | exceed this value. 367 | padding: If true, pads image with zeros so it's size is max_dim x max_dim 368 | 369 | Returns: 370 | image: the resized image 371 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 372 | be inserted in the returned image. If so, this window is the 373 | coordinates of the image part of the full image (excluding 374 | the padding). The x2, y2 pixels are not included. 375 | scale: The scale factor used to resize the image 376 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 377 | """ 378 | # Default window (y1, x1, y2, x2) and default scale == 1. 379 | h, w = image.shape[:2] 380 | window = (0, 0, h, w) 381 | scale = 1 382 | 383 | # Scale? 384 | if min_dim: 385 | # Scale up but not down 386 | scale = max(1, min_dim / min(h, w)) 387 | # Does it exceed max dim? 388 | if max_dim: 389 | image_max = max(h, w) 390 | if round(image_max * scale) > max_dim: 391 | scale = max_dim / image_max 392 | # Resize image and mask 393 | if scale != 1: 394 | image = scipy.misc.imresize( 395 | image, (round(h * scale), round(w * scale))) 396 | # Need padding? 397 | if padding: 398 | # Get new height and width 399 | h, w = image.shape[:2] 400 | top_pad = (max_dim - h) // 2 401 | bottom_pad = max_dim - h - top_pad 402 | left_pad = (max_dim - w) // 2 403 | right_pad = max_dim - w - left_pad 404 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 405 | image = np.pad(image, padding, mode='constant', constant_values=0) 406 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 407 | return image, window, scale, padding 408 | 409 | 410 | def resize_bbox(boxes, scale, padding): 411 | """Resizes a bbox using the given scale and padding. 412 | Typically, you get the scale and padding from resize_image() to 413 | ensure both, the image and the bbox, are resized consistently. 414 | 415 | scale: bbox scaling factor 416 | padding: Padding to add to the bbox in the form 417 | [(top, bottom), (left, right), (0, 0)] 418 | """ 419 | top_pad = padding[0][0] 420 | left_pad = padding[1][0] 421 | 422 | resized_boxes = [] 423 | for box in boxes: 424 | temp_new_box = box * scale 425 | y1 = temp_new_box[0] + top_pad 426 | x1 = temp_new_box[1] + left_pad 427 | y2 = temp_new_box[2] + top_pad 428 | x2 = temp_new_box[3] + left_pad 429 | resized_boxes.append((y1,x1,y2,x2)) 430 | return np.array(resized_boxes) 431 | 432 | ############################################################ 433 | # Anchors 434 | ############################################################ 435 | 436 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 437 | """ 438 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 439 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 440 | shape: [height, width] spatial shape of the feature map over which 441 | to generate anchors. 442 | feature_stride: Stride of the feature map relative to the image in pixels. 443 | anchor_stride: Stride of anchors on the feature map. For example, if the 444 | value is 2 then generate anchors for every other feature map pixel. 445 | """ 446 | # Get all combinations of scales and ratios 447 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 448 | scales = scales.flatten() 449 | ratios = ratios.flatten() 450 | 451 | # Enumerate heights and widths from scales and ratios 452 | heights = scales / np.sqrt(ratios) 453 | widths = scales * np.sqrt(ratios) 454 | 455 | # Enumerate shifts in feature space 456 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 457 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 458 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 459 | 460 | # Enumerate combinations of shifts, widths, and heights 461 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 462 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 463 | 464 | # Reshape to get a list of (y, x) and a list of (h, w) 465 | box_centers = np.stack( 466 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 467 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 468 | 469 | # Convert to corner coordinates (y1, x1, y2, x2) 470 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 471 | box_centers + 0.5 * box_sizes], axis=1) 472 | return boxes 473 | 474 | 475 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 476 | anchor_stride): 477 | """Generate anchors at different levels of a feature pyramid. Each scale 478 | is associated with a level of the pyramid, but each ratio is used in 479 | all levels of the pyramid. 480 | 481 | Returns: 482 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 483 | with the same order of the given scales. So, anchors of scale[0] come 484 | first, then anchors of scale[1], and so on. 485 | """ 486 | # Anchors 487 | # [anchor_count, (y1, x1, y2, x2)] 488 | anchors = [] 489 | for i in range(len(scales)): 490 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 491 | feature_strides[i], anchor_stride)) 492 | return np.concatenate(anchors, axis=0) 493 | 494 | 495 | ############################################################ 496 | # Miscellaneous 497 | ############################################################ 498 | 499 | def trim_zeros(x): 500 | """It's common to have tensors larger than the available data and 501 | pad with zeros. This function removes rows that are all zeros. 502 | 503 | x: [rows, columns]. 504 | """ 505 | assert len(x.shape) == 2 506 | return x[~np.all(x == 0, axis=1)] 507 | 508 | 509 | def compute_ap(gt_boxes, gt_class_ids, 510 | pred_boxes, pred_class_ids, pred_scores, 511 | iou_threshold=0.5): 512 | """Compute Average Precision at a set IoU threshold (default 0.5). 513 | 514 | Returns: 515 | mAP: Mean Average Precision 516 | precisions: List of precisions at different class score thresholds. 517 | recalls: List of recall values at different class score thresholds. 518 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 519 | """ 520 | # Trim zero padding and sort predictions by score from high to low 521 | # TODO: cleaner to do zero unpadding upstream 522 | gt_boxes = trim_zeros(gt_boxes) 523 | pred_boxes = trim_zeros(pred_boxes) 524 | pred_scores = pred_scores[:pred_boxes.shape[0]] 525 | indices = np.argsort(pred_scores)[::-1] 526 | pred_boxes = pred_boxes[indices] 527 | pred_class_ids = pred_class_ids[indices] 528 | pred_scores = pred_scores[indices] 529 | 530 | # Compute IoU overlaps [pred_boxes, gt_boxes] 531 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 532 | 533 | # Loop through ground truth boxes and find matching predictions 534 | match_count = 0 535 | pred_match = np.zeros([pred_boxes.shape[0]]) 536 | gt_match = np.zeros([gt_boxes.shape[0]]) 537 | for i in range(len(pred_boxes)): 538 | # Find best matching ground truth box 539 | sorted_ixs = np.argsort(overlaps[i])[::-1] 540 | for j in sorted_ixs: 541 | # If ground truth box is already matched, go to next one 542 | if gt_match[j] == 1: 543 | continue 544 | # If we reach IoU smaller than the threshold, end the loop 545 | iou = overlaps[i, j] 546 | if iou < iou_threshold: 547 | break 548 | # Do we have a match? 549 | if pred_class_ids[i] == gt_class_ids[j]: 550 | match_count += 1 551 | gt_match[j] = 1 552 | pred_match[i] = 1 553 | break 554 | 555 | # Compute precision and recall at each prediction box step 556 | precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1) 557 | recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match) 558 | 559 | # Pad with start and end values to simplify the math 560 | precisions = np.concatenate([[0], precisions, [0]]) 561 | recalls = np.concatenate([[0], recalls, [1]]) 562 | 563 | # Ensure precision values decrease but don't increase. This way, the 564 | # precision value at each recall threshold is the maximum it can be 565 | # for all following recall thresholds, as specified by the VOC paper. 566 | for i in range(len(precisions) - 2, -1, -1): 567 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 568 | 569 | # Compute mean AP over recall range 570 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 571 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 572 | precisions[indices]) 573 | 574 | return mAP, precisions, recalls, overlaps 575 | 576 | 577 | def compute_recall(pred_boxes, gt_boxes, iou): 578 | """Compute the recall at the given IoU threshold. It's an indication 579 | of how many GT boxes were found by the given prediction boxes. 580 | 581 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 582 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 583 | """ 584 | # Measure overlaps 585 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 586 | iou_max = np.max(overlaps, axis=1) 587 | iou_argmax = np.argmax(overlaps, axis=1) 588 | positive_ids = np.where(iou_max >= iou)[0] 589 | matched_gt_boxes = iou_argmax[positive_ids] 590 | 591 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 592 | return recall, positive_ids 593 | 594 | 595 | # ## Batch Slicing 596 | # Some custom layers support a batch size of 1 only, and require a lot of work 597 | # to support batches greater than 1. This function slices an input tensor 598 | # across the batch dimension and feeds batches of size 1. Effectively, 599 | # an easy way to support batches > 1 quickly with little code modification. 600 | # In the long run, it's more efficient to modify the code to support large 601 | # batches and getting rid of this function. Consider this a temporary solution 602 | def batch_slice(inputs, graph_fn, batch_size, names=None): 603 | """Splits inputs into slices and feeds each slice to a copy of the given 604 | computation graph and then combines the results. It allows you to run a 605 | graph on a batch of inputs even if the graph is written to support one 606 | instance only. 607 | 608 | inputs: list of tensors. All must have the same first dimension length 609 | graph_fn: A function that returns a TF tensor that's part of a graph. 610 | batch_size: number of slices to divide the data into. 611 | names: If provided, assigns names to the resulting tensors. 612 | """ 613 | if not isinstance(inputs, list): 614 | inputs = [inputs] 615 | 616 | outputs = [] 617 | for i in range(batch_size): 618 | inputs_slice = [x[i] for x in inputs] 619 | output_slice = graph_fn(*inputs_slice) 620 | if not isinstance(output_slice, (tuple, list)): 621 | output_slice = [output_slice] 622 | outputs.append(output_slice) 623 | # Change outputs from a list of slices where each is 624 | # a list of outputs to a list of outputs and each has 625 | # a list of slices 626 | outputs = list(zip(*outputs)) 627 | 628 | if names is None: 629 | names = [None] * len(outputs) 630 | 631 | result = [tf.stack(o, axis=0, name=n) 632 | for o, n in zip(outputs, names)] 633 | if len(result) == 1: 634 | result = result[0] 635 | 636 | return result 637 | 638 | ############################################################ 639 | # Data Formatting 640 | ############################################################ 641 | 642 | def compose_image_meta(image_id, image_shape, window, active_class_ids): 643 | """Takes attributes of an image and puts them in one 1D array. Use 644 | parse_image_meta() to parse the values back. 645 | 646 | image_id: An int ID of the image. Useful for debugging. 647 | image_shape: [height, width, channels] 648 | window: (y1, x1, y2, x2) in pixels. The area of the image where the real 649 | image is (excluding the padding) 650 | active_class_ids: List of class_ids available in the dataset from which 651 | the image came. Useful if training on images from multiple datasets 652 | where not all classes are present in all datasets. 653 | """ 654 | meta = np.array( 655 | [image_id] + # size=1 656 | list(image_shape) + # size=3 657 | list(window) + # size=4 (y1, x1, y2, x2) in image cooredinates 658 | list(active_class_ids) # size=num_classes 659 | ) 660 | return meta 661 | 662 | 663 | # Two functions (for Numpy and TF) to parse image_meta tensors. 664 | def parse_image_meta(meta): 665 | """Parses an image info Numpy array to its components. 666 | See compose_image_meta() for more details. 667 | """ 668 | image_id = meta[:, 0] 669 | image_shape = meta[:, 1:4] 670 | window = meta[:, 4:8] # (y1, x1, y2, x2) window of image in in pixels 671 | active_class_ids = meta[:, 8:] 672 | return image_id, image_shape, window, active_class_ids 673 | 674 | 675 | def parse_image_meta_graph(meta): 676 | """Parses a tensor that contains image attributes to its components. 677 | See compose_image_meta() for more details. 678 | 679 | meta: [batch, meta length] where meta length depends on NUM_CLASSES 680 | """ 681 | image_id = meta[:, 0] 682 | image_shape = meta[:, 1:4] 683 | window = meta[:, 4:8] 684 | active_class_ids = meta[:, 8:] 685 | return [image_id, image_shape, window, active_class_ids] 686 | 687 | 688 | def mold_image(images, config): 689 | """Takes RGB images with 0-255 values and subtraces 690 | the mean pixel and converts it to float. Expects image 691 | colors in RGB order. 692 | """ 693 | return images.astype(np.float32) - config.MEAN_PIXEL 694 | 695 | 696 | def unmold_image(normalized_images, config): 697 | """Takes a image normalized with mold() and returns the original.""" 698 | return (normalized_images + config.MEAN_PIXEL).astype(np.uint8) -------------------------------------------------------------------------------- /KerasRFCN/__init__.py: -------------------------------------------------------------------------------- 1 | # __init.py__ 2 | __all__ = ['Config', 'Data_generator', 'Losses', 'Utils','Model.BaseModel','Model.Model','Model.ResNet_dilated','Model.ResNet'] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 parap1uie-s 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Keras-RFCN 2 | RFCN implement based on Keras&Tensorflow 3 | 4 | This is an implementation of [Region-based Fully Convolutional Networks](https://arxiv.org/pdf/1605.06409v2.pdf) on Python 3, Keras, and TensorFlow. The model generates bounding boxes for each instance of an object in the image. It's based on Feature Pyramid Network (FPN) and a [ResNet50](https://arxiv.org/abs/1512.03385) or ResNet101 backbone. 5 | 6 | The repository includes: 7 | 8 | * Source code of RFCN built on FPN and ResNet50/101. 9 | * Training code for [DeepFashion Dataset](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html) with 46 clothes classes. 10 | * Pre-trained weights for [DeepFashion Dataset](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html) - See release 11 | * Example of training on your own dataset - see Fashion_Train.py and Fashion_Test.py 12 | 13 | 14 | # Introduction 15 | 16 | Thanks to the [Mask-RCNN implement by matterport](https://github.com/matterport/Mask_RCNN), we have the great framework so that we don't have the needs to generate bounding box and implement the Non-Maximum-Suppression algorithm. 17 | 18 | If you are already fimilar with matterport's framework, this repository is easy to understand and use. What I have done is remove the mask head in the framework, which makes it be a Faster-RCNN, and implement a position sensitive ROI pooling layer and a VOTE layer. For more details, please read the [paper](https://arxiv.org/pdf/1605.06409v2.pdf). 19 | 20 | ![position sensitive ROI](ReadmeImages/1.png) 21 | 22 | # Getting Started 23 | 24 | ## Train on your own dataset 25 | 26 | As you can see in **Fashion_Train.py**, for a specific dataset, all you need is listed below: 27 | 28 | * A **XXConfig** class inherit from the **Config** base class in the framework. 29 | * A **XXDataset** class inherit from the **Utils.Dataset**. 30 | * Implement **load_image(image_id)** which returns a numpy array I with I.shape == (Img_h, Img_w, 3) for the specific image id. You don't have the needs to resize image, or take data augment. The framework will do all of them automatically. Just keep the data what it is. 31 | * Implement **load_bbox(image_id)** which returns a tuple with 2 numpy array for the specific image id, the first one is boundbox coordinates (y1,x1,y2,x2), and class ID for the boundbox in order. 32 | * Define your training schedule. 33 | 34 | ## Predict on your own dataset 35 | 36 | See **Fashion_Test.py** as a demo. 37 | More details in writing. 38 | 39 | # Framework 40 | 41 | This RFCN framework consists of FIVE parts: 42 | 43 | * Config.py - the base class of config for a specific dataset. 44 | * Data_generator.py - generate data for a object detection model, like background box, and true box. The images are resized in this script. 45 | * Losses.py - define the l1_smooth loss for box regression and cross-entropy loss for box classification. 46 | * Utils.py - all the auxiliary functions, like compute_iou, non-maximum-suppression, etc. 47 | * BaseModel.py - the base class of our keras model, contains the auxiliary functions for the model, like load weights, save checkpoints, decode the training schedule. 48 | * **Model.py** - the CORE script of our framework, contains the RPN, ScoreMap, position sensitive ROI pooling layer, etc. 49 | * ResNet.py - The resnet backbone, you can choose resnet50 or resnet101. 50 | * ResNet_dilated.py - Take the stages 4+ have a dilate ratio = 2, named DetNet, which is the latest research results in this [paper](https://arxiv.org/abs/1804.06215). 51 | 52 | # Experiment Result 53 | 54 | To make sure the framework work normally, we have trained the model 240 epochs with DeepFashion dataset. And the detection result might useful: 55 | 56 |
57 | 58 | 59 |
60 | 61 | # TODO 62 | 63 | * ~~Complete the load_weights function of model.(Done)~~ 64 | * Add the callback for eval mAP after each batch end 65 | * Train on MSCOCO -------------------------------------------------------------------------------- /ReadmeImages/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parap1uie-s/Keras-RFCN/5466d0da653041cf2e1ae74d26fb4c126a95330d/ReadmeImages/1.png -------------------------------------------------------------------------------- /ReadmeImages/result_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parap1uie-s/Keras-RFCN/5466d0da653041cf2e1ae74d26fb4c126a95330d/ReadmeImages/result_1.jpg -------------------------------------------------------------------------------- /ReadmeImages/result_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parap1uie-s/Keras-RFCN/5466d0da653041cf2e1ae74d26fb4c126a95330d/ReadmeImages/result_2.jpg -------------------------------------------------------------------------------- /data.pk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parap1uie-s/Keras-RFCN/5466d0da653041cf2e1ae74d26fb4c126a95330d/data.pk --------------------------------------------------------------------------------