├── README.md
├── config.pickle
├── doc
│   ├── fit_web.jpg
│   ├── img-fit.jpg
│   └── mian_web.jpg
├── frcnn_api.py
├── issue_template.md
├── keras_frcnn
│   ├── FixedBatchNormalization.py
│   ├── RoiPoolingConv.py
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── FixedBatchNormalization.cpython-35.pyc
│   │   ├── RoiPoolingConv.cpython-35.pyc
│   │   ├── __init__.cpython-35.pyc
│   │   ├── config.cpython-35.pyc
│   │   ├── data_augment.cpython-35.pyc
│   │   ├── data_generators.cpython-35.pyc
│   │   ├── losses.cpython-35.pyc
│   │   ├── pascal_voc_parser.cpython-35.pyc
│   │   ├── resnet.cpython-35.pyc
│   │   └── roi_helpers.cpython-35.pyc
│   ├── config.py
│   ├── data_augment.py
│   ├── data_generators.py
│   ├── losses.py
│   ├── pascal_voc_parser.py
│   ├── resnet.py
│   ├── roi_helpers.py
│   ├── simple_parser.py
│   └── vgg.py
├── measure_map.py
├── pic_tmp
│   ├── 001.jpg
│   ├── 002.jpg
│   ├── 003.jpg
│   ├── 006.jpg
│   └── 007.jpg
├── static
│   ├── css
│   │   └── user_form.css
│   ├── img
│   │   └── logo.png
│   └── js
│       └── jquery-1.6.2.js
├── templates
│   └── img_fit.html
├── test_frcnn.py
└── train_frcnn.py

/README.md:
--------------------------------------------------------------------------------
1 | # keras-frcnn-web
2 |
3 | Build a Faster R-CNN network with Keras; once training is complete, the detection API is exposed as a REST interface so that backend services can call it easily.
4 |
5 | # Linux environment setup
6 | ```Bash
7 | 1. wget https://www.python.org/ftp/python/3.5.2/Python-3.5.2.tar.xz
8 | 2. tar Jxvf Python-3.5.2.tar.xz
9 | 3. cd Python-3.5.2
10 | 4. ./configure --prefix=/usr/local/python3
11 | 5. make && make install # on success you should see: Ignoring ensurepip failure: pip 7.1.2 requires SSL/TLS
12 | 6. ln -s /usr/local/python3/bin/pip3.5 /usr/local/bin/pip3.5
13 | 7. ln -s /usr/local/python3/bin/python3.5 /usr/local/bin/python3
14 | 8. pip3.5 install --upgrade pip
15 | 9. pip3.5 install keras
16 | 10. pip3.5 install tensorflow
17 | 11. pip3.5 install flask
18 | 12. pip3.5 install flask_httpauth
19 | 13. pip3.5 install werkzeug
20 | 14. pip3.5 install opencv-python
21 | ```
22 |
23 | # Dataset format
24 |
25 | The dataset follows the VOC2007 layout:
26 |
27 | Top-level directory
28 |
29 | Annotations - ground-truth object regions
30 |
31 | ImageSets - class labels
32 |
33 | JPEGImages - images
34 |
35 |
36 | For how to build such a dataset, see this blog post:
37 |
38 | https://blog.csdn.net/xvshu/article/details/81298625
39 |
40 | # Training command
41 |
42 | >python train_frcnn.py -p $your_train_voc2007_path
43 |
44 |
45 | # Testing command
46 |
47 | >python test_frcnn.py -p $your_test_img_path
48 |
49 |
50 | Test result:
51 |
52 |
53 |
54 | # API launch command
55 |
56 | >python frcnn_api.py
57 |
58 |
59 | # Test page
60 | ## Upload an image
61 |
62 |
63 |
64 | ## Detection result
65 |
66 |
67 |
68 |
69 |
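# Example API call

A minimal sketch of how a backend could call the detection endpoint exposed by frcnn_api.py. The route `/img/fit` and the multipart field name `imagefile` come from the Flask handler in frcnn_api.py; `<server-ip>` is a placeholder for whatever host you pass to `app.run(...)` (port 8099 by default), and the `requests` package is an extra dependency (`pip3.5 install requests`).

```Python
import requests

# POST a local image to the detection endpoint; the multipart field name
# must be 'imagefile', matching request.files['imagefile'] in frcnn_api.py.
with open('pic_tmp/001.jpg', 'rb') as f:
    resp = requests.post('http://<server-ip>:8099/img/fit',
                         files={'imagefile': ('001.jpg', f, 'image/jpeg')})

print(resp.status_code)
print(resp.text)  # the body is whatever test_frcnn.tf_fit_img returns
```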
--------------------------------------------------------------------------------
/config.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/config.pickle
--------------------------------------------------------------------------------
/doc/fit_web.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/doc/fit_web.jpg
--------------------------------------------------------------------------------
/doc/img-fit.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/doc/img-fit.jpg
--------------------------------------------------------------------------------
/doc/mian_web.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/doc/mian_web.jpg
--------------------------------------------------------------------------------
/frcnn_api.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, abort, make_response, request, url_for, render_template
2 | from flask_httpauth import HTTPBasicAuth
3 | import test_frcnn
4 | import os
5 | from werkzeug.utils import secure_filename
6 | import cv2
7 | from scipy import misc
8 |
9 | app = Flask(__name__)
10 | # maximum upload size: 16 MB
11 | app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
12 | auth = HTTPBasicAuth()
13 |
14 | # path where images received in POST requests are saved
15 | UPLOAD_FOLDER = 'pic_tmp/'
16 | if not os.path.exists(UPLOAD_FOLDER):
17 |     os.makedirs(UPLOAD_FOLDER)
18 | else:
19 |     pass
20 | ALLOWED_EXTENSIONS = set(['png', 'jpg', 'jpeg'])
21 | app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
22 |
23 |
24 | @app.route('/')
25 | def index():
26 |     return render_template("img_fit.html")
27 |
28 | @app.route('/img/fit', methods=['POST'])
29 | def face_insert():
30 |     # get the uploaded image from the POST request
31 |     upload_files = request.files['imagefile']
32 |     # save the posted image to the local upload folder
33 |     file = upload_files
34 |     filename = secure_filename(file.filename)
35 |     file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
36 |     image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
37 |     print(image_path)
38 |     img = cv2.imread(os.path.expanduser(image_path))
39 |     # img = misc.imread(os.path.expanduser(image_path), mode='RGB')
40 |
41 |     return test_frcnn.tf_fit_img(img)
42 |
43 |
44 | @auth.get_password
45 | def get_password(username):
46 |     if username == 'root':
47 |         return 'root'
48 |     return None
49 |
50 |
51 | @auth.error_handler
52 | def unauthorized():
53 |     return make_response(jsonify({'error': 'Unauthorized access'}), 401)
54 |
55 |
56 | @app.errorhandler(400)
57 | def not_found(error):
58 |     return make_response(jsonify({'error': 'Invalid data!'}), 400)
59 |
60 | if __name__ == '__main__':
61 |     app.run(host='172.30.53.250', port=8099)
62 |
--------------------------------------------------------------------------------
/issue_template.md:
--------------------------------------------------------------------------------
1 | NOTE: this repo is now deprecated. Instead, I recommend keras users use https://github.com/delftrobotics/keras-retinanet which runs faster and is more accurate.
2 | -------------------------------------------------------------------------------- /keras_frcnn/FixedBatchNormalization.py: -------------------------------------------------------------------------------- 1 | from keras.engine import Layer, InputSpec 2 | from keras import initializers, regularizers 3 | from keras import backend as K 4 | 5 | 6 | class FixedBatchNormalization(Layer): 7 | 8 | def __init__(self, epsilon=1e-3, axis=-1, 9 | weights=None, beta_init='zero', gamma_init='one', 10 | gamma_regularizer=None, beta_regularizer=None, **kwargs): 11 | 12 | self.supports_masking = True 13 | self.beta_init = initializers.get(beta_init) 14 | self.gamma_init = initializers.get(gamma_init) 15 | self.epsilon = epsilon 16 | self.axis = axis 17 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 18 | self.beta_regularizer = regularizers.get(beta_regularizer) 19 | self.initial_weights = weights 20 | super(FixedBatchNormalization, self).__init__(**kwargs) 21 | 22 | def build(self, input_shape): 23 | self.input_spec = [InputSpec(shape=input_shape)] 24 | shape = (input_shape[self.axis],) 25 | 26 | self.gamma = self.add_weight(shape, 27 | initializer=self.gamma_init, 28 | regularizer=self.gamma_regularizer, 29 | name='{}_gamma'.format(self.name), 30 | trainable=False) 31 | self.beta = self.add_weight(shape, 32 | initializer=self.beta_init, 33 | regularizer=self.beta_regularizer, 34 | name='{}_beta'.format(self.name), 35 | trainable=False) 36 | self.running_mean = self.add_weight(shape, initializer='zero', 37 | name='{}_running_mean'.format(self.name), 38 | trainable=False) 39 | self.running_std = self.add_weight(shape, initializer='one', 40 | name='{}_running_std'.format(self.name), 41 | trainable=False) 42 | 43 | if self.initial_weights is not None: 44 | self.set_weights(self.initial_weights) 45 | del self.initial_weights 46 | 47 | self.built = True 48 | 49 | def call(self, x, mask=None): 50 | 51 | assert self.built, 'Layer must be built before being called' 52 | input_shape = K.int_shape(x) 53 | 54 | reduction_axes = list(range(len(input_shape))) 55 | del reduction_axes[self.axis] 56 | broadcast_shape = [1] * len(input_shape) 57 | broadcast_shape[self.axis] = input_shape[self.axis] 58 | 59 | if sorted(reduction_axes) == range(K.ndim(x))[:-1]: 60 | x_normed = K.batch_normalization( 61 | x, self.running_mean, self.running_std, 62 | self.beta, self.gamma, 63 | epsilon=self.epsilon) 64 | else: 65 | # need broadcasting 66 | broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape) 67 | broadcast_running_std = K.reshape(self.running_std, broadcast_shape) 68 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 69 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 70 | x_normed = K.batch_normalization( 71 | x, broadcast_running_mean, broadcast_running_std, 72 | broadcast_beta, broadcast_gamma, 73 | epsilon=self.epsilon) 74 | 75 | return x_normed 76 | 77 | def get_config(self): 78 | config = {'epsilon': self.epsilon, 79 | 'axis': self.axis, 80 | 'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None, 81 | 'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None} 82 | base_config = super(FixedBatchNormalization, self).get_config() 83 | return dict(list(base_config.items()) + list(config.items())) -------------------------------------------------------------------------------- /keras_frcnn/RoiPoolingConv.py: -------------------------------------------------------------------------------- 1 | from 
keras.engine.topology import Layer 2 | import keras.backend as K 3 | 4 | if K.backend() == 'tensorflow': 5 | import tensorflow as tf 6 | 7 | class RoiPoolingConv(Layer): 8 | '''ROI pooling layer for 2D inputs. 9 | See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition, 10 | K. He, X. Zhang, S. Ren, J. Sun 11 | # Arguments 12 | pool_size: int 13 | Size of pooling region to use. pool_size = 7 will result in a 7x7 region. 14 | num_rois: number of regions of interest to be used 15 | # Input shape 16 | list of two 4D tensors [X_img,X_roi] with shape: 17 | X_img: 18 | `(1, channels, rows, cols)` if dim_ordering='th' 19 | or 4D tensor with shape: 20 | `(1, rows, cols, channels)` if dim_ordering='tf'. 21 | X_roi: 22 | `(1,num_rois,4)` list of rois, with ordering (x,y,w,h) 23 | # Output shape 24 | 3D tensor with shape: 25 | `(1, num_rois, channels, pool_size, pool_size)` 26 | ''' 27 | def __init__(self, pool_size, num_rois, **kwargs): 28 | 29 | self.dim_ordering = K.image_dim_ordering() 30 | assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}' 31 | 32 | self.pool_size = pool_size 33 | self.num_rois = num_rois 34 | 35 | super(RoiPoolingConv, self).__init__(**kwargs) 36 | 37 | def build(self, input_shape): 38 | if self.dim_ordering == 'th': 39 | self.nb_channels = input_shape[0][1] 40 | elif self.dim_ordering == 'tf': 41 | self.nb_channels = input_shape[0][3] 42 | 43 | def compute_output_shape(self, input_shape): 44 | if self.dim_ordering == 'th': 45 | return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size 46 | else: 47 | return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels 48 | 49 | def call(self, x, mask=None): 50 | 51 | assert(len(x) == 2) 52 | 53 | img = x[0] 54 | rois = x[1] 55 | 56 | input_shape = K.shape(img) 57 | 58 | outputs = [] 59 | 60 | for roi_idx in range(self.num_rois): 61 | 62 | x = rois[0, roi_idx, 0] 63 | y = rois[0, roi_idx, 1] 64 | w = rois[0, roi_idx, 2] 65 | h = rois[0, roi_idx, 3] 66 | 67 | row_length = w / float(self.pool_size) 68 | col_length = h / float(self.pool_size) 69 | 70 | num_pool_regions = self.pool_size 71 | 72 | #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op 73 | # in theano. 
The theano implementation is much less efficient and leads to long compile times 74 | 75 | if self.dim_ordering == 'th': 76 | for jy in range(num_pool_regions): 77 | for ix in range(num_pool_regions): 78 | x1 = x + ix * row_length 79 | x2 = x1 + row_length 80 | y1 = y + jy * col_length 81 | y2 = y1 + col_length 82 | 83 | x1 = K.cast(x1, 'int32') 84 | x2 = K.cast(x2, 'int32') 85 | y1 = K.cast(y1, 'int32') 86 | y2 = K.cast(y2, 'int32') 87 | 88 | x2 = x1 + K.maximum(1,x2-x1) 89 | y2 = y1 + K.maximum(1,y2-y1) 90 | 91 | new_shape = [input_shape[0], input_shape[1], 92 | y2 - y1, x2 - x1] 93 | 94 | x_crop = img[:, :, y1:y2, x1:x2] 95 | xm = K.reshape(x_crop, new_shape) 96 | pooled_val = K.max(xm, axis=(2, 3)) 97 | outputs.append(pooled_val) 98 | 99 | elif self.dim_ordering == 'tf': 100 | x = K.cast(x, 'int32') 101 | y = K.cast(y, 'int32') 102 | w = K.cast(w, 'int32') 103 | h = K.cast(h, 'int32') 104 | 105 | rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size)) 106 | outputs.append(rs) 107 | 108 | final_output = K.concatenate(outputs, axis=0) 109 | final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels)) 110 | 111 | if self.dim_ordering == 'th': 112 | final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3)) 113 | else: 114 | final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4)) 115 | 116 | return final_output 117 | 118 | 119 | def get_config(self): 120 | config = {'pool_size': self.pool_size, 121 | 'num_rois': self.num_rois} 122 | base_config = super(RoiPoolingConv, self).get_config() 123 | return dict(list(base_config.items()) + list(config.items())) 124 | -------------------------------------------------------------------------------- /keras_frcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__init__.py -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/FixedBatchNormalization.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/FixedBatchNormalization.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/RoiPoolingConv.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/RoiPoolingConv.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- 
/keras_frcnn/__pycache__/data_augment.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/data_augment.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/data_generators.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/data_generators.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/losses.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/losses.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/pascal_voc_parser.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/pascal_voc_parser.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/resnet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/resnet.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/__pycache__/roi_helpers.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/keras_frcnn/__pycache__/roi_helpers.cpython-35.pyc -------------------------------------------------------------------------------- /keras_frcnn/config.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | import math 3 | 4 | class Config: 5 | 6 | def __init__(self): 7 | 8 | self.verbose = True 9 | 10 | self.network = 'resnet50' 11 | 12 | # setting for data augmentation 13 | self.use_horizontal_flips = False 14 | self.use_vertical_flips = False 15 | self.rot_90 = False 16 | 17 | # anchor box scales 18 | self.anchor_box_scales = [128, 256, 512] 19 | 20 | # anchor box ratios 21 | self.anchor_box_ratios = [[1, 1], [1./math.sqrt(2), 2./math.sqrt(2)], [2./math.sqrt(2), 1./math.sqrt(2)]] 22 | 23 | # size to resize the smallest side of the image 24 | self.im_size = 600 25 | 26 | # image channel-wise mean to subtract 27 | self.img_channel_mean = [103.939, 116.779, 123.68] 28 | self.img_scaling_factor = 1.0 29 | 30 | # number of ROIs at once 31 | self.num_rois = 4 32 | 33 | # stride at the RPN (this depends on the network configuration) 34 | self.rpn_stride = 16 35 | 36 | self.balanced_classes = False 37 | 38 | # scaling the stdev 39 | self.std_scaling = 4.0 40 | self.classifier_regr_std = [8.0, 8.0, 4.0, 4.0] 41 | 42 | # overlaps for RPN 43 | self.rpn_min_overlap = 0.3 44 | self.rpn_max_overlap = 0.7 45 | 46 | # overlaps for classifier ROIs 47 | self.classifier_min_overlap = 0.1 48 | self.classifier_max_overlap = 0.5 49 
| 50 | # placeholder for the class mapping, automatically generated by the parser 51 | self.class_mapping = None 52 | 53 | #location of pretrained weights for the base network 54 | # weight files can be found at: 55 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5 56 | # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5 57 | 58 | self.model_path = 'model_frcnn.vgg.hdf5' 59 | -------------------------------------------------------------------------------- /keras_frcnn/data_augment.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import copy 4 | 5 | 6 | def augment(img_data, config, augment=True): 7 | assert 'filepath' in img_data 8 | assert 'bboxes' in img_data 9 | assert 'width' in img_data 10 | assert 'height' in img_data 11 | 12 | img_data_aug = copy.deepcopy(img_data) 13 | 14 | img = cv2.imread(img_data_aug['filepath']) 15 | 16 | if augment: 17 | rows, cols = img.shape[:2] 18 | 19 | if config.use_horizontal_flips and np.random.randint(0, 2) == 0: 20 | img = cv2.flip(img, 1) 21 | for bbox in img_data_aug['bboxes']: 22 | x1 = bbox['x1'] 23 | x2 = bbox['x2'] 24 | bbox['x2'] = cols - x1 25 | bbox['x1'] = cols - x2 26 | 27 | if config.use_vertical_flips and np.random.randint(0, 2) == 0: 28 | img = cv2.flip(img, 0) 29 | for bbox in img_data_aug['bboxes']: 30 | y1 = bbox['y1'] 31 | y2 = bbox['y2'] 32 | bbox['y2'] = rows - y1 33 | bbox['y1'] = rows - y2 34 | 35 | if config.rot_90: 36 | angle = np.random.choice([0,90,180,270],1)[0] 37 | if angle == 270: 38 | img = np.transpose(img, (1,0,2)) 39 | img = cv2.flip(img, 0) 40 | elif angle == 180: 41 | img = cv2.flip(img, -1) 42 | elif angle == 90: 43 | img = np.transpose(img, (1,0,2)) 44 | img = cv2.flip(img, 1) 45 | elif angle == 0: 46 | pass 47 | 48 | for bbox in img_data_aug['bboxes']: 49 | x1 = bbox['x1'] 50 | x2 = bbox['x2'] 51 | y1 = bbox['y1'] 52 | y2 = bbox['y2'] 53 | if angle == 270: 54 | bbox['x1'] = y1 55 | bbox['x2'] = y2 56 | bbox['y1'] = cols - x2 57 | bbox['y2'] = cols - x1 58 | elif angle == 180: 59 | bbox['x2'] = cols - x1 60 | bbox['x1'] = cols - x2 61 | bbox['y2'] = rows - y1 62 | bbox['y1'] = rows - y2 63 | elif angle == 90: 64 | bbox['x1'] = rows - y2 65 | bbox['x2'] = rows - y1 66 | bbox['y1'] = x1 67 | bbox['y2'] = x2 68 | elif angle == 0: 69 | pass 70 | 71 | img_data_aug['width'] = img.shape[1] 72 | img_data_aug['height'] = img.shape[0] 73 | return img_data_aug, img 74 | -------------------------------------------------------------------------------- /keras_frcnn/data_generators.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import cv2 4 | import random 5 | import copy 6 | from . 
import data_augment 7 | import threading 8 | import itertools 9 | 10 | 11 | def union(au, bu, area_intersection): 12 | area_a = (au[2] - au[0]) * (au[3] - au[1]) 13 | area_b = (bu[2] - bu[0]) * (bu[3] - bu[1]) 14 | area_union = area_a + area_b - area_intersection 15 | return area_union 16 | 17 | 18 | def intersection(ai, bi): 19 | x = max(ai[0], bi[0]) 20 | y = max(ai[1], bi[1]) 21 | w = min(ai[2], bi[2]) - x 22 | h = min(ai[3], bi[3]) - y 23 | if w < 0 or h < 0: 24 | return 0 25 | return w*h 26 | 27 | 28 | def iou(a, b): 29 | # a and b should be (x1,y1,x2,y2) 30 | 31 | if a[0] >= a[2] or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]: 32 | return 0.0 33 | 34 | area_i = intersection(a, b) 35 | area_u = union(a, b, area_i) 36 | 37 | return float(area_i) / float(area_u + 1e-6) 38 | 39 | 40 | def get_new_img_size(width, height, img_min_side=600): 41 | if width <= height: 42 | f = float(img_min_side) / width 43 | resized_height = int(f * height) 44 | resized_width = img_min_side 45 | else: 46 | f = float(img_min_side) / height 47 | resized_width = int(f * width) 48 | resized_height = img_min_side 49 | 50 | return resized_width, resized_height 51 | 52 | 53 | class SampleSelector: 54 | def __init__(self, class_count): 55 | # ignore classes that have zero samples 56 | self.classes = [b for b in class_count.keys() if class_count[b] > 0] 57 | self.class_cycle = itertools.cycle(self.classes) 58 | self.curr_class = next(self.class_cycle) 59 | 60 | def skip_sample_for_balanced_class(self, img_data): 61 | 62 | class_in_img = False 63 | 64 | for bbox in img_data['bboxes']: 65 | 66 | cls_name = bbox['class'] 67 | 68 | if cls_name == self.curr_class: 69 | class_in_img = True 70 | self.curr_class = next(self.class_cycle) 71 | break 72 | 73 | if class_in_img: 74 | return False 75 | else: 76 | return True 77 | 78 | 79 | def calc_rpn(C, img_data, width, height, resized_width, resized_height, img_length_calc_function): 80 | 81 | downscale = float(C.rpn_stride) 82 | anchor_sizes = C.anchor_box_scales 83 | anchor_ratios = C.anchor_box_ratios 84 | num_anchors = len(anchor_sizes) * len(anchor_ratios) 85 | 86 | # calculate the output map size based on the network architecture 87 | 88 | (output_width, output_height) = img_length_calc_function(resized_width, resized_height) 89 | 90 | n_anchratios = len(anchor_ratios) 91 | 92 | # initialise empty output objectives 93 | y_rpn_overlap = np.zeros((output_height, output_width, num_anchors)) 94 | y_is_box_valid = np.zeros((output_height, output_width, num_anchors)) 95 | y_rpn_regr = np.zeros((output_height, output_width, num_anchors * 4)) 96 | 97 | num_bboxes = len(img_data['bboxes']) 98 | 99 | num_anchors_for_bbox = np.zeros(num_bboxes).astype(int) 100 | best_anchor_for_bbox = -1*np.ones((num_bboxes, 4)).astype(int) 101 | best_iou_for_bbox = np.zeros(num_bboxes).astype(np.float32) 102 | best_x_for_bbox = np.zeros((num_bboxes, 4)).astype(int) 103 | best_dx_for_bbox = np.zeros((num_bboxes, 4)).astype(np.float32) 104 | 105 | # get the GT box coordinates, and resize to account for image resizing 106 | gta = np.zeros((num_bboxes, 4)) 107 | for bbox_num, bbox in enumerate(img_data['bboxes']): 108 | # get the GT box coordinates, and resize to account for image resizing 109 | gta[bbox_num, 0] = bbox['x1'] * (resized_width / float(width)) 110 | gta[bbox_num, 1] = bbox['x2'] * (resized_width / float(width)) 111 | gta[bbox_num, 2] = bbox['y1'] * (resized_height / float(height)) 112 | gta[bbox_num, 3] = bbox['y2'] * (resized_height / float(height)) 113 | 114 | # rpn ground truth 115 | 
116 | for anchor_size_idx in range(len(anchor_sizes)): 117 | for anchor_ratio_idx in range(n_anchratios): 118 | anchor_x = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][0] 119 | anchor_y = anchor_sizes[anchor_size_idx] * anchor_ratios[anchor_ratio_idx][1] 120 | 121 | for ix in range(output_width): 122 | # x-coordinates of the current anchor box 123 | x1_anc = downscale * (ix + 0.5) - anchor_x / 2 124 | x2_anc = downscale * (ix + 0.5) + anchor_x / 2 125 | 126 | # ignore boxes that go across image boundaries 127 | if x1_anc < 0 or x2_anc > resized_width: 128 | continue 129 | 130 | for jy in range(output_height): 131 | 132 | # y-coordinates of the current anchor box 133 | y1_anc = downscale * (jy + 0.5) - anchor_y / 2 134 | y2_anc = downscale * (jy + 0.5) + anchor_y / 2 135 | 136 | # ignore boxes that go across image boundaries 137 | if y1_anc < 0 or y2_anc > resized_height: 138 | continue 139 | 140 | # bbox_type indicates whether an anchor should be a target 141 | bbox_type = 'neg' 142 | 143 | # this is the best IOU for the (x,y) coord and the current anchor 144 | # note that this is different from the best IOU for a GT bbox 145 | best_iou_for_loc = 0.0 146 | 147 | for bbox_num in range(num_bboxes): 148 | 149 | # get IOU of the current GT box and the current anchor box 150 | curr_iou = iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1_anc, y1_anc, x2_anc, y2_anc]) 151 | # calculate the regression targets if they will be needed 152 | if curr_iou > best_iou_for_bbox[bbox_num] or curr_iou > C.rpn_max_overlap: 153 | cx = (gta[bbox_num, 0] + gta[bbox_num, 1]) / 2.0 154 | cy = (gta[bbox_num, 2] + gta[bbox_num, 3]) / 2.0 155 | cxa = (x1_anc + x2_anc)/2.0 156 | cya = (y1_anc + y2_anc)/2.0 157 | 158 | tx = (cx - cxa) / (x2_anc - x1_anc) 159 | ty = (cy - cya) / (y2_anc - y1_anc) 160 | tw = np.log((gta[bbox_num, 1] - gta[bbox_num, 0]) / (x2_anc - x1_anc)) 161 | th = np.log((gta[bbox_num, 3] - gta[bbox_num, 2]) / (y2_anc - y1_anc)) 162 | 163 | if img_data['bboxes'][bbox_num]['class'] != 'bg': 164 | 165 | # all GT boxes should be mapped to an anchor box, so we keep track of which anchor box was best 166 | if curr_iou > best_iou_for_bbox[bbox_num]: 167 | best_anchor_for_bbox[bbox_num] = [jy, ix, anchor_ratio_idx, anchor_size_idx] 168 | best_iou_for_bbox[bbox_num] = curr_iou 169 | best_x_for_bbox[bbox_num,:] = [x1_anc, x2_anc, y1_anc, y2_anc] 170 | best_dx_for_bbox[bbox_num,:] = [tx, ty, tw, th] 171 | 172 | # we set the anchor to positive if the IOU is >0.7 (it does not matter if there was another better box, it just indicates overlap) 173 | if curr_iou > C.rpn_max_overlap: 174 | bbox_type = 'pos' 175 | num_anchors_for_bbox[bbox_num] += 1 176 | # we update the regression layer target if this IOU is the best for the current (x,y) and anchor position 177 | if curr_iou > best_iou_for_loc: 178 | best_iou_for_loc = curr_iou 179 | best_regr = (tx, ty, tw, th) 180 | 181 | # if the IOU is >0.3 and <0.7, it is ambiguous and no included in the objective 182 | if C.rpn_min_overlap < curr_iou < C.rpn_max_overlap: 183 | # gray zone between neg and pos 184 | if bbox_type != 'pos': 185 | bbox_type = 'neutral' 186 | 187 | # turn on or off outputs depending on IOUs 188 | if bbox_type == 'neg': 189 | y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1 190 | y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0 191 | elif bbox_type == 'neutral': 192 | y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0 
193 | y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 0 194 | elif bbox_type == 'pos': 195 | y_is_box_valid[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1 196 | y_rpn_overlap[jy, ix, anchor_ratio_idx + n_anchratios * anchor_size_idx] = 1 197 | start = 4 * (anchor_ratio_idx + n_anchratios * anchor_size_idx) 198 | y_rpn_regr[jy, ix, start:start+4] = best_regr 199 | 200 | # we ensure that every bbox has at least one positive RPN region 201 | 202 | for idx in range(num_anchors_for_bbox.shape[0]): 203 | if num_anchors_for_bbox[idx] == 0: 204 | # no box with an IOU greater than zero ... 205 | if best_anchor_for_bbox[idx, 0] == -1: 206 | continue 207 | y_is_box_valid[ 208 | best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], best_anchor_for_bbox[idx,2] + n_anchratios * 209 | best_anchor_for_bbox[idx,3]] = 1 210 | y_rpn_overlap[ 211 | best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], best_anchor_for_bbox[idx,2] + n_anchratios * 212 | best_anchor_for_bbox[idx,3]] = 1 213 | start = 4 * (best_anchor_for_bbox[idx,2] + n_anchratios * best_anchor_for_bbox[idx,3]) 214 | y_rpn_regr[ 215 | best_anchor_for_bbox[idx,0], best_anchor_for_bbox[idx,1], start:start+4] = best_dx_for_bbox[idx, :] 216 | 217 | y_rpn_overlap = np.transpose(y_rpn_overlap, (2, 0, 1)) 218 | y_rpn_overlap = np.expand_dims(y_rpn_overlap, axis=0) 219 | 220 | y_is_box_valid = np.transpose(y_is_box_valid, (2, 0, 1)) 221 | y_is_box_valid = np.expand_dims(y_is_box_valid, axis=0) 222 | 223 | y_rpn_regr = np.transpose(y_rpn_regr, (2, 0, 1)) 224 | y_rpn_regr = np.expand_dims(y_rpn_regr, axis=0) 225 | 226 | pos_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 1, y_is_box_valid[0, :, :, :] == 1)) 227 | neg_locs = np.where(np.logical_and(y_rpn_overlap[0, :, :, :] == 0, y_is_box_valid[0, :, :, :] == 1)) 228 | 229 | num_pos = len(pos_locs[0]) 230 | 231 | # one issue is that the RPN has many more negative than positive regions, so we turn off some of the negative 232 | # regions. We also limit it to 256 regions. 233 | num_regions = 256 234 | 235 | if len(pos_locs[0]) > num_regions/2: 236 | val_locs = random.sample(range(len(pos_locs[0])), len(pos_locs[0]) - num_regions/2) 237 | y_is_box_valid[0, pos_locs[0][val_locs], pos_locs[1][val_locs], pos_locs[2][val_locs]] = 0 238 | num_pos = num_regions/2 239 | 240 | if len(neg_locs[0]) + num_pos > num_regions: 241 | val_locs = random.sample(range(len(neg_locs[0])), len(neg_locs[0]) - num_pos) 242 | y_is_box_valid[0, neg_locs[0][val_locs], neg_locs[1][val_locs], neg_locs[2][val_locs]] = 0 243 | 244 | y_rpn_cls = np.concatenate([y_is_box_valid, y_rpn_overlap], axis=1) 245 | y_rpn_regr = np.concatenate([np.repeat(y_rpn_overlap, 4, axis=1), y_rpn_regr], axis=1) 246 | 247 | return np.copy(y_rpn_cls), np.copy(y_rpn_regr) 248 | 249 | 250 | class threadsafe_iter: 251 | """Takes an iterator/generator and makes it thread-safe by 252 | serializing call to the `next` method of given iterator/generator. 253 | """ 254 | def __init__(self, it): 255 | self.it = it 256 | self.lock = threading.Lock() 257 | 258 | def __iter__(self): 259 | return self 260 | 261 | def next(self): 262 | with self.lock: 263 | return next(self.it) 264 | 265 | 266 | def threadsafe_generator(f): 267 | """A decorator that takes a generator function and makes it thread-safe. 
268 | """ 269 | def g(*a, **kw): 270 | return threadsafe_iter(f(*a, **kw)) 271 | return g 272 | 273 | def get_anchor_gt(all_img_data, class_count, C, img_length_calc_function, backend, mode='train'): 274 | 275 | # The following line is not useful with Python 3.5, it is kept for the legacy 276 | # all_img_data = sorted(all_img_data) 277 | 278 | sample_selector = SampleSelector(class_count) 279 | 280 | while True: 281 | if mode == 'train': 282 | np.random.shuffle(all_img_data) 283 | 284 | for img_data in all_img_data: 285 | try: 286 | 287 | if C.balanced_classes and sample_selector.skip_sample_for_balanced_class(img_data): 288 | continue 289 | 290 | # read in image, and optionally add augmentation 291 | 292 | if mode == 'train': 293 | img_data_aug, x_img = data_augment.augment(img_data, C, augment=True) 294 | else: 295 | img_data_aug, x_img = data_augment.augment(img_data, C, augment=False) 296 | 297 | (width, height) = (img_data_aug['width'], img_data_aug['height']) 298 | (rows, cols, _) = x_img.shape 299 | 300 | assert cols == width 301 | assert rows == height 302 | 303 | # get image dimensions for resizing 304 | (resized_width, resized_height) = get_new_img_size(width, height, C.im_size) 305 | 306 | # resize the image so that smalles side is length = 600px 307 | x_img = cv2.resize(x_img, (resized_width, resized_height), interpolation=cv2.INTER_CUBIC) 308 | 309 | try: 310 | y_rpn_cls, y_rpn_regr = calc_rpn(C, img_data_aug, width, height, resized_width, resized_height, img_length_calc_function) 311 | except: 312 | continue 313 | 314 | # Zero-center by mean pixel, and preprocess image 315 | 316 | x_img = x_img[:,:, (2, 1, 0)] # BGR -> RGB 317 | x_img = x_img.astype(np.float32) 318 | x_img[:, :, 0] -= C.img_channel_mean[0] 319 | x_img[:, :, 1] -= C.img_channel_mean[1] 320 | x_img[:, :, 2] -= C.img_channel_mean[2] 321 | x_img /= C.img_scaling_factor 322 | 323 | x_img = np.transpose(x_img, (2, 0, 1)) 324 | x_img = np.expand_dims(x_img, axis=0) 325 | 326 | y_rpn_regr[:, y_rpn_regr.shape[1]//2:, :, :] *= C.std_scaling 327 | 328 | if backend == 'tf': 329 | x_img = np.transpose(x_img, (0, 2, 3, 1)) 330 | y_rpn_cls = np.transpose(y_rpn_cls, (0, 2, 3, 1)) 331 | y_rpn_regr = np.transpose(y_rpn_regr, (0, 2, 3, 1)) 332 | 333 | yield np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug 334 | 335 | except Exception as e: 336 | print(e) 337 | continue 338 | -------------------------------------------------------------------------------- /keras_frcnn/losses.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | from keras.objectives import categorical_crossentropy 3 | 4 | if K.image_dim_ordering() == 'tf': 5 | import tensorflow as tf 6 | 7 | lambda_rpn_regr = 1.0 8 | lambda_rpn_class = 1.0 9 | 10 | lambda_cls_regr = 1.0 11 | lambda_cls_class = 1.0 12 | 13 | epsilon = 1e-4 14 | 15 | 16 | def rpn_loss_regr(num_anchors): 17 | def rpn_loss_regr_fixed_num(y_true, y_pred): 18 | if K.image_dim_ordering() == 'th': 19 | x = y_true[:, 4 * num_anchors:, :, :] - y_pred 20 | x_abs = K.abs(x) 21 | x_bool = K.less_equal(x_abs, 1.0) 22 | return lambda_rpn_regr * K.sum( 23 | y_true[:, :4 * num_anchors, :, :] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :4 * num_anchors, :, :]) 24 | else: 25 | x = y_true[:, :, :, 4 * num_anchors:] - y_pred 26 | x_abs = K.abs(x) 27 | x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32) 28 | 29 | return lambda_rpn_regr * K.sum( 30 | y_true[:, :, :, :4 * 
num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :, :4 * num_anchors]) 31 | 32 | return rpn_loss_regr_fixed_num 33 | 34 | 35 | def rpn_loss_cls(num_anchors): 36 | def rpn_loss_cls_fixed_num(y_true, y_pred): 37 | if K.image_dim_ordering() == 'tf': 38 | return lambda_rpn_class * K.sum(y_true[:, :, :, :num_anchors] * K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])) / K.sum(epsilon + y_true[:, :, :, :num_anchors]) 39 | else: 40 | return lambda_rpn_class * K.sum(y_true[:, :num_anchors, :, :] * K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, num_anchors:, :, :])) / K.sum(epsilon + y_true[:, :num_anchors, :, :]) 41 | 42 | return rpn_loss_cls_fixed_num 43 | 44 | 45 | def class_loss_regr(num_classes): 46 | def class_loss_regr_fixed_num(y_true, y_pred): 47 | x = y_true[:, :, 4*num_classes:] - y_pred 48 | x_abs = K.abs(x) 49 | x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32') 50 | return lambda_cls_regr * K.sum(y_true[:, :, :4*num_classes] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(epsilon + y_true[:, :, :4*num_classes]) 51 | return class_loss_regr_fixed_num 52 | 53 | 54 | def class_loss_cls(y_true, y_pred): 55 | return lambda_cls_class * K.mean(categorical_crossentropy(y_true[0, :, :], y_pred[0, :, :])) 56 | -------------------------------------------------------------------------------- /keras_frcnn/pascal_voc_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import xml.etree.ElementTree as ET 4 | import numpy as np 5 | def get_data(input_path): 6 | all_imgs = [] 7 | 8 | classes_count = {} 9 | 10 | class_mapping = {} 11 | 12 | visualise = False 13 | 14 | data_paths = [os.path.join(input_path,s) for s in ['VOC2007']] 15 | 16 | 17 | print('Parsing annotation files') 18 | 19 | for data_path in data_paths: 20 | 21 | annot_path = os.path.join(data_path, 'Annotations') 22 | imgs_path = os.path.join(data_path, 'JPEGImages') 23 | imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt') 24 | imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt') 25 | 26 | trainval_files = [] 27 | test_files = [] 28 | try: 29 | with open(imgsets_path_trainval) as f: 30 | for line in f: 31 | trainval_files.append(line.strip() + '.jpg') 32 | except Exception as e: 33 | print(e) 34 | 35 | try: 36 | with open(imgsets_path_test) as f: 37 | for line in f: 38 | test_files.append(line.strip() + '.jpg') 39 | except Exception as e: 40 | if data_path[-7:] == 'VOC2012': 41 | # this is expected, most pascal voc distibutions dont have the test.txt file 42 | pass 43 | else: 44 | print(e) 45 | 46 | annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)] 47 | idx = 0 48 | for annot in annots: 49 | try: 50 | idx += 1 51 | 52 | et = ET.parse(annot) 53 | element = et.getroot() 54 | 55 | element_objs = element.findall('object') 56 | element_filename = element.find('filename').text 57 | element_width = int(element.find('size').find('width').text) 58 | element_height = int(element.find('size').find('height').text) 59 | 60 | if len(element_objs) > 0: 61 | annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width, 62 | 'height': element_height, 'bboxes': []} 63 | 64 | if element_filename in trainval_files: 65 | annotation_data['imageset'] = 'trainval' 66 | elif element_filename in test_files: 67 | annotation_data['imageset'] = 'test' 68 | else: 69 | 
annotation_data['imageset'] = 'trainval' 70 | 71 | for element_obj in element_objs: 72 | class_name = element_obj.find('name').text 73 | if class_name not in classes_count: 74 | classes_count[class_name] = 1 75 | else: 76 | classes_count[class_name] += 1 77 | 78 | if class_name not in class_mapping: 79 | class_mapping[class_name] = len(class_mapping) 80 | 81 | obj_bbox = element_obj.find('bndbox') 82 | x1 = int(round(float(obj_bbox.find('xmin').text))) 83 | y1 = int(round(float(obj_bbox.find('ymin').text))) 84 | x2 = int(round(float(obj_bbox.find('xmax').text))) 85 | y2 = int(round(float(obj_bbox.find('ymax').text))) 86 | difficulty = int(element_obj.find('difficult').text) == 1 87 | annotation_data['bboxes'].append( 88 | {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty}) 89 | all_imgs.append(annotation_data) 90 | 91 | if visualise: 92 | img = cv2.imread(annotation_data['filepath']) 93 | for bbox in annotation_data['bboxes']: 94 | cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[ 95 | 'x2'], bbox['y2']), (0, 0, 255)) 96 | cv2.imshow('img', img) 97 | cv2.waitKey(0) 98 | 99 | except Exception as e: 100 | print(e) 101 | continue 102 | return all_imgs, classes_count, class_mapping 103 | -------------------------------------------------------------------------------- /keras_frcnn/resnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''ResNet50 model for Keras. 3 | # Reference: 4 | - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) 5 | Adapted from code contributed by BigMoyan. 6 | ''' 7 | 8 | from __future__ import print_function 9 | from __future__ import absolute_import 10 | 11 | from keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \ 12 | AveragePooling2D, TimeDistributed 13 | 14 | from keras import backend as K 15 | 16 | from keras_frcnn.RoiPoolingConv import RoiPoolingConv 17 | from keras_frcnn.FixedBatchNormalization import FixedBatchNormalization 18 | 19 | def get_weight_path(): 20 | if K.image_dim_ordering() == 'th': 21 | return 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5' 22 | else: 23 | return 'resnet50_weights_tf_dim_ordering_tf_kernels.h5' 24 | 25 | def get_img_output_length(width, height): 26 | def get_output_length(input_length): 27 | # zero_pad 28 | input_length += 6 29 | # apply 4 strided convolutions 30 | filter_sizes = [7, 3, 1, 1] 31 | stride = 2 32 | for filter_size in filter_sizes: 33 | input_length = (input_length - filter_size + stride) // stride 34 | return input_length 35 | 36 | return get_output_length(width), get_output_length(height) 37 | 38 | def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True): 39 | 40 | nb_filter1, nb_filter2, nb_filter3 = filters 41 | 42 | if K.image_dim_ordering() == 'tf': 43 | bn_axis = 3 44 | else: 45 | bn_axis = 1 46 | 47 | conv_name_base = 'res' + str(stage) + block + '_branch' 48 | bn_name_base = 'bn' + str(stage) + block + '_branch' 49 | 50 | x = Convolution2D(nb_filter1, (1, 1), name=conv_name_base + '2a', trainable=trainable)(input_tensor) 51 | x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 52 | x = Activation('relu')(x) 53 | 54 | x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x) 55 | x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 56 | x = Activation('relu')(x) 57 | 58 | x = 
Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x) 59 | x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 60 | 61 | x = Add()([x, input_tensor]) 62 | x = Activation('relu')(x) 63 | return x 64 | 65 | 66 | def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainable=True): 67 | 68 | # identity block time distributed 69 | 70 | nb_filter1, nb_filter2, nb_filter3 = filters 71 | if K.image_dim_ordering() == 'tf': 72 | bn_axis = 3 73 | else: 74 | bn_axis = 1 75 | 76 | conv_name_base = 'res' + str(stage) + block + '_branch' 77 | bn_name_base = 'bn' + str(stage) + block + '_branch' 78 | 79 | x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2a')(input_tensor) 80 | x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x) 81 | x = Activation('relu')(x) 82 | 83 | x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), trainable=trainable, kernel_initializer='normal',padding='same'), name=conv_name_base + '2b')(x) 84 | x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x) 85 | x = Activation('relu')(x) 86 | 87 | x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2c')(x) 88 | x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x) 89 | 90 | x = Add()([x, input_tensor]) 91 | x = Activation('relu')(x) 92 | 93 | return x 94 | 95 | def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), trainable=True): 96 | 97 | nb_filter1, nb_filter2, nb_filter3 = filters 98 | if K.image_dim_ordering() == 'tf': 99 | bn_axis = 3 100 | else: 101 | bn_axis = 1 102 | 103 | conv_name_base = 'res' + str(stage) + block + '_branch' 104 | bn_name_base = 'bn' + str(stage) + block + '_branch' 105 | 106 | x = Convolution2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', trainable=trainable)(input_tensor) 107 | x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 108 | x = Activation('relu')(x) 109 | 110 | x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x) 111 | x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 112 | x = Activation('relu')(x) 113 | 114 | x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x) 115 | x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 116 | 117 | shortcut = Convolution2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', trainable=trainable)(input_tensor) 118 | shortcut = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) 119 | 120 | x = Add()([x, shortcut]) 121 | x = Activation('relu')(x) 122 | return x 123 | 124 | 125 | def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape, strides=(2, 2), trainable=True): 126 | 127 | # conv block time distributed 128 | 129 | nb_filter1, nb_filter2, nb_filter3 = filters 130 | if K.image_dim_ordering() == 'tf': 131 | bn_axis = 3 132 | else: 133 | bn_axis = 1 134 | 135 | conv_name_base = 'res' + str(stage) + block + '_branch' 136 | bn_name_base = 'bn' + str(stage) + block + '_branch' 137 | 138 | x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), input_shape=input_shape, name=conv_name_base + 
'2a')(input_tensor) 139 | x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x) 140 | x = Activation('relu')(x) 141 | 142 | x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2b')(x) 143 | x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x) 144 | x = Activation('relu')(x) 145 | 146 | x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), kernel_initializer='normal'), name=conv_name_base + '2c', trainable=trainable)(x) 147 | x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x) 148 | 149 | shortcut = TimeDistributed(Convolution2D(nb_filter3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '1')(input_tensor) 150 | shortcut = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut) 151 | 152 | x = Add()([x, shortcut]) 153 | x = Activation('relu')(x) 154 | return x 155 | 156 | def nn_base(input_tensor=None, trainable=False): 157 | 158 | # Determine proper input shape 159 | if K.image_dim_ordering() == 'th': 160 | input_shape = (3, None, None) 161 | else: 162 | input_shape = (None, None, 3) 163 | 164 | if input_tensor is None: 165 | img_input = Input(shape=input_shape) 166 | else: 167 | if not K.is_keras_tensor(input_tensor): 168 | img_input = Input(tensor=input_tensor, shape=input_shape) 169 | else: 170 | img_input = input_tensor 171 | 172 | if K.image_dim_ordering() == 'tf': 173 | bn_axis = 3 174 | else: 175 | bn_axis = 1 176 | 177 | x = ZeroPadding2D((3, 3))(img_input) 178 | 179 | x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable = trainable)(x) 180 | x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x) 181 | x = Activation('relu')(x) 182 | x = MaxPooling2D((3, 3), strides=(2, 2))(x) 183 | 184 | x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable = trainable) 185 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable = trainable) 186 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable = trainable) 187 | 188 | x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable = trainable) 189 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable = trainable) 190 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable = trainable) 191 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable = trainable) 192 | 193 | x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable = trainable) 194 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable = trainable) 195 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable = trainable) 196 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable = trainable) 197 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable = trainable) 198 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable = trainable) 199 | 200 | return x 201 | 202 | 203 | def classifier_layers(x, input_shape, trainable=False): 204 | 205 | # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround 206 | # (hence a smaller stride in the region that follows the ROI pool) 207 | if K.backend() == 'tensorflow': 208 | x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, 
strides=(2, 2), trainable=trainable) 209 | elif K.backend() == 'theano': 210 | x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, strides=(1, 1), trainable=trainable) 211 | 212 | x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='b', trainable=trainable) 213 | x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='c', trainable=trainable) 214 | x = TimeDistributed(AveragePooling2D((7, 7)), name='avg_pool')(x) 215 | 216 | return x 217 | 218 | 219 | def rpn(base_layers,num_anchors): 220 | 221 | x = Convolution2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers) 222 | 223 | x_class = Convolution2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x) 224 | x_regr = Convolution2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x) 225 | 226 | return [x_class, x_regr, base_layers] 227 | 228 | def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False): 229 | 230 | # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround 231 | 232 | if K.backend() == 'tensorflow': 233 | pooling_regions = 14 234 | input_shape = (num_rois,14,14,1024) 235 | elif K.backend() == 'theano': 236 | pooling_regions = 7 237 | input_shape = (num_rois,1024,7,7) 238 | 239 | out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois]) 240 | out = classifier_layers(out_roi_pool, input_shape=input_shape, trainable=True) 241 | 242 | out = TimeDistributed(Flatten())(out) 243 | 244 | out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out) 245 | # note: no regression target for bg class 246 | out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out) 247 | return [out_class, out_regr] 248 | 249 | -------------------------------------------------------------------------------- /keras_frcnn/roi_helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pdb 3 | import math 4 | from . 
import data_generators 5 | import copy 6 | 7 | 8 | def calc_iou(R, img_data, C, class_mapping): 9 | 10 | bboxes = img_data['bboxes'] 11 | (width, height) = (img_data['width'], img_data['height']) 12 | # get image dimensions for resizing 13 | (resized_width, resized_height) = data_generators.get_new_img_size(width, height, C.im_size) 14 | 15 | gta = np.zeros((len(bboxes), 4)) 16 | 17 | for bbox_num, bbox in enumerate(bboxes): 18 | # get the GT box coordinates, and resize to account for image resizing 19 | gta[bbox_num, 0] = int(round(bbox['x1'] * (resized_width / float(width))/C.rpn_stride)) 20 | gta[bbox_num, 1] = int(round(bbox['x2'] * (resized_width / float(width))/C.rpn_stride)) 21 | gta[bbox_num, 2] = int(round(bbox['y1'] * (resized_height / float(height))/C.rpn_stride)) 22 | gta[bbox_num, 3] = int(round(bbox['y2'] * (resized_height / float(height))/C.rpn_stride)) 23 | 24 | x_roi = [] 25 | y_class_num = [] 26 | y_class_regr_coords = [] 27 | y_class_regr_label = [] 28 | IoUs = [] # for debugging only 29 | 30 | for ix in range(R.shape[0]): 31 | (x1, y1, x2, y2) = R[ix, :] 32 | x1 = int(round(x1)) 33 | y1 = int(round(y1)) 34 | x2 = int(round(x2)) 35 | y2 = int(round(y2)) 36 | 37 | best_iou = 0.0 38 | best_bbox = -1 39 | for bbox_num in range(len(bboxes)): 40 | curr_iou = data_generators.iou([gta[bbox_num, 0], gta[bbox_num, 2], gta[bbox_num, 1], gta[bbox_num, 3]], [x1, y1, x2, y2]) 41 | if curr_iou > best_iou: 42 | best_iou = curr_iou 43 | best_bbox = bbox_num 44 | 45 | if best_iou < C.classifier_min_overlap: 46 | continue 47 | else: 48 | w = x2 - x1 49 | h = y2 - y1 50 | x_roi.append([x1, y1, w, h]) 51 | IoUs.append(best_iou) 52 | 53 | if C.classifier_min_overlap <= best_iou < C.classifier_max_overlap: 54 | # hard negative example 55 | cls_name = 'bg' 56 | elif C.classifier_max_overlap <= best_iou: 57 | cls_name = bboxes[best_bbox]['class'] 58 | cxg = (gta[best_bbox, 0] + gta[best_bbox, 1]) / 2.0 59 | cyg = (gta[best_bbox, 2] + gta[best_bbox, 3]) / 2.0 60 | 61 | cx = x1 + w / 2.0 62 | cy = y1 + h / 2.0 63 | 64 | tx = (cxg - cx) / float(w) 65 | ty = (cyg - cy) / float(h) 66 | tw = np.log((gta[best_bbox, 1] - gta[best_bbox, 0]) / float(w)) 67 | th = np.log((gta[best_bbox, 3] - gta[best_bbox, 2]) / float(h)) 68 | else: 69 | print('roi = {}'.format(best_iou)) 70 | raise RuntimeError 71 | 72 | class_num = class_mapping[cls_name] 73 | class_label = len(class_mapping) * [0] 74 | class_label[class_num] = 1 75 | y_class_num.append(copy.deepcopy(class_label)) 76 | coords = [0] * 4 * (len(class_mapping) - 1) 77 | labels = [0] * 4 * (len(class_mapping) - 1) 78 | if cls_name != 'bg': 79 | label_pos = 4 * class_num 80 | sx, sy, sw, sh = C.classifier_regr_std 81 | coords[label_pos:4+label_pos] = [sx*tx, sy*ty, sw*tw, sh*th] 82 | labels[label_pos:4+label_pos] = [1, 1, 1, 1] 83 | y_class_regr_coords.append(copy.deepcopy(coords)) 84 | y_class_regr_label.append(copy.deepcopy(labels)) 85 | else: 86 | y_class_regr_coords.append(copy.deepcopy(coords)) 87 | y_class_regr_label.append(copy.deepcopy(labels)) 88 | 89 | if len(x_roi) == 0: 90 | return None, None, None, None 91 | 92 | X = np.array(x_roi) 93 | Y1 = np.array(y_class_num) 94 | Y2 = np.concatenate([np.array(y_class_regr_label),np.array(y_class_regr_coords)],axis=1) 95 | 96 | return np.expand_dims(X, axis=0), np.expand_dims(Y1, axis=0), np.expand_dims(Y2, axis=0), IoUs 97 | 98 | def apply_regr(x, y, w, h, tx, ty, tw, th): 99 | try: 100 | cx = x + w/2. 101 | cy = y + h/2. 
102 | cx1 = tx * w + cx 103 | cy1 = ty * h + cy 104 | w1 = math.exp(tw) * w 105 | h1 = math.exp(th) * h 106 | x1 = cx1 - w1/2. 107 | y1 = cy1 - h1/2. 108 | x1 = int(round(x1)) 109 | y1 = int(round(y1)) 110 | w1 = int(round(w1)) 111 | h1 = int(round(h1)) 112 | 113 | return x1, y1, w1, h1 114 | 115 | except ValueError: 116 | return x, y, w, h 117 | except OverflowError: 118 | return x, y, w, h 119 | except Exception as e: 120 | print(e) 121 | return x, y, w, h 122 | 123 | def apply_regr_np(X, T): 124 | try: 125 | x = X[0, :, :] 126 | y = X[1, :, :] 127 | w = X[2, :, :] 128 | h = X[3, :, :] 129 | 130 | tx = T[0, :, :] 131 | ty = T[1, :, :] 132 | tw = T[2, :, :] 133 | th = T[3, :, :] 134 | 135 | cx = x + w/2. 136 | cy = y + h/2. 137 | cx1 = tx * w + cx 138 | cy1 = ty * h + cy 139 | 140 | w1 = np.exp(tw.astype(np.float64)) * w 141 | h1 = np.exp(th.astype(np.float64)) * h 142 | x1 = cx1 - w1/2. 143 | y1 = cy1 - h1/2. 144 | 145 | x1 = np.round(x1) 146 | y1 = np.round(y1) 147 | w1 = np.round(w1) 148 | h1 = np.round(h1) 149 | return np.stack([x1, y1, w1, h1]) 150 | except Exception as e: 151 | print(e) 152 | return X 153 | 154 | def non_max_suppression_fast(boxes, probs, overlap_thresh=0.9, max_boxes=300): 155 | # code used from here: http://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 156 | # if there are no boxes, return an empty list 157 | if len(boxes) == 0: 158 | return [] 159 | 160 | # grab the coordinates of the bounding boxes 161 | x1 = boxes[:, 0] 162 | y1 = boxes[:, 1] 163 | x2 = boxes[:, 2] 164 | y2 = boxes[:, 3] 165 | 166 | np.testing.assert_array_less(x1, x2) 167 | np.testing.assert_array_less(y1, y2) 168 | 169 | # if the bounding boxes integers, convert them to floats -- 170 | # this is important since we'll be doing a bunch of divisions 171 | if boxes.dtype.kind == "i": 172 | boxes = boxes.astype("float") 173 | 174 | # initialize the list of picked indexes 175 | pick = [] 176 | 177 | # calculate the areas 178 | area = (x2 - x1) * (y2 - y1) 179 | 180 | # sort the bounding boxes 181 | idxs = np.argsort(probs) 182 | 183 | # keep looping while some indexes still remain in the indexes 184 | # list 185 | while len(idxs) > 0: 186 | # grab the last index in the indexes list and add the 187 | # index value to the list of picked indexes 188 | last = len(idxs) - 1 189 | i = idxs[last] 190 | pick.append(i) 191 | 192 | # find the intersection 193 | 194 | xx1_int = np.maximum(x1[i], x1[idxs[:last]]) 195 | yy1_int = np.maximum(y1[i], y1[idxs[:last]]) 196 | xx2_int = np.minimum(x2[i], x2[idxs[:last]]) 197 | yy2_int = np.minimum(y2[i], y2[idxs[:last]]) 198 | 199 | ww_int = np.maximum(0, xx2_int - xx1_int) 200 | hh_int = np.maximum(0, yy2_int - yy1_int) 201 | 202 | area_int = ww_int * hh_int 203 | 204 | # find the union 205 | area_union = area[i] + area[idxs[:last]] - area_int 206 | 207 | # compute the ratio of overlap 208 | overlap = area_int/(area_union + 1e-6) 209 | 210 | # delete all indexes from the index list that have 211 | idxs = np.delete(idxs, np.concatenate(([last], 212 | np.where(overlap > overlap_thresh)[0]))) 213 | 214 | if len(pick) >= max_boxes: 215 | break 216 | 217 | # return only the bounding boxes that were picked using the integer data type 218 | boxes = boxes[pick].astype("int") 219 | probs = probs[pick] 220 | return boxes, probs 221 | 222 | import time 223 | def rpn_to_roi(rpn_layer, regr_layer, C, dim_ordering, use_regr=True, max_boxes=300,overlap_thresh=0.9): 224 | 225 | regr_layer = regr_layer / C.std_scaling 226 | 227 | anchor_sizes = 
C.anchor_box_scales 228 | anchor_ratios = C.anchor_box_ratios 229 | 230 | assert rpn_layer.shape[0] == 1 231 | 232 | if dim_ordering == 'th': 233 | (rows,cols) = rpn_layer.shape[2:] 234 | 235 | elif dim_ordering == 'tf': 236 | (rows, cols) = rpn_layer.shape[1:3] 237 | 238 | curr_layer = 0 239 | if dim_ordering == 'tf': 240 | A = np.zeros((4, rpn_layer.shape[1], rpn_layer.shape[2], rpn_layer.shape[3])) 241 | elif dim_ordering == 'th': 242 | A = np.zeros((4, rpn_layer.shape[2], rpn_layer.shape[3], rpn_layer.shape[1])) 243 | 244 | for anchor_size in anchor_sizes: 245 | for anchor_ratio in anchor_ratios: 246 | 247 | anchor_x = (anchor_size * anchor_ratio[0])/C.rpn_stride 248 | anchor_y = (anchor_size * anchor_ratio[1])/C.rpn_stride 249 | if dim_ordering == 'th': 250 | regr = regr_layer[0, 4 * curr_layer:4 * curr_layer + 4, :, :] 251 | else: 252 | regr = regr_layer[0, :, :, 4 * curr_layer:4 * curr_layer + 4] 253 | regr = np.transpose(regr, (2, 0, 1)) 254 | 255 | X, Y = np.meshgrid(np.arange(cols),np. arange(rows)) 256 | 257 | A[0, :, :, curr_layer] = X - anchor_x/2 258 | A[1, :, :, curr_layer] = Y - anchor_y/2 259 | A[2, :, :, curr_layer] = anchor_x 260 | A[3, :, :, curr_layer] = anchor_y 261 | 262 | if use_regr: 263 | A[:, :, :, curr_layer] = apply_regr_np(A[:, :, :, curr_layer], regr) 264 | 265 | A[2, :, :, curr_layer] = np.maximum(1, A[2, :, :, curr_layer]) 266 | A[3, :, :, curr_layer] = np.maximum(1, A[3, :, :, curr_layer]) 267 | A[2, :, :, curr_layer] += A[0, :, :, curr_layer] 268 | A[3, :, :, curr_layer] += A[1, :, :, curr_layer] 269 | 270 | A[0, :, :, curr_layer] = np.maximum(0, A[0, :, :, curr_layer]) 271 | A[1, :, :, curr_layer] = np.maximum(0, A[1, :, :, curr_layer]) 272 | A[2, :, :, curr_layer] = np.minimum(cols-1, A[2, :, :, curr_layer]) 273 | A[3, :, :, curr_layer] = np.minimum(rows-1, A[3, :, :, curr_layer]) 274 | 275 | curr_layer += 1 276 | 277 | all_boxes = np.reshape(A.transpose((0, 3, 1,2)), (4, -1)).transpose((1, 0)) 278 | all_probs = rpn_layer.transpose((0, 3, 1, 2)).reshape((-1)) 279 | 280 | x1 = all_boxes[:, 0] 281 | y1 = all_boxes[:, 1] 282 | x2 = all_boxes[:, 2] 283 | y2 = all_boxes[:, 3] 284 | 285 | idxs = np.where((x1 - x2 >= 0) | (y1 - y2 >= 0)) 286 | 287 | all_boxes = np.delete(all_boxes, idxs, 0) 288 | all_probs = np.delete(all_probs, idxs, 0) 289 | 290 | result = non_max_suppression_fast(all_boxes, all_probs, overlap_thresh=overlap_thresh, max_boxes=max_boxes)[0] 291 | 292 | return result 293 | -------------------------------------------------------------------------------- /keras_frcnn/simple_parser.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def get_data(input_path): 5 | found_bg = False 6 | all_imgs = {} 7 | 8 | classes_count = {} 9 | 10 | class_mapping = {} 11 | 12 | visualise = True 13 | 14 | with open(input_path,'r') as f: 15 | 16 | print('Parsing annotation files') 17 | 18 | for line in f: 19 | line_split = line.strip().split(',') 20 | (filename,x1,y1,x2,y2,class_name) = line_split 21 | 22 | if class_name not in classes_count: 23 | classes_count[class_name] = 1 24 | else: 25 | classes_count[class_name] += 1 26 | 27 | if class_name not in class_mapping: 28 | if class_name == 'bg' and found_bg == False: 29 | print('Found class name with special name bg. 
Will be treated as a background region (this is usually for hard negative mining).') 30 | found_bg = True 31 | class_mapping[class_name] = len(class_mapping) 32 | 33 | if filename not in all_imgs: 34 | all_imgs[filename] = {} 35 | 36 | img = cv2.imread(filename) 37 | (rows,cols) = img.shape[:2] 38 | all_imgs[filename]['filepath'] = filename 39 | all_imgs[filename]['width'] = cols 40 | all_imgs[filename]['height'] = rows 41 | all_imgs[filename]['bboxes'] = [] 42 | if np.random.randint(0,6) > 0: 43 | all_imgs[filename]['imageset'] = 'trainval' 44 | else: 45 | all_imgs[filename]['imageset'] = 'test' 46 | 47 | all_imgs[filename]['bboxes'].append({'class': class_name, 'x1': int(x1), 'x2': int(x2), 'y1': int(y1), 'y2': int(y2)}) 48 | 49 | 50 | all_data = [] 51 | for key in all_imgs: 52 | all_data.append(all_imgs[key]) 53 | 54 | # make sure the bg class is last in the list 55 | if found_bg: 56 | if class_mapping['bg'] != len(class_mapping) - 1: 57 | key_to_switch = [key for key in class_mapping.keys() if class_mapping[key] == len(class_mapping)-1][0] 58 | val_to_switch = class_mapping['bg'] 59 | class_mapping['bg'] = len(class_mapping) - 1 60 | class_mapping[key_to_switch] = val_to_switch 61 | 62 | return all_data, classes_count, class_mapping 63 | 64 | 65 | -------------------------------------------------------------------------------- /keras_frcnn/vgg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """VGG16 model for Keras. 3 | # Reference 4 | - [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) 5 | """ 6 | from __future__ import print_function 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | 10 | import warnings 11 | 12 | from keras.models import Model 13 | from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout 14 | from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed 15 | from keras.engine.topology import get_source_inputs 16 | from keras.utils import layer_utils 17 | from keras.utils.data_utils import get_file 18 | from keras import backend as K 19 | from keras_frcnn.RoiPoolingConv import RoiPoolingConv 20 | 21 | 22 | def get_weight_path(): 23 | if K.image_dim_ordering() == 'th': 24 | print('pretrained weights not available for VGG with theano backend') 25 | return 26 | else: 27 | return 'vgg16_weights_tf_dim_ordering_tf_kernels.h5' 28 | 29 | 30 | def get_img_output_length(width, height): 31 | def get_output_length(input_length): 32 | return input_length//16 33 | 34 | return get_output_length(width), get_output_length(height) 35 | 36 | def nn_base(input_tensor=None, trainable=False): 37 | 38 | 39 | # Determine proper input shape 40 | if K.image_dim_ordering() == 'th': 41 | input_shape = (3, None, None) 42 | else: 43 | input_shape = (None, None, 3) 44 | 45 | if input_tensor is None: 46 | img_input = Input(shape=input_shape) 47 | else: 48 | if not K.is_keras_tensor(input_tensor): 49 | img_input = Input(tensor=input_tensor, shape=input_shape) 50 | else: 51 | img_input = input_tensor 52 | 53 | if K.image_dim_ordering() == 'tf': 54 | bn_axis = 3 55 | else: 56 | bn_axis = 1 57 | 58 | # Block 1 59 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input) 60 | x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) 61 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) 62 | 63 | # Block 2 64 | x = Conv2D(128, (3, 3), 
activation='relu', padding='same', name='block2_conv1')(x) 65 | x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) 66 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) 67 | 68 | # Block 3 69 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) 70 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) 71 | x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) 72 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) 73 | 74 | # Block 4 75 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) 76 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) 77 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) 78 | x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) 79 | 80 | # Block 5 81 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) 82 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) 83 | x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) 84 | # x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) 85 | 86 | return x 87 | 88 | def rpn(base_layers, num_anchors): 89 | 90 | x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers) 91 | 92 | x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x) 93 | x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x) 94 | 95 | return [x_class, x_regr, base_layers] 96 | 97 | 98 | def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False): 99 | 100 | # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround 101 | 102 | if K.backend() == 'tensorflow': 103 | pooling_regions = 7 104 | input_shape = (num_rois,7,7,512) 105 | elif K.backend() == 'theano': 106 | pooling_regions = 7 107 | input_shape = (num_rois,512,7,7) 108 | 109 | out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois]) 110 | 111 | out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool) 112 | out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out) 113 | out = TimeDistributed(Dropout(0.5))(out) 114 | out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out) 115 | out = TimeDistributed(Dropout(0.5))(out) 116 | 117 | out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out) 118 | # note: no regression target for bg class 119 | out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out) 120 | 121 | return [out_class, out_regr] 122 | 123 | 124 | -------------------------------------------------------------------------------- /measure_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import sys 5 | import pickle 6 | from optparse import OptionParser 7 | import time 8 | from keras_frcnn import config 9 | import keras_frcnn.resnet as nn 10 | from keras import backend as K 11 | from keras.layers import Input 12 | from keras.models import Model 13 | from keras_frcnn import 
roi_helpers 14 | from keras_frcnn import data_generators 15 | from sklearn.metrics import average_precision_score 16 | 17 | 18 | def get_map(pred, gt, f): 19 | T = {} 20 | P = {} 21 | fx, fy = f 22 | 23 | for bbox in gt: 24 | bbox['bbox_matched'] = False 25 | 26 | pred_probs = np.array([s['prob'] for s in pred]) 27 | box_idx_sorted_by_prob = np.argsort(pred_probs)[::-1] 28 | 29 | for box_idx in box_idx_sorted_by_prob: 30 | pred_box = pred[box_idx] 31 | pred_class = pred_box['class'] 32 | pred_x1 = pred_box['x1'] 33 | pred_x2 = pred_box['x2'] 34 | pred_y1 = pred_box['y1'] 35 | pred_y2 = pred_box['y2'] 36 | pred_prob = pred_box['prob'] 37 | if pred_class not in P: 38 | P[pred_class] = [] 39 | T[pred_class] = [] 40 | P[pred_class].append(pred_prob) 41 | found_match = False 42 | 43 | for gt_box in gt: 44 | gt_class = gt_box['class'] 45 | gt_x1 = gt_box['x1']/fx 46 | gt_x2 = gt_box['x2']/fx 47 | gt_y1 = gt_box['y1']/fy 48 | gt_y2 = gt_box['y2']/fy 49 | gt_seen = gt_box['bbox_matched'] 50 | if gt_class != pred_class: 51 | continue 52 | if gt_seen: 53 | continue 54 | iou = data_generators.iou((pred_x1, pred_y1, pred_x2, pred_y2), (gt_x1, gt_y1, gt_x2, gt_y2)) 55 | if iou >= 0.5: 56 | found_match = True 57 | gt_box['bbox_matched'] = True 58 | break 59 | else: 60 | continue 61 | 62 | T[pred_class].append(int(found_match)) 63 | 64 | for gt_box in gt: 65 | if not gt_box['bbox_matched'] and not gt_box['difficult']: 66 | if gt_box['class'] not in P: 67 | P[gt_box['class']] = [] 68 | T[gt_box['class']] = [] 69 | 70 | T[gt_box['class']].append(1) 71 | P[gt_box['class']].append(0) 72 | 73 | #import pdb 74 | #pdb.set_trace() 75 | return T, P 76 | 77 | sys.setrecursionlimit(40000) 78 | 79 | parser = OptionParser() 80 | 81 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.") 82 | parser.add_option("-n", "--num_rois", dest="num_rois", 83 | help="Number of ROIs per iteration. Higher means more memory use.", default=32) 84 | parser.add_option("--config_filename", dest="config_filename", help= 85 | "Location to read the metadata related to the training (generated when training).", 86 | default="config.pickle") 87 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 88 | default="pascal_voc"), 89 | 90 | (options, args) = parser.parse_args() 91 | 92 | if not options.test_path: # if filename is not given 93 | parser.error('Error: path to test data must be specified. 
Pass --path to command line') 94 | 95 | 96 | if options.parser == 'pascal_voc': 97 | from keras_frcnn.pascal_voc_parser import get_data 98 | elif options.parser == 'simple': 99 | from keras_frcnn.simple_parser import get_data 100 | else: 101 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 102 | 103 | config_output_filename = options.config_filename 104 | 105 | with open(config_output_filename, 'r') as f_in: 106 | C = pickle.load(f_in) 107 | 108 | # turn off any data augmentation at test time 109 | C.use_horizontal_flips = False 110 | C.use_vertical_flips = False 111 | C.rot_90 = False 112 | 113 | img_path = options.test_path 114 | 115 | 116 | def format_img(img, C): 117 | img_min_side = float(C.im_size) 118 | (height,width,_) = img.shape 119 | 120 | if width <= height: 121 | f = img_min_side/width 122 | new_height = int(f * height) 123 | new_width = int(img_min_side) 124 | else: 125 | f = img_min_side/height 126 | new_width = int(f * width) 127 | new_height = int(img_min_side) 128 | fx = width/float(new_width) 129 | fy = height/float(new_height) 130 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 131 | img = img[:, :, (2, 1, 0)] 132 | img = img.astype(np.float32) 133 | img[:, :, 0] -= C.img_channel_mean[0] 134 | img[:, :, 1] -= C.img_channel_mean[1] 135 | img[:, :, 2] -= C.img_channel_mean[2] 136 | img /= C.img_scaling_factor 137 | img = np.transpose(img, (2, 0, 1)) 138 | img = np.expand_dims(img, axis=0) 139 | return img, fx, fy 140 | 141 | 142 | class_mapping = C.class_mapping 143 | 144 | if 'bg' not in class_mapping: 145 | class_mapping['bg'] = len(class_mapping) 146 | 147 | class_mapping = {v: k for k, v in class_mapping.iteritems()} 148 | print(class_mapping) 149 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 150 | C.num_rois = int(options.num_rois) 151 | 152 | if K.image_dim_ordering() == 'th': 153 | input_shape_img = (3, None, None) 154 | input_shape_features = (1024, None, None) 155 | else: 156 | input_shape_img = (None, None, 3) 157 | input_shape_features = (None, None, 1024) 158 | 159 | 160 | img_input = Input(shape=input_shape_img) 161 | roi_input = Input(shape=(C.num_rois, 4)) 162 | feature_map_input = Input(shape=input_shape_features) 163 | 164 | # define the base network (resnet here, can be VGG, Inception, etc) 165 | shared_layers = nn.nn_base(img_input, trainable=True) 166 | 167 | # define the RPN, built on the base layers 168 | num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) 169 | rpn_layers = nn.rpn(shared_layers, num_anchors) 170 | 171 | classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) 172 | 173 | model_rpn = Model(img_input, rpn_layers) 174 | model_classifier_only = Model([feature_map_input, roi_input], classifier) 175 | 176 | model_classifier = Model([feature_map_input, roi_input], classifier) 177 | 178 | model_rpn.load_weights(C.model_path, by_name=True) 179 | model_classifier.load_weights(C.model_path, by_name=True) 180 | 181 | model_rpn.compile(optimizer='sgd', loss='mse') 182 | model_classifier.compile(optimizer='sgd', loss='mse') 183 | 184 | all_imgs, _, _ = get_data(options.test_path) 185 | test_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 186 | 187 | 188 | T = {} 189 | P = {} 190 | for idx, img_data in enumerate(test_imgs): 191 | print('{}/{}'.format(idx,len(test_imgs))) 192 | st = time.time() 193 | filepath = img_data['filepath'] 194 | 195 | img = 
cv2.imread(filepath) 196 | 197 | X, fx, fy = format_img(img, C) 198 | 199 | if K.image_dim_ordering() == 'tf': 200 | X = np.transpose(X, (0, 2, 3, 1)) 201 | 202 | # get the feature maps and output from the RPN 203 | [Y1, Y2, F] = model_rpn.predict(X) 204 | 205 | R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7) 206 | 207 | # convert from (x1,y1,x2,y2) to (x,y,w,h) 208 | R[:, 2] -= R[:, 0] 209 | R[:, 3] -= R[:, 1] 210 | 211 | # apply the spatial pyramid pooling to the proposed regions 212 | bboxes = {} 213 | probs = {} 214 | 215 | for jk in range(R.shape[0] // C.num_rois + 1): 216 | ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0) 217 | if ROIs.shape[1] == 0: 218 | break 219 | 220 | if jk == R.shape[0] // C.num_rois: 221 | # pad R 222 | curr_shape = ROIs.shape 223 | target_shape = (curr_shape[0], C.num_rois, curr_shape[2]) 224 | ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) 225 | ROIs_padded[:, :curr_shape[1], :] = ROIs 226 | ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] 227 | ROIs = ROIs_padded 228 | 229 | [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) 230 | 231 | for ii in range(P_cls.shape[1]): 232 | 233 | if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1): 234 | continue 235 | 236 | cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] 237 | 238 | if cls_name not in bboxes: 239 | bboxes[cls_name] = [] 240 | probs[cls_name] = [] 241 | 242 | (x, y, w, h) = ROIs[0, ii, :] 243 | 244 | cls_num = np.argmax(P_cls[0, ii, :]) 245 | try: 246 | (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)] 247 | tx /= C.classifier_regr_std[0] 248 | ty /= C.classifier_regr_std[1] 249 | tw /= C.classifier_regr_std[2] 250 | th /= C.classifier_regr_std[3] 251 | x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th) 252 | except: 253 | pass 254 | bboxes[cls_name].append([16 * x, 16 * y, 16 * (x + w), 16 * (y + h)]) 255 | probs[cls_name].append(np.max(P_cls[0, ii, :])) 256 | 257 | all_dets = [] 258 | 259 | for key in bboxes: 260 | bbox = np.array(bboxes[key]) 261 | 262 | new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5) 263 | for jk in range(new_boxes.shape[0]): 264 | (x1, y1, x2, y2) = new_boxes[jk, :] 265 | det = {'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': key, 'prob': new_probs[jk]} 266 | all_dets.append(det) 267 | 268 | 269 | print('Elapsed time = {}'.format(time.time() - st)) 270 | t, p = get_map(all_dets, img_data['bboxes'], (fx, fy)) 271 | for key in t.keys(): 272 | if key not in T: 273 | T[key] = [] 274 | P[key] = [] 275 | T[key].extend(t[key]) 276 | P[key].extend(p[key]) 277 | all_aps = [] 278 | for key in T.keys(): 279 | ap = average_precision_score(T[key], P[key]) 280 | print('{} AP: {}'.format(key, ap)) 281 | all_aps.append(ap) 282 | print('mAP = {}'.format(np.mean(np.array(all_aps)))) 283 | #print(T) 284 | #print(P) -------------------------------------------------------------------------------- /pic_tmp/001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/pic_tmp/001.jpg -------------------------------------------------------------------------------- /pic_tmp/002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/pic_tmp/002.jpg 
-------------------------------------------------------------------------------- /pic_tmp/003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/pic_tmp/003.jpg -------------------------------------------------------------------------------- /pic_tmp/006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/pic_tmp/006.jpg -------------------------------------------------------------------------------- /pic_tmp/007.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/pic_tmp/007.jpg -------------------------------------------------------------------------------- /static/css/user_form.css: -------------------------------------------------------------------------------- 1 | /*重置样式*/ 2 | html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, 3 | acronym, address, big, cite, code, del, dfn, em, img, ins, kbd, q, s, samp, small, strike, strong, sub 4 | , sup, tt, var, b, u, i, dl, dt, dd, ol, ul, li, fieldset, form, label, legend, table, caption, tbody, 5 | tfoot, thead, tr, th, td, article, aside, canvas, details, embed, figure, figcaption, footer, header, 6 | hgroup, menu, nav, output, section, summary, time, mark, audio, video { 7 | margin: 0; 8 | padding: 0; 9 | border: 0 10 | } 11 | body { 12 | font-family: "微软雅黑"; 13 | background: #f4f4f4; 14 | } 15 | 16 | /*header*/ 17 | .header-line { 18 | width: 100%; 19 | height: 4px; 20 | background: #0dbfdd; 21 | } 22 | 23 | /*content*/ 24 | .content { 25 | width: 28%; 26 | margin: 70px auto 0; 27 | text-align: center; 28 | } 29 | .content-logo { 30 | width: 80px; 31 | height: 80px; 32 | } 33 | .content-title { 34 | margin: 10px 0 25px 0; 35 | font-size: 2em; 36 | color: #747474; 37 | font-weight: normal; 38 | } 39 | .content-form { 40 | width: 100%; 41 | padding: 36px 0 20px; 42 | border: 1px solid #dedede; 43 | text-align: center; 44 | background: #fff; 45 | } 46 | .content-form form div { 47 | margin-bottom: 19px; 48 | } 49 | .content-form form .user, 50 | .content-form form .password { 51 | width: 77%; 52 | height: 20px; 53 | padding: 10px; 54 | font-size: 1em; 55 | border: 1px solid #cccbcb; 56 | border-radius: 7px; 57 | letter-spacing: 1px; 58 | } 59 | .content-form form input:focus { 60 | outline: none; 61 | -webkit-box-shadow: 0 0 5px #0dbfdd; 62 | box-shadow: 0 0 5px #0dbfdd; 63 | } 64 | .content-form-signup { 65 | width: 84%; 66 | margin: 0 auto; 67 | padding: 10px; 68 | border: 1px solid #cccbcb; 69 | border-radius: 7px; 70 | font-size: 1em; 71 | font-weight: bold; 72 | color: #fff; 73 | background: #0dbfdd; 74 | cursor: pointer; 75 | } 76 | .content-form-signup:hover { 77 | background: #0cb3d0; 78 | } 79 | .content-form-signup:focus { 80 | outline: none; 81 | border: 1px solid #0cb3d0; 82 | } 83 | .content-login-description { 84 | margin-top: 25px; 85 | line-height: 1.63636364; 86 | color: #747474; 87 | font-size: .91666667rem; 88 | } 89 | .content-login-link { 90 | font-size: 16px; 91 | color: #0dbfdd; 92 | text-decoration: none; 93 | } 94 | 95 | /*输入框无内容便提示*/ 96 | #remind_1, 97 | #remind_2 { 98 | width: 76%; 99 | margin: 0 auto 2px; 100 | text-align: left; 101 | font-size: .2em; 102 | color: #f00; 103 | } 
-------------------------------------------------------------------------------- /static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xvshu/keras-frcnn-web/a828e7091f0fb894d4e31d997cb3b9c3b29b61a6/static/img/logo.png -------------------------------------------------------------------------------- /templates/img_fit.html: -------------------------------------------------------------------------------- [HTML markup not preserved in this dump; only the visible text of the upload page survives: the page title 可乐识别 ("cola recognition"), the heading 图片识别 ("image recognition"), and an image-upload field labelled 图片 ("image").]
33 | 34 | -------------------------------------------------------------------------------- /test_frcnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | import cv2 4 | import numpy as np 5 | import sys 6 | import json 7 | import pickle 8 | from optparse import OptionParser 9 | import time 10 | from keras_frcnn import config 11 | from keras import backend as K 12 | from keras.layers import Input 13 | from keras.models import Model 14 | from keras_frcnn import roi_helpers 15 | 16 | sys.setrecursionlimit(40000) 17 | 18 | parser = OptionParser() 19 | 20 | parser.add_option("-p", "--path", dest="test_path", help="Path to test data.",default="D:\\Data\\test") 21 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", 22 | help="Number of ROIs per iteration. Higher means more memory use.", default=32) 23 | parser.add_option("--config_filename", dest="config_filename", help= 24 | "Location to read the metadata related to the training (generated when training).", 25 | default="config.pickle") 26 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 27 | 28 | (options, args) = parser.parse_args() 29 | 30 | if not options.test_path: # if filename is not given 31 | parser.error('Error: path to test data must be specified. Pass --path to command line') 32 | 33 | 34 | config_output_filename = options.config_filename 35 | 36 | with open(config_output_filename, 'rb') as f_in: 37 | C = pickle.load(f_in) 38 | 39 | if C.network == 'resnet50': 40 | import keras_frcnn.resnet as nn 41 | elif C.network == 'vgg': 42 | import keras_frcnn.vgg as nn 43 | 44 | # turn off any data augmentation at test time 45 | C.use_horizontal_flips = False 46 | C.use_vertical_flips = False 47 | C.rot_90 = False 48 | 49 | img_path = options.test_path 50 | 51 | def format_img_size(img, C): 52 | """ formats the image size based on config """ 53 | img_min_side = float(C.im_size) 54 | (height,width,_) = img.shape 55 | 56 | if width <= height: 57 | ratio = img_min_side/width 58 | new_height = int(ratio * height) 59 | new_width = int(img_min_side) 60 | else: 61 | ratio = img_min_side/height 62 | new_width = int(ratio * width) 63 | new_height = int(img_min_side) 64 | img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) 65 | return img, ratio 66 | 67 | def format_img_channels(img, C): 68 | """ formats the image channels based on config """ 69 | img = img[:, :, (2, 1, 0)] 70 | img = img.astype(np.float32) 71 | img[:, :, 0] -= C.img_channel_mean[0] 72 | img[:, :, 1] -= C.img_channel_mean[1] 73 | img[:, :, 2] -= C.img_channel_mean[2] 74 | img /= C.img_scaling_factor 75 | img = np.transpose(img, (2, 0, 1)) 76 | img = np.expand_dims(img, axis=0) 77 | return img 78 | 79 | def format_img(img, C): 80 | """ formats an image for model prediction based on config """ 81 | img, ratio = format_img_size(img, C) 82 | img = format_img_channels(img, C) 83 | return img, ratio 84 | 85 | # Method to transform the coordinates of the bounding box to its original size 86 | def get_real_coordinates(ratio, x1, y1, x2, y2): 87 | 88 | real_x1 = int(round(x1 // ratio)) 89 | real_y1 = int(round(y1 // ratio)) 90 | real_x2 = int(round(x2 // ratio)) 91 | real_y2 = int(round(y2 // ratio)) 92 | 93 | return (real_x1, real_y1, real_x2 ,real_y2) 94 | 95 | class_mapping = C.class_mapping 96 | 97 | if 'bg' not in class_mapping: 98 | class_mapping['bg'] = len(class_mapping) 99 | 100 | 
class_mapping = {v: k for k, v in class_mapping.items()} 101 | print(class_mapping) 102 | class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping} 103 | C.num_rois = int(options.num_rois) 104 | 105 | if C.network == 'resnet50': 106 | num_features = 1024 107 | elif C.network == 'vgg': 108 | num_features = 512 109 | 110 | if K.image_dim_ordering() == 'th': 111 | input_shape_img = (3, None, None) 112 | input_shape_features = (num_features, None, None) 113 | else: 114 | input_shape_img = (None, None, 3) 115 | input_shape_features = (None, None, num_features) 116 | 117 | 118 | img_input = Input(shape=input_shape_img) 119 | roi_input = Input(shape=(C.num_rois, 4)) 120 | feature_map_input = Input(shape=input_shape_features) 121 | 122 | # define the base network (resnet here, can be VGG, Inception, etc) 123 | shared_layers = nn.nn_base(img_input, trainable=True) 124 | 125 | # define the RPN, built on the base layers 126 | num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) 127 | rpn_layers = nn.rpn(shared_layers, num_anchors) 128 | 129 | classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True) 130 | 131 | model_rpn = Model(img_input, rpn_layers) 132 | model_classifier_only = Model([feature_map_input, roi_input], classifier) 133 | 134 | model_classifier = Model([feature_map_input, roi_input], classifier) 135 | 136 | print('Loading weights from {}'.format(C.model_path)) 137 | model_rpn.load_weights(C.model_path, by_name=True) 138 | model_classifier.load_weights(C.model_path, by_name=True) 139 | 140 | model_rpn.compile(optimizer='sgd', loss='mse') 141 | model_classifier.compile(optimizer='sgd', loss='mse') 142 | 143 | all_imgs = [] 144 | 145 | classes = {} 146 | 147 | bbox_threshold = 0.8 148 | 149 | visualise = True 150 | 151 | def tf_fit_img(img): 152 | X, ratio = format_img(img, C) 153 | 154 | if K.image_dim_ordering() == 'tf': 155 | X = np.transpose(X, (0, 2, 3, 1)) 156 | 157 | # get the feature maps and output from the RPN 158 | print(X) 159 | [Y1, Y2, F] = model_rpn.predict(X) 160 | 161 | 162 | R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7) 163 | 164 | # convert from (x1,y1,x2,y2) to (x,y,w,h) 165 | R[:, 2] -= R[:, 0] 166 | R[:, 3] -= R[:, 1] 167 | 168 | # apply the spatial pyramid pooling to the proposed regions 169 | bboxes = {} 170 | probs = {} 171 | 172 | for jk in range(R.shape[0]//C.num_rois + 1): 173 | ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0) 174 | if ROIs.shape[1] == 0: 175 | break 176 | 177 | if jk == R.shape[0]//C.num_rois: 178 | #pad R 179 | curr_shape = ROIs.shape 180 | target_shape = (curr_shape[0],C.num_rois,curr_shape[2]) 181 | ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) 182 | ROIs_padded[:, :curr_shape[1], :] = ROIs 183 | ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] 184 | ROIs = ROIs_padded 185 | 186 | [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) 187 | 188 | for ii in range(P_cls.shape[1]): 189 | 190 | if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1): 191 | continue 192 | 193 | cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] 194 | 195 | if cls_name not in bboxes: 196 | bboxes[cls_name] = [] 197 | probs[cls_name] = [] 198 | 199 | (x, y, w, h) = ROIs[0, ii, :] 200 | 201 | cls_num = np.argmax(P_cls[0, ii, :]) 202 | try: 203 | (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)] 204 | tx /= C.classifier_regr_std[0] 205 | ty /= 
C.classifier_regr_std[1] 206 | tw /= C.classifier_regr_std[2] 207 | th /= C.classifier_regr_std[3] 208 | x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th) 209 | except: 210 | pass 211 | bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)]) 212 | probs[cls_name].append(np.max(P_cls[0, ii, :])) 213 | 214 | all_dets = [] 215 | 216 | for key in bboxes: 217 | bbox = np.array(bboxes[key]) 218 | 219 | new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5) 220 | for jk in range(new_boxes.shape[0]): 221 | (x1, y1, x2, y2) = new_boxes[jk,:] 222 | 223 | (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) 224 | 225 | cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2) 226 | 227 | textLabel = '{}: {}'.format(key,int(100*new_probs[jk])) 228 | all_dets.append((key,100*new_probs[jk])) 229 | 230 | (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1) 231 | textOrg = (real_x1, real_y1-0) 232 | 233 | cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2) 234 | cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1) 235 | cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) 236 | 237 | # print('Elapsed time = {}'.format(time.time() - st)) 238 | print(all_dets) 239 | return json.dumps(all_dets) 240 | # cv2.imshow('img', img) 241 | # cv2.waitKey(0) 242 | # cv2.imwrite('./results_imgs/{}.png'.format(idx),img) 243 | 244 | 245 | for idx, img_name in enumerate(sorted(os.listdir(img_path))): 246 | if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')): 247 | continue 248 | print(img_name) 249 | st = time.time() 250 | filepath = os.path.join(img_path,img_name) 251 | 252 | img = cv2.imread(filepath) 253 | 254 | X, ratio = format_img(img, C) 255 | 256 | if K.image_dim_ordering() == 'tf': 257 | X = np.transpose(X, (0, 2, 3, 1)) 258 | 259 | # get the feature maps and output from the RPN 260 | print(X) 261 | [Y1, Y2, F] = model_rpn.predict(X) 262 | 263 | 264 | R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7) 265 | 266 | # convert from (x1,y1,x2,y2) to (x,y,w,h) 267 | R[:, 2] -= R[:, 0] 268 | R[:, 3] -= R[:, 1] 269 | 270 | # apply the spatial pyramid pooling to the proposed regions 271 | bboxes = {} 272 | probs = {} 273 | 274 | for jk in range(R.shape[0]//C.num_rois + 1): 275 | ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0) 276 | if ROIs.shape[1] == 0: 277 | break 278 | 279 | if jk == R.shape[0]//C.num_rois: 280 | #pad R 281 | curr_shape = ROIs.shape 282 | target_shape = (curr_shape[0],C.num_rois,curr_shape[2]) 283 | ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype) 284 | ROIs_padded[:, :curr_shape[1], :] = ROIs 285 | ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :] 286 | ROIs = ROIs_padded 287 | 288 | [P_cls, P_regr] = model_classifier_only.predict([F, ROIs]) 289 | 290 | for ii in range(P_cls.shape[1]): 291 | 292 | if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1): 293 | continue 294 | 295 | cls_name = class_mapping[np.argmax(P_cls[0, ii, :])] 296 | 297 | if cls_name not in bboxes: 298 | bboxes[cls_name] = [] 299 | probs[cls_name] = [] 300 | 301 | (x, 
y, w, h) = ROIs[0, ii, :] 302 | 303 | cls_num = np.argmax(P_cls[0, ii, :]) 304 | try: 305 | (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)] 306 | tx /= C.classifier_regr_std[0] 307 | ty /= C.classifier_regr_std[1] 308 | tw /= C.classifier_regr_std[2] 309 | th /= C.classifier_regr_std[3] 310 | x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th) 311 | except: 312 | pass 313 | bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)]) 314 | probs[cls_name].append(np.max(P_cls[0, ii, :])) 315 | 316 | all_dets = [] 317 | 318 | for key in bboxes: 319 | bbox = np.array(bboxes[key]) 320 | 321 | new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5) 322 | for jk in range(new_boxes.shape[0]): 323 | (x1, y1, x2, y2) = new_boxes[jk,:] 324 | 325 | (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) 326 | 327 | cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2) 328 | 329 | textLabel = '{}: {}'.format(key,int(100*new_probs[jk])) 330 | all_dets.append((key,100*new_probs[jk])) 331 | 332 | (retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1) 333 | textOrg = (real_x1, real_y1-0) 334 | 335 | cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2) 336 | cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1) 337 | cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1) 338 | 339 | print('Elapsed time = {}'.format(time.time() - st)) 340 | print(all_dets) 341 | # cv2.imshow('img', img) 342 | # cv2.waitKey(0) 343 | # cv2.imwrite('./results_imgs/{}.png'.format(idx),img) 344 | -------------------------------------------------------------------------------- /train_frcnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import pprint 4 | import sys 5 | import time 6 | import numpy as np 7 | from optparse import OptionParser 8 | import pickle 9 | 10 | from keras import backend as K 11 | from keras.optimizers import Adam, SGD, RMSprop 12 | from keras.layers import Input 13 | from keras.models import Model 14 | from keras_frcnn import config, data_generators 15 | from keras_frcnn import losses as losses 16 | import keras_frcnn.roi_helpers as roi_helpers 17 | from keras.utils import generic_utils 18 | 19 | sys.setrecursionlimit(40000) 20 | 21 | parser = OptionParser() 22 | 23 | parser.add_option("-p", "--path", dest="train_path", help="Path to training data.") 24 | parser.add_option("-o", "--parser", dest="parser", help="Parser to use. One of simple or pascal_voc", 25 | default="pascal_voc") 26 | parser.add_option("-n", "--num_rois", type="int", dest="num_rois", help="Number of RoIs to process at once.", default=32) 27 | parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50') 28 | parser.add_option("--hf", dest="horizontal_flips", help="Augment with horizontal flips in training. (Default=false).", action="store_true", default=False) 29 | parser.add_option("--vf", dest="vertical_flips", help="Augment with vertical flips in training. 
(Default=false).", action="store_true", default=False) 30 | parser.add_option("--rot", "--rot_90", dest="rot_90", help="Augment with 90 degree rotations in training. (Default=false).", 31 | action="store_true", default=False) 32 | parser.add_option("--num_epochs", type="int", dest="num_epochs", help="Number of epochs.", default=100) 33 | parser.add_option("--config_filename", dest="config_filename", help= 34 | "Location to store all the metadata related to the training (to be used when testing).", 35 | default="config.pickle") 36 | parser.add_option("--output_weight_path", dest="output_weight_path", help="Output path for weights.", default='./model_frcnn.hdf5') 37 | parser.add_option("--input_weight_path", dest="input_weight_path", help="Input path for weights. If not specified, will try to load default weights provided by keras.") 38 | 39 | (options, args) = parser.parse_args() 40 | 41 | if not options.train_path: # if filename is not given 42 | parser.error('Error: path to training data must be specified. Pass --path to command line') 43 | 44 | if options.parser == 'pascal_voc': 45 | from keras_frcnn.pascal_voc_parser import get_data 46 | elif options.parser == 'simple': 47 | from keras_frcnn.simple_parser import get_data 48 | else: 49 | raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'") 50 | 51 | # pass the settings from the command line, and persist them in the config object 52 | C = config.Config() 53 | 54 | C.use_horizontal_flips = bool(options.horizontal_flips) 55 | C.use_vertical_flips = bool(options.vertical_flips) 56 | C.rot_90 = bool(options.rot_90) 57 | 58 | C.model_path = options.output_weight_path 59 | C.num_rois = int(options.num_rois) 60 | 61 | if options.network == 'vgg': 62 | C.network = 'vgg' 63 | from keras_frcnn import vgg as nn 64 | elif options.network == 'resnet50': 65 | from keras_frcnn import resnet as nn 66 | C.network = 'resnet50' 67 | else: 68 | print('Not a valid model') 69 | raise ValueError 70 | 71 | 72 | # check if weight path was passed via command line 73 | if options.input_weight_path: 74 | C.base_net_weights = options.input_weight_path 75 | else: 76 | # set the path to weights based on backend and model 77 | C.base_net_weights = nn.get_weight_path() 78 | 79 | all_imgs, classes_count, class_mapping = get_data(options.train_path) 80 | 81 | if 'bg' not in classes_count: 82 | classes_count['bg'] = 0 83 | class_mapping['bg'] = len(class_mapping) 84 | 85 | C.class_mapping = class_mapping 86 | 87 | inv_map = {v: k for k, v in class_mapping.items()} 88 | 89 | print('Training images per class:') 90 | pprint.pprint(classes_count) 91 | print('Num classes (including bg) = {}'.format(len(classes_count))) 92 | 93 | config_output_filename = options.config_filename 94 | 95 | with open(config_output_filename, 'wb') as config_f: 96 | pickle.dump(C,config_f) 97 | print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename)) 98 | 99 | random.shuffle(all_imgs) 100 | 101 | num_imgs = len(all_imgs) 102 | 103 | train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval'] 104 | val_imgs = [s for s in all_imgs if s['imageset'] == 'test'] 105 | 106 | print('Num train samples {}'.format(len(train_imgs))) 107 | print('Num val samples {}'.format(len(val_imgs))) 108 | 109 | 110 | data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn.get_img_output_length, K.image_dim_ordering(), mode='train') 111 | data_gen_val = data_generators.get_anchor_gt(val_imgs, 
classes_count, C, nn.get_img_output_length,K.image_dim_ordering(), mode='val') 112 | 113 | if K.image_dim_ordering() == 'th': 114 | input_shape_img = (3, None, None) 115 | else: 116 | input_shape_img = (None, None, 3) 117 | 118 | img_input = Input(shape=input_shape_img) 119 | roi_input = Input(shape=(None, 4)) 120 | 121 | # define the base network (resnet here, can be VGG, Inception, etc) 122 | shared_layers = nn.nn_base(img_input, trainable=True) 123 | 124 | # define the RPN, built on the base layers 125 | num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) 126 | rpn = nn.rpn(shared_layers, num_anchors) 127 | 128 | classifier = nn.classifier(shared_layers, roi_input, C.num_rois, nb_classes=len(classes_count), trainable=True) 129 | 130 | model_rpn = Model(img_input, rpn[:2]) 131 | model_classifier = Model([img_input, roi_input], classifier) 132 | 133 | # this is a model that holds both the RPN and the classifier, used to load/save weights for the models 134 | model_all = Model([img_input, roi_input], rpn[:2] + classifier) 135 | 136 | try: 137 | print('loading weights from {}'.format(C.base_net_weights)) 138 | model_rpn.load_weights(C.base_net_weights, by_name=True) 139 | model_classifier.load_weights(C.base_net_weights, by_name=True) 140 | except: 141 | print('Could not load pretrained model weights. Weights can be found in the keras application folder \ 142 | https://github.com/fchollet/keras/tree/master/keras/applications') 143 | 144 | optimizer = Adam(lr=1e-5) 145 | optimizer_classifier = Adam(lr=1e-5) 146 | model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors)]) 147 | model_classifier.compile(optimizer=optimizer_classifier, loss=[losses.class_loss_cls, losses.class_loss_regr(len(classes_count)-1)], metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'}) 148 | model_all.compile(optimizer='sgd', loss='mae') 149 | 150 | epoch_length = 10 151 | num_epochs = int(options.num_epochs) 152 | iter_num = 0 153 | 154 | losses = np.zeros((epoch_length, 5)) 155 | rpn_accuracy_rpn_monitor = [] 156 | rpn_accuracy_for_epoch = [] 157 | start_time = time.time() 158 | 159 | best_loss = np.Inf 160 | 161 | class_mapping_inv = {v: k for k, v in class_mapping.items()} 162 | print('Starting training') 163 | 164 | vis = True 165 | 166 | for epoch_num in range(num_epochs): 167 | 168 | progbar = generic_utils.Progbar(epoch_length) 169 | print('Epoch {}/{}'.format(epoch_num + 1, num_epochs)) 170 | 171 | while True: 172 | try: 173 | 174 | if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose: 175 | mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor) 176 | rpn_accuracy_rpn_monitor = [] 177 | print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length)) 178 | if mean_overlapping_bboxes == 0: 179 | print('RPN is not producing bounding boxes that overlap the ground truth boxes. 
Check RPN settings or keep training.') 180 | 181 | X, Y, img_data = next(data_gen_train) 182 | 183 | loss_rpn = model_rpn.train_on_batch(X, Y) 184 | 185 | P_rpn = model_rpn.predict_on_batch(X) 186 | 187 | R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300) 188 | # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format 189 | X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping) 190 | 191 | if X2 is None: 192 | rpn_accuracy_rpn_monitor.append(0) 193 | rpn_accuracy_for_epoch.append(0) 194 | continue 195 | 196 | neg_samples = np.where(Y1[0, :, -1] == 1) 197 | pos_samples = np.where(Y1[0, :, -1] == 0) 198 | 199 | if len(neg_samples) > 0: 200 | neg_samples = neg_samples[0] 201 | else: 202 | neg_samples = [] 203 | 204 | if len(pos_samples) > 0: 205 | pos_samples = pos_samples[0] 206 | else: 207 | pos_samples = [] 208 | 209 | rpn_accuracy_rpn_monitor.append(len(pos_samples)) 210 | rpn_accuracy_for_epoch.append((len(pos_samples))) 211 | 212 | if C.num_rois > 1: 213 | if len(pos_samples) < C.num_rois//2: 214 | selected_pos_samples = pos_samples.tolist() 215 | else: 216 | selected_pos_samples = np.random.choice(pos_samples, C.num_rois//2, replace=False).tolist() 217 | try: 218 | selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples), replace=False).tolist() 219 | except: 220 | selected_neg_samples = np.random.choice(neg_samples, C.num_rois - len(selected_pos_samples), replace=True).tolist() 221 | 222 | sel_samples = selected_pos_samples + selected_neg_samples 223 | else: 224 | # in the extreme case where num_rois = 1, we pick a random pos or neg sample 225 | selected_pos_samples = pos_samples.tolist() 226 | selected_neg_samples = neg_samples.tolist() 227 | if np.random.randint(0, 2): 228 | sel_samples = random.choice(neg_samples) 229 | else: 230 | sel_samples = random.choice(pos_samples) 231 | 232 | loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]]) 233 | 234 | losses[iter_num, 0] = loss_rpn[1] 235 | losses[iter_num, 1] = loss_rpn[2] 236 | 237 | losses[iter_num, 2] = loss_class[1] 238 | losses[iter_num, 3] = loss_class[2] 239 | losses[iter_num, 4] = loss_class[3] 240 | 241 | iter_num += 1 242 | 243 | progbar.update(iter_num, [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])), 244 | ('detector_cls', np.mean(losses[:iter_num, 2])), ('detector_regr', np.mean(losses[:iter_num, 3]))]) 245 | 246 | if iter_num == epoch_length: 247 | loss_rpn_cls = np.mean(losses[:, 0]) 248 | loss_rpn_regr = np.mean(losses[:, 1]) 249 | loss_class_cls = np.mean(losses[:, 2]) 250 | loss_class_regr = np.mean(losses[:, 3]) 251 | class_acc = np.mean(losses[:, 4]) 252 | 253 | mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch) 254 | rpn_accuracy_for_epoch = [] 255 | 256 | if C.verbose: 257 | print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes)) 258 | print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc)) 259 | print('Loss RPN classifier: {}'.format(loss_rpn_cls)) 260 | print('Loss RPN regression: {}'.format(loss_rpn_regr)) 261 | print('Loss Detector classifier: {}'.format(loss_class_cls)) 262 | print('Loss Detector regression: {}'.format(loss_class_regr)) 263 | print('Elapsed time: {}'.format(time.time() - start_time)) 264 | 265 | curr_loss = loss_rpn_cls + loss_rpn_regr 
+ loss_class_cls + loss_class_regr 266 | iter_num = 0 267 | start_time = time.time() 268 | 269 | if curr_loss < best_loss: 270 | if C.verbose: 271 | print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss)) 272 | best_loss = curr_loss 273 | model_all.save_weights(C.model_path) 274 | 275 | break 276 | 277 | except Exception as e: 278 | print('Exception: {}'.format(e)) 279 | continue 280 | 281 | print('Training complete, exiting.') 282 | --------------------------------------------------------------------------------
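
As a quick illustration of the proposal filtering used throughout `test_frcnn.py`, `measure_map.py` and `train_frcnn.py`, the sketch below exercises `roi_helpers.non_max_suppression_fast` on toy data. This is a minimal example, not part of the repository; it assumes the `keras_frcnn` package and its dependencies are importable, and the box coordinates and scores are made up.

```python
# Minimal sketch (not from this repo): toy run of the NMS helper defined in
# keras_frcnn/roi_helpers.py. Boxes are (x1, y1, x2, y2); probs are scores.
import numpy as np
from keras_frcnn import roi_helpers  # assumes the package is on PYTHONPATH

boxes = np.array([
    [10, 10, 60, 60],      # box A
    [12, 12, 62, 62],      # box B, almost the same region as A (IoU ~ 0.85)
    [200, 200, 260, 260],  # box C, disjoint from A and B
])
probs = np.array([0.9, 0.6, 0.8])

# With overlap_thresh=0.5, B is suppressed by the higher-scoring A; C is kept.
kept_boxes, kept_probs = roi_helpers.non_max_suppression_fast(
    boxes, probs, overlap_thresh=0.5, max_boxes=300)
print(kept_boxes)   # [[ 10  10  60  60] [200 200 260 260]]
print(kept_probs)   # [0.9 0.8]
```

The scripts use this routine in two places: indirectly through `rpn_to_roi` on the RPN anchors (called with `overlap_thresh=0.7` in the test, mAP and training scripts), and directly on the per-class detector outputs with `overlap_thresh=0.5` before the final boxes are drawn or scored.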