├── Caffe-SSD-Models ├── MobileNet-SSD │ ├── MobileNetSSD_deploy.prototxt │ ├── MobileNetSSD_test.prototxt │ ├── MobileNetSSD_train.prototxt │ ├── iris_ssd.cpp │ ├── iris_ssd.py │ ├── mobilenet_300x300_ssd_iter_3000.caffemodel │ ├── result.bmp │ ├── solver_test.prototxt │ ├── solver_train.prototxt │ └── train_mobilenet_ssd.sh ├── ResNet10-SSD-half │ ├── deploy.half.prototxt │ ├── iris_ssd.cpp │ ├── iris_ssd.py │ ├── res10_300x300_ssd.half_iter_140000.caffemodel │ ├── result.bmp │ ├── solver.half.prototxt │ ├── test.half.prototxt │ └── train.half.prototxt └── ResNet10-SSD │ ├── deploy.prototxt │ ├── iris_ssd.cpp │ ├── iris_ssd.py │ ├── res10_300x300_ssd_iter_140000.caffemodel │ ├── result.bmp │ ├── solver.prototxt │ ├── test.prototxt │ └── train.prototxt ├── README.md ├── how_to_train_iris_detector_with_caffe_ssd.md ├── images └── S2353L09.jpg ├── results ├── 1.png ├── 10.png ├── 11.png ├── 12.png ├── 2.png ├── 3.png ├── 4.png ├── 5.png ├── 6.png ├── 7.png ├── 8.png ├── 9.png └── speed_test.png └── 如何使用Caffe-SSD框架训练虹膜检测模型.md /Caffe-SSD-Models/MobileNet-SSD/iris_ssd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | using namespace cv; 8 | using namespace cv::dnn; 9 | 10 | #include 11 | #include 12 | using namespace std; 13 | 14 | const size_t inWidth = 300; 15 | const size_t inHeight = 300; 16 | const double inScaleFactor = 1.0; 17 | const Scalar meanVal(128); 18 | 19 | const char* about = "This sample uses Single-Shot Detector " 20 | "(https://arxiv.org/abs/1512.02325) " 21 | "with ResNet-10 architecture to detect faces on camera/video/image.\n" 22 | "More information about the training is available here: " 23 | "/samples/dnn/face_detector/how_to_train_face_detector.txt\n" 24 | ".caffemodel model's file is available here: " 25 | "/samples/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel\n" 26 | ".prototxt file is available here: " 27 | 
"/samples/dnn/face_detector/deploy.prototxt\n"; 28 | 29 | //const char* params 30 | // = "{ help | false | print usage }" 31 | // "{ proto | deploy.prototxt | model configuration (deploy.prototxt) }" 32 | // "{ model | res10_300x300_ssd_iter_31000.caffemodel | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" 33 | // "{ camera_device | 0 | camera device number }" 34 | // "{ video | | video or image for detection }" 35 | // "{ min_confidence | 0.5 | min confidence }"; 36 | 37 | const char* params 38 | = "{ help | false | print usage }" 39 | "{ proto | deploy.half.prototxt | model configuration (deploy.prototxt) }" 40 | "{ model | res10_300x300_ssd.half_iter_31000.caffemodel | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" 41 | "{ camera_device | 0 | camera device number }" 42 | "{ video | | video or image for detection }" 43 | "{ min_confidence | 0.5 | min confidence }"; 44 | 45 | int main(int argc, char** argv) 46 | { 47 | CommandLineParser parser(argc, argv, params); 48 | 49 | if (parser.get("help")) 50 | { 51 | cout << about << endl; 52 | parser.printMessage(); 53 | return 0; 54 | } 55 | 56 | String modelConfiguration = parser.get("proto"); 57 | String modelBinary = parser.get("model"); 58 | 59 | //! [Initialize network] 60 | dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary); 61 | //! 
[Initialize network] 62 | 63 | if (net.empty()) 64 | { 65 | cerr << "Can't load network by using the following files: " << endl; 66 | cerr << "prototxt: " << modelConfiguration << endl; 67 | cerr << "caffemodel: " << modelBinary << endl; 68 | cerr << "Models are available here:" << endl; 69 | cerr << "/samples/dnn/face_detector" << endl; 70 | cerr << "or here:" << endl; 71 | cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl; 72 | exit(-1); 73 | } 74 | 75 | // net.setPreferableBackend(DNN_BACKEND_HALIDE); 76 | // net.setPreferableTarget(DNN_TARGET_CPU); 77 | 78 | // VideoCapture cap; 79 | // if (parser.get("video").empty()) 80 | // { 81 | // int cameraDevice = parser.get("camera_device"); 82 | // cap = VideoCapture(cameraDevice); 83 | // if(!cap.isOpened()) 84 | // { 85 | // cout << "Couldn't find camera: " << cameraDevice << endl; 86 | // return -1; 87 | // } 88 | // } 89 | // else 90 | // { 91 | // cap.open(parser.get("video")); 92 | // if(!cap.isOpened()) 93 | // { 94 | // cout << "Couldn't open image or video: " << parser.get("video") << endl; 95 | // return -1; 96 | // } 97 | // } 98 | 99 | int cnt = 0; 100 | 101 | for(;;) 102 | { 103 | Mat image; 104 | // cap >> image; // get a new frame from camera/video or read image 105 | 106 | // if (image.empty()) 107 | // { 108 | // waitKey(); 109 | // break; 110 | // } 111 | 112 | image = cv::imread("images/S2353L09.jpg", 1); 113 | 114 | // cv::resize(image, image, cv::Size(0, 0), 0.8, 0.8); 115 | 116 | cv::Mat image_result = image.clone(); 117 | 118 | cv::Mat gray; 119 | cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY); 120 | 121 | int bt = cv::getTickCount(); 122 | 123 | //! [Prepare blob] 124 | //! image: 3 channels 125 | Mat inputBlob = blobFromImage(gray, inScaleFactor, 126 | Size(inWidth, inHeight), Scalar(128), false, false); //Convert Mat to batch of images 127 | //! [Prepare blob] 128 | 129 | //! 
[Set input blob] 130 | net.setInput(inputBlob, "data"); //set the network input 131 | //! [Set input blob] 132 | 133 | //! [Make forward pass] 134 | Mat detection = net.forward("detection_out"); //compute output 135 | //! [Make forward pass] 136 | 137 | vector layersTimings; 138 | double freq = getTickFrequency() / 1000; 139 | double time = net.getPerfProfile(layersTimings) / freq; 140 | 141 | Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr()); 142 | 143 | int et = cv::getTickCount(); 144 | int t = (et - bt) * 1000.0 / cv::getTickFrequency(); 145 | 146 | cout << t << " ms" << endl; 147 | 148 | ostringstream ss; 149 | ss << "FPS: " << 1000/time << " ; time: " << int(time) << " ms"; 150 | putText(image_result, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255)); 151 | 152 | float confidenceThreshold = parser.get("min_confidence"); 153 | for(int i = 0; i < detectionMat.rows; i++) 154 | { 155 | float confidence = detectionMat.at(i, 2); 156 | 157 | if(confidence > confidenceThreshold) 158 | { 159 | int xLeftBottom = static_cast(detectionMat.at(i, 3) * image.cols); 160 | int yLeftBottom = static_cast(detectionMat.at(i, 4) * image.rows); 161 | int xRightTop = static_cast(detectionMat.at(i, 5) * image.cols); 162 | int yRightTop = static_cast(detectionMat.at(i, 6) * image.rows); 163 | 164 | Rect object((int)xLeftBottom, (int)yLeftBottom, 165 | (int)(xRightTop - xLeftBottom), 166 | (int)(yRightTop - yLeftBottom)); 167 | 168 | rectangle(image_result, object, Scalar(0, 255, 0)); 169 | 170 | ss.str(""); 171 | ss << confidence; 172 | String conf(ss.str()); 173 | String label = "Iris: " + conf; 174 | int baseLine = 0; 175 | Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 176 | rectangle(image_result, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height), 177 | Size(labelSize.width, labelSize.height + baseLine)), 178 | Scalar(255, 255, 255), CV_FILLED); 179 | putText(image_result, label, Point(xLeftBottom, yLeftBottom), 180 
| FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); 181 | } 182 | } 183 | 184 | imshow("detections", image_result); 185 | int key = waitKey(1); 186 | if (key == 'q') 187 | break; 188 | if(key == 's') { 189 | imwrite("image.jpg", image_result); 190 | } 191 | 192 | } 193 | 194 | return 0; 195 | } // main 196 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/MobileNet-SSD/iris_ssd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import cv2 as cv 4 | try: 5 | import cv2 as cv 6 | except ImportError: 7 | raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, ' 8 | 'configure environemnt variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)') 9 | 10 | from cv2 import dnn 11 | 12 | inWidth = 300 13 | inHeight = 300 14 | confThreshold = 0.5 15 | 16 | prototxt = 'MobileNetSSD_deploy.prototxt' 17 | caffemodel = 'mobilenet_300x300_ssd_iter_3000.caffemodel' 18 | 19 | if __name__ == '__main__': 20 | net = dnn.readNetFromCaffe(prototxt, caffemodel) 21 | while True: 22 | frame = cv.imread("../../images/S2353L09.jpg", 1) 23 | 24 | gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) 25 | 26 | cols = frame.shape[1] 27 | rows = frame.shape[0] 28 | 29 | net.setInput(dnn.blobFromImage(gray, 1.0, (inWidth, inHeight), (128), False, False)) 30 | detections = net.forward() 31 | 32 | # print(detections) 33 | 34 | perf_stats = net.getPerfProfile() 35 | 36 | infer_time = perf_stats[0] / cv.getTickFrequency() * 1000 37 | fps = 1000 / infer_time 38 | fps_time_str = 'fps = {0}, time = {1} ms'.format(int(fps), int(infer_time)) 39 | cv.putText(frame, fps_time_str, (50, 50), 40 | cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255)) 41 | 42 | for i in range(detections.shape[2]): 43 | confidence = detections[0, 0, i, 2] 44 | if confidence > confThreshold: 45 | xLeftBottom = int(detections[0, 0, i, 
3] * cols) 46 | yLeftBottom = int(detections[0, 0, i, 4] * rows) 47 | xRightTop = int(detections[0, 0, i, 5] * cols) 48 | yRightTop = int(detections[0, 0, i, 6] * rows) 49 | 50 | cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop), 51 | (0, 255, 0)) 52 | label = "iris: %.4f" % confidence 53 | labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1) 54 | 55 | cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]), 56 | (xLeftBottom + labelSize[0], yLeftBottom + baseLine), 57 | (0, 0, 0), cv.FILLED) 58 | cv.putText(frame, label, (xLeftBottom, yLeftBottom), 59 | cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) 60 | 61 | if frame.shape[1] > 800 or frame.shape[0] > 800: 62 | frame = cv.resize(frame, dsize=(0,0), fx=0.5, fy=0.5) 63 | cv.imshow("detections", frame) 64 | if cv.waitKey(1) == int(ord('s')): 65 | cv.imwrite("result.bmp", frame) 66 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/MobileNet-SSD/mobilenet_300x300_ssd_iter_3000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/Caffe-SSD-Models/MobileNet-SSD/mobilenet_300x300_ssd_iter_3000.caffemodel -------------------------------------------------------------------------------- /Caffe-SSD-Models/MobileNet-SSD/result.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/Caffe-SSD-Models/MobileNet-SSD/result.bmp -------------------------------------------------------------------------------- /Caffe-SSD-Models/MobileNet-SSD/solver_test.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/MobileNet-SSD/MobileNetSSD_train.prototxt" 2 | test_net: "models/MobileNet-SSD/MobileNetSSD_test.prototxt" 
3 | test_iter: 673 4 | test_interval: 10000 5 | base_lr: 0.0005 6 | display: 10 7 | max_iter: 0 8 | lr_policy: "multistep" 9 | gamma: 0.5 10 | weight_decay: 0.00005 11 | snapshot: 0 12 | snapshot_prefix: "models/MobileNet-SSD/snapshot/mobilenet_300x300_ssd" 13 | solver_mode: GPU 14 | debug_info: false 15 | snapshot_after_train: false 16 | test_initialization: true 17 | average_loss: 10 18 | stepvalue: 20000 19 | stepvalue: 40000 20 | iter_size: 1 21 | type: "RMSProp" 22 | eval_type: "detection" 23 | ap_version: "11point" 24 | 25 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/MobileNet-SSD/solver_train.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/MobileNet-SSD/MobileNetSSD_train.prototxt" 2 | test_net: "models/MobileNet-SSD/MobileNetSSD_test.prototxt" 3 | test_iter: 673 4 | test_interval: 10000 5 | base_lr: 0.0005 6 | display: 10 7 | max_iter: 120000 8 | lr_policy: "multistep" 9 | gamma: 0.5 10 | weight_decay: 0.00005 11 | snapshot: 1000 12 | snapshot_prefix: "models/MobileNet-SSD/snapshot/mobilenet_300x300_ssd" 13 | solver_mode: GPU 14 | debug_info: false 15 | snapshot_after_train: true 16 | test_initialization: false 17 | average_loss: 10 18 | stepvalue: 20000 19 | stepvalue: 40000 20 | iter_size: 1 21 | type: "RMSProp" 22 | eval_type: "detection" 23 | ap_version: "11point" 24 | 25 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/MobileNet-SSD/train_mobilenet_ssd.sh: -------------------------------------------------------------------------------- 1 | nohup ./build/tools/caffe train \ 2 | --solver="models/MobileNet-SSD/solver_train.prototxt" \ 3 | --gpu 0 2>&1 | tee /home/tim/deep_learning/caffe/models/MobileNet-SSD/log/MobileNet_iris_dataset_SSD_300x300.log & 4 | -------------------------------------------------------------------------------- 
/Caffe-SSD-Models/ResNet10-SSD-half/deploy.half.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 1 5 | dim: 300 6 | dim: 300 7 | } 8 | 9 | layer { 10 | name: "data_bn" 11 | type: "BatchNorm" 12 | bottom: "data" 13 | top: "data_bn" 14 | param { 15 | lr_mult: 0.0 16 | } 17 | param { 18 | lr_mult: 0.0 19 | } 20 | param { 21 | lr_mult: 0.0 22 | } 23 | } 24 | layer { 25 | name: "data_scale" 26 | type: "Scale" 27 | bottom: "data_bn" 28 | top: "data_bn" 29 | param { 30 | lr_mult: 1.0 31 | decay_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | decay_mult: 1.0 36 | } 37 | scale_param { 38 | bias_term: true 39 | } 40 | } 41 | layer { 42 | name: "conv1_h" 43 | type: "Convolution" 44 | bottom: "data_bn" 45 | top: "conv1_h" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 1.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 3 57 | kernel_size: 7 58 | stride: 2 59 | weight_filler { 60 | type: "msra" 61 | variance_norm: FAN_OUT 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0.0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "conv1_bn_h" 71 | type: "BatchNorm" 72 | bottom: "conv1_h" 73 | top: "conv1_h" 74 | param { 75 | lr_mult: 0.0 76 | } 77 | param { 78 | lr_mult: 0.0 79 | } 80 | param { 81 | lr_mult: 0.0 82 | } 83 | } 84 | layer { 85 | name: "conv1_scale_h" 86 | type: "Scale" 87 | bottom: "conv1_h" 88 | top: "conv1_h" 89 | param { 90 | lr_mult: 1.0 91 | decay_mult: 1.0 92 | } 93 | param { 94 | lr_mult: 2.0 95 | decay_mult: 1.0 96 | } 97 | scale_param { 98 | bias_term: true 99 | } 100 | } 101 | layer { 102 | name: "conv1_relu" 103 | type: "ReLU" 104 | bottom: "conv1_h" 105 | top: "conv1_h" 106 | } 107 | layer { 108 | name: "conv1_pool" 109 | type: "Pooling" 110 | bottom: "conv1_h" 111 | top: "conv1_pool" 112 | pooling_param { 113 | kernel_size: 3 114 | stride: 2 115 | } 116 | } 117 | layer { 118 | 
name: "layer_64_1_conv1_h" 119 | type: "Convolution" 120 | bottom: "conv1_pool" 121 | top: "layer_64_1_conv1_h" 122 | param { 123 | lr_mult: 1.0 124 | decay_mult: 1.0 125 | } 126 | convolution_param { 127 | num_output: 32 128 | bias_term: false 129 | pad: 1 130 | kernel_size: 3 131 | stride: 1 132 | weight_filler { 133 | type: "msra" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.0 138 | } 139 | } 140 | } 141 | layer { 142 | name: "layer_64_1_bn2_h" 143 | type: "BatchNorm" 144 | bottom: "layer_64_1_conv1_h" 145 | top: "layer_64_1_conv1_h" 146 | param { 147 | lr_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | } 152 | param { 153 | lr_mult: 0.0 154 | } 155 | } 156 | layer { 157 | name: "layer_64_1_scale2_h" 158 | type: "Scale" 159 | bottom: "layer_64_1_conv1_h" 160 | top: "layer_64_1_conv1_h" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 1.0 168 | } 169 | scale_param { 170 | bias_term: true 171 | } 172 | } 173 | layer { 174 | name: "layer_64_1_relu2" 175 | type: "ReLU" 176 | bottom: "layer_64_1_conv1_h" 177 | top: "layer_64_1_conv1_h" 178 | } 179 | layer { 180 | name: "layer_64_1_conv2_h" 181 | type: "Convolution" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv2_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | convolution_param { 189 | num_output: 32 190 | bias_term: false 191 | pad: 1 192 | kernel_size: 3 193 | stride: 1 194 | weight_filler { 195 | type: "msra" 196 | } 197 | bias_filler { 198 | type: "constant" 199 | value: 0.0 200 | } 201 | } 202 | } 203 | layer { 204 | name: "layer_64_1_sum" 205 | type: "Eltwise" 206 | bottom: "layer_64_1_conv2_h" 207 | bottom: "conv1_pool" 208 | top: "layer_64_1_sum" 209 | } 210 | layer { 211 | name: "layer_128_1_bn1_h" 212 | type: "BatchNorm" 213 | bottom: "layer_64_1_sum" 214 | top: "layer_128_1_bn1_h" 215 | param { 216 | lr_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | } 221 | param { 
222 | lr_mult: 0.0 223 | } 224 | } 225 | layer { 226 | name: "layer_128_1_scale1_h" 227 | type: "Scale" 228 | bottom: "layer_128_1_bn1_h" 229 | top: "layer_128_1_bn1_h" 230 | param { 231 | lr_mult: 1.0 232 | decay_mult: 1.0 233 | } 234 | param { 235 | lr_mult: 2.0 236 | decay_mult: 1.0 237 | } 238 | scale_param { 239 | bias_term: true 240 | } 241 | } 242 | layer { 243 | name: "layer_128_1_relu1" 244 | type: "ReLU" 245 | bottom: "layer_128_1_bn1_h" 246 | top: "layer_128_1_bn1_h" 247 | } 248 | layer { 249 | name: "layer_128_1_conv1_h" 250 | type: "Convolution" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_conv1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | convolution_param { 258 | num_output: 64 259 | bias_term: false 260 | pad: 1 261 | kernel_size: 3 262 | stride: 2 263 | weight_filler { 264 | type: "msra" 265 | } 266 | bias_filler { 267 | type: "constant" 268 | value: 0.0 269 | } 270 | } 271 | } 272 | layer { 273 | name: "layer_128_1_bn2" 274 | type: "BatchNorm" 275 | bottom: "layer_128_1_conv1_h" 276 | top: "layer_128_1_conv1_h" 277 | param { 278 | lr_mult: 0.0 279 | } 280 | param { 281 | lr_mult: 0.0 282 | } 283 | param { 284 | lr_mult: 0.0 285 | } 286 | } 287 | layer { 288 | name: "layer_128_1_scale2" 289 | type: "Scale" 290 | bottom: "layer_128_1_conv1_h" 291 | top: "layer_128_1_conv1_h" 292 | param { 293 | lr_mult: 1.0 294 | decay_mult: 1.0 295 | } 296 | param { 297 | lr_mult: 2.0 298 | decay_mult: 1.0 299 | } 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "layer_128_1_relu2" 306 | type: "ReLU" 307 | bottom: "layer_128_1_conv1_h" 308 | top: "layer_128_1_conv1_h" 309 | } 310 | layer { 311 | name: "layer_128_1_conv2" 312 | type: "Convolution" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv2" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | convolution_param { 320 | num_output: 64 321 | bias_term: false 322 | pad: 1 323 | kernel_size: 3 324 | stride: 1 
325 | weight_filler { 326 | type: "msra" 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0.0 331 | } 332 | } 333 | } 334 | layer { 335 | name: "layer_128_1_conv_expand_h" 336 | type: "Convolution" 337 | bottom: "layer_128_1_bn1_h" 338 | top: "layer_128_1_conv_expand_h" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | convolution_param { 344 | num_output: 64 345 | bias_term: false 346 | pad: 0 347 | kernel_size: 1 348 | stride: 2 349 | weight_filler { 350 | type: "msra" 351 | } 352 | bias_filler { 353 | type: "constant" 354 | value: 0.0 355 | } 356 | } 357 | } 358 | layer { 359 | name: "layer_128_1_sum" 360 | type: "Eltwise" 361 | bottom: "layer_128_1_conv2" 362 | bottom: "layer_128_1_conv_expand_h" 363 | top: "layer_128_1_sum" 364 | } 365 | layer { 366 | name: "layer_256_1_bn1" 367 | type: "BatchNorm" 368 | bottom: "layer_128_1_sum" 369 | top: "layer_256_1_bn1" 370 | param { 371 | lr_mult: 0.0 372 | } 373 | param { 374 | lr_mult: 0.0 375 | } 376 | param { 377 | lr_mult: 0.0 378 | } 379 | } 380 | layer { 381 | name: "layer_256_1_scale1" 382 | type: "Scale" 383 | bottom: "layer_256_1_bn1" 384 | top: "layer_256_1_bn1" 385 | param { 386 | lr_mult: 1.0 387 | decay_mult: 1.0 388 | } 389 | param { 390 | lr_mult: 2.0 391 | decay_mult: 1.0 392 | } 393 | scale_param { 394 | bias_term: true 395 | } 396 | } 397 | layer { 398 | name: "layer_256_1_relu1" 399 | type: "ReLU" 400 | bottom: "layer_256_1_bn1" 401 | top: "layer_256_1_bn1" 402 | } 403 | layer { 404 | name: "layer_256_1_conv1" 405 | type: "Convolution" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_conv1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | convolution_param { 413 | num_output: 128 414 | bias_term: false 415 | pad: 1 416 | kernel_size: 3 417 | stride: 2 418 | weight_filler { 419 | type: "msra" 420 | } 421 | bias_filler { 422 | type: "constant" 423 | value: 0.0 424 | } 425 | } 426 | } 427 | layer { 428 | name: "layer_256_1_bn2" 429 | type: 
"BatchNorm" 430 | bottom: "layer_256_1_conv1" 431 | top: "layer_256_1_conv1" 432 | param { 433 | lr_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | } 438 | param { 439 | lr_mult: 0.0 440 | } 441 | } 442 | layer { 443 | name: "layer_256_1_scale2" 444 | type: "Scale" 445 | bottom: "layer_256_1_conv1" 446 | top: "layer_256_1_conv1" 447 | param { 448 | lr_mult: 1.0 449 | decay_mult: 1.0 450 | } 451 | param { 452 | lr_mult: 2.0 453 | decay_mult: 1.0 454 | } 455 | scale_param { 456 | bias_term: true 457 | } 458 | } 459 | layer { 460 | name: "layer_256_1_relu2" 461 | type: "ReLU" 462 | bottom: "layer_256_1_conv1" 463 | top: "layer_256_1_conv1" 464 | } 465 | layer { 466 | name: "layer_256_1_conv2" 467 | type: "Convolution" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv2" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 128 476 | bias_term: false 477 | pad: 1 478 | kernel_size: 3 479 | stride: 1 480 | weight_filler { 481 | type: "msra" 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.0 486 | } 487 | } 488 | } 489 | layer { 490 | name: "layer_256_1_conv_expand" 491 | type: "Convolution" 492 | bottom: "layer_256_1_bn1" 493 | top: "layer_256_1_conv_expand" 494 | param { 495 | lr_mult: 1.0 496 | decay_mult: 1.0 497 | } 498 | convolution_param { 499 | num_output: 128 500 | bias_term: false 501 | pad: 0 502 | kernel_size: 1 503 | stride: 2 504 | weight_filler { 505 | type: "msra" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.0 510 | } 511 | } 512 | } 513 | layer { 514 | name: "layer_256_1_sum" 515 | type: "Eltwise" 516 | bottom: "layer_256_1_conv2" 517 | bottom: "layer_256_1_conv_expand" 518 | top: "layer_256_1_sum" 519 | } 520 | layer { 521 | name: "layer_512_1_bn1" 522 | type: "BatchNorm" 523 | bottom: "layer_256_1_sum" 524 | top: "layer_512_1_bn1" 525 | param { 526 | lr_mult: 0.0 527 | } 528 | param { 529 | lr_mult: 0.0 530 | } 531 | param { 532 | lr_mult: 
0.0 533 | } 534 | } 535 | layer { 536 | name: "layer_512_1_scale1" 537 | type: "Scale" 538 | bottom: "layer_512_1_bn1" 539 | top: "layer_512_1_bn1" 540 | param { 541 | lr_mult: 1.0 542 | decay_mult: 1.0 543 | } 544 | param { 545 | lr_mult: 2.0 546 | decay_mult: 1.0 547 | } 548 | scale_param { 549 | bias_term: true 550 | } 551 | } 552 | layer { 553 | name: "layer_512_1_relu1" 554 | type: "ReLU" 555 | bottom: "layer_512_1_bn1" 556 | top: "layer_512_1_bn1" 557 | } 558 | layer { 559 | name: "layer_512_1_conv1_h" 560 | type: "Convolution" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_conv1_h" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | convolution_param { 568 | num_output: 64 569 | bias_term: false 570 | pad: 1 571 | kernel_size: 3 572 | stride: 1 # 2 573 | weight_filler { 574 | type: "msra" 575 | } 576 | bias_filler { 577 | type: "constant" 578 | value: 0.0 579 | } 580 | } 581 | } 582 | layer { 583 | name: "layer_512_1_bn2_h" 584 | type: "BatchNorm" 585 | bottom: "layer_512_1_conv1_h" 586 | top: "layer_512_1_conv1_h" 587 | param { 588 | lr_mult: 0.0 589 | } 590 | param { 591 | lr_mult: 0.0 592 | } 593 | param { 594 | lr_mult: 0.0 595 | } 596 | } 597 | layer { 598 | name: "layer_512_1_scale2_h" 599 | type: "Scale" 600 | bottom: "layer_512_1_conv1_h" 601 | top: "layer_512_1_conv1_h" 602 | param { 603 | lr_mult: 1.0 604 | decay_mult: 1.0 605 | } 606 | param { 607 | lr_mult: 2.0 608 | decay_mult: 1.0 609 | } 610 | scale_param { 611 | bias_term: true 612 | } 613 | } 614 | layer { 615 | name: "layer_512_1_relu2" 616 | type: "ReLU" 617 | bottom: "layer_512_1_conv1_h" 618 | top: "layer_512_1_conv1_h" 619 | } 620 | layer { 621 | name: "layer_512_1_conv2_h" 622 | type: "Convolution" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv2_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | convolution_param { 630 | num_output: 128 631 | bias_term: false 632 | pad: 2 # 1 633 | kernel_size: 3 634 | stride: 1 635 | 
dilation: 2 636 | weight_filler { 637 | type: "msra" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0.0 642 | } 643 | } 644 | } 645 | layer { 646 | name: "layer_512_1_conv_expand_h" 647 | type: "Convolution" 648 | bottom: "layer_512_1_bn1" 649 | top: "layer_512_1_conv_expand_h" 650 | param { 651 | lr_mult: 1.0 652 | decay_mult: 1.0 653 | } 654 | convolution_param { 655 | num_output: 128 656 | bias_term: false 657 | pad: 0 658 | kernel_size: 1 659 | stride: 1 # 2 660 | weight_filler { 661 | type: "msra" 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.0 666 | } 667 | } 668 | } 669 | layer { 670 | name: "layer_512_1_sum" 671 | type: "Eltwise" 672 | bottom: "layer_512_1_conv2_h" 673 | bottom: "layer_512_1_conv_expand_h" 674 | top: "layer_512_1_sum" 675 | } 676 | layer { 677 | name: "last_bn_h" 678 | type: "BatchNorm" 679 | bottom: "layer_512_1_sum" 680 | top: "layer_512_1_sum" 681 | param { 682 | lr_mult: 0.0 683 | } 684 | param { 685 | lr_mult: 0.0 686 | } 687 | param { 688 | lr_mult: 0.0 689 | } 690 | } 691 | layer { 692 | name: "last_scale_h" 693 | type: "Scale" 694 | bottom: "layer_512_1_sum" 695 | top: "layer_512_1_sum" 696 | param { 697 | lr_mult: 1.0 698 | decay_mult: 1.0 699 | } 700 | param { 701 | lr_mult: 2.0 702 | decay_mult: 1.0 703 | } 704 | scale_param { 705 | bias_term: true 706 | } 707 | } 708 | layer { 709 | name: "last_relu" 710 | type: "ReLU" 711 | bottom: "layer_512_1_sum" 712 | top: "fc7" 713 | } 714 | 715 | layer { 716 | name: "conv6_1_h" 717 | type: "Convolution" 718 | bottom: "fc7" 719 | top: "conv6_1_h" 720 | param { 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | lr_mult: 2 726 | decay_mult: 0 727 | } 728 | convolution_param { 729 | num_output: 64 730 | pad: 0 731 | kernel_size: 1 732 | stride: 1 733 | weight_filler { 734 | type: "xavier" 735 | } 736 | bias_filler { 737 | type: "constant" 738 | value: 0 739 | } 740 | } 741 | } 742 | layer { 743 | name: "conv6_1_relu" 744 | type: "ReLU" 745 | 
bottom: "conv6_1_h" 746 | top: "conv6_1_h" 747 | } 748 | layer { 749 | name: "conv6_2_h" 750 | type: "Convolution" 751 | bottom: "conv6_1_h" 752 | top: "conv6_2_h" 753 | param { 754 | lr_mult: 1 755 | decay_mult: 1 756 | } 757 | param { 758 | lr_mult: 2 759 | decay_mult: 0 760 | } 761 | convolution_param { 762 | num_output: 128 763 | pad: 1 764 | kernel_size: 3 765 | stride: 2 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0 772 | } 773 | } 774 | } 775 | layer { 776 | name: "conv6_2_relu" 777 | type: "ReLU" 778 | bottom: "conv6_2_h" 779 | top: "conv6_2_h" 780 | } 781 | layer { 782 | name: "conv7_1_h" 783 | type: "Convolution" 784 | bottom: "conv6_2_h" 785 | top: "conv7_1_h" 786 | param { 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | lr_mult: 2 792 | decay_mult: 0 793 | } 794 | convolution_param { 795 | num_output: 32 796 | pad: 0 797 | kernel_size: 1 798 | stride: 1 799 | weight_filler { 800 | type: "xavier" 801 | } 802 | bias_filler { 803 | type: "constant" 804 | value: 0 805 | } 806 | } 807 | } 808 | layer { 809 | name: "conv7_1_relu" 810 | type: "ReLU" 811 | bottom: "conv7_1_h" 812 | top: "conv7_1_h" 813 | } 814 | layer { 815 | name: "conv7_2_h" 816 | type: "Convolution" 817 | bottom: "conv7_1_h" 818 | top: "conv7_2_h" 819 | param { 820 | lr_mult: 1 821 | decay_mult: 1 822 | } 823 | param { 824 | lr_mult: 2 825 | decay_mult: 0 826 | } 827 | convolution_param { 828 | num_output: 64 829 | pad: 1 830 | kernel_size: 3 831 | stride: 2 832 | weight_filler { 833 | type: "xavier" 834 | } 835 | bias_filler { 836 | type: "constant" 837 | value: 0 838 | } 839 | } 840 | } 841 | layer { 842 | name: "conv7_2_relu" 843 | type: "ReLU" 844 | bottom: "conv7_2_h" 845 | top: "conv7_2_h" 846 | } 847 | layer { 848 | name: "conv8_1_h" 849 | type: "Convolution" 850 | bottom: "conv7_2_h" 851 | top: "conv8_1_h" 852 | param { 853 | lr_mult: 1 854 | decay_mult: 1 855 | } 856 | param { 857 | lr_mult: 2 858 | 
decay_mult: 0 859 | } 860 | convolution_param { 861 | num_output: 32 862 | pad: 0 863 | kernel_size: 1 864 | stride: 1 865 | weight_filler { 866 | type: "xavier" 867 | } 868 | bias_filler { 869 | type: "constant" 870 | value: 0 871 | } 872 | } 873 | } 874 | layer { 875 | name: "conv8_1_relu" 876 | type: "ReLU" 877 | bottom: "conv8_1_h" 878 | top: "conv8_1_h" 879 | } 880 | layer { 881 | name: "conv8_2_h" 882 | type: "Convolution" 883 | bottom: "conv8_1_h" 884 | top: "conv8_2_h" 885 | param { 886 | lr_mult: 1 887 | decay_mult: 1 888 | } 889 | param { 890 | lr_mult: 2 891 | decay_mult: 0 892 | } 893 | convolution_param { 894 | num_output: 64 895 | pad: 1 896 | kernel_size: 3 897 | stride: 1 898 | weight_filler { 899 | type: "xavier" 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0 904 | } 905 | } 906 | } 907 | layer { 908 | name: "conv8_2_relu" 909 | type: "ReLU" 910 | bottom: "conv8_2_h" 911 | top: "conv8_2_h" 912 | } 913 | layer { 914 | name: "conv9_1_h" 915 | type: "Convolution" 916 | bottom: "conv8_2_h" 917 | top: "conv9_1_h" 918 | param { 919 | lr_mult: 1 920 | decay_mult: 1 921 | } 922 | param { 923 | lr_mult: 2 924 | decay_mult: 0 925 | } 926 | convolution_param { 927 | num_output: 32 928 | pad: 0 929 | kernel_size: 1 930 | stride: 1 931 | weight_filler { 932 | type: "xavier" 933 | } 934 | bias_filler { 935 | type: "constant" 936 | value: 0 937 | } 938 | } 939 | } 940 | layer { 941 | name: "conv9_1_relu" 942 | type: "ReLU" 943 | bottom: "conv9_1_h" 944 | top: "conv9_1_h" 945 | } 946 | layer { 947 | name: "conv9_2_h" 948 | type: "Convolution" 949 | bottom: "conv9_1_h" 950 | top: "conv9_2_h" 951 | param { 952 | lr_mult: 1 953 | decay_mult: 1 954 | } 955 | param { 956 | lr_mult: 2 957 | decay_mult: 0 958 | } 959 | convolution_param { 960 | num_output: 64 961 | pad: 1 962 | kernel_size: 3 963 | stride: 1 964 | weight_filler { 965 | type: "xavier" 966 | } 967 | bias_filler { 968 | type: "constant" 969 | value: 0 970 | } 971 | } 972 | } 973 | layer { 
974 | name: "conv9_2_relu" 975 | type: "ReLU" 976 | bottom: "conv9_2_h" 977 | top: "conv9_2_h" 978 | } 979 | layer { 980 | name: "conv4_3_norm" 981 | type: "Normalize" 982 | bottom: "layer_256_1_bn1" 983 | top: "conv4_3_norm" 984 | norm_param { 985 | across_spatial: false 986 | scale_filler { 987 | type: "constant" 988 | value: 20 989 | } 990 | channel_shared: false 991 | } 992 | } 993 | layer { 994 | name: "conv4_3_norm_mbox_loc" 995 | type: "Convolution" 996 | bottom: "conv4_3_norm" 997 | top: "conv4_3_norm_mbox_loc" 998 | param { 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | lr_mult: 2 1004 | decay_mult: 0 1005 | } 1006 | convolution_param { 1007 | num_output: 16 1008 | pad: 1 1009 | kernel_size: 3 1010 | stride: 1 1011 | weight_filler { 1012 | type: "xavier" 1013 | } 1014 | bias_filler { 1015 | type: "constant" 1016 | value: 0 1017 | } 1018 | } 1019 | } 1020 | layer { 1021 | name: "conv4_3_norm_mbox_loc_perm" 1022 | type: "Permute" 1023 | bottom: "conv4_3_norm_mbox_loc" 1024 | top: "conv4_3_norm_mbox_loc_perm" 1025 | permute_param { 1026 | order: 0 1027 | order: 2 1028 | order: 3 1029 | order: 1 1030 | } 1031 | } 1032 | layer { 1033 | name: "conv4_3_norm_mbox_loc_flat" 1034 | type: "Flatten" 1035 | bottom: "conv4_3_norm_mbox_loc_perm" 1036 | top: "conv4_3_norm_mbox_loc_flat" 1037 | flatten_param { 1038 | axis: 1 1039 | } 1040 | } 1041 | layer { 1042 | name: "conv4_3_norm_mbox_conf" 1043 | type: "Convolution" 1044 | bottom: "conv4_3_norm" 1045 | top: "conv4_3_norm_mbox_conf" 1046 | param { 1047 | lr_mult: 1 1048 | decay_mult: 1 1049 | } 1050 | param { 1051 | lr_mult: 2 1052 | decay_mult: 0 1053 | } 1054 | convolution_param { 1055 | num_output: 8 # 84 1056 | pad: 1 1057 | kernel_size: 3 1058 | stride: 1 1059 | weight_filler { 1060 | type: "xavier" 1061 | } 1062 | bias_filler { 1063 | type: "constant" 1064 | value: 0 1065 | } 1066 | } 1067 | } 1068 | layer { 1069 | name: "conv4_3_norm_mbox_conf_perm" 1070 | type: "Permute" 1071 | bottom: 
"conv4_3_norm_mbox_conf" 1072 | top: "conv4_3_norm_mbox_conf_perm" 1073 | permute_param { 1074 | order: 0 1075 | order: 2 1076 | order: 3 1077 | order: 1 1078 | } 1079 | } 1080 | layer { 1081 | name: "conv4_3_norm_mbox_conf_flat" 1082 | type: "Flatten" 1083 | bottom: "conv4_3_norm_mbox_conf_perm" 1084 | top: "conv4_3_norm_mbox_conf_flat" 1085 | flatten_param { 1086 | axis: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "conv4_3_norm_mbox_priorbox" 1091 | type: "PriorBox" 1092 | bottom: "conv4_3_norm" 1093 | bottom: "data" 1094 | top: "conv4_3_norm_mbox_priorbox" 1095 | prior_box_param { 1096 | min_size: 30.0 1097 | max_size: 60.0 1098 | aspect_ratio: 2 1099 | flip: true 1100 | clip: false 1101 | variance: 0.1 1102 | variance: 0.1 1103 | variance: 0.2 1104 | variance: 0.2 1105 | step: 8 1106 | offset: 0.5 1107 | } 1108 | } 1109 | layer { 1110 | name: "fc7_mbox_loc" 1111 | type: "Convolution" 1112 | bottom: "fc7" 1113 | top: "fc7_mbox_loc" 1114 | param { 1115 | lr_mult: 1 1116 | decay_mult: 1 1117 | } 1118 | param { 1119 | lr_mult: 2 1120 | decay_mult: 0 1121 | } 1122 | convolution_param { 1123 | num_output: 24 1124 | pad: 1 1125 | kernel_size: 3 1126 | stride: 1 1127 | weight_filler { 1128 | type: "xavier" 1129 | } 1130 | bias_filler { 1131 | type: "constant" 1132 | value: 0 1133 | } 1134 | } 1135 | } 1136 | layer { 1137 | name: "fc7_mbox_loc_perm" 1138 | type: "Permute" 1139 | bottom: "fc7_mbox_loc" 1140 | top: "fc7_mbox_loc_perm" 1141 | permute_param { 1142 | order: 0 1143 | order: 2 1144 | order: 3 1145 | order: 1 1146 | } 1147 | } 1148 | layer { 1149 | name: "fc7_mbox_loc_flat" 1150 | type: "Flatten" 1151 | bottom: "fc7_mbox_loc_perm" 1152 | top: "fc7_mbox_loc_flat" 1153 | flatten_param { 1154 | axis: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "fc7_mbox_conf" 1159 | type: "Convolution" 1160 | bottom: "fc7" 1161 | top: "fc7_mbox_conf" 1162 | param { 1163 | lr_mult: 1 1164 | decay_mult: 1 1165 | } 1166 | param { 1167 | lr_mult: 2 1168 | decay_mult: 0 1169 | 
} 1170 | convolution_param { 1171 | num_output: 12 # 126 1172 | pad: 1 1173 | kernel_size: 3 1174 | stride: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "fc7_mbox_conf_perm" 1186 | type: "Permute" 1187 | bottom: "fc7_mbox_conf" 1188 | top: "fc7_mbox_conf_perm" 1189 | permute_param { 1190 | order: 0 1191 | order: 2 1192 | order: 3 1193 | order: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "fc7_mbox_conf_flat" 1198 | type: "Flatten" 1199 | bottom: "fc7_mbox_conf_perm" 1200 | top: "fc7_mbox_conf_flat" 1201 | flatten_param { 1202 | axis: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "fc7_mbox_priorbox" 1207 | type: "PriorBox" 1208 | bottom: "fc7" 1209 | bottom: "data" 1210 | top: "fc7_mbox_priorbox" 1211 | prior_box_param { 1212 | min_size: 60.0 1213 | max_size: 111.0 1214 | aspect_ratio: 2 1215 | aspect_ratio: 3 1216 | flip: true 1217 | clip: false 1218 | variance: 0.1 1219 | variance: 0.1 1220 | variance: 0.2 1221 | variance: 0.2 1222 | step: 16 1223 | offset: 0.5 1224 | } 1225 | } 1226 | layer { 1227 | name: "conv6_2_mbox_loc" 1228 | type: "Convolution" 1229 | bottom: "conv6_2_h" 1230 | top: "conv6_2_mbox_loc" 1231 | param { 1232 | lr_mult: 1 1233 | decay_mult: 1 1234 | } 1235 | param { 1236 | lr_mult: 2 1237 | decay_mult: 0 1238 | } 1239 | convolution_param { 1240 | num_output: 24 1241 | pad: 1 1242 | kernel_size: 3 1243 | stride: 1 1244 | weight_filler { 1245 | type: "xavier" 1246 | } 1247 | bias_filler { 1248 | type: "constant" 1249 | value: 0 1250 | } 1251 | } 1252 | } 1253 | layer { 1254 | name: "conv6_2_mbox_loc_perm" 1255 | type: "Permute" 1256 | bottom: "conv6_2_mbox_loc" 1257 | top: "conv6_2_mbox_loc_perm" 1258 | permute_param { 1259 | order: 0 1260 | order: 2 1261 | order: 3 1262 | order: 1 1263 | } 1264 | } 1265 | layer { 1266 | name: "conv6_2_mbox_loc_flat" 1267 | type: "Flatten" 1268 | bottom: "conv6_2_mbox_loc_perm" 1269 | 
top: "conv6_2_mbox_loc_flat" 1270 | flatten_param { 1271 | axis: 1 1272 | } 1273 | } 1274 | layer { 1275 | name: "conv6_2_mbox_conf" 1276 | type: "Convolution" 1277 | bottom: "conv6_2_h" 1278 | top: "conv6_2_mbox_conf" 1279 | param { 1280 | lr_mult: 1 1281 | decay_mult: 1 1282 | } 1283 | param { 1284 | lr_mult: 2 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 12 # 126 1289 | pad: 1 1290 | kernel_size: 3 1291 | stride: 1 1292 | weight_filler { 1293 | type: "xavier" 1294 | } 1295 | bias_filler { 1296 | type: "constant" 1297 | value: 0 1298 | } 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv6_2_mbox_conf_perm" 1303 | type: "Permute" 1304 | bottom: "conv6_2_mbox_conf" 1305 | top: "conv6_2_mbox_conf_perm" 1306 | permute_param { 1307 | order: 0 1308 | order: 2 1309 | order: 3 1310 | order: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "conv6_2_mbox_conf_flat" 1315 | type: "Flatten" 1316 | bottom: "conv6_2_mbox_conf_perm" 1317 | top: "conv6_2_mbox_conf_flat" 1318 | flatten_param { 1319 | axis: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "conv6_2_mbox_priorbox" 1324 | type: "PriorBox" 1325 | bottom: "conv6_2_h" 1326 | bottom: "data" 1327 | top: "conv6_2_mbox_priorbox" 1328 | prior_box_param { 1329 | min_size: 111.0 1330 | max_size: 162.0 1331 | aspect_ratio: 2 1332 | aspect_ratio: 3 1333 | flip: true 1334 | clip: false 1335 | variance: 0.1 1336 | variance: 0.1 1337 | variance: 0.2 1338 | variance: 0.2 1339 | step: 32 1340 | offset: 0.5 1341 | } 1342 | } 1343 | layer { 1344 | name: "conv7_2_mbox_loc" 1345 | type: "Convolution" 1346 | bottom: "conv7_2_h" 1347 | top: "conv7_2_mbox_loc" 1348 | param { 1349 | lr_mult: 1 1350 | decay_mult: 1 1351 | } 1352 | param { 1353 | lr_mult: 2 1354 | decay_mult: 0 1355 | } 1356 | convolution_param { 1357 | num_output: 24 1358 | pad: 1 1359 | kernel_size: 3 1360 | stride: 1 1361 | weight_filler { 1362 | type: "xavier" 1363 | } 1364 | bias_filler { 1365 | type: "constant" 1366 | value: 0 1367 | } 1368 | 
} 1369 | } 1370 | layer { 1371 | name: "conv7_2_mbox_loc_perm" 1372 | type: "Permute" 1373 | bottom: "conv7_2_mbox_loc" 1374 | top: "conv7_2_mbox_loc_perm" 1375 | permute_param { 1376 | order: 0 1377 | order: 2 1378 | order: 3 1379 | order: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "conv7_2_mbox_loc_flat" 1384 | type: "Flatten" 1385 | bottom: "conv7_2_mbox_loc_perm" 1386 | top: "conv7_2_mbox_loc_flat" 1387 | flatten_param { 1388 | axis: 1 1389 | } 1390 | } 1391 | layer { 1392 | name: "conv7_2_mbox_conf" 1393 | type: "Convolution" 1394 | bottom: "conv7_2_h" 1395 | top: "conv7_2_mbox_conf" 1396 | param { 1397 | lr_mult: 1 1398 | decay_mult: 1 1399 | } 1400 | param { 1401 | lr_mult: 2 1402 | decay_mult: 0 1403 | } 1404 | convolution_param { 1405 | num_output: 12 # 126 1406 | pad: 1 1407 | kernel_size: 3 1408 | stride: 1 1409 | weight_filler { 1410 | type: "xavier" 1411 | } 1412 | bias_filler { 1413 | type: "constant" 1414 | value: 0 1415 | } 1416 | } 1417 | } 1418 | layer { 1419 | name: "conv7_2_mbox_conf_perm" 1420 | type: "Permute" 1421 | bottom: "conv7_2_mbox_conf" 1422 | top: "conv7_2_mbox_conf_perm" 1423 | permute_param { 1424 | order: 0 1425 | order: 2 1426 | order: 3 1427 | order: 1 1428 | } 1429 | } 1430 | layer { 1431 | name: "conv7_2_mbox_conf_flat" 1432 | type: "Flatten" 1433 | bottom: "conv7_2_mbox_conf_perm" 1434 | top: "conv7_2_mbox_conf_flat" 1435 | flatten_param { 1436 | axis: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "conv7_2_mbox_priorbox" 1441 | type: "PriorBox" 1442 | bottom: "conv7_2_h" 1443 | bottom: "data" 1444 | top: "conv7_2_mbox_priorbox" 1445 | prior_box_param { 1446 | min_size: 162.0 1447 | max_size: 213.0 1448 | aspect_ratio: 2 1449 | aspect_ratio: 3 1450 | flip: true 1451 | clip: false 1452 | variance: 0.1 1453 | variance: 0.1 1454 | variance: 0.2 1455 | variance: 0.2 1456 | step: 64 1457 | offset: 0.5 1458 | } 1459 | } 1460 | layer { 1461 | name: "conv8_2_mbox_loc" 1462 | type: "Convolution" 1463 | bottom: "conv8_2_h" 1464 | 
top: "conv8_2_mbox_loc" 1465 | param { 1466 | lr_mult: 1 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 2 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 16 1475 | pad: 1 1476 | kernel_size: 3 1477 | stride: 1 1478 | weight_filler { 1479 | type: "xavier" 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "conv8_2_mbox_loc_perm" 1489 | type: "Permute" 1490 | bottom: "conv8_2_mbox_loc" 1491 | top: "conv8_2_mbox_loc_perm" 1492 | permute_param { 1493 | order: 0 1494 | order: 2 1495 | order: 3 1496 | order: 1 1497 | } 1498 | } 1499 | layer { 1500 | name: "conv8_2_mbox_loc_flat" 1501 | type: "Flatten" 1502 | bottom: "conv8_2_mbox_loc_perm" 1503 | top: "conv8_2_mbox_loc_flat" 1504 | flatten_param { 1505 | axis: 1 1506 | } 1507 | } 1508 | layer { 1509 | name: "conv8_2_mbox_conf" 1510 | type: "Convolution" 1511 | bottom: "conv8_2_h" 1512 | top: "conv8_2_mbox_conf" 1513 | param { 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | lr_mult: 2 1519 | decay_mult: 0 1520 | } 1521 | convolution_param { 1522 | num_output: 8 # 84 1523 | pad: 1 1524 | kernel_size: 3 1525 | stride: 1 1526 | weight_filler { 1527 | type: "xavier" 1528 | } 1529 | bias_filler { 1530 | type: "constant" 1531 | value: 0 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "conv8_2_mbox_conf_perm" 1537 | type: "Permute" 1538 | bottom: "conv8_2_mbox_conf" 1539 | top: "conv8_2_mbox_conf_perm" 1540 | permute_param { 1541 | order: 0 1542 | order: 2 1543 | order: 3 1544 | order: 1 1545 | } 1546 | } 1547 | layer { 1548 | name: "conv8_2_mbox_conf_flat" 1549 | type: "Flatten" 1550 | bottom: "conv8_2_mbox_conf_perm" 1551 | top: "conv8_2_mbox_conf_flat" 1552 | flatten_param { 1553 | axis: 1 1554 | } 1555 | } 1556 | layer { 1557 | name: "conv8_2_mbox_priorbox" 1558 | type: "PriorBox" 1559 | bottom: "conv8_2_h" 1560 | bottom: "data" 1561 | top: "conv8_2_mbox_priorbox" 1562 | 
prior_box_param { 1563 | min_size: 213.0 1564 | max_size: 264.0 1565 | aspect_ratio: 2 1566 | flip: true 1567 | clip: false 1568 | variance: 0.1 1569 | variance: 0.1 1570 | variance: 0.2 1571 | variance: 0.2 1572 | step: 100 1573 | offset: 0.5 1574 | } 1575 | } 1576 | layer { 1577 | name: "conv9_2_mbox_loc" 1578 | type: "Convolution" 1579 | bottom: "conv9_2_h" 1580 | top: "conv9_2_mbox_loc" 1581 | param { 1582 | lr_mult: 1 1583 | decay_mult: 1 1584 | } 1585 | param { 1586 | lr_mult: 2 1587 | decay_mult: 0 1588 | } 1589 | convolution_param { 1590 | num_output: 16 1591 | pad: 1 1592 | kernel_size: 3 1593 | stride: 1 1594 | weight_filler { 1595 | type: "xavier" 1596 | } 1597 | bias_filler { 1598 | type: "constant" 1599 | value: 0 1600 | } 1601 | } 1602 | } 1603 | layer { 1604 | name: "conv9_2_mbox_loc_perm" 1605 | type: "Permute" 1606 | bottom: "conv9_2_mbox_loc" 1607 | top: "conv9_2_mbox_loc_perm" 1608 | permute_param { 1609 | order: 0 1610 | order: 2 1611 | order: 3 1612 | order: 1 1613 | } 1614 | } 1615 | layer { 1616 | name: "conv9_2_mbox_loc_flat" 1617 | type: "Flatten" 1618 | bottom: "conv9_2_mbox_loc_perm" 1619 | top: "conv9_2_mbox_loc_flat" 1620 | flatten_param { 1621 | axis: 1 1622 | } 1623 | } 1624 | layer { 1625 | name: "conv9_2_mbox_conf" 1626 | type: "Convolution" 1627 | bottom: "conv9_2_h" 1628 | top: "conv9_2_mbox_conf" 1629 | param { 1630 | lr_mult: 1 1631 | decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 8 # 84 1639 | pad: 1 1640 | kernel_size: 3 1641 | stride: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "conv9_2_mbox_conf_perm" 1653 | type: "Permute" 1654 | bottom: "conv9_2_mbox_conf" 1655 | top: "conv9_2_mbox_conf_perm" 1656 | permute_param { 1657 | order: 0 1658 | order: 2 1659 | order: 3 1660 | order: 1 1661 | } 1662 | } 1663 | layer { 
1664 | name: "conv9_2_mbox_conf_flat" 1665 | type: "Flatten" 1666 | bottom: "conv9_2_mbox_conf_perm" 1667 | top: "conv9_2_mbox_conf_flat" 1668 | flatten_param { 1669 | axis: 1 1670 | } 1671 | } 1672 | layer { 1673 | name: "conv9_2_mbox_priorbox" 1674 | type: "PriorBox" 1675 | bottom: "conv9_2_h" 1676 | bottom: "data" 1677 | top: "conv9_2_mbox_priorbox" 1678 | prior_box_param { 1679 | min_size: 264.0 1680 | max_size: 315.0 1681 | aspect_ratio: 2 1682 | flip: true 1683 | clip: false 1684 | variance: 0.1 1685 | variance: 0.1 1686 | variance: 0.2 1687 | variance: 0.2 1688 | step: 300 1689 | offset: 0.5 1690 | } 1691 | } 1692 | layer { 1693 | name: "mbox_loc" 1694 | type: "Concat" 1695 | bottom: "conv4_3_norm_mbox_loc_flat" 1696 | bottom: "fc7_mbox_loc_flat" 1697 | bottom: "conv6_2_mbox_loc_flat" 1698 | bottom: "conv7_2_mbox_loc_flat" 1699 | bottom: "conv8_2_mbox_loc_flat" 1700 | bottom: "conv9_2_mbox_loc_flat" 1701 | top: "mbox_loc" 1702 | concat_param { 1703 | axis: 1 1704 | } 1705 | } 1706 | layer { 1707 | name: "mbox_conf" 1708 | type: "Concat" 1709 | bottom: "conv4_3_norm_mbox_conf_flat" 1710 | bottom: "fc7_mbox_conf_flat" 1711 | bottom: "conv6_2_mbox_conf_flat" 1712 | bottom: "conv7_2_mbox_conf_flat" 1713 | bottom: "conv8_2_mbox_conf_flat" 1714 | bottom: "conv9_2_mbox_conf_flat" 1715 | top: "mbox_conf" 1716 | concat_param { 1717 | axis: 1 1718 | } 1719 | } 1720 | layer { 1721 | name: "mbox_priorbox" 1722 | type: "Concat" 1723 | bottom: "conv4_3_norm_mbox_priorbox" 1724 | bottom: "fc7_mbox_priorbox" 1725 | bottom: "conv6_2_mbox_priorbox" 1726 | bottom: "conv7_2_mbox_priorbox" 1727 | bottom: "conv8_2_mbox_priorbox" 1728 | bottom: "conv9_2_mbox_priorbox" 1729 | top: "mbox_priorbox" 1730 | concat_param { 1731 | axis: 2 1732 | } 1733 | } 1734 | 1735 | layer { 1736 | name: "mbox_conf_reshape" 1737 | type: "Reshape" 1738 | bottom: "mbox_conf" 1739 | top: "mbox_conf_reshape" 1740 | reshape_param { 1741 | shape { 1742 | dim: 0 1743 | dim: -1 1744 | dim: 2 1745 | } 1746 | } 
1747 | } 1748 | layer { 1749 | name: "mbox_conf_softmax" 1750 | type: "Softmax" 1751 | bottom: "mbox_conf_reshape" 1752 | top: "mbox_conf_softmax" 1753 | softmax_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | layer { 1758 | name: "mbox_conf_flatten" 1759 | type: "Flatten" 1760 | bottom: "mbox_conf_softmax" 1761 | top: "mbox_conf_flatten" 1762 | flatten_param { 1763 | axis: 1 1764 | } 1765 | } 1766 | 1767 | layer { 1768 | name: "detection_out" 1769 | type: "DetectionOutput" 1770 | bottom: "mbox_loc" 1771 | bottom: "mbox_conf_flatten" 1772 | bottom: "mbox_priorbox" 1773 | top: "detection_out" 1774 | include { 1775 | phase: TEST 1776 | } 1777 | detection_output_param { 1778 | num_classes: 2 1779 | share_location: true 1780 | background_label_id: 0 1781 | nms_param { 1782 | nms_threshold: 0.45 1783 | top_k: 400 1784 | } 1785 | code_type: CENTER_SIZE 1786 | keep_top_k: 200 1787 | confidence_threshold: 0.01 1788 | } 1789 | } 1790 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD-half/iris_ssd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | using namespace cv; 8 | using namespace cv::dnn; 9 | 10 | #include 11 | #include 12 | using namespace std; 13 | 14 | const size_t inWidth = 300; 15 | const size_t inHeight = 300; 16 | const double inScaleFactor = 1.0; 17 | const Scalar meanVal(128); 18 | 19 | const char* about = "This sample uses Single-Shot Detector " 20 | "(https://arxiv.org/abs/1512.02325) " 21 | "with ResNet-10 architecture to detect faces on camera/video/image.\n" 22 | "More information about the training is available here: " 23 | "/samples/dnn/face_detector/how_to_train_face_detector.txt\n" 24 | ".caffemodel model's file is available here: " 25 | "/samples/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel\n" 26 | ".prototxt file is available here: " 27 | 
"/samples/dnn/face_detector/deploy.prototxt\n"; 28 | 29 | //const char* params 30 | // = "{ help | false | print usage }" 31 | // "{ proto | deploy.prototxt | model configuration (deploy.prototxt) }" 32 | // "{ model | res10_300x300_ssd_iter_31000.caffemodel | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" 33 | // "{ camera_device | 0 | camera device number }" 34 | // "{ video | | video or image for detection }" 35 | // "{ min_confidence | 0.5 | min confidence }"; 36 | 37 | const char* params 38 | = "{ help | false | print usage }" 39 | "{ proto | deploy.half.prototxt | model configuration (deploy.prototxt) }" 40 | "{ model | res10_300x300_ssd.half_iter_31000.caffemodel | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" 41 | "{ camera_device | 0 | camera device number }" 42 | "{ video | | video or image for detection }" 43 | "{ min_confidence | 0.5 | min confidence }"; 44 |
/**
 * Runs the SSD iris detector on one still image in an endless display loop.
 *
 * Loads the Caffe network named by the `proto` / `model` options, converts the
 * input image to grayscale once, and on every iteration runs a forward pass,
 * draws every detection whose confidence exceeds `min_confidence` together
 * with an FPS overlay, and shows the result in the "detections" window.
 *
 * Keys: 'q' leaves the loop, 's' writes the annotated frame to "image.jpg".
 *
 * @param argc  number of command-line arguments
 * @param argv  command-line arguments, parsed according to `params`
 * @return 0 after printing help or after 'q' was pressed; -1 when the network
 *         or the input image could not be loaded.
 */
int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, params);

    if (parser.get<bool>("help"))
    {
        cout << about << endl;
        parser.printMessage();
        return 0;
    }

    const String modelConfiguration = parser.get<String>("proto");
    const String modelBinary = parser.get<String>("model");
    const float confidenceThreshold = parser.get<float>("min_confidence");

    // Honour the documented `video` option as the image to analyse; fall back
    // to the bundled sample image when it is not given.
    String imagePath = parser.get<String>("video");
    if (imagePath.empty())
        imagePath = "images/S2353L09.jpg";

    //! [Initialize network]
    // NOTE(review): the default `model` value in `params` names an iter_31000
    // snapshot; confirm that file exists next to the binary or pass --model.
    dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
    //! [Initialize network]

    if (net.empty())
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "prototxt: " << modelConfiguration << endl;
        cerr << "caffemodel: " << modelBinary << endl;
        cerr << "Models are available here:" << endl;
        cerr << "/samples/dnn/face_detector" << endl;
        cerr << "or here:" << endl;
        cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl;
        return -1;
    }

    // Optional backend/target selection, left disabled as in the original:
    // net.setPreferableBackend(DNN_BACKEND_HALIDE);
    // net.setPreferableTarget(DNN_TARGET_CPU);

    // The input never changes between iterations, so load and convert it once.
    const Mat image = cv::imread(imagePath, 1);
    if (image.empty())
    {
        // Without this check cvtColor() below would fail on an empty Mat.
        cerr << "Couldn't open image: " << imagePath << endl;
        return -1;
    }

    Mat gray;
    cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY);

    for (;;)
    {
        // Draw on a fresh copy so overlays do not accumulate across iterations.
        Mat image_result = image.clone();

        // getTickCount() returns int64; storing it in an int would truncate.
        const int64 bt = cv::getTickCount();

        //! [Prepare blob]
        // The blob is built from the single-channel grayscale image.
        Mat inputBlob = blobFromImage(gray, inScaleFactor,
                                      Size(static_cast<int>(inWidth), static_cast<int>(inHeight)),
                                      meanVal, false, false);
        //! [Prepare blob]

        //! [Set input blob]
        net.setInput(inputBlob, "data");
        //! [Set input blob]

        //! [Make forward pass]
        Mat detection = net.forward("detection_out");
        //! [Make forward pass]

        vector<double> layersTimings;
        const double freq = getTickFrequency() / 1000;
        const double time = net.getPerfProfile(layersTimings) / freq;

        // 2-D view over the output: one row per detection. As used below,
        // column 2 is the confidence and columns 3..6 are box corners that are
        // scaled by the image width/height.
        Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

        const int64 et = cv::getTickCount();
        const int t = static_cast<int>((et - bt) * 1000.0 / cv::getTickFrequency());

        cout << t << " ms" << endl;

        ostringstream ss;
        ss << "FPS: " << 1000/time << " ; time: " << static_cast<int>(time) << " ms";
        putText(image_result, ss.str(), Point(20,20), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,255));

        for (int i = 0; i < detectionMat.rows; i++)
        {
            const float confidence = detectionMat.at<float>(i, 2);
            if (confidence <= confidenceThreshold)
                continue;

            const int left   = static_cast<int>(detectionMat.at<float>(i, 3) * image.cols);
            const int top    = static_cast<int>(detectionMat.at<float>(i, 4) * image.rows);
            const int right  = static_cast<int>(detectionMat.at<float>(i, 5) * image.cols);
            const int bottom = static_cast<int>(detectionMat.at<float>(i, 6) * image.rows);

            const Rect object(left, top, right - left, bottom - top);
            rectangle(image_result, object, Scalar(0, 255, 0));

            ostringstream labelStream;
            labelStream << "Iris: " << confidence;
            const String label(labelStream.str());

            // White filled background sized to the text, then the label on top.
            int baseLine = 0;
            const Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
            rectangle(image_result, Rect(Point(left, top - labelSize.height),
                                         Size(labelSize.width, labelSize.height + baseLine)),
                      Scalar(255, 255, 255), FILLED);
            putText(image_result, label, Point(left, top),
                    FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
        }

        imshow("detections", image_result);
        const int key = waitKey(1);
        if (key == 'q')
            break;
        if (key == 's')
            imwrite("image.jpg", image_result);
    }

    return 0;
} // main
196 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD-half/iris_ssd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import cv2 as cv 4 | try: 5 | import cv2 as cv 6 | except ImportError: 7 | raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, ' 8 | 'configure environemnt variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)') 9 | 10 | from cv2 import dnn 11 | 12 | inWidth = 300 13 | inHeight = 300 14 | confThreshold = 0.5 15 | 16 | prototxt = 'deploy.half.prototxt' 17 | caffemodel = 'res10_300x300_ssd.half_iter_140000.caffemodel' 18 | 19 | if __name__ == '__main__': 20 | net = dnn.readNetFromCaffe(prototxt, caffemodel) 21 | while True: 22 | frame = cv.imread("../../images/S2353L09.jpg", 1) 23 | 24 | gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) 25 | 26 | cols = frame.shape[1] 27 | rows = frame.shape[0] 28 | 29 | net.setInput(dnn.blobFromImage(gray, 1.0, (inWidth, inHeight), (128), False, False)) 30 | detections = net.forward() 31 | 32 | # print(detections) 33 | 34 | perf_stats = net.getPerfProfile() 35 | 36 | infer_time = perf_stats[0] / cv.getTickFrequency() * 1000 37 | fps = 1000 / infer_time 38 | fps_time_str = 'fps = {0}, time = {1} ms'.format(int(fps), int(infer_time)) 39 | cv.putText(frame, fps_time_str, (50, 50), 40 | cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255)) 41 | 42 | for i in range(detections.shape[2]): 43 | confidence = detections[0, 0, i, 2] 44 | if confidence > confThreshold: 45 | xLeftBottom = int(detections[0, 0, i, 
3] * cols) 46 | yLeftBottom = int(detections[0, 0, i, 4] * rows) 47 | xRightTop = int(detections[0, 0, i, 5] * cols) 48 | yRightTop = int(detections[0, 0, i, 6] * rows) 49 | 50 | cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop), 51 | (0, 255, 0)) 52 | label = "iris: %.4f" % confidence 53 | labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1) 54 | 55 | cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]), 56 | (xLeftBottom + labelSize[0], yLeftBottom + baseLine), 57 | (0, 0, 0), cv.FILLED) 58 | cv.putText(frame, label, (xLeftBottom, yLeftBottom), 59 | cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) 60 | 61 | if frame.shape[1] > 800 or frame.shape[0] > 800: 62 | frame = cv.resize(frame, dsize=(0,0), fx=0.5, fy=0.5) 63 | cv.imshow("detections", frame) 64 | if cv.waitKey(1) == int(ord('s')): 65 | cv.imwrite("result.bmp", frame) 66 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD-half/res10_300x300_ssd.half_iter_140000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/Caffe-SSD-Models/ResNet10-SSD-half/res10_300x300_ssd.half_iter_140000.caffemodel -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD-half/result.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/Caffe-SSD-Models/ResNet10-SSD-half/result.bmp -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD-half/solver.half.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/ResNet10/train.prototxt" 2 | test_net: "models/ResNet10/test.prototxt" 3 | 4 | 
test_iter: 2312 5 | test_interval: 5000 6 | test_initialization: true 7 | 8 | base_lr: 0.01 9 | display: 10 10 | lr_policy: "multistep" 11 | max_iter: 140000 12 | stepvalue: 80000 13 | stepvalue: 120000 14 | gamma: 0.1 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | average_loss: 500 18 | iter_size: 1 19 | type: "SGD" 20 | 21 | solver_mode: GPU 22 | random_seed: 0 23 | debug_info: false 24 | snapshot: 1000 25 | snapshot_prefix: "models/ResNet10/snapshot/res10_300x300_ssd" 26 | 27 | eval_type: "detection" 28 | ap_version: "11point" 29 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD-half/test.half.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "data" 3 | type: "AnnotatedData" 4 | top: "data" 5 | top: "label" 6 | include { 7 | phase: TEST 8 | } 9 | transform_param { 10 | mean_value: 128 11 | resize_param { 12 | prob: 1 13 | resize_mode: WARP 14 | height: 300 15 | width: 300 16 | interp_mode: LINEAR 17 | } 18 | emit_constraint { 19 | emit_type: CENTER 20 | } 21 | } 22 | data_param { 23 | source: "examples/iris_dataset/iris_dataset_test_lmdb" 24 | batch_size: 8 25 | backend: LMDB 26 | } 27 | annotated_data_param { 28 | label_map_file: "labelmap.prototxt" 29 | } 30 | } 31 | 32 | layer { 33 | name: "data_bn" 34 | type: "BatchNorm" 35 | bottom: "data" 36 | top: "data_bn" 37 | param { 38 | lr_mult: 0.0 39 | } 40 | param { 41 | lr_mult: 0.0 42 | } 43 | param { 44 | lr_mult: 0.0 45 | } 46 | } 47 | layer { 48 | name: "data_scale" 49 | type: "Scale" 50 | bottom: "data_bn" 51 | top: "data_bn" 52 | param { 53 | lr_mult: 1.0 54 | decay_mult: 1.0 55 | } 56 | param { 57 | lr_mult: 2.0 58 | decay_mult: 1.0 59 | } 60 | scale_param { 61 | bias_term: true 62 | } 63 | } 64 | layer { 65 | name: "conv1_h" 66 | type: "Convolution" 67 | bottom: "data_bn" 68 | top: "conv1_h" 69 | param { 70 | lr_mult: 1.0 71 | decay_mult: 1.0 72 | } 73 | param { 74 | lr_mult: 
2.0 75 | decay_mult: 1.0 76 | } 77 | convolution_param { 78 | num_output: 32 79 | pad: 3 80 | kernel_size: 7 81 | stride: 2 82 | weight_filler { 83 | type: "msra" 84 | variance_norm: FAN_OUT 85 | } 86 | bias_filler { 87 | type: "constant" 88 | value: 0.0 89 | } 90 | } 91 | } 92 | layer { 93 | name: "conv1_bn_h" 94 | type: "BatchNorm" 95 | bottom: "conv1_h" 96 | top: "conv1_h" 97 | param { 98 | lr_mult: 0.0 99 | } 100 | param { 101 | lr_mult: 0.0 102 | } 103 | param { 104 | lr_mult: 0.0 105 | } 106 | } 107 | layer { 108 | name: "conv1_scale_h" 109 | type: "Scale" 110 | bottom: "conv1_h" 111 | top: "conv1_h" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 115 | } 116 | param { 117 | lr_mult: 2.0 118 | decay_mult: 1.0 119 | } 120 | scale_param { 121 | bias_term: true 122 | } 123 | } 124 | layer { 125 | name: "conv1_relu" 126 | type: "ReLU" 127 | bottom: "conv1_h" 128 | top: "conv1_h" 129 | } 130 | layer { 131 | name: "conv1_pool" 132 | type: "Pooling" 133 | bottom: "conv1_h" 134 | top: "conv1_pool" 135 | pooling_param { 136 | kernel_size: 3 137 | stride: 2 138 | } 139 | } 140 | layer { 141 | name: "layer_64_1_conv1_h" 142 | type: "Convolution" 143 | bottom: "conv1_pool" 144 | top: "layer_64_1_conv1_h" 145 | param { 146 | lr_mult: 1.0 147 | decay_mult: 1.0 148 | } 149 | convolution_param { 150 | num_output: 32 151 | bias_term: false 152 | pad: 1 153 | kernel_size: 3 154 | stride: 1 155 | weight_filler { 156 | type: "msra" 157 | } 158 | bias_filler { 159 | type: "constant" 160 | value: 0.0 161 | } 162 | } 163 | } 164 | layer { 165 | name: "layer_64_1_bn2_h" 166 | type: "BatchNorm" 167 | bottom: "layer_64_1_conv1_h" 168 | top: "layer_64_1_conv1_h" 169 | param { 170 | lr_mult: 0.0 171 | } 172 | param { 173 | lr_mult: 0.0 174 | } 175 | param { 176 | lr_mult: 0.0 177 | } 178 | } 179 | layer { 180 | name: "layer_64_1_scale2_h" 181 | type: "Scale" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv1_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 
187 | } 188 | param { 189 | lr_mult: 2.0 190 | decay_mult: 1.0 191 | } 192 | scale_param { 193 | bias_term: true 194 | } 195 | } 196 | layer { 197 | name: "layer_64_1_relu2" 198 | type: "ReLU" 199 | bottom: "layer_64_1_conv1_h" 200 | top: "layer_64_1_conv1_h" 201 | } 202 | layer { 203 | name: "layer_64_1_conv2_h" 204 | type: "Convolution" 205 | bottom: "layer_64_1_conv1_h" 206 | top: "layer_64_1_conv2_h" 207 | param { 208 | lr_mult: 1.0 209 | decay_mult: 1.0 210 | } 211 | convolution_param { 212 | num_output: 32 213 | bias_term: false 214 | pad: 1 215 | kernel_size: 3 216 | stride: 1 217 | weight_filler { 218 | type: "msra" 219 | } 220 | bias_filler { 221 | type: "constant" 222 | value: 0.0 223 | } 224 | } 225 | } 226 | layer { 227 | name: "layer_64_1_sum" 228 | type: "Eltwise" 229 | bottom: "layer_64_1_conv2_h" 230 | bottom: "conv1_pool" 231 | top: "layer_64_1_sum" 232 | } 233 | layer { 234 | name: "layer_128_1_bn1_h" 235 | type: "BatchNorm" 236 | bottom: "layer_64_1_sum" 237 | top: "layer_128_1_bn1_h" 238 | param { 239 | lr_mult: 0.0 240 | } 241 | param { 242 | lr_mult: 0.0 243 | } 244 | param { 245 | lr_mult: 0.0 246 | } 247 | } 248 | layer { 249 | name: "layer_128_1_scale1_h" 250 | type: "Scale" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_bn1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | param { 258 | lr_mult: 2.0 259 | decay_mult: 1.0 260 | } 261 | scale_param { 262 | bias_term: true 263 | } 264 | } 265 | layer { 266 | name: "layer_128_1_relu1" 267 | type: "ReLU" 268 | bottom: "layer_128_1_bn1_h" 269 | top: "layer_128_1_bn1_h" 270 | } 271 | layer { 272 | name: "layer_128_1_conv1_h" 273 | type: "Convolution" 274 | bottom: "layer_128_1_bn1_h" 275 | top: "layer_128_1_conv1_h" 276 | param { 277 | lr_mult: 1.0 278 | decay_mult: 1.0 279 | } 280 | convolution_param { 281 | num_output: 64 282 | bias_term: false 283 | pad: 1 284 | kernel_size: 3 285 | stride: 2 286 | weight_filler { 287 | type: "msra" 288 | } 289 | bias_filler { 
290 | type: "constant" 291 | value: 0.0 292 | } 293 | } 294 | } 295 | layer { 296 | name: "layer_128_1_bn2" 297 | type: "BatchNorm" 298 | bottom: "layer_128_1_conv1_h" 299 | top: "layer_128_1_conv1_h" 300 | param { 301 | lr_mult: 0.0 302 | } 303 | param { 304 | lr_mult: 0.0 305 | } 306 | param { 307 | lr_mult: 0.0 308 | } 309 | } 310 | layer { 311 | name: "layer_128_1_scale2" 312 | type: "Scale" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv1_h" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | param { 320 | lr_mult: 2.0 321 | decay_mult: 1.0 322 | } 323 | scale_param { 324 | bias_term: true 325 | } 326 | } 327 | layer { 328 | name: "layer_128_1_relu2" 329 | type: "ReLU" 330 | bottom: "layer_128_1_conv1_h" 331 | top: "layer_128_1_conv1_h" 332 | } 333 | layer { 334 | name: "layer_128_1_conv2" 335 | type: "Convolution" 336 | bottom: "layer_128_1_conv1_h" 337 | top: "layer_128_1_conv2" 338 | param { 339 | lr_mult: 1.0 340 | decay_mult: 1.0 341 | } 342 | convolution_param { 343 | num_output: 64 344 | bias_term: false 345 | pad: 1 346 | kernel_size: 3 347 | stride: 1 348 | weight_filler { 349 | type: "msra" 350 | } 351 | bias_filler { 352 | type: "constant" 353 | value: 0.0 354 | } 355 | } 356 | } 357 | layer { 358 | name: "layer_128_1_conv_expand_h" 359 | type: "Convolution" 360 | bottom: "layer_128_1_bn1_h" 361 | top: "layer_128_1_conv_expand_h" 362 | param { 363 | lr_mult: 1.0 364 | decay_mult: 1.0 365 | } 366 | convolution_param { 367 | num_output: 64 368 | bias_term: false 369 | pad: 0 370 | kernel_size: 1 371 | stride: 2 372 | weight_filler { 373 | type: "msra" 374 | } 375 | bias_filler { 376 | type: "constant" 377 | value: 0.0 378 | } 379 | } 380 | } 381 | layer { 382 | name: "layer_128_1_sum" 383 | type: "Eltwise" 384 | bottom: "layer_128_1_conv2" 385 | bottom: "layer_128_1_conv_expand_h" 386 | top: "layer_128_1_sum" 387 | } 388 | layer { 389 | name: "layer_256_1_bn1" 390 | type: "BatchNorm" 391 | bottom: "layer_128_1_sum" 
392 | top: "layer_256_1_bn1" 393 | param { 394 | lr_mult: 0.0 395 | } 396 | param { 397 | lr_mult: 0.0 398 | } 399 | param { 400 | lr_mult: 0.0 401 | } 402 | } 403 | layer { 404 | name: "layer_256_1_scale1" 405 | type: "Scale" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_bn1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | param { 413 | lr_mult: 2.0 414 | decay_mult: 1.0 415 | } 416 | scale_param { 417 | bias_term: true 418 | } 419 | } 420 | layer { 421 | name: "layer_256_1_relu1" 422 | type: "ReLU" 423 | bottom: "layer_256_1_bn1" 424 | top: "layer_256_1_bn1" 425 | } 426 | layer { 427 | name: "layer_256_1_conv1" 428 | type: "Convolution" 429 | bottom: "layer_256_1_bn1" 430 | top: "layer_256_1_conv1" 431 | param { 432 | lr_mult: 1.0 433 | decay_mult: 1.0 434 | } 435 | convolution_param { 436 | num_output: 128 437 | bias_term: false 438 | pad: 1 439 | kernel_size: 3 440 | stride: 2 441 | weight_filler { 442 | type: "msra" 443 | } 444 | bias_filler { 445 | type: "constant" 446 | value: 0.0 447 | } 448 | } 449 | } 450 | layer { 451 | name: "layer_256_1_bn2" 452 | type: "BatchNorm" 453 | bottom: "layer_256_1_conv1" 454 | top: "layer_256_1_conv1" 455 | param { 456 | lr_mult: 0.0 457 | } 458 | param { 459 | lr_mult: 0.0 460 | } 461 | param { 462 | lr_mult: 0.0 463 | } 464 | } 465 | layer { 466 | name: "layer_256_1_scale2" 467 | type: "Scale" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv1" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | param { 475 | lr_mult: 2.0 476 | decay_mult: 1.0 477 | } 478 | scale_param { 479 | bias_term: true 480 | } 481 | } 482 | layer { 483 | name: "layer_256_1_relu2" 484 | type: "ReLU" 485 | bottom: "layer_256_1_conv1" 486 | top: "layer_256_1_conv1" 487 | } 488 | layer { 489 | name: "layer_256_1_conv2" 490 | type: "Convolution" 491 | bottom: "layer_256_1_conv1" 492 | top: "layer_256_1_conv2" 493 | param { 494 | lr_mult: 1.0 495 | decay_mult: 1.0 496 | } 497 | convolution_param 
{ 498 | num_output: 128 499 | bias_term: false 500 | pad: 1 501 | kernel_size: 3 502 | stride: 1 503 | weight_filler { 504 | type: "msra" 505 | } 506 | bias_filler { 507 | type: "constant" 508 | value: 0.0 509 | } 510 | } 511 | } 512 | layer { 513 | name: "layer_256_1_conv_expand" 514 | type: "Convolution" 515 | bottom: "layer_256_1_bn1" 516 | top: "layer_256_1_conv_expand" 517 | param { 518 | lr_mult: 1.0 519 | decay_mult: 1.0 520 | } 521 | convolution_param { 522 | num_output: 128 523 | bias_term: false 524 | pad: 0 525 | kernel_size: 1 526 | stride: 2 527 | weight_filler { 528 | type: "msra" 529 | } 530 | bias_filler { 531 | type: "constant" 532 | value: 0.0 533 | } 534 | } 535 | } 536 | layer { 537 | name: "layer_256_1_sum" 538 | type: "Eltwise" 539 | bottom: "layer_256_1_conv2" 540 | bottom: "layer_256_1_conv_expand" 541 | top: "layer_256_1_sum" 542 | } 543 | layer { 544 | name: "layer_512_1_bn1" 545 | type: "BatchNorm" 546 | bottom: "layer_256_1_sum" 547 | top: "layer_512_1_bn1" 548 | param { 549 | lr_mult: 0.0 550 | } 551 | param { 552 | lr_mult: 0.0 553 | } 554 | param { 555 | lr_mult: 0.0 556 | } 557 | } 558 | layer { 559 | name: "layer_512_1_scale1" 560 | type: "Scale" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_bn1" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | param { 568 | lr_mult: 2.0 569 | decay_mult: 1.0 570 | } 571 | scale_param { 572 | bias_term: true 573 | } 574 | } 575 | layer { 576 | name: "layer_512_1_relu1" 577 | type: "ReLU" 578 | bottom: "layer_512_1_bn1" 579 | top: "layer_512_1_bn1" 580 | } 581 | layer { 582 | name: "layer_512_1_conv1_h" 583 | type: "Convolution" 584 | bottom: "layer_512_1_bn1" 585 | top: "layer_512_1_conv1_h" 586 | param { 587 | lr_mult: 1.0 588 | decay_mult: 1.0 589 | } 590 | convolution_param { 591 | num_output: 64 592 | bias_term: false 593 | pad: 1 594 | kernel_size: 3 595 | stride: 1 # 2 596 | weight_filler { 597 | type: "msra" 598 | } 599 | bias_filler { 600 | type: "constant" 601 
| value: 0.0 602 | } 603 | } 604 | } 605 | layer { 606 | name: "layer_512_1_bn2_h" 607 | type: "BatchNorm" 608 | bottom: "layer_512_1_conv1_h" 609 | top: "layer_512_1_conv1_h" 610 | param { 611 | lr_mult: 0.0 612 | } 613 | param { 614 | lr_mult: 0.0 615 | } 616 | param { 617 | lr_mult: 0.0 618 | } 619 | } 620 | layer { 621 | name: "layer_512_1_scale2_h" 622 | type: "Scale" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv1_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | param { 630 | lr_mult: 2.0 631 | decay_mult: 1.0 632 | } 633 | scale_param { 634 | bias_term: true 635 | } 636 | } 637 | layer { 638 | name: "layer_512_1_relu2" 639 | type: "ReLU" 640 | bottom: "layer_512_1_conv1_h" 641 | top: "layer_512_1_conv1_h" 642 | } 643 | layer { 644 | name: "layer_512_1_conv2_h" 645 | type: "Convolution" 646 | bottom: "layer_512_1_conv1_h" 647 | top: "layer_512_1_conv2_h" 648 | param { 649 | lr_mult: 1.0 650 | decay_mult: 1.0 651 | } 652 | convolution_param { 653 | num_output: 128 654 | bias_term: false 655 | pad: 2 # 1 656 | kernel_size: 3 657 | stride: 1 658 | dilation: 2 659 | weight_filler { 660 | type: "msra" 661 | } 662 | bias_filler { 663 | type: "constant" 664 | value: 0.0 665 | } 666 | } 667 | } 668 | layer { 669 | name: "layer_512_1_conv_expand_h" 670 | type: "Convolution" 671 | bottom: "layer_512_1_bn1" 672 | top: "layer_512_1_conv_expand_h" 673 | param { 674 | lr_mult: 1.0 675 | decay_mult: 1.0 676 | } 677 | convolution_param { 678 | num_output: 128 679 | bias_term: false 680 | pad: 0 681 | kernel_size: 1 682 | stride: 1 # 2 683 | weight_filler { 684 | type: "msra" 685 | } 686 | bias_filler { 687 | type: "constant" 688 | value: 0.0 689 | } 690 | } 691 | } 692 | layer { 693 | name: "layer_512_1_sum" 694 | type: "Eltwise" 695 | bottom: "layer_512_1_conv2_h" 696 | bottom: "layer_512_1_conv_expand_h" 697 | top: "layer_512_1_sum" 698 | } 699 | layer { 700 | name: "last_bn_h" 701 | type: "BatchNorm" 702 | bottom: "layer_512_1_sum" 
703 | top: "layer_512_1_sum" 704 | param { 705 | lr_mult: 0.0 706 | } 707 | param { 708 | lr_mult: 0.0 709 | } 710 | param { 711 | lr_mult: 0.0 712 | } 713 | } 714 | layer { 715 | name: "last_scale_h" 716 | type: "Scale" 717 | bottom: "layer_512_1_sum" 718 | top: "layer_512_1_sum" 719 | param { 720 | lr_mult: 1.0 721 | decay_mult: 1.0 722 | } 723 | param { 724 | lr_mult: 2.0 725 | decay_mult: 1.0 726 | } 727 | scale_param { 728 | bias_term: true 729 | } 730 | } 731 | layer { 732 | name: "last_relu" 733 | type: "ReLU" 734 | bottom: "layer_512_1_sum" 735 | top: "fc7" 736 | } 737 | 738 | layer { 739 | name: "conv6_1_h" 740 | type: "Convolution" 741 | bottom: "fc7" 742 | top: "conv6_1_h" 743 | param { 744 | lr_mult: 1 745 | decay_mult: 1 746 | } 747 | param { 748 | lr_mult: 2 749 | decay_mult: 0 750 | } 751 | convolution_param { 752 | num_output: 64 753 | pad: 0 754 | kernel_size: 1 755 | stride: 1 756 | weight_filler { 757 | type: "xavier" 758 | } 759 | bias_filler { 760 | type: "constant" 761 | value: 0 762 | } 763 | } 764 | } 765 | layer { 766 | name: "conv6_1_relu" 767 | type: "ReLU" 768 | bottom: "conv6_1_h" 769 | top: "conv6_1_h" 770 | } 771 | layer { 772 | name: "conv6_2_h" 773 | type: "Convolution" 774 | bottom: "conv6_1_h" 775 | top: "conv6_2_h" 776 | param { 777 | lr_mult: 1 778 | decay_mult: 1 779 | } 780 | param { 781 | lr_mult: 2 782 | decay_mult: 0 783 | } 784 | convolution_param { 785 | num_output: 128 786 | pad: 1 787 | kernel_size: 3 788 | stride: 2 789 | weight_filler { 790 | type: "xavier" 791 | } 792 | bias_filler { 793 | type: "constant" 794 | value: 0 795 | } 796 | } 797 | } 798 | layer { 799 | name: "conv6_2_relu" 800 | type: "ReLU" 801 | bottom: "conv6_2_h" 802 | top: "conv6_2_h" 803 | } 804 | layer { 805 | name: "conv7_1_h" 806 | type: "Convolution" 807 | bottom: "conv6_2_h" 808 | top: "conv7_1_h" 809 | param { 810 | lr_mult: 1 811 | decay_mult: 1 812 | } 813 | param { 814 | lr_mult: 2 815 | decay_mult: 0 816 | } 817 | convolution_param { 818 | 
num_output: 32 819 | pad: 0 820 | kernel_size: 1 821 | stride: 1 822 | weight_filler { 823 | type: "xavier" 824 | } 825 | bias_filler { 826 | type: "constant" 827 | value: 0 828 | } 829 | } 830 | } 831 | layer { 832 | name: "conv7_1_relu" 833 | type: "ReLU" 834 | bottom: "conv7_1_h" 835 | top: "conv7_1_h" 836 | } 837 | layer { 838 | name: "conv7_2_h" 839 | type: "Convolution" 840 | bottom: "conv7_1_h" 841 | top: "conv7_2_h" 842 | param { 843 | lr_mult: 1 844 | decay_mult: 1 845 | } 846 | param { 847 | lr_mult: 2 848 | decay_mult: 0 849 | } 850 | convolution_param { 851 | num_output: 64 852 | pad: 1 853 | kernel_size: 3 854 | stride: 2 855 | weight_filler { 856 | type: "xavier" 857 | } 858 | bias_filler { 859 | type: "constant" 860 | value: 0 861 | } 862 | } 863 | } 864 | layer { 865 | name: "conv7_2_relu" 866 | type: "ReLU" 867 | bottom: "conv7_2_h" 868 | top: "conv7_2_h" 869 | } 870 | layer { 871 | name: "conv8_1_h" 872 | type: "Convolution" 873 | bottom: "conv7_2_h" 874 | top: "conv8_1_h" 875 | param { 876 | lr_mult: 1 877 | decay_mult: 1 878 | } 879 | param { 880 | lr_mult: 2 881 | decay_mult: 0 882 | } 883 | convolution_param { 884 | num_output: 32 885 | pad: 0 886 | kernel_size: 1 887 | stride: 1 888 | weight_filler { 889 | type: "xavier" 890 | } 891 | bias_filler { 892 | type: "constant" 893 | value: 0 894 | } 895 | } 896 | } 897 | layer { 898 | name: "conv8_1_relu" 899 | type: "ReLU" 900 | bottom: "conv8_1_h" 901 | top: "conv8_1_h" 902 | } 903 | layer { 904 | name: "conv8_2_h" 905 | type: "Convolution" 906 | bottom: "conv8_1_h" 907 | top: "conv8_2_h" 908 | param { 909 | lr_mult: 1 910 | decay_mult: 1 911 | } 912 | param { 913 | lr_mult: 2 914 | decay_mult: 0 915 | } 916 | convolution_param { 917 | num_output: 64 918 | pad: 1 919 | kernel_size: 3 920 | stride: 1 921 | weight_filler { 922 | type: "xavier" 923 | } 924 | bias_filler { 925 | type: "constant" 926 | value: 0 927 | } 928 | } 929 | } 930 | layer { 931 | name: "conv8_2_relu" 932 | type: "ReLU" 933 | 
bottom: "conv8_2_h" 934 | top: "conv8_2_h" 935 | } 936 | layer { 937 | name: "conv9_1_h" 938 | type: "Convolution" 939 | bottom: "conv8_2_h" 940 | top: "conv9_1_h" 941 | param { 942 | lr_mult: 1 943 | decay_mult: 1 944 | } 945 | param { 946 | lr_mult: 2 947 | decay_mult: 0 948 | } 949 | convolution_param { 950 | num_output: 32 951 | pad: 0 952 | kernel_size: 1 953 | stride: 1 954 | weight_filler { 955 | type: "xavier" 956 | } 957 | bias_filler { 958 | type: "constant" 959 | value: 0 960 | } 961 | } 962 | } 963 | layer { 964 | name: "conv9_1_relu" 965 | type: "ReLU" 966 | bottom: "conv9_1_h" 967 | top: "conv9_1_h" 968 | } 969 | layer { 970 | name: "conv9_2_h" 971 | type: "Convolution" 972 | bottom: "conv9_1_h" 973 | top: "conv9_2_h" 974 | param { 975 | lr_mult: 1 976 | decay_mult: 1 977 | } 978 | param { 979 | lr_mult: 2 980 | decay_mult: 0 981 | } 982 | convolution_param { 983 | num_output: 64 984 | pad: 1 985 | kernel_size: 3 986 | stride: 1 987 | weight_filler { 988 | type: "xavier" 989 | } 990 | bias_filler { 991 | type: "constant" 992 | value: 0 993 | } 994 | } 995 | } 996 | layer { 997 | name: "conv9_2_relu" 998 | type: "ReLU" 999 | bottom: "conv9_2_h" 1000 | top: "conv9_2_h" 1001 | } 1002 | layer { 1003 | name: "conv4_3_norm" 1004 | type: "Normalize" 1005 | bottom: "layer_256_1_bn1" 1006 | top: "conv4_3_norm" 1007 | norm_param { 1008 | across_spatial: false 1009 | scale_filler { 1010 | type: "constant" 1011 | value: 20 1012 | } 1013 | channel_shared: false 1014 | } 1015 | } 1016 | layer { 1017 | name: "conv4_3_norm_mbox_loc" 1018 | type: "Convolution" 1019 | bottom: "conv4_3_norm" 1020 | top: "conv4_3_norm_mbox_loc" 1021 | param { 1022 | lr_mult: 1 1023 | decay_mult: 1 1024 | } 1025 | param { 1026 | lr_mult: 2 1027 | decay_mult: 0 1028 | } 1029 | convolution_param { 1030 | num_output: 16 1031 | pad: 1 1032 | kernel_size: 3 1033 | stride: 1 1034 | weight_filler { 1035 | type: "xavier" 1036 | } 1037 | bias_filler { 1038 | type: "constant" 1039 | value: 0 1040 | 
} 1041 | } 1042 | } 1043 | layer { 1044 | name: "conv4_3_norm_mbox_loc_perm" 1045 | type: "Permute" 1046 | bottom: "conv4_3_norm_mbox_loc" 1047 | top: "conv4_3_norm_mbox_loc_perm" 1048 | permute_param { 1049 | order: 0 1050 | order: 2 1051 | order: 3 1052 | order: 1 1053 | } 1054 | } 1055 | layer { 1056 | name: "conv4_3_norm_mbox_loc_flat" 1057 | type: "Flatten" 1058 | bottom: "conv4_3_norm_mbox_loc_perm" 1059 | top: "conv4_3_norm_mbox_loc_flat" 1060 | flatten_param { 1061 | axis: 1 1062 | } 1063 | } 1064 | layer { 1065 | name: "conv4_3_norm_mbox_conf" 1066 | type: "Convolution" 1067 | bottom: "conv4_3_norm" 1068 | top: "conv4_3_norm_mbox_conf" 1069 | param { 1070 | lr_mult: 1 1071 | decay_mult: 1 1072 | } 1073 | param { 1074 | lr_mult: 2 1075 | decay_mult: 0 1076 | } 1077 | convolution_param { 1078 | num_output: 8 # 84 1079 | pad: 1 1080 | kernel_size: 3 1081 | stride: 1 1082 | weight_filler { 1083 | type: "xavier" 1084 | } 1085 | bias_filler { 1086 | type: "constant" 1087 | value: 0 1088 | } 1089 | } 1090 | } 1091 | layer { 1092 | name: "conv4_3_norm_mbox_conf_perm" 1093 | type: "Permute" 1094 | bottom: "conv4_3_norm_mbox_conf" 1095 | top: "conv4_3_norm_mbox_conf_perm" 1096 | permute_param { 1097 | order: 0 1098 | order: 2 1099 | order: 3 1100 | order: 1 1101 | } 1102 | } 1103 | layer { 1104 | name: "conv4_3_norm_mbox_conf_flat" 1105 | type: "Flatten" 1106 | bottom: "conv4_3_norm_mbox_conf_perm" 1107 | top: "conv4_3_norm_mbox_conf_flat" 1108 | flatten_param { 1109 | axis: 1 1110 | } 1111 | } 1112 | layer { 1113 | name: "conv4_3_norm_mbox_priorbox" 1114 | type: "PriorBox" 1115 | bottom: "conv4_3_norm" 1116 | bottom: "data" 1117 | top: "conv4_3_norm_mbox_priorbox" 1118 | prior_box_param { 1119 | min_size: 30.0 1120 | max_size: 60.0 1121 | aspect_ratio: 2 1122 | flip: true 1123 | clip: false 1124 | variance: 0.1 1125 | variance: 0.1 1126 | variance: 0.2 1127 | variance: 0.2 1128 | step: 8 1129 | offset: 0.5 1130 | } 1131 | } 1132 | layer { 1133 | name: 
"fc7_mbox_loc" 1134 | type: "Convolution" 1135 | bottom: "fc7" 1136 | top: "fc7_mbox_loc" 1137 | param { 1138 | lr_mult: 1 1139 | decay_mult: 1 1140 | } 1141 | param { 1142 | lr_mult: 2 1143 | decay_mult: 0 1144 | } 1145 | convolution_param { 1146 | num_output: 24 1147 | pad: 1 1148 | kernel_size: 3 1149 | stride: 1 1150 | weight_filler { 1151 | type: "xavier" 1152 | } 1153 | bias_filler { 1154 | type: "constant" 1155 | value: 0 1156 | } 1157 | } 1158 | } 1159 | layer { 1160 | name: "fc7_mbox_loc_perm" 1161 | type: "Permute" 1162 | bottom: "fc7_mbox_loc" 1163 | top: "fc7_mbox_loc_perm" 1164 | permute_param { 1165 | order: 0 1166 | order: 2 1167 | order: 3 1168 | order: 1 1169 | } 1170 | } 1171 | layer { 1172 | name: "fc7_mbox_loc_flat" 1173 | type: "Flatten" 1174 | bottom: "fc7_mbox_loc_perm" 1175 | top: "fc7_mbox_loc_flat" 1176 | flatten_param { 1177 | axis: 1 1178 | } 1179 | } 1180 | layer { 1181 | name: "fc7_mbox_conf" 1182 | type: "Convolution" 1183 | bottom: "fc7" 1184 | top: "fc7_mbox_conf" 1185 | param { 1186 | lr_mult: 1 1187 | decay_mult: 1 1188 | } 1189 | param { 1190 | lr_mult: 2 1191 | decay_mult: 0 1192 | } 1193 | convolution_param { 1194 | num_output: 12 # 126 1195 | pad: 1 1196 | kernel_size: 3 1197 | stride: 1 1198 | weight_filler { 1199 | type: "xavier" 1200 | } 1201 | bias_filler { 1202 | type: "constant" 1203 | value: 0 1204 | } 1205 | } 1206 | } 1207 | layer { 1208 | name: "fc7_mbox_conf_perm" 1209 | type: "Permute" 1210 | bottom: "fc7_mbox_conf" 1211 | top: "fc7_mbox_conf_perm" 1212 | permute_param { 1213 | order: 0 1214 | order: 2 1215 | order: 3 1216 | order: 1 1217 | } 1218 | } 1219 | layer { 1220 | name: "fc7_mbox_conf_flat" 1221 | type: "Flatten" 1222 | bottom: "fc7_mbox_conf_perm" 1223 | top: "fc7_mbox_conf_flat" 1224 | flatten_param { 1225 | axis: 1 1226 | } 1227 | } 1228 | layer { 1229 | name: "fc7_mbox_priorbox" 1230 | type: "PriorBox" 1231 | bottom: "fc7" 1232 | bottom: "data" 1233 | top: "fc7_mbox_priorbox" 1234 | prior_box_param { 
1235 | min_size: 60.0 1236 | max_size: 111.0 1237 | aspect_ratio: 2 1238 | aspect_ratio: 3 1239 | flip: true 1240 | clip: false 1241 | variance: 0.1 1242 | variance: 0.1 1243 | variance: 0.2 1244 | variance: 0.2 1245 | step: 16 1246 | offset: 0.5 1247 | } 1248 | } 1249 | layer { 1250 | name: "conv6_2_mbox_loc" 1251 | type: "Convolution" 1252 | bottom: "conv6_2_h" 1253 | top: "conv6_2_mbox_loc" 1254 | param { 1255 | lr_mult: 1 1256 | decay_mult: 1 1257 | } 1258 | param { 1259 | lr_mult: 2 1260 | decay_mult: 0 1261 | } 1262 | convolution_param { 1263 | num_output: 24 1264 | pad: 1 1265 | kernel_size: 3 1266 | stride: 1 1267 | weight_filler { 1268 | type: "xavier" 1269 | } 1270 | bias_filler { 1271 | type: "constant" 1272 | value: 0 1273 | } 1274 | } 1275 | } 1276 | layer { 1277 | name: "conv6_2_mbox_loc_perm" 1278 | type: "Permute" 1279 | bottom: "conv6_2_mbox_loc" 1280 | top: "conv6_2_mbox_loc_perm" 1281 | permute_param { 1282 | order: 0 1283 | order: 2 1284 | order: 3 1285 | order: 1 1286 | } 1287 | } 1288 | layer { 1289 | name: "conv6_2_mbox_loc_flat" 1290 | type: "Flatten" 1291 | bottom: "conv6_2_mbox_loc_perm" 1292 | top: "conv6_2_mbox_loc_flat" 1293 | flatten_param { 1294 | axis: 1 1295 | } 1296 | } 1297 | layer { 1298 | name: "conv6_2_mbox_conf" 1299 | type: "Convolution" 1300 | bottom: "conv6_2_h" 1301 | top: "conv6_2_mbox_conf" 1302 | param { 1303 | lr_mult: 1 1304 | decay_mult: 1 1305 | } 1306 | param { 1307 | lr_mult: 2 1308 | decay_mult: 0 1309 | } 1310 | convolution_param { 1311 | num_output: 12 # 126 1312 | pad: 1 1313 | kernel_size: 3 1314 | stride: 1 1315 | weight_filler { 1316 | type: "xavier" 1317 | } 1318 | bias_filler { 1319 | type: "constant" 1320 | value: 0 1321 | } 1322 | } 1323 | } 1324 | layer { 1325 | name: "conv6_2_mbox_conf_perm" 1326 | type: "Permute" 1327 | bottom: "conv6_2_mbox_conf" 1328 | top: "conv6_2_mbox_conf_perm" 1329 | permute_param { 1330 | order: 0 1331 | order: 2 1332 | order: 3 1333 | order: 1 1334 | } 1335 | } 1336 | layer 
{ 1337 | name: "conv6_2_mbox_conf_flat" 1338 | type: "Flatten" 1339 | bottom: "conv6_2_mbox_conf_perm" 1340 | top: "conv6_2_mbox_conf_flat" 1341 | flatten_param { 1342 | axis: 1 1343 | } 1344 | } 1345 | layer { 1346 | name: "conv6_2_mbox_priorbox" 1347 | type: "PriorBox" 1348 | bottom: "conv6_2_h" 1349 | bottom: "data" 1350 | top: "conv6_2_mbox_priorbox" 1351 | prior_box_param { 1352 | min_size: 111.0 1353 | max_size: 162.0 1354 | aspect_ratio: 2 1355 | aspect_ratio: 3 1356 | flip: true 1357 | clip: false 1358 | variance: 0.1 1359 | variance: 0.1 1360 | variance: 0.2 1361 | variance: 0.2 1362 | step: 32 1363 | offset: 0.5 1364 | } 1365 | } 1366 | layer { 1367 | name: "conv7_2_mbox_loc" 1368 | type: "Convolution" 1369 | bottom: "conv7_2_h" 1370 | top: "conv7_2_mbox_loc" 1371 | param { 1372 | lr_mult: 1 1373 | decay_mult: 1 1374 | } 1375 | param { 1376 | lr_mult: 2 1377 | decay_mult: 0 1378 | } 1379 | convolution_param { 1380 | num_output: 24 1381 | pad: 1 1382 | kernel_size: 3 1383 | stride: 1 1384 | weight_filler { 1385 | type: "xavier" 1386 | } 1387 | bias_filler { 1388 | type: "constant" 1389 | value: 0 1390 | } 1391 | } 1392 | } 1393 | layer { 1394 | name: "conv7_2_mbox_loc_perm" 1395 | type: "Permute" 1396 | bottom: "conv7_2_mbox_loc" 1397 | top: "conv7_2_mbox_loc_perm" 1398 | permute_param { 1399 | order: 0 1400 | order: 2 1401 | order: 3 1402 | order: 1 1403 | } 1404 | } 1405 | layer { 1406 | name: "conv7_2_mbox_loc_flat" 1407 | type: "Flatten" 1408 | bottom: "conv7_2_mbox_loc_perm" 1409 | top: "conv7_2_mbox_loc_flat" 1410 | flatten_param { 1411 | axis: 1 1412 | } 1413 | } 1414 | layer { 1415 | name: "conv7_2_mbox_conf" 1416 | type: "Convolution" 1417 | bottom: "conv7_2_h" 1418 | top: "conv7_2_mbox_conf" 1419 | param { 1420 | lr_mult: 1 1421 | decay_mult: 1 1422 | } 1423 | param { 1424 | lr_mult: 2 1425 | decay_mult: 0 1426 | } 1427 | convolution_param { 1428 | num_output: 12 # 126 1429 | pad: 1 1430 | kernel_size: 3 1431 | stride: 1 1432 | weight_filler { 
1433 | type: "xavier" 1434 | } 1435 | bias_filler { 1436 | type: "constant" 1437 | value: 0 1438 | } 1439 | } 1440 | } 1441 | layer { 1442 | name: "conv7_2_mbox_conf_perm" 1443 | type: "Permute" 1444 | bottom: "conv7_2_mbox_conf" 1445 | top: "conv7_2_mbox_conf_perm" 1446 | permute_param { 1447 | order: 0 1448 | order: 2 1449 | order: 3 1450 | order: 1 1451 | } 1452 | } 1453 | layer { 1454 | name: "conv7_2_mbox_conf_flat" 1455 | type: "Flatten" 1456 | bottom: "conv7_2_mbox_conf_perm" 1457 | top: "conv7_2_mbox_conf_flat" 1458 | flatten_param { 1459 | axis: 1 1460 | } 1461 | } 1462 | layer { 1463 | name: "conv7_2_mbox_priorbox" 1464 | type: "PriorBox" 1465 | bottom: "conv7_2_h" 1466 | bottom: "data" 1467 | top: "conv7_2_mbox_priorbox" 1468 | prior_box_param { 1469 | min_size: 162.0 1470 | max_size: 213.0 1471 | aspect_ratio: 2 1472 | aspect_ratio: 3 1473 | flip: true 1474 | clip: false 1475 | variance: 0.1 1476 | variance: 0.1 1477 | variance: 0.2 1478 | variance: 0.2 1479 | step: 64 1480 | offset: 0.5 1481 | } 1482 | } 1483 | layer { 1484 | name: "conv8_2_mbox_loc" 1485 | type: "Convolution" 1486 | bottom: "conv8_2_h" 1487 | top: "conv8_2_mbox_loc" 1488 | param { 1489 | lr_mult: 1 1490 | decay_mult: 1 1491 | } 1492 | param { 1493 | lr_mult: 2 1494 | decay_mult: 0 1495 | } 1496 | convolution_param { 1497 | num_output: 16 1498 | pad: 1 1499 | kernel_size: 3 1500 | stride: 1 1501 | weight_filler { 1502 | type: "xavier" 1503 | } 1504 | bias_filler { 1505 | type: "constant" 1506 | value: 0 1507 | } 1508 | } 1509 | } 1510 | layer { 1511 | name: "conv8_2_mbox_loc_perm" 1512 | type: "Permute" 1513 | bottom: "conv8_2_mbox_loc" 1514 | top: "conv8_2_mbox_loc_perm" 1515 | permute_param { 1516 | order: 0 1517 | order: 2 1518 | order: 3 1519 | order: 1 1520 | } 1521 | } 1522 | layer { 1523 | name: "conv8_2_mbox_loc_flat" 1524 | type: "Flatten" 1525 | bottom: "conv8_2_mbox_loc_perm" 1526 | top: "conv8_2_mbox_loc_flat" 1527 | flatten_param { 1528 | axis: 1 1529 | } 1530 | } 1531 | 
layer { 1532 | name: "conv8_2_mbox_conf" 1533 | type: "Convolution" 1534 | bottom: "conv8_2_h" 1535 | top: "conv8_2_mbox_conf" 1536 | param { 1537 | lr_mult: 1 1538 | decay_mult: 1 1539 | } 1540 | param { 1541 | lr_mult: 2 1542 | decay_mult: 0 1543 | } 1544 | convolution_param { 1545 | num_output: 8 # 84 1546 | pad: 1 1547 | kernel_size: 3 1548 | stride: 1 1549 | weight_filler { 1550 | type: "xavier" 1551 | } 1552 | bias_filler { 1553 | type: "constant" 1554 | value: 0 1555 | } 1556 | } 1557 | } 1558 | layer { 1559 | name: "conv8_2_mbox_conf_perm" 1560 | type: "Permute" 1561 | bottom: "conv8_2_mbox_conf" 1562 | top: "conv8_2_mbox_conf_perm" 1563 | permute_param { 1564 | order: 0 1565 | order: 2 1566 | order: 3 1567 | order: 1 1568 | } 1569 | } 1570 | layer { 1571 | name: "conv8_2_mbox_conf_flat" 1572 | type: "Flatten" 1573 | bottom: "conv8_2_mbox_conf_perm" 1574 | top: "conv8_2_mbox_conf_flat" 1575 | flatten_param { 1576 | axis: 1 1577 | } 1578 | } 1579 | layer { 1580 | name: "conv8_2_mbox_priorbox" 1581 | type: "PriorBox" 1582 | bottom: "conv8_2_h" 1583 | bottom: "data" 1584 | top: "conv8_2_mbox_priorbox" 1585 | prior_box_param { 1586 | min_size: 213.0 1587 | max_size: 264.0 1588 | aspect_ratio: 2 1589 | flip: true 1590 | clip: false 1591 | variance: 0.1 1592 | variance: 0.1 1593 | variance: 0.2 1594 | variance: 0.2 1595 | step: 100 1596 | offset: 0.5 1597 | } 1598 | } 1599 | layer { 1600 | name: "conv9_2_mbox_loc" 1601 | type: "Convolution" 1602 | bottom: "conv9_2_h" 1603 | top: "conv9_2_mbox_loc" 1604 | param { 1605 | lr_mult: 1 1606 | decay_mult: 1 1607 | } 1608 | param { 1609 | lr_mult: 2 1610 | decay_mult: 0 1611 | } 1612 | convolution_param { 1613 | num_output: 16 1614 | pad: 1 1615 | kernel_size: 3 1616 | stride: 1 1617 | weight_filler { 1618 | type: "xavier" 1619 | } 1620 | bias_filler { 1621 | type: "constant" 1622 | value: 0 1623 | } 1624 | } 1625 | } 1626 | layer { 1627 | name: "conv9_2_mbox_loc_perm" 1628 | type: "Permute" 1629 | bottom: 
"conv9_2_mbox_loc" 1630 | top: "conv9_2_mbox_loc_perm" 1631 | permute_param { 1632 | order: 0 1633 | order: 2 1634 | order: 3 1635 | order: 1 1636 | } 1637 | } 1638 | layer { 1639 | name: "conv9_2_mbox_loc_flat" 1640 | type: "Flatten" 1641 | bottom: "conv9_2_mbox_loc_perm" 1642 | top: "conv9_2_mbox_loc_flat" 1643 | flatten_param { 1644 | axis: 1 1645 | } 1646 | } 1647 | layer { 1648 | name: "conv9_2_mbox_conf" 1649 | type: "Convolution" 1650 | bottom: "conv9_2_h" 1651 | top: "conv9_2_mbox_conf" 1652 | param { 1653 | lr_mult: 1 1654 | decay_mult: 1 1655 | } 1656 | param { 1657 | lr_mult: 2 1658 | decay_mult: 0 1659 | } 1660 | convolution_param { 1661 | num_output: 8 # 84 1662 | pad: 1 1663 | kernel_size: 3 1664 | stride: 1 1665 | weight_filler { 1666 | type: "xavier" 1667 | } 1668 | bias_filler { 1669 | type: "constant" 1670 | value: 0 1671 | } 1672 | } 1673 | } 1674 | layer { 1675 | name: "conv9_2_mbox_conf_perm" 1676 | type: "Permute" 1677 | bottom: "conv9_2_mbox_conf" 1678 | top: "conv9_2_mbox_conf_perm" 1679 | permute_param { 1680 | order: 0 1681 | order: 2 1682 | order: 3 1683 | order: 1 1684 | } 1685 | } 1686 | layer { 1687 | name: "conv9_2_mbox_conf_flat" 1688 | type: "Flatten" 1689 | bottom: "conv9_2_mbox_conf_perm" 1690 | top: "conv9_2_mbox_conf_flat" 1691 | flatten_param { 1692 | axis: 1 1693 | } 1694 | } 1695 | layer { 1696 | name: "conv9_2_mbox_priorbox" 1697 | type: "PriorBox" 1698 | bottom: "conv9_2_h" 1699 | bottom: "data" 1700 | top: "conv9_2_mbox_priorbox" 1701 | prior_box_param { 1702 | min_size: 264.0 1703 | max_size: 315.0 1704 | aspect_ratio: 2 1705 | flip: true 1706 | clip: false 1707 | variance: 0.1 1708 | variance: 0.1 1709 | variance: 0.2 1710 | variance: 0.2 1711 | step: 300 1712 | offset: 0.5 1713 | } 1714 | } 1715 | layer { 1716 | name: "mbox_loc" 1717 | type: "Concat" 1718 | bottom: "conv4_3_norm_mbox_loc_flat" 1719 | bottom: "fc7_mbox_loc_flat" 1720 | bottom: "conv6_2_mbox_loc_flat" 1721 | bottom: "conv7_2_mbox_loc_flat" 1722 | bottom: 
"conv8_2_mbox_loc_flat" 1723 | bottom: "conv9_2_mbox_loc_flat" 1724 | top: "mbox_loc" 1725 | concat_param { 1726 | axis: 1 1727 | } 1728 | } 1729 | layer { 1730 | name: "mbox_conf" 1731 | type: "Concat" 1732 | bottom: "conv4_3_norm_mbox_conf_flat" 1733 | bottom: "fc7_mbox_conf_flat" 1734 | bottom: "conv6_2_mbox_conf_flat" 1735 | bottom: "conv7_2_mbox_conf_flat" 1736 | bottom: "conv8_2_mbox_conf_flat" 1737 | bottom: "conv9_2_mbox_conf_flat" 1738 | top: "mbox_conf" 1739 | concat_param { 1740 | axis: 1 1741 | } 1742 | } 1743 | layer { 1744 | name: "mbox_priorbox" 1745 | type: "Concat" 1746 | bottom: "conv4_3_norm_mbox_priorbox" 1747 | bottom: "fc7_mbox_priorbox" 1748 | bottom: "conv6_2_mbox_priorbox" 1749 | bottom: "conv7_2_mbox_priorbox" 1750 | bottom: "conv8_2_mbox_priorbox" 1751 | bottom: "conv9_2_mbox_priorbox" 1752 | top: "mbox_priorbox" 1753 | concat_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | 1758 | layer { 1759 | name: "mbox_conf_reshape" 1760 | type: "Reshape" 1761 | bottom: "mbox_conf" 1762 | top: "mbox_conf_reshape" 1763 | reshape_param { 1764 | shape { 1765 | dim: 0 1766 | dim: -1 1767 | dim: 2 1768 | } 1769 | } 1770 | } 1771 | layer { 1772 | name: "mbox_conf_softmax" 1773 | type: "Softmax" 1774 | bottom: "mbox_conf_reshape" 1775 | top: "mbox_conf_softmax" 1776 | softmax_param { 1777 | axis: 2 1778 | } 1779 | } 1780 | layer { 1781 | name: "mbox_conf_flatten" 1782 | type: "Flatten" 1783 | bottom: "mbox_conf_softmax" 1784 | top: "mbox_conf_flatten" 1785 | flatten_param { 1786 | axis: 1 1787 | } 1788 | } 1789 | 1790 | layer { 1791 | name: "detection_out" 1792 | type: "DetectionOutput" 1793 | bottom: "mbox_loc" 1794 | bottom: "mbox_conf_flatten" 1795 | bottom: "mbox_priorbox" 1796 | top: "detection_out" 1797 | include { 1798 | phase: TEST 1799 | } 1800 | detection_output_param { 1801 | num_classes: 2 1802 | share_location: true 1803 | background_label_id: 0 1804 | nms_param { 1805 | nms_threshold: 0.45 1806 | top_k: 400 1807 | } 1808 | code_type: 
CENTER_SIZE 1809 | keep_top_k: 200 1810 | confidence_threshold: 0.01 1811 | } 1812 | } 1813 | layer { 1814 | name: "detection_eval" 1815 | type: "DetectionEvaluate" 1816 | bottom: "detection_out" 1817 | bottom: "label" 1818 | top: "detection_eval" 1819 | include { 1820 | phase: TEST 1821 | } 1822 | detection_evaluate_param { 1823 | num_classes: 2 1824 | background_label_id: 0 1825 | overlap_threshold: 0.5 1826 | evaluate_difficult_gt: false 1827 | } 1828 | } 1829 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD/deploy.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 1 5 | dim: 300 6 | dim: 300 7 | } 8 | 9 | layer { 10 | name: "data_bn" 11 | type: "BatchNorm" 12 | bottom: "data" 13 | top: "data_bn" 14 | param { 15 | lr_mult: 0.0 16 | } 17 | param { 18 | lr_mult: 0.0 19 | } 20 | param { 21 | lr_mult: 0.0 22 | } 23 | } 24 | layer { 25 | name: "data_scale" 26 | type: "Scale" 27 | bottom: "data_bn" 28 | top: "data_bn" 29 | param { 30 | lr_mult: 1.0 31 | decay_mult: 1.0 32 | } 33 | param { 34 | lr_mult: 2.0 35 | decay_mult: 1.0 36 | } 37 | scale_param { 38 | bias_term: true 39 | } 40 | } 41 | layer { 42 | name: "conv1_h" 43 | type: "Convolution" 44 | bottom: "data_bn" 45 | top: "conv1_h" 46 | param { 47 | lr_mult: 1.0 48 | decay_mult: 1.0 49 | } 50 | param { 51 | lr_mult: 2.0 52 | decay_mult: 1.0 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 3 57 | kernel_size: 7 58 | stride: 2 59 | weight_filler { 60 | type: "msra" 61 | variance_norm: FAN_OUT 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0.0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "conv1_bn_h" 71 | type: "BatchNorm" 72 | bottom: "conv1_h" 73 | top: "conv1_h" 74 | param { 75 | lr_mult: 0.0 76 | } 77 | param { 78 | lr_mult: 0.0 79 | } 80 | param { 81 | lr_mult: 0.0 82 | } 83 | } 84 | layer { 85 | name: "conv1_scale_h" 86 | 
type: "Scale" 87 | bottom: "conv1_h" 88 | top: "conv1_h" 89 | param { 90 | lr_mult: 1.0 91 | decay_mult: 1.0 92 | } 93 | param { 94 | lr_mult: 2.0 95 | decay_mult: 1.0 96 | } 97 | scale_param { 98 | bias_term: true 99 | } 100 | } 101 | layer { 102 | name: "conv1_relu" 103 | type: "ReLU" 104 | bottom: "conv1_h" 105 | top: "conv1_h" 106 | } 107 | layer { 108 | name: "conv1_pool" 109 | type: "Pooling" 110 | bottom: "conv1_h" 111 | top: "conv1_pool" 112 | pooling_param { 113 | kernel_size: 3 114 | stride: 2 115 | } 116 | } 117 | layer { 118 | name: "layer_64_1_conv1_h" 119 | type: "Convolution" 120 | bottom: "conv1_pool" 121 | top: "layer_64_1_conv1_h" 122 | param { 123 | lr_mult: 1.0 124 | decay_mult: 1.0 125 | } 126 | convolution_param { 127 | num_output: 32 128 | bias_term: false 129 | pad: 1 130 | kernel_size: 3 131 | stride: 1 132 | weight_filler { 133 | type: "msra" 134 | } 135 | bias_filler { 136 | type: "constant" 137 | value: 0.0 138 | } 139 | } 140 | } 141 | layer { 142 | name: "layer_64_1_bn2_h" 143 | type: "BatchNorm" 144 | bottom: "layer_64_1_conv1_h" 145 | top: "layer_64_1_conv1_h" 146 | param { 147 | lr_mult: 0.0 148 | } 149 | param { 150 | lr_mult: 0.0 151 | } 152 | param { 153 | lr_mult: 0.0 154 | } 155 | } 156 | layer { 157 | name: "layer_64_1_scale2_h" 158 | type: "Scale" 159 | bottom: "layer_64_1_conv1_h" 160 | top: "layer_64_1_conv1_h" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1.0 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 1.0 168 | } 169 | scale_param { 170 | bias_term: true 171 | } 172 | } 173 | layer { 174 | name: "layer_64_1_relu2" 175 | type: "ReLU" 176 | bottom: "layer_64_1_conv1_h" 177 | top: "layer_64_1_conv1_h" 178 | } 179 | layer { 180 | name: "layer_64_1_conv2_h" 181 | type: "Convolution" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv2_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | convolution_param { 189 | num_output: 32 190 | bias_term: false 191 | pad: 1 192 | 
kernel_size: 3 193 | stride: 1 194 | weight_filler { 195 | type: "msra" 196 | } 197 | bias_filler { 198 | type: "constant" 199 | value: 0.0 200 | } 201 | } 202 | } 203 | layer { 204 | name: "layer_64_1_sum" 205 | type: "Eltwise" 206 | bottom: "layer_64_1_conv2_h" 207 | bottom: "conv1_pool" 208 | top: "layer_64_1_sum" 209 | } 210 | layer { 211 | name: "layer_128_1_bn1_h" 212 | type: "BatchNorm" 213 | bottom: "layer_64_1_sum" 214 | top: "layer_128_1_bn1_h" 215 | param { 216 | lr_mult: 0.0 217 | } 218 | param { 219 | lr_mult: 0.0 220 | } 221 | param { 222 | lr_mult: 0.0 223 | } 224 | } 225 | layer { 226 | name: "layer_128_1_scale1_h" 227 | type: "Scale" 228 | bottom: "layer_128_1_bn1_h" 229 | top: "layer_128_1_bn1_h" 230 | param { 231 | lr_mult: 1.0 232 | decay_mult: 1.0 233 | } 234 | param { 235 | lr_mult: 2.0 236 | decay_mult: 1.0 237 | } 238 | scale_param { 239 | bias_term: true 240 | } 241 | } 242 | layer { 243 | name: "layer_128_1_relu1" 244 | type: "ReLU" 245 | bottom: "layer_128_1_bn1_h" 246 | top: "layer_128_1_bn1_h" 247 | } 248 | layer { 249 | name: "layer_128_1_conv1_h" 250 | type: "Convolution" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_conv1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | convolution_param { 258 | num_output: 128 259 | bias_term: false 260 | pad: 1 261 | kernel_size: 3 262 | stride: 2 263 | weight_filler { 264 | type: "msra" 265 | } 266 | bias_filler { 267 | type: "constant" 268 | value: 0.0 269 | } 270 | } 271 | } 272 | layer { 273 | name: "layer_128_1_bn2" 274 | type: "BatchNorm" 275 | bottom: "layer_128_1_conv1_h" 276 | top: "layer_128_1_conv1_h" 277 | param { 278 | lr_mult: 0.0 279 | } 280 | param { 281 | lr_mult: 0.0 282 | } 283 | param { 284 | lr_mult: 0.0 285 | } 286 | } 287 | layer { 288 | name: "layer_128_1_scale2" 289 | type: "Scale" 290 | bottom: "layer_128_1_conv1_h" 291 | top: "layer_128_1_conv1_h" 292 | param { 293 | lr_mult: 1.0 294 | decay_mult: 1.0 295 | } 296 | param { 297 | lr_mult: 
2.0 298 | decay_mult: 1.0 299 | } 300 | scale_param { 301 | bias_term: true 302 | } 303 | } 304 | layer { 305 | name: "layer_128_1_relu2" 306 | type: "ReLU" 307 | bottom: "layer_128_1_conv1_h" 308 | top: "layer_128_1_conv1_h" 309 | } 310 | layer { 311 | name: "layer_128_1_conv2" 312 | type: "Convolution" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv2" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | convolution_param { 320 | num_output: 128 321 | bias_term: false 322 | pad: 1 323 | kernel_size: 3 324 | stride: 1 325 | weight_filler { 326 | type: "msra" 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0.0 331 | } 332 | } 333 | } 334 | layer { 335 | name: "layer_128_1_conv_expand_h" 336 | type: "Convolution" 337 | bottom: "layer_128_1_bn1_h" 338 | top: "layer_128_1_conv_expand_h" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | convolution_param { 344 | num_output: 128 345 | bias_term: false 346 | pad: 0 347 | kernel_size: 1 348 | stride: 2 349 | weight_filler { 350 | type: "msra" 351 | } 352 | bias_filler { 353 | type: "constant" 354 | value: 0.0 355 | } 356 | } 357 | } 358 | layer { 359 | name: "layer_128_1_sum" 360 | type: "Eltwise" 361 | bottom: "layer_128_1_conv2" 362 | bottom: "layer_128_1_conv_expand_h" 363 | top: "layer_128_1_sum" 364 | } 365 | layer { 366 | name: "layer_256_1_bn1" 367 | type: "BatchNorm" 368 | bottom: "layer_128_1_sum" 369 | top: "layer_256_1_bn1" 370 | param { 371 | lr_mult: 0.0 372 | } 373 | param { 374 | lr_mult: 0.0 375 | } 376 | param { 377 | lr_mult: 0.0 378 | } 379 | } 380 | layer { 381 | name: "layer_256_1_scale1" 382 | type: "Scale" 383 | bottom: "layer_256_1_bn1" 384 | top: "layer_256_1_bn1" 385 | param { 386 | lr_mult: 1.0 387 | decay_mult: 1.0 388 | } 389 | param { 390 | lr_mult: 2.0 391 | decay_mult: 1.0 392 | } 393 | scale_param { 394 | bias_term: true 395 | } 396 | } 397 | layer { 398 | name: "layer_256_1_relu1" 399 | type: "ReLU" 400 | bottom: 
"layer_256_1_bn1" 401 | top: "layer_256_1_bn1" 402 | } 403 | layer { 404 | name: "layer_256_1_conv1" 405 | type: "Convolution" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_conv1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | convolution_param { 413 | num_output: 256 414 | bias_term: false 415 | pad: 1 416 | kernel_size: 3 417 | stride: 2 418 | weight_filler { 419 | type: "msra" 420 | } 421 | bias_filler { 422 | type: "constant" 423 | value: 0.0 424 | } 425 | } 426 | } 427 | layer { 428 | name: "layer_256_1_bn2" 429 | type: "BatchNorm" 430 | bottom: "layer_256_1_conv1" 431 | top: "layer_256_1_conv1" 432 | param { 433 | lr_mult: 0.0 434 | } 435 | param { 436 | lr_mult: 0.0 437 | } 438 | param { 439 | lr_mult: 0.0 440 | } 441 | } 442 | layer { 443 | name: "layer_256_1_scale2" 444 | type: "Scale" 445 | bottom: "layer_256_1_conv1" 446 | top: "layer_256_1_conv1" 447 | param { 448 | lr_mult: 1.0 449 | decay_mult: 1.0 450 | } 451 | param { 452 | lr_mult: 2.0 453 | decay_mult: 1.0 454 | } 455 | scale_param { 456 | bias_term: true 457 | } 458 | } 459 | layer { 460 | name: "layer_256_1_relu2" 461 | type: "ReLU" 462 | bottom: "layer_256_1_conv1" 463 | top: "layer_256_1_conv1" 464 | } 465 | layer { 466 | name: "layer_256_1_conv2" 467 | type: "Convolution" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv2" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | convolution_param { 475 | num_output: 256 476 | bias_term: false 477 | pad: 1 478 | kernel_size: 3 479 | stride: 1 480 | weight_filler { 481 | type: "msra" 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.0 486 | } 487 | } 488 | } 489 | layer { 490 | name: "layer_256_1_conv_expand" 491 | type: "Convolution" 492 | bottom: "layer_256_1_bn1" 493 | top: "layer_256_1_conv_expand" 494 | param { 495 | lr_mult: 1.0 496 | decay_mult: 1.0 497 | } 498 | convolution_param { 499 | num_output: 256 500 | bias_term: false 501 | pad: 0 502 | kernel_size: 1 503 | 
stride: 2 504 | weight_filler { 505 | type: "msra" 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.0 510 | } 511 | } 512 | } 513 | layer { 514 | name: "layer_256_1_sum" 515 | type: "Eltwise" 516 | bottom: "layer_256_1_conv2" 517 | bottom: "layer_256_1_conv_expand" 518 | top: "layer_256_1_sum" 519 | } 520 | layer { 521 | name: "layer_512_1_bn1" 522 | type: "BatchNorm" 523 | bottom: "layer_256_1_sum" 524 | top: "layer_512_1_bn1" 525 | param { 526 | lr_mult: 0.0 527 | } 528 | param { 529 | lr_mult: 0.0 530 | } 531 | param { 532 | lr_mult: 0.0 533 | } 534 | } 535 | layer { 536 | name: "layer_512_1_scale1" 537 | type: "Scale" 538 | bottom: "layer_512_1_bn1" 539 | top: "layer_512_1_bn1" 540 | param { 541 | lr_mult: 1.0 542 | decay_mult: 1.0 543 | } 544 | param { 545 | lr_mult: 2.0 546 | decay_mult: 1.0 547 | } 548 | scale_param { 549 | bias_term: true 550 | } 551 | } 552 | layer { 553 | name: "layer_512_1_relu1" 554 | type: "ReLU" 555 | bottom: "layer_512_1_bn1" 556 | top: "layer_512_1_bn1" 557 | } 558 | layer { 559 | name: "layer_512_1_conv1_h" 560 | type: "Convolution" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_conv1_h" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | convolution_param { 568 | num_output: 128 569 | bias_term: false 570 | pad: 1 571 | kernel_size: 3 572 | stride: 1 # 2 573 | weight_filler { 574 | type: "msra" 575 | } 576 | bias_filler { 577 | type: "constant" 578 | value: 0.0 579 | } 580 | } 581 | } 582 | layer { 583 | name: "layer_512_1_bn2_h" 584 | type: "BatchNorm" 585 | bottom: "layer_512_1_conv1_h" 586 | top: "layer_512_1_conv1_h" 587 | param { 588 | lr_mult: 0.0 589 | } 590 | param { 591 | lr_mult: 0.0 592 | } 593 | param { 594 | lr_mult: 0.0 595 | } 596 | } 597 | layer { 598 | name: "layer_512_1_scale2_h" 599 | type: "Scale" 600 | bottom: "layer_512_1_conv1_h" 601 | top: "layer_512_1_conv1_h" 602 | param { 603 | lr_mult: 1.0 604 | decay_mult: 1.0 605 | } 606 | param { 607 | lr_mult: 2.0 608 | 
decay_mult: 1.0 609 | } 610 | scale_param { 611 | bias_term: true 612 | } 613 | } 614 | layer { 615 | name: "layer_512_1_relu2" 616 | type: "ReLU" 617 | bottom: "layer_512_1_conv1_h" 618 | top: "layer_512_1_conv1_h" 619 | } 620 | layer { 621 | name: "layer_512_1_conv2_h" 622 | type: "Convolution" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv2_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | convolution_param { 630 | num_output: 256 631 | bias_term: false 632 | pad: 2 # 1 633 | kernel_size: 3 634 | stride: 1 635 | dilation: 2 636 | weight_filler { 637 | type: "msra" 638 | } 639 | bias_filler { 640 | type: "constant" 641 | value: 0.0 642 | } 643 | } 644 | } 645 | layer { 646 | name: "layer_512_1_conv_expand_h" 647 | type: "Convolution" 648 | bottom: "layer_512_1_bn1" 649 | top: "layer_512_1_conv_expand_h" 650 | param { 651 | lr_mult: 1.0 652 | decay_mult: 1.0 653 | } 654 | convolution_param { 655 | num_output: 256 656 | bias_term: false 657 | pad: 0 658 | kernel_size: 1 659 | stride: 1 # 2 660 | weight_filler { 661 | type: "msra" 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.0 666 | } 667 | } 668 | } 669 | layer { 670 | name: "layer_512_1_sum" 671 | type: "Eltwise" 672 | bottom: "layer_512_1_conv2_h" 673 | bottom: "layer_512_1_conv_expand_h" 674 | top: "layer_512_1_sum" 675 | } 676 | layer { 677 | name: "last_bn_h" 678 | type: "BatchNorm" 679 | bottom: "layer_512_1_sum" 680 | top: "layer_512_1_sum" 681 | param { 682 | lr_mult: 0.0 683 | } 684 | param { 685 | lr_mult: 0.0 686 | } 687 | param { 688 | lr_mult: 0.0 689 | } 690 | } 691 | layer { 692 | name: "last_scale_h" 693 | type: "Scale" 694 | bottom: "layer_512_1_sum" 695 | top: "layer_512_1_sum" 696 | param { 697 | lr_mult: 1.0 698 | decay_mult: 1.0 699 | } 700 | param { 701 | lr_mult: 2.0 702 | decay_mult: 1.0 703 | } 704 | scale_param { 705 | bias_term: true 706 | } 707 | } 708 | layer { 709 | name: "last_relu" 710 | type: "ReLU" 711 | bottom: 
"layer_512_1_sum" 712 | top: "fc7" 713 | } 714 | 715 | layer { 716 | name: "conv6_1_h" 717 | type: "Convolution" 718 | bottom: "fc7" 719 | top: "conv6_1_h" 720 | param { 721 | lr_mult: 1 722 | decay_mult: 1 723 | } 724 | param { 725 | lr_mult: 2 726 | decay_mult: 0 727 | } 728 | convolution_param { 729 | num_output: 128 730 | pad: 0 731 | kernel_size: 1 732 | stride: 1 733 | weight_filler { 734 | type: "xavier" 735 | } 736 | bias_filler { 737 | type: "constant" 738 | value: 0 739 | } 740 | } 741 | } 742 | layer { 743 | name: "conv6_1_relu" 744 | type: "ReLU" 745 | bottom: "conv6_1_h" 746 | top: "conv6_1_h" 747 | } 748 | layer { 749 | name: "conv6_2_h" 750 | type: "Convolution" 751 | bottom: "conv6_1_h" 752 | top: "conv6_2_h" 753 | param { 754 | lr_mult: 1 755 | decay_mult: 1 756 | } 757 | param { 758 | lr_mult: 2 759 | decay_mult: 0 760 | } 761 | convolution_param { 762 | num_output: 256 763 | pad: 1 764 | kernel_size: 3 765 | stride: 2 766 | weight_filler { 767 | type: "xavier" 768 | } 769 | bias_filler { 770 | type: "constant" 771 | value: 0 772 | } 773 | } 774 | } 775 | layer { 776 | name: "conv6_2_relu" 777 | type: "ReLU" 778 | bottom: "conv6_2_h" 779 | top: "conv6_2_h" 780 | } 781 | layer { 782 | name: "conv7_1_h" 783 | type: "Convolution" 784 | bottom: "conv6_2_h" 785 | top: "conv7_1_h" 786 | param { 787 | lr_mult: 1 788 | decay_mult: 1 789 | } 790 | param { 791 | lr_mult: 2 792 | decay_mult: 0 793 | } 794 | convolution_param { 795 | num_output: 64 796 | pad: 0 797 | kernel_size: 1 798 | stride: 1 799 | weight_filler { 800 | type: "xavier" 801 | } 802 | bias_filler { 803 | type: "constant" 804 | value: 0 805 | } 806 | } 807 | } 808 | layer { 809 | name: "conv7_1_relu" 810 | type: "ReLU" 811 | bottom: "conv7_1_h" 812 | top: "conv7_1_h" 813 | } 814 | layer { 815 | name: "conv7_2_h" 816 | type: "Convolution" 817 | bottom: "conv7_1_h" 818 | top: "conv7_2_h" 819 | param { 820 | lr_mult: 1 821 | decay_mult: 1 822 | } 823 | param { 824 | lr_mult: 2 825 | decay_mult: 
0 826 | } 827 | convolution_param { 828 | num_output: 128 829 | pad: 1 830 | kernel_size: 3 831 | stride: 2 832 | weight_filler { 833 | type: "xavier" 834 | } 835 | bias_filler { 836 | type: "constant" 837 | value: 0 838 | } 839 | } 840 | } 841 | layer { 842 | name: "conv7_2_relu" 843 | type: "ReLU" 844 | bottom: "conv7_2_h" 845 | top: "conv7_2_h" 846 | } 847 | layer { 848 | name: "conv8_1_h" 849 | type: "Convolution" 850 | bottom: "conv7_2_h" 851 | top: "conv8_1_h" 852 | param { 853 | lr_mult: 1 854 | decay_mult: 1 855 | } 856 | param { 857 | lr_mult: 2 858 | decay_mult: 0 859 | } 860 | convolution_param { 861 | num_output: 64 862 | pad: 0 863 | kernel_size: 1 864 | stride: 1 865 | weight_filler { 866 | type: "xavier" 867 | } 868 | bias_filler { 869 | type: "constant" 870 | value: 0 871 | } 872 | } 873 | } 874 | layer { 875 | name: "conv8_1_relu" 876 | type: "ReLU" 877 | bottom: "conv8_1_h" 878 | top: "conv8_1_h" 879 | } 880 | layer { 881 | name: "conv8_2_h" 882 | type: "Convolution" 883 | bottom: "conv8_1_h" 884 | top: "conv8_2_h" 885 | param { 886 | lr_mult: 1 887 | decay_mult: 1 888 | } 889 | param { 890 | lr_mult: 2 891 | decay_mult: 0 892 | } 893 | convolution_param { 894 | num_output: 128 895 | pad: 1 896 | kernel_size: 3 897 | stride: 1 898 | weight_filler { 899 | type: "xavier" 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0 904 | } 905 | } 906 | } 907 | layer { 908 | name: "conv8_2_relu" 909 | type: "ReLU" 910 | bottom: "conv8_2_h" 911 | top: "conv8_2_h" 912 | } 913 | layer { 914 | name: "conv9_1_h" 915 | type: "Convolution" 916 | bottom: "conv8_2_h" 917 | top: "conv9_1_h" 918 | param { 919 | lr_mult: 1 920 | decay_mult: 1 921 | } 922 | param { 923 | lr_mult: 2 924 | decay_mult: 0 925 | } 926 | convolution_param { 927 | num_output: 64 928 | pad: 0 929 | kernel_size: 1 930 | stride: 1 931 | weight_filler { 932 | type: "xavier" 933 | } 934 | bias_filler { 935 | type: "constant" 936 | value: 0 937 | } 938 | } 939 | } 940 | layer { 941 | 
name: "conv9_1_relu" 942 | type: "ReLU" 943 | bottom: "conv9_1_h" 944 | top: "conv9_1_h" 945 | } 946 | layer { 947 | name: "conv9_2_h" 948 | type: "Convolution" 949 | bottom: "conv9_1_h" 950 | top: "conv9_2_h" 951 | param { 952 | lr_mult: 1 953 | decay_mult: 1 954 | } 955 | param { 956 | lr_mult: 2 957 | decay_mult: 0 958 | } 959 | convolution_param { 960 | num_output: 128 961 | pad: 1 962 | kernel_size: 3 963 | stride: 1 964 | weight_filler { 965 | type: "xavier" 966 | } 967 | bias_filler { 968 | type: "constant" 969 | value: 0 970 | } 971 | } 972 | } 973 | layer { 974 | name: "conv9_2_relu" 975 | type: "ReLU" 976 | bottom: "conv9_2_h" 977 | top: "conv9_2_h" 978 | } 979 | layer { 980 | name: "conv4_3_norm" 981 | type: "Normalize" 982 | bottom: "layer_256_1_bn1" 983 | top: "conv4_3_norm" 984 | norm_param { 985 | across_spatial: false 986 | scale_filler { 987 | type: "constant" 988 | value: 20 989 | } 990 | channel_shared: false 991 | } 992 | } 993 | layer { 994 | name: "conv4_3_norm_mbox_loc" 995 | type: "Convolution" 996 | bottom: "conv4_3_norm" 997 | top: "conv4_3_norm_mbox_loc" 998 | param { 999 | lr_mult: 1 1000 | decay_mult: 1 1001 | } 1002 | param { 1003 | lr_mult: 2 1004 | decay_mult: 0 1005 | } 1006 | convolution_param { 1007 | num_output: 16 1008 | pad: 1 1009 | kernel_size: 3 1010 | stride: 1 1011 | weight_filler { 1012 | type: "xavier" 1013 | } 1014 | bias_filler { 1015 | type: "constant" 1016 | value: 0 1017 | } 1018 | } 1019 | } 1020 | layer { 1021 | name: "conv4_3_norm_mbox_loc_perm" 1022 | type: "Permute" 1023 | bottom: "conv4_3_norm_mbox_loc" 1024 | top: "conv4_3_norm_mbox_loc_perm" 1025 | permute_param { 1026 | order: 0 1027 | order: 2 1028 | order: 3 1029 | order: 1 1030 | } 1031 | } 1032 | layer { 1033 | name: "conv4_3_norm_mbox_loc_flat" 1034 | type: "Flatten" 1035 | bottom: "conv4_3_norm_mbox_loc_perm" 1036 | top: "conv4_3_norm_mbox_loc_flat" 1037 | flatten_param { 1038 | axis: 1 1039 | } 1040 | } 1041 | layer { 1042 | name: 
"conv4_3_norm_mbox_conf" 1043 | type: "Convolution" 1044 | bottom: "conv4_3_norm" 1045 | top: "conv4_3_norm_mbox_conf" 1046 | param { 1047 | lr_mult: 1 1048 | decay_mult: 1 1049 | } 1050 | param { 1051 | lr_mult: 2 1052 | decay_mult: 0 1053 | } 1054 | convolution_param { 1055 | num_output: 8 # 84 1056 | pad: 1 1057 | kernel_size: 3 1058 | stride: 1 1059 | weight_filler { 1060 | type: "xavier" 1061 | } 1062 | bias_filler { 1063 | type: "constant" 1064 | value: 0 1065 | } 1066 | } 1067 | } 1068 | layer { 1069 | name: "conv4_3_norm_mbox_conf_perm" 1070 | type: "Permute" 1071 | bottom: "conv4_3_norm_mbox_conf" 1072 | top: "conv4_3_norm_mbox_conf_perm" 1073 | permute_param { 1074 | order: 0 1075 | order: 2 1076 | order: 3 1077 | order: 1 1078 | } 1079 | } 1080 | layer { 1081 | name: "conv4_3_norm_mbox_conf_flat" 1082 | type: "Flatten" 1083 | bottom: "conv4_3_norm_mbox_conf_perm" 1084 | top: "conv4_3_norm_mbox_conf_flat" 1085 | flatten_param { 1086 | axis: 1 1087 | } 1088 | } 1089 | layer { 1090 | name: "conv4_3_norm_mbox_priorbox" 1091 | type: "PriorBox" 1092 | bottom: "conv4_3_norm" 1093 | bottom: "data" 1094 | top: "conv4_3_norm_mbox_priorbox" 1095 | prior_box_param { 1096 | min_size: 30.0 1097 | max_size: 60.0 1098 | aspect_ratio: 2 1099 | flip: true 1100 | clip: false 1101 | variance: 0.1 1102 | variance: 0.1 1103 | variance: 0.2 1104 | variance: 0.2 1105 | step: 8 1106 | offset: 0.5 1107 | } 1108 | } 1109 | layer { 1110 | name: "fc7_mbox_loc" 1111 | type: "Convolution" 1112 | bottom: "fc7" 1113 | top: "fc7_mbox_loc" 1114 | param { 1115 | lr_mult: 1 1116 | decay_mult: 1 1117 | } 1118 | param { 1119 | lr_mult: 2 1120 | decay_mult: 0 1121 | } 1122 | convolution_param { 1123 | num_output: 24 1124 | pad: 1 1125 | kernel_size: 3 1126 | stride: 1 1127 | weight_filler { 1128 | type: "xavier" 1129 | } 1130 | bias_filler { 1131 | type: "constant" 1132 | value: 0 1133 | } 1134 | } 1135 | } 1136 | layer { 1137 | name: "fc7_mbox_loc_perm" 1138 | type: "Permute" 1139 | bottom: 
"fc7_mbox_loc" 1140 | top: "fc7_mbox_loc_perm" 1141 | permute_param { 1142 | order: 0 1143 | order: 2 1144 | order: 3 1145 | order: 1 1146 | } 1147 | } 1148 | layer { 1149 | name: "fc7_mbox_loc_flat" 1150 | type: "Flatten" 1151 | bottom: "fc7_mbox_loc_perm" 1152 | top: "fc7_mbox_loc_flat" 1153 | flatten_param { 1154 | axis: 1 1155 | } 1156 | } 1157 | layer { 1158 | name: "fc7_mbox_conf" 1159 | type: "Convolution" 1160 | bottom: "fc7" 1161 | top: "fc7_mbox_conf" 1162 | param { 1163 | lr_mult: 1 1164 | decay_mult: 1 1165 | } 1166 | param { 1167 | lr_mult: 2 1168 | decay_mult: 0 1169 | } 1170 | convolution_param { 1171 | num_output: 12 # 126 1172 | pad: 1 1173 | kernel_size: 3 1174 | stride: 1 1175 | weight_filler { 1176 | type: "xavier" 1177 | } 1178 | bias_filler { 1179 | type: "constant" 1180 | value: 0 1181 | } 1182 | } 1183 | } 1184 | layer { 1185 | name: "fc7_mbox_conf_perm" 1186 | type: "Permute" 1187 | bottom: "fc7_mbox_conf" 1188 | top: "fc7_mbox_conf_perm" 1189 | permute_param { 1190 | order: 0 1191 | order: 2 1192 | order: 3 1193 | order: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "fc7_mbox_conf_flat" 1198 | type: "Flatten" 1199 | bottom: "fc7_mbox_conf_perm" 1200 | top: "fc7_mbox_conf_flat" 1201 | flatten_param { 1202 | axis: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "fc7_mbox_priorbox" 1207 | type: "PriorBox" 1208 | bottom: "fc7" 1209 | bottom: "data" 1210 | top: "fc7_mbox_priorbox" 1211 | prior_box_param { 1212 | min_size: 60.0 1213 | max_size: 111.0 1214 | aspect_ratio: 2 1215 | aspect_ratio: 3 1216 | flip: true 1217 | clip: false 1218 | variance: 0.1 1219 | variance: 0.1 1220 | variance: 0.2 1221 | variance: 0.2 1222 | step: 16 1223 | offset: 0.5 1224 | } 1225 | } 1226 | layer { 1227 | name: "conv6_2_mbox_loc" 1228 | type: "Convolution" 1229 | bottom: "conv6_2_h" 1230 | top: "conv6_2_mbox_loc" 1231 | param { 1232 | lr_mult: 1 1233 | decay_mult: 1 1234 | } 1235 | param { 1236 | lr_mult: 2 1237 | decay_mult: 0 1238 | } 1239 | convolution_param 
{ 1240 | num_output: 24 1241 | pad: 1 1242 | kernel_size: 3 1243 | stride: 1 1244 | weight_filler { 1245 | type: "xavier" 1246 | } 1247 | bias_filler { 1248 | type: "constant" 1249 | value: 0 1250 | } 1251 | } 1252 | } 1253 | layer { 1254 | name: "conv6_2_mbox_loc_perm" 1255 | type: "Permute" 1256 | bottom: "conv6_2_mbox_loc" 1257 | top: "conv6_2_mbox_loc_perm" 1258 | permute_param { 1259 | order: 0 1260 | order: 2 1261 | order: 3 1262 | order: 1 1263 | } 1264 | } 1265 | layer { 1266 | name: "conv6_2_mbox_loc_flat" 1267 | type: "Flatten" 1268 | bottom: "conv6_2_mbox_loc_perm" 1269 | top: "conv6_2_mbox_loc_flat" 1270 | flatten_param { 1271 | axis: 1 1272 | } 1273 | } 1274 | layer { 1275 | name: "conv6_2_mbox_conf" 1276 | type: "Convolution" 1277 | bottom: "conv6_2_h" 1278 | top: "conv6_2_mbox_conf" 1279 | param { 1280 | lr_mult: 1 1281 | decay_mult: 1 1282 | } 1283 | param { 1284 | lr_mult: 2 1285 | decay_mult: 0 1286 | } 1287 | convolution_param { 1288 | num_output: 12 # 126 1289 | pad: 1 1290 | kernel_size: 3 1291 | stride: 1 1292 | weight_filler { 1293 | type: "xavier" 1294 | } 1295 | bias_filler { 1296 | type: "constant" 1297 | value: 0 1298 | } 1299 | } 1300 | } 1301 | layer { 1302 | name: "conv6_2_mbox_conf_perm" 1303 | type: "Permute" 1304 | bottom: "conv6_2_mbox_conf" 1305 | top: "conv6_2_mbox_conf_perm" 1306 | permute_param { 1307 | order: 0 1308 | order: 2 1309 | order: 3 1310 | order: 1 1311 | } 1312 | } 1313 | layer { 1314 | name: "conv6_2_mbox_conf_flat" 1315 | type: "Flatten" 1316 | bottom: "conv6_2_mbox_conf_perm" 1317 | top: "conv6_2_mbox_conf_flat" 1318 | flatten_param { 1319 | axis: 1 1320 | } 1321 | } 1322 | layer { 1323 | name: "conv6_2_mbox_priorbox" 1324 | type: "PriorBox" 1325 | bottom: "conv6_2_h" 1326 | bottom: "data" 1327 | top: "conv6_2_mbox_priorbox" 1328 | prior_box_param { 1329 | min_size: 111.0 1330 | max_size: 162.0 1331 | aspect_ratio: 2 1332 | aspect_ratio: 3 1333 | flip: true 1334 | clip: false 1335 | variance: 0.1 1336 | variance: 
0.1 1337 | variance: 0.2 1338 | variance: 0.2 1339 | step: 32 1340 | offset: 0.5 1341 | } 1342 | } 1343 | layer { 1344 | name: "conv7_2_mbox_loc" 1345 | type: "Convolution" 1346 | bottom: "conv7_2_h" 1347 | top: "conv7_2_mbox_loc" 1348 | param { 1349 | lr_mult: 1 1350 | decay_mult: 1 1351 | } 1352 | param { 1353 | lr_mult: 2 1354 | decay_mult: 0 1355 | } 1356 | convolution_param { 1357 | num_output: 24 1358 | pad: 1 1359 | kernel_size: 3 1360 | stride: 1 1361 | weight_filler { 1362 | type: "xavier" 1363 | } 1364 | bias_filler { 1365 | type: "constant" 1366 | value: 0 1367 | } 1368 | } 1369 | } 1370 | layer { 1371 | name: "conv7_2_mbox_loc_perm" 1372 | type: "Permute" 1373 | bottom: "conv7_2_mbox_loc" 1374 | top: "conv7_2_mbox_loc_perm" 1375 | permute_param { 1376 | order: 0 1377 | order: 2 1378 | order: 3 1379 | order: 1 1380 | } 1381 | } 1382 | layer { 1383 | name: "conv7_2_mbox_loc_flat" 1384 | type: "Flatten" 1385 | bottom: "conv7_2_mbox_loc_perm" 1386 | top: "conv7_2_mbox_loc_flat" 1387 | flatten_param { 1388 | axis: 1 1389 | } 1390 | } 1391 | layer { 1392 | name: "conv7_2_mbox_conf" 1393 | type: "Convolution" 1394 | bottom: "conv7_2_h" 1395 | top: "conv7_2_mbox_conf" 1396 | param { 1397 | lr_mult: 1 1398 | decay_mult: 1 1399 | } 1400 | param { 1401 | lr_mult: 2 1402 | decay_mult: 0 1403 | } 1404 | convolution_param { 1405 | num_output: 12 # 126 1406 | pad: 1 1407 | kernel_size: 3 1408 | stride: 1 1409 | weight_filler { 1410 | type: "xavier" 1411 | } 1412 | bias_filler { 1413 | type: "constant" 1414 | value: 0 1415 | } 1416 | } 1417 | } 1418 | layer { 1419 | name: "conv7_2_mbox_conf_perm" 1420 | type: "Permute" 1421 | bottom: "conv7_2_mbox_conf" 1422 | top: "conv7_2_mbox_conf_perm" 1423 | permute_param { 1424 | order: 0 1425 | order: 2 1426 | order: 3 1427 | order: 1 1428 | } 1429 | } 1430 | layer { 1431 | name: "conv7_2_mbox_conf_flat" 1432 | type: "Flatten" 1433 | bottom: "conv7_2_mbox_conf_perm" 1434 | top: "conv7_2_mbox_conf_flat" 1435 | flatten_param { 
1436 | axis: 1 1437 | } 1438 | } 1439 | layer { 1440 | name: "conv7_2_mbox_priorbox" 1441 | type: "PriorBox" 1442 | bottom: "conv7_2_h" 1443 | bottom: "data" 1444 | top: "conv7_2_mbox_priorbox" 1445 | prior_box_param { 1446 | min_size: 162.0 1447 | max_size: 213.0 1448 | aspect_ratio: 2 1449 | aspect_ratio: 3 1450 | flip: true 1451 | clip: false 1452 | variance: 0.1 1453 | variance: 0.1 1454 | variance: 0.2 1455 | variance: 0.2 1456 | step: 64 1457 | offset: 0.5 1458 | } 1459 | } 1460 | layer { 1461 | name: "conv8_2_mbox_loc" 1462 | type: "Convolution" 1463 | bottom: "conv8_2_h" 1464 | top: "conv8_2_mbox_loc" 1465 | param { 1466 | lr_mult: 1 1467 | decay_mult: 1 1468 | } 1469 | param { 1470 | lr_mult: 2 1471 | decay_mult: 0 1472 | } 1473 | convolution_param { 1474 | num_output: 16 1475 | pad: 1 1476 | kernel_size: 3 1477 | stride: 1 1478 | weight_filler { 1479 | type: "xavier" 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "conv8_2_mbox_loc_perm" 1489 | type: "Permute" 1490 | bottom: "conv8_2_mbox_loc" 1491 | top: "conv8_2_mbox_loc_perm" 1492 | permute_param { 1493 | order: 0 1494 | order: 2 1495 | order: 3 1496 | order: 1 1497 | } 1498 | } 1499 | layer { 1500 | name: "conv8_2_mbox_loc_flat" 1501 | type: "Flatten" 1502 | bottom: "conv8_2_mbox_loc_perm" 1503 | top: "conv8_2_mbox_loc_flat" 1504 | flatten_param { 1505 | axis: 1 1506 | } 1507 | } 1508 | layer { 1509 | name: "conv8_2_mbox_conf" 1510 | type: "Convolution" 1511 | bottom: "conv8_2_h" 1512 | top: "conv8_2_mbox_conf" 1513 | param { 1514 | lr_mult: 1 1515 | decay_mult: 1 1516 | } 1517 | param { 1518 | lr_mult: 2 1519 | decay_mult: 0 1520 | } 1521 | convolution_param { 1522 | num_output: 8 # 84 1523 | pad: 1 1524 | kernel_size: 3 1525 | stride: 1 1526 | weight_filler { 1527 | type: "xavier" 1528 | } 1529 | bias_filler { 1530 | type: "constant" 1531 | value: 0 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: 
"conv8_2_mbox_conf_perm" 1537 | type: "Permute" 1538 | bottom: "conv8_2_mbox_conf" 1539 | top: "conv8_2_mbox_conf_perm" 1540 | permute_param { 1541 | order: 0 1542 | order: 2 1543 | order: 3 1544 | order: 1 1545 | } 1546 | } 1547 | layer { 1548 | name: "conv8_2_mbox_conf_flat" 1549 | type: "Flatten" 1550 | bottom: "conv8_2_mbox_conf_perm" 1551 | top: "conv8_2_mbox_conf_flat" 1552 | flatten_param { 1553 | axis: 1 1554 | } 1555 | } 1556 | layer { 1557 | name: "conv8_2_mbox_priorbox" 1558 | type: "PriorBox" 1559 | bottom: "conv8_2_h" 1560 | bottom: "data" 1561 | top: "conv8_2_mbox_priorbox" 1562 | prior_box_param { 1563 | min_size: 213.0 1564 | max_size: 264.0 1565 | aspect_ratio: 2 1566 | flip: true 1567 | clip: false 1568 | variance: 0.1 1569 | variance: 0.1 1570 | variance: 0.2 1571 | variance: 0.2 1572 | step: 100 1573 | offset: 0.5 1574 | } 1575 | } 1576 | layer { 1577 | name: "conv9_2_mbox_loc" 1578 | type: "Convolution" 1579 | bottom: "conv9_2_h" 1580 | top: "conv9_2_mbox_loc" 1581 | param { 1582 | lr_mult: 1 1583 | decay_mult: 1 1584 | } 1585 | param { 1586 | lr_mult: 2 1587 | decay_mult: 0 1588 | } 1589 | convolution_param { 1590 | num_output: 16 1591 | pad: 1 1592 | kernel_size: 3 1593 | stride: 1 1594 | weight_filler { 1595 | type: "xavier" 1596 | } 1597 | bias_filler { 1598 | type: "constant" 1599 | value: 0 1600 | } 1601 | } 1602 | } 1603 | layer { 1604 | name: "conv9_2_mbox_loc_perm" 1605 | type: "Permute" 1606 | bottom: "conv9_2_mbox_loc" 1607 | top: "conv9_2_mbox_loc_perm" 1608 | permute_param { 1609 | order: 0 1610 | order: 2 1611 | order: 3 1612 | order: 1 1613 | } 1614 | } 1615 | layer { 1616 | name: "conv9_2_mbox_loc_flat" 1617 | type: "Flatten" 1618 | bottom: "conv9_2_mbox_loc_perm" 1619 | top: "conv9_2_mbox_loc_flat" 1620 | flatten_param { 1621 | axis: 1 1622 | } 1623 | } 1624 | layer { 1625 | name: "conv9_2_mbox_conf" 1626 | type: "Convolution" 1627 | bottom: "conv9_2_h" 1628 | top: "conv9_2_mbox_conf" 1629 | param { 1630 | lr_mult: 1 1631 | 
decay_mult: 1 1632 | } 1633 | param { 1634 | lr_mult: 2 1635 | decay_mult: 0 1636 | } 1637 | convolution_param { 1638 | num_output: 8 # 84 1639 | pad: 1 1640 | kernel_size: 3 1641 | stride: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | } 1645 | bias_filler { 1646 | type: "constant" 1647 | value: 0 1648 | } 1649 | } 1650 | } 1651 | layer { 1652 | name: "conv9_2_mbox_conf_perm" 1653 | type: "Permute" 1654 | bottom: "conv9_2_mbox_conf" 1655 | top: "conv9_2_mbox_conf_perm" 1656 | permute_param { 1657 | order: 0 1658 | order: 2 1659 | order: 3 1660 | order: 1 1661 | } 1662 | } 1663 | layer { 1664 | name: "conv9_2_mbox_conf_flat" 1665 | type: "Flatten" 1666 | bottom: "conv9_2_mbox_conf_perm" 1667 | top: "conv9_2_mbox_conf_flat" 1668 | flatten_param { 1669 | axis: 1 1670 | } 1671 | } 1672 | layer { 1673 | name: "conv9_2_mbox_priorbox" 1674 | type: "PriorBox" 1675 | bottom: "conv9_2_h" 1676 | bottom: "data" 1677 | top: "conv9_2_mbox_priorbox" 1678 | prior_box_param { 1679 | min_size: 264.0 1680 | max_size: 315.0 1681 | aspect_ratio: 2 1682 | flip: true 1683 | clip: false 1684 | variance: 0.1 1685 | variance: 0.1 1686 | variance: 0.2 1687 | variance: 0.2 1688 | step: 300 1689 | offset: 0.5 1690 | } 1691 | } 1692 | layer { 1693 | name: "mbox_loc" 1694 | type: "Concat" 1695 | bottom: "conv4_3_norm_mbox_loc_flat" 1696 | bottom: "fc7_mbox_loc_flat" 1697 | bottom: "conv6_2_mbox_loc_flat" 1698 | bottom: "conv7_2_mbox_loc_flat" 1699 | bottom: "conv8_2_mbox_loc_flat" 1700 | bottom: "conv9_2_mbox_loc_flat" 1701 | top: "mbox_loc" 1702 | concat_param { 1703 | axis: 1 1704 | } 1705 | } 1706 | layer { 1707 | name: "mbox_conf" 1708 | type: "Concat" 1709 | bottom: "conv4_3_norm_mbox_conf_flat" 1710 | bottom: "fc7_mbox_conf_flat" 1711 | bottom: "conv6_2_mbox_conf_flat" 1712 | bottom: "conv7_2_mbox_conf_flat" 1713 | bottom: "conv8_2_mbox_conf_flat" 1714 | bottom: "conv9_2_mbox_conf_flat" 1715 | top: "mbox_conf" 1716 | concat_param { 1717 | axis: 1 1718 | } 1719 | } 1720 | layer { 
1721 | name: "mbox_priorbox" 1722 | type: "Concat" 1723 | bottom: "conv4_3_norm_mbox_priorbox" 1724 | bottom: "fc7_mbox_priorbox" 1725 | bottom: "conv6_2_mbox_priorbox" 1726 | bottom: "conv7_2_mbox_priorbox" 1727 | bottom: "conv8_2_mbox_priorbox" 1728 | bottom: "conv9_2_mbox_priorbox" 1729 | top: "mbox_priorbox" 1730 | concat_param { 1731 | axis: 2 1732 | } 1733 | } 1734 | 1735 | layer { 1736 | name: "mbox_conf_reshape" 1737 | type: "Reshape" 1738 | bottom: "mbox_conf" 1739 | top: "mbox_conf_reshape" 1740 | reshape_param { 1741 | shape { 1742 | dim: 0 1743 | dim: -1 1744 | dim: 2 1745 | } 1746 | } 1747 | } 1748 | layer { 1749 | name: "mbox_conf_softmax" 1750 | type: "Softmax" 1751 | bottom: "mbox_conf_reshape" 1752 | top: "mbox_conf_softmax" 1753 | softmax_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | layer { 1758 | name: "mbox_conf_flatten" 1759 | type: "Flatten" 1760 | bottom: "mbox_conf_softmax" 1761 | top: "mbox_conf_flatten" 1762 | flatten_param { 1763 | axis: 1 1764 | } 1765 | } 1766 | 1767 | layer { 1768 | name: "detection_out" 1769 | type: "DetectionOutput" 1770 | bottom: "mbox_loc" 1771 | bottom: "mbox_conf_flatten" 1772 | bottom: "mbox_priorbox" 1773 | top: "detection_out" 1774 | include { 1775 | phase: TEST 1776 | } 1777 | detection_output_param { 1778 | num_classes: 2 1779 | share_location: true 1780 | background_label_id: 0 1781 | nms_param { 1782 | nms_threshold: 0.45 1783 | top_k: 400 1784 | } 1785 | code_type: CENTER_SIZE 1786 | keep_top_k: 200 1787 | confidence_threshold: 0.01 1788 | } 1789 | } 1790 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD/iris_ssd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | using namespace cv; 8 | using namespace cv::dnn; 9 | 10 | #include 11 | #include 12 | using namespace std; 13 | 14 | const size_t inWidth = 300; 15 | const size_t inHeight = 300; 16 
| const double inScaleFactor = 1.0; 17 | const Scalar meanVal(128); 18 | 19 | const char* about = "This sample uses Single-Shot Detector " 20 | "(https://arxiv.org/abs/1512.02325) " 21 | "with ResNet-10 architecture to detect faces on camera/video/image.\n" 22 | "More information about the training is available here: " 23 | "/samples/dnn/face_detector/how_to_train_face_detector.txt\n" 24 | ".caffemodel model's file is available here: " 25 | "/samples/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel\n" 26 | ".prototxt file is available here: " 27 | "/samples/dnn/face_detector/deploy.prototxt\n"; 28 | 29 | //const char* params 30 | // = "{ help | false | print usage }" 31 | // "{ proto | deploy.prototxt | model configuration (deploy.prototxt) }" 32 | // "{ model | res10_300x300_ssd_iter_31000.caffemodel | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" 33 | // "{ camera_device | 0 | camera device number }" 34 | // "{ video | | video or image for detection }" 35 | // "{ min_confidence | 0.5 | min confidence }"; 36 | 37 | const char* params 38 | = "{ help | false | print usage }" 39 | "{ proto | deploy.half.prototxt | model configuration (deploy.prototxt) }" 40 | "{ model | res10_300x300_ssd.half_iter_31000.caffemodel | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" 41 | "{ camera_device | 0 | camera device number }" 42 | "{ video | | video or image for detection }" 43 | "{ min_confidence | 0.5 | min confidence }"; 44 | 45 | int main(int argc, char** argv) 46 | { 47 | CommandLineParser parser(argc, argv, params); 48 | 49 | if (parser.get("help")) 50 | { 51 | cout << about << endl; 52 | parser.printMessage(); 53 | return 0; 54 | } 55 | 56 | String modelConfiguration = parser.get("proto"); 57 | String modelBinary = parser.get("model"); 58 | 59 | //! [Initialize network] 60 | dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary); 61 | //! 
[Initialize network] 62 | 63 | if (net.empty()) 64 | { 65 | cerr << "Can't load network by using the following files: " << endl; 66 | cerr << "prototxt: " << modelConfiguration << endl; 67 | cerr << "caffemodel: " << modelBinary << endl; 68 | cerr << "Models are available here:" << endl; 69 | cerr << "/samples/dnn/face_detector" << endl; 70 | cerr << "or here:" << endl; 71 | cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl; 72 | exit(-1); 73 | } 74 | 75 | // net.setPreferableBackend(DNN_BACKEND_HALIDE); 76 | // net.setPreferableTarget(DNN_TARGET_CPU); 77 | 78 | // VideoCapture cap; 79 | // if (parser.get("video").empty()) 80 | // { 81 | // int cameraDevice = parser.get("camera_device"); 82 | // cap = VideoCapture(cameraDevice); 83 | // if(!cap.isOpened()) 84 | // { 85 | // cout << "Couldn't find camera: " << cameraDevice << endl; 86 | // return -1; 87 | // } 88 | // } 89 | // else 90 | // { 91 | // cap.open(parser.get("video")); 92 | // if(!cap.isOpened()) 93 | // { 94 | // cout << "Couldn't open image or video: " << parser.get("video") << endl; 95 | // return -1; 96 | // } 97 | // } 98 | 99 | int cnt = 0; 100 | 101 | for(;;) 102 | { 103 | Mat image; 104 | // cap >> image; // get a new frame from camera/video or read image 105 | 106 | // if (image.empty()) 107 | // { 108 | // waitKey(); 109 | // break; 110 | // } 111 | 112 | image = cv::imread("images/S2353L09.jpg", 1); 113 | 114 | // cv::resize(image, image, cv::Size(0, 0), 0.8, 0.8); 115 | 116 | cv::Mat image_result = image.clone(); 117 | 118 | cv::Mat gray; 119 | cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY); 120 | 121 | int bt = cv::getTickCount(); 122 | 123 | //! [Prepare blob] 124 | //! image: 3 channels 125 | Mat inputBlob = blobFromImage(gray, inScaleFactor, 126 | Size(inWidth, inHeight), Scalar(128), false, false); //Convert Mat to batch of images 127 | //! [Prepare blob] 128 | 129 | //! 
[Set input blob] 130 | net.setInput(inputBlob, "data"); //set the network input 131 | //! [Set input blob] 132 | 133 | //! [Make forward pass] 134 | Mat detection = net.forward("detection_out"); //compute output 135 | //! [Make forward pass] 136 | 137 | vector layersTimings; 138 | double freq = getTickFrequency() / 1000; 139 | double time = net.getPerfProfile(layersTimings) / freq; 140 | 141 | Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr()); 142 | 143 | int et = cv::getTickCount(); 144 | int t = (et - bt) * 1000.0 / cv::getTickFrequency(); 145 | 146 | cout << t << " ms" << endl; 147 | 148 | ostringstream ss; 149 | ss << "FPS: " << 1000/time << " ; time: " << int(time) << " ms"; 150 | putText(image_result, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255)); 151 | 152 | float confidenceThreshold = parser.get("min_confidence"); 153 | for(int i = 0; i < detectionMat.rows; i++) 154 | { 155 | float confidence = detectionMat.at(i, 2); 156 | 157 | if(confidence > confidenceThreshold) 158 | { 159 | int xLeftBottom = static_cast(detectionMat.at(i, 3) * image.cols); 160 | int yLeftBottom = static_cast(detectionMat.at(i, 4) * image.rows); 161 | int xRightTop = static_cast(detectionMat.at(i, 5) * image.cols); 162 | int yRightTop = static_cast(detectionMat.at(i, 6) * image.rows); 163 | 164 | Rect object((int)xLeftBottom, (int)yLeftBottom, 165 | (int)(xRightTop - xLeftBottom), 166 | (int)(yRightTop - yLeftBottom)); 167 | 168 | rectangle(image_result, object, Scalar(0, 255, 0)); 169 | 170 | ss.str(""); 171 | ss << confidence; 172 | String conf(ss.str()); 173 | String label = "Iris: " + conf; 174 | int baseLine = 0; 175 | Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); 176 | rectangle(image_result, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height), 177 | Size(labelSize.width, labelSize.height + baseLine)), 178 | Scalar(255, 255, 255), CV_FILLED); 179 | putText(image_result, label, Point(xLeftBottom, yLeftBottom), 180 
| FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); 181 | } 182 | } 183 | 184 | imshow("detections", image_result); 185 | int key = waitKey(1); 186 | if (key == 'q') 187 | break; 188 | if(key == 's') { 189 | imwrite("image.jpg", image_result); 190 | } 191 | 192 | } 193 | 194 | return 0; 195 | } // main 196 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD/iris_ssd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import cv2 as cv 4 | try: 5 | import cv2 as cv 6 | except ImportError: 7 | raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, ' 8 | 'configure environemnt variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)') 9 | 10 | from cv2 import dnn 11 | 12 | inWidth = 300 13 | inHeight = 300 14 | confThreshold = 0.5 15 | 16 | prototxt = 'deploy.prototxt' 17 | caffemodel = 'res10_300x300_ssd_iter_140000.caffemodel' 18 | 19 | if __name__ == '__main__': 20 | net = dnn.readNetFromCaffe(prototxt, caffemodel) 21 | while True: 22 | frame = cv.imread("../../images/S2353L09.jpg", 1) 23 | 24 | gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) 25 | 26 | cols = frame.shape[1] 27 | rows = frame.shape[0] 28 | 29 | net.setInput(dnn.blobFromImage(gray, 1.0, (inWidth, inHeight), (128), False, False)) 30 | detections = net.forward() 31 | 32 | # print(detections) 33 | 34 | perf_stats = net.getPerfProfile() 35 | 36 | infer_time = perf_stats[0] / cv.getTickFrequency() * 1000 37 | fps = 1000 / infer_time 38 | fps_time_str = 'fps = {0}, time = {1} ms'.format(int(fps), int(infer_time)) 39 | cv.putText(frame, fps_time_str, (50, 50), 40 | cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255)) 41 | 42 | for i in range(detections.shape[2]): 43 | confidence = detections[0, 0, i, 2] 44 | if confidence > confThreshold: 45 | xLeftBottom = int(detections[0, 0, i, 3] * cols) 46 | 
yLeftBottom = int(detections[0, 0, i, 4] * rows) 47 | xRightTop = int(detections[0, 0, i, 5] * cols) 48 | yRightTop = int(detections[0, 0, i, 6] * rows) 49 | 50 | cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop), 51 | (0, 255, 0)) 52 | label = "iris: %.4f" % confidence 53 | labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1) 54 | 55 | cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]), 56 | (xLeftBottom + labelSize[0], yLeftBottom + baseLine), 57 | (0, 0, 0), cv.FILLED) 58 | cv.putText(frame, label, (xLeftBottom, yLeftBottom), 59 | cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) 60 | 61 | if frame.shape[1] > 800 or frame.shape[0] > 800: 62 | frame = cv.resize(frame, dsize=(0,0), fx=0.5, fy=0.5) 63 | cv.imshow("detections", frame) 64 | if cv.waitKey(1) == int(ord('s')): 65 | cv.imwrite("result.bmp", frame) 66 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD/res10_300x300_ssd_iter_140000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/Caffe-SSD-Models/ResNet10-SSD/res10_300x300_ssd_iter_140000.caffemodel -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD/result.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/Caffe-SSD-Models/ResNet10-SSD/result.bmp -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD/solver.prototxt: -------------------------------------------------------------------------------- 1 | train_net: "models/ResNet10/train.prototxt" 2 | test_net: "models/ResNet10/test.prototxt" 3 | 4 | test_iter: 2312 5 | test_interval: 5000 6 | 
test_initialization: true 7 | 8 | base_lr: 0.01 9 | display: 10 10 | lr_policy: "multistep" 11 | max_iter: 140000 12 | stepvalue: 80000 13 | stepvalue: 120000 14 | gamma: 0.1 15 | momentum: 0.9 16 | weight_decay: 0.0005 17 | average_loss: 500 18 | iter_size: 1 19 | type: "SGD" 20 | 21 | solver_mode: GPU 22 | random_seed: 0 23 | debug_info: false 24 | snapshot: 1000 25 | snapshot_prefix: "models/ResNet10/snapshot/res10_300x300_ssd" 26 | 27 | eval_type: "detection" 28 | ap_version: "11point" 29 | -------------------------------------------------------------------------------- /Caffe-SSD-Models/ResNet10-SSD/test.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "data" 3 | type: "AnnotatedData" 4 | top: "data" 5 | top: "label" 6 | include { 7 | phase: TEST 8 | } 9 | transform_param { 10 | mean_value: 128 11 | resize_param { 12 | prob: 1 13 | resize_mode: WARP 14 | height: 300 15 | width: 300 16 | interp_mode: LINEAR 17 | } 18 | emit_constraint { 19 | emit_type: CENTER 20 | } 21 | } 22 | data_param { 23 | source: "examples/iris_dataset/iris_dataset_test_lmdb" 24 | batch_size: 8 25 | backend: LMDB 26 | } 27 | annotated_data_param { 28 | label_map_file: "labelmap.prototxt" 29 | } 30 | } 31 | 32 | layer { 33 | name: "data_bn" 34 | type: "BatchNorm" 35 | bottom: "data" 36 | top: "data_bn" 37 | param { 38 | lr_mult: 0.0 39 | } 40 | param { 41 | lr_mult: 0.0 42 | } 43 | param { 44 | lr_mult: 0.0 45 | } 46 | } 47 | layer { 48 | name: "data_scale" 49 | type: "Scale" 50 | bottom: "data_bn" 51 | top: "data_bn" 52 | param { 53 | lr_mult: 1.0 54 | decay_mult: 1.0 55 | } 56 | param { 57 | lr_mult: 2.0 58 | decay_mult: 1.0 59 | } 60 | scale_param { 61 | bias_term: true 62 | } 63 | } 64 | layer { 65 | name: "conv1_h" 66 | type: "Convolution" 67 | bottom: "data_bn" 68 | top: "conv1_h" 69 | param { 70 | lr_mult: 1.0 71 | decay_mult: 1.0 72 | } 73 | param { 74 | lr_mult: 2.0 75 | decay_mult: 1.0 76 | } 77 | convolution_param 
{ 78 | num_output: 32 79 | pad: 3 80 | kernel_size: 7 81 | stride: 2 82 | weight_filler { 83 | type: "msra" 84 | variance_norm: FAN_OUT 85 | } 86 | bias_filler { 87 | type: "constant" 88 | value: 0.0 89 | } 90 | } 91 | } 92 | layer { 93 | name: "conv1_bn_h" 94 | type: "BatchNorm" 95 | bottom: "conv1_h" 96 | top: "conv1_h" 97 | param { 98 | lr_mult: 0.0 99 | } 100 | param { 101 | lr_mult: 0.0 102 | } 103 | param { 104 | lr_mult: 0.0 105 | } 106 | } 107 | layer { 108 | name: "conv1_scale_h" 109 | type: "Scale" 110 | bottom: "conv1_h" 111 | top: "conv1_h" 112 | param { 113 | lr_mult: 1.0 114 | decay_mult: 1.0 115 | } 116 | param { 117 | lr_mult: 2.0 118 | decay_mult: 1.0 119 | } 120 | scale_param { 121 | bias_term: true 122 | } 123 | } 124 | layer { 125 | name: "conv1_relu" 126 | type: "ReLU" 127 | bottom: "conv1_h" 128 | top: "conv1_h" 129 | } 130 | layer { 131 | name: "conv1_pool" 132 | type: "Pooling" 133 | bottom: "conv1_h" 134 | top: "conv1_pool" 135 | pooling_param { 136 | kernel_size: 3 137 | stride: 2 138 | } 139 | } 140 | layer { 141 | name: "layer_64_1_conv1_h" 142 | type: "Convolution" 143 | bottom: "conv1_pool" 144 | top: "layer_64_1_conv1_h" 145 | param { 146 | lr_mult: 1.0 147 | decay_mult: 1.0 148 | } 149 | convolution_param { 150 | num_output: 32 151 | bias_term: false 152 | pad: 1 153 | kernel_size: 3 154 | stride: 1 155 | weight_filler { 156 | type: "msra" 157 | } 158 | bias_filler { 159 | type: "constant" 160 | value: 0.0 161 | } 162 | } 163 | } 164 | layer { 165 | name: "layer_64_1_bn2_h" 166 | type: "BatchNorm" 167 | bottom: "layer_64_1_conv1_h" 168 | top: "layer_64_1_conv1_h" 169 | param { 170 | lr_mult: 0.0 171 | } 172 | param { 173 | lr_mult: 0.0 174 | } 175 | param { 176 | lr_mult: 0.0 177 | } 178 | } 179 | layer { 180 | name: "layer_64_1_scale2_h" 181 | type: "Scale" 182 | bottom: "layer_64_1_conv1_h" 183 | top: "layer_64_1_conv1_h" 184 | param { 185 | lr_mult: 1.0 186 | decay_mult: 1.0 187 | } 188 | param { 189 | lr_mult: 2.0 190 | 
decay_mult: 1.0 191 | } 192 | scale_param { 193 | bias_term: true 194 | } 195 | } 196 | layer { 197 | name: "layer_64_1_relu2" 198 | type: "ReLU" 199 | bottom: "layer_64_1_conv1_h" 200 | top: "layer_64_1_conv1_h" 201 | } 202 | layer { 203 | name: "layer_64_1_conv2_h" 204 | type: "Convolution" 205 | bottom: "layer_64_1_conv1_h" 206 | top: "layer_64_1_conv2_h" 207 | param { 208 | lr_mult: 1.0 209 | decay_mult: 1.0 210 | } 211 | convolution_param { 212 | num_output: 32 213 | bias_term: false 214 | pad: 1 215 | kernel_size: 3 216 | stride: 1 217 | weight_filler { 218 | type: "msra" 219 | } 220 | bias_filler { 221 | type: "constant" 222 | value: 0.0 223 | } 224 | } 225 | } 226 | layer { 227 | name: "layer_64_1_sum" 228 | type: "Eltwise" 229 | bottom: "layer_64_1_conv2_h" 230 | bottom: "conv1_pool" 231 | top: "layer_64_1_sum" 232 | } 233 | layer { 234 | name: "layer_128_1_bn1_h" 235 | type: "BatchNorm" 236 | bottom: "layer_64_1_sum" 237 | top: "layer_128_1_bn1_h" 238 | param { 239 | lr_mult: 0.0 240 | } 241 | param { 242 | lr_mult: 0.0 243 | } 244 | param { 245 | lr_mult: 0.0 246 | } 247 | } 248 | layer { 249 | name: "layer_128_1_scale1_h" 250 | type: "Scale" 251 | bottom: "layer_128_1_bn1_h" 252 | top: "layer_128_1_bn1_h" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | param { 258 | lr_mult: 2.0 259 | decay_mult: 1.0 260 | } 261 | scale_param { 262 | bias_term: true 263 | } 264 | } 265 | layer { 266 | name: "layer_128_1_relu1" 267 | type: "ReLU" 268 | bottom: "layer_128_1_bn1_h" 269 | top: "layer_128_1_bn1_h" 270 | } 271 | layer { 272 | name: "layer_128_1_conv1_h" 273 | type: "Convolution" 274 | bottom: "layer_128_1_bn1_h" 275 | top: "layer_128_1_conv1_h" 276 | param { 277 | lr_mult: 1.0 278 | decay_mult: 1.0 279 | } 280 | convolution_param { 281 | num_output: 128 282 | bias_term: false 283 | pad: 1 284 | kernel_size: 3 285 | stride: 2 286 | weight_filler { 287 | type: "msra" 288 | } 289 | bias_filler { 290 | type: "constant" 291 | value: 0.0 292 | 
} 293 | } 294 | } 295 | layer { 296 | name: "layer_128_1_bn2" 297 | type: "BatchNorm" 298 | bottom: "layer_128_1_conv1_h" 299 | top: "layer_128_1_conv1_h" 300 | param { 301 | lr_mult: 0.0 302 | } 303 | param { 304 | lr_mult: 0.0 305 | } 306 | param { 307 | lr_mult: 0.0 308 | } 309 | } 310 | layer { 311 | name: "layer_128_1_scale2" 312 | type: "Scale" 313 | bottom: "layer_128_1_conv1_h" 314 | top: "layer_128_1_conv1_h" 315 | param { 316 | lr_mult: 1.0 317 | decay_mult: 1.0 318 | } 319 | param { 320 | lr_mult: 2.0 321 | decay_mult: 1.0 322 | } 323 | scale_param { 324 | bias_term: true 325 | } 326 | } 327 | layer { 328 | name: "layer_128_1_relu2" 329 | type: "ReLU" 330 | bottom: "layer_128_1_conv1_h" 331 | top: "layer_128_1_conv1_h" 332 | } 333 | layer { 334 | name: "layer_128_1_conv2" 335 | type: "Convolution" 336 | bottom: "layer_128_1_conv1_h" 337 | top: "layer_128_1_conv2" 338 | param { 339 | lr_mult: 1.0 340 | decay_mult: 1.0 341 | } 342 | convolution_param { 343 | num_output: 128 344 | bias_term: false 345 | pad: 1 346 | kernel_size: 3 347 | stride: 1 348 | weight_filler { 349 | type: "msra" 350 | } 351 | bias_filler { 352 | type: "constant" 353 | value: 0.0 354 | } 355 | } 356 | } 357 | layer { 358 | name: "layer_128_1_conv_expand_h" 359 | type: "Convolution" 360 | bottom: "layer_128_1_bn1_h" 361 | top: "layer_128_1_conv_expand_h" 362 | param { 363 | lr_mult: 1.0 364 | decay_mult: 1.0 365 | } 366 | convolution_param { 367 | num_output: 128 368 | bias_term: false 369 | pad: 0 370 | kernel_size: 1 371 | stride: 2 372 | weight_filler { 373 | type: "msra" 374 | } 375 | bias_filler { 376 | type: "constant" 377 | value: 0.0 378 | } 379 | } 380 | } 381 | layer { 382 | name: "layer_128_1_sum" 383 | type: "Eltwise" 384 | bottom: "layer_128_1_conv2" 385 | bottom: "layer_128_1_conv_expand_h" 386 | top: "layer_128_1_sum" 387 | } 388 | layer { 389 | name: "layer_256_1_bn1" 390 | type: "BatchNorm" 391 | bottom: "layer_128_1_sum" 392 | top: "layer_256_1_bn1" 393 | param { 394 
| lr_mult: 0.0 395 | } 396 | param { 397 | lr_mult: 0.0 398 | } 399 | param { 400 | lr_mult: 0.0 401 | } 402 | } 403 | layer { 404 | name: "layer_256_1_scale1" 405 | type: "Scale" 406 | bottom: "layer_256_1_bn1" 407 | top: "layer_256_1_bn1" 408 | param { 409 | lr_mult: 1.0 410 | decay_mult: 1.0 411 | } 412 | param { 413 | lr_mult: 2.0 414 | decay_mult: 1.0 415 | } 416 | scale_param { 417 | bias_term: true 418 | } 419 | } 420 | layer { 421 | name: "layer_256_1_relu1" 422 | type: "ReLU" 423 | bottom: "layer_256_1_bn1" 424 | top: "layer_256_1_bn1" 425 | } 426 | layer { 427 | name: "layer_256_1_conv1" 428 | type: "Convolution" 429 | bottom: "layer_256_1_bn1" 430 | top: "layer_256_1_conv1" 431 | param { 432 | lr_mult: 1.0 433 | decay_mult: 1.0 434 | } 435 | convolution_param { 436 | num_output: 256 437 | bias_term: false 438 | pad: 1 439 | kernel_size: 3 440 | stride: 2 441 | weight_filler { 442 | type: "msra" 443 | } 444 | bias_filler { 445 | type: "constant" 446 | value: 0.0 447 | } 448 | } 449 | } 450 | layer { 451 | name: "layer_256_1_bn2" 452 | type: "BatchNorm" 453 | bottom: "layer_256_1_conv1" 454 | top: "layer_256_1_conv1" 455 | param { 456 | lr_mult: 0.0 457 | } 458 | param { 459 | lr_mult: 0.0 460 | } 461 | param { 462 | lr_mult: 0.0 463 | } 464 | } 465 | layer { 466 | name: "layer_256_1_scale2" 467 | type: "Scale" 468 | bottom: "layer_256_1_conv1" 469 | top: "layer_256_1_conv1" 470 | param { 471 | lr_mult: 1.0 472 | decay_mult: 1.0 473 | } 474 | param { 475 | lr_mult: 2.0 476 | decay_mult: 1.0 477 | } 478 | scale_param { 479 | bias_term: true 480 | } 481 | } 482 | layer { 483 | name: "layer_256_1_relu2" 484 | type: "ReLU" 485 | bottom: "layer_256_1_conv1" 486 | top: "layer_256_1_conv1" 487 | } 488 | layer { 489 | name: "layer_256_1_conv2" 490 | type: "Convolution" 491 | bottom: "layer_256_1_conv1" 492 | top: "layer_256_1_conv2" 493 | param { 494 | lr_mult: 1.0 495 | decay_mult: 1.0 496 | } 497 | convolution_param { 498 | num_output: 256 499 | bias_term: false 
500 | pad: 1 501 | kernel_size: 3 502 | stride: 1 503 | weight_filler { 504 | type: "msra" 505 | } 506 | bias_filler { 507 | type: "constant" 508 | value: 0.0 509 | } 510 | } 511 | } 512 | layer { 513 | name: "layer_256_1_conv_expand" 514 | type: "Convolution" 515 | bottom: "layer_256_1_bn1" 516 | top: "layer_256_1_conv_expand" 517 | param { 518 | lr_mult: 1.0 519 | decay_mult: 1.0 520 | } 521 | convolution_param { 522 | num_output: 256 523 | bias_term: false 524 | pad: 0 525 | kernel_size: 1 526 | stride: 2 527 | weight_filler { 528 | type: "msra" 529 | } 530 | bias_filler { 531 | type: "constant" 532 | value: 0.0 533 | } 534 | } 535 | } 536 | layer { 537 | name: "layer_256_1_sum" 538 | type: "Eltwise" 539 | bottom: "layer_256_1_conv2" 540 | bottom: "layer_256_1_conv_expand" 541 | top: "layer_256_1_sum" 542 | } 543 | layer { 544 | name: "layer_512_1_bn1" 545 | type: "BatchNorm" 546 | bottom: "layer_256_1_sum" 547 | top: "layer_512_1_bn1" 548 | param { 549 | lr_mult: 0.0 550 | } 551 | param { 552 | lr_mult: 0.0 553 | } 554 | param { 555 | lr_mult: 0.0 556 | } 557 | } 558 | layer { 559 | name: "layer_512_1_scale1" 560 | type: "Scale" 561 | bottom: "layer_512_1_bn1" 562 | top: "layer_512_1_bn1" 563 | param { 564 | lr_mult: 1.0 565 | decay_mult: 1.0 566 | } 567 | param { 568 | lr_mult: 2.0 569 | decay_mult: 1.0 570 | } 571 | scale_param { 572 | bias_term: true 573 | } 574 | } 575 | layer { 576 | name: "layer_512_1_relu1" 577 | type: "ReLU" 578 | bottom: "layer_512_1_bn1" 579 | top: "layer_512_1_bn1" 580 | } 581 | layer { 582 | name: "layer_512_1_conv1_h" 583 | type: "Convolution" 584 | bottom: "layer_512_1_bn1" 585 | top: "layer_512_1_conv1_h" 586 | param { 587 | lr_mult: 1.0 588 | decay_mult: 1.0 589 | } 590 | convolution_param { 591 | num_output: 128 592 | bias_term: false 593 | pad: 1 594 | kernel_size: 3 595 | stride: 1 # 2 596 | weight_filler { 597 | type: "msra" 598 | } 599 | bias_filler { 600 | type: "constant" 601 | value: 0.0 602 | } 603 | } 604 | } 605 | 
layer { 606 | name: "layer_512_1_bn2_h" 607 | type: "BatchNorm" 608 | bottom: "layer_512_1_conv1_h" 609 | top: "layer_512_1_conv1_h" 610 | param { 611 | lr_mult: 0.0 612 | } 613 | param { 614 | lr_mult: 0.0 615 | } 616 | param { 617 | lr_mult: 0.0 618 | } 619 | } 620 | layer { 621 | name: "layer_512_1_scale2_h" 622 | type: "Scale" 623 | bottom: "layer_512_1_conv1_h" 624 | top: "layer_512_1_conv1_h" 625 | param { 626 | lr_mult: 1.0 627 | decay_mult: 1.0 628 | } 629 | param { 630 | lr_mult: 2.0 631 | decay_mult: 1.0 632 | } 633 | scale_param { 634 | bias_term: true 635 | } 636 | } 637 | layer { 638 | name: "layer_512_1_relu2" 639 | type: "ReLU" 640 | bottom: "layer_512_1_conv1_h" 641 | top: "layer_512_1_conv1_h" 642 | } 643 | layer { 644 | name: "layer_512_1_conv2_h" 645 | type: "Convolution" 646 | bottom: "layer_512_1_conv1_h" 647 | top: "layer_512_1_conv2_h" 648 | param { 649 | lr_mult: 1.0 650 | decay_mult: 1.0 651 | } 652 | convolution_param { 653 | num_output: 256 654 | bias_term: false 655 | pad: 2 # 1 656 | kernel_size: 3 657 | stride: 1 658 | dilation: 2 659 | weight_filler { 660 | type: "msra" 661 | } 662 | bias_filler { 663 | type: "constant" 664 | value: 0.0 665 | } 666 | } 667 | } 668 | layer { 669 | name: "layer_512_1_conv_expand_h" 670 | type: "Convolution" 671 | bottom: "layer_512_1_bn1" 672 | top: "layer_512_1_conv_expand_h" 673 | param { 674 | lr_mult: 1.0 675 | decay_mult: 1.0 676 | } 677 | convolution_param { 678 | num_output: 256 679 | bias_term: false 680 | pad: 0 681 | kernel_size: 1 682 | stride: 1 # 2 683 | weight_filler { 684 | type: "msra" 685 | } 686 | bias_filler { 687 | type: "constant" 688 | value: 0.0 689 | } 690 | } 691 | } 692 | layer { 693 | name: "layer_512_1_sum" 694 | type: "Eltwise" 695 | bottom: "layer_512_1_conv2_h" 696 | bottom: "layer_512_1_conv_expand_h" 697 | top: "layer_512_1_sum" 698 | } 699 | layer { 700 | name: "last_bn_h" 701 | type: "BatchNorm" 702 | bottom: "layer_512_1_sum" 703 | top: "layer_512_1_sum" 704 | param { 
705 | lr_mult: 0.0 706 | } 707 | param { 708 | lr_mult: 0.0 709 | } 710 | param { 711 | lr_mult: 0.0 712 | } 713 | } 714 | layer { 715 | name: "last_scale_h" 716 | type: "Scale" 717 | bottom: "layer_512_1_sum" 718 | top: "layer_512_1_sum" 719 | param { 720 | lr_mult: 1.0 721 | decay_mult: 1.0 722 | } 723 | param { 724 | lr_mult: 2.0 725 | decay_mult: 1.0 726 | } 727 | scale_param { 728 | bias_term: true 729 | } 730 | } 731 | layer { 732 | name: "last_relu" 733 | type: "ReLU" 734 | bottom: "layer_512_1_sum" 735 | top: "fc7" 736 | } 737 | 738 | layer { 739 | name: "conv6_1_h" 740 | type: "Convolution" 741 | bottom: "fc7" 742 | top: "conv6_1_h" 743 | param { 744 | lr_mult: 1 745 | decay_mult: 1 746 | } 747 | param { 748 | lr_mult: 2 749 | decay_mult: 0 750 | } 751 | convolution_param { 752 | num_output: 128 753 | pad: 0 754 | kernel_size: 1 755 | stride: 1 756 | weight_filler { 757 | type: "xavier" 758 | } 759 | bias_filler { 760 | type: "constant" 761 | value: 0 762 | } 763 | } 764 | } 765 | layer { 766 | name: "conv6_1_relu" 767 | type: "ReLU" 768 | bottom: "conv6_1_h" 769 | top: "conv6_1_h" 770 | } 771 | layer { 772 | name: "conv6_2_h" 773 | type: "Convolution" 774 | bottom: "conv6_1_h" 775 | top: "conv6_2_h" 776 | param { 777 | lr_mult: 1 778 | decay_mult: 1 779 | } 780 | param { 781 | lr_mult: 2 782 | decay_mult: 0 783 | } 784 | convolution_param { 785 | num_output: 256 786 | pad: 1 787 | kernel_size: 3 788 | stride: 2 789 | weight_filler { 790 | type: "xavier" 791 | } 792 | bias_filler { 793 | type: "constant" 794 | value: 0 795 | } 796 | } 797 | } 798 | layer { 799 | name: "conv6_2_relu" 800 | type: "ReLU" 801 | bottom: "conv6_2_h" 802 | top: "conv6_2_h" 803 | } 804 | layer { 805 | name: "conv7_1_h" 806 | type: "Convolution" 807 | bottom: "conv6_2_h" 808 | top: "conv7_1_h" 809 | param { 810 | lr_mult: 1 811 | decay_mult: 1 812 | } 813 | param { 814 | lr_mult: 2 815 | decay_mult: 0 816 | } 817 | convolution_param { 818 | num_output: 64 819 | pad: 0 820 | 
kernel_size: 1 821 | stride: 1 822 | weight_filler { 823 | type: "xavier" 824 | } 825 | bias_filler { 826 | type: "constant" 827 | value: 0 828 | } 829 | } 830 | } 831 | layer { 832 | name: "conv7_1_relu" 833 | type: "ReLU" 834 | bottom: "conv7_1_h" 835 | top: "conv7_1_h" 836 | } 837 | layer { 838 | name: "conv7_2_h" 839 | type: "Convolution" 840 | bottom: "conv7_1_h" 841 | top: "conv7_2_h" 842 | param { 843 | lr_mult: 1 844 | decay_mult: 1 845 | } 846 | param { 847 | lr_mult: 2 848 | decay_mult: 0 849 | } 850 | convolution_param { 851 | num_output: 128 852 | pad: 1 853 | kernel_size: 3 854 | stride: 2 855 | weight_filler { 856 | type: "xavier" 857 | } 858 | bias_filler { 859 | type: "constant" 860 | value: 0 861 | } 862 | } 863 | } 864 | layer { 865 | name: "conv7_2_relu" 866 | type: "ReLU" 867 | bottom: "conv7_2_h" 868 | top: "conv7_2_h" 869 | } 870 | layer { 871 | name: "conv8_1_h" 872 | type: "Convolution" 873 | bottom: "conv7_2_h" 874 | top: "conv8_1_h" 875 | param { 876 | lr_mult: 1 877 | decay_mult: 1 878 | } 879 | param { 880 | lr_mult: 2 881 | decay_mult: 0 882 | } 883 | convolution_param { 884 | num_output: 64 885 | pad: 0 886 | kernel_size: 1 887 | stride: 1 888 | weight_filler { 889 | type: "xavier" 890 | } 891 | bias_filler { 892 | type: "constant" 893 | value: 0 894 | } 895 | } 896 | } 897 | layer { 898 | name: "conv8_1_relu" 899 | type: "ReLU" 900 | bottom: "conv8_1_h" 901 | top: "conv8_1_h" 902 | } 903 | layer { 904 | name: "conv8_2_h" 905 | type: "Convolution" 906 | bottom: "conv8_1_h" 907 | top: "conv8_2_h" 908 | param { 909 | lr_mult: 1 910 | decay_mult: 1 911 | } 912 | param { 913 | lr_mult: 2 914 | decay_mult: 0 915 | } 916 | convolution_param { 917 | num_output: 128 918 | pad: 1 919 | kernel_size: 3 920 | stride: 1 921 | weight_filler { 922 | type: "xavier" 923 | } 924 | bias_filler { 925 | type: "constant" 926 | value: 0 927 | } 928 | } 929 | } 930 | layer { 931 | name: "conv8_2_relu" 932 | type: "ReLU" 933 | bottom: "conv8_2_h" 934 | top: 
"conv8_2_h" 935 | } 936 | layer { 937 | name: "conv9_1_h" 938 | type: "Convolution" 939 | bottom: "conv8_2_h" 940 | top: "conv9_1_h" 941 | param { 942 | lr_mult: 1 943 | decay_mult: 1 944 | } 945 | param { 946 | lr_mult: 2 947 | decay_mult: 0 948 | } 949 | convolution_param { 950 | num_output: 64 951 | pad: 0 952 | kernel_size: 1 953 | stride: 1 954 | weight_filler { 955 | type: "xavier" 956 | } 957 | bias_filler { 958 | type: "constant" 959 | value: 0 960 | } 961 | } 962 | } 963 | layer { 964 | name: "conv9_1_relu" 965 | type: "ReLU" 966 | bottom: "conv9_1_h" 967 | top: "conv9_1_h" 968 | } 969 | layer { 970 | name: "conv9_2_h" 971 | type: "Convolution" 972 | bottom: "conv9_1_h" 973 | top: "conv9_2_h" 974 | param { 975 | lr_mult: 1 976 | decay_mult: 1 977 | } 978 | param { 979 | lr_mult: 2 980 | decay_mult: 0 981 | } 982 | convolution_param { 983 | num_output: 128 984 | pad: 1 985 | kernel_size: 3 986 | stride: 1 987 | weight_filler { 988 | type: "xavier" 989 | } 990 | bias_filler { 991 | type: "constant" 992 | value: 0 993 | } 994 | } 995 | } 996 | layer { 997 | name: "conv9_2_relu" 998 | type: "ReLU" 999 | bottom: "conv9_2_h" 1000 | top: "conv9_2_h" 1001 | } 1002 | layer { 1003 | name: "conv4_3_norm" 1004 | type: "Normalize" 1005 | bottom: "layer_256_1_bn1" 1006 | top: "conv4_3_norm" 1007 | norm_param { 1008 | across_spatial: false 1009 | scale_filler { 1010 | type: "constant" 1011 | value: 20 1012 | } 1013 | channel_shared: false 1014 | } 1015 | } 1016 | layer { 1017 | name: "conv4_3_norm_mbox_loc" 1018 | type: "Convolution" 1019 | bottom: "conv4_3_norm" 1020 | top: "conv4_3_norm_mbox_loc" 1021 | param { 1022 | lr_mult: 1 1023 | decay_mult: 1 1024 | } 1025 | param { 1026 | lr_mult: 2 1027 | decay_mult: 0 1028 | } 1029 | convolution_param { 1030 | num_output: 16 1031 | pad: 1 1032 | kernel_size: 3 1033 | stride: 1 1034 | weight_filler { 1035 | type: "xavier" 1036 | } 1037 | bias_filler { 1038 | type: "constant" 1039 | value: 0 1040 | } 1041 | } 1042 | } 1043 | 
layer { 1044 | name: "conv4_3_norm_mbox_loc_perm" 1045 | type: "Permute" 1046 | bottom: "conv4_3_norm_mbox_loc" 1047 | top: "conv4_3_norm_mbox_loc_perm" 1048 | permute_param { 1049 | order: 0 1050 | order: 2 1051 | order: 3 1052 | order: 1 1053 | } 1054 | } 1055 | layer { 1056 | name: "conv4_3_norm_mbox_loc_flat" 1057 | type: "Flatten" 1058 | bottom: "conv4_3_norm_mbox_loc_perm" 1059 | top: "conv4_3_norm_mbox_loc_flat" 1060 | flatten_param { 1061 | axis: 1 1062 | } 1063 | } 1064 | layer { 1065 | name: "conv4_3_norm_mbox_conf" 1066 | type: "Convolution" 1067 | bottom: "conv4_3_norm" 1068 | top: "conv4_3_norm_mbox_conf" 1069 | param { 1070 | lr_mult: 1 1071 | decay_mult: 1 1072 | } 1073 | param { 1074 | lr_mult: 2 1075 | decay_mult: 0 1076 | } 1077 | convolution_param { 1078 | num_output: 8 # 84 1079 | pad: 1 1080 | kernel_size: 3 1081 | stride: 1 1082 | weight_filler { 1083 | type: "xavier" 1084 | } 1085 | bias_filler { 1086 | type: "constant" 1087 | value: 0 1088 | } 1089 | } 1090 | } 1091 | layer { 1092 | name: "conv4_3_norm_mbox_conf_perm" 1093 | type: "Permute" 1094 | bottom: "conv4_3_norm_mbox_conf" 1095 | top: "conv4_3_norm_mbox_conf_perm" 1096 | permute_param { 1097 | order: 0 1098 | order: 2 1099 | order: 3 1100 | order: 1 1101 | } 1102 | } 1103 | layer { 1104 | name: "conv4_3_norm_mbox_conf_flat" 1105 | type: "Flatten" 1106 | bottom: "conv4_3_norm_mbox_conf_perm" 1107 | top: "conv4_3_norm_mbox_conf_flat" 1108 | flatten_param { 1109 | axis: 1 1110 | } 1111 | } 1112 | layer { 1113 | name: "conv4_3_norm_mbox_priorbox" 1114 | type: "PriorBox" 1115 | bottom: "conv4_3_norm" 1116 | bottom: "data" 1117 | top: "conv4_3_norm_mbox_priorbox" 1118 | prior_box_param { 1119 | min_size: 30.0 1120 | max_size: 60.0 1121 | aspect_ratio: 2 1122 | flip: true 1123 | clip: false 1124 | variance: 0.1 1125 | variance: 0.1 1126 | variance: 0.2 1127 | variance: 0.2 1128 | step: 8 1129 | offset: 0.5 1130 | } 1131 | } 1132 | layer { 1133 | name: "fc7_mbox_loc" 1134 | type: 
"Convolution" 1135 | bottom: "fc7" 1136 | top: "fc7_mbox_loc" 1137 | param { 1138 | lr_mult: 1 1139 | decay_mult: 1 1140 | } 1141 | param { 1142 | lr_mult: 2 1143 | decay_mult: 0 1144 | } 1145 | convolution_param { 1146 | num_output: 24 1147 | pad: 1 1148 | kernel_size: 3 1149 | stride: 1 1150 | weight_filler { 1151 | type: "xavier" 1152 | } 1153 | bias_filler { 1154 | type: "constant" 1155 | value: 0 1156 | } 1157 | } 1158 | } 1159 | layer { 1160 | name: "fc7_mbox_loc_perm" 1161 | type: "Permute" 1162 | bottom: "fc7_mbox_loc" 1163 | top: "fc7_mbox_loc_perm" 1164 | permute_param { 1165 | order: 0 1166 | order: 2 1167 | order: 3 1168 | order: 1 1169 | } 1170 | } 1171 | layer { 1172 | name: "fc7_mbox_loc_flat" 1173 | type: "Flatten" 1174 | bottom: "fc7_mbox_loc_perm" 1175 | top: "fc7_mbox_loc_flat" 1176 | flatten_param { 1177 | axis: 1 1178 | } 1179 | } 1180 | layer { 1181 | name: "fc7_mbox_conf" 1182 | type: "Convolution" 1183 | bottom: "fc7" 1184 | top: "fc7_mbox_conf" 1185 | param { 1186 | lr_mult: 1 1187 | decay_mult: 1 1188 | } 1189 | param { 1190 | lr_mult: 2 1191 | decay_mult: 0 1192 | } 1193 | convolution_param { 1194 | num_output: 12 # 126 1195 | pad: 1 1196 | kernel_size: 3 1197 | stride: 1 1198 | weight_filler { 1199 | type: "xavier" 1200 | } 1201 | bias_filler { 1202 | type: "constant" 1203 | value: 0 1204 | } 1205 | } 1206 | } 1207 | layer { 1208 | name: "fc7_mbox_conf_perm" 1209 | type: "Permute" 1210 | bottom: "fc7_mbox_conf" 1211 | top: "fc7_mbox_conf_perm" 1212 | permute_param { 1213 | order: 0 1214 | order: 2 1215 | order: 3 1216 | order: 1 1217 | } 1218 | } 1219 | layer { 1220 | name: "fc7_mbox_conf_flat" 1221 | type: "Flatten" 1222 | bottom: "fc7_mbox_conf_perm" 1223 | top: "fc7_mbox_conf_flat" 1224 | flatten_param { 1225 | axis: 1 1226 | } 1227 | } 1228 | layer { 1229 | name: "fc7_mbox_priorbox" 1230 | type: "PriorBox" 1231 | bottom: "fc7" 1232 | bottom: "data" 1233 | top: "fc7_mbox_priorbox" 1234 | prior_box_param { 1235 | min_size: 60.0 1236 | 
max_size: 111.0 1237 | aspect_ratio: 2 1238 | aspect_ratio: 3 1239 | flip: true 1240 | clip: false 1241 | variance: 0.1 1242 | variance: 0.1 1243 | variance: 0.2 1244 | variance: 0.2 1245 | step: 16 1246 | offset: 0.5 1247 | } 1248 | } 1249 | layer { 1250 | name: "conv6_2_mbox_loc" 1251 | type: "Convolution" 1252 | bottom: "conv6_2_h" 1253 | top: "conv6_2_mbox_loc" 1254 | param { 1255 | lr_mult: 1 1256 | decay_mult: 1 1257 | } 1258 | param { 1259 | lr_mult: 2 1260 | decay_mult: 0 1261 | } 1262 | convolution_param { 1263 | num_output: 24 1264 | pad: 1 1265 | kernel_size: 3 1266 | stride: 1 1267 | weight_filler { 1268 | type: "xavier" 1269 | } 1270 | bias_filler { 1271 | type: "constant" 1272 | value: 0 1273 | } 1274 | } 1275 | } 1276 | layer { 1277 | name: "conv6_2_mbox_loc_perm" 1278 | type: "Permute" 1279 | bottom: "conv6_2_mbox_loc" 1280 | top: "conv6_2_mbox_loc_perm" 1281 | permute_param { 1282 | order: 0 1283 | order: 2 1284 | order: 3 1285 | order: 1 1286 | } 1287 | } 1288 | layer { 1289 | name: "conv6_2_mbox_loc_flat" 1290 | type: "Flatten" 1291 | bottom: "conv6_2_mbox_loc_perm" 1292 | top: "conv6_2_mbox_loc_flat" 1293 | flatten_param { 1294 | axis: 1 1295 | } 1296 | } 1297 | layer { 1298 | name: "conv6_2_mbox_conf" 1299 | type: "Convolution" 1300 | bottom: "conv6_2_h" 1301 | top: "conv6_2_mbox_conf" 1302 | param { 1303 | lr_mult: 1 1304 | decay_mult: 1 1305 | } 1306 | param { 1307 | lr_mult: 2 1308 | decay_mult: 0 1309 | } 1310 | convolution_param { 1311 | num_output: 12 # 126 1312 | pad: 1 1313 | kernel_size: 3 1314 | stride: 1 1315 | weight_filler { 1316 | type: "xavier" 1317 | } 1318 | bias_filler { 1319 | type: "constant" 1320 | value: 0 1321 | } 1322 | } 1323 | } 1324 | layer { 1325 | name: "conv6_2_mbox_conf_perm" 1326 | type: "Permute" 1327 | bottom: "conv6_2_mbox_conf" 1328 | top: "conv6_2_mbox_conf_perm" 1329 | permute_param { 1330 | order: 0 1331 | order: 2 1332 | order: 3 1333 | order: 1 1334 | } 1335 | } 1336 | layer { 1337 | name: 
"conv6_2_mbox_conf_flat" 1338 | type: "Flatten" 1339 | bottom: "conv6_2_mbox_conf_perm" 1340 | top: "conv6_2_mbox_conf_flat" 1341 | flatten_param { 1342 | axis: 1 1343 | } 1344 | } 1345 | layer { 1346 | name: "conv6_2_mbox_priorbox" 1347 | type: "PriorBox" 1348 | bottom: "conv6_2_h" 1349 | bottom: "data" 1350 | top: "conv6_2_mbox_priorbox" 1351 | prior_box_param { 1352 | min_size: 111.0 1353 | max_size: 162.0 1354 | aspect_ratio: 2 1355 | aspect_ratio: 3 1356 | flip: true 1357 | clip: false 1358 | variance: 0.1 1359 | variance: 0.1 1360 | variance: 0.2 1361 | variance: 0.2 1362 | step: 32 1363 | offset: 0.5 1364 | } 1365 | } 1366 | layer { 1367 | name: "conv7_2_mbox_loc" 1368 | type: "Convolution" 1369 | bottom: "conv7_2_h" 1370 | top: "conv7_2_mbox_loc" 1371 | param { 1372 | lr_mult: 1 1373 | decay_mult: 1 1374 | } 1375 | param { 1376 | lr_mult: 2 1377 | decay_mult: 0 1378 | } 1379 | convolution_param { 1380 | num_output: 24 1381 | pad: 1 1382 | kernel_size: 3 1383 | stride: 1 1384 | weight_filler { 1385 | type: "xavier" 1386 | } 1387 | bias_filler { 1388 | type: "constant" 1389 | value: 0 1390 | } 1391 | } 1392 | } 1393 | layer { 1394 | name: "conv7_2_mbox_loc_perm" 1395 | type: "Permute" 1396 | bottom: "conv7_2_mbox_loc" 1397 | top: "conv7_2_mbox_loc_perm" 1398 | permute_param { 1399 | order: 0 1400 | order: 2 1401 | order: 3 1402 | order: 1 1403 | } 1404 | } 1405 | layer { 1406 | name: "conv7_2_mbox_loc_flat" 1407 | type: "Flatten" 1408 | bottom: "conv7_2_mbox_loc_perm" 1409 | top: "conv7_2_mbox_loc_flat" 1410 | flatten_param { 1411 | axis: 1 1412 | } 1413 | } 1414 | layer { 1415 | name: "conv7_2_mbox_conf" 1416 | type: "Convolution" 1417 | bottom: "conv7_2_h" 1418 | top: "conv7_2_mbox_conf" 1419 | param { 1420 | lr_mult: 1 1421 | decay_mult: 1 1422 | } 1423 | param { 1424 | lr_mult: 2 1425 | decay_mult: 0 1426 | } 1427 | convolution_param { 1428 | num_output: 12 # 126 1429 | pad: 1 1430 | kernel_size: 3 1431 | stride: 1 1432 | weight_filler { 1433 | type: 
"xavier" 1434 | } 1435 | bias_filler { 1436 | type: "constant" 1437 | value: 0 1438 | } 1439 | } 1440 | } 1441 | layer { 1442 | name: "conv7_2_mbox_conf_perm" 1443 | type: "Permute" 1444 | bottom: "conv7_2_mbox_conf" 1445 | top: "conv7_2_mbox_conf_perm" 1446 | permute_param { 1447 | order: 0 1448 | order: 2 1449 | order: 3 1450 | order: 1 1451 | } 1452 | } 1453 | layer { 1454 | name: "conv7_2_mbox_conf_flat" 1455 | type: "Flatten" 1456 | bottom: "conv7_2_mbox_conf_perm" 1457 | top: "conv7_2_mbox_conf_flat" 1458 | flatten_param { 1459 | axis: 1 1460 | } 1461 | } 1462 | layer { 1463 | name: "conv7_2_mbox_priorbox" 1464 | type: "PriorBox" 1465 | bottom: "conv7_2_h" 1466 | bottom: "data" 1467 | top: "conv7_2_mbox_priorbox" 1468 | prior_box_param { 1469 | min_size: 162.0 1470 | max_size: 213.0 1471 | aspect_ratio: 2 1472 | aspect_ratio: 3 1473 | flip: true 1474 | clip: false 1475 | variance: 0.1 1476 | variance: 0.1 1477 | variance: 0.2 1478 | variance: 0.2 1479 | step: 64 1480 | offset: 0.5 1481 | } 1482 | } 1483 | layer { 1484 | name: "conv8_2_mbox_loc" 1485 | type: "Convolution" 1486 | bottom: "conv8_2_h" 1487 | top: "conv8_2_mbox_loc" 1488 | param { 1489 | lr_mult: 1 1490 | decay_mult: 1 1491 | } 1492 | param { 1493 | lr_mult: 2 1494 | decay_mult: 0 1495 | } 1496 | convolution_param { 1497 | num_output: 16 1498 | pad: 1 1499 | kernel_size: 3 1500 | stride: 1 1501 | weight_filler { 1502 | type: "xavier" 1503 | } 1504 | bias_filler { 1505 | type: "constant" 1506 | value: 0 1507 | } 1508 | } 1509 | } 1510 | layer { 1511 | name: "conv8_2_mbox_loc_perm" 1512 | type: "Permute" 1513 | bottom: "conv8_2_mbox_loc" 1514 | top: "conv8_2_mbox_loc_perm" 1515 | permute_param { 1516 | order: 0 1517 | order: 2 1518 | order: 3 1519 | order: 1 1520 | } 1521 | } 1522 | layer { 1523 | name: "conv8_2_mbox_loc_flat" 1524 | type: "Flatten" 1525 | bottom: "conv8_2_mbox_loc_perm" 1526 | top: "conv8_2_mbox_loc_flat" 1527 | flatten_param { 1528 | axis: 1 1529 | } 1530 | } 1531 | layer { 1532 | 
name: "conv8_2_mbox_conf" 1533 | type: "Convolution" 1534 | bottom: "conv8_2_h" 1535 | top: "conv8_2_mbox_conf" 1536 | param { 1537 | lr_mult: 1 1538 | decay_mult: 1 1539 | } 1540 | param { 1541 | lr_mult: 2 1542 | decay_mult: 0 1543 | } 1544 | convolution_param { 1545 | num_output: 8 # 84 1546 | pad: 1 1547 | kernel_size: 3 1548 | stride: 1 1549 | weight_filler { 1550 | type: "xavier" 1551 | } 1552 | bias_filler { 1553 | type: "constant" 1554 | value: 0 1555 | } 1556 | } 1557 | } 1558 | layer { 1559 | name: "conv8_2_mbox_conf_perm" 1560 | type: "Permute" 1561 | bottom: "conv8_2_mbox_conf" 1562 | top: "conv8_2_mbox_conf_perm" 1563 | permute_param { 1564 | order: 0 1565 | order: 2 1566 | order: 3 1567 | order: 1 1568 | } 1569 | } 1570 | layer { 1571 | name: "conv8_2_mbox_conf_flat" 1572 | type: "Flatten" 1573 | bottom: "conv8_2_mbox_conf_perm" 1574 | top: "conv8_2_mbox_conf_flat" 1575 | flatten_param { 1576 | axis: 1 1577 | } 1578 | } 1579 | layer { 1580 | name: "conv8_2_mbox_priorbox" 1581 | type: "PriorBox" 1582 | bottom: "conv8_2_h" 1583 | bottom: "data" 1584 | top: "conv8_2_mbox_priorbox" 1585 | prior_box_param { 1586 | min_size: 213.0 1587 | max_size: 264.0 1588 | aspect_ratio: 2 1589 | flip: true 1590 | clip: false 1591 | variance: 0.1 1592 | variance: 0.1 1593 | variance: 0.2 1594 | variance: 0.2 1595 | step: 100 1596 | offset: 0.5 1597 | } 1598 | } 1599 | layer { 1600 | name: "conv9_2_mbox_loc" 1601 | type: "Convolution" 1602 | bottom: "conv9_2_h" 1603 | top: "conv9_2_mbox_loc" 1604 | param { 1605 | lr_mult: 1 1606 | decay_mult: 1 1607 | } 1608 | param { 1609 | lr_mult: 2 1610 | decay_mult: 0 1611 | } 1612 | convolution_param { 1613 | num_output: 16 1614 | pad: 1 1615 | kernel_size: 3 1616 | stride: 1 1617 | weight_filler { 1618 | type: "xavier" 1619 | } 1620 | bias_filler { 1621 | type: "constant" 1622 | value: 0 1623 | } 1624 | } 1625 | } 1626 | layer { 1627 | name: "conv9_2_mbox_loc_perm" 1628 | type: "Permute" 1629 | bottom: "conv9_2_mbox_loc" 1630 | 
top: "conv9_2_mbox_loc_perm" 1631 | permute_param { 1632 | order: 0 1633 | order: 2 1634 | order: 3 1635 | order: 1 1636 | } 1637 | } 1638 | layer { 1639 | name: "conv9_2_mbox_loc_flat" 1640 | type: "Flatten" 1641 | bottom: "conv9_2_mbox_loc_perm" 1642 | top: "conv9_2_mbox_loc_flat" 1643 | flatten_param { 1644 | axis: 1 1645 | } 1646 | } 1647 | layer { 1648 | name: "conv9_2_mbox_conf" 1649 | type: "Convolution" 1650 | bottom: "conv9_2_h" 1651 | top: "conv9_2_mbox_conf" 1652 | param { 1653 | lr_mult: 1 1654 | decay_mult: 1 1655 | } 1656 | param { 1657 | lr_mult: 2 1658 | decay_mult: 0 1659 | } 1660 | convolution_param { 1661 | num_output: 8 # 84 1662 | pad: 1 1663 | kernel_size: 3 1664 | stride: 1 1665 | weight_filler { 1666 | type: "xavier" 1667 | } 1668 | bias_filler { 1669 | type: "constant" 1670 | value: 0 1671 | } 1672 | } 1673 | } 1674 | layer { 1675 | name: "conv9_2_mbox_conf_perm" 1676 | type: "Permute" 1677 | bottom: "conv9_2_mbox_conf" 1678 | top: "conv9_2_mbox_conf_perm" 1679 | permute_param { 1680 | order: 0 1681 | order: 2 1682 | order: 3 1683 | order: 1 1684 | } 1685 | } 1686 | layer { 1687 | name: "conv9_2_mbox_conf_flat" 1688 | type: "Flatten" 1689 | bottom: "conv9_2_mbox_conf_perm" 1690 | top: "conv9_2_mbox_conf_flat" 1691 | flatten_param { 1692 | axis: 1 1693 | } 1694 | } 1695 | layer { 1696 | name: "conv9_2_mbox_priorbox" 1697 | type: "PriorBox" 1698 | bottom: "conv9_2_h" 1699 | bottom: "data" 1700 | top: "conv9_2_mbox_priorbox" 1701 | prior_box_param { 1702 | min_size: 264.0 1703 | max_size: 315.0 1704 | aspect_ratio: 2 1705 | flip: true 1706 | clip: false 1707 | variance: 0.1 1708 | variance: 0.1 1709 | variance: 0.2 1710 | variance: 0.2 1711 | step: 300 1712 | offset: 0.5 1713 | } 1714 | } 1715 | layer { 1716 | name: "mbox_loc" 1717 | type: "Concat" 1718 | bottom: "conv4_3_norm_mbox_loc_flat" 1719 | bottom: "fc7_mbox_loc_flat" 1720 | bottom: "conv6_2_mbox_loc_flat" 1721 | bottom: "conv7_2_mbox_loc_flat" 1722 | bottom: "conv8_2_mbox_loc_flat" 
1723 | bottom: "conv9_2_mbox_loc_flat" 1724 | top: "mbox_loc" 1725 | concat_param { 1726 | axis: 1 1727 | } 1728 | } 1729 | layer { 1730 | name: "mbox_conf" 1731 | type: "Concat" 1732 | bottom: "conv4_3_norm_mbox_conf_flat" 1733 | bottom: "fc7_mbox_conf_flat" 1734 | bottom: "conv6_2_mbox_conf_flat" 1735 | bottom: "conv7_2_mbox_conf_flat" 1736 | bottom: "conv8_2_mbox_conf_flat" 1737 | bottom: "conv9_2_mbox_conf_flat" 1738 | top: "mbox_conf" 1739 | concat_param { 1740 | axis: 1 1741 | } 1742 | } 1743 | layer { 1744 | name: "mbox_priorbox" 1745 | type: "Concat" 1746 | bottom: "conv4_3_norm_mbox_priorbox" 1747 | bottom: "fc7_mbox_priorbox" 1748 | bottom: "conv6_2_mbox_priorbox" 1749 | bottom: "conv7_2_mbox_priorbox" 1750 | bottom: "conv8_2_mbox_priorbox" 1751 | bottom: "conv9_2_mbox_priorbox" 1752 | top: "mbox_priorbox" 1753 | concat_param { 1754 | axis: 2 1755 | } 1756 | } 1757 | 1758 | layer { 1759 | name: "mbox_conf_reshape" 1760 | type: "Reshape" 1761 | bottom: "mbox_conf" 1762 | top: "mbox_conf_reshape" 1763 | reshape_param { 1764 | shape { 1765 | dim: 0 1766 | dim: -1 1767 | dim: 2 1768 | } 1769 | } 1770 | } 1771 | layer { 1772 | name: "mbox_conf_softmax" 1773 | type: "Softmax" 1774 | bottom: "mbox_conf_reshape" 1775 | top: "mbox_conf_softmax" 1776 | softmax_param { 1777 | axis: 2 1778 | } 1779 | } 1780 | layer { 1781 | name: "mbox_conf_flatten" 1782 | type: "Flatten" 1783 | bottom: "mbox_conf_softmax" 1784 | top: "mbox_conf_flatten" 1785 | flatten_param { 1786 | axis: 1 1787 | } 1788 | } 1789 | 1790 | layer { 1791 | name: "detection_out" 1792 | type: "DetectionOutput" 1793 | bottom: "mbox_loc" 1794 | bottom: "mbox_conf_flatten" 1795 | bottom: "mbox_priorbox" 1796 | top: "detection_out" 1797 | include { 1798 | phase: TEST 1799 | } 1800 | detection_output_param { 1801 | num_classes: 2 1802 | share_location: true 1803 | background_label_id: 0 1804 | nms_param { 1805 | nms_threshold: 0.45 1806 | top_k: 400 1807 | } 1808 | code_type: CENTER_SIZE 1809 | keep_top_k: 
200 1810 | confidence_threshold: 0.01 1811 | } 1812 | } 1813 | layer { 1814 | name: "detection_eval" 1815 | type: "DetectionEvaluate" 1816 | bottom: "detection_out" 1817 | bottom: "label" 1818 | top: "detection_eval" 1819 | include { 1820 | phase: TEST 1821 | } 1822 | detection_evaluate_param { 1823 | num_classes: 2 1824 | background_label_id: 0 1825 | overlap_threshold: 0.5 1826 | evaluate_difficult_gt: false 1827 | } 1828 | } 1829 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iris_detector 2 | 3 | ![Alt text](./results/1.png) 4 | 5 | ## How to train iris detector 6 | 7 | See [how_to_train_iris_detector_with_caffe_ssd.md](./how_to_train_iris_detector_with_caffe_ssd.md) 8 | 9 | ## Network design 10 | 11 | ### ResNet10-SSD 12 | Using 4 residual modules as base network, then add SSD's extra layers. 13 | 14 | **Visualization of network structure (tools from ethereon)** 15 | http://ethereon.github.io/netscope/#/gist/bc73857987941a56bc45bf4c4ae870b0 16 | 17 | ### ResNet10-SSD with half filter number 18 | The structure is same as ResNet10-SSD except its filter number. When filter number is larger than 32, reduce it by half. 19 | 20 | **Visualization of network structure (tools from ethereon)** 21 | http://ethereon.github.io/netscope/#/gist/cf4dccec1f9a6c8f3f125000cd7b97f9 22 | 23 | 24 | ### MobileNet-SSD 25 | 26 | See https://github.com/chuanqi305/MobileNet-SSD 27 | 28 | **Visualization of network structure (tools from ethereon)** 29 | http://ethereon.github.io/netscope/#/gist/e1e8c3c3a450f0502ef8ff6547d5dedb 30 | 31 | ## Experiment 32 | 33 | Our iris dataset has 12800 training samples and 3200 test samples. Training on GTX1080Ti. Evaluate on Intel i5 CPU and GTX1080Ti GPU. 34 | 35 | **Speed test** 36 | ResNet10+SSD(half) is faster than others. 
37 | 38 | | Network | mAP@0.5 | Speed on Intel i5 CPU(ms) | Speed on GTX1080Ti(ms) | Input resolution | 39 | | :--------: | :--------:| :------: |:--------: | :--------: | 40 | | ResNet10+SSD | 1.0 | 20 | 13 | 640x480 | 41 | | ResNet10+SSD(half) | 1.0 | **10** | **7** | 640x480 | 42 | | MobileNet+SSD | - | 27 | 18 | 640x480 | 43 | 44 | ### ResNet10-SSD 45 | 46 | #### Training 47 | With the confidence threshold set to 0.5 and the IoU threshold set to 0.5, the accuracy is 100%. 48 | 49 | ![Alt text](./results/2.png) 50 | 51 | 52 | ![Alt text](./results/3.png) 53 | 54 | 55 | #### Evaluation 56 | On Intel i5 CPU, the average inference time is **20ms**. On GTX1080Ti GPU, the average inference time is **13ms**. 57 | 58 | **Intel i5 CPU** 59 | 60 | - evaluate by opencv3.4 Python API: 61 | ![Alt text](./results/4.png) 62 | 63 | - evaluate by opencv3.4 C++ API: 64 | ![Alt text](./results/5.png) 65 | 66 | **GTX1080Ti GPU** 67 | 68 | ![Alt text](./results/6.png) 69 | 70 | ### ResNet10-SSD with half filter number 71 | 72 | #### Training 73 | 74 | With the confidence threshold set to 0.5 and the IoU threshold set to 0.5, the accuracy is 100%. 75 | 76 | ![Alt text](./results/7.png) 77 | 78 | ![Alt text](./results/8.png) 79 | 80 | #### Evaluation 81 | On Intel i5 CPU, the average inference time is **10ms**. On GTX1080Ti GPU, the average inference time is **7ms**. 82 | 83 | **Intel i5 CPU** 84 | 85 | evaluate by opencv3.4 Python API: 86 | ![Alt text](./results/9.png) 87 | 88 | evaluate by opencv3.4 C++ API: 89 | ![Alt text](./results/10.png) 90 | 91 | **GTX1080Ti GPU** 92 | 93 | ![Alt text](./results/11.png) 94 | 95 | ### MobileNet-SSD 96 | 97 | #### Evaluation 98 | We only evaluated mobilenet_300x300_ssd_iter_3000.caffemodel. On Intel i5 CPU, the average inference time is **27ms**. On GTX1080Ti GPU, the average inference time is **18ms**. This is slower than ResNet10-SSD, which already has high accuracy on the iris dataset, so we stopped training.
99 | 100 | ![Alt text](./results/12.png) 101 | 102 | ## Questions 103 | Please contact timlee.zh@gmail.com 104 | 105 | -------------------------------------------------------------------------------- /how_to_train_iris_detector_with_caffe_ssd.md: -------------------------------------------------------------------------------- 1 | # how to train iris detector 2 | 3 | ## Installation 4 | 5 | ### Get the code 6 | git clone https://github.com/weiliu89/caffe.git 7 | cd caffe 8 | git checkout ssd 9 | 10 | ### Build the code by CMake 11 | You'd better build caffe with python2. 12 | 13 | If you want to build caffe with python3, modify CMakeLists.txt as follows: 14 | ``` 15 | -set(python_version "2" CACHE STRING "Specify which Python version to use") 16 | +set(python_version "3" CACHE STRING "Specify which Python version to use") 17 | 18 | - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall") 19 | + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall -std=c++11") 20 | ``` 21 | 22 | **Compile** 23 | 24 | ``` 25 | $ cd $CAFFE_ROOT 26 | $ mkdir build 27 | $ cd build 28 | $ cmake .. 29 | $ make -j8; make install 30 | ``` 31 | 32 | ## Dataset Preparation 33 | 34 | Because our annotations are in CSV format, we have to convert them to pascal-voc format. 35 | 36 | ### Convert CSV format annotation to pascal-voc format 37 | 38 | #### CSV format 39 | ``` 40 | filename,left,top,right,bottom 41 | filename1,left1,top1,right1,bottom1 42 | filename2,left2,top2,right2,bottom2 43 | filename3,left3,top3,right3,bottom3 44 | ...
45 | ``` 46 | 47 | #### Pascal-voc format 48 | ``` 49 | <annotation> 50 | <size> 51 | <width>300</width> 52 | <height>300</height> 53 | </size> 54 | <object> 55 | <name>face</name> 56 | <difficult>0</difficult> 57 | <bndbox> 58 | <xmin>100</xmin> 59 | <ymin>100</ymin> 60 | <xmax>200</xmax> 61 | <ymax>200</ymax> 62 | </bndbox> 63 | </object> 64 | <object> 65 | <name>face</name> 66 | <difficult>0</difficult> 67 | <bndbox> 68 | <xmin>0</xmin> 69 | <ymin>0</ymin> 70 | <xmax>100</xmax> 71 | <ymax>100</ymax> 72 | </bndbox> 73 | </object> 74 | </annotation> 75 | ``` 76 | 77 | #### Format conversion 78 | **Install pascal_voc_writer** 79 | ``` 80 | sudo pip3 install pascal_voc_writer 81 | ``` 82 | 83 | **csv_to_pascal_voc.py** 84 | ``` 85 | import csv 86 | import os 87 | import pascal_voc_writer 88 | 89 | def csv_to_pascal_voc(csv_filename): 90 | with open(csv_filename, 'r') as f: 91 | reader = csv.reader(f) 92 | for item in reader: 93 | 94 | if reader.line_num == 1: 95 | continue 96 | print(item) 97 | 98 | # Writer(path, width, height) 99 | data_home = "/home/tim/datasets/iris_dataset/SingleEye_640x480_JPG/" 100 | abs_path = data_home + item[0] 101 | writer = pascal_voc_writer.Writer(path=abs_path, width=640, height=480, depth=1, database="iris dataset") 102 | # ::addObject(name, xmin, ymin, xmax, ymax) 103 | name = "iris" 104 | writer.addObject(name=name, xmin=item[1], ymin=item[2], xmax=item[3], ymax=item[4]) 105 | # ::save(path) 106 | pascal_voc_filename = '/home/tim/deep_learning/caffe/data/iris_dataset_devkit/single_eye_640x480/Annotations/' + item[0].split('/')[-1].split('.jpg')[0] + '.xml' 107 | writer.save(pascal_voc_filename) 108 | 109 | cmd = "cp {0} /home/tim/deep_learning/caffe/data/iris_dataset_devkit/single_eye_640x480/JPEGImages/".format(abs_path) 110 | os.system(cmd) 111 | 112 | if __name__ == '__main__': 113 | csv_filename = 'iris.bbox.2pts.csv' 114 | csv_to_pascal_voc(csv_filename) 115 | ``` 116 | 117 | After running csv_to_pascal_voc.py, the images will be saved to the JPEGImages directory, and the XML files will be saved to the Annotations directory. 118 | 119 | #### Creating trainval.txt and test.txt in ImageSets/Main directory 120 | trainval.txt contains the names of the training images, without the ".jpg" suffix. 121 | test.txt contains the names of the test images, without the ".jpg" suffix.
122 | 123 | You can generate them with the following commands: 124 | 125 | ``` 126 | $ cd JPEGImages 127 | $ ls *.jpg > ../ImageSets/Main/total_image.txt 128 | # shuffle name list 129 | $ cat total_image.txt | perl -MList::Util=shuffle -e 'print shuffle();' > trainval.txt 130 | $ cp trainval.txt test.txt 131 | ``` 132 | 133 | Finally, delete the last 20% of the name list in trainval.txt and the first 80% of the name list in test.txt, so that the ratio of training samples to test samples is 8 : 2. 134 | 135 | ### Creating lmdb database 136 | **data directory tree** 137 | 138 | ``` 139 | tim@tim-server:~/deep_learning/caffe$ tree data/iris_dataset 140 | data/iris_dataset 141 | ├── coco_voc_map.txt 142 | ├── create_data.sh 143 | ├── create_list.sh 144 | ├── labelmap_voc.prototxt 145 | ├── test_name_size.txt 146 | ├── test.txt 147 | └── trainval.txt 148 | 149 | tim@tim-server:~/deep_learning/caffe$ tree data/iris_dataset_devkit/ -L 2 150 | data/iris_dataset_devkit/ 151 | ├── iris_dataset 152 | │ └── lmdb 153 | ├── single_eye_640x480 154 | │ ├── Annotations 155 | │ ├── ImageSets 156 | │ └── JPEGImages 157 | └── single_eye_640x480.zip 158 | ``` 159 | 160 | **modify create_list.sh** 161 | ``` 162 | root_dir=/home/tim/deep_learning/caffe/data/iris_dataset_devkit 163 | for dataset in trainval test 164 | do 165 | ... 166 | for name in single_eye_640x480 167 | do 168 | ... 169 | done 170 | done 171 | ``` 172 | 173 | **modify create_data.sh** 174 | 175 | ``` 176 | root_dir="/home/tim/deep_learning/caffe" 177 | data_root_dir="/home/tim/deep_learning/caffe/data/iris_dataset_devkit" 178 | dataset_name="iris_dataset" 179 | ``` 180 | 181 | **modify labelmap_voc.prototxt** 182 | 183 | ``` 184 | item { 185 | name: "none_of_the_above" 186 | label: 0 187 | display_name: "background" 188 | } 189 | item { 190 | name: "iris" 191 | label: 1 192 | display_name: "iris" 193 | } 194 | ``` 195 | 196 | Set "--gray = True" in caffe/scripts/create_annoset.py.
This is because grayscale input reduces the inference time of iris detection. 197 | 198 | **Creating lmdb database** 199 | 200 | ``` 201 | $ ./data/iris_dataset/create_list.sh 202 | $ ./data/iris_dataset/create_data.sh 203 | ``` 204 | You will find the lmdb database in /home/tim/deep_learning/caffe/data/iris_dataset_devkit/iris_dataset/. 205 | 206 | ## Model training 207 | 208 | **Model training command:** 209 | ``` 210 | nohup ./build/tools/caffe train \ 211 | --solver="models/ResNet10/solver.prototxt" \ 212 | --gpu 0 2>&1 | tee /home/tim/deep_learning/caffe/models/ResNet10/log/ResNet10_iris_dataset_SSD_300x300.log & 213 | ``` 214 | -------------------------------------------------------------------------------- /images/S2353L09.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/images/S2353L09.jpg -------------------------------------------------------------------------------- /results/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/1.png -------------------------------------------------------------------------------- /results/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/10.png -------------------------------------------------------------------------------- /results/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/11.png -------------------------------------------------------------------------------- /results/12.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/12.png -------------------------------------------------------------------------------- /results/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/2.png -------------------------------------------------------------------------------- /results/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/3.png -------------------------------------------------------------------------------- /results/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/4.png -------------------------------------------------------------------------------- /results/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/5.png -------------------------------------------------------------------------------- /results/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/6.png -------------------------------------------------------------------------------- /results/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/7.png -------------------------------------------------------------------------------- /results/8.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/8.png -------------------------------------------------------------------------------- /results/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/9.png -------------------------------------------------------------------------------- /results/speed_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongqianli/iris_detector/7a0f0dab8e5e6920e7db81ffd29787174fea5a6c/results/speed_test.png -------------------------------------------------------------------------------- /如何使用Caffe-SSD框架训练虹膜检测模型.md: -------------------------------------------------------------------------------- 1 | @[toc] 2 | 3 | ## 安装Caffe-SSD 4 | 5 | ### 获取代码 6 | git clone https://github.com/weiliu89/caffe.git 7 | cd caffe 8 | git checkout ssd 9 | 10 | ### 用CMake编译 11 | 最好用python2编译pycaffe 12 | 13 | 如果要用python3,则需要修改CMakeLists.txt: 14 | ``` 15 | -set(python_version "2" CACHE STRING "Specify which Python version to use") 16 | +set(python_version "3" CACHE STRING "Specify which Python version to use") 17 | 18 | - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall") 19 | + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall -std=c++11") 20 | ``` 21 | 22 | **编译** 23 | 24 | ``` 25 | $ cd $CAFFE_ROOT 26 | $ mkdir build 27 | $ cd build 28 | $ cmake .. 29 | $ make -j8; make install 30 | ``` 31 | 32 | ## 数据准备 33 | 因为我们的标注文件是csv格式,需要转换为pascal-voc格式 34 | 35 | ### 转换CSV格式的标注文件为pascal-voc格式 36 | 37 | #### CSV格式 38 | ``` 39 | filename,left,top,right,bottom 40 | filename1,left1,top1,right1,bottom1 41 | filename2,left2,top2,right2,bottom2 42 | filename3,left3,top3,right3,bottom3 43 | ... 
44 | ``` 45 | 46 | #### Pascal-voc格式 47 | ``` 48 | <annotation> 49 | <size> 50 | <width>300</width> 51 | <height>300</height> 52 | </size> 53 | <object> 54 | <name>face</name> 55 | <difficult>0</difficult> 56 | <bndbox> 57 | <xmin>100</xmin> 58 | <ymin>100</ymin> 59 | <xmax>200</xmax> 60 | <ymax>200</ymax> 61 | </bndbox> 62 | </object> 63 | <object> 64 | <name>face</name> 65 | <difficult>0</difficult> 66 | <bndbox> 67 | <xmin>0</xmin> 68 | <ymin>0</ymin> 69 | <xmax>100</xmax> 70 | <ymax>100</ymax> 71 | </bndbox> 72 | </object> 73 | </annotation> 74 | ``` 75 | 76 | #### 格式转换 77 | **安装pascal_voc_writer** 78 | ``` 79 | sudo pip install pascal_voc_writer 80 | ``` 81 | 82 | **csv_to_pascal_voc.py** 83 | ``` 84 | import csv 85 | import os 86 | import pascal_voc_writer 87 | 88 | def csv_to_pascal_voc(csv_filename): 89 | with open(csv_filename, 'r') as f: 90 | reader = csv.reader(f) 91 | for item in reader: 92 | 93 | if reader.line_num == 1: 94 | continue 95 | print(item) 96 | 97 | # Writer(path, width, height) 98 | data_home = "/home/tim/datasets/iris_dataset/SingleEye_640x480_JPG/" 99 | abs_path = data_home + item[0] 100 | writer = pascal_voc_writer.Writer(path=abs_path, width=640, height=480, depth=1, database="iris dataset") 101 | # ::addObject(name, xmin, ymin, xmax, ymax) 102 | name = "iris" 103 | writer.addObject(name=name, xmin=item[1], ymin=item[2], xmax=item[3], ymax=item[4]) 104 | # ::save(path) 105 | pascal_voc_filename = '/home/tim/deep_learning/caffe/data/iris_dataset_devkit/single_eye_640x480/Annotations/' + item[0].split('/')[-1].split('.jpg')[0] + '.xml' 106 | writer.save(pascal_voc_filename) 107 | 108 | cmd = "cp {0} /home/tim/deep_learning/caffe/data/iris_dataset_devkit/single_eye_640x480/JPEGImages/".format(abs_path) 109 | os.system(cmd) 110 | 111 | if __name__ == '__main__': 112 | csv_filename = 'iris.bbox.2pts.csv' 113 | csv_to_pascal_voc(csv_filename) 114 | ``` 115 | 116 | 运行csv_to_pascal_voc.py脚本后,图像将保存到JPEGImages目录, XML文件将保存到Annotations目录。 117 | 118 | #### 在ImageSets/Main目录下创建trainval.txt和test.txt 119 | trainval.txt包含训练样本的名字列表,名字后面没有后缀“.jpg”。 120 | test.txt包含测试样本的名字列表,名字后面没有后缀“.jpg”。 121 | 122 | 可以用下面的命令生成: 123 | ``` 124 | $ cd JPEGImages 125 | $ ls *.jpg > ../ImageSets/Main/total_image.txt 126 | # shuffle name list 127 | $ cat total_image.txt | perl
-MList::Util=shuffle -e 'print shuffle();' > trainval.txt 128 | $ cp trainval.txt test.txt 129 | ``` 130 | 131 | 删除trainval.txt名字列表的后20%,删除test.txt的前80%,使得训练样本与测试样本的比例为8 : 2 132 | 133 | ### 创建lmdb数据库 134 | **data目录树** 135 | 136 | ``` 137 | tim@tim-server:~/deep_learning/caffe$ tree data/iris_dataset 138 | data/iris_dataset 139 | ├── coco_voc_map.txt 140 | ├── create_data.sh 141 | ├── create_list.sh 142 | ├── labelmap_voc.prototxt 143 | ├── test_name_size.txt 144 | ├── test.txt 145 | └── trainval.txt 146 | 147 | tim@tim-server:~/deep_learning/caffe$ tree data/iris_dataset_devkit/ -L 2 148 | data/iris_dataset_devkit/ 149 | ├── iris_dataset 150 | │ └── lmdb 151 | ├── single_eye_640x480 152 | │ ├── Annotations 153 | │ ├── ImageSets 154 | │ └── JPEGImages 155 | └── single_eye_640x480.zip 156 | ``` 157 | 158 | **修改 create_list.sh** 159 | ``` 160 | root_dir=/home/tim/deep_learning/caffe/data/iris_dataset_devkit 161 | for dataset in trainval test 162 | do 163 | ... 164 | for name in single_eye_640x480 165 | do 166 | ... 167 | done 168 | done 169 | ``` 170 | 171 | **修改 create_data.sh** 172 | 173 | ``` 174 | root_dir="/home/tim/deep_learning/caffe" 175 | data_root_dir="/home/tim/deep_learning/caffe/data/iris_dataset_devkit" 176 | dataset_name="iris_dataset" 177 | ``` 178 | 179 | **修改 labelmap_voc.prototxt** 180 | 181 | ``` 182 | item { 183 | name: "none_of_the_above" 184 | label: 0 185 | display_name: "background" 186 | } 187 | item { 188 | name: "iris" 189 | label: 1 190 | display_name: "iris" 191 | } 192 | ``` 193 | 194 | Set "--gray = True" in caffe/scripts/create_annoset.py, because grayscale input reduces the inference time of iris detection. 195 | 196 | **创建lmdb数据库** 197 | 198 | ``` 199 | $ ./data/iris_dataset/create_list.sh 200 | $ ./data/iris_dataset/create_data.sh 201 | ``` 202 | 可以看到lmdb数据库位于 /home/tim/deep_learning/caffe/data/iris_dataset_devkit/iris_dataset/.
203 | 204 | ## 模型训练 205 | 206 | **模型训练命令:** 207 | ``` 208 | nohup ./build/tools/caffe train \ 209 | --solver="models/ResNet10/solver.prototxt" \ 210 | --gpu 0 2>&1 | tee /home/tim/deep_learning/caffe/models/ResNet10/log/ResNet10_iris_dataset_SSD_300x300.log & 211 | ``` 212 | --------------------------------------------------------------------------------