├── classification_multilabel.cpp
├── solver.prototxt
├── README.md
├── deploy.prototxt
├── convert_multilabel.cpp
└── multi_label_AlexNet.prototxt

/classification_multilabel.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenjoya/Caffe_MultiLabel_Classification/HEAD/classification_multilabel.cpp
--------------------------------------------------------------------------------
/solver.prototxt:
--------------------------------------------------------------------------------
net: "models/bvlc_alexnet/multi_label_AlexNet.prototxt"
test_iter: 1
test_interval: 500
base_lr: 0.0001
lr_policy: "step"
gamma: 0.1
stepsize: 500
display: 10
max_iter: 1000
momentum: 0.9
weight_decay: 0.0005
snapshot: 500
snapshot_prefix: "models/bvlc_alexnet/ZnCar_alexnet_train"
solver_mode: CPU
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Caffe_MultiLabel_Classification
This is an example of multi-label classification in Caffe. Thanks to HolidayXue for the multi-label data conversion tool in https://github.com/HolidayXue/CodeSnap.
# Steps
## 1. Recompile
Download convert_multilabel.cpp from https://github.com/HolidayXue/CodeSnap, use it to replace convert_imageset.cpp, and recompile Caffe. You will get a rebuilt converter that can write multi-label data (convert_multilabel.exe; if you replace convert_imageset.cpp in place, the binary keeps the name convert_imageset.exe, as in the command below).
## 2. Prepare the data
Download my ZnCar data from this page (see the DataSet section below), then build your own LMDBs with a command line such as:
```
convert_imageset.exe --resize_height=227 --resize_width=227 ZnCar/ ZnCar/Label.txt ZnCarTrainImage ZnCarTrainLabel 2
```
We also need a mean file:
```
compute_image_mean.exe ZnCarTrainImage ZnCarTrainMean.binaryproto
```
In the end you should have four LMDBs (ZnCarTrainImage, ZnCarTestImage, ZnCarTrainLabel, ZnCarTestLabel) and two mean files (ZnCarTrainMean.binaryproto, ZnCarTestMean.binaryproto). The expected layout of ZnCar/Label.txt is sketched below.
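ZnCar/Label.txt is the LISTFILE argument of the converter: each line holds an image path (relative to the root folder, here ZnCar/) followed by one integer per label, two in this project. Given the Slice layer in multi_label_AlexNet.prototxt, the first column feeds the 3-way type head and the second the 2-way surface head. The file names and values below are only illustrative, not the actual ZnCar contents:
```
car_0001.jpg 0 1
car_0002.jpg 2 0
car_0003.jpg 1 1
```
The trailing command-line argument (2) tells the converter how many label columns to read from each line; the images go into ZnCarTrainImage, and the labels of each image are stored as a 2-channel Datum in ZnCarTrainLabel.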
## 3. Finetune AlexNet
We can use the pretrained bvlc_alexnet.caffemodel to initialize the layers that are unchanged (conv1-conv5, fc6, fc7); the two new output heads (fc8_1_type, fc8_2_surface) have new layer names, so they start from their weight fillers instead. Finetune command line:
```
Build\x64\Release\caffe.exe train --solver=D:\caffe-master\models\bvlc_alexnet\solver.prototxt --weights=D:\caffe-master\models\bvlc_alexnet\bvlc_alexnet.caffemodel
```
You can refer to my parameters in solver.prototxt and multi_label_AlexNet.prototxt. Both SoftmaxWithLoss heads use loss_weight: 0.5, so the training objective is the average of the type loss and the surface loss.
## 4. Modify classification.cpp
classification.exe in Caffe only supports single-label classification, so classification.cpp has to be modified. You can use my classification_multilabel.cpp and recompile as in step 1. The sketch below shows the core of the change: read both softmax outputs by name instead of the single "prob" blob.
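A minimal sketch of that idea, assuming the net has already been loaded from deploy.prototxt, the trained weights copied in, and a preprocessed image written into the input blob. This is not the author's classification_multilabel.cpp, just an illustration that uses the output blob names from deploy.prototxt:
```cpp
#include <algorithm>
#include <string>
#include <vector>

#include "boost/shared_ptr.hpp"
#include "caffe/caffe.hpp"

// Print the arg-max class of each softmax head after a forward pass.
void PrintMultiLabelPrediction(caffe::Net<float>& net) {
  net.Forward();
  // Output blob names as defined in deploy.prototxt:
  // "prob_1_type" has 3 classes, "prob_2_surface" has 2 classes.
  std::vector<std::string> heads;
  heads.push_back("prob_1_type");
  heads.push_back("prob_2_surface");
  for (size_t i = 0; i < heads.size(); ++i) {
    const boost::shared_ptr<caffe::Blob<float> > prob =
        net.blob_by_name(heads[i]);
    const float* data = prob->cpu_data();
    const int num_classes = prob->channels();
    const int best = std::max_element(data, data + num_classes) - data;
    LOG(INFO) << heads[i] << ": class " << best
              << " (p = " << data[best] << ")";
  }
}
```
Preprocessing (resizing to 227x227, mean subtraction, channel handling) is the same as in the stock classification.cpp and is omitted here; each head is then matched against its own label file.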
## 5. Use our model to classify a picture
(deploy.prototxt is required.) We can use our model to predict both labels of a picture:
```
classification.exe deploy.prototxt network.caffemodel mean.binaryproto label1.txt label2.txt img.jpg
```
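label1.txt and label2.txt hold the human-readable class names for the two heads, one name per line: three lines for the vehicle type and two for the surface, matching num_output of fc8_1_type and fc8_2_surface in deploy.prototxt. The actual ZnCar class names are not listed in this README, so the entries below are placeholders:
```
label1.txt (vehicle type, 3 lines):
    type_0
    type_1
    type_2

label2.txt (vehicle surface, 2 lines):
    surface_0
    surface_1
```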
The source image:

![](http://img.blog.csdn.net/20170120172753355?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvTXJfQ3Vycnk=/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast)

The result:

![](http://img.blog.csdn.net/20170120172805042?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvTXJfQ3Vycnk=/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast)

# DataSet
You can try ZnCar.zip. It covers 3 vehicle types and 2 vehicle surfaces, with 60 training images and 12 test images.
http://download.csdn.net/detail/mr_curry/9742578

### Note that this project only supports classification with two labels per image.
The network structure:
![](http://img.blog.csdn.net/20170120103953309?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvTXJfQ3Vycnk=/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/SouthEast)
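If you want to sanity-check a label LMDB produced in step 2 (for example ZnCarTrainLabel), a small reader along the following lines will dump the stored label vectors. It is only an illustration built on the same Caffe DB helpers that convert_multilabel.cpp uses, not part of this project:
```cpp
#include <iostream>

#include "boost/scoped_ptr.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"

int main(int argc, char** argv) {
  if (argc < 2) {
    std::cerr << "usage: dump_label_db LABEL_DB_PATH" << std::endl;
    return 1;
  }
  // argv[1]: path to a label LMDB, e.g. ZnCarTrainLabel
  boost::scoped_ptr<caffe::db::DB> db(caffe::db::GetDB("lmdb"));
  db->Open(argv[1], caffe::db::READ);
  boost::scoped_ptr<caffe::db::Cursor> cursor(db->NewCursor());
  for (; cursor->valid(); cursor->Next()) {
    caffe::Datum datum;
    datum.ParseFromString(cursor->value());
    std::cout << cursor->key() << ":";
    // Each entry stores label_count float values (2 for ZnCar).
    for (int i = 0; i < datum.float_data_size(); ++i) {
      std::cout << " " << datum.float_data(i);
    }
    std::cout << std::endl;
  }
  return 0;
}
```
Every key should come back with exactly two values, matching the label columns in Label.txt.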
--------------------------------------------------------------------------------
/deploy.prototxt:
--------------------------------------------------------------------------------
name: "ZnNet"
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape: { dim: 1 dim: 3 dim: 227 dim: 227 } }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" }
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" }
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" }
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" }
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" }
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" }
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" }
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc8_1_type"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8_1_type"
  param { lr_mult: 5 decay_mult: 5 }
  param { lr_mult: 10 decay_mult: 0 }
  inner_product_param {
    num_output: 3
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_2_surface"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8_2_surface"
  param { lr_mult: 5 decay_mult: 5 }
  param { lr_mult: 10 decay_mult: 0 }
  inner_product_param {
    num_output: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "prob_1_type"
  type: "Softmax"
  bottom: "fc8_1_type"
  top: "prob_1_type"
  loss_weight: 0.5
}
layer {
  name: "prob_2_surface"
  type: "Softmax"
  bottom: "fc8_2_surface"
  top: "prob_2_surface"
  loss_weight: 0.5
}
--------------------------------------------------------------------------------
/convert_multilabel.cpp:
--------------------------------------------------------------------------------
// This program converts a set of images and their multi-label annotations to
// lmdb/leveldb databases (one for the images, one for the labels) by storing
// them as Datum proto buffers.
// Usage:
//   convert_multilabel [FLAGS] ROOTFOLDER/ LISTFILE IMAGE_DB LABEL_DB LABEL_COUNT
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels (LABEL_COUNT labels per
// line), in the format
//   subfolder1/file1.JPEG 7
//   ....

//#ifdef MULTILABEL

#include <algorithm>
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"

using namespace caffe;  // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;

DEFINE_bool(gray, false,
    "When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
    "Randomly shuffle the order of images and their labels");
DEFINE_string(backend, "lmdb",
    "The backend {lmdb, leveldb} for storing the result");
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
    "When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,
    "When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
    "Optional: What type should we encode the image as ('png','jpg',...).");

int main(int argc, char** argv) {
#ifdef USE_OPENCV
  ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
      "format used as input for Caffe.\n"
      "Usage:\n"
      "  convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
      "The ImageNet dataset for the training demo is at\n"
      "  http://www.image-net.org/download-images\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 6) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;

  std::ifstream infile(argv[2]);
  std::vector<std::pair<std::string, std::vector<float> > > lines;
  std::string filename;

  std::string label_count_string = argv[5];
  int label_count = std::atoi(label_count_string.c_str());

  std::vector<float> label(label_count);

  while (infile >> filename) {
    for (int i = 0; i < label_count; i++) {
      infile >> label[i];
    }
    lines.push_back(std::make_pair(filename, label));
  }
  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";

  int resize_height = std::max(0, FLAGS_resize_height);
  int resize_width = std::max(0, FLAGS_resize_width);

  // Create new DBs: one for the image data, one for the label vectors
  scoped_ptr<db::DB> db_image(db::GetDB(FLAGS_backend));
  scoped_ptr<db::DB> db_label(db::GetDB(FLAGS_backend));
  db_image->Open(argv[3], db::NEW);
  db_label->Open(argv[4], db::NEW);
  scoped_ptr<db::Transaction> txn_image(db_image->NewTransaction());
  scoped_ptr<db::Transaction> txn_label(db_label->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum_label;
  Datum datum_image;
  int count = 0;
  int data_size_label = 0;
  int data_size_image = 0;
  bool data_size_initialized = false;

  for (int line_id = 0; line_id < lines.size(); ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if (p == fn.npos)
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      enc = fn.substr(p);
      std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
    }

    status = ReadImageToDatum(root_folder + lines[line_id].first,
        lines[line_id].second[0], resize_height, resize_width, is_color,
        enc, &datum_image);
    if (status == false) continue;

    // The label datum is 1x1 with one channel per label value
    datum_label.set_height(1);
    datum_label.set_width(1);
    datum_label.set_channels(label_count);
    int count_tmp = datum_label.float_data_size();
    for (int index_label = 0; index_label < lines[line_id].second.size(); index_label++) {
      float tmp_float_value = lines[line_id].second[index_label];
      datum_label.add_float_data(tmp_float_value);
    }

    if (check_size) {
      if (!data_size_initialized) {
        data_size_label = datum_label.channels() * datum_label.height() * datum_label.width();
        data_size_image = datum_image.channels() * datum_image.height() * datum_image.width();
        data_size_initialized = true;
      } else {
        const std::string& data_label = datum_label.data();
        CHECK_EQ(data_label.size(), data_size_label) << "Incorrect data field size "
            << data_label.size();

        const std::string& data_image = datum_image.data();
        CHECK_EQ(data_image.size(), data_size_image) << "Incorrect data field size "
            << data_image.size();
      }
    }
    // sequential
    string key_str_image = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;
    string key_str_label = caffe::format_int(line_id, 8) + "label_" + lines[line_id].first;

    // Put in db
    string out_label;
    string out_image;
    CHECK(datum_label.SerializeToString(&out_label));
    CHECK(datum_image.SerializeToString(&out_image));

    datum_label.clear_float_data();
    txn_label->Put(key_str_label, out_label);
    txn_image->Put(key_str_image, out_image);
    if (++count % 1000 == 0) {
      // Commit db
      txn_image->Commit();
      txn_image.reset(db_image->NewTransaction());

      txn_label->Commit();
      txn_label.reset(db_label->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn_label->Commit();
    txn_image->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}

//#endif
--------------------------------------------------------------------------------
/multi_label_AlexNet.prototxt:
--------------------------------------------------------------------------------
name: "ZnNet"
layer {
  name: "data"
  type: "Data"
  top: "data"
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "models/bvlc_alexnet/ZnCarTrainMean.binaryproto"
  }
  include { phase: TRAIN }
  data_param {
    source: "models/bvlc_alexnet/ZnCarTrainImage"
    batch_size: 10
    backend: LMDB
  }
}
layer {
  name: "labels"
  type: "Data"
  top: "labels"
  include { phase: TRAIN }
  data_param {
    source: "models/bvlc_alexnet/ZnCarTrainLabel"
    batch_size: 10
    backend: LMDB
  }
}
layer {
  name: "data"
  type: "Data"
  top: "data"
  transform_param {
    crop_size: 227
    mean_file: "models/bvlc_alexnet/ZnCarTestMean.binaryproto"
  }
  include { phase: TEST }
  data_param {
    source: "models/bvlc_alexnet/ZnCarTestImage"
    batch_size: 12
    backend: LMDB
  }
}
layer {
  name: "labels"
  type: "Data"
  top: "labels"
  include { phase: TEST }
  data_param {
    source: "models/bvlc_alexnet/ZnCarTestLabel"
    batch_size: 12
    backend: LMDB
  }
}
layer {
  name: "slice"
  type: "Slice"
  bottom: "labels"
  top: "type"     # vehicle brand (type)
  top: "surface"  # vehicle body shape (surface)
  slice_param {
    axis: 1
    slice_point: 1
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" }
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" }
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" }
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" }
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" }
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu6" type: "ReLU" bottom: "fc6" top: "fc6" }
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
layer { name: "relu7" type: "ReLU" bottom: "fc7" top: "fc7" }
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc8_1_type"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8_1_type"
  param { lr_mult: 5 decay_mult: 5 }
  param { lr_mult: 10 decay_mult: 0 }
  inner_product_param {
    num_output: 3
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_2_surface"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8_2_surface"
  param { lr_mult: 5 decay_mult: 5 }
  param { lr_mult: 10 decay_mult: 0 }
  inner_product_param {
    num_output: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "accuracy_1_type"
  type: "Accuracy"
  bottom: "fc8_1_type"
  bottom: "type"
  top: "accuracy_1_type"
  include { phase: TEST }
}
layer {
  name: "loss_1_type"
  type: "SoftmaxWithLoss"
  bottom: "fc8_1_type"
  bottom: "type"
  top: "loss_1_type"
  loss_weight: 0.5
}
layer {
  name: "accuracy_2_surface"
  type: "Accuracy"
  bottom: "fc8_2_surface"
  bottom: "surface"
  top: "accuracy_2_surface"
  include { phase: TEST }
}
layer {
  name: "loss_2_surface"
  type: "SoftmaxWithLoss"
  bottom: "fc8_2_surface"
  bottom: "surface"
  top: "loss_2_surface"
  loss_weight: 0.5
}
--------------------------------------------------------------------------------