├── .gitattributes
├── center_loss.png
├── multibox_center_loss_layer.cpp
├── center_loss_layer.hpp
├── center_loss_layer.cu
├── multibox_focal_loss_layer.hpp
├── multibox_center_loss_layer.hpp
├── center_loss_layer.cpp
├── README.md
├── multibox_focal_loss_layer.cpp
├── center_loss_test.prototxt
├── center_loss_train.prototxt
└── caffe.proto


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/center_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BOBrown/SSD-Centerloss/HEAD/center_loss.png


--------------------------------------------------------------------------------
/multibox_center_loss_layer.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BOBrown/SSD-Centerloss/HEAD/multibox_center_loss_layer.cpp


--------------------------------------------------------------------------------
/center_loss_layer.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef CAFFE_CENTER_LOSS_LAYER_HPP_
 2 | #define CAFFE_CENTER_LOSS_LAYER_HPP_
 3 | 
 4 | #include <vector>
 5 | 
 6 | #include "caffe/blob.hpp"
 7 | #include "caffe/layer.hpp"
 8 | #include "caffe/proto/caffe.pb.h"
 9 | 
10 | #include "caffe/layers/loss_layer.hpp"
11 | 
12 | namespace caffe {
13 | 
14 | template <typename Dtype>
15 | class CenterLossLayer : public LossLayer<Dtype> {  // loss between input rows and the center selected by each row's label
16 |  public:
17 |   explicit CenterLossLayer(const LayerParameter& param)
18 |       : LossLayer<Dtype>(param) {}
19 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
20 |       const vector<Blob<Dtype>*>& top);
21 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
22 |       const vector<Blob<Dtype>*>& top);
23 |   // Layer identity and blob-count requirements.
24 |   virtual inline const char* type() const { return "CenterLoss"; }
25 |   virtual inline int ExactNumBottomBlobs() const { return 2; }  // bottom[0]: features, bottom[1]: labels
26 |   virtual inline int ExactNumTopBlobs() const { return -1; }  // NOTE(review): -1 presumably means "no exact count enforced" -- confirm against Layer
27 |   // CPU and GPU implementations of the forward and backward passes.
28 |  protected:
29 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
30 |       const vector<Blob<Dtype>*>& top);
31 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
32 |       const vector<Blob<Dtype>*>& top);
33 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
34 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
35 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
36 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
37 |   // Dimensions cached by LayerSetUp / Reshape.
38 |   int M_;  // bottom[0]->num(), refreshed in Reshape
39 |   int K_;  // bottom[0]->count(axis): length of one flattened feature vector
40 |   int N_;  // center_loss_param().num_output(): number of centers (rows of blobs_[0])
41 |   // Scratch buffers shared by the forward and backward passes.
42 |   Blob<Dtype> distance_;  // shaped like bottom[0]; holds each input minus the center of its label
43 |   Blob<Dtype> variation_sum_;  // shaped like blobs_[0]; per-center accumulation of negated distances
44 | };
45 | 
46 | }  // namespace caffe
47 | 
48 | #endif  // CAFFE_CENTER_LOSS_LAYER_HPP_


--------------------------------------------------------------------------------
/center_loss_layer.cu:
--------------------------------------------------------------------------------
 1 | #include <vector>
 2 | 
 3 | #include "caffe/filler.hpp"
 4 | #include "caffe/layers/center_loss_layer.hpp"
 5 | #include "caffe/util/math_functions.hpp"
 6 | 
 7 | namespace caffe {
 8 | 
 9 | template <typename Dtype>
10 | __global__ void Compute_distance_data_gpu(int nthreads, const int K, const Dtype* bottom,
11 |     const Dtype* label, const Dtype* center, Dtype* distance) {
12 |   // Each loop index covers one element of the row-major (row, K) layout: distance = bottom - center of the row's label.
13 |   CUDA_KERNEL_LOOP(index, nthreads) {
14 |     const int row = index / K;  // which sample
15 |     const int col = index - row * K;  // which feature inside that sample
16 |     const Dtype* class_center = center + static_cast<int>(label[row]) * K;  // c_{y(row)}
17 |     distance[index] = bottom[index] - class_center[col];  // x(row) - c_{y(row)}
18 |   }
19 | }
20 | 
21 | template <typename Dtype>
22 | __global__ void Compute_center_diff_gpu(int nthreads, const int M, const int K,
23 |         const Dtype* label, const Dtype* distance, Dtype* variation_sum,
24 |         Dtype* center_diff) {
25 |   // One class per loop index: subtract matching samples' distances from its variation_sum row, then divide by (1 + matches).
26 |   CUDA_KERNEL_LOOP(index, nthreads) {
27 |     Dtype* class_sum = variation_sum + index * K;  // updated in place with -=, so its prior contents carry into the result
28 |     int num_matched = 0;
29 |     for (int m = 0; m < M; ++m) {
30 |       if (static_cast<int>(label[m]) != index) continue;  // sample m belongs to another class
31 |       ++num_matched;
32 |       const Dtype* sample_distance = distance + m * K;
33 |       for (int k = 0; k < K; ++k) class_sum[k] -= sample_distance[k];
34 |     }
35 |     const Dtype denom = num_matched + (Dtype)1.;  // the +1 keeps the divisor non-zero for classes with no samples
36 |     for (int k = 0; k < K; ++k) {
37 |       center_diff[index * K + k] = class_sum[k] / denom;
38 |     }
39 |   }
40 | }
41 | 
42 | 
43 | template <typename Dtype>
44 | void CenterLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
45 |     const vector<Blob<Dtype>*>& top) {
46 |   const int nthreads = M_ * K_;  // one kernel loop index per element of distance_ (= bottom[0] minus the label's center)
47 |   Compute_distance_data_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(
48 |       nthreads, K_, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
49 |       this->blobs_[0]->gpu_data(), distance_.mutable_gpu_data());
50 |   CUDA_POST_KERNEL_CHECK;  // surface a failed launch here rather than at a later, unrelated CUDA call
51 |   Dtype dot;
52 |   caffe_gpu_dot(nthreads, distance_.gpu_data(), distance_.gpu_data(), &dot);
53 |   top[0]->mutable_cpu_data()[0] = dot / M_ / Dtype(2);  // loss = sum(distance^2) / M_ / 2
54 | }
55 | 
56 | template <typename Dtype>
57 | void CenterLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
58 |     const vector<bool>& propagate_down,
59 |     const vector<Blob<Dtype>*>& bottom) {
60 |   // Center gradient. variation_sum_ is zeroed and accumulated by device code, so hand out its GPU pointer, not the host one.
61 |   Dtype* variation_sum = variation_sum_.mutable_gpu_data();
62 |   caffe_gpu_set(N_ * K_, (Dtype)0., variation_sum);
63 |   Compute_center_diff_gpu<Dtype><<<CAFFE_GET_BLOCKS(N_), CAFFE_CUDA_NUM_THREADS>>>(
64 |       N_, M_, K_, bottom[1]->gpu_data(), distance_.gpu_data(), variation_sum, this->blobs_[0]->mutable_gpu_diff());
65 |   CUDA_POST_KERNEL_CHECK;  // report a failed launch immediately
66 |   if (propagate_down[0]) {
67 |     // Gradient w.r.t. the input features: distance_ scaled by (top diff / M_).
68 |     caffe_gpu_scale(M_ * K_, top[0]->cpu_diff()[0] / M_, distance_.gpu_data(), bottom[0]->mutable_gpu_diff());
69 |   }
70 |   if (propagate_down[1]) {
71 |     LOG(FATAL) << this->type()
72 |                << " Layer cannot backpropagate to label inputs.";
73 |   }
74 | }
75 | 
76 | INSTANTIATE_LAYER_GPU_FUNCS(CenterLossLayer);
77 | 
78 | }  // namespace caffe
79 | 


--------------------------------------------------------------------------------
/multibox_focal_loss_layer.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef CAFFE_MULTIBOX_FOCAL_LOSS_LAYER_HPP_
  2 | #define CAFFE_MULTIBOX_FOCAL_LOSS_LAYER_HPP_
  3 | 
  4 | #include <map>
  5 | #include <utility>
  6 | #include <vector>
  7 | 
  8 | #include "caffe/blob.hpp"
  9 | #include "caffe/layer.hpp"
 10 | #include "caffe/proto/caffe.pb.h"
 11 | #include "caffe/util/bbox_util.hpp"
 12 | 
 13 | #include "caffe/layers/loss_layer.hpp"
 14 | 
 15 | namespace caffe {
 16 | 
 17 | /**
 18 |  * @brief MultiBox loss whose softmax confidence term is computed by an
 19 |  *        internal FocalLoss layer. Performs the following:
 20 |  *  - decode the predictions.
 21 |  *  - perform matching between priors/predictions and ground truth.
 22 |  *  - use matched boxes and confidences to compute loss.
 23 |  *
 24 |  */
 25 | template <typename Dtype>
 26 | class MultiBoxFocalLossLayer : public LossLayer<Dtype> {
 27 |  public:
 28 |   explicit MultiBoxFocalLossLayer(const LayerParameter& param)
 29 |       : LossLayer<Dtype>(param) {}
 30 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 31 |       const vector<Blob<Dtype>*>& top);
 32 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
 33 |       const vector<Blob<Dtype>*>& top);
 34 | 
 35 |   virtual inline const char* type() const { return "MultiBoxFocalLoss"; }
 36 |   // bottom[0] stores the location predictions.
 37 |   // bottom[1] stores the confidence predictions.
 38 |   // bottom[2] stores the prior bounding boxes.
 39 |   // bottom[3] stores the ground truth bounding boxes.
 40 |   virtual inline int ExactNumBottomBlobs() const { return 4; }
 41 |   virtual inline int ExactNumTopBlobs() const { return 1; }
 42 | 
 43 |  protected:
 44 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 45 |       const vector<Blob<Dtype>*>& top);
 46 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
 47 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 48 | 
 49 |   // The internal localization loss layer.
 50 |   shared_ptr<Layer<Dtype> > loc_loss_layer_;
 51 |   LocLossType loc_loss_type_;
 52 |   float loc_weight_;
 53 |   // bottom vector holder used in Forward function.
 54 |   vector<Blob<Dtype>*> loc_bottom_vec_;
 55 |   // top vector holder used in Forward function.
 56 |   vector<Blob<Dtype>*> loc_top_vec_;
 57 |   // blob which stores the matched location prediction.
 58 |   Blob<Dtype> loc_pred_;
 59 |   // blob which stores the corresponding matched ground truth.
 60 |   Blob<Dtype> loc_gt_;
 61 |   // localization loss.
 62 |   Blob<Dtype> loc_loss_;
 63 | 
 64 |   // The internal confidence loss layer.
 65 |   shared_ptr<Layer<Dtype> > conf_loss_layer_;
 66 |   ConfLossType conf_loss_type_;
 67 |   // bottom vector holder used in Forward function.
 68 |   vector<Blob<Dtype>*> conf_bottom_vec_;
 69 |   // top vector holder used in Forward function.
 70 |   vector<Blob<Dtype>*> conf_top_vec_;
 71 |   // blob which stores the confidence prediction.
 72 |   Blob<Dtype> conf_pred_;
 73 |   // blob which stores the corresponding ground truth label.
 74 |   Blob<Dtype> conf_gt_;
 75 |   // confidence loss.
 76 |   Blob<Dtype> conf_loss_;
 77 | 
 78 |   MultiBoxLossParameter multibox_loss_param_;
 79 |   int num_classes_;
 80 |   bool share_location_;
 81 |   MatchType match_type_;
 82 |   float overlap_threshold_;
 83 |   bool use_prior_for_matching_;
 84 |   int background_label_id_;
 85 |   bool use_difficult_gt_;
 86 |   bool do_neg_mining_;
 87 |   float neg_pos_ratio_;
 88 |   float neg_overlap_;
 89 |   CodeType code_type_;
 90 |   bool encode_variance_in_target_;
 91 |   bool map_object_to_agnostic_;
 92 |   bool ignore_cross_boundary_bbox_;
 93 |   bool bp_inside_;
 94 |   MiningType mining_type_;
 95 | 
 96 |   int loc_classes_;
 97 |   int num_gt_;
 98 |   int num_;
 99 |   int num_priors_;
100 | 
101 |   int num_matches_;
102 |   int num_conf_;
103 |   vector<map<int, vector<int> > > all_match_indices_;
104 |   vector<vector<int> > all_neg_indices_;
105 |   float alpha_;  // focal_loss_param().alpha(), forwarded to the internal FocalLoss layer
106 |   float gamma_;  // focal_loss_param().gamma(), forwarded to the internal FocalLoss layer
107 | 
108 |   // How to normalize the loss.
109 |   LossParameter_NormalizationMode normalization_;
110 | };
111 | 
112 | }  // namespace caffe
113 | 
114 | #endif  // CAFFE_MULTIBOX_FOCAL_LOSS_LAYER_HPP_
115 | 


--------------------------------------------------------------------------------
/multibox_center_loss_layer.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef CAFFE_MULTIBOX_CENTER_LOSS_LAYER_HPP_
  2 | #define CAFFE_MULTIBOX_CENTER_LOSS_LAYER_HPP_
  3 | 
  4 | #include <map>
  5 | #include <utility>
  6 | #include <vector>
  7 | 
  8 | #include "caffe/blob.hpp"
  9 | #include "caffe/layer.hpp"
 10 | #include "caffe/proto/caffe.pb.h"
 11 | #include "caffe/util/bbox_util.hpp"
 12 | 
 13 | #include "caffe/layers/loss_layer.hpp"
 14 | 
 15 | namespace caffe {
 16 | 
 17 | /**
 18 |  * @brief MultiBox loss extended with a center-loss term on per-box
 19 |  *        center features. Performs the following:
 20 |  *  - decode the predictions.
 21 |  *  - perform matching between priors/predictions and ground truth.
 22 |  *  - use matched boxes and confidences to compute loss.
 23 |  *
 24 |  */
 25 | template <typename Dtype>
 26 | class MultiBoxCenterLossLayer : public LossLayer<Dtype> {
 27 |  public:
 28 |   explicit MultiBoxCenterLossLayer(const LayerParameter& param)
 29 |       : LossLayer<Dtype>(param) {}
 30 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 31 |       const vector<Blob<Dtype>*>& top);
 32 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
 33 |       const vector<Blob<Dtype>*>& top);
 34 | 
 35 |   virtual inline const char* type() const { return "MultiBoxCenterLoss"; }
 36 |   // bottom[0] stores the location predictions.
 37 |   // bottom[1] stores the confidence predictions.
 38 |   // bottom[2] stores the prior bounding boxes.
 39 |   // bottom[3] stores the ground truth bounding boxes; bottom[4] (per the README) the center features.
 40 |   virtual inline int ExactNumBottomBlobs() const { return 5; }
 41 |   virtual inline int ExactNumTopBlobs() const { return 1; }
 42 | 
 43 |  protected:
 44 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 45 |       const vector<Blob<Dtype>*>& top);
 46 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
 47 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 48 | 
 49 |   // The internal localization loss layer.
 50 |   shared_ptr<Layer<Dtype> > loc_loss_layer_;
 51 |   LocLossType loc_loss_type_;
 52 |   float loc_weight_;
 53 |   float center_loss_weight_;  // weight applied to the center loss term
 54 |   // bottom vector holder used in Forward function.
 55 |   vector<Blob<Dtype>*> loc_bottom_vec_;
 56 |   // top vector holder used in Forward function.
 57 |   vector<Blob<Dtype>*> loc_top_vec_;
 58 |   // blob which stores the matched location prediction.
 59 |   Blob<Dtype> loc_pred_;
 60 |   // blob which stores the corresponding matched ground truth.
 61 |   Blob<Dtype> loc_gt_;
 62 |   // localization loss.
 63 |   Blob<Dtype> loc_loss_;
 64 | 
 65 |   // The internal confidence loss layer.
 66 |   shared_ptr<Layer<Dtype> > conf_loss_layer_;
 67 |   // The center confidence loss layer.
 68 |   shared_ptr<Layer<Dtype> > conf_center_loss_layer_;
 69 | 
 70 |   ConfLossType conf_loss_type_;
 71 |   // bottom vector holder used in Forward function.
 72 |   vector<Blob<Dtype>*> conf_bottom_vec_;
 73 |   // top vector holder used in Forward function.
 74 |   vector<Blob<Dtype>*> conf_top_vec_;
 75 |   // The same two holders, for the center confidence loss layer:
 76 |   // bottom vector holder used in Forward function.
 77 |   vector<Blob<Dtype>*> conf_center_bottom_vec_;
 78 |   // top vector holder used in Forward function.
 79 |   vector<Blob<Dtype>*> conf_center_top_vec_;
 80 |   // blob which stores the confidence prediction.
 81 |   Blob<Dtype> conf_pred_;
 82 |   // blob which stores the confidence prediction used by center loss.
 83 |   Blob<Dtype> conf_center_pred_;
 84 |   // blob which stores the corresponding ground truth label.
 85 |   Blob<Dtype> conf_gt_;
 86 |   // confidence loss.
 87 |   Blob<Dtype> conf_loss_;
 88 |   // confidence center loss.
 89 |   Blob<Dtype> conf_center_loss_;
 90 | 
 91 |   MultiBoxLossParameter multibox_loss_param_;
 92 |   int num_classes_;
 93 |   int center_features_;  // NOTE(review): presumably multibox_center_loss_param.center_features (features per default box) -- confirm in the .cpp
 94 |   bool share_location_;
 95 |   MatchType match_type_;
 96 |   float overlap_threshold_;
 97 |   bool use_prior_for_matching_;
 98 |   int background_label_id_;
 99 |   bool use_difficult_gt_;
100 |   bool do_neg_mining_;
101 |   float neg_pos_ratio_;
102 |   float neg_overlap_;
103 |   CodeType code_type_;
104 |   bool encode_variance_in_target_;
105 |   bool map_object_to_agnostic_;
106 |   bool ignore_cross_boundary_bbox_;
107 |   bool bp_inside_;
108 |   MiningType mining_type_;
109 | 
110 |   int loc_classes_;
111 |   int num_gt_;
112 |   int num_;
113 |   int num_priors_;
114 | 
115 |   int num_matches_;
116 |   int num_conf_;
117 |   vector<map<int, vector<int> > > all_match_indices_;
118 |   vector<vector<int> > all_neg_indices_;
119 | 
120 |   // How to normalize the loss.
121 |   LossParameter_NormalizationMode normalization_;
122 | };
123 | 
124 | }  // namespace caffe
125 | 
126 | #endif  // CAFFE_MULTIBOX_CENTER_LOSS_LAYER_HPP_
127 | 


--------------------------------------------------------------------------------
/center_loss_layer.cpp:
--------------------------------------------------------------------------------
  1 | #include <vector>
  2 | 
  3 | #include "caffe/filler.hpp"
  4 | #include "caffe/layers/center_loss_layer.hpp"
  5 | #include "caffe/util/math_functions.hpp"
  6 | 
  7 | namespace caffe {
  8 | 
  9 | template <typename Dtype>
 10 | void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 11 |       const vector<Blob<Dtype>*>& top) {
 12 |   // N_: number of centers, read from center_loss_param.num_output.
 13 |   N_ = this->layer_param_.center_loss_param().num_output();
 14 | 
 15 |   // K_: every axis of bottom[0] from `axis` onward, flattened into a single
 16 |   // length (e.g. C*H*W for an (N, C, H, W) input with axis == 1).
 17 |   K_ = bottom[0]->count(bottom[0]->CanonicalAxisIndex(
 18 |       this->layer_param_.center_loss_param().axis()));
 19 | 
 20 |   // Blobs that are already present are left untouched; otherwise create the
 21 |   // single N_ x K_ centers blob and initialise it with the configured filler.
 22 |   if (this->blobs_.size() == 0) {
 23 |     this->blobs_.resize(1);
 24 |     vector<int> center_shape;
 25 |     center_shape.push_back(N_);
 26 |     center_shape.push_back(K_);
 27 |     this->blobs_[0].reset(new Blob<Dtype>(center_shape));
 28 |     shared_ptr<Filler<Dtype> > center_filler(GetFiller<Dtype>(
 29 |         this->layer_param_.center_loss_param().center_filler()));
 30 |     center_filler->Fill(this->blobs_[0].get());
 31 |   } else {
 32 |     LOG(INFO) << "Skipping parameter initialization";
 33 |   }
 34 | 
 35 |   // One propagate-down flag per parameter blob; newly added flags are true.
 36 |   this->param_propagate_down_.resize(this->blobs_.size(), true);
 37 | }
 38 | 
 39 | template <typename Dtype>
 40 | void CenterLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
 41 |       const vector<Blob<Dtype>*>& top) {
 42 |   M_ = bottom[0]->num();  // number of rows handled per pass
 43 |   // The label blob (bottom[1]) must have channel, height and width extents of 1.
 44 |   CHECK_EQ(bottom[1]->channels(), 1);
 45 |   CHECK_EQ(bottom[1]->height(), 1);
 46 |   CHECK_EQ(bottom[1]->width(), 1);
 47 |   LossLayer<Dtype>::Reshape(bottom, top);  // remaining shape handling is delegated to the base class
 48 |   // Scratch buffers: one slot per centers entry and one per input element.
 49 |   variation_sum_.ReshapeLike(*this->blobs_[0]);
 50 |   distance_.ReshapeLike(*bottom[0]);
 51 | }
 52 | 
 53 | template <typename Dtype>
 54 | void CenterLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 55 |     const vector<Blob<Dtype>*>& top) {
 56 |   const Dtype* features = bottom[0]->cpu_data();
 57 |   const Dtype* labels = bottom[1]->cpu_data();
 58 |   const Dtype* centers = this->blobs_[0]->cpu_data();
 59 |   Dtype* distances = distance_.mutable_cpu_data();
 60 |   // Row by row: distance(row,:) = feature(row,:) - center(label(row),:).
 61 |   for (int row = 0; row < M_; ++row) {
 62 |     const int offset = row * K_;
 63 |     const Dtype* row_center = centers + static_cast<int>(labels[row]) * K_;
 64 |     caffe_sub(K_, features + offset, row_center, distances + offset);
 65 |   }
 66 |   // loss = (sum of squared distances) / M_ / 2, written to the first top element.
 67 |   const int total = M_ * K_;
 68 |   const Dtype squared_sum = caffe_cpu_dot(total, distance_.cpu_data(), distance_.cpu_data());
 69 |   top[0]->mutable_cpu_data()[0] = squared_sum / M_ / Dtype(2);
 70 | }
 71 | 
 72 | template <typename Dtype>
 73 | void CenterLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
 74 |     const vector<bool>& propagate_down,
 75 |     const vector<Blob<Dtype>*>& bottom) {
 76 |   // Gradient with respect to centers, accumulated (axpy) into the centers' diff.
 77 |   if (this->param_propagate_down_[0]) {
 78 |     const Dtype* label = bottom[1]->cpu_data();
 79 |     Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
 80 |     Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
 81 |     const Dtype* distance_data = distance_.cpu_data();
 82 |     vector<int> count(N_, 0);  // number of batch samples seen per class
 83 |     // variation_sum(j,:) = -\sum_{y_i==j} D(i,:), built in a single pass over the batch
 84 |     caffe_set(N_ * K_, (Dtype)0., variation_sum_data);
 85 |     for (int m = 0; m < M_; m++) {
 86 |       const int label_value = static_cast<int>(label[m]);
 87 |       // Labels outside [0, N_) match no center and contribute nothing.
 88 |       if (label_value < 0 || label_value >= N_) continue;
 89 |       count[label_value]++;
 90 |       caffe_sub(K_, variation_sum_data + label_value * K_, distance_data + m * K_, variation_sum_data + label_value * K_);
 91 |     }
 92 |     // center_diff(j,:) += variation_sum(j,:) / (1 + count(j))
 93 |     for (int n = 0; n < N_; n++) {
 94 |       caffe_axpy(K_, (Dtype)1./(count[n] + (Dtype)1.), variation_sum_data + n * K_, center_diff + n * K_);
 95 |     }
 96 |   }
 97 |   // Gradient with respect to bottom data: distance * top_diff / M_
 98 |   if (propagate_down[0]) {
 99 |     caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
100 |     caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_, bottom[0]->mutable_cpu_diff());
101 |   }
102 |   if (propagate_down[1]) {
103 |     LOG(FATAL) << this->type()
104 |                << " Layer cannot backpropagate to label inputs.";
105 |   }
106 | }
107 | 
108 | #ifdef CPU_ONLY
109 | STUB_GPU(CenterLossLayer);
110 | #endif
111 | 
112 | INSTANTIATE_CLASS(CenterLossLayer);
113 | REGISTER_LAYER_CLASS(CenterLoss);
114 | 
115 | }  // namespace caffe
116 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SSD-Centerloss
  2 | This is an unofficial experiment that applies center loss to the SSD multibox_loss function.
  3 | 
  4 | Technical details are in the paper: **A Discriminative Feature Learning Approach for Deep Face Recognition**
  5 | https://pan.baidu.com/s/1up_PWpR85HqVe10yhFzHoQ
  6 | 
  7 | SSD (Single Shot MultiBox Detector) implements the multibox_loss function in https://github.com/weiliu89/caffe/tree/ssd. The loss function can be read in multibox_loss_layer.hpp / multibox_loss_layer.cpp.
  8 | 
  9 | # Motivation:
 10 | When detecting objects in an image, detectors such as SSD typically use a softmax function to classify each object and an L1 regression to localize it.
 11 | 
 12 | $$ L(x,c,l,g) = \frac{1}{N}(L_{conf}(x,c)+ \alpha L_{loc}(x,l,g))$$
 13 | 
 14 | In the equation above, $L_{conf}$ is the loss that classifies each object and $L_{loc}$ is the localization loss. $N$ is the number of default boxes. In other words, the losses of all default boxes are summed and then averaged, so each default box contributes to the final loss.
 15 | 
 16 | However, for objects that look similar to each other, learning the location information may be easy, while the softmax classifier struggles because the features of the foreground samples are so alike. Center loss can effectively reduce the feature differences among samples of the same object class.
 17 | <br>
 18 | <br>
 19 | ![image](https://github.com/BOBrown/SSD-Centerloss/blob/master/center_loss.png)
 20 | 
 21 | 
 22 | # How to use SSD-Centerloss
 23 | ### (1) add center loss layer:
 24 | Notation ~~ is the root path of your caffe-ssd dir
 25 | ```
 26 | cp center_loss_layer.cpp  ~~/caffe-ssd/src/caffe/layers/
 27 | cp center_loss_layer.hpp  ~~/caffe-ssd/include/caffe/layers/
 28 | cp multibox_center_loss_layer.cpp ~~/caffe-ssd/src/caffe/layers/
 29 | cp multibox_center_loss_layer.hpp ~~/caffe-ssd/include/caffe/layers/
 30 | ```
 31 | ### (2) Then add the following code to caffe.proto
 32 | ```
 33 | message CenterLossParameter {
 34 |   optional uint32 num_output = 1; // The number of outputs for the layer
 35 |   optional FillerParameter center_filler = 2; // The filler for the centers
 36 |   // The first axis to be lumped into a single inner product computation;
 37 |   // all preceding axes are retained in the output.
 38 |   // May be negative to index from the end (e.g., -1 for the last axis).
 39 |   optional int32 axis = 3 [default = 1];
 40 | }
 41 | 
 42 | message MultiBoxCenterLossParameter{
 43 | //center_features represents the length of features that is equal to the length of object centers in each default box.
 44 |   optional uint32 center_features = 1;
 45 | }
 46 | ```
 47 | Add the following fields to the message LayerParameter:
 48 | ```
 49 | optional MultiBoxCenterLossParameter multibox_center_loss_param = 211; // this field number must be unique within the message
 50 | optional CenterLossParameter center_loss_param = 149;
 51 | ```
 52 | ### (3) Getting the center_features of each default box
 53 | For instance, the fc7_norm layer has 4 anchors (aspect ratios sqrt(2), 1, 1/2 and 2). Each anchor has 16 center_features, so num_output is 64.
 54 | ```
 55 | layer {
 56 |   name: "fc7_norm_center_mbox_conf_new"
 57 |   type: "Convolution"
 58 |   bottom: "fc7_norm"
 59 |   top: "fc7_norm_center_mbox_conf"
 60 |   param {
 61 |     lr_mult: 1
 62 |     decay_mult: 1
 63 |   }
 64 |   param {
 65 |     lr_mult: 2
 66 |     decay_mult: 0
 67 |   }
 68 |   convolution_param {
 69 |     num_output: 64
 70 |     pad: 1
 71 |     kernel_size: 3
 72 |     stride: 1
 73 |     weight_filler {
 74 |       type: "xavier"
 75 |     }
 76 |     bias_filler {
 77 |       type: "constant"
 78 |       value: 0
 79 |     }
 80 |   }
 81 | }
 82 | layer {
 83 |   name: "fc7_norm_center_mbox_conf_perm"
 84 |   type: "Permute"
 85 |   bottom: "fc7_norm_center_mbox_conf"
 86 |   top: "fc7_norm_center_mbox_conf_perm"
 87 |   permute_param {
 88 |     order: 0
 89 |     order: 2
 90 |     order: 3
 91 |     order: 1
 92 |   }
 93 | }
 94 | layer {
 95 |   name: "fc7_norm_mbox_center_conf_flat"
 96 |   type: "Flatten"
 97 |   bottom: "fc7_norm_center_mbox_conf_perm"
 98 |   top: "fc7_norm_mbox_center_conf_flat"
 99 |   flatten_param {
100 |     axis: 1
101 |   }
102 | }
103 | ```
104 | ### (4) Changing the function type in train.prototxt
105 | ```
106 | layer {
107 |   name: "mbox_loss"
108 |   type: "MultiBoxCenterLoss" # the type was changed
109 |   bottom: "mbox_loc"
110 |   bottom: "mbox_conf"
111 |   bottom: "mbox_priorbox"
112 |   bottom: "label"
113 |   bottom: "mbox_center_conf" # mbox_center_conf is the concatenation of the center_features of all default boxes.
114 |   top: "mbox_loss"
115 |   include {
116 |     phase: TRAIN
117 |   }
118 |   propagate_down: true
119 |   propagate_down: true
120 |   propagate_down: false
121 |   propagate_down: false
122 |   propagate_down: true #center_features layers need backward.
123 |   loss_param {
124 |     normalization: VALID
125 |   }
126 |   multibox_loss_param {
127 |     loc_loss_type: SMOOTH_L1
128 |     conf_loss_type: SOFTMAX
129 |     loc_weight: 1
130 |     num_classes: 21
131 |     share_location: true
132 |     match_type: PER_PREDICTION
133 |     overlap_threshold: 0.2
134 |     use_prior_for_matching: true
135 |     background_label_id: 0
136 |     use_difficult_gt: true
137 |     neg_pos_ratio: 3
138 |     neg_overlap: 0.1
139 |     code_type: CENTER_SIZE
140 |     ignore_cross_boundary_bbox: false
141 |     mining_type: MAX_NEGATIVE
142 |   }
143 |   multibox_center_loss_param { 
144 |     center_features: 16 # center_features represents the length of features that is equal to the length of object centers in each default box. 
145 |   }
146 | }
147 | ```


--------------------------------------------------------------------------------
/multibox_focal_loss_layer.cpp:
--------------------------------------------------------------------------------
  1 | #include <algorithm>
  2 | #include <map>
  3 | #include <utility>
  4 | #include <vector>
  5 | 
  6 | #include "caffe/layers/multibox_focal_loss_layer.hpp"
  7 | #include "caffe/util/math_functions.hpp"
  8 | 
  9 | namespace caffe {
 10 | 
 11 | template <typename Dtype>
 12 | void MultiBoxFocalLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 13 |       const vector<Blob<Dtype>*>& top) {
 14 |   LossLayer<Dtype>::LayerSetUp(bottom, top);
 15 |   // Without explicit propagate_down flags, default to (true, true, false,
 16 |   // false): gradients flow into the first two bottoms only.
 17 |   if (this->layer_param_.propagate_down_size() == 0) {
 18 |     for (int i = 0; i < 4; ++i) {
 19 |       this->layer_param_.add_propagate_down(i < 2);
 20 |     }
 21 |   }
 22 | 
 23 |   // Keep a copy of the MultiBox configuration; read the focal-loss settings.
 24 |   const MultiBoxLossParameter& multibox_loss_param =
 25 |       this->layer_param_.multibox_loss_param();
 26 |   multibox_loss_param_ = multibox_loss_param;
 27 |   gamma_ = this->layer_param_.focal_loss_param().gamma();
 28 |   alpha_ = this->layer_param_.focal_loss_param().alpha();
 29 |   // num_priors_ is a quarter of bottom[2]'s height; num_ is bottom[0]'s num.
 30 |   num_priors_ = bottom[2]->height() / 4;
 31 |   num_ = bottom[0]->num();
 32 |   // Class bookkeeping.
 33 |   CHECK(multibox_loss_param.has_num_classes()) << "Must provide num_classes.";
 34 |   num_classes_ = multibox_loss_param.num_classes();
 35 |   CHECK_GE(num_classes_, 1) << "num_classes should not be less than 1.";
 36 |   share_location_ = multibox_loss_param.share_location();
 37 |   loc_classes_ = 1;
 38 |   if (!share_location_) {
 39 |     loc_classes_ = num_classes_;
 40 |   }
 41 |   background_label_id_ = multibox_loss_param.background_label_id();
 42 |   use_difficult_gt_ = multibox_loss_param.use_difficult_gt();
 43 | 
 44 |   // Negative mining is driven by mining_type; the deprecated do_neg_mining
 45 |   // flag, when present, must agree with it.
 46 |   mining_type_ = multibox_loss_param.mining_type();
 47 |   if (multibox_loss_param.has_do_neg_mining()) {
 48 |     LOG(WARNING) << "do_neg_mining is deprecated, use mining_type instead.";
 49 |     do_neg_mining_ = multibox_loss_param.do_neg_mining();
 50 |     CHECK_EQ(do_neg_mining_,
 51 |              mining_type_ != MultiBoxLossParameter_MiningType_NONE);
 52 |   }
 53 |   do_neg_mining_ = mining_type_ != MultiBoxLossParameter_MiningType_NONE;
 54 |   if (do_neg_mining_) {
 55 |     CHECK(share_location_)
 56 |         << "Currently only support negative mining if share_location is true.";
 57 |   }
 58 | 
 59 |   // Normalization: an explicit `normalization` wins; otherwise a set legacy
 60 |   // `normalize` flag selects VALID (true) or BATCH_SIZE (false).
 61 |   if (this->layer_param_.loss_param().has_normalization() ||
 62 |       !this->layer_param_.loss_param().has_normalize()) {
 63 |     normalization_ = this->layer_param_.loss_param().normalization();
 64 |   } else if (this->layer_param_.loss_param().normalize()) {
 65 |     normalization_ = LossParameter_NormalizationMode_VALID;
 66 |   } else {
 67 |     normalization_ = LossParameter_NormalizationMode_BATCH_SIZE;
 68 |   }
 69 | 
 70 |   // Localization loss: internal layer set up on placeholder 1 x 4 blobs.
 71 |   const vector<int> loss_shape(1, 1);
 72 |   loc_weight_ = multibox_loss_param.loc_weight();
 73 |   loc_loss_type_ = multibox_loss_param.loc_loss_type();
 74 |   vector<int> loc_shape(2, 1);
 75 |   loc_shape[1] = 4;
 76 |   loc_pred_.Reshape(loc_shape);
 77 |   loc_gt_.Reshape(loc_shape);
 78 |   loc_bottom_vec_.push_back(&loc_pred_);
 79 |   loc_bottom_vec_.push_back(&loc_gt_);
 80 |   loc_loss_.Reshape(loss_shape);
 81 |   loc_top_vec_.push_back(&loc_loss_);
 82 |   LayerParameter loc_layer_param;
 83 |   switch (loc_loss_type_) {
 84 |     case MultiBoxLossParameter_LocLossType_L2:
 85 |       loc_layer_param.set_name(this->layer_param_.name() + "_l2_loc");
 86 |       loc_layer_param.set_type("EuclideanLoss");
 87 |       break;
 88 |     case MultiBoxLossParameter_LocLossType_SMOOTH_L1:
 89 |       loc_layer_param.set_name(this->layer_param_.name() + "_smooth_L1_loc");
 90 |       loc_layer_param.set_type("SmoothL1Loss");
 91 |       break;
 92 |     default:
 93 |       LOG(FATAL) << "Unknown localization loss type.";
 94 |   }
 95 |   loc_layer_param.add_loss_weight(loc_weight_);
 96 |   loc_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(loc_layer_param);
 97 |   loc_loss_layer_->SetUp(loc_bottom_vec_, loc_top_vec_);
 98 | 
 99 |   // Confidence loss: FocalLoss for SOFTMAX, SigmoidCrossEntropyLoss for LOGISTIC.
100 |   conf_loss_type_ = multibox_loss_param.conf_loss_type();
101 |   conf_bottom_vec_.push_back(&conf_pred_);
102 |   conf_bottom_vec_.push_back(&conf_gt_);
103 |   conf_loss_.Reshape(loss_shape);
104 |   conf_top_vec_.push_back(&conf_loss_);
105 |   LayerParameter conf_layer_param;
106 |   conf_layer_param.add_loss_weight(Dtype(1.));
107 |   vector<int> conf_shape(1, 1);  // placeholder shape, extended per loss type below
108 |   if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_SOFTMAX) {
109 |     CHECK_GE(background_label_id_, 0)
110 |         << "background_label_id should be within [0, num_classes) for Softmax.";
111 |     CHECK_LT(background_label_id_, num_classes_)
112 |         << "background_label_id should be within [0, num_classes) for Softmax.";
113 |     conf_layer_param.set_name(this->layer_param_.name() + "_focal_conf");
114 |     conf_layer_param.set_type("FocalLoss");
115 |     conf_layer_param.mutable_loss_param()->set_normalization(
116 |         LossParameter_NormalizationMode_NONE);
117 |     FocalLossParameter* focal_param = conf_layer_param.mutable_focal_loss_param();
118 |     focal_param->set_axis(1);
119 |     focal_param->set_alpha(alpha_);
120 |     focal_param->set_gamma(gamma_);
121 |     conf_gt_.Reshape(conf_shape);  // ground truth keeps the single-entry shape
122 |     conf_shape.push_back(num_classes_);
123 |     conf_pred_.Reshape(conf_shape);  // prediction gets a num_classes_ axis
124 |   } else if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_LOGISTIC) {
125 |     conf_layer_param.set_name(this->layer_param_.name() + "_logistic_conf");
126 |     conf_layer_param.set_type("SigmoidCrossEntropyLoss");
127 |     conf_shape.push_back(num_classes_);
128 |     conf_gt_.Reshape(conf_shape);
129 |     conf_pred_.Reshape(conf_shape);
130 |   } else {
131 |     LOG(FATAL) << "Unknown confidence loss type.";
132 |   }
133 |   conf_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(conf_layer_param);
134 |   conf_loss_layer_->SetUp(conf_bottom_vec_, conf_top_vec_);
135 | }
136 | // Reshape: caches the batch size, prior count and ground-truth count from the bottoms and checks that the prediction blobs agree with them.
137 | template <typename Dtype>
138 | void MultiBoxFocalLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
139 |       const vector<Blob<Dtype>*>& top) {
140 |   LossLayer<Dtype>::Reshape(bottom, top);
141 |   num_ = bottom[0]->num();  // Batch size, taken from the location predictions.
142 |   num_priors_ = bottom[2]->height() / 4;  // Presumably 4 coordinates per prior box -- confirm against the prior layer's output.
143 |   num_gt_ = bottom[3]->height();  // Presumably one row per ground-truth box -- confirm against the data layer.
144 |   CHECK_EQ(bottom[0]->num(), bottom[1]->num());  // Loc and conf predictions must cover the same batch.
145 |   CHECK_EQ(num_priors_ * loc_classes_ * 4, bottom[0]->channels())
146 |       << "Number of priors must match number of location predictions.";
147 |   CHECK_EQ(num_priors_ * num_classes_, bottom[1]->channels())
148 |       << "Number of priors must match number of confidence predictions.";
149 | }
150 | // Forward_cpu: matches priors to ground truth, mines examples, runs the internal loc/conf loss layers and writes their normalized sum to top[0].
151 | template <typename Dtype>
152 | void MultiBoxFocalLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
153 |     const vector<Blob<Dtype>*>& top) {
154 |   const Dtype* loc_data = bottom[0]->cpu_data();  // Location predictions.
155 |   const Dtype* conf_data = bottom[1]->cpu_data();  // Confidence predictions.
156 |   const Dtype* prior_data = bottom[2]->cpu_data();  // Prior boxes.
157 |   const Dtype* gt_data = bottom[3]->cpu_data();  // Ground-truth annotations.
158 | 
159 |   // Retrieve all ground truth into a map keyed by int (presumably the image index within the batch -- confirm in GetGroundTruth).
160 |   map<int, vector<NormalizedBBox> > all_gt_bboxes;
161 |   GetGroundTruth(gt_data, num_gt_, background_label_id_, use_difficult_gt_,
162 |                  &all_gt_bboxes);
163 | 
164 |   // Retrieve all prior bboxes. They are the same within a batch since we
165 |   // assume all images in a batch have the same dimensions.
166 |   vector<NormalizedBBox> prior_bboxes;
167 |   vector<vector<float> > prior_variances;
168 |   GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances);
169 | 
170 |   // Retrieve all location predictions from bottom[0].
171 |   vector<LabelBBox> all_loc_preds;
172 |   GetLocPredictions(loc_data, num_, num_priors_, loc_classes_, share_location_,
173 |                     &all_loc_preds);
174 | 
175 |   // Find matches between source bboxes and ground truth bboxes; all_match_indices_ is a member because Backward_cpu reads it again.
176 |   vector<map<int, vector<float> > > all_match_overlaps;
177 |   FindMatches(all_loc_preds, all_gt_bboxes, prior_bboxes, prior_variances,
178 |               multibox_loss_param_, &all_match_overlaps, &all_match_indices_);
179 | 
180 |   num_matches_ = 0;
181 |   int num_negs = 0;
182 |   // Sample hard negative (and positive) examples based on mining type; the counts and index lists are filled through the output pointers.
183 |   MineHardExamples(*bottom[1], all_loc_preds, all_gt_bboxes, prior_bboxes,
184 |                    prior_variances, all_match_overlaps, multibox_loss_param_,
185 |                    &num_matches_, &num_negs, &all_match_indices_,
186 |                    &all_neg_indices_);
187 | 
188 |   if (num_matches_ >= 1) {
189 |     // Form data to pass on to loc_loss_layer_: both blobs are reshaped to 1 x (num_matches_ * 4).
190 |     vector<int> loc_shape(2);
191 |     loc_shape[0] = 1;
192 |     loc_shape[1] = num_matches_ * 4;
193 |     loc_pred_.Reshape(loc_shape);
194 |     loc_gt_.Reshape(loc_shape);
195 |     Dtype* loc_pred_data = loc_pred_.mutable_cpu_data();
196 |     Dtype* loc_gt_data = loc_gt_.mutable_cpu_data();
197 |     EncodeLocPrediction(all_loc_preds, all_gt_bboxes, all_match_indices_,
198 |                         prior_bboxes, prior_variances, multibox_loss_param_,
199 |                         loc_pred_data, loc_gt_data);
200 |     loc_loss_layer_->Reshape(loc_bottom_vec_, loc_top_vec_);  // Let the inner layer see the new blob shapes before it runs.
201 |     loc_loss_layer_->Forward(loc_bottom_vec_, loc_top_vec_);
202 |   } else {
203 |     loc_loss_.mutable_cpu_data()[0] = 0;  // No matches: the location loss is zero.
204 |   }
205 | 
206 |   // Form data to pass on to conf_loss_layer_. With negative mining only matched plus mined negative samples are scored; otherwise every prior of every image is.
207 |   if (do_neg_mining_) {
208 |     num_conf_ = num_matches_ + num_negs;
209 |   } else {
210 |     num_conf_ = num_ * num_priors_;
211 |   }
212 |   if (num_conf_ >= 1) {
213 |     // Reshape the confidence data: SOFTMAX keeps one label per sample, LOGISTIC gives the target the same shape as the prediction.
214 |     vector<int> conf_shape;
215 |     if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_SOFTMAX) {
216 |       conf_shape.push_back(num_conf_);
217 |       conf_gt_.Reshape(conf_shape);
218 |       conf_shape.push_back(num_classes_);
219 |       conf_pred_.Reshape(conf_shape);
220 |     } else if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_LOGISTIC) {
221 |       conf_shape.push_back(1);
222 |       conf_shape.push_back(num_conf_);
223 |       conf_shape.push_back(num_classes_);
224 |       conf_gt_.Reshape(conf_shape);
225 |       conf_pred_.Reshape(conf_shape);
226 |     } else {
227 |       LOG(FATAL) << "Unknown confidence loss type.";
228 |     }
229 |     if (!do_neg_mining_) {
230 |       // Consider all scores.
231 |       // Share data with bottom[1]; the diff is shared later, in Backward_cpu.
232 |       CHECK_EQ(conf_pred_.count(), bottom[1]->count());
233 |       conf_pred_.ShareData(*(bottom[1]));
234 |     }
235 |     Dtype* conf_pred_data = conf_pred_.mutable_cpu_data();
236 |     Dtype* conf_gt_data = conf_gt_.mutable_cpu_data();
237 |     caffe_set(conf_gt_.count(), Dtype(background_label_id_), conf_gt_data);  // Default every target to the background label before encoding.
238 |     EncodeConfPrediction(conf_data, num_, num_priors_, multibox_loss_param_,
239 |                          all_match_indices_, all_neg_indices_, all_gt_bboxes,
240 |                          conf_pred_data, conf_gt_data);
241 |     conf_loss_layer_->Reshape(conf_bottom_vec_, conf_top_vec_);  // Let the inner layer see the new blob shapes before it runs.
242 |     conf_loss_layer_->Forward(conf_bottom_vec_, conf_top_vec_);
243 |   } else {
244 |     conf_loss_.mutable_cpu_data()[0] = 0;  // Nothing to score: the confidence loss is zero.
245 |   }
246 | 
247 |   top[0]->mutable_cpu_data()[0] = 0;  // Each loss term below is added only when its bottom is flagged in the layer's propagate_down list.
248 |   if (this->layer_param_.propagate_down(0)) {  // NOTE(review): indexes propagate_down without checking propagate_down_size() -- confirm the prototxt always lists it.
249 |     Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
250 |         normalization_, num_, num_priors_, num_matches_);
251 |     top[0]->mutable_cpu_data()[0] +=
252 |         loc_weight_ * loc_loss_.cpu_data()[0] / normalizer;
253 |   }
254 |   if (this->layer_param_.propagate_down(1)) {  // NOTE(review): same unchecked index as above.
255 |     Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
256 |         normalization_, num_, num_priors_, num_matches_);
257 |     top[0]->mutable_cpu_data()[0] += conf_loss_.cpu_data()[0] / normalizer;
258 |   }
259 | }
260 | // Backward_cpu: backpropagates through the internal loss layers and scatters the scaled gradients into bottom[0] (loc) and bottom[1] (conf).
261 | template <typename Dtype>
262 | void MultiBoxFocalLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
263 |     const vector<bool>& propagate_down,
264 |     const vector<Blob<Dtype>*>& bottom) {
265 |   // Priors (bottom[2]) and labels (bottom[3]) are inputs only; requesting a gradient for either is fatal.
266 |   if (propagate_down[2]) {
267 |     LOG(FATAL) << this->type()
268 |         << " Layer cannot backpropagate to prior inputs.";
269 |   }
270 |   if (propagate_down[3]) {
271 |     LOG(FATAL) << this->type()
272 |         << " Layer cannot backpropagate to label inputs.";
273 |   }
274 | 
275 |   // Back propagate on location prediction.
276 |   if (propagate_down[0]) {
277 |     Dtype* loc_bottom_diff = bottom[0]->mutable_cpu_diff();
278 |     caffe_set(bottom[0]->count(), Dtype(0), loc_bottom_diff);  // Start from zero; only matched priors are overwritten below.
279 |     if (num_matches_ >= 1) {
280 |       vector<bool> loc_propagate_down;
281 |       // Only back propagate on prediction, not ground truth.
282 |       loc_propagate_down.push_back(true);
283 |       loc_propagate_down.push_back(false);
284 |       loc_loss_layer_->Backward(loc_top_vec_, loc_propagate_down,
285 |                                 loc_bottom_vec_);
286 |       // Scale gradient by the incoming top diff divided by the same normalizer Forward_cpu used.
287 |       Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
288 |           normalization_, num_, num_priors_, num_matches_);
289 |       Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer;
290 |       caffe_scal(loc_pred_.count(), loss_weight, loc_pred_.mutable_cpu_diff());
291 |       // Copy gradient back to bottom[0]; assumes EncodeLocPrediction packed the matches in this same iteration order -- confirm.
292 |       const Dtype* loc_pred_diff = loc_pred_.cpu_diff();
293 |       int count = 0;
294 |       for (int i = 0; i < num_; ++i) {
295 |         for (map<int, vector<int> >::iterator it =
296 |              all_match_indices_[i].begin();
297 |              it != all_match_indices_[i].end(); ++it) {
298 |           const int label = share_location_ ? 0 : it->first;  // A shared location uses the single class slot 0.
299 |           const vector<int>& match_index = it->second;
300 |           for (int j = 0; j < match_index.size(); ++j) {
301 |             if (match_index[j] <= -1) {  // Negative entries are skipped (presumably unmatched or ignored priors).
302 |               continue;
303 |             }
304 |             // Copy the diff to the right place: 4 values at prior j, class slot `label`.
305 |             int start_idx = loc_classes_ * 4 * j + label * 4;
306 |             caffe_copy<Dtype>(4, loc_pred_diff + count * 4,
307 |                               loc_bottom_diff + start_idx);
308 |             ++count;
309 |           }
310 |         }
311 |         loc_bottom_diff += bottom[0]->offset(1);  // Advance to the next item in the batch.
312 |       }
313 |     }
314 |   }
315 | 
316 |   // Back propagate on confidence prediction.
317 |   if (propagate_down[1]) {
318 |     Dtype* conf_bottom_diff = bottom[1]->mutable_cpu_diff();
319 |     caffe_set(bottom[1]->count(), Dtype(0), conf_bottom_diff);  // Start from zero; only selected priors are overwritten below.
320 |     if (num_conf_ >= 1) {
321 |       vector<bool> conf_propagate_down;
322 |       // Only back propagate on prediction, not ground truth.
323 |       conf_propagate_down.push_back(true);
324 |       conf_propagate_down.push_back(false);
325 |       conf_loss_layer_->Backward(conf_top_vec_, conf_propagate_down,
326 |                                  conf_bottom_vec_);
327 |       // Scale gradient by the incoming top diff divided by the same normalizer Forward_cpu used.
328 |       Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
329 |           normalization_, num_, num_priors_, num_matches_);
330 |       Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer;
331 |       caffe_scal(conf_pred_.count(), loss_weight,
332 |                  conf_pred_.mutable_cpu_diff());
333 |       // Copy gradient back to bottom[1].
334 |       const Dtype* conf_pred_diff = conf_pred_.cpu_diff();
335 |       if (do_neg_mining_) {
336 |         int count = 0;  // Running index into the packed rows of conf_pred_.
337 |         for (int i = 0; i < num_; ++i) {
338 |           // Copy matched (positive) bboxes scores' diff.
339 |           const map<int, vector<int> >& match_indices = all_match_indices_[i];
340 |           for (map<int, vector<int> >::const_iterator it =
341 |                match_indices.begin(); it != match_indices.end(); ++it) {
342 |             const vector<int>& match_index = it->second;
343 |             CHECK_EQ(match_index.size(), num_priors_);
344 |             for (int j = 0; j < num_priors_; ++j) {
345 |               if (match_index[j] <= -1) {  // Negative entries are skipped (presumably unmatched or ignored priors).
346 |                 continue;
347 |               }
348 |               // Copy the diff to the right place: num_classes_ values at prior j.
349 |               caffe_copy<Dtype>(num_classes_,
350 |                                 conf_pred_diff + count * num_classes_,
351 |                                 conf_bottom_diff + j * num_classes_);
352 |               ++count;
353 |             }
354 |           }
355 |           // Copy negative bboxes scores' diff; all_neg_indices_[i] holds prior indices for image i.
356 |           for (int n = 0; n < all_neg_indices_[i].size(); ++n) {
357 |             int j = all_neg_indices_[i][n];
358 |             CHECK_LT(j, num_priors_);
359 |             caffe_copy<Dtype>(num_classes_,
360 |                               conf_pred_diff + count * num_classes_,
361 |                               conf_bottom_diff + j * num_classes_);
362 |             ++count;
363 |           }
364 |           conf_bottom_diff += bottom[1]->offset(1);  // Advance to the next item in the batch.
365 |         }
366 |       } else {
367 |         // The diff is already computed and stored in conf_pred_, which covers every prior; share it instead of copying.
368 |         bottom[1]->ShareDiff(conf_pred_);
369 |       }
370 |     }
371 |   }
372 | 
373 |   // After backward, remove match statistics (Forward_cpu fills them again on the next pass).
374 |   all_match_indices_.clear();
375 |   all_neg_indices_.clear();
376 | }
377 | // Caffe macros: presumably instantiate the layer template and register it under the type name "MultiBoxFocalLoss" -- confirm in layer_factory.hpp.
378 | INSTANTIATE_CLASS(MultiBoxFocalLossLayer);
379 | REGISTER_LAYER_CLASS(MultiBoxFocalLoss);
380 | 
381 | }  // namespace caffe
382 | 


--------------------------------------------------------------------------------
/center_loss_test.prototxt:
--------------------------------------------------------------------------------
   1 | layer {
   2 |   name: "data"
   3 |   type: "AnnotatedData"
   4 |   top: "data"
   5 |   top: "label"
   6 |   include {
   7 |     phase: TEST
   8 |   }
   9 |   transform_param {
  10 |     mean_value: 28
  11 |     mean_value: 28
  12 |     mean_value: 28
  13 |     resize_param {
  14 |       prob: 1
  15 |       resize_mode: WARP
  16 |       height: 300
  17 |       width: 300
  18 |       interp_mode: LINEAR
  19 |     }
  20 |   }
  21 |   data_param {
  22 |     source: "/VOC0712/lmdb/VOC0712_test_lmdb/"
  23 |     batch_size: 1
  24 |     backend: LMDB
  25 |   }
  26 |   annotated_data_param {
  27 |     batch_sampler {
  28 |     }
  29 |     label_map_file: "/VOC0712/labelmap_voc.prototxt"
  30 |   }
  31 | }
  32 | layer {
  33 |   name: "conv1_1"
  34 |   type: "Convolution"
  35 |   bottom: "data"
  36 |   top: "conv1_1"
  37 |   param {
  38 |     lr_mult: 1
  39 |     decay_mult: 1
  40 |   }
  41 |   param {
  42 |     lr_mult: 2
  43 |     decay_mult: 0
  44 |   }
  45 |   convolution_param {
  46 |     num_output: 64
  47 |     pad: 1
  48 |     kernel_size: 3
  49 |     weight_filler {
  50 |       type: "xavier"
  51 |     }
  52 |     bias_filler {
  53 |       type: "constant"
  54 |       value: 0
  55 |     }
  56 |   }
  57 | }
  58 | layer {
  59 |   name: "relu1_1"
  60 |   type: "ReLU"
  61 |   bottom: "conv1_1"
  62 |   top: "conv1_1"
  63 | }
  64 | layer {
  65 |   name: "conv1_2"
  66 |   type: "Convolution"
  67 |   bottom: "conv1_1"
  68 |   top: "conv1_2"
  69 |   param {
  70 |     lr_mult: 1
  71 |     decay_mult: 1
  72 |   }
  73 |   param {
  74 |     lr_mult: 2
  75 |     decay_mult: 0
  76 |   }
  77 |   convolution_param {
  78 |     num_output: 64
  79 |     pad: 1
  80 |     kernel_size: 3
  81 |     weight_filler {
  82 |       type: "xavier"
  83 |     }
  84 |     bias_filler {
  85 |       type: "constant"
  86 |       value: 0
  87 |     }
  88 |   }
  89 | }
  90 | layer {
  91 |   name: "relu1_2"
  92 |   type: "ReLU"
  93 |   bottom: "conv1_2"
  94 |   top: "conv1_2"
  95 | }
  96 | layer {
  97 |   name: "pool1"
  98 |   type: "Pooling"
  99 |   bottom: "conv1_2"
 100 |   top: "pool1"
 101 |   pooling_param {
 102 |     pool: MAX
 103 |     kernel_size: 2
 104 |     stride: 2
 105 |   }
 106 | }
 107 | layer {
 108 |   name: "conv2_1"
 109 |   type: "Convolution"
 110 |   bottom: "pool1"
 111 |   top: "conv2_1"
 112 |   param {
 113 |     lr_mult: 1
 114 |     decay_mult: 1
 115 |   }
 116 |   param {
 117 |     lr_mult: 2
 118 |     decay_mult: 0
 119 |   }
 120 |   convolution_param {
 121 |     num_output: 128
 122 |     pad: 1
 123 |     kernel_size: 3
 124 |     weight_filler {
 125 |       type: "xavier"
 126 |     }
 127 |     bias_filler {
 128 |       type: "constant"
 129 |       value: 0
 130 |     }
 131 |   }
 132 | }
 133 | layer {
 134 |   name: "relu2_1"
 135 |   type: "ReLU"
 136 |   bottom: "conv2_1"
 137 |   top: "conv2_1"
 138 | }
 139 | layer {
 140 |   name: "conv2_2"
 141 |   type: "Convolution"
 142 |   bottom: "conv2_1"
 143 |   top: "conv2_2"
 144 |   param {
 145 |     lr_mult: 1
 146 |     decay_mult: 1
 147 |   }
 148 |   param {
 149 |     lr_mult: 2
 150 |     decay_mult: 0
 151 |   }
 152 |   convolution_param {
 153 |     num_output: 128
 154 |     pad: 1
 155 |     kernel_size: 3
 156 |     weight_filler {
 157 |       type: "xavier"
 158 |     }
 159 |     bias_filler {
 160 |       type: "constant"
 161 |       value: 0
 162 |     }
 163 |   }
 164 | }
 165 | layer {
 166 |   name: "relu2_2"
 167 |   type: "ReLU"
 168 |   bottom: "conv2_2"
 169 |   top: "conv2_2"
 170 | }
 171 | layer {
 172 |   name: "pool2"
 173 |   type: "Pooling"
 174 |   bottom: "conv2_2"
 175 |   top: "pool2"
 176 |   pooling_param {
 177 |     pool: MAX
 178 |     kernel_size: 2
 179 |     stride: 2
 180 |   }
 181 | }
 182 | layer {
 183 |   name: "conv3_1"
 184 |   type: "Convolution"
 185 |   bottom: "pool2"
 186 |   top: "conv3_1"
 187 |   param {
 188 |     lr_mult: 1
 189 |     decay_mult: 1
 190 |   }
 191 |   param {
 192 |     lr_mult: 2
 193 |     decay_mult: 0
 194 |   }
 195 |   convolution_param {
 196 |     num_output: 256
 197 |     pad: 1
 198 |     kernel_size: 3
 199 |     weight_filler {
 200 |       type: "xavier"
 201 |     }
 202 |     bias_filler {
 203 |       type: "constant"
 204 |       value: 0
 205 |     }
 206 |   }
 207 | }
 208 | layer {
 209 |   name: "relu3_1"
 210 |   type: "ReLU"
 211 |   bottom: "conv3_1"
 212 |   top: "conv3_1"
 213 | }
 214 | layer {
 215 |   name: "conv3_2"
 216 |   type: "Convolution"
 217 |   bottom: "conv3_1"
 218 |   top: "conv3_2"
 219 |   param {
 220 |     lr_mult: 1
 221 |     decay_mult: 1
 222 |   }
 223 |   param {
 224 |     lr_mult: 2
 225 |     decay_mult: 0
 226 |   }
 227 |   convolution_param {
 228 |     num_output: 256
 229 |     pad: 1
 230 |     kernel_size: 3
 231 |     weight_filler {
 232 |       type: "xavier"
 233 |     }
 234 |     bias_filler {
 235 |       type: "constant"
 236 |       value: 0
 237 |     }
 238 |   }
 239 | }
 240 | layer {
 241 |   name: "relu3_2"
 242 |   type: "ReLU"
 243 |   bottom: "conv3_2"
 244 |   top: "conv3_2"
 245 | }
 246 | layer {
 247 |   name: "conv3_3"
 248 |   type: "Convolution"
 249 |   bottom: "conv3_2"
 250 |   top: "conv3_3"
 251 |   param {
 252 |     lr_mult: 1
 253 |     decay_mult: 1
 254 |   }
 255 |   param {
 256 |     lr_mult: 2
 257 |     decay_mult: 0
 258 |   }
 259 |   convolution_param {
 260 |     num_output: 256
 261 |     pad: 1
 262 |     kernel_size: 3
 263 |     weight_filler {
 264 |       type: "xavier"
 265 |     }
 266 |     bias_filler {
 267 |       type: "constant"
 268 |       value: 0
 269 |     }
 270 |   }
 271 | }
 272 | layer {
 273 |   name: "relu3_3"
 274 |   type: "ReLU"
 275 |   bottom: "conv3_3"
 276 |   top: "conv3_3"
 277 | }
 278 | layer {
 279 |   name: "pool3"
 280 |   type: "Pooling"
 281 |   bottom: "conv3_3"
 282 |   top: "pool3"
 283 |   pooling_param {
 284 |     pool: MAX
 285 |     kernel_size: 2
 286 |     stride: 2
 287 |   }
 288 | }
 289 | layer {
 290 |   name: "conv4_1"
 291 |   type: "Convolution"
 292 |   bottom: "pool3"
 293 |   top: "conv4_1"
 294 |   param {
 295 |     lr_mult: 1
 296 |     decay_mult: 1
 297 |   }
 298 |   param {
 299 |     lr_mult: 2
 300 |     decay_mult: 0
 301 |   }
 302 |   convolution_param {
 303 |     num_output: 512
 304 |     pad: 1
 305 |     kernel_size: 3
 306 |     weight_filler {
 307 |       type: "xavier"
 308 |     }
 309 |     bias_filler {
 310 |       type: "constant"
 311 |       value: 0
 312 |     }
 313 |   }
 314 | }
 315 | layer {
 316 |   name: "relu4_1"
 317 |   type: "ReLU"
 318 |   bottom: "conv4_1"
 319 |   top: "conv4_1"
 320 | }
 321 | layer {
 322 |   name: "conv4_2"
 323 |   type: "Convolution"
 324 |   bottom: "conv4_1"
 325 |   top: "conv4_2"
 326 |   param {
 327 |     lr_mult: 1
 328 |     decay_mult: 1
 329 |   }
 330 |   param {
 331 |     lr_mult: 2
 332 |     decay_mult: 0
 333 |   }
 334 |   convolution_param {
 335 |     num_output: 512
 336 |     pad: 1
 337 |     kernel_size: 3
 338 |     weight_filler {
 339 |       type: "xavier"
 340 |     }
 341 |     bias_filler {
 342 |       type: "constant"
 343 |       value: 0
 344 |     }
 345 |   }
 346 | }
 347 | layer {
 348 |   name: "relu4_2"
 349 |   type: "ReLU"
 350 |   bottom: "conv4_2"
 351 |   top: "conv4_2"
 352 | }
 353 | layer {
 354 |   name: "conv4_3"
 355 |   type: "Convolution"
 356 |   bottom: "conv4_2"
 357 |   top: "conv4_3"
 358 |   param {
 359 |     lr_mult: 1
 360 |     decay_mult: 1
 361 |   }
 362 |   param {
 363 |     lr_mult: 2
 364 |     decay_mult: 0
 365 |   }
 366 |   convolution_param {
 367 |     num_output: 512
 368 |     pad: 1
 369 |     kernel_size: 3
 370 |     weight_filler {
 371 |       type: "xavier"
 372 |     }
 373 |     bias_filler {
 374 |       type: "constant"
 375 |       value: 0
 376 |     }
 377 |   }
 378 | }
 379 | layer {
 380 |   name: "relu4_3"
 381 |   type: "ReLU"
 382 |   bottom: "conv4_3"
 383 |   top: "conv4_3"
 384 | }
 385 | layer {
 386 |   name: "pool4"
 387 |   type: "Pooling"
 388 |   bottom: "conv4_3"
 389 |   top: "pool4"
 390 |   pooling_param {
 391 |     pool: MAX
 392 |     kernel_size: 3
 393 |     stride: 1
 394 |     pad: 1
 395 |   }
 396 | }
 397 | layer {
 398 |   name: "conv5_1"
 399 |   type: "Convolution"
 400 |   bottom: "pool4"
 401 |   top: "conv5_1"
 402 |   param {
 403 |     lr_mult: 1
 404 |     decay_mult: 1
 405 |   }
 406 |   param {
 407 |     lr_mult: 2
 408 |     decay_mult: 0
 409 |   }
 410 |   convolution_param {
 411 |     num_output: 512
 412 |     pad: 2
 413 |     kernel_size: 3
 414 |     weight_filler {
 415 |       type: "xavier"
 416 |     }
 417 |     bias_filler {
 418 |       type: "constant"
 419 |       value: 0
 420 |     }
 421 |     dilation: 2
 422 |   }
 423 | }
 424 | layer {
 425 |   name: "relu5_1"
 426 |   type: "ReLU"
 427 |   bottom: "conv5_1"
 428 |   top: "conv5_1"
 429 | }
 430 | layer {
 431 |   name: "conv5_2"
 432 |   type: "Convolution"
 433 |   bottom: "conv5_1"
 434 |   top: "conv5_2"
 435 |   param {
 436 |     lr_mult: 1
 437 |     decay_mult: 1
 438 |   }
 439 |   param {
 440 |     lr_mult: 2
 441 |     decay_mult: 0
 442 |   }
 443 |   convolution_param {
 444 |     num_output: 512
 445 |     pad: 2
 446 |     kernel_size: 3
 447 |     weight_filler {
 448 |       type: "xavier"
 449 |     }
 450 |     bias_filler {
 451 |       type: "constant"
 452 |       value: 0
 453 |     }
 454 |     dilation: 2
 455 |   }
 456 | }
 457 | layer {
 458 |   name: "relu5_2"
 459 |   type: "ReLU"
 460 |   bottom: "conv5_2"
 461 |   top: "conv5_2"
 462 | }
 463 | layer {
 464 |   name: "conv5_3"
 465 |   type: "Convolution"
 466 |   bottom: "conv5_2"
 467 |   top: "conv5_3"
 468 |   param {
 469 |     lr_mult: 1
 470 |     decay_mult: 1
 471 |   }
 472 |   param {
 473 |     lr_mult: 2
 474 |     decay_mult: 0
 475 |   }
 476 |   convolution_param {
 477 |     num_output: 512
 478 |     pad: 2
 479 |     kernel_size: 3
 480 |     weight_filler {
 481 |       type: "xavier"
 482 |     }
 483 |     bias_filler {
 484 |       type: "constant"
 485 |       value: 0
 486 |     }
 487 |     dilation: 2
 488 |   }
 489 | }
 490 | layer {
 491 |   name: "relu5_3"
 492 |   type: "ReLU"
 493 |   bottom: "conv5_3"
 494 |   top: "conv5_3"
 495 | }
 496 | layer {
 497 |   name: "pool5"
 498 |   type: "Pooling"
 499 |   bottom: "conv5_3"
 500 |   top: "pool5"
 501 |   pooling_param {
 502 |     pool: MAX
 503 |     kernel_size: 3
 504 |     stride: 1
 505 |     pad: 1
 506 |   }
 507 | }
 508 | layer {
 509 |   name: "fc6"
 510 |   type: "Convolution"
 511 |   bottom: "pool5"
 512 |   top: "fc6"
 513 |   param {
 514 |     lr_mult: 1
 515 |     decay_mult: 1
 516 |   }
 517 |   param {
 518 |     lr_mult: 2
 519 |     decay_mult: 0
 520 |   }
 521 |   convolution_param {
 522 |     num_output: 1024
 523 |     pad: 6
 524 |     kernel_size: 3
 525 |     weight_filler {
 526 |       type: "xavier"
 527 |     }
 528 |     bias_filler {
 529 |       type: "constant"
 530 |       value: 0
 531 |     }
 532 |     dilation: 6
 533 |   }
 534 | }
 535 | layer {
 536 |   name: "relu6"
 537 |   type: "ReLU"
 538 |   bottom: "fc6"
 539 |   top: "fc6"
 540 | }
 541 | layer {
 542 |   name: "fc7"
 543 |   type: "Convolution"
 544 |   bottom: "fc6"
 545 |   top: "fc7"
 546 |   param {
 547 |     lr_mult: 1
 548 |     decay_mult: 1
 549 |   }
 550 |   param {
 551 |     lr_mult: 2
 552 |     decay_mult: 0
 553 |   }
 554 |   convolution_param {
 555 |     num_output: 1024
 556 |     kernel_size: 1
 557 |     weight_filler {
 558 |       type: "xavier"
 559 |     }
 560 |     bias_filler {
 561 |       type: "constant"
 562 |       value: 0
 563 |     }
 564 |   }
 565 | }
 566 | layer {
 567 |   name: "relu7"
 568 |   type: "ReLU"
 569 |   bottom: "fc7"
 570 |   top: "fc7"
 571 | }
 572 | layer {
 573 |   name: "conv6_1"
 574 |   type: "Convolution"
 575 |   bottom: "fc7"
 576 |   top: "conv6_1"
 577 |   param {
 578 |     lr_mult: 1
 579 |     decay_mult: 1
 580 |   }
 581 |   param {
 582 |     lr_mult: 2
 583 |     decay_mult: 0
 584 |   }
 585 |   convolution_param {
 586 |     num_output: 256
 587 |     pad: 0
 588 |     kernel_size: 1
 589 |     stride: 1
 590 |     weight_filler {
 591 |       type: "xavier"
 592 |     }
 593 |     bias_filler {
 594 |       type: "constant"
 595 |       value: 0
 596 |     }
 597 |   }
 598 | }
 599 | layer {
 600 |   name: "conv6_1_relu"
 601 |   type: "ReLU"
 602 |   bottom: "conv6_1"
 603 |   top: "conv6_1"
 604 | }
 605 | layer {
 606 |   name: "conv6_2"
 607 |   type: "Convolution"
 608 |   bottom: "conv6_1"
 609 |   top: "conv6_2"
 610 |   param {
 611 |     lr_mult: 1
 612 |     decay_mult: 1
 613 |   }
 614 |   param {
 615 |     lr_mult: 2
 616 |     decay_mult: 0
 617 |   }
 618 |   convolution_param {
 619 |     num_output: 512
 620 |     pad: 6
 621 |     kernel_size: 3
 622 |     stride: 1
 623 |     weight_filler {
 624 |       type: "xavier"
 625 |     }
 626 |     bias_filler {
 627 |       type: "constant"
 628 |       value: 0
 629 |     }
 630 |     dilation: 6
 631 |   }
 632 | }
 633 | layer {
 634 |   name: "conv6_2_relu"
 635 |   type: "ReLU"
 636 |   bottom: "conv6_2"
 637 |   top: "conv6_2"
 638 | }
 639 | layer {
 640 |   name: "conv7_1"
 641 |   type: "Convolution"
 642 |   bottom: "conv6_2"
 643 |   top: "conv7_1"
 644 |   param {
 645 |     lr_mult: 1
 646 |     decay_mult: 1
 647 |   }
 648 |   param {
 649 |     lr_mult: 2
 650 |     decay_mult: 0
 651 |   }
 652 |   convolution_param {
 653 |     num_output: 128
 654 |     pad: 0
 655 |     kernel_size: 1
 656 |     stride: 1
 657 |     weight_filler {
 658 |       type: "xavier"
 659 |     }
 660 |     bias_filler {
 661 |       type: "constant"
 662 |       value: 0
 663 |     }
 664 |   }
 665 | }
 666 | layer {
 667 |   name: "conv7_1_relu"
 668 |   type: "ReLU"
 669 |   bottom: "conv7_1"
 670 |   top: "conv7_1"
 671 | }
 672 | layer {
 673 |   name: "conv7_2"
 674 |   type: "Convolution"
 675 |   bottom: "conv7_1"
 676 |   top: "conv7_2"
 677 |   param {
 678 |     lr_mult: 1
 679 |     decay_mult: 1
 680 |   }
 681 |   param {
 682 |     lr_mult: 2
 683 |     decay_mult: 0
 684 |   }
 685 |   convolution_param {
 686 |     num_output: 256
 687 |     pad: 1
 688 |     kernel_size: 3
 689 |     stride: 2
 690 |     weight_filler {
 691 |       type: "xavier"
 692 |     }
 693 |     bias_filler {
 694 |       type: "constant"
 695 |       value: 0
 696 |     }
 697 |   }
 698 | }
 699 | layer {
 700 |   name: "conv7_2_relu"
 701 |   type: "ReLU"
 702 |   bottom: "conv7_2"
 703 |   top: "conv7_2"
 704 | }
 705 | layer {
 706 |   name: "conv8_1"
 707 |   type: "Convolution"
 708 |   bottom: "conv7_2"
 709 |   top: "conv8_1"
 710 |   param {
 711 |     lr_mult: 1
 712 |     decay_mult: 1
 713 |   }
 714 |   param {
 715 |     lr_mult: 2
 716 |     decay_mult: 0
 717 |   }
 718 |   convolution_param {
 719 |     num_output: 128
 720 |     pad: 0
 721 |     kernel_size: 1
 722 |     stride: 1
 723 |     weight_filler {
 724 |       type: "xavier"
 725 |     }
 726 |     bias_filler {
 727 |       type: "constant"
 728 |       value: 0
 729 |     }
 730 |   }
 731 | }
 732 | layer {
 733 |   name: "conv8_1_relu"
 734 |   type: "ReLU"
 735 |   bottom: "conv8_1"
 736 |   top: "conv8_1"
 737 | }
 738 | layer {
 739 |   name: "conv8_2"
 740 |   type: "Convolution"
 741 |   bottom: "conv8_1"
 742 |   top: "conv8_2"
 743 |   param {
 744 |     lr_mult: 1
 745 |     decay_mult: 1
 746 |   }
 747 |   param {
 748 |     lr_mult: 2
 749 |     decay_mult: 0
 750 |   }
 751 |   convolution_param {
 752 |     num_output: 256
 753 |     pad: 1
 754 |     kernel_size: 3
 755 |     stride: 2
 756 |     weight_filler {
 757 |       type: "xavier"
 758 |     }
 759 |     bias_filler {
 760 |       type: "constant"
 761 |       value: 0
 762 |     }
 763 |   }
 764 | }
 765 | layer {
 766 |   name: "conv8_2_relu"
 767 |   type: "ReLU"
 768 |   bottom: "conv8_2"
 769 |   top: "conv8_2"
 770 | }
 771 | layer {
 772 |   name: "fc7_norm"
 773 |   type: "Normalize"
 774 |   bottom: "fc7"
 775 |   top: "fc7_norm"
 776 |   norm_param {
 777 |     across_spatial: false
 778 |     scale_filler {
 779 |       type: "constant"
 780 |       value: 20
 781 |     }
 782 |     channel_shared: false
 783 |   }
 784 | }
 785 | layer {
 786 |   name: "fc7_norm_mbox_loc"
 787 |   type: "Convolution"
 788 |   bottom: "fc7_norm"
 789 |   top: "fc7_norm_mbox_loc"
 790 |   param {
 791 |     lr_mult: 1
 792 |     decay_mult: 1
 793 |   }
 794 |   param {
 795 |     lr_mult: 2
 796 |     decay_mult: 0
 797 |   }
 798 |   convolution_param {
 799 |     num_output: 40
 800 |     pad: 1
 801 |     kernel_size: 3
 802 |     stride: 1
 803 |     weight_filler {
 804 |       type: "xavier"
 805 |     }
 806 |     bias_filler {
 807 |       type: "constant"
 808 |       value: 0
 809 |     }
 810 |   }
 811 | }
 812 | layer {
 813 |   name: "fc7_norm_mbox_loc_perm"
 814 |   type: "Permute"
 815 |   bottom: "fc7_norm_mbox_loc"
 816 |   top: "fc7_norm_mbox_loc_perm"
 817 |   permute_param {
 818 |     order: 0
 819 |     order: 2
 820 |     order: 3
 821 |     order: 1
 822 |   }
 823 | }
 824 | layer {
 825 |   name: "fc7_norm_mbox_loc_flat"
 826 |   type: "Flatten"
 827 |   bottom: "fc7_norm_mbox_loc_perm"
 828 |   top: "fc7_norm_mbox_loc_flat"
 829 |   flatten_param {
 830 |     axis: 1
 831 |   }
 832 | }
 833 | layer {
 834 |   name: "fc7_norm_mbox_conf_new"
 835 |   type: "Convolution"
 836 |   bottom: "fc7_norm"
 837 |   top: "fc7_norm_mbox_conf"
 838 |   param {
 839 |     lr_mult: 1
 840 |     decay_mult: 1
 841 |   }
 842 |   param {
 843 |     lr_mult: 2
 844 |     decay_mult: 0
 845 |   }
 846 |   convolution_param {
 847 |     num_output: 60
 848 |     pad: 1
 849 |     kernel_size: 3
 850 |     stride: 1
 851 |     weight_filler {
 852 |       type: "xavier"
 853 |     }
 854 |     bias_filler {
 855 |       type: "constant"
 856 |       value: 0
 857 |     }
 858 |   }
 859 | }
 860 | layer {
 861 |   name: "fc7_norm_mbox_conf_perm"
 862 |   type: "Permute"
 863 |   bottom: "fc7_norm_mbox_conf"
 864 |   top: "fc7_norm_mbox_conf_perm"
 865 |   permute_param {
 866 |     order: 0
 867 |     order: 2
 868 |     order: 3
 869 |     order: 1
 870 |   }
 871 | }
 872 | layer {
 873 |   name: "fc7_norm_mbox_conf_flat"
 874 |   type: "Flatten"
 875 |   bottom: "fc7_norm_mbox_conf_perm"
 876 |   top: "fc7_norm_mbox_conf_flat"
 877 |   flatten_param {
 878 |     axis: 1
 879 |   }
 880 | }
 881 | layer {  # Prior (default) boxes for the fc7_norm feature map; "data" is passed as the second bottom.
 882 |   name: "fc7_norm_mbox_priorbox"
 883 |   type: "PriorBox"
 884 |   bottom: "fc7_norm"
 885 |   bottom: "data"
 886 |   top: "fc7_norm_mbox_priorbox"
 887 |   prior_box_param {
 888 |     min_size: 12
 889 |     max_size: 24
 890 |     aspect_ratio: 2
 891 |     aspect_ratio: 3
 892 |     aspect_ratio: 4
 893 |     aspect_ratio: 5
 894 |     flip: true  # presumably also adds the reciprocal of each aspect ratio -- confirm against the PriorBox layer
 895 |     clip: false
 896 |     variance: 0.1
 897 |     variance: 0.1
 898 |     variance: 0.2
 899 |     variance: 0.2
 900 |     step: 8  # NOTE(review): presumably the feature-map stride in input pixels -- confirm
 901 |     offset: 0.5
 902 |   }
 903 | }
 904 | layer {  # 3x3 box-location prediction convolution on conv6_2.
 905 |   name: "conv6_2_mbox_loc"
 906 |   type: "Convolution"
 907 |   bottom: "conv6_2"
 908 |   top: "conv6_2_mbox_loc"
 909 |   param {  # first learnable blob (presumably the weights)
 910 |     lr_mult: 1
 911 |     decay_mult: 1
 912 |   }
 913 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 914 |     lr_mult: 2
 915 |     decay_mult: 0
 916 |   }
 917 |   convolution_param {
 918 |     num_output: 40  # presumably priors per location x 4 box offsets; must agree with conv6_2_mbox_priorbox -- confirm
 919 |     pad: 1
 920 |     kernel_size: 3
 921 |     stride: 1
 922 |     weight_filler {
 923 |       type: "xavier"
 924 |     }
 925 |     bias_filler {
 926 |       type: "constant"
 927 |       value: 0
 928 |     }
 929 |   }
 930 | }
 931 | layer {  # Permutes the conv6_2 loc predictions: output axes are input axes (0, 2, 3, 1), so axis 1 moves last.
 932 |   name: "conv6_2_mbox_loc_perm"
 933 |   type: "Permute"
 934 |   bottom: "conv6_2_mbox_loc"
 935 |   top: "conv6_2_mbox_loc_perm"
 936 |   permute_param {
 937 |     order: 0
 938 |     order: 2
 939 |     order: 3
 940 |     order: 1  # presumably the channel axis (N,C,H,W layout) -- confirm
 941 |   }
 942 | }
 943 | layer {  # Flattens the permuted conv6_2 loc blob from axis 1 onward; axis 0 is kept.
 944 |   name: "conv6_2_mbox_loc_flat"
 945 |   type: "Flatten"
 946 |   bottom: "conv6_2_mbox_loc_perm"
 947 |   top: "conv6_2_mbox_loc_flat"
 948 |   flatten_param {
 949 |     axis: 1
 950 |   }
 951 | }
 952 | layer {  # 3x3 confidence-prediction convolution on conv6_2.
 953 |   name: "conv6_2_mbox_conf"
 954 |   type: "Convolution"
 955 |   bottom: "conv6_2"
 956 |   top: "conv6_2_mbox_conf"
 957 |   param {  # first learnable blob (presumably the weights)
 958 |     lr_mult: 1
 959 |     decay_mult: 1
 960 |   }
 961 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 962 |     lr_mult: 2
 963 |     decay_mult: 0
 964 |   }
 965 |   convolution_param {
 966 |     num_output: 60  # presumably priors per location x 6 classes (mbox_conf_reshape uses dim: 6) -- confirm
 967 |     pad: 1
 968 |     kernel_size: 3
 969 |     stride: 1
 970 |     weight_filler {
 971 |       type: "xavier"
 972 |     }
 973 |     bias_filler {
 974 |       type: "constant"
 975 |       value: 0
 976 |     }
 977 |   }
 978 | }
 979 | layer {  # Permutes the conv6_2 conf predictions: output axes are input axes (0, 2, 3, 1), so axis 1 moves last.
 980 |   name: "conv6_2_mbox_conf_perm"
 981 |   type: "Permute"
 982 |   bottom: "conv6_2_mbox_conf"
 983 |   top: "conv6_2_mbox_conf_perm"
 984 |   permute_param {
 985 |     order: 0
 986 |     order: 2
 987 |     order: 3
 988 |     order: 1  # presumably the channel axis (N,C,H,W layout) -- confirm
 989 |   }
 990 | }
 991 | layer {  # Flattens the permuted conv6_2 conf blob from axis 1 onward; axis 0 is kept.
 992 |   name: "conv6_2_mbox_conf_flat"
 993 |   type: "Flatten"
 994 |   bottom: "conv6_2_mbox_conf_perm"
 995 |   top: "conv6_2_mbox_conf_flat"
 996 |   flatten_param {
 997 |     axis: 1
 998 |   }
 999 | }
1000 | layer {  # Prior (default) boxes for the conv6_2 feature map; "data" is passed as the second bottom.
1001 |   name: "conv6_2_mbox_priorbox"
1002 |   type: "PriorBox"
1003 |   bottom: "conv6_2"
1004 |   bottom: "data"
1005 |   top: "conv6_2_mbox_priorbox"
1006 |   prior_box_param {
1007 |     min_size: 24
1008 |     max_size: 36
1009 |     aspect_ratio: 2
1010 |     aspect_ratio: 3
1011 |     aspect_ratio: 4
1012 |     aspect_ratio: 5
1013 |     flip: true  # presumably also adds the reciprocal of each aspect ratio -- confirm against the PriorBox layer
1014 |     clip: false
1015 |     variance: 0.1
1016 |     variance: 0.1
1017 |     variance: 0.2
1018 |     variance: 0.2
1019 |     step: 8  # same step as fc7_norm_mbox_priorbox; NOTE(review): presumably the feature-map stride in input pixels -- confirm
1020 |     offset: 0.5
1021 |   }
1022 | }
1023 | layer {  # 3x3 box-location prediction convolution on conv7_2.
1024 |   name: "conv7_2_mbox_loc"
1025 |   type: "Convolution"
1026 |   bottom: "conv7_2"
1027 |   top: "conv7_2_mbox_loc"
1028 |   param {  # first learnable blob (presumably the weights)
1029 |     lr_mult: 1
1030 |     decay_mult: 1
1031 |   }
1032 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
1033 |     lr_mult: 2
1034 |     decay_mult: 0
1035 |   }
1036 |   convolution_param {
1037 |     num_output: 40  # presumably priors per location x 4 box offsets; must agree with conv7_2_mbox_priorbox -- confirm
1038 |     pad: 1
1039 |     kernel_size: 3
1040 |     stride: 1
1041 |     weight_filler {
1042 |       type: "xavier"
1043 |     }
1044 |     bias_filler {
1045 |       type: "constant"
1046 |       value: 0
1047 |     }
1048 |   }
1049 | }
1050 | layer {  # Permutes the conv7_2 loc predictions: output axes are input axes (0, 2, 3, 1), so axis 1 moves last.
1051 |   name: "conv7_2_mbox_loc_perm"
1052 |   type: "Permute"
1053 |   bottom: "conv7_2_mbox_loc"
1054 |   top: "conv7_2_mbox_loc_perm"
1055 |   permute_param {
1056 |     order: 0
1057 |     order: 2
1058 |     order: 3
1059 |     order: 1  # presumably the channel axis (N,C,H,W layout) -- confirm
1060 |   }
1061 | }
1062 | layer {  # Flattens the permuted conv7_2 loc blob from axis 1 onward; axis 0 is kept.
1063 |   name: "conv7_2_mbox_loc_flat"
1064 |   type: "Flatten"
1065 |   bottom: "conv7_2_mbox_loc_perm"
1066 |   top: "conv7_2_mbox_loc_flat"
1067 |   flatten_param {
1068 |     axis: 1
1069 |   }
1070 | }
1071 | layer {  # 3x3 confidence-prediction convolution on conv7_2.
1072 |   name: "conv7_2_mbox_conf"
1073 |   type: "Convolution"
1074 |   bottom: "conv7_2"
1075 |   top: "conv7_2_mbox_conf"
1076 |   param {  # first learnable blob (presumably the weights)
1077 |     lr_mult: 1
1078 |     decay_mult: 1
1079 |   }
1080 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
1081 |     lr_mult: 2
1082 |     decay_mult: 0
1083 |   }
1084 |   convolution_param {
1085 |     num_output: 60  # presumably priors per location x 6 classes (mbox_conf_reshape uses dim: 6) -- confirm
1086 |     pad: 1
1087 |     kernel_size: 3
1088 |     stride: 1
1089 |     weight_filler {
1090 |       type: "xavier"
1091 |     }
1092 |     bias_filler {
1093 |       type: "constant"
1094 |       value: 0
1095 |     }
1096 |   }
1097 | }
1098 | layer {  # Permutes the conv7_2 conf predictions: output axes are input axes (0, 2, 3, 1), so axis 1 moves last.
1099 |   name: "conv7_2_mbox_conf_perm"
1100 |   type: "Permute"
1101 |   bottom: "conv7_2_mbox_conf"
1102 |   top: "conv7_2_mbox_conf_perm"
1103 |   permute_param {
1104 |     order: 0
1105 |     order: 2
1106 |     order: 3
1107 |     order: 1  # presumably the channel axis (N,C,H,W layout) -- confirm
1108 |   }
1109 | }
1110 | layer {  # Flattens the permuted conv7_2 conf blob from axis 1 onward; axis 0 is kept.
1111 |   name: "conv7_2_mbox_conf_flat"
1112 |   type: "Flatten"
1113 |   bottom: "conv7_2_mbox_conf_perm"
1114 |   top: "conv7_2_mbox_conf_flat"
1115 |   flatten_param {
1116 |     axis: 1
1117 |   }
1118 | }
1119 | layer {  # Prior (default) boxes for the conv7_2 feature map; "data" is passed as the second bottom.
1120 |   name: "conv7_2_mbox_priorbox"
1121 |   type: "PriorBox"
1122 |   bottom: "conv7_2"
1123 |   bottom: "data"
1124 |   top: "conv7_2_mbox_priorbox"
1125 |   prior_box_param {
1126 |     min_size: 36
1127 |     max_size: 48
1128 |     aspect_ratio: 2
1129 |     aspect_ratio: 3
1130 |     aspect_ratio: 4
1131 |     aspect_ratio: 5
1132 |     flip: true  # presumably also adds the reciprocal of each aspect ratio -- confirm against the PriorBox layer
1133 |     clip: false
1134 |     variance: 0.1
1135 |     variance: 0.1
1136 |     variance: 0.2
1137 |     variance: 0.2
1138 |     step: 16  # twice the step of the fc7_norm / conv6_2 prior boxes; NOTE(review): presumably the feature-map stride in input pixels -- confirm
1139 |     offset: 0.5
1140 |   }
1141 | }
1142 | layer {  # 3x3 box-location prediction convolution on conv8_2.
1143 |   name: "conv8_2_mbox_loc"
1144 |   type: "Convolution"
1145 |   bottom: "conv8_2"
1146 |   top: "conv8_2_mbox_loc"
1147 |   param {  # first learnable blob (presumably the weights)
1148 |     lr_mult: 1
1149 |     decay_mult: 1
1150 |   }
1151 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
1152 |     lr_mult: 2
1153 |     decay_mult: 0
1154 |   }
1155 |   convolution_param {
1156 |     num_output: 24  # smaller than the other loc heads (40); conv8_2_mbox_priorbox lists only two aspect ratios. Presumably priors per location x 4 -- confirm
1157 |     pad: 1
1158 |     kernel_size: 3
1159 |     stride: 1
1160 |     weight_filler {
1161 |       type: "xavier"
1162 |     }
1163 |     bias_filler {
1164 |       type: "constant"
1165 |       value: 0
1166 |     }
1167 |   }
1168 | }
1169 | layer {  # Permutes the conv8_2 loc predictions: output axes are input axes (0, 2, 3, 1), so axis 1 moves last.
1170 |   name: "conv8_2_mbox_loc_perm"
1171 |   type: "Permute"
1172 |   bottom: "conv8_2_mbox_loc"
1173 |   top: "conv8_2_mbox_loc_perm"
1174 |   permute_param {
1175 |     order: 0
1176 |     order: 2
1177 |     order: 3
1178 |     order: 1  # presumably the channel axis (N,C,H,W layout) -- confirm
1179 |   }
1180 | }
1181 | layer {  # Flattens the permuted conv8_2 loc blob from axis 1 onward; axis 0 is kept.
1182 |   name: "conv8_2_mbox_loc_flat"
1183 |   type: "Flatten"
1184 |   bottom: "conv8_2_mbox_loc_perm"
1185 |   top: "conv8_2_mbox_loc_flat"
1186 |   flatten_param {
1187 |     axis: 1
1188 |   }
1189 | }
1190 | layer {  # 3x3 confidence-prediction convolution on conv8_2.
1191 |   name: "conv8_2_mbox_conf"
1192 |   type: "Convolution"
1193 |   bottom: "conv8_2"
1194 |   top: "conv8_2_mbox_conf"
1195 |   param {  # first learnable blob (presumably the weights)
1196 |     lr_mult: 1
1197 |     decay_mult: 1
1198 |   }
1199 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
1200 |     lr_mult: 2
1201 |     decay_mult: 0
1202 |   }
1203 |   convolution_param {
1204 |     num_output: 36  # smaller than the other conf heads (60); conv8_2_mbox_priorbox lists only two aspect ratios. Presumably priors per location x 6 classes -- confirm
1205 |     pad: 1
1206 |     kernel_size: 3
1207 |     stride: 1
1208 |     weight_filler {
1209 |       type: "xavier"
1210 |     }
1211 |     bias_filler {
1212 |       type: "constant"
1213 |       value: 0
1214 |     }
1215 |   }
1216 | }
1217 | layer {  # Permutes the conv8_2 conf predictions: output axes are input axes (0, 2, 3, 1), so axis 1 moves last.
1218 |   name: "conv8_2_mbox_conf_perm"
1219 |   type: "Permute"
1220 |   bottom: "conv8_2_mbox_conf"
1221 |   top: "conv8_2_mbox_conf_perm"
1222 |   permute_param {
1223 |     order: 0
1224 |     order: 2
1225 |     order: 3
1226 |     order: 1  # presumably the channel axis (N,C,H,W layout) -- confirm
1227 |   }
1228 | }
1229 | layer {  # Flattens the permuted conv8_2 conf blob from axis 1 onward; axis 0 is kept.
1230 |   name: "conv8_2_mbox_conf_flat"
1231 |   type: "Flatten"
1232 |   bottom: "conv8_2_mbox_conf_perm"
1233 |   top: "conv8_2_mbox_conf_flat"
1234 |   flatten_param {
1235 |     axis: 1
1236 |   }
1237 | }
1238 | layer {  # Prior (default) boxes for the conv8_2 feature map; "data" is passed as the second bottom.
1239 |   name: "conv8_2_mbox_priorbox"
1240 |   type: "PriorBox"
1241 |   bottom: "conv8_2"
1242 |   bottom: "data"
1243 |   top: "conv8_2_mbox_priorbox"
1244 |   prior_box_param {
1245 |     min_size: 48
1246 |     max_size: 60
1247 |     aspect_ratio: 2
1248 |     aspect_ratio: 3  # only ratios 2 and 3 here; the other three PriorBox layers also list 4 and 5
1249 |     flip: true  # presumably also adds the reciprocal of each aspect ratio -- confirm against the PriorBox layer
1250 |     clip: false
1251 |     variance: 0.1
1252 |     variance: 0.1
1253 |     variance: 0.2
1254 |     variance: 0.2
1255 |     step: 30  # NOTE(review): not a power of two like the other steps (8, 8, 16); presumably input size / feature-map size -- confirm
1256 |     offset: 0.5
1257 |   }
1258 | }
1259 | layer {  # Concatenates the four flattened loc predictions along axis 1, in the order listed.
1260 |   name: "mbox_loc"
1261 |   type: "Concat"
1262 |   bottom: "fc7_norm_mbox_loc_flat"
1263 |   bottom: "conv6_2_mbox_loc_flat"
1264 |   bottom: "conv7_2_mbox_loc_flat"
1265 |   bottom: "conv8_2_mbox_loc_flat"
1266 |   top: "mbox_loc"
1267 |   concat_param {
1268 |     axis: 1
1269 |   }
1270 | }
1271 | layer {  # Concatenates the four flattened conf predictions along axis 1, in the same source order as mbox_loc.
1272 |   name: "mbox_conf"
1273 |   type: "Concat"
1274 |   bottom: "fc7_norm_mbox_conf_flat"
1275 |   bottom: "conv6_2_mbox_conf_flat"
1276 |   bottom: "conv7_2_mbox_conf_flat"
1277 |   bottom: "conv8_2_mbox_conf_flat"
1278 |   top: "mbox_conf"
1279 |   concat_param {
1280 |     axis: 1
1281 |   }
1282 | }
1283 | layer {  # Concatenates the four PriorBox outputs, in the same source order as mbox_loc / mbox_conf.
1284 |   name: "mbox_priorbox"
1285 |   type: "Concat"
1286 |   bottom: "fc7_norm_mbox_priorbox"
1287 |   bottom: "conv6_2_mbox_priorbox"
1288 |   bottom: "conv7_2_mbox_priorbox"
1289 |   bottom: "conv8_2_mbox_priorbox"
1290 |   top: "mbox_priorbox"
1291 |   concat_param {
1292 |     axis: 2  # axis 2 here, unlike the axis-1 concat used for mbox_loc and mbox_conf
1293 |   }
1294 | }
1295 | layer {  # Reshapes the flat mbox_conf blob into three axes with a trailing axis of size 6.
1296 |   name: "mbox_conf_reshape"
1297 |   type: "Reshape"
1298 |   bottom: "mbox_conf"
1299 |   top: "mbox_conf_reshape"
1300 |   reshape_param {
1301 |     shape {
1302 |       dim: 0  # presumably "copy this axis from the bottom" (Caffe Reshape convention) -- confirm
1303 |       dim: -1  # presumably inferred from the remaining element count -- confirm
1304 |       dim: 6  # same value as num_classes in detection_out / detection_eval
1305 |     }
1306 |   }
1307 | }
1308 | layer {  # Softmax over axis 2 of mbox_conf_reshape, i.e. the size-6 axis.
1309 |   name: "mbox_conf_softmax"
1310 |   type: "Softmax"
1311 |   bottom: "mbox_conf_reshape"
1312 |   top: "mbox_conf_softmax"
1313 |   softmax_param {
1314 |     axis: 2
1315 |   }
1316 | }
1317 | layer {  # Flattens the softmax output from axis 1 onward before it is fed to detection_out.
1318 |   name: "mbox_conf_flatten"
1319 |   type: "Flatten"
1320 |   bottom: "mbox_conf_softmax"
1321 |   top: "mbox_conf_flatten"
1322 |   flatten_param {
1323 |     axis: 1
1324 |   }
1325 | }
1326 | layer {  # Produces the final detections from loc predictions, class confidences and prior boxes.
1327 |   name: "detection_out"
1328 |   type: "DetectionOutput"
1329 |   bottom: "mbox_loc"
1330 |   bottom: "mbox_conf_flatten"
1331 |   bottom: "mbox_priorbox"
1332 |   top: "detection_out"
1333 |   include {
1334 |     phase: TEST  # layer is only instantiated in the TEST phase
1335 |   }
1336 |   detection_output_param {
1337 |     num_classes: 6  # must match dim: 6 in mbox_conf_reshape
1338 |     share_location: true
1339 |     background_label_id: 0
1340 |     nms_param {
1341 |       nms_threshold: 0.2
1342 |       top_k: 800
1343 |     }
1344 |     code_type: CENTER_SIZE
1345 |     keep_top_k: 200
1346 |     confidence_threshold: 0.01
1347 |   }
1348 | }
1349 | layer {  # Evaluates detection_out against the "label" blob.
1350 |   name: "detection_eval"
1351 |   type: "DetectionEvaluate"
1352 |   bottom: "detection_out"
1353 |   bottom: "label"
1354 |   top: "detection_eval"
1355 |   include {
1356 |     phase: TEST  # layer is only instantiated in the TEST phase
1357 |   }
1358 |   detection_evaluate_param {
1359 |     num_classes: 6  # same value as num_classes in detection_out
1360 |     background_label_id: 0
1361 |     overlap_threshold: 0.5
1362 |     evaluate_difficult_gt: false
1363 |     name_size_file: "/VOC0712/test_name_size.txt"  # absolute path; adjust to the local dataset location
1364 |   }
1365 | }
1366 | 


--------------------------------------------------------------------------------
/center_loss_train.prototxt:
--------------------------------------------------------------------------------
   1 | #center_loss_ssd
   2 | layer {  # Training input: reads annotated samples from LMDB and emits "data" and "label".
   3 |   name: "data"
   4 |   type: "AnnotatedData"
   5 |   top: "data"
   6 |   top: "label"
   7 |   include {
   8 |     phase: TRAIN  # layer is only instantiated in the TRAIN phase
   9 |   }
  10 |   transform_param {
  11 |     mirror: true
  12 |     mean_value: 28  # the same mean value is given three times (presumably one per channel -- confirm)
  13 |     mean_value: 28
  14 |     mean_value: 28
  15 |     resize_param {
  16 |       prob: 1
  17 |       resize_mode: WARP
  18 |       height: 300
  19 |       width: 300
  20 |       interp_mode: LINEAR  # several interp_mode entries; presumably one is picked at random per sample -- confirm
  21 |       interp_mode: AREA
  22 |       interp_mode: NEAREST
  23 |       interp_mode: CUBIC
  24 |       interp_mode: LANCZOS4
  25 |     }
  26 |     emit_constraint {
  27 |       emit_type: CENTER
  28 |     }
  29 |     distort_param {
  30 |       brightness_prob: 0.8
  31 |       brightness_delta: 32
  32 |       contrast_prob: 0.5
  33 |       contrast_lower: 0.5
  34 |       contrast_upper: 1.5
  35 |       hue_prob: 0.5
  36 |       hue_delta: 18
  37 |       saturation_prob: 0.5
  38 |       saturation_lower: 0.5
  39 |       saturation_upper: 1.5
  40 |       random_order_prob: 0
  41 |     }
  42 |     expand_param {
  43 |       prob: 0.5
  44 |       max_expand_ratio: 2
  45 |     }
  46 |   }
  47 |   data_param {
  48 |     source: "/VOC0712/lmdb/VOC0712_trainval_lmdb/"  # absolute path; adjust to the local dataset location
  49 |     batch_size: 16
  50 |     backend: LMDB
  51 |   }
  52 |   annotated_data_param {
  53 |     batch_sampler {  # no sampler / constraint given (presumably keeps the whole image -- confirm)
  54 |       max_sample: 1
  55 |       max_trials: 1
  56 |     }
  57 |     batch_sampler {  # the next five samplers differ only in min_jaccard_overlap: 0.1, 0.3, 0.5, 0.7, 0.9
  58 |       sampler {
  59 |         min_scale: 0.3
  60 |         max_scale: 1
  61 |         min_aspect_ratio: 0.5
  62 |         max_aspect_ratio: 2
  63 |       }
  64 |       sample_constraint {
  65 |         min_jaccard_overlap: 0.1
  66 |       }
  67 |       max_sample: 1
  68 |       max_trials: 50
  69 |     }
  70 |     batch_sampler {
  71 |       sampler {
  72 |         min_scale: 0.3
  73 |         max_scale: 1
  74 |         min_aspect_ratio: 0.5
  75 |         max_aspect_ratio: 2
  76 |       }
  77 |       sample_constraint {
  78 |         min_jaccard_overlap: 0.3
  79 |       }
  80 |       max_sample: 1
  81 |       max_trials: 50
  82 |     }
  83 |     batch_sampler {
  84 |       sampler {
  85 |         min_scale: 0.3
  86 |         max_scale: 1
  87 |         min_aspect_ratio: 0.5
  88 |         max_aspect_ratio: 2
  89 |       }
  90 |       sample_constraint {
  91 |         min_jaccard_overlap: 0.5
  92 |       }
  93 |       max_sample: 1
  94 |       max_trials: 50
  95 |     }
  96 |     batch_sampler {
  97 |       sampler {
  98 |         min_scale: 0.3
  99 |         max_scale: 1
 100 |         min_aspect_ratio: 0.5
 101 |         max_aspect_ratio: 2
 102 |       }
 103 |       sample_constraint {
 104 |         min_jaccard_overlap: 0.7
 105 |       }
 106 |       max_sample: 1
 107 |       max_trials: 50
 108 |     }
 109 |     batch_sampler {
 110 |       sampler {
 111 |         min_scale: 0.3
 112 |         max_scale: 1
 113 |         min_aspect_ratio: 0.5
 114 |         max_aspect_ratio: 2
 115 |       }
 116 |       sample_constraint {
 117 |         min_jaccard_overlap: 0.9
 118 |       }
 119 |       max_sample: 1
 120 |       max_trials: 50
 121 |     }
 122 |     batch_sampler {  # last sampler constrains max_jaccard_overlap instead of min_jaccard_overlap
 123 |       sampler {
 124 |         min_scale: 0.3
 125 |         max_scale: 1
 126 |         min_aspect_ratio: 0.5
 127 |         max_aspect_ratio: 2
 128 |       }
 129 |       sample_constraint {
 130 |         max_jaccard_overlap: 1
 131 |       }
 132 |       max_sample: 1
 133 |       max_trials: 50
 134 |     }
 135 |     label_map_file: "/VOC0712/labelmap_voc.prototxt"  # absolute path; adjust to the local dataset location
 136 |   }
 137 | }
 138 | layer {  # First convolution: 3x3 kernel, pad 1, 64 outputs, applied directly to "data".
 139 |   name: "conv1_1"
 140 |   type: "Convolution"
 141 |   bottom: "data"
 142 |   top: "conv1_1"
 143 |   param {  # first learnable blob (presumably the weights)
 144 |     lr_mult: 1
 145 |     decay_mult: 1
 146 |   }
 147 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 148 |     lr_mult: 2
 149 |     decay_mult: 0
 150 |   }
 151 |   convolution_param {
 152 |     num_output: 64
 153 |     pad: 1
 154 |     kernel_size: 3
 155 |     weight_filler {
 156 |       type: "xavier"
 157 |     }
 158 |     bias_filler {
 159 |       type: "constant"
 160 |       value: 0
 161 |     }
 162 |   }
 163 | }
 164 | layer {  # In-place ReLU: bottom and top are both "conv1_1".
 165 |   name: "relu1_1"
 166 |   type: "ReLU"
 167 |   bottom: "conv1_1"
 168 |   top: "conv1_1"
 169 | }
 170 | layer {  # 3x3 convolution, pad 1, 64 outputs.
 171 |   name: "conv1_2"
 172 |   type: "Convolution"
 173 |   bottom: "conv1_1"
 174 |   top: "conv1_2"
 175 |   param {  # first learnable blob (presumably the weights)
 176 |     lr_mult: 1
 177 |     decay_mult: 1
 178 |   }
 179 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 180 |     lr_mult: 2
 181 |     decay_mult: 0
 182 |   }
 183 |   convolution_param {
 184 |     num_output: 64
 185 |     pad: 1
 186 |     kernel_size: 3
 187 |     weight_filler {
 188 |       type: "xavier"
 189 |     }
 190 |     bias_filler {
 191 |       type: "constant"
 192 |       value: 0
 193 |     }
 194 |   }
 195 | }
 196 | layer {  # In-place ReLU: bottom and top are both "conv1_2".
 197 |   name: "relu1_2"
 198 |   type: "ReLU"
 199 |   bottom: "conv1_2"
 200 |   top: "conv1_2"
 201 | }
 202 | layer {  # 2x2 max pooling with stride 2 (downsamples conv1_2).
 203 |   name: "pool1"
 204 |   type: "Pooling"
 205 |   bottom: "conv1_2"
 206 |   top: "pool1"
 207 |   pooling_param {
 208 |     pool: MAX
 209 |     kernel_size: 2
 210 |     stride: 2
 211 |   }
 212 | }
 213 | layer {  # 3x3 convolution, pad 1, 128 outputs.
 214 |   name: "conv2_1"
 215 |   type: "Convolution"
 216 |   bottom: "pool1"
 217 |   top: "conv2_1"
 218 |   param {  # first learnable blob (presumably the weights)
 219 |     lr_mult: 1
 220 |     decay_mult: 1
 221 |   }
 222 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 223 |     lr_mult: 2
 224 |     decay_mult: 0
 225 |   }
 226 |   convolution_param {
 227 |     num_output: 128
 228 |     pad: 1
 229 |     kernel_size: 3
 230 |     weight_filler {
 231 |       type: "xavier"
 232 |     }
 233 |     bias_filler {
 234 |       type: "constant"
 235 |       value: 0
 236 |     }
 237 |   }
 238 | }
 239 | layer {  # In-place ReLU: bottom and top are both "conv2_1".
 240 |   name: "relu2_1"
 241 |   type: "ReLU"
 242 |   bottom: "conv2_1"
 243 |   top: "conv2_1"
 244 | }
 245 | layer {  # 3x3 convolution, pad 1, 128 outputs.
 246 |   name: "conv2_2"
 247 |   type: "Convolution"
 248 |   bottom: "conv2_1"
 249 |   top: "conv2_2"
 250 |   param {  # first learnable blob (presumably the weights)
 251 |     lr_mult: 1
 252 |     decay_mult: 1
 253 |   }
 254 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 255 |     lr_mult: 2
 256 |     decay_mult: 0
 257 |   }
 258 |   convolution_param {
 259 |     num_output: 128
 260 |     pad: 1
 261 |     kernel_size: 3
 262 |     weight_filler {
 263 |       type: "xavier"
 264 |     }
 265 |     bias_filler {
 266 |       type: "constant"
 267 |       value: 0
 268 |     }
 269 |   }
 270 | }
 271 | layer {  # In-place ReLU: bottom and top are both "conv2_2".
 272 |   name: "relu2_2"
 273 |   type: "ReLU"
 274 |   bottom: "conv2_2"
 275 |   top: "conv2_2"
 276 | }
 277 | layer {  # 2x2 max pooling with stride 2 (downsamples conv2_2).
 278 |   name: "pool2"
 279 |   type: "Pooling"
 280 |   bottom: "conv2_2"
 281 |   top: "pool2"
 282 |   pooling_param {
 283 |     pool: MAX
 284 |     kernel_size: 2
 285 |     stride: 2
 286 |   }
 287 | }
 288 | layer {  # 3x3 convolution, pad 1, 256 outputs.
 289 |   name: "conv3_1"
 290 |   type: "Convolution"
 291 |   bottom: "pool2"
 292 |   top: "conv3_1"
 293 |   param {  # first learnable blob (presumably the weights)
 294 |     lr_mult: 1
 295 |     decay_mult: 1
 296 |   }
 297 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 298 |     lr_mult: 2
 299 |     decay_mult: 0
 300 |   }
 301 |   convolution_param {
 302 |     num_output: 256
 303 |     pad: 1
 304 |     kernel_size: 3
 305 |     weight_filler {
 306 |       type: "xavier"
 307 |     }
 308 |     bias_filler {
 309 |       type: "constant"
 310 |       value: 0
 311 |     }
 312 |   }
 313 | }
 314 | layer {  # In-place ReLU: bottom and top are both "conv3_1".
 315 |   name: "relu3_1"
 316 |   type: "ReLU"
 317 |   bottom: "conv3_1"
 318 |   top: "conv3_1"
 319 | }
 320 | layer {  # 3x3 convolution, pad 1, 256 outputs.
 321 |   name: "conv3_2"
 322 |   type: "Convolution"
 323 |   bottom: "conv3_1"
 324 |   top: "conv3_2"
 325 |   param {  # first learnable blob (presumably the weights)
 326 |     lr_mult: 1
 327 |     decay_mult: 1
 328 |   }
 329 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 330 |     lr_mult: 2
 331 |     decay_mult: 0
 332 |   }
 333 |   convolution_param {
 334 |     num_output: 256
 335 |     pad: 1
 336 |     kernel_size: 3
 337 |     weight_filler {
 338 |       type: "xavier"
 339 |     }
 340 |     bias_filler {
 341 |       type: "constant"
 342 |       value: 0
 343 |     }
 344 |   }
 345 | }
 346 | layer {  # In-place ReLU: bottom and top are both "conv3_2".
 347 |   name: "relu3_2"
 348 |   type: "ReLU"
 349 |   bottom: "conv3_2"
 350 |   top: "conv3_2"
 351 | }
 352 | layer {  # 3x3 convolution, pad 1, 256 outputs.
 353 |   name: "conv3_3"
 354 |   type: "Convolution"
 355 |   bottom: "conv3_2"
 356 |   top: "conv3_3"
 357 |   param {  # first learnable blob (presumably the weights)
 358 |     lr_mult: 1
 359 |     decay_mult: 1
 360 |   }
 361 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 362 |     lr_mult: 2
 363 |     decay_mult: 0
 364 |   }
 365 |   convolution_param {
 366 |     num_output: 256
 367 |     pad: 1
 368 |     kernel_size: 3
 369 |     weight_filler {
 370 |       type: "xavier"
 371 |     }
 372 |     bias_filler {
 373 |       type: "constant"
 374 |       value: 0
 375 |     }
 376 |   }
 377 | }
 378 | layer {  # In-place ReLU: bottom and top are both "conv3_3".
 379 |   name: "relu3_3"
 380 |   type: "ReLU"
 381 |   bottom: "conv3_3"
 382 |   top: "conv3_3"
 383 | }
 384 | layer {  # 2x2 max pooling with stride 2 (downsamples conv3_3); the last stride-2 pooling in the visible backbone.
 385 |   name: "pool3"
 386 |   type: "Pooling"
 387 |   bottom: "conv3_3"
 388 |   top: "pool3"
 389 |   pooling_param {
 390 |     pool: MAX
 391 |     kernel_size: 2
 392 |     stride: 2
 393 |   }
 394 | }
 395 | layer {  # 3x3 convolution, pad 1, 512 outputs.
 396 |   name: "conv4_1"
 397 |   type: "Convolution"
 398 |   bottom: "pool3"
 399 |   top: "conv4_1"
 400 |   param {  # first learnable blob (presumably the weights)
 401 |     lr_mult: 1
 402 |     decay_mult: 1
 403 |   }
 404 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 405 |     lr_mult: 2
 406 |     decay_mult: 0
 407 |   }
 408 |   convolution_param {
 409 |     num_output: 512
 410 |     pad: 1
 411 |     kernel_size: 3
 412 |     weight_filler {
 413 |       type: "xavier"
 414 |     }
 415 |     bias_filler {
 416 |       type: "constant"
 417 |       value: 0
 418 |     }
 419 |   }
 420 | }
 421 | layer {  # In-place ReLU: bottom and top are both "conv4_1".
 422 |   name: "relu4_1"
 423 |   type: "ReLU"
 424 |   bottom: "conv4_1"
 425 |   top: "conv4_1"
 426 | }
 427 | layer {  # 3x3 convolution, pad 1, 512 outputs.
 428 |   name: "conv4_2"
 429 |   type: "Convolution"
 430 |   bottom: "conv4_1"
 431 |   top: "conv4_2"
 432 |   param {  # first learnable blob (presumably the weights)
 433 |     lr_mult: 1
 434 |     decay_mult: 1
 435 |   }
 436 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 437 |     lr_mult: 2
 438 |     decay_mult: 0
 439 |   }
 440 |   convolution_param {
 441 |     num_output: 512
 442 |     pad: 1
 443 |     kernel_size: 3
 444 |     weight_filler {
 445 |       type: "xavier"
 446 |     }
 447 |     bias_filler {
 448 |       type: "constant"
 449 |       value: 0
 450 |     }
 451 |   }
 452 | }
 453 | layer {  # In-place ReLU: bottom and top are both "conv4_2".
 454 |   name: "relu4_2"
 455 |   type: "ReLU"
 456 |   bottom: "conv4_2"
 457 |   top: "conv4_2"
 458 | }
 459 | layer {  # 3x3 convolution, pad 1, 512 outputs.
 460 |   name: "conv4_3"
 461 |   type: "Convolution"
 462 |   bottom: "conv4_2"
 463 |   top: "conv4_3"
 464 |   param {  # first learnable blob (presumably the weights)
 465 |     lr_mult: 1
 466 |     decay_mult: 1
 467 |   }
 468 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 469 |     lr_mult: 2
 470 |     decay_mult: 0
 471 |   }
 472 |   convolution_param {
 473 |     num_output: 512
 474 |     pad: 1
 475 |     kernel_size: 3
 476 |     weight_filler {
 477 |       type: "xavier"
 478 |     }
 479 |     bias_filler {
 480 |       type: "constant"
 481 |       value: 0
 482 |     }
 483 |   }
 484 | }
 485 | layer {  # In-place ReLU: bottom and top are both "conv4_3".
 486 |   name: "relu4_3"
 487 |   type: "ReLU"
 488 |   bottom: "conv4_3"
 489 |   top: "conv4_3"
 490 | }
 491 | layer {  # 3x3 max pooling with stride 1 and pad 1: unlike pool1-pool3 (2x2, stride 2) this does not downsample.
 492 |   name: "pool4"
 493 |   type: "Pooling"
 494 |   bottom: "conv4_3"
 495 |   top: "pool4"
 496 |   pooling_param {
 497 |     pool: MAX
 498 |     kernel_size: 3
 499 |     stride: 1
 500 |     pad: 1
 501 |   }
 502 | }
 503 | layer {  # Dilated 3x3 convolution (dilation 2, pad 2), 512 outputs.
 504 |   name: "conv5_1"
 505 |   type: "Convolution"
 506 |   bottom: "pool4"
 507 |   top: "conv5_1"
 508 |   param {  # first learnable blob (presumably the weights)
 509 |     lr_mult: 1
 510 |     decay_mult: 1
 511 |   }
 512 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 513 |     lr_mult: 2
 514 |     decay_mult: 0
 515 |   }
 516 |   convolution_param {
 517 |     num_output: 512
 518 |     pad: 2  # pad equals the dilation, matching the dilated kernel extent 2*(3-1)+1 = 5
 519 |     kernel_size: 3
 520 |     weight_filler {
 521 |       type: "xavier"
 522 |     }
 523 |     bias_filler {
 524 |       type: "constant"
 525 |       value: 0
 526 |     }
 527 |     dilation: 2
 528 |   }
 529 | }
 530 | layer {  # In-place ReLU: bottom and top are both "conv5_1".
 531 |   name: "relu5_1"
 532 |   type: "ReLU"
 533 |   bottom: "conv5_1"
 534 |   top: "conv5_1"
 535 | }
 536 | layer {  # Dilated 3x3 convolution (dilation 2, pad 2), 512 outputs.
 537 |   name: "conv5_2"
 538 |   type: "Convolution"
 539 |   bottom: "conv5_1"
 540 |   top: "conv5_2"
 541 |   param {  # first learnable blob (presumably the weights)
 542 |     lr_mult: 1
 543 |     decay_mult: 1
 544 |   }
 545 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 546 |     lr_mult: 2
 547 |     decay_mult: 0
 548 |   }
 549 |   convolution_param {
 550 |     num_output: 512
 551 |     pad: 2  # pad equals the dilation, matching the dilated kernel extent 2*(3-1)+1 = 5
 552 |     kernel_size: 3
 553 |     weight_filler {
 554 |       type: "xavier"
 555 |     }
 556 |     bias_filler {
 557 |       type: "constant"
 558 |       value: 0
 559 |     }
 560 |     dilation: 2
 561 |   }
 562 | }
 563 | layer {  # In-place ReLU: bottom and top are both "conv5_2".
 564 |   name: "relu5_2"
 565 |   type: "ReLU"
 566 |   bottom: "conv5_2"
 567 |   top: "conv5_2"
 568 | }
 569 | layer {  # Dilated 3x3 convolution (dilation 2, pad 2), 512 outputs.
 570 |   name: "conv5_3"
 571 |   type: "Convolution"
 572 |   bottom: "conv5_2"
 573 |   top: "conv5_3"
 574 |   param {  # first learnable blob (presumably the weights)
 575 |     lr_mult: 1
 576 |     decay_mult: 1
 577 |   }
 578 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 579 |     lr_mult: 2
 580 |     decay_mult: 0
 581 |   }
 582 |   convolution_param {
 583 |     num_output: 512
 584 |     pad: 2  # pad equals the dilation, matching the dilated kernel extent 2*(3-1)+1 = 5
 585 |     kernel_size: 3
 586 |     weight_filler {
 587 |       type: "xavier"
 588 |     }
 589 |     bias_filler {
 590 |       type: "constant"
 591 |       value: 0
 592 |     }
 593 |     dilation: 2
 594 |   }
 595 | }
 596 | layer {  # In-place ReLU: bottom and top are both "conv5_3".
 597 |   name: "relu5_3"
 598 |   type: "ReLU"
 599 |   bottom: "conv5_3"
 600 |   top: "conv5_3"
 601 | }
 602 | layer {  # 3x3 max pooling with stride 1 and pad 1 (same settings as pool4; no downsampling).
 603 |   name: "pool5"
 604 |   type: "Pooling"
 605 |   bottom: "conv5_3"
 606 |   top: "pool5"
 607 |   pooling_param {
 608 |     pool: MAX
 609 |     kernel_size: 3
 610 |     stride: 1
 611 |     pad: 1
 612 |   }
 613 | }
 614 | layer {  # "fc6" is declared as a dilated 3x3 Convolution (dilation 6, pad 6) with 1024 outputs.
 615 |   name: "fc6"
 616 |   type: "Convolution"
 617 |   bottom: "pool5"
 618 |   top: "fc6"
 619 |   param {  # first learnable blob (presumably the weights)
 620 |     lr_mult: 1
 621 |     decay_mult: 1
 622 |   }
 623 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 624 |     lr_mult: 2
 625 |     decay_mult: 0
 626 |   }
 627 |   convolution_param {
 628 |     num_output: 1024
 629 |     pad: 6  # pad equals the dilation, matching the dilated kernel extent 6*(3-1)+1 = 13
 630 |     kernel_size: 3
 631 |     weight_filler {
 632 |       type: "xavier"
 633 |     }
 634 |     bias_filler {
 635 |       type: "constant"
 636 |       value: 0
 637 |     }
 638 |     dilation: 6
 639 |   }
 640 | }
 641 | layer {  # In-place ReLU: bottom and top are both "fc6".
 642 |   name: "relu6"
 643 |   type: "ReLU"
 644 |   bottom: "fc6"
 645 |   top: "fc6"
 646 | }
 647 | layer {  # "fc7" is declared as a 1x1 Convolution with 1024 outputs (no pad / stride given).
 648 |   name: "fc7"
 649 |   type: "Convolution"
 650 |   bottom: "fc6"
 651 |   top: "fc7"
 652 |   param {  # first learnable blob (presumably the weights)
 653 |     lr_mult: 1
 654 |     decay_mult: 1
 655 |   }
 656 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 657 |     lr_mult: 2
 658 |     decay_mult: 0
 659 |   }
 660 |   convolution_param {
 661 |     num_output: 1024
 662 |     kernel_size: 1
 663 |     weight_filler {
 664 |       type: "xavier"
 665 |     }
 666 |     bias_filler {
 667 |       type: "constant"
 668 |       value: 0
 669 |     }
 670 |   }
 671 | }
 672 | layer {  # In-place ReLU: bottom and top are both "fc7".
 673 |   name: "relu7"
 674 |   type: "ReLU"
 675 |   bottom: "fc7"
 676 |   top: "fc7"
 677 | }
 678 | layer {  # 1x1 convolution reducing fc7 to 256 outputs (stride 1, pad 0).
 679 |   name: "conv6_1"
 680 |   type: "Convolution"
 681 |   bottom: "fc7"
 682 |   top: "conv6_1"
 683 |   param {  # first learnable blob (presumably the weights)
 684 |     lr_mult: 1
 685 |     decay_mult: 1
 686 |   }
 687 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 688 |     lr_mult: 2
 689 |     decay_mult: 0
 690 |   }
 691 |   convolution_param {
 692 |     num_output: 256
 693 |     pad: 0
 694 |     kernel_size: 1
 695 |     stride: 1
 696 |     weight_filler {
 697 |       type: "xavier"
 698 |     }
 699 |     bias_filler {
 700 |       type: "constant"
 701 |       value: 0
 702 |     }
 703 |   }
 704 | }
 705 | layer {  # In-place ReLU: bottom and top are both "conv6_1".
 706 |   name: "conv6_1_relu"
 707 |   type: "ReLU"
 708 |   bottom: "conv6_1"
 709 |   top: "conv6_1"
 710 | }
 711 | layer {  # Dilated 3x3 convolution (dilation 6, pad 6, stride 1), 512 outputs; stride 1 means no downsampling here.
 712 |   name: "conv6_2"
 713 |   type: "Convolution"
 714 |   bottom: "conv6_1"
 715 |   top: "conv6_2"
 716 |   param {  # first learnable blob (presumably the weights)
 717 |     lr_mult: 1
 718 |     decay_mult: 1
 719 |   }
 720 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 721 |     lr_mult: 2
 722 |     decay_mult: 0
 723 |   }
 724 |   convolution_param {
 725 |     num_output: 512
 726 |     pad: 6  # pad equals the dilation, matching the dilated kernel extent 6*(3-1)+1 = 13
 727 |     kernel_size: 3
 728 |     stride: 1
 729 |     weight_filler {
 730 |       type: "xavier"
 731 |     }
 732 |     bias_filler {
 733 |       type: "constant"
 734 |       value: 0
 735 |     }
 736 |     dilation: 6
 737 |   }
 738 | }
 739 | layer {  # In-place ReLU: bottom and top are both "conv6_2".
 740 |   name: "conv6_2_relu"
 741 |   type: "ReLU"
 742 |   bottom: "conv6_2"
 743 |   top: "conv6_2"
 744 | }
 745 | layer {  # 1x1 convolution reducing conv6_2 to 128 outputs (stride 1, pad 0).
 746 |   name: "conv7_1"
 747 |   type: "Convolution"
 748 |   bottom: "conv6_2"
 749 |   top: "conv7_1"
 750 |   param {  # first learnable blob (presumably the weights)
 751 |     lr_mult: 1
 752 |     decay_mult: 1
 753 |   }
 754 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 755 |     lr_mult: 2
 756 |     decay_mult: 0
 757 |   }
 758 |   convolution_param {
 759 |     num_output: 128
 760 |     pad: 0
 761 |     kernel_size: 1
 762 |     stride: 1
 763 |     weight_filler {
 764 |       type: "xavier"
 765 |     }
 766 |     bias_filler {
 767 |       type: "constant"
 768 |       value: 0
 769 |     }
 770 |   }
 771 | }
 772 | layer {  # In-place ReLU: bottom and top are both "conv7_1".
 773 |   name: "conv7_1_relu"
 774 |   type: "ReLU"
 775 |   bottom: "conv7_1"
 776 |   top: "conv7_1"
 777 | }
 778 | layer {  # 3x3 convolution with stride 2 (downsamples conv7_1), pad 1, 256 outputs.
 779 |   name: "conv7_2"
 780 |   type: "Convolution"
 781 |   bottom: "conv7_1"
 782 |   top: "conv7_2"
 783 |   param {  # first learnable blob (presumably the weights)
 784 |     lr_mult: 1
 785 |     decay_mult: 1
 786 |   }
 787 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 788 |     lr_mult: 2
 789 |     decay_mult: 0
 790 |   }
 791 |   convolution_param {
 792 |     num_output: 256
 793 |     pad: 1
 794 |     kernel_size: 3
 795 |     stride: 2
 796 |     weight_filler {
 797 |       type: "xavier"
 798 |     }
 799 |     bias_filler {
 800 |       type: "constant"
 801 |       value: 0
 802 |     }
 803 |   }
 804 | }
 805 | layer {  # In-place ReLU: bottom and top are both "conv7_2".
 806 |   name: "conv7_2_relu"
 807 |   type: "ReLU"
 808 |   bottom: "conv7_2"
 809 |   top: "conv7_2"
 810 | }
 811 | layer {  # 1x1 convolution reducing conv7_2 to 128 outputs (stride 1, pad 0).
 812 |   name: "conv8_1"
 813 |   type: "Convolution"
 814 |   bottom: "conv7_2"
 815 |   top: "conv8_1"
 816 |   param {  # first learnable blob (presumably the weights)
 817 |     lr_mult: 1
 818 |     decay_mult: 1
 819 |   }
 820 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 821 |     lr_mult: 2
 822 |     decay_mult: 0
 823 |   }
 824 |   convolution_param {
 825 |     num_output: 128
 826 |     pad: 0
 827 |     kernel_size: 1
 828 |     stride: 1
 829 |     weight_filler {
 830 |       type: "xavier"
 831 |     }
 832 |     bias_filler {
 833 |       type: "constant"
 834 |       value: 0
 835 |     }
 836 |   }
 837 | }
 838 | layer {  # In-place ReLU: bottom and top are both "conv8_1".
 839 |   name: "conv8_1_relu"
 840 |   type: "ReLU"
 841 |   bottom: "conv8_1"
 842 |   top: "conv8_1"
 843 | }
 844 | layer {  # 3x3 convolution with stride 2 (downsamples conv8_1), pad 1, 256 outputs.
 845 |   name: "conv8_2"
 846 |   type: "Convolution"
 847 |   bottom: "conv8_1"
 848 |   top: "conv8_2"
 849 |   param {  # first learnable blob (presumably the weights)
 850 |     lr_mult: 1
 851 |     decay_mult: 1
 852 |   }
 853 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 854 |     lr_mult: 2
 855 |     decay_mult: 0
 856 |   }
 857 |   convolution_param {
 858 |     num_output: 256
 859 |     pad: 1
 860 |     kernel_size: 3
 861 |     stride: 2
 862 |     weight_filler {
 863 |       type: "xavier"
 864 |     }
 865 |     bias_filler {
 866 |       type: "constant"
 867 |       value: 0
 868 |     }
 869 |   }
 870 | }
 871 | layer {  # In-place ReLU: bottom and top are both "conv8_2".
 872 |   name: "conv8_2_relu"
 873 |   type: "ReLU"
 874 |   bottom: "conv8_2"
 875 |   top: "conv8_2"
 876 | }
 877 | layer {  # Normalizes fc7 into a separate blob "fc7_norm", which feeds the fc7_norm_* prediction heads.
 878 |   name: "fc7_norm"
 879 |   type: "Normalize"
 880 |   bottom: "fc7"
 881 |   top: "fc7_norm"
 882 |   norm_param {
 883 |     across_spatial: false  # presumably normalizes each spatial position separately -- confirm against the Normalize layer
 884 |     scale_filler {
 885 |       type: "constant"
 886 |       value: 20  # scale parameter is initialised to the constant 20
 887 |     }
 888 |     channel_shared: false  # presumably one scale value per channel rather than a shared one -- confirm
 889 |   }
 890 | }
 891 | layer {  # 3x3 box-location prediction convolution on fc7_norm.
 892 |   name: "fc7_norm_mbox_loc"
 893 |   type: "Convolution"
 894 |   bottom: "fc7_norm"
 895 |   top: "fc7_norm_mbox_loc"
 896 |   param {  # first learnable blob (presumably the weights)
 897 |     lr_mult: 1
 898 |     decay_mult: 1
 899 |   }
 900 |   param {  # second learnable blob (presumably the bias): 2x learning rate, no weight decay
 901 |     lr_mult: 2
 902 |     decay_mult: 0
 903 |   }
 904 |   convolution_param {
 905 |     num_output: 40  # presumably priors per location x 4 box offsets; must agree with the matching PriorBox layer -- confirm
 906 |     pad: 1
 907 |     kernel_size: 3
 908 |     stride: 1
 909 |     weight_filler {
 910 |       type: "xavier"
 911 |     }
 912 |     bias_filler {
 913 |       type: "constant"
 914 |       value: 0
 915 |     }
 916 |   }
 917 | }
 918 | layer {  # Permutes the fc7_norm loc predictions: output axes are input axes (0, 2, 3, 1), so axis 1 moves last.
 919 |   name: "fc7_norm_mbox_loc_perm"
 920 |   type: "Permute"
 921 |   bottom: "fc7_norm_mbox_loc"
 922 |   top: "fc7_norm_mbox_loc_perm"
 923 |   permute_param {
 924 |     order: 0
 925 |     order: 2
 926 |     order: 3
 927 |     order: 1  # presumably the channel axis (N,C,H,W layout) -- confirm
 928 |   }
 929 | }
 930 | layer {  # Flattens the permuted fc7_norm loc blob from axis 1 onward; axis 0 is kept.
 931 |   name: "fc7_norm_mbox_loc_flat"
 932 |   type: "Flatten"
 933 |   bottom: "fc7_norm_mbox_loc_perm"
 934 |   top: "fc7_norm_mbox_loc_flat"
 935 |   flatten_param {
 936 |     axis: 1
 937 |   }
 938 | }
 939 | layer {  # 3x3 conv on fc7_norm; output reaches the mbox_conf concat via Permute + Flatten
 940 |   name: "fc7_norm_mbox_conf_new"  # NOTE(review): layer name has a _new suffix, the top blob does not
 941 |   type: "Convolution"
 942 |   bottom: "fc7_norm"
 943 |   top: "fc7_norm_mbox_conf"
 944 |   param {
 945 |     lr_mult: 1
 946 |     decay_mult: 1
 947 |   }
 948 |   param {
 949 |     lr_mult: 2
 950 |     decay_mult: 0
 951 |   }
 952 |   convolution_param {
 953 |     num_output: 60  # same width as the conv6_2 / conv7_2 conf heads; conv8_2 uses 36
 954 |     pad: 1
 955 |     kernel_size: 3
 956 |     stride: 1
 957 |     weight_filler {
 958 |       type: "xavier"
 959 |     }
 960 |     bias_filler {
 961 |       type: "constant"
 962 |       value: 0
 963 |     }
 964 |   }
 965 | }  # NOTE(review): the conv6_2/conv7_2/conv8_2 conf heads are named without _new - presumably tied to loading pretrained weights by layer name; confirm
 966 | layer {
 967 |   name: "fc7_norm_mbox_conf_perm"
 968 |   type: "Permute"
 969 |   bottom: "fc7_norm_mbox_conf"
 970 |   top: "fc7_norm_mbox_conf_perm"
 971 |   permute_param {
 972 |     order: 0
 973 |     order: 2
 974 |     order: 3
 975 |     order: 1
 976 |   }
 977 | }
 978 | layer {
 979 |   name: "fc7_norm_mbox_conf_flat"
 980 |   type: "Flatten"
 981 |   bottom: "fc7_norm_mbox_conf_perm"
 982 |   top: "fc7_norm_mbox_conf_flat"
 983 |   flatten_param {
 984 |     axis: 1
 985 |   }
 986 | }
 987 | layer {
 988 |   name: "fc7_norm_center_mbox_conf_new"
 989 |   type: "Convolution"
 990 |   bottom: "fc7_norm"
 991 |   top: "fc7_norm_center_mbox_conf"
 992 |   param {
 993 |     lr_mult: 1
 994 |     decay_mult: 1
 995 |   }
 996 |   param {
 997 |     lr_mult: 2
 998 |     decay_mult: 0
 999 |   }
1000 |   convolution_param {
1001 |     num_output: 160
1002 |     pad: 1
1003 |     kernel_size: 3
1004 |     stride: 1
1005 |     weight_filler {
1006 |       type: "xavier"
1007 |     }
1008 |     bias_filler {
1009 |       type: "constant"
1010 |       value: 0
1011 |     }
1012 |   }
1013 | }
1014 | layer {
1015 |   name: "fc7_norm_center_mbox_conf_perm"
1016 |   type: "Permute"
1017 |   bottom: "fc7_norm_center_mbox_conf"
1018 |   top: "fc7_norm_center_mbox_conf_perm"
1019 |   permute_param {
1020 |     order: 0
1021 |     order: 2
1022 |     order: 3
1023 |     order: 1
1024 |   }
1025 | }
1026 | layer {
1027 |   name: "fc7_norm_mbox_center_conf_flat"
1028 |   type: "Flatten"
1029 |   bottom: "fc7_norm_center_mbox_conf_perm"
1030 |   top: "fc7_norm_mbox_center_conf_flat"
1031 |   flatten_param {
1032 |     axis: 1
1033 |   }
1034 | }
1035 | layer {
1036 |   name: "fc7_norm_mbox_priorbox"
1037 |   type: "PriorBox"
1038 |   bottom: "fc7_norm"
1039 |   bottom: "data"
1040 |   top: "fc7_norm_mbox_priorbox"
1041 |   prior_box_param {
1042 |     min_size: 12
1043 |     max_size: 24
1044 |     aspect_ratio: 2
1045 |     aspect_ratio: 3
1046 |     aspect_ratio: 4
1047 |     aspect_ratio: 5
1048 |     flip: true
1049 |     clip: false
1050 |     variance: 0.1
1051 |     variance: 0.1
1052 |     variance: 0.2
1053 |     variance: 0.2
1054 |     step: 8
1055 |     offset: 0.5
1056 |   }
1057 | }
1058 | layer {
1059 |   name: "conv6_2_mbox_loc"
1060 |   type: "Convolution"
1061 |   bottom: "conv6_2"
1062 |   top: "conv6_2_mbox_loc"
1063 |   param {
1064 |     lr_mult: 1
1065 |     decay_mult: 1
1066 |   }
1067 |   param {
1068 |     lr_mult: 2
1069 |     decay_mult: 0
1070 |   }
1071 |   convolution_param {
1072 |     num_output: 40
1073 |     pad: 1
1074 |     kernel_size: 3
1075 |     stride: 1
1076 |     weight_filler {
1077 |       type: "xavier"
1078 |     }
1079 |     bias_filler {
1080 |       type: "constant"
1081 |       value: 0
1082 |     }
1083 |   }
1084 | }
1085 | layer {
1086 |   name: "conv6_2_mbox_loc_perm"
1087 |   type: "Permute"
1088 |   bottom: "conv6_2_mbox_loc"
1089 |   top: "conv6_2_mbox_loc_perm"
1090 |   permute_param {
1091 |     order: 0
1092 |     order: 2
1093 |     order: 3
1094 |     order: 1
1095 |   }
1096 | }
1097 | layer {
1098 |   name: "conv6_2_mbox_loc_flat"
1099 |   type: "Flatten"
1100 |   bottom: "conv6_2_mbox_loc_perm"
1101 |   top: "conv6_2_mbox_loc_flat"
1102 |   flatten_param {
1103 |     axis: 1
1104 |   }
1105 | }
1106 | layer {
1107 |   name: "conv6_2_mbox_conf"
1108 |   type: "Convolution"
1109 |   bottom: "conv6_2"
1110 |   top: "conv6_2_mbox_conf"
1111 |   param {
1112 |     lr_mult: 1
1113 |     decay_mult: 1
1114 |   }
1115 |   param {
1116 |     lr_mult: 2
1117 |     decay_mult: 0
1118 |   }
1119 |   convolution_param {
1120 |     num_output: 60
1121 |     pad: 1
1122 |     kernel_size: 3
1123 |     stride: 1
1124 |     weight_filler {
1125 |       type: "xavier"
1126 |     }
1127 |     bias_filler {
1128 |       type: "constant"
1129 |       value: 0
1130 |     }
1131 |   }
1132 | }
1133 | layer {
1134 |   name: "conv6_2_mbox_conf_perm"
1135 |   type: "Permute"
1136 |   bottom: "conv6_2_mbox_conf"
1137 |   top: "conv6_2_mbox_conf_perm"
1138 |   permute_param {
1139 |     order: 0
1140 |     order: 2
1141 |     order: 3
1142 |     order: 1
1143 |   }
1144 | }
1145 | layer {
1146 |   name: "conv6_2_mbox_conf_flat"
1147 |   type: "Flatten"
1148 |   bottom: "conv6_2_mbox_conf_perm"
1149 |   top: "conv6_2_mbox_conf_flat"
1150 |   flatten_param {
1151 |     axis: 1
1152 |   }
1153 | }
1154 | layer {
1155 |   name: "conv6_2_center_mbox_conf_new"
1156 |   type: "Convolution"
1157 |   bottom: "conv6_2"
1158 |   top: "conv6_2_center_mbox_conf"
1159 |   param {
1160 |     lr_mult: 1
1161 |     decay_mult: 1
1162 |   }
1163 |   param {
1164 |     lr_mult: 2
1165 |     decay_mult: 0
1166 |   }
1167 |   convolution_param {
1168 |     num_output: 160
1169 |     pad: 1
1170 |     kernel_size: 3
1171 |     stride: 1
1172 |     weight_filler {
1173 |       type: "xavier"
1174 |     }
1175 |     bias_filler {
1176 |       type: "constant"
1177 |       value: 0
1178 |     }
1179 |   }
1180 | }
1181 | layer {
1182 |   name: "conv6_2_center_mbox_conf_perm"
1183 |   type: "Permute"
1184 |   bottom: "conv6_2_center_mbox_conf"
1185 |   top: "conv6_2_center_mbox_conf_perm"
1186 |   permute_param {
1187 |     order: 0
1188 |     order: 2
1189 |     order: 3
1190 |     order: 1
1191 |   }
1192 | }
1193 | layer {
1194 |   name: "conv6_2_center_mbox_conf_flat"
1195 |   type: "Flatten"
1196 |   bottom: "conv6_2_center_mbox_conf_perm"
1197 |   top: "conv6_2_center_mbox_conf_flat"
1198 |   flatten_param {
1199 |     axis: 1
1200 |   }
1201 | }
1202 | layer {
1203 |   name: "conv6_2_mbox_priorbox"
1204 |   type: "PriorBox"
1205 |   bottom: "conv6_2"
1206 |   bottom: "data"
1207 |   top: "conv6_2_mbox_priorbox"
1208 |   prior_box_param {
1209 |     min_size: 24
1210 |     max_size: 36
1211 |     aspect_ratio: 2
1212 |     aspect_ratio: 3
1213 |     aspect_ratio: 4
1214 |     aspect_ratio: 5
1215 |     flip: true
1216 |     clip: false
1217 |     variance: 0.1
1218 |     variance: 0.1
1219 |     variance: 0.2
1220 |     variance: 0.2
1221 |     step: 8
1222 |     offset: 0.5
1223 |   }
1224 | }
1225 | layer {
1226 |   name: "conv7_2_mbox_loc"
1227 |   type: "Convolution"
1228 |   bottom: "conv7_2"
1229 |   top: "conv7_2_mbox_loc"
1230 |   param {
1231 |     lr_mult: 1
1232 |     decay_mult: 1
1233 |   }
1234 |   param {
1235 |     lr_mult: 2
1236 |     decay_mult: 0
1237 |   }
1238 |   convolution_param {
1239 |     num_output: 40
1240 |     pad: 1
1241 |     kernel_size: 3
1242 |     stride: 1
1243 |     weight_filler {
1244 |       type: "xavier"
1245 |     }
1246 |     bias_filler {
1247 |       type: "constant"
1248 |       value: 0
1249 |     }
1250 |   }
1251 | }
1252 | layer {
1253 |   name: "conv7_2_mbox_loc_perm"
1254 |   type: "Permute"
1255 |   bottom: "conv7_2_mbox_loc"
1256 |   top: "conv7_2_mbox_loc_perm"
1257 |   permute_param {
1258 |     order: 0
1259 |     order: 2
1260 |     order: 3
1261 |     order: 1
1262 |   }
1263 | }
1264 | layer {
1265 |   name: "conv7_2_mbox_loc_flat"
1266 |   type: "Flatten"
1267 |   bottom: "conv7_2_mbox_loc_perm"
1268 |   top: "conv7_2_mbox_loc_flat"
1269 |   flatten_param {
1270 |     axis: 1
1271 |   }
1272 | }
1273 | layer {
1274 |   name: "conv7_2_mbox_conf"
1275 |   type: "Convolution"
1276 |   bottom: "conv7_2"
1277 |   top: "conv7_2_mbox_conf"
1278 |   param {
1279 |     lr_mult: 1
1280 |     decay_mult: 1
1281 |   }
1282 |   param {
1283 |     lr_mult: 2
1284 |     decay_mult: 0
1285 |   }
1286 |   convolution_param {
1287 |     num_output: 60
1288 |     pad: 1
1289 |     kernel_size: 3
1290 |     stride: 1
1291 |     weight_filler {
1292 |       type: "xavier"
1293 |     }
1294 |     bias_filler {
1295 |       type: "constant"
1296 |       value: 0
1297 |     }
1298 |   }
1299 | }
1300 | layer {
1301 |   name: "conv7_2_mbox_conf_perm"
1302 |   type: "Permute"
1303 |   bottom: "conv7_2_mbox_conf"
1304 |   top: "conv7_2_mbox_conf_perm"
1305 |   permute_param {
1306 |     order: 0
1307 |     order: 2
1308 |     order: 3
1309 |     order: 1
1310 |   }
1311 | }
1312 | layer {
1313 |   name: "conv7_2_mbox_conf_flat"
1314 |   type: "Flatten"
1315 |   bottom: "conv7_2_mbox_conf_perm"
1316 |   top: "conv7_2_mbox_conf_flat"
1317 |   flatten_param {
1318 |     axis: 1
1319 |   }
1320 | }
1321 | layer {
1322 |   name: "conv7_2_center_mbox_conf_new"
1323 |   type: "Convolution"
1324 |   bottom: "conv7_2"
1325 |   top: "conv7_2_center_mbox_conf"
1326 |   param {
1327 |     lr_mult: 1
1328 |     decay_mult: 1
1329 |   }
1330 |   param {
1331 |     lr_mult: 2
1332 |     decay_mult: 0
1333 |   }
1334 |   convolution_param {
1335 |     num_output: 160
1336 |     pad: 1
1337 |     kernel_size: 3
1338 |     stride: 1
1339 |     weight_filler {
1340 |       type: "xavier"
1341 |     }
1342 |     bias_filler {
1343 |       type: "constant"
1344 |       value: 0
1345 |     }
1346 |   }
1347 | }
1348 | layer {
1349 |   name: "conv7_2_center_mbox_conf_perm"
1350 |   type: "Permute"
1351 |   bottom: "conv7_2_center_mbox_conf"
1352 |   top: "conv7_2_center_mbox_conf_perm"
1353 |   permute_param {
1354 |     order: 0
1355 |     order: 2
1356 |     order: 3
1357 |     order: 1
1358 |   }
1359 | }
1360 | layer {
1361 |   name: "conv7_2_center_mbox_conf_flat"
1362 |   type: "Flatten"
1363 |   bottom: "conv7_2_center_mbox_conf_perm"
1364 |   top: "conv7_2_center_mbox_conf_flat"
1365 |   flatten_param {
1366 |     axis: 1
1367 |   }
1368 | }
1369 | layer {
1370 |   name: "conv7_2_mbox_priorbox"
1371 |   type: "PriorBox"
1372 |   bottom: "conv7_2"
1373 |   bottom: "data"
1374 |   top: "conv7_2_mbox_priorbox"
1375 |   prior_box_param {
1376 |     min_size: 36
1377 |     max_size: 48
1378 |     aspect_ratio: 2
1379 |     aspect_ratio: 3
1380 |     aspect_ratio: 4
1381 |     aspect_ratio: 5
1382 |     flip: true
1383 |     clip: false
1384 |     variance: 0.1
1385 |     variance: 0.1
1386 |     variance: 0.2
1387 |     variance: 0.2
1388 |     step: 16
1389 |     offset: 0.5
1390 |   }
1391 | }
1392 | layer {  # 3x3 conv on conv8_2; output reaches the mbox_loc concat via Permute + Flatten
1393 |   name: "conv8_2_mbox_loc"
1394 |   type: "Convolution"
1395 |   bottom: "conv8_2"
1396 |   top: "conv8_2_mbox_loc"
1397 |   param {
1398 |     lr_mult: 1
1399 |     decay_mult: 1
1400 |   }
1401 |   param {
1402 |     lr_mult: 2
1403 |     decay_mult: 0
1404 |   }
1405 |   convolution_param {
1406 |     num_output: 24  # 24 here vs 40 on the fc7_norm / conv6_2 / conv7_2 loc heads; conv8_2_mbox_priorbox lists 2 aspect ratios instead of 4
1407 |     pad: 1
1408 |     kernel_size: 3
1409 |     stride: 1
1410 |     weight_filler {
1411 |       type: "xavier"
1412 |     }
1413 |     bias_filler {
1414 |       type: "constant"
1415 |       value: 0
1416 |     }
1417 |   }
1418 | }
1419 | layer {
1420 |   name: "conv8_2_mbox_loc_perm"
1421 |   type: "Permute"
1422 |   bottom: "conv8_2_mbox_loc"
1423 |   top: "conv8_2_mbox_loc_perm"
1424 |   permute_param {
1425 |     order: 0
1426 |     order: 2
1427 |     order: 3
1428 |     order: 1
1429 |   }
1430 | }
1431 | layer {
1432 |   name: "conv8_2_mbox_loc_flat"
1433 |   type: "Flatten"
1434 |   bottom: "conv8_2_mbox_loc_perm"
1435 |   top: "conv8_2_mbox_loc_flat"
1436 |   flatten_param {
1437 |     axis: 1
1438 |   }
1439 | }
1440 | layer {  # 3x3 conv on conv8_2; output reaches the mbox_conf concat via Permute + Flatten
1441 |   name: "conv8_2_mbox_conf"
1442 |   type: "Convolution"
1443 |   bottom: "conv8_2"
1444 |   top: "conv8_2_mbox_conf"
1445 |   param {
1446 |     lr_mult: 1
1447 |     decay_mult: 1
1448 |   }
1449 |   param {
1450 |     lr_mult: 2
1451 |     decay_mult: 0
1452 |   }
1453 |   convolution_param {
1454 |     num_output: 36  # 36 here vs 60 on the other conf heads (same 6:10 ratio as the loc heads' 24:40); a multiple of num_classes: 6 in mbox_loss
1455 |     pad: 1
1456 |     kernel_size: 3
1457 |     stride: 1
1458 |     weight_filler {
1459 |       type: "xavier"
1460 |     }
1461 |     bias_filler {
1462 |       type: "constant"
1463 |       value: 0
1464 |     }
1465 |   }
1466 | }
1467 | layer {
1468 |   name: "conv8_2_mbox_conf_perm"
1469 |   type: "Permute"
1470 |   bottom: "conv8_2_mbox_conf"
1471 |   top: "conv8_2_mbox_conf_perm"
1472 |   permute_param {
1473 |     order: 0
1474 |     order: 2
1475 |     order: 3
1476 |     order: 1
1477 |   }
1478 | }
1479 | layer {
1480 |   name: "conv8_2_mbox_conf_flat"
1481 |   type: "Flatten"
1482 |   bottom: "conv8_2_mbox_conf_perm"
1483 |   top: "conv8_2_mbox_conf_flat"
1484 |   flatten_param {
1485 |     axis: 1
1486 |   }
1487 | }
1488 | layer {  # 3x3 conv on conv8_2; output reaches the mbox_center_conf concat via Permute + Flatten
1489 |   name: "conv8_2_center_mbox_conf_new"
1490 |   type: "Convolution"
1491 |   bottom: "conv8_2"
1492 |   top: "conv8_2_center_mbox_conf"
1493 |   param {
1494 |     lr_mult: 1
1495 |     decay_mult: 1
1496 |   }
1497 |   param {
1498 |     lr_mult: 2
1499 |     decay_mult: 0
1500 |   }
1501 |   convolution_param {
1502 |     num_output: 96  # 96 here vs 160 on the other center heads; a multiple of center_features: 16 in mbox_loss
1503 |     pad: 1
1504 |     kernel_size: 3
1505 |     stride: 1
1506 |     weight_filler {
1507 |       type: "xavier"
1508 |     }
1509 |     bias_filler {
1510 |       type: "constant"
1511 |       value: 0
1512 |     }
1513 |   }
1514 | }
1515 | layer {
1516 |   name: "conv8_2_center_mbox_conf_perm"
1517 |   type: "Permute"
1518 |   bottom: "conv8_2_center_mbox_conf"
1519 |   top: "conv8_2_center_mbox_conf_perm"
1520 |   permute_param {
1521 |     order: 0
1522 |     order: 2
1523 |     order: 3
1524 |     order: 1
1525 |   }
1526 | }
1527 | layer {
1528 |   name: "conv8_2_center_mbox_conf_flat"
1529 |   type: "Flatten"
1530 |   bottom: "conv8_2_center_mbox_conf_perm"
1531 |   top: "conv8_2_center_mbox_conf_flat"
1532 |   flatten_param {
1533 |     axis: 1
1534 |   }
1535 | }
1536 | layer {  # prior boxes for the conv8_2 feature map; last bottom of the mbox_priorbox concat
1537 |   name: "conv8_2_mbox_priorbox"
1538 |   type: "PriorBox"
1539 |   bottom: "conv8_2"
1540 |   bottom: "data"
1541 |   top: "conv8_2_mbox_priorbox"
1542 |   prior_box_param {
1543 |     min_size: 48
1544 |     max_size: 60
1545 |     aspect_ratio: 2
1546 |     aspect_ratio: 3  # only ratios 2 and 3 here; the other three PriorBox layers also list 4 and 5
1547 |     flip: true
1548 |     clip: false
1549 |     variance: 0.1
1550 |     variance: 0.1
1551 |     variance: 0.2
1552 |     variance: 0.2
1553 |     step: 30  # conv7_2_mbox_priorbox uses step: 16
1554 |     offset: 0.5
1555 |   }
1556 | }
1557 | layer {
1558 |   name: "mbox_loc"
1559 |   type: "Concat"
1560 |   bottom: "fc7_norm_mbox_loc_flat"
1561 |   bottom: "conv6_2_mbox_loc_flat"
1562 |   bottom: "conv7_2_mbox_loc_flat"
1563 |   bottom: "conv8_2_mbox_loc_flat"
1564 |   top: "mbox_loc"
1565 |   concat_param {
1566 |     axis: 1
1567 |   }
1568 | }
1569 | layer {
1570 |   name: "mbox_conf"
1571 |   type: "Concat"
1572 |   bottom: "fc7_norm_mbox_conf_flat"
1573 |   bottom: "conv6_2_mbox_conf_flat"
1574 |   bottom: "conv7_2_mbox_conf_flat"
1575 |   bottom: "conv8_2_mbox_conf_flat"
1576 |   top: "mbox_conf"
1577 |   concat_param {
1578 |     axis: 1
1579 |   }
1580 | }
1581 | layer {  # joins the four per-feature-map PriorBox outputs into the single blob consumed by mbox_loss
1582 |   name: "mbox_priorbox"
1583 |   type: "Concat"
1584 |   bottom: "fc7_norm_mbox_priorbox"
1585 |   bottom: "conv6_2_mbox_priorbox"
1586 |   bottom: "conv7_2_mbox_priorbox"
1587 |   bottom: "conv8_2_mbox_priorbox"
1588 |   top: "mbox_priorbox"
1589 |   concat_param {
1590 |     axis: 2  # unlike the axis-1 concats used for mbox_loc, mbox_conf and mbox_center_conf
1591 |   }
1592 | }
1593 | layer {  # joins the four flattened center-feature heads into the fifth bottom of mbox_loss
1594 |   name: "mbox_center_conf"
1595 |   type: "Concat"
1596 |   bottom: "fc7_norm_mbox_center_conf_flat"  # word order (mbox_center) differs from the bottoms below (center_mbox) but matches the top of the fc7_norm Flatten layer
1597 |   bottom: "conv6_2_center_mbox_conf_flat"
1598 |   bottom: "conv7_2_center_mbox_conf_flat"
1599 |   bottom: "conv8_2_center_mbox_conf_flat"
1600 |   top: "mbox_center_conf"
1601 |   concat_param {
1602 |     axis: 1
1603 |   }
1604 | }
1605 | layer {  # training loss over the concatenated loc / conf / prior / center-feature blobs
1606 |   name: "mbox_loss"
1607 |   type: "MultiBoxCenterLoss"
1608 |   bottom: "mbox_loc"
1609 |   bottom: "mbox_conf"
1610 |   bottom: "mbox_priorbox"
1611 |   bottom: "label"
1612 |   bottom: "mbox_center_conf"
1613 |   top: "mbox_loss"
1614 |   include {
1615 |     phase: TRAIN  # layer is only instantiated for the TRAIN phase
1616 |   }
1617 |   propagate_down: true  # one flag per bottom, in order: mbox_loc
1618 |   propagate_down: true  # mbox_conf
1619 |   propagate_down: false  # mbox_priorbox
1620 |   propagate_down: false  # label
1621 |   propagate_down: true  # mbox_center_conf
1622 |   loss_param {
1623 |     normalization: VALID
1624 |   }
1625 |   multibox_loss_param {
1626 |     loc_loss_type: SMOOTH_L1
1627 |     conf_loss_type: SOFTMAX
1628 |     loc_weight: 1
1629 |     num_classes: 6  # the conf head widths in this net (60 and 36) are multiples of 6
1630 |     share_location: true
1631 |     match_type: PER_PREDICTION
1632 |     overlap_threshold: 0.45
1633 |     use_prior_for_matching: true
1634 |     background_label_id: 0
1635 |     use_difficult_gt: true
1636 |     neg_pos_ratio: 3
1637 |     neg_overlap: 0.1
1638 |     code_type: CENTER_SIZE
1639 |     ignore_cross_boundary_bbox: false
1640 |     mining_type: MAX_NEGATIVE
1641 |     center_loss_weight: 0.1
1642 |   }
1643 |   multibox_center_loss_param {
1644 |     center_features: 16  # the center head widths in this net (160 and 96) are multiples of 16
1645 |   }
1646 |  }
1647 | 


--------------------------------------------------------------------------------
/caffe.proto:
--------------------------------------------------------------------------------
   1 | syntax = "proto2";
   2 | 
   3 | package caffe;
   4 | 
   5 | // Specifies the shape (dimensions) of a Blob.
   6 | message BlobShape {
   7 |   repeated int64 dim = 1 [packed = true];
   8 | }
   9 | 
  10 | message BlobProto {
  11 |   optional BlobShape shape = 7;
  12 |   repeated float data = 5 [packed = true];
  13 |   repeated float diff = 6 [packed = true];
  14 |   repeated double double_data = 8 [packed = true];
  15 |   repeated double double_diff = 9 [packed = true];
  16 | 
  17 |   // 4D dimensions -- deprecated.  Use "shape" instead.
  18 |   optional int32 num = 1 [default = 0];
  19 |   optional int32 channels = 2 [default = 0];
  20 |   optional int32 height = 3 [default = 0];
  21 |   optional int32 width = 4 [default = 0];
  22 | }
  23 | 
  24 | // The BlobProtoVector is simply a way to pass multiple blobproto instances
  25 | // around.
  26 | message BlobProtoVector {
  27 |   repeated BlobProto blobs = 1;
  28 | }
  29 | 
  30 | message Datum {
  31 |   optional int32 channels = 1;
  32 |   optional int32 height = 2;
  33 |   optional int32 width = 3;
  34 |   // the actual image data, in bytes
  35 |   optional bytes data = 4;
  36 |   optional int32 label = 5;
  37 |   // Optionally, the datum could also hold float data.
  38 |   repeated float float_data = 6;
  39 |   // If true data contains an encoded image that need to be decoded
  40 |   optional bool encoded = 7 [default = false];
  41 | }
  42 | 
  43 | // One entry of a LabelMap: associates a label name with its integer label id.
  44 | message LabelMapItem {
  45 |   // Both name and label are expected to be set, although the schema declares them optional.
  46 |   optional string name = 1;
  47 |   optional int32 label = 2;
  48 |   // display_name may be left unset.
  49 |   optional string display_name = 3;
  50 | }
  51 | 
  52 | message LabelMap {
  53 |   repeated LabelMapItem item = 1;
  54 | }
  55 | 
  56 | // Sample a bbox in the normalized space [0, 1] with provided constraints.
  57 | message Sampler {
  58 |   // Minimum scale of the sampled bbox.
  59 |   optional float min_scale = 1 [default = 1.];
  60 |   // Maximum scale of the sampled bbox.
  61 |   optional float max_scale = 2 [default = 1.];
  62 | 
  63 |   // Minimum aspect ratio of the sampled bbox.
  64 |   optional float min_aspect_ratio = 3 [default = 1.];
  65 |   // Maximum aspect ratio of the sampled bbox.
  66 |   optional float max_aspect_ratio = 4 [default = 1.];
  67 | }
  68 | 
  69 | // Constraints for selecting sampled bbox.
  70 | message SampleConstraint {
  71 |   // Minimum Jaccard overlap between sampled bbox and all bboxes in
  72 |   // AnnotationGroup.
  73 |   optional float min_jaccard_overlap = 1;
  74 |   // Maximum Jaccard overlap between sampled bbox and all bboxes in
  75 |   // AnnotationGroup.
  76 |   optional float max_jaccard_overlap = 2;
  77 | 
  78 |   // Minimum coverage of sampled bbox by all bboxes in AnnotationGroup.
  79 |   optional float min_sample_coverage = 3;
  80 |   // Maximum coverage of sampled bbox by all bboxes in AnnotationGroup.
  81 |   optional float max_sample_coverage = 4;
  82 | 
  83 |   // Minimum coverage of all bboxes in AnnotationGroup by sampled bbox.
  84 |   optional float min_object_coverage = 5;
  85 |   // Maximum coverage of all bboxes in AnnotationGroup by sampled bbox.
  86 |   optional float max_object_coverage = 6;
  87 | }
  88 | 
  89 | // Sample a batch of bboxes with provided constraints.
  90 | message BatchSampler {
  91 |   // Use original image as the source for sampling.
  92 |   optional bool use_original_image = 1 [default = true];
  93 | 
  94 |   // Constraints for sampling bbox.
  95 |   optional Sampler sampler = 2;
  96 | 
  97 |   // Constraints for determining if a sampled bbox is positive or negative.
  98 |   optional SampleConstraint sample_constraint = 3;
  99 | 
 100 |   // If provided, stop sampling once this many samples satisfying the
 101 |   // sample_constraint have been found.
 102 |   optional uint32 max_sample = 4;
 103 | 
 104 |   // Maximum number of trials for sampling to avoid infinite loop.
 105 |   optional uint32 max_trials = 5 [default = 100];
 106 | }
 107 | 
 108 | // Condition for emitting annotations.
 109 | message EmitConstraint {
 110 |   enum EmitType {
 111 |     CENTER = 0;
 112 |     MIN_OVERLAP = 1;
 113 |   }
 114 |   optional EmitType emit_type = 1 [default = CENTER];
 115 |   // If emit_type is MIN_OVERLAP, provide the emit_overlap.
 116 |   optional float emit_overlap = 2;
 117 | }
 118 | 
 119 | // The normalized bounding box [0, 1] w.r.t. the input image size.
 120 | message NormalizedBBox {
 121 |   optional float xmin = 1;
 122 |   optional float ymin = 2;
 123 |   optional float xmax = 3;
 124 |   optional float ymax = 4;
 125 |   optional int32 label = 5;
 126 |   optional bool difficult = 6;
 127 |   optional float score = 7;
 128 |   optional float size = 8;
 129 | }
 130 | 
 131 | // Annotation for each object instance.
 132 | message Annotation {
 133 |   optional int32 instance_id = 1 [default = 0];
 134 |   optional NormalizedBBox bbox = 2;
 135 | }
 136 | 
 137 | // Group of annotations for a particular label.
 138 | message AnnotationGroup {
 139 |   optional int32 group_label = 1;
 140 |   repeated Annotation annotation = 2;
 141 | }
 142 | 
 143 | // An extension of Datum which contains "rich" annotations.
 144 | message AnnotatedDatum {
 145 |   enum AnnotationType {
 146 |     BBOX = 0;
 147 |   }
 148 |   optional Datum datum = 1;
 149 |   // If there are "rich" annotations, specify the type of annotation.
 150 |   // Currently it only supports bounding box.
 151 |   // If there are no "rich" annotations, use label in datum instead.
 152 |   optional AnnotationType type = 2;
 153 |   // Each group contains annotation for a particular class.
 154 |   repeated AnnotationGroup annotation_group = 3;
 155 | }
 156 | 
 157 | message FillerParameter {
 158 |   // The filler type.
 159 |   optional string type = 1 [default = 'constant'];
 160 |   optional float value = 2 [default = 0]; // the value in constant filler
 161 |   optional float min = 3 [default = 0]; // the min value in uniform filler
 162 |   optional float max = 4 [default = 1]; // the max value in uniform filler
 163 |   optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
 164 |   optional float std = 6 [default = 1]; // the std value in Gaussian filler
 165 |   // The expected number of non-zero output weights for a given input in
 166 |   // Gaussian filler -- the default -1 means don't perform sparsification.
 167 |   optional int32 sparse = 7 [default = -1];
 168 |   // Normalize the filler variance by fan_in, fan_out, or their average.
 169 |   // Applies to 'xavier' and 'msra' fillers.
 170 |   enum VarianceNorm {
 171 |     FAN_IN = 0;
 172 |     FAN_OUT = 1;
 173 |     AVERAGE = 2;
 174 |   }
 175 |   optional VarianceNorm variance_norm = 8 [default = FAN_IN];
 176 | }
 177 | 
 178 | message NetParameter {
 179 |   optional string name = 1; // consider giving the network a name
 180 |   // DEPRECATED. See InputParameter. The input blobs to the network.
 181 |   repeated string input = 3;
 182 |   // DEPRECATED. See InputParameter. The shape of the input blobs.
 183 |   repeated BlobShape input_shape = 8;
 184 | 
 185 |   // 4D input dimensions -- deprecated.  Use "input_shape" instead.
 186 |   // If specified, for each input blob there should be four
 187 |   // values specifying the num, channels, height and width of the input blob.
 188 |   // Thus, there should be a total of (4 * #input) numbers.
 189 |   repeated int32 input_dim = 4;
 190 | 
 191 |   // Whether the network will force every layer to carry out backward operation.
 192 |   // If set False, then whether to carry out backward is determined
 193 |   // automatically according to the net structure and learning rates.
 194 |   optional bool force_backward = 5 [default = false];
 195 |   // The current "state" of the network, including the phase, level, and stage.
 196 |   // Some layers may be included/excluded depending on this state and the states
 197 |   // specified in the layers' include and exclude fields.
 198 |   optional NetState state = 6;
 199 | 
 200 |   // Print debugging information about results while running Net::Forward,
 201 |   // Net::Backward, and Net::Update.
 202 |   optional bool debug_info = 7 [default = false];
 203 | 
 204 |   // The layers that make up the net.  Each of their configurations, including
 205 |   // connectivity and behavior, is specified as a LayerParameter.
 206 |   repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.
 207 | 
 208 |   // DEPRECATED: use 'layer' instead.
 209 |   repeated V1LayerParameter layers = 2;
 210 | }
 211 | 
 212 | // NOTE
 213 | // Update the next available ID when you add a new SolverParameter field.
 214 | //
 215 | // SolverParameter next available ID: 45 (last added: show_per_class_result)
 216 | message SolverParameter {
 217 |   //////////////////////////////////////////////////////////////////////////////
 218 |   // Specifying the train and test networks
 219 |   //
 220 |   // Exactly one train net must be specified using one of the following fields:
 221 |   //     train_net_param, train_net, net_param, net
 222 |   // One or more test nets may be specified using any of the following fields:
 223 |   //     test_net_param, test_net, net_param, net
 224 |   // If more than one test net field is specified (e.g., both net and
 225 |   // test_net are specified), they will be evaluated in the field order given
 226 |   // above: (1) test_net_param, (2) test_net, (3) net_param/net.
 227 |   // A test_iter must be specified for each test_net.
 228 |   // A test_level and/or a test_stage may also be specified for each test_net.
 229 |   //////////////////////////////////////////////////////////////////////////////
 230 | 
 231 |   // Proto filename for the train net, possibly combined with one or more
 232 |   // test nets.
 233 |   optional string net = 24;
 234 |   // Inline train net param, possibly combined with one or more test nets.
 235 |   optional NetParameter net_param = 25;
 236 | 
 237 |   optional string train_net = 1; // Proto filename for the train net.
 238 |   repeated string test_net = 2; // Proto filenames for the test nets.
 239 |   optional NetParameter train_net_param = 21; // Inline train net params.
 240 |   repeated NetParameter test_net_param = 22; // Inline test net params.
 241 | 
 242 |   // The states for the train/test nets. Must be unspecified or
 243 |   // specified once per net.
 244 |   //
 245 |   // By default, all states will have solver = true;
 246 |   // train_state will have phase = TRAIN,
 247 |   // and all test_state's will have phase = TEST.
 248 |   // Other defaults are set according to the NetState defaults.
 249 |   optional NetState train_state = 26;
 250 |   repeated NetState test_state = 27;
 251 | 
 252 |   // Evaluation type.
 253 |   optional string eval_type = 41 [default = "classification"];
 254 |   // ap_version: different ways of computing Average Precision.
 255 |   //    Check https://sanchom.wordpress.com/tag/average-precision/ for details.
 256 |   //    11point: the 11-point interpolated average precision. Used in VOC2007.
 257 |   //    MaxIntegral: maximally interpolated AP. Used in VOC2012/ILSVRC.
 258 |   //    Integral: the natural integral of the precision-recall curve.
 259 |   optional string ap_version = 42 [default = "Integral"];
 260 |   // If true, display per class result.
 261 |   optional bool show_per_class_result = 44 [default = false];
 262 | 
 263 |   // The number of iterations for each test net.
 264 |   repeated int32 test_iter = 3;
 265 | 
 266 |   // The number of iterations between two testing phases.
 267 |   optional int32 test_interval = 4 [default = 0];
 268 |   optional bool test_compute_loss = 19 [default = false];
 269 |   // If true, run an initial test pass before the first iteration,
 270 |   // ensuring memory availability and printing the starting value of the loss.
 271 |   optional bool test_initialization = 32 [default = true];
 272 |   optional float base_lr = 5; // The base learning rate
 273 |   // the number of iterations between displaying info. If display = 0, no info
 274 |   // will be displayed.
 275 |   optional int32 display = 6;
 276 |   // Display the loss averaged over the last average_loss iterations
 277 |   optional int32 average_loss = 33 [default = 1];
 278 |   optional int32 max_iter = 7; // the maximum number of iterations
 279 |   // accumulate gradients over `iter_size` x `batch_size` instances
 280 |   optional int32 iter_size = 36 [default = 1];
 281 | 
 282 |   // The learning rate decay policy. The currently implemented learning rate
 283 |   // policies are as follows:
 284 |   //    - fixed: always return base_lr.
 285 |   //    - step: return base_lr * gamma ^ (floor(iter / step))
 286 |   //    - exp: return base_lr * gamma ^ iter
 287 |   //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
 288 |   //    - multistep: similar to step but it allows non uniform steps defined by
 289 |   //      stepvalue
 290 |   //    - poly: the effective learning rate follows a polynomial decay, to be
 291 |   //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
 292 |   //    - sigmoid: the effective learning rate follows a sigmoid decay
 293 |   //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
 294 |   //    - plateau: decreases lr
 295 |   //              if the minimum loss isn't updated for 'plateau_winsize' iters
 296 |   //
 297 |   // where base_lr, max_iter, gamma, step, stepvalue and power are defined
 298 |   // in the solver parameter protocol buffer, and iter is the current iteration.
 299 |   optional string lr_policy = 8;
 300 |   optional float gamma = 9; // The parameter to compute the learning rate.
 301 |   optional float power = 10; // The parameter to compute the learning rate.
 302 |   optional float momentum = 11; // The momentum value.
 303 |   optional float weight_decay = 12; // The weight decay.
 304 |   // regularization types supported: L1 and L2
 305 |   // controlled by weight_decay
 306 |   optional string regularization_type = 29 [default = "L2"];
 307 |   // the stepsize for learning rate policy "step"
 308 |   optional int32 stepsize = 13;
 309 |   // the stepsize for learning rate policy "multistep"
 310 |   repeated int32 stepvalue = 34;
 311 |   // the stepsize for learning rate policy "plateau"
 312 |   repeated int32 plateau_winsize = 43;
 313 | 
 314 |   // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
 315 |   // whenever their actual L2 norm is larger.
 316 |   optional float clip_gradients = 35 [default = -1];
 317 | 
 318 |   optional int32 snapshot = 14 [default = 0]; // The snapshot interval
 319 |   optional string snapshot_prefix = 15; // The prefix for the snapshot.
 320 |   // whether to snapshot diff in the results or not. Snapshotting diff will help
 321 |   // debugging but the final protocol buffer size will be much larger.
 322 |   optional bool snapshot_diff = 16 [default = false];
 323 |   enum SnapshotFormat {
 324 |     HDF5 = 0;
 325 |     BINARYPROTO = 1;
 326 |   }
 327 |   optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
 328 |   // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
 329 |   enum SolverMode {
 330 |     CPU = 0;
 331 |     GPU = 1;
 332 |   }
 333 |   optional SolverMode solver_mode = 17 [default = GPU];
 334 |   // the device_id will that be used in GPU mode. Use device_id = 0 in default.
 335 |   optional int32 device_id = 18 [default = 0];
 336 |   // If non-negative, the seed with which the Solver will initialize the Caffe
 337 |   // random number generator -- useful for reproducible results. Otherwise,
 338 |   // (and by default) initialize using a seed derived from the system clock.
 339 |   optional int64 random_seed = 20 [default = -1];
 340 | 
 341 |   // type of the solver
 342 |   optional string type = 40 [default = "SGD"];
 343 | 
 344 |   // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
 345 |   optional float delta = 31 [default = 1e-8];
 346 |   // parameters for the Adam solver
 347 |   optional float momentum2 = 39 [default = 0.999];
 348 | 
 349 |   // RMSProp decay value
 350 |   // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
 351 |   optional float rms_decay = 38 [default = 0.99];
 352 | 
 353 |   // If true, print information about the state of the net that may help with
 354 |   // debugging learning problems.
 355 |   optional bool debug_info = 23 [default = false];
 356 | 
 357 |   // If false, don't save a snapshot after training finishes.
 358 |   optional bool snapshot_after_train = 28 [default = true];
 359 | 
 360 |   // DEPRECATED: old solver enum types, use string instead
 361 |   enum SolverType {
 362 |     SGD = 0;
 363 |     NESTEROV = 1;
 364 |     ADAGRAD = 2;
 365 |     RMSPROP = 3;
 366 |     ADADELTA = 4;
 367 |     ADAM = 5;
 368 |   }
 369 |   // DEPRECATED: use type instead of solver_type
 370 |   optional SolverType solver_type = 30 [default = SGD];
 371 | }
 372 | 
 373 | // A message that stores the state saved in a solver snapshot
 374 | message SolverState {
 375 |   optional int32 iter = 1; // The current iteration
 376 |   optional string learned_net = 2; // The file that stores the learned net.
 377 |   repeated BlobProto history = 3; // The history for sgd solvers
 378 |   optional int32 current_step = 4 [default = 0]; // The current step for learning rate
 379 |   optional float minimum_loss = 5 [default = 1E38]; // Historical minimum loss
 380 |   optional int32 iter_last_event = 6 [default = 0]; // The iteration when the last lr-update or min_loss-update happened
 381 | }
 382 | 
 383 | enum Phase { // The train / test phase; used by NetState, NetStateRule and LayerParameter.
 384 |    TRAIN = 0;
 385 |    TEST = 1;
 386 | }
 387 | 
 388 | message NetState { // The state (phase, level, stages) that NetStateRules are matched against.
 389 |   optional Phase phase = 1 [default = TEST]; // Phase of the net; TEST unless set.
 390 |   optional int32 level = 2 [default = 0]; // Compared against NetStateRule min_level / max_level.
 391 |   repeated string stage = 3; // Compared against NetStateRule stage / not_stage.
 392 | }
 393 | 
 394 | message NetStateRule { // One rule that a NetState either meets or does not meet.
 395 |   // Set phase to require the NetState have a particular phase (TRAIN or TEST)
 396 |   // to meet this rule.
 397 |   optional Phase phase = 1;
 398 | 
 399 |   // Set the minimum and/or maximum levels in which the layer should be used.
 400 |   // Leave undefined to meet the rule regardless of level.
 401 |   optional int32 min_level = 2;
 402 |   optional int32 max_level = 3;
 403 | 
 404 |   // Customizable sets of stages to include or exclude.
 405 |   // The net must have ALL of the specified stages and NONE of the specified
 406 |   // "not_stage"s to meet the rule.
 407 |   // (Use multiple NetStateRules to specify alternative sets of stages.)
 408 |   repeated string stage = 4;
 409 |   repeated string not_stage = 5;
 410 | }
 411 | 
 412 | // Specifies training parameters (multipliers on global learning constants,
 413 | // and the name and other settings used for weight sharing).
 414 | message ParamSpec {
 415 |   // The name of the parameter blob -- useful for sharing parameters among
 416 |   // layers, but never required otherwise.  To share a parameter between two
 417 |   // layers, give it a (non-empty) name.
 418 |   optional string name = 1;
 419 | 
 420 |   // Whether to require shared weights to have the same shape, or just the same
 421 |   // count -- defaults to STRICT if unspecified.
 422 |   optional DimCheckMode share_mode = 2;
 423 |   enum DimCheckMode {
 424 |     // STRICT (default) requires that num, channels, height, width each match.
 425 |     STRICT = 0;
 426 |     // PERMISSIVE requires only the count (num*channels*height*width) to match.
 427 |     PERMISSIVE = 1;
 428 |   }
 429 | 
 430 |   // The multiplier on the global learning rate for this parameter.
 431 |   optional float lr_mult = 3 [default = 1.0];
 432 | 
 433 |   // The multiplier on the global weight decay for this parameter.
 434 |   optional float decay_mult = 4 [default = 1.0];
 435 | }
 436 | 
 437 | // NOTE
 438 | // Update the next available ID when you add a new LayerParameter field.
 439 | // Fields numbered 200 and above (200-207, 211) form a separate range from the 1xx IDs.
 440 | // LayerParameter next available layer-specific ID: 150 (highest 1xx ID in use: center_loss_param = 149; 148 is unused)
 441 | message LayerParameter {
 442 |   optional string name = 1; // the layer name
 443 |   optional string type = 2; // the layer type
 444 |   repeated string bottom = 3; // the name of each bottom blob
 445 |   repeated string top = 4; // the name of each top blob
 446 | 
 447 |   // The train / test phase for computation.
 448 |   optional Phase phase = 10;
 449 | 
 450 |   // The amount of weight to assign each top blob in the objective.
 451 |   // Each layer assigns a default value, usually of either 0 or 1,
 452 |   // to each top blob.
 453 |   repeated float loss_weight = 5;
 454 | 
 455 |   // Specifies training parameters (multipliers on global learning constants,
 456 |   // and the name and other settings used for weight sharing).
 457 |   repeated ParamSpec param = 6;
 458 | 
 459 |   // The blobs containing the numeric parameters of the layer.
 460 |   repeated BlobProto blobs = 7;
 461 | 
 462 |   // Specifies whether to backpropagate to each bottom. If unspecified,
 463 |   // Caffe will automatically infer whether each input needs backpropagation
 464 |   // to compute parameter gradients. If set to true for some inputs,
 465 |   // backpropagation to those inputs is forced; if set false for some inputs,
 466 |   // backpropagation to those inputs is skipped.
 467 |   //
 468 |   // The size must be either 0 or equal to the number of bottoms.
 469 |   repeated bool propagate_down = 11;
 470 | 
 471 |   // Rules controlling whether and when a layer is included in the network,
 472 |   // based on the current NetState.  You may specify a non-zero number of rules
 473 |   // to include OR exclude, but not both.  If no include or exclude rules are
 474 |   // specified, the layer is always included.  If the current NetState meets
 475 |   // ANY (i.e., one or more) of the specified rules, the layer is
 476 |   // included/excluded.
 477 |   repeated NetStateRule include = 8;
 478 |   repeated NetStateRule exclude = 9;
 479 | 
 480 |   // Parameters for data pre-processing.
 481 |   optional TransformationParameter transform_param = 100;
 482 | 
 483 |   // Parameters shared by loss layers.
 484 |   optional LossParameter loss_param = 101;
 485 | 
 486 |   // Layer type-specific parameters.
 487 |   //
 488 |   // Note: certain layers may have more than one computational engine
 489 |   // for their implementation. These layers include an Engine type and
 490 |   // engine parameter for selecting the implementation.
 491 |   // The default for the engine is set by the ENGINE switch at compile-time.
 492 |   optional AccuracyParameter accuracy_param = 102;
 493 |   optional AnnotatedDataParameter annotated_data_param = 200;
 494 |   optional ArgMaxParameter argmax_param = 103;
 495 |   optional BatchNormParameter batch_norm_param = 139;
 496 |   optional BiasParameter bias_param = 141;
 497 |   optional CenterLossParameter center_loss_param = 149;
 498 |   optional ConcatParameter concat_param = 104;
 499 |   optional ContrastiveLossParameter contrastive_loss_param = 105;
 500 |   optional ConvolutionParameter convolution_param = 106;
 501 |   optional CropParameter crop_param = 144;
 502 |   optional DataParameter data_param = 107;
 503 |   optional DetectionEvaluateParameter detection_evaluate_param = 205;
 504 |   optional DetectionOutputParameter detection_output_param = 204;
 505 |   optional DropoutParameter dropout_param = 108;
 506 |   optional DummyDataParameter dummy_data_param = 109;
 507 |   optional EltwiseParameter eltwise_param = 110;
 508 |   optional ELUParameter elu_param = 140;
 509 |   optional EmbedParameter embed_param = 137;
 510 |   optional ExpParameter exp_param = 111;
 511 |   optional FlattenParameter flatten_param = 135;
 512 |   optional HDF5DataParameter hdf5_data_param = 112;
 513 |   optional HDF5OutputParameter hdf5_output_param = 113;
 514 |   optional HingeLossParameter hinge_loss_param = 114;
 515 |   optional ImageDataParameter image_data_param = 115;
 516 |   optional InfogainLossParameter infogain_loss_param = 116;
 517 |   optional InnerProductParameter inner_product_param = 117;
 518 |   optional InputParameter input_param = 143;
 519 |   optional LogParameter log_param = 134;
 520 |   optional LRNParameter lrn_param = 118;
 521 |   optional MemoryDataParameter memory_data_param = 119;
 522 |   optional MultiBoxLossParameter multibox_loss_param = 201;
 523 |   optional MultiBoxCenterLossParameter multibox_center_loss_param = 211;
 524 |   optional MVNParameter mvn_param = 120;
 525 |   optional NormalizeParameter norm_param = 206;
 526 |   optional ParameterParameter parameter_param = 145;
 527 |   optional PermuteParameter permute_param = 202;
 528 |   optional PoolingParameter pooling_param = 121;
 529 |   optional PowerParameter power_param = 122;
 530 |   optional PReLUParameter prelu_param = 131;
 531 |   optional PriorBoxParameter prior_box_param = 203;
 532 |   optional PythonParameter python_param = 130;
 533 |   optional RecurrentParameter recurrent_param = 146;
 534 |   optional ReductionParameter reduction_param = 136;
 535 |   optional ReLUParameter relu_param = 123;
 536 |   optional ReshapeParameter reshape_param = 133;
 537 |   optional ScaleParameter scale_param = 142;
 538 |   optional SigmoidParameter sigmoid_param = 124;
 539 |   optional SoftmaxParameter softmax_param = 125;
 540 |   optional SPPParameter spp_param = 132;
 541 |   optional SliceParameter slice_param = 126;
 542 |   optional TanHParameter tanh_param = 127;
 543 |   optional ThresholdParameter threshold_param = 128;
 544 |   optional TileParameter tile_param = 138;
 545 |   optional VideoDataParameter video_data_param = 207;
 546 |   optional WindowDataParameter window_data_param = 129;
 547 |   optional FocalLossParameter focal_loss_param = 147; // listed last, outside the alphabetical order used above
 548 | }
 549 | 
 550 | // Message that stores parameters used to apply transformation
 551 | // to the data layer's data
 552 | message TransformationParameter {
 553 |   // For data pre-processing, we can do simple scaling and subtracting the
 554 |   // data mean, if provided. Note that the mean subtraction is always carried
 555 |   // out before scaling.
 556 |   optional float scale = 1 [default = 1];
 557 |   // Specify if we want to randomly mirror data.
 558 |   optional bool mirror = 2 [default = false];
 559 |   // Specify if we would like to randomly crop an image.
 560 |   optional uint32 crop_size = 3 [default = 0];
 561 |   optional uint32 crop_h = 11 [default = 0];
 562 |   optional uint32 crop_w = 12 [default = 0];
 563 | 
 564 |   // mean_file and mean_value cannot be specified at the same time
 565 |   optional string mean_file = 4;
 566 |   // if specified can be repeated once (would subtract it from all the channels)
 567 |   // or can be repeated the same number of times as channels
 568 |   // (would subtract them from the corresponding channel)
 569 |   repeated float mean_value = 5;
 570 |   // Force the decoded image to have 3 color channels.
 571 |   optional bool force_color = 6 [default = false];
 572 |   // Force the decoded image to have 1 color channel.
 573 |   optional bool force_gray = 7 [default = false];
 574 |   // Resize policy
 575 |   optional ResizeParameter resize_param = 8;
 576 |   // Noise policy
 577 |   optional NoiseParameter noise_param = 9;
 578 |   // Distortion policy
 579 |   optional DistortionParameter distort_param = 13;
 580 |   // Expand policy
 581 |   optional ExpansionParameter expand_param = 14;
 582 |   // Constraint for emitting the annotation after transformation.
 583 |   optional EmitConstraint emit_constraint = 10;
 584 | }
 585 | 
 586 | // Message that stores parameters used by data transformer for resize policy
 587 | message ResizeParameter {
 588 |   // Probability of using this resize policy
 589 |   optional float prob = 1 [default = 1];
 590 | 
 591 |   enum Resize_mode {
 592 |     WARP = 1;
 593 |     FIT_SMALL_SIZE = 2;
 594 |     FIT_LARGE_SIZE_AND_PAD = 3;
 595 |   }
 596 |   optional Resize_mode resize_mode = 2 [default = WARP];
 597 |   optional uint32 height = 3 [default = 0];
 598 |   optional uint32 width = 4 [default = 0];
 599 |   // Parameters used to update bbox in FIT_SMALL_SIZE mode.
 600 |   optional uint32 height_scale = 8 [default = 0];
 601 |   optional uint32 width_scale = 9 [default = 0];
 602 | 
 603 |   enum Pad_mode {
 604 |     CONSTANT = 1;
 605 |     MIRRORED = 2;
 606 |     REPEAT_NEAREST = 3;
 607 |   }
 608 |   // Padding mode for FIT_LARGE_SIZE_AND_PAD mode and object centering
 609 |   optional Pad_mode pad_mode = 5 [default = CONSTANT];
 610 |   // if specified can be repeated once (would fill all the channels)
 611 |   // or can be repeated the same number of times as channels
 612 |   // (would use them for the corresponding channel)
 613 |   repeated float pad_value = 6;
 614 | 
 615 |   enum Interp_mode { // Same as in OpenCV
 616 |     LINEAR = 1;
 617 |     AREA = 2;
 618 |     NEAREST = 3;
 619 |     CUBIC = 4;
 620 |     LANCZOS4 = 5;
 621 |   }
 622 |   // Interpolation mode(s) for resizing
 623 |   repeated Interp_mode interp_mode = 7;
 624 | }
 625 | 
 626 | message SaltPepperParameter { // Settings for salt-and-pepper noise (see NoiseParameter.saltpepper_param).
 627 |   // Fraction of pixels (NOTE(review): original comment says "percentage"; confirm the expected range)
 628 |   optional float fraction = 1 [default = 0];
 629 |   repeated float value = 2; // presumably the noise pixel value(s) -- TODO confirm against the transformer code
 630 | }
 631 | 
 632 | // Message that stores parameters used by data transformer for noise
 633 | // policy
 634 | message NoiseParameter {
 635 |   // Probability of using this noise policy
 636 |   optional float prob = 1 [default = 0];
 637 |   // Histogram equalized
 638 |   optional bool hist_eq = 2 [default = false];
 639 |   // Color inversion
 640 |   optional bool inverse = 3 [default = false];
 641 |   // Grayscale
 642 |   optional bool decolorize = 4 [default = false];
 643 |   // Gaussian blur
 644 |   optional bool gauss_blur = 5 [default = false];
 645 | 
 646 |   // JPEG compression quality (-1 = no compression)
 647 |   optional float jpeg = 6 [default = -1];
 648 | 
 649 |   // Posterization
 650 |   optional bool posterize = 7 [default = false];
 651 | 
 652 |   // Erosion
 653 |   optional bool erode = 8 [default = false];
 654 | 
 655 |   // Salt-and-pepper noise
 656 |   optional bool saltpepper = 9 [default = false];
 657 | 
 658 |   optional SaltPepperParameter saltpepper_param = 10; // Settings for the salt-and-pepper noise above
 659 | 
 660 |   // Local histogram equalization
 661 |   optional bool clahe = 11 [default = false];
 662 | 
 663 |   // Color space conversion to HSV
 664 |   optional bool convert_to_hsv = 12 [default = false];
 665 | 
 666 |   // Color space conversion to Lab
 667 |   optional bool convert_to_lab = 13 [default = false];
 668 | }
 669 | 
 670 | // Message that stores parameters used by data transformer for distortion policy
 671 | message DistortionParameter {
 672 |   // The probability of adjusting brightness.
 673 |   optional float brightness_prob = 1 [default = 0.0];
 674 |   // Amount to add to the pixel values within [-delta, delta].
 675 |   // The possible value is within [0, 255]. Recommend 32.
 676 |   optional float brightness_delta = 2 [default = 0.0];
 677 | 
 678 |   // The probability of adjusting contrast.
 679 |   optional float contrast_prob = 3 [default = 0.0];
 680 |   // Lower bound for random contrast factor. Recommend 0.5.
 681 |   optional float contrast_lower = 4 [default = 0.0];
 682 |   // Upper bound for random contrast factor. Recommend 1.5.
 683 |   optional float contrast_upper = 5 [default = 0.0];
 684 | 
 685 |   // The probability of adjusting hue.
 686 |   optional float hue_prob = 6 [default = 0.0];
 687 |   // Amount to add to the hue channel within [-delta, delta].
 688 |   // The possible value is within [0, 180]. Recommend 36.
 689 |   optional float hue_delta = 7 [default = 0.0];
 690 | 
 691 |   // The probability of adjusting saturation.
 692 |   optional float saturation_prob = 8 [default = 0.0];
 693 |   // Lower bound for the random saturation factor. Recommend 0.5.
 694 |   optional float saturation_lower = 9 [default = 0.0];
 695 |   // Upper bound for the random saturation factor. Recommend 1.5.
 696 |   optional float saturation_upper = 10 [default = 0.0];
 697 | 
 698 |   // The probability of randomly reordering the image channels.
 699 |   optional float random_order_prob = 11 [default = 0.0];
 700 | }
 701 | 
 702 | // Message that stores parameters used by data transformer for expansion policy
 703 | message ExpansionParameter {
 704 |   // Probability of using this expansion policy
 705 |   optional float prob = 1 [default = 1];
 706 | 
 707 |   // The maximum ratio by which to expand the image.
 708 |   optional float max_expand_ratio = 2 [default = 1.];
 709 | }
 710 | 
 711 | // Message that stores parameters shared by loss layers (LayerParameter.loss_param)
 712 | message LossParameter {
 713 |   // If specified, ignore instances with the given label.
 714 |   optional int32 ignore_label = 1;
 715 |   // How to normalize the loss for loss layers that aggregate across batches,
 716 |   // spatial dimensions, or other dimensions.  Currently only implemented in
 717 |   // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
 718 |   enum NormalizationMode {
 719 |     // Divide by the number of examples in the batch times spatial dimensions.
 720 |     // Outputs that receive the ignore label will NOT be ignored in computing
 721 |     // the normalization factor.
 722 |     FULL = 0;
 723 |     // Divide by the total number of output locations that do not take the
 724 |     // ignore_label.  If ignore_label is not set, this behaves like FULL.
 725 |     VALID = 1;
 726 |     // Divide by the batch size.
 727 |     BATCH_SIZE = 2;
 728 |     // Do not normalize the loss.
 729 |     NONE = 3;
 730 |   }
 731 |   // For historical reasons, the default normalization for
 732 |   // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
 733 |   optional NormalizationMode normalization = 3 [default = VALID];
 734 |   // Deprecated.  Ignored if normalization is specified.  If normalization
 735 |   // is not specified, then setting this to false will be equivalent to
 736 |   // normalization = BATCH_SIZE to be consistent with previous behavior.
 737 |   optional bool normalize = 2;
 738 | }
 739 | 
 740 | // Messages that store parameters used by individual layer types follow, in
 741 | // alphabetical order.
 742 | 
 743 | message AccuracyParameter { // Referenced by LayerParameter.accuracy_param.
 744 |   // When computing accuracy, count as correct by comparing the true label to
 745 |   // the top k scoring classes.  By default, only compare to the top scoring
 746 |   // class (i.e. argmax).
 747 |   optional uint32 top_k = 1 [default = 1];
 748 | 
 749 |   // The "label" axis of the prediction blob, whose argmax corresponds to the
 750 |   // predicted label -- may be negative to index from the end (e.g., -1 for the
 751 |   // last axis).  For example, if axis == 1 and the predictions are
 752 |   // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
 753 |   // labels with integer values in {0, 1, ..., C-1}.
 754 |   optional int32 axis = 2 [default = 1];
 755 | 
 756 |   // If specified, ignore instances with the given label.
 757 |   optional int32 ignore_label = 3;
 758 | }
 759 | 
 760 | message AnnotatedDataParameter { // Referenced by LayerParameter.annotated_data_param.
 761 |   // Define the sampler(s); the field is repeated, so several may be given.
 762 |   repeated BatchSampler batch_sampler = 1;
 763 |   // Store label name and label id in LabelMap format.
 764 |   optional string label_map_file = 2;
 765 |   // If provided, it will replace the AnnotationType stored in each
 766 |   // AnnotatedDatum.
 767 |   optional AnnotatedDatum.AnnotationType anno_type = 3;
 768 | }
 769 | 
 770 | message ArgMaxParameter { // Referenced by LayerParameter.argmax_param.
 771 |   // If true, produce pairs (argmax, maxval)
 772 |   optional bool out_max_val = 1 [default = false];
 773 |   optional uint32 top_k = 2 [default = 1];
 774 |   // The axis along which to maximise -- may be negative to index from the
 775 |   // end (e.g., -1 for the last axis).
 776 |   // By default ArgMaxLayer maximizes over the flattened trailing dimensions
 777 |   // for each index of the first / num dimension.
 778 |   optional int32 axis = 3;
 779 | }
 780 | 
 781 | message ConcatParameter { // Referenced by LayerParameter.concat_param.
 782 |   // The axis along which to concatenate -- may be negative to index from the
 783 |   // end (e.g., -1 for the last axis).  Other axes must have the
 784 |   // same dimension for all the bottom blobs.
 785 |   // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
 786 |   optional int32 axis = 2 [default = 1];
 787 | 
 788 |   // DEPRECATED: alias for "axis" -- does not support negative indexing.
 789 |   optional uint32 concat_dim = 1 [default = 1];
 790 | }
 791 | 
 792 | message BatchNormParameter { // Referenced by LayerParameter.batch_norm_param.
 793 |   // If false, accumulate global mean/variance values via a moving average. If
 794 |   // true, use those accumulated values instead of computing mean/variance
 795 |   // across the batch.
 796 |   optional bool use_global_stats = 1;
 797 |   // How much the moving average decays each iteration.
 798 |   optional float moving_average_fraction = 2 [default = .999];
 799 |   // Small value to add to the variance estimate so that we don't divide by
 800 |   // zero.
 801 |   optional float eps = 3 [default = 1e-5];
 802 | }
 803 | 
 804 | message BiasParameter { // Referenced by LayerParameter.bias_param.
 805 |   // The first axis of bottom[0] (the first input Blob) along which to apply
 806 |   // bottom[1] (the second input Blob).  May be negative to index from the end
 807 |   // (e.g., -1 for the last axis).
 808 |   //
 809 |   // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
 810 |   // top[0] will have the same shape, and bottom[1] may have any of the
 811 |   // following shapes (for the given value of axis):
 812 |   //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
 813 |   //    (axis == 1 == -3)          3;     3x40;     3x40x60
 814 |   //    (axis == 2 == -2)                   40;       40x60
 815 |   //    (axis == 3 == -1)                                60
 816 |   // Furthermore, bottom[1] may have the empty shape (regardless of the value of
 817 |   // "axis") -- a scalar bias.
 818 |   optional int32 axis = 1 [default = 1];
 819 | 
 820 |   // (num_axes is ignored unless just one bottom is given and the bias is
 821 |   // a learned parameter of the layer.  Otherwise, num_axes is determined by the
 822 |   // number of axes of the second bottom.)
 823 |   // The number of axes of the input (bottom[0]) covered by the bias
 824 |   // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
 825 |   // Set num_axes := 0, to add a zero-axis Blob: a scalar.
 826 |   optional int32 num_axes = 2 [default = 1];
 827 | 
 828 |   // (filler is ignored unless just one bottom is given and the bias is
 829 |   // a learned parameter of the layer.)
 830 |   // The initialization for the learned bias parameter.
 831 |   // Default is the zero (0) initialization, resulting in the BiasLayer
 832 |   // initially performing the identity operation.
 833 |   optional FillerParameter filler = 3;
 834 | }
 835 | 
 836 | message ContrastiveLossParameter { // Referenced by LayerParameter.contrastive_loss_param.
 837 |   // margin for dissimilar pair
 838 |   optional float margin = 1 [default = 1.0];
 839 |   // The first implementation of this cost did not exactly match the cost of
 840 |   // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
 841 |   // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
 842 |   // Hadsell paper. New models should probably use this version.
 843 |   // legacy_version = true uses (margin - d^2). This is kept to support /
 844 |   // reproduce existing models and results.
 845 |   optional bool legacy_version = 2 [default = false];
 846 | }
 847 | 
 848 | message ConvolutionParameter { // Referenced by LayerParameter.convolution_param.
 849 |   optional uint32 num_output = 1; // The number of outputs for the layer
 850 |   optional bool bias_term = 2 [default = true]; // whether to have bias terms
 851 | 
 852 |   // Pad, kernel size, and stride are all given as a single value for equal
 853 |   // dimensions in all spatial dimensions, or once per spatial dimension.
 854 |   repeated uint32 pad = 3; // The padding size; defaults to 0
 855 |   repeated uint32 kernel_size = 4; // The kernel size
 856 |   repeated uint32 stride = 6; // The stride; defaults to 1
 857 |   // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
 858 |   // holes. (Kernel dilation is sometimes referred to by its use in the
 859 |   // algorithme à trous from Holschneider et al. 1987.)
 860 |   repeated uint32 dilation = 18; // The dilation; defaults to 1
 861 | 
 862 |   // For 2D convolution only, the *_h and *_w versions may also be used to
 863 |   // specify both spatial dimensions.
 864 |   optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
 865 |   optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
 866 |   optional uint32 kernel_h = 11; // The kernel height (2D only)
 867 |   optional uint32 kernel_w = 12; // The kernel width (2D only)
 868 |   optional uint32 stride_h = 13; // The stride height (2D only)
 869 |   optional uint32 stride_w = 14; // The stride width (2D only)
 870 | 
 871 |   optional uint32 group = 5 [default = 1]; // The group size for group conv
 872 | 
 873 |   optional FillerParameter weight_filler = 7; // The filler for the weight
 874 |   optional FillerParameter bias_filler = 8; // The filler for the bias
 875 |   enum Engine { // Selects the computational implementation of the layer.
 876 |     DEFAULT = 0;
 877 |     CAFFE = 1;
 878 |     CUDNN = 2;
 879 |   }
 880 |   optional Engine engine = 15 [default = DEFAULT];
 881 | 
 882 |   // The axis to interpret as "channels" when performing convolution.
 883 |   // Preceding dimensions are treated as independent inputs;
 884 |   // succeeding dimensions are treated as "spatial".
 885 |   // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
 886 |   // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
 887 |   // groups g>1) filters across the spatial axes (H, W) of the input.
 888 |   // With (N, C, D, H, W) inputs, and axis == 1, we perform
 889 |   // N independent 3D convolutions, sliding (C/g)-channels
 890 |   // filters across the spatial axes (D, H, W) of the input.
 891 |   optional int32 axis = 16 [default = 1];
 892 | 
 893 |   // Whether to force use of the general ND convolution, even if a specific
 894 |   // implementation for blobs of the appropriate number of spatial dimensions
 895 |   // is available. (Currently, there is only a 2D-specific convolution
 896 |   // implementation; for input blobs with num_axes != 2, this option is
 897 |   // ignored and the ND implementation will be used.)
 898 |   optional bool force_nd_im2col = 17 [default = false];
 899 | }
 900 | 
 901 | message CropParameter { // Referenced by LayerParameter.crop_param.
 902 |   // To crop, elements of the first bottom are selected to fit the dimensions
 903 |   // of the second, reference bottom. The crop is configured by
 904 |   // - the crop `axis` to pick the dimensions for cropping
 905 |   // - the crop `offset` to set the shift for all/each dimension
 906 |   // to align the cropped bottom with the reference bottom.
 907 |   // All dimensions up to but excluding `axis` are preserved, while
 908 |   // the dimensions including and trailing `axis` are cropped.
 909 |   // If only one `offset` is set, then all dimensions are offset by this amount.
 910 |   // Otherwise, the number of offsets must equal the number of cropped axes to
 911 |   // shift the crop in each dimension accordingly.
 912 |   // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
 913 |   // and `axis` may be negative to index from the end (e.g., -1 for the last
 914 |   // axis).
 915 |   optional int32 axis = 1 [default = 2];
 916 |   repeated uint32 offset = 2;
 917 | }
 918 | 
 919 | message DataParameter { // Referenced by LayerParameter.data_param.
 920 |   enum DB { // Database backend holding the data.
 921 |     LEVELDB = 0;
 922 |     LMDB = 1;
 923 |   }
 924 |   // Specify the data source.
 925 |   optional string source = 1;
 926 |   // Specify the batch size.
 927 |   optional uint32 batch_size = 4;
 928 |   // The rand_skip variable is for the data layer to skip a few data points
 929 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
 930 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
 931 |   // be larger than the number of keys in the database.
 932 |   // DEPRECATED. Each solver accesses a different subset of the database.
 933 |   optional uint32 rand_skip = 7 [default = 0];
 934 |   optional DB backend = 8 [default = LEVELDB];
 935 |   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
 936 |   // simple scaling and subtracting the data mean, if provided. Note that the
 937 |   // mean subtraction is always carried out before scaling.
 938 |   optional float scale = 2 [default = 1];
 939 |   optional string mean_file = 3;
 940 |   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
 941 |   // crop an image.
 942 |   optional uint32 crop_size = 5 [default = 0];
 943 |   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
 944 |   // data.
 945 |   optional bool mirror = 6 [default = false];
 946 |   // Force the encoded image to have 3 color channels
 947 |   optional bool force_encoded_color = 9 [default = false];
 948 |   // Prefetch queue (Number of batches to prefetch to host memory, increase if
 949 |   // data access bandwidth varies).
 950 |   optional uint32 prefetch = 10 [default = 4];
 951 | }
 952 | 
 953 | // Message that stores parameters used by DetectionEvaluateLayer
 954 | message DetectionEvaluateParameter {
 955 |   // Number of classes that are actually predicted. Required!
 956 |   optional uint32 num_classes = 1;
 957 |   // Label id for background class. Needed for sanity check so that
 958 |   // background class is neither in the ground truth nor the detections.
 959 |   optional uint32 background_label_id = 2 [default = 0];
 960 |   // Threshold for deciding true/false positive.
 961 |   optional float overlap_threshold = 3 [default = 0.5];
 962 |   // If true, also consider difficult ground truth for evaluation.
 963 |   optional bool evaluate_difficult_gt = 4 [default = true];
 964 |   // A file which contains a list of names and sizes in the same order
 965 |   // as the input DB. The file is in the following format:
 966 |   //    name height width
 967 |   //    ...
 968 |   // If provided, we will scale the prediction and ground truth NormalizedBBox
 969 |   // for evaluation.
 970 |   optional string name_size_file = 5;
 971 |   // The resize parameter used in converting NormalizedBBox to original image.
 972 |   optional ResizeParameter resize_param = 6;
 973 | }
 974 | 
 975 | message NonMaximumSuppressionParameter {
 976 |   // Threshold to be used in nms.
 977 |   optional float nms_threshold = 1 [default = 0.3];
 978 |   // Maximum number of results to be kept.
 979 |   optional int32 top_k = 2;
 980 |   // Parameter for adaptive nms.
 981 |   optional float eta = 3 [default = 1.0];
 982 | }
 983 | 
 984 | message SaveOutputParameter {
 985 |   // Output directory. If not empty, we will save the results.
 986 |   optional string output_directory = 1;
 987 |   // Output name prefix.
 988 |   optional string output_name_prefix = 2;
 989 |   // Output format.
 990 |   //    VOC - PASCAL VOC output format.
 991 |   //    COCO - MS COCO output format.
 992 |   optional string output_format = 3;
 993 |   // If you want to output results, must also provide the following two files.
 994 |   // Otherwise, we will ignore saving results.
 995 |   // label map file.
 996 |   optional string label_map_file = 4;
 997 |   // A file which contains a list of names and sizes with same order
 998 |   // of the input DB. The file is in the following format:
 999 |   //    name height width
1000 |   //    ...
1001 |   optional string name_size_file = 5;
1002 |   // Number of test images. It can be less than the lines specified in
1003 |   // name_size_file. For example, when we only want to evaluate on part
1004 |   // of the test images.
1005 |   optional uint32 num_test_image = 6;
1006 |   // The resize parameter used in saving the data.
1007 |   optional ResizeParameter resize_param = 7;
1008 | }
1009 | 
1010 | // Message that stores parameters used by DetectionOutputLayer
1011 | message DetectionOutputParameter {
1012 |   // Number of classes to be predicted. Required!
1013 |   optional uint32 num_classes = 1;
1014 |   // If true, bounding boxes are shared among different classes.
1015 |   optional bool share_location = 2 [default = true];
1016 |   // Background label id. If there is no background class,
1017 |   // set it as -1.
1018 |   optional int32 background_label_id = 3 [default = 0];
1019 |   // Parameters used for non maximum suppression.
1020 |   optional NonMaximumSuppressionParameter nms_param = 4;
1021 |   // Parameters used for saving detection results.
1022 |   optional SaveOutputParameter save_output_param = 5;
1023 |   // Type of coding method for bbox.
1024 |   optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
1025 |   // If true, variance is encoded in target; otherwise we need to adjust the
1026 |   // predicted offset accordingly.
1027 |   optional bool variance_encoded_in_target = 8 [default = false];
1028 |   // Number of total bboxes to be kept per image after nms step.
1029 |   // -1 means keeping all bboxes after nms step.
1030 |   optional int32 keep_top_k = 7 [default = -1];
1031 |   // Only consider detections whose confidences are larger than a threshold.
1032 |   // If not provided, consider all boxes.
1033 |   optional float confidence_threshold = 9;
1034 |   // If true, visualize the detection results.
1035 |   optional bool visualize = 10 [default = false];
1036 |   // The threshold used to visualize the detection results.
1037 |   optional float visualize_threshold = 11;
1038 |   // If provided, save outputs to video file.
1039 |   optional string save_file = 12;
1040 | }
1041 | 
1042 | message DropoutParameter {
1043 |   optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
1044 | }
1045 | 
1046 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random
1047 | // (or constant) data generated by "Fillers" (see "message FillerParameter").
1048 | message DummyDataParameter {
1049 |   // This layer produces N >= 1 top blobs.  DummyDataParameter must specify 1 or N
1050 |   // shape fields, and 0, 1 or N data_fillers.
1051 |   //
1052 |   // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
1053 |   // If 1 data_filler is specified, it is applied to all top blobs.  If N are
1054 |   // specified, the ith is applied to the ith top blob.
1055 |   repeated FillerParameter data_filler = 1;
1056 |   repeated BlobShape shape = 6;
1057 | 
1058 |   // 4D dimensions -- deprecated.  Use "shape" instead.
1059 |   repeated uint32 num = 2;
1060 |   repeated uint32 channels = 3;
1061 |   repeated uint32 height = 4;
1062 |   repeated uint32 width = 5;
1063 | }
1064 | 
1065 | message EltwiseParameter {
1066 |   enum EltwiseOp {
1067 |     PROD = 0;
1068 |     SUM = 1;
1069 |     MAX = 2;
1070 |   }
1071 |   optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
1072 |   repeated float coeff = 2; // blob-wise coefficient for SUM operation
1073 | 
1074 |   // Whether to use an asymptotically slower (for >2 inputs) but stabler method
1075 |   // of computing the gradient for the PROD operation. (No effect for SUM op.)
1076 |   optional bool stable_prod_grad = 3 [default = true];
1077 | }
1078 | 
1079 | // Message that stores parameters used by ELULayer
1080 | message ELUParameter {
1081 |   // Described in:
1082 |   // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
1083 |   // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
1084 |   optional float alpha = 1 [default = 1];
1085 | }
1086 | 
1087 | // Message that stores parameters used by EmbedLayer
1088 | message EmbedParameter {
1089 |   optional uint32 num_output = 1; // The number of outputs for the layer
1090 |   // The input is given as integers to be interpreted as one-hot
1091 |   // vector indices with dimension input_dim.  Hence input_dim should be
1092 |   // 1 greater than the maximum possible input value.
1093 |   optional uint32 input_dim = 2;
1094 | 
1095 |   optional bool bias_term = 3 [default = true]; // Whether to use a bias term
1096 |   optional FillerParameter weight_filler = 4; // The filler for the weight
1097 |   optional FillerParameter bias_filler = 5; // The filler for the bias
1098 | 
1099 | }
1100 | 
1101 | // Message that stores parameters used by ExpLayer
1102 | message ExpParameter {
1103 |   // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
1104 |   // Or if base is set to the default (-1), base is set to e,
1105 |   // so y = exp(shift + scale * x).
1106 |   optional float base = 1 [default = -1.0];
1107 |   optional float scale = 2 [default = 1.0];
1108 |   optional float shift = 3 [default = 0.0];
1109 | }
1110 | 
1111 | /// Message that stores parameters used by FlattenLayer
1112 | message FlattenParameter {
1113 |   // The first axis to flatten: all preceding axes are retained in the output.
1114 |   // May be negative to index from the end (e.g., -1 for the last axis).
1115 |   optional int32 axis = 1 [default = 1];
1116 | 
1117 |   // The last axis to flatten: all following axes are retained in the output.
1118 |   // May be negative to index from the end (e.g., the default -1 for the last
1119 |   // axis).
1120 |   optional int32 end_axis = 2 [default = -1];
1121 | }
1122 | 
1123 | // Message that stores parameters used by HDF5DataLayer
1124 | message HDF5DataParameter {
1125 |   // Specify the data source.
1126 |   optional string source = 1;
1127 |   // Specify the batch size.
1128 |   optional uint32 batch_size = 2;
1129 | 
1130 |   // Specify whether to shuffle the data.
1131 |   // If shuffle == true, the ordering of the HDF5 files is shuffled,
1132 |   // and the ordering of data within any given HDF5 file is shuffled,
1133 |   // but data between different files are not interleaved; all of a file's
1134 |   // data are output (in a random order) before moving onto another file.
1135 |   optional bool shuffle = 3 [default = false];
1136 | }
1137 | 
1138 | message HDF5OutputParameter {
1139 |   optional string file_name = 1;
1140 | }
1141 | 
1142 | message HingeLossParameter {
1143 |   enum Norm {
1144 |     L1 = 1;
1145 |     L2 = 2;
1146 |   }
1147 |   // Specify the Norm to use L1 or L2
1148 |   optional Norm norm = 1 [default = L1];
1149 | }
1150 | 
1151 | message ImageDataParameter {
1152 |   // Specify the data source.
1153 |   optional string source = 1;
1154 |   // Specify the batch size.
1155 |   optional uint32 batch_size = 4 [default = 1];
1156 |   // The rand_skip variable is for the data layer to skip a few data points
1157 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
1158 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
1159 |   // be larger than the number of keys in the database.
1160 |   optional uint32 rand_skip = 7 [default = 0];
1161 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
1162 |   optional bool shuffle = 8 [default = false];
1163 |   // It will also resize images if new_height or new_width are not zero.
1164 |   optional uint32 new_height = 9 [default = 0];
1165 |   optional uint32 new_width = 10 [default = 0];
1166 |   // Specify if the images are color or gray
1167 |   optional bool is_color = 11 [default = true];
1168 |   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
1169 |   // simple scaling and subtracting the data mean, if provided. Note that the
1170 |   // mean subtraction is always carried out before scaling.
1171 |   optional float scale = 2 [default = 1];
1172 |   optional string mean_file = 3;
1173 |   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
1174 |   // crop an image.
1175 |   optional uint32 crop_size = 5 [default = 0];
1176 |   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
1177 |   // data.
1178 |   optional bool mirror = 6 [default = false];
1179 |   optional string root_folder = 12 [default = ""];
1180 | }
1181 | 
1182 | message InfogainLossParameter {
1183 |   // Specify the infogain matrix source.
1184 |   optional string source = 1;
1185 | }
1186 | 
1187 | message InnerProductParameter {
1188 |   optional uint32 num_output = 1; // The number of outputs for the layer
1189 |   optional bool bias_term = 2 [default = true]; // whether to have bias terms
1190 |   optional FillerParameter weight_filler = 3; // The filler for the weight
1191 |   optional FillerParameter bias_filler = 4; // The filler for the bias
1192 | 
1193 |   // The first axis to be lumped into a single inner product computation;
1194 |   // all preceding axes are retained in the output.
1195 |   // May be negative to index from the end (e.g., -1 for the last axis).
1196 |   optional int32 axis = 5 [default = 1];
1197 |   // Specify whether to transpose the weight matrix or not.
1198 |   // If transpose == true, any operations will be performed on the transpose
1199 |   // of the weight matrix. The weight matrix itself is not going to be transposed
1200 |   // but rather the transfer flag of operations will be toggled accordingly.
1201 |   optional bool transpose = 6 [default = false];
1202 | }
1203 | 
1204 | message InputParameter {
1205 |   // This layer produces N >= 1 top blob(s) to be assigned manually.
1206 |   // Define N shapes to set a shape for each top.
1207 |   // Define 1 shape to set the same shape for every top.
1208 |   // Define no shape to defer to reshaping manually.
1209 |   repeated BlobShape shape = 1;
1210 | }
1211 | 
1212 | // Message that stores parameters used by LogLayer
1213 | message LogParameter {
1214 |   // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
1215 |   // Or if base is set to the default (-1), base is set to e,
1216 |   // so y = ln(shift + scale * x) = log_e(shift + scale * x)
1217 |   optional float base = 1 [default = -1.0];
1218 |   optional float scale = 2 [default = 1.0];
1219 |   optional float shift = 3 [default = 0.0];
1220 | }
1221 | 
1222 | // Message that stores parameters used by LRNLayer
1223 | message LRNParameter {
1224 |   optional uint32 local_size = 1 [default = 5];
1225 |   optional float alpha = 2 [default = 1.];
1226 |   optional float beta = 3 [default = 0.75];
1227 |   enum NormRegion {
1228 |     ACROSS_CHANNELS = 0;
1229 |     WITHIN_CHANNEL = 1;
1230 |   }
1231 |   optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
1232 |   optional float k = 5 [default = 1.];
1233 |   enum Engine {
1234 |     DEFAULT = 0;
1235 |     CAFFE = 1;
1236 |     CUDNN = 2;
1237 |   }
1238 |   optional Engine engine = 6 [default = DEFAULT];
1239 | }
1240 | 
1241 | message MemoryDataParameter {
1242 |   optional uint32 batch_size = 1;
1243 |   optional uint32 channels = 2;
1244 |   optional uint32 height = 3;
1245 |   optional uint32 width = 4;
1246 | }
1247 | 
1248 | // Message that stores parameters used by MultiBoxLossLayer
1249 | message MultiBoxLossParameter {
1250 |   // Localization loss type.
1251 |   enum LocLossType {
1252 |     L2 = 0;
1253 |     SMOOTH_L1 = 1;
1254 |   }
1255 |   optional LocLossType loc_loss_type = 1 [default = SMOOTH_L1];
1256 |   // Confidence loss type.
1257 |   enum ConfLossType {
1258 |     SOFTMAX = 0;
1259 |     LOGISTIC = 1;
1260 |   }
1261 |   optional ConfLossType conf_loss_type = 2 [default = SOFTMAX];
1262 |   // Weight for localization loss.
1263 |   optional float loc_weight = 3 [default = 1.0];
1264 |   // Weight for center loss.
1265 |   optional float center_loss_weight = 24 [default = 1.0];
1266 |   // Number of classes to be predicted. Required!
1267 |   optional uint32 num_classes = 4;
1268 |   // If true, bounding boxes are shared among different classes.
1269 |   optional bool share_location = 5 [default = true];
1270 |   // Matching method during training.
1271 |   enum MatchType {
1272 |     BIPARTITE = 0;
1273 |     PER_PREDICTION = 1;
1274 |   }
1275 |   optional MatchType match_type = 6 [default = PER_PREDICTION];
1276 |   // If match_type is PER_PREDICTION, use overlap_threshold to
1277 |   // determine the extra matching bboxes.
1278 |   optional float overlap_threshold = 7 [default = 0.5];
1279 |   // Use prior for matching.
1280 |   optional bool use_prior_for_matching = 8 [default = true];
1281 |   // Background label id.
1282 |   optional uint32 background_label_id = 9 [default = 0];
1283 |   // If true, also consider difficult ground truth.
1284 |   optional bool use_difficult_gt = 10 [default = true];
1285 |   // If true, perform negative mining.
1286 |   // DEPRECATED: use mining_type instead.
1287 |   optional bool do_neg_mining = 11;
1288 |   // The negative/positive ratio.
1289 |   optional float neg_pos_ratio = 12 [default = 3.0];
1290 |   // The negative overlap upperbound for the unmatched predictions.
1291 |   optional float neg_overlap = 13 [default = 0.5];
1292 |   // Type of coding method for bbox.
1293 |   optional PriorBoxParameter.CodeType code_type = 14 [default = CORNER];
1294 |   // If true, encode the variance of prior box in the loc loss target instead of
1295 |   // in bbox.
1296 |   optional bool encode_variance_in_target = 16 [default = false];
1297 |   // If true, map all object classes to agnostic class. It is useful for learning
1298 |   // objectness detector.
1299 |   optional bool map_object_to_agnostic = 17 [default = false];
1300 |   // If true, ignore cross boundary bbox during matching.
1301 |   // Cross boundary bbox is a bbox who is outside of the image region.
1302 |   optional bool ignore_cross_boundary_bbox = 18 [default = false];
1303 |   // If true, only backpropagate on corners which are inside of the image
1304 |   // region when code_type is CORNER or CORNER_SIZE.
1305 |   optional bool bp_inside = 19 [default = false];
1306 |   // Mining type during training.
1307 |   //   NONE : use all negatives.
1308 |   //   MAX_NEGATIVE : select negatives based on the score.
1309 |   //   HARD_EXAMPLE : select hard examples based on "Training Region-based Object Detectors with Online Hard Example Mining", Shrivastava et al.
1310 |   enum MiningType {
1311 |     NONE = 0;
1312 |     MAX_NEGATIVE = 1;
1313 |     HARD_EXAMPLE = 2;
1314 |   }
1315 |   optional MiningType mining_type = 20 [default = MAX_NEGATIVE];
1316 |   // Parameters used for non maximum suppression during hard example mining.
1317 |   optional NonMaximumSuppressionParameter nms_param = 21;
1318 |   optional int32 sample_size = 22 [default = 64];
1319 |   optional bool use_prior_for_nms = 23 [default = false];
1320 | }
1321 | 
1322 | 
1323 | 
1324 | message MVNParameter {
1325 |   // This parameter can be set to false to normalize mean only
1326 |   optional bool normalize_variance = 1 [default = true];
1327 | 
1328 |   // This parameter can be set to true to perform DNN-like MVN
1329 |   optional bool across_channels = 2 [default = false];
1330 | 
1331 |   // Epsilon for not dividing by zero while normalizing variance
1332 |   optional float eps = 3 [default = 1e-9];
1333 | }
1334 | 
1335 | // Message that stores parameters used by NormalizeLayer
1336 | message NormalizeParameter {
1337 |   optional bool across_spatial = 1 [default = true];
1338 |   // Initial value of scale. Default is 1.0 for all
1339 |   optional FillerParameter scale_filler = 2;
1340 |   // Whether or not scale parameters are shared across channels.
1341 |   optional bool channel_shared = 3 [default = true];
1342 |   // Epsilon for not dividing by zero while normalizing variance
1343 |   optional float eps = 4 [default = 1e-10];
1344 | }
1345 | 
1346 | message ParameterParameter {
1347 |   optional BlobShape shape = 1;
1348 | }
1349 | 
1350 | message PermuteParameter {
1351 |   // The new order of the axes of data. Note that it should be
1352 |   // within the same range as the input data, and it starts from 0.
1353 |   // Do not provide a repeated order.
1354 |   repeated uint32 order = 1;
1355 | }
1356 | 
1357 | message PoolingParameter {
1358 |   enum PoolMethod {
1359 |     MAX = 0;
1360 |     AVE = 1;
1361 |     STOCHASTIC = 2;
1362 |   }
1363 |   optional PoolMethod pool = 1 [default = MAX]; // The pooling method
1364 |   // Pad, kernel size, and stride are all given as a single value for equal
1365 |   // dimensions in height and width or as Y, X pairs.
1366 |   optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
1367 |   optional uint32 pad_h = 9 [default = 0]; // The padding height
1368 |   optional uint32 pad_w = 10 [default = 0]; // The padding width
1369 |   optional uint32 kernel_size = 2; // The kernel size (square)
1370 |   optional uint32 kernel_h = 5; // The kernel height
1371 |   optional uint32 kernel_w = 6; // The kernel width
1372 |   optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
1373 |   optional uint32 stride_h = 7; // The stride height
1374 |   optional uint32 stride_w = 8; // The stride width
1375 |   enum Engine {
1376 |     DEFAULT = 0;
1377 |     CAFFE = 1;
1378 |     CUDNN = 2;
1379 |   }
1380 |   optional Engine engine = 11 [default = DEFAULT];
1381 |   // If global_pooling then it will pool over the size of the bottom by doing
1382 |   // kernel_h = bottom->height and kernel_w = bottom->width
1383 |   optional bool global_pooling = 12 [default = false];
1384 | }
1385 | 
1386 | message PowerParameter {
1387 |   // PowerLayer computes outputs y = (shift + scale * x) ^ power.
1388 |   optional float power = 1 [default = 1.0];
1389 |   optional float scale = 2 [default = 1.0];
1390 |   optional float shift = 3 [default = 0.0];
1391 | }
1392 | 
1393 | // Message that stores parameters used by PriorBoxLayer
1394 | message PriorBoxParameter {
1395 |   // Encode/decode type.
1396 |   enum CodeType {
1397 |     CORNER = 1;
1398 |     CENTER_SIZE = 2;
1399 |     CORNER_SIZE = 3;
1400 |   }
1401 |   // Minimum box size (in pixels). Required!
1402 |   repeated float min_size = 1;
1403 |   // Maximum box size (in pixels). Required!
1404 |   repeated float max_size = 2;
1405 |   // Various aspect ratios. Duplicate ratios will be ignored.
1406 |   // If none is provided, we use default ratio 1.
1407 |   repeated float aspect_ratio = 3;
1408 |   // If true, will flip each aspect ratio.
1409 |   // For example, if there is aspect ratio "r",
1410 |   // we will generate aspect ratio "1.0/r" as well.
1411 |   optional bool flip = 4 [default = true];
1412 |   // If true, will clip the prior so that it is within [0, 1]
1413 |   optional bool clip = 5 [default = false];
1414 |   // Variance for adjusting the prior bboxes.
1415 |   repeated float variance = 6;
1416 |   // By default, we calculate img_height, img_width, step_x, step_y based on
1417 |   // bottom[0] (feat) and bottom[1] (img), unless these values are explicitly
1418 |   // provided.
1419 |   // Explicitly provide the img_size.
1420 |   optional uint32 img_size = 7;
1421 |   // Either img_size or img_h/img_w should be specified; not both.
1422 |   optional uint32 img_h = 8;
1423 |   optional uint32 img_w = 9;
1424 | 
1425 |   // Explicitly provide the step size.
1426 |   optional float step = 10;
1427 |   // Either step or step_h/step_w should be specified; not both.
1428 |   optional float step_h = 11;
1429 |   optional float step_w = 12;
1430 | 
1431 |   // Offset to the top left corner of each cell.
1432 |   optional float offset = 13 [default = 0.5];
1433 | }
1434 | 
1435 | message PythonParameter {
1436 |   optional string module = 1;
1437 |   optional string layer = 2;
1438 |   // This value is set to the attribute `param_str` of the `PythonLayer` object
1439 |   // in Python before calling the `setup()` method. This could be a number,
1440 |   // string, dictionary in Python dict format, JSON, etc. You may parse this
1441 |   // string in `setup` method and use it in `forward` and `backward`.
1442 |   optional string param_str = 3 [default = ''];
1443 |   // Whether this PythonLayer is shared among worker solvers during data parallelism.
1444 |   // If true, each worker solver sequentially run forward from this layer.
1445 |   // This value should be set true if you are using it as a data layer.
1446 |   optional bool share_in_parallel = 4 [default = false];
1447 | }
1448 | 
1449 | // Message that stores parameters used by RecurrentLayer
1450 | message RecurrentParameter {
1451 |   // The dimension of the output (and usually hidden state) representation --
1452 |   // must be explicitly set to non-zero.
1453 |   optional uint32 num_output = 1 [default = 0];
1454 | 
1455 |   optional FillerParameter weight_filler = 2; // The filler for the weight
1456 |   optional FillerParameter bias_filler = 3; // The filler for the bias
1457 | 
1458 |   // Whether to enable displaying debug_info in the unrolled recurrent net.
1459 |   optional bool debug_info = 4 [default = false];
1460 | 
1461 |   // Whether to add as additional inputs (bottoms) the initial hidden state
1462 |   // blobs, and add as additional outputs (tops) the final timestep hidden state
1463 |   // blobs.  The number of additional bottom/top blobs required depends on the
1464 |   // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
1465 |   optional bool expose_hidden = 5 [default = false];
1466 | }
1467 | 
1468 | // Message that stores parameters used by ReductionLayer
1469 | message ReductionParameter {
1470 |   enum ReductionOp {
1471 |     SUM = 1;
1472 |     ASUM = 2;
1473 |     SUMSQ = 3;
1474 |     MEAN = 4;
1475 |   }
1476 | 
1477 |   optional ReductionOp operation = 1 [default = SUM]; // reduction operation
1478 | 
1479 |   // The first axis to reduce to a scalar -- may be negative to index from the
1480 |   // end (e.g., -1 for the last axis).
1481 |   // (Currently, only reduction along ALL "tail" axes is supported; reduction
1482 |   // of axis M through N, where N < num_axes - 1, is unsupported.)
1483 |   // Suppose we have an n-axis bottom Blob with shape:
1484 |   //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
1485 |   // If axis == m, the output Blob will have shape
1486 |   //     (d0, d1, d2, ..., d(m-1)),
1487 |   // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
1488 |   // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
1489 |   // If axis == 0 (the default), the output Blob always has the empty shape
1490 |   // (count 1), performing reduction across the entire input --
1491 |   // often useful for creating new loss functions.
1492 |   optional int32 axis = 2 [default = 0];
1493 | 
1494 |   optional float coeff = 3 [default = 1.0]; // coefficient for output
1495 | }
1496 | 
1497 | // Message that stores parameters used by ReLULayer
1498 | message ReLUParameter {
1499 |   // Allow non-zero slope for negative inputs to speed up optimization
1500 |   // Described in:
1501 |   // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
1502 |   // improve neural network acoustic models. In ICML Workshop on Deep Learning
1503 |   // for Audio, Speech, and Language Processing.
1504 |   optional float negative_slope = 1 [default = 0];
1505 |   enum Engine {
1506 |     DEFAULT = 0;
1507 |     CAFFE = 1;
1508 |     CUDNN = 2;
1509 |   }
1510 |   optional Engine engine = 2 [default = DEFAULT];
1511 | }
1512 | 
1513 | message ReshapeParameter {
1514 |   // Specify the output dimensions. If some of the dimensions are set to 0,
1515 |   // the corresponding dimension from the bottom layer is used (unchanged).
1516 |   // Exactly one dimension may be set to -1, in which case its value is
1517 |   // inferred from the count of the bottom blob and the remaining dimensions.
1518 |   // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
1519 |   //
1520 |   //   layer {
1521 |   //     type: "Reshape" bottom: "input" top: "output"
1522 |   //     reshape_param { ... }
1523 |   //   }
1524 |   //
1525 |   // If "input" is 2D with shape 2 x 8, then the following reshape_param
1526 |   // specifications are all equivalent, producing a 3D blob "output" with shape
1527 |   // 2 x 2 x 4:
1528 |   //
1529 |   //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
1530 |   //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
1531 |   //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
1532 |   //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
1533 |   //
1534 |   optional BlobShape shape = 1;
1535 | 
1536 |   // axis and num_axes control the portion of the bottom blob's shape that are
1537 |   // replaced by (included in) the reshape. By default (axis == 0 and
1538 |   // num_axes == -1), the entire bottom blob shape is included in the reshape,
1539 |   // and hence the shape field must specify the entire output shape.
1540 |   //
1541 |   // axis may be non-zero to retain some portion of the beginning of the input
1542 |   // shape (and may be negative to index from the end; e.g., -1 to begin the
1543 |   // reshape after the last axis, including nothing in the reshape,
1544 |   // -2 to include only the last axis, etc.).
1545 |   //
1546 |   // For example, suppose "input" is a 2D blob with shape 2 x 8.
1547 |   // Then the following ReshapeLayer specifications are all equivalent,
1548 |   // producing a blob "output" with shape 2 x 2 x 4:
1549 |   //
1550 |   //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
1551 |   //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
1552 |   //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
1553 |   //
1554 |   // num_axes specifies the extent of the reshape.
1555 |   // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
1556 |   // input axes in the half-open range [axis, axis+num_axes).
1557 |   // num_axes may also be -1, the default, to include all remaining axes
1558 |   // (starting from axis).
1559 |   //
1560 |   // For example, suppose "input" is a 2D blob with shape 2 x 8.
1561 |   // Then the following ReshapeLayer specifications are equivalent,
1562 |   // producing a blob "output" with shape 1 x 2 x 8.
1563 |   //
1564 |   //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
1565 |   //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
1566 |   //   reshape_param { shape { dim:  1  }  num_axes: 0 }
1567 |   //
1568 |   // On the other hand, these would produce output blob shape 2 x 1 x 8:
1569 |   //
1570 |   //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
1571 |   //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
1572 |   //
1573 |   optional int32 axis = 2 [default = 0];
1574 |   optional int32 num_axes = 3 [default = -1];
1575 | }
1576 | 
1577 | message ScaleParameter {
1578 |   // The first axis of bottom[0] (the first input Blob) along which to apply
1579 |   // bottom[1] (the second input Blob).  May be negative to index from the end
1580 |   // (e.g., -1 for the last axis).
1581 |   //
1582 |   // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
1583 |   // top[0] will have the same shape, and bottom[1] may have any of the
1584 |   // following shapes (for the given value of axis):
1585 |   //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
1586 |   //    (axis == 1 == -3)          3;     3x40;     3x40x60
1587 |   //    (axis == 2 == -2)                   40;       40x60
1588 |   //    (axis == 3 == -1)                                60
1589 |   // Furthermore, bottom[1] may have the empty shape (regardless of the value of
1590 |   // "axis") -- a scalar multiplier.
1591 |   optional int32 axis = 1 [default = 1];
1592 | 
1593 |   // (num_axes is ignored unless just one bottom is given and the scale is
1594 |   // a learned parameter of the layer.  Otherwise, num_axes is determined by the
1595 |   // number of axes by the second bottom.)
1596 |   // The number of axes of the input (bottom[0]) covered by the scale
1597 |   // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
1598 |   // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
1599 |   optional int32 num_axes = 2 [default = 1];
1600 | 
1601 |   // (filler is ignored unless just one bottom is given and the scale is
1602 |   // a learned parameter of the layer.)
1603 |   // The initialization for the learned scale parameter.
1604 |   // Default is the unit (1) initialization, resulting in the ScaleLayer
1605 |   // initially performing the identity operation.
1606 |   optional FillerParameter filler = 3;
1607 | 
1608 |   // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
1609 |   // may be more efficient).  Initialized with bias_filler (defaults to 0).
1610 |   optional bool bias_term = 4 [default = false];
1611 |   optional FillerParameter bias_filler = 5;
1612 | }
1613 | 
1614 | message SigmoidParameter {
1615 |   enum Engine {
1616 |     DEFAULT = 0;
1617 |     CAFFE = 1;
1618 |     CUDNN = 2;
1619 |   }
1620 |   optional Engine engine = 1 [default = DEFAULT];
1621 | }
1622 | 
1623 | message SliceParameter {
1624 |   // The axis along which to slice -- may be negative to index from the end
1625 |   // (e.g., -1 for the last axis).
1626 |   // By default, SliceLayer slices blobs along the "channels" axis (1).
1627 |   optional int32 axis = 3 [default = 1];
1628 |   repeated uint32 slice_point = 2;
1629 | 
1630 |   // DEPRECATED: alias for "axis" -- does not support negative indexing.
1631 |   optional uint32 slice_dim = 1 [default = 1];
1632 | }
1633 | 
1634 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
1635 | message SoftmaxParameter {
1636 |   enum Engine {
1637 |     DEFAULT = 0;
1638 |     CAFFE = 1;
1639 |     CUDNN = 2;
1640 |   }
1641 |   optional Engine engine = 1 [default = DEFAULT];
1642 | 
1643 |   // The axis along which to perform the softmax -- may be negative to index
1644 |   // from the end (e.g., -1 for the last axis).
1645 |   // Any other axes will be evaluated as independent softmaxes.
1646 |   optional int32 axis = 2 [default = 1];
1647 | }
1648 | 
1649 | message TanHParameter {  // Options message with a single field: engine.
1650 |   enum Engine {  // Allowed values for the engine field below.
1651 |     DEFAULT = 0;
1652 |     CAFFE = 1;
1653 |     CUDNN = 2;
1654 |   }
1655 |   optional Engine engine = 1 [default = DEFAULT];  // DEFAULT when unset.
1656 | }
1657 | 
1658 | // Message that stores parameters used by TileLayer
1659 | message TileParameter {
1660 |   // The index of the axis to tile.
1661 |   optional int32 axis = 1 [default = 1];
1662 | 
1663 |   // The number of copies (tiles) of the blob to output.
1664 |   optional int32 tiles = 2;  // No explicit default is declared for this field.
1665 | }
1666 | 
1667 | // Message that stores parameters used by ThresholdLayer
1668 | message ThresholdParameter {
1669 |   optional float threshold = 1 [default = 0]; // Strictly positive values. NOTE(review): the declared default (0) is not strictly positive -- confirm the intended constraint.
1670 | }
1671 | 
1672 | message VideoDataParameter{  // Options describing a video input: source kind, device/file, frame skipping.
1673 |   enum VideoType {  // Allowed values for the video_type field below.
1674 |     WEBCAM = 0;
1675 |     VIDEO = 1;
1676 |   }
1677 |   optional VideoType video_type = 1 [default = WEBCAM];
1678 |   optional int32 device_id = 2 [default = 0];  // NOTE(review): presumably the capture device used for WEBCAM -- confirm.
1679 |   optional string video_file = 3;  // NOTE(review): presumably the file read when video_type is VIDEO -- confirm.
1680 |   // Number of frames to be skipped before processing a frame.
1681 |   optional uint32 skip_frames = 4 [default = 0];
1682 | }
1683 | 
1684 | message WindowDataParameter {  // Data source, preprocessing, and window sampling/cropping options.
1685 |   // Specify the data source.
1686 |   optional string source = 1;
1687 |   // For data pre-processing, we can do simple scaling and subtracting the
1688 |   // data mean, if provided. Note that the mean subtraction is always carried
1689 |   // out before scaling.
1690 |   optional float scale = 2 [default = 1];
1691 |   optional string mean_file = 3;
1692 |   // Specify the batch size.
1693 |   optional uint32 batch_size = 4;
1694 |   // Specify if we would like to randomly crop an image.
1695 |   optional uint32 crop_size = 5 [default = 0];
1696 |   // Specify if we want to randomly mirror data.
1697 |   optional bool mirror = 6 [default = false];
1698 |   // Foreground (object) overlap threshold
1699 |   optional float fg_threshold = 7 [default = 0.5];
1700 |   // Background (non-object) overlap threshold
1701 |   optional float bg_threshold = 8 [default = 0.5];
1702 |   // Fraction of batch that should be foreground objects
1703 |   optional float fg_fraction = 9 [default = 0.25];
1704 |   // Amount of contextual padding to add around a window
1705 |   // (used only by the window_data_layer)
1706 |   optional uint32 context_pad = 10 [default = 0];
1707 |   // Mode for cropping out a detection window
1708 |   // warp: cropped window is warped to a fixed size and aspect ratio
1709 |   // square: the tightest square around the window is cropped
1710 |   optional string crop_mode = 11 [default = "warp"];
1711 |   // cache_images: will load all images in memory for faster access
1712 |   optional bool cache_images = 12 [default = false];
1713 |   // append root_folder to locate images
1714 |   optional string root_folder = 13 [default = ""];
1715 | }
1716 | 
1717 | message SPPParameter {  // Pyramid height, pooling method, and engine selection.
1718 |   enum PoolMethod {  // Allowed values for the pool field below.
1719 |     MAX = 0;
1720 |     AVE = 1;
1721 |     STOCHASTIC = 2;
1722 |   }
1723 |   optional uint32 pyramid_height = 1;  // No explicit default is declared.
1724 |   optional PoolMethod pool = 2 [default = MAX]; // The pooling method
1725 |   enum Engine {  // Allowed values for the engine field below.
1726 |     DEFAULT = 0;
1727 |     CAFFE = 1;
1728 |     CUDNN = 2;
1729 |   }
1730 |   optional Engine engine = 6 [default = DEFAULT];  // Field numbers 3-5 are not used in this message.
1731 | }
1732 | 
1733 | // DEPRECATED: use LayerParameter.
1734 | message V1LayerParameter {
1735 |   repeated string bottom = 2;
1736 |   repeated string top = 3;
1737 |   optional string name = 4;
1738 |   repeated NetStateRule include = 32;
1739 |   repeated NetStateRule exclude = 33;
1740 |   enum LayerType {  // Layer-type identifiers; numeric values do not follow declaration order.
1741 |     NONE = 0;
1742 |     ABSVAL = 35;
1743 |     ACCURACY = 1;
1744 |     ARGMAX = 30;
1745 |     BNLL = 2;
1746 |     CONCAT = 3;
1747 |     CONTRASTIVE_LOSS = 37;
1748 |     CONVOLUTION = 4;
1749 |     DATA = 5;
1750 |     DECONVOLUTION = 39;
1751 |     DROPOUT = 6;
1752 |     DUMMY_DATA = 32;
1753 |     EUCLIDEAN_LOSS = 7;
1754 |     ELTWISE = 25;
1755 |     EXP = 38;
1756 |     FLATTEN = 8;
1757 |     HDF5_DATA = 9;
1758 |     HDF5_OUTPUT = 10;
1759 |     HINGE_LOSS = 28;
1760 |     IM2COL = 11;
1761 |     IMAGE_DATA = 12;
1762 |     INFOGAIN_LOSS = 13;
1763 |     INNER_PRODUCT = 14;
1764 |     LRN = 15;
1765 |     MEMORY_DATA = 29;
1766 |     MULTINOMIAL_LOGISTIC_LOSS = 16;
1767 |     MVN = 34;
1768 |     POOLING = 17;
1769 |     POWER = 26;
1770 |     RELU = 18;
1771 |     SIGMOID = 19;
1772 |     SIGMOID_CROSS_ENTROPY_LOSS = 27;
1773 |     SILENCE = 36;
1774 |     SOFTMAX = 20;
1775 |     SOFTMAX_LOSS = 21;
1776 |     SPLIT = 22;
1777 |     SLICE = 33;
1778 |     TANH = 23;
1779 |     WINDOW_DATA = 24;
1780 |     THRESHOLD = 31;
1781 |   }
1782 |   optional LayerType type = 5;  // One of the LayerType values above; no default declared.
1783 |   repeated BlobProto blobs = 6;
1784 |   repeated string param = 1001;
1785 |   repeated DimCheckMode blob_share_mode = 1002;  // DimCheckMode is declared immediately below.
1786 |   enum DimCheckMode {
1787 |     STRICT = 0;
1788 |     PERMISSIVE = 1;
1789 |   }
1790 |   repeated float blobs_lr = 7;
1791 |   repeated float weight_decay = 8;
1792 |   repeated float loss_weight = 35;
1793 |   optional AccuracyParameter accuracy_param = 27;  // From here on: one optional sub-message per parameter type.
1794 |   optional ArgMaxParameter argmax_param = 23;
1795 |   optional ConcatParameter concat_param = 9;
1796 |   optional ContrastiveLossParameter contrastive_loss_param = 40;
1797 |   optional ConvolutionParameter convolution_param = 10;
1798 |   optional DataParameter data_param = 11;
1799 |   optional DropoutParameter dropout_param = 12;
1800 |   optional DummyDataParameter dummy_data_param = 26;
1801 |   optional EltwiseParameter eltwise_param = 24;
1802 |   optional ExpParameter exp_param = 41;
1803 |   optional HDF5DataParameter hdf5_data_param = 13;
1804 |   optional HDF5OutputParameter hdf5_output_param = 14;
1805 |   optional HingeLossParameter hinge_loss_param = 29;
1806 |   optional ImageDataParameter image_data_param = 15;
1807 |   optional InfogainLossParameter infogain_loss_param = 16;
1808 |   optional InnerProductParameter inner_product_param = 17;
1809 |   optional LRNParameter lrn_param = 18;
1810 |   optional MemoryDataParameter memory_data_param = 22;
1811 |   optional MVNParameter mvn_param = 34;
1812 |   optional PoolingParameter pooling_param = 19;
1813 |   optional PowerParameter power_param = 21;
1814 |   optional ReLUParameter relu_param = 30;
1815 |   optional SigmoidParameter sigmoid_param = 38;
1816 |   optional SoftmaxParameter softmax_param = 39;
1817 |   optional SliceParameter slice_param = 31;
1818 |   optional TanHParameter tanh_param = 37;
1819 |   optional ThresholdParameter threshold_param = 25;
1820 |   optional WindowDataParameter window_data_param = 20;
1821 |   optional TransformationParameter transform_param = 36;
1822 |   optional LossParameter loss_param = 42;
1823 |   optional V0LayerParameter layer = 1;  // Embeds the older V0LayerParameter message.
1824 | }
1825 | 
1826 | // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
1827 | // in Caffe.  We keep this message type around for legacy support.
1828 | message V0LayerParameter {
1829 |   optional string name = 1; // the layer name
1830 |   optional string type = 2; // the string to specify the layer type
1831 | 
1832 |   // Parameters to specify layers with inner products.
1833 |   optional uint32 num_output = 3; // The number of outputs for the layer
1834 |   optional bool biasterm = 4 [default = true]; // whether to have bias terms
1835 |   optional FillerParameter weight_filler = 5; // The filler for the weight
1836 |   optional FillerParameter bias_filler = 6; // The filler for the bias
1837 | 
1838 |   optional uint32 pad = 7 [default = 0]; // The padding size
1839 |   optional uint32 kernelsize = 8; // The kernel size
1840 |   optional uint32 group = 9 [default = 1]; // The group size for group conv
1841 |   optional uint32 stride = 10 [default = 1]; // The stride
1842 |   enum PoolMethod {  // Allowed values for the pool field below.
1843 |     MAX = 0;
1844 |     AVE = 1;
1845 |     STOCHASTIC = 2;
1846 |   }
1847 |   optional PoolMethod pool = 11 [default = MAX]; // The pooling method
1848 |   optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
1849 | 
1850 |   optional uint32 local_size = 13 [default = 5]; // for local response norm
1851 |   optional float alpha = 14 [default = 1.]; // for local response norm
1852 |   optional float beta = 15 [default = 0.75]; // for local response norm
1853 |   optional float k = 22 [default = 1.];  // NOTE(review): grouped with the local response norm fields; presumably belongs to them -- confirm.
1854 | 
1855 |   // For data layers, specify the data source
1856 |   optional string source = 16;
1857 |   // For data pre-processing, we can do simple scaling and subtracting the
1858 |   // data mean, if provided. Note that the mean subtraction is always carried
1859 |   // out before scaling.
1860 |   optional float scale = 17 [default = 1];
1861 |   optional string meanfile = 18;
1862 |   // For data layers, specify the batch size.
1863 |   optional uint32 batchsize = 19;
1864 |   // For data layers, specify if we would like to randomly crop an image.
1865 |   optional uint32 cropsize = 20 [default = 0];
1866 |   // For data layers, specify if we want to randomly mirror data.
1867 |   optional bool mirror = 21 [default = false];
1868 | 
1869 |   // The blobs containing the numeric parameters of the layer
1870 |   repeated BlobProto blobs = 50;
1871 |   // The ratio that is multiplied on the global learning rate. If you want to
1872 |   // set the learning ratio for one blob, you need to set it for all blobs.
1873 |   repeated float blobs_lr = 51;
1874 |   // The weight decay that is multiplied on the global weight decay.
1875 |   repeated float weight_decay = 52;
1876 | 
1877 |   // The rand_skip variable is for the data layer to skip a few data points
1878 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
1879 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
1880 |   // be larger than the number of keys in the database.
1881 |   optional uint32 rand_skip = 53 [default = 0];
1882 | 
1883 |   // Fields related to detection (det_*)
1884 |   // foreground (object) overlap threshold
1885 |   optional float det_fg_threshold = 54 [default = 0.5];
1886 |   // background (non-object) overlap threshold
1887 |   optional float det_bg_threshold = 55 [default = 0.5];
1888 |   // Fraction of batch that should be foreground objects
1889 |   optional float det_fg_fraction = 56 [default = 0.25];
1890 | 
1891 |   // optional bool OBSOLETE_can_clobber = 57 [default = true];
1892 | 
1893 |   // Amount of contextual padding to add around a window
1894 |   // (used only by the window_data_layer)
1895 |   optional uint32 det_context_pad = 58 [default = 0];
1896 | 
1897 |   // Mode for cropping out a detection window
1898 |   // warp: cropped window is warped to a fixed size and aspect ratio
1899 |   // square: the tightest square around the window is cropped
1900 |   optional string det_crop_mode = 59 [default = "warp"];
1901 | 
1902 |   // For ReshapeLayer, one needs to specify the new dimensions.
1903 |   optional int32 new_num = 60 [default = 0];
1904 |   optional int32 new_channels = 61 [default = 0];
1905 |   optional int32 new_height = 62 [default = 0];
1906 |   optional int32 new_width = 63 [default = 0];
1907 | 
1908 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
1909 |   // It will also resize images if new_height or new_width are not zero.
1910 |   optional bool shuffle_images = 64 [default = false];
1911 | 
1912 |   // For ConcatLayer, one needs to specify the dimension for concatenation, and
1913 |   // the other dimensions must be the same for all the bottom blobs.
1914 |   // By default it will concatenate blobs along the channels dimension.
1915 |   optional uint32 concat_dim = 65 [default = 1];
1916 | 
1917 |   optional HDF5OutputParameter hdf5_output_param = 1001;  // The only nested parameter message in this legacy type.
1918 | }
1919 | 
1920 | message PReLUParameter {
1921 |   // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
1922 |   // Surpassing Human-Level Performance on ImageNet Classification, 2015.
1923 | 
1924 |   // Initial value of a_i. Default is a_i=0.25 for all i.
1925 |   optional FillerParameter filler = 1;
1926 |   // Whether or not slope parameters are shared across channels.
1927 |   optional bool channel_shared = 2 [default = false];
1928 | }
1929 | 
1930 | message CenterLossParameter {  // Output count, center initialisation filler, and axis.
1931 |   optional uint32 num_output = 1; // The number of outputs for the layer
1932 |   optional FillerParameter center_filler = 2; // The filler for the centers
1933 |   // The first axis to be lumped into a single inner product computation;
1934 |   // all preceding axes are retained in the output.
1935 |   // May be negative to index from the end (e.g., -1 for the last axis).
1936 |   optional int32 axis = 3 [default = 1];  // NOTE(review): the wording above describes an inner product -- confirm it matches how the center loss layer uses axis.
1937 | }
1938 | 
1939 | message FocalLossParameter {  // Engine, softmax axis, and the alpha/gamma scalars.
1940 |   enum Engine {  // Allowed values for the engine field below.
1941 |     DEFAULT = 0;
1942 |     CAFFE = 1;
1943 |     CUDNN = 2;
1944 |   }
1945 |   optional Engine engine = 1 [default = DEFAULT];  // DEFAULT when unset.
1946 | 
1947 |   // The axis along which to perform the softmax -- may be negative to index
1948 |   // from the end (e.g., -1 for the last axis).
1949 |   // Any other axes will be evaluated as independent softmaxes.
1950 |   optional int32 axis = 2 [default = 1];
1951 |   optional float alpha = 3 [default = 0.25];  // NOTE(review): presumably the focal-loss weighting factor -- confirm against the layer code.
1952 |   optional float gamma = 4 [default = 2.0];  // NOTE(review): presumably the focal-loss focusing exponent -- confirm against the layer code.
1953 | }
1954 | 
1955 | message MultiBoxCenterLossParameter{  // Options message with a single field: center_features.
1956 |   optional uint32 center_features = 1;  // No default declared. NOTE(review): presumably the center feature dimensionality -- confirm against the layer code.
1957 | }


--------------------------------------------------------------------------------