├── README.md
├── LICENSE
├── transform_param.txt
├── ImageAugmentationParameters
├── data_layer.cpp
├── data_transformer.hpp
├── data_transformer.cpp
└── caffe.proto
/README.md:
--------------------------------------------------------------------------------
1 | # DataAugmentation
2 | Caffe Image Data Augmentation
3 | This data augmentation targets training that reads raw image files (image_data_layer.cpp).
4 | To use it, download the official Caffe from https://github.com/BVLC/caffe and replace the original caffe.proto, data_transformer.cpp and data_transformer.hpp with the versions in this repository.
5 | Configure transform_param in train_val.prototxt following transform_param.txt; the parameters annotated as random are recommended for the train phase only, not for the test/val data.
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 GarryLau
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/transform_param.txt:
--------------------------------------------------------------------------------
1 | transform_param {
2 |   scale: 0.00390625                      #1  normalizes pixel values from [0,255] to [0,1]; scale acts on the image pixel values
3 |   mirror: true                           #2  mirroring
4 |   crop_size: 224                         #3  cropping
5 |   mean_file: "train_mean.binaryproto"    #4  mean file; cannot be used together with #5
6 |   #mean_value: 104                       #5  mean values; cannot be used together with #4
7 |   #mean_value: 117
8 |   #mean_value: 123
9 |   #force_color: false                    #6  force color image
10 |   #force_gray: false                    #7  force grayscale image
11 |
12 |   # Begin Added by garylau for Image augmentation
13 |   apply_probability: 0.5                #8  whether smoothing, brightness and color_shift are applied is probabilistic
14 |   smooth_filtering: true                #9  whether to apply smoothing (random)
15 |   max_smooth: 6                         #10 smoothing parameter (random)
16 |   max_rotation_angle: 30                #11 rotation angle (also used as the rotation angle of the affine transform) (random)
17 |   contrast_brightness_adjustment: true  #12 whether to adjust contrast/brightness (random)
18 |   min_contrast: 0.8                     #13 used for contrast/brightness adjustment (random)
19 |   max_contrast: 1.2                     #14 used for contrast/brightness adjustment (random)
20 |   max_brightness_shift: 20              #15 used for contrast/brightness adjustment (random)
21 |   max_color_shift: 20                   #16 color shift (random)
22 |   min_side_min: 224                     #17 used when resizing and randomly cropping; used together with #18, cannot be combined with #19 (random)
23 |   min_side_max: 288                     #18 used when resizing and randomly cropping; used together with #17, cannot be combined with #19 (random)
24 |   #min_side: 200                        #19 crops the image to min_side; cannot be combined with #17/#18 (random)
25 |   affine_min_scale: 0.8                 #20 minimum scale of the affine transform (random)
26 |   affine_max_scale: 1.2                 #21 maximum scale of the affine transform (random)
27 |   random_erasing_low: 0.02              #22 lower bound of the random-erasing area fraction (random)
28 |   random_erasing_high: 0.2              #23 upper bound of the random-erasing area fraction (random)
29 |   random_erasing_ratio: 0.3             #24 parameter that determines the aspect ratio of the erased area (random)
30 |   debug_params: false                   #25 used for debugging, to check whether the augmentation is correct
31 |   # End Added by garylau for Image augmentation
32 | }
33 |
--------------------------------------------------------------------------------
/ImageAugmentationParameters:
--------------------------------------------------------------------------------
1 | Data augmentation for training pipelines that use raw images as the data source. It mainly applies geometric and color transforms to a cv::Mat; the parameters are configured in the transform_param block of train_val.prototxt.
2 | transform_param currently has 25 parameters in total: #1–#7 come with the original Caffe, while #8–#25 were developed here (#25 is for debugging only and does not transform the image).
3 |
4 | List of the 25 parameters:
5 | transform_param {
6 |   scale: 0.00390625                      #1  normalizes pixel values from [0,255] to [0,1]; scale acts on the image pixel values
7 |   mirror: true                           #2  mirroring
8 |   crop_size: 224                         #3  cropping
9 |   mean_file: "train_mean.binaryproto"    #4  mean file; cannot be used together with #5
10 |   #mean_value: 104                      #5  mean values; cannot be used together with #4
11 |   #mean_value: 117
12 |   #mean_value: 123
13 |   #force_color: false                   #6  force color image
14 |   #force_gray: false                    #7  force grayscale image
15 |
16 |   # Begin Added by liugan5 for Image augmentation
17 |   apply_probability: 0.5                #8  whether the transforms #9–#24 are applied is probabilistic
18 |   smooth_filtering: true                #9  whether to apply smoothing (random)
19 |   max_smooth: 6                         #10 smoothing parameter (random)
20 |   max_rotation_angle: 30                #11 rotation angle (also used as the rotation angle of the affine transform) (random)
21 |   contrast_brightness_adjustment: true  #12 whether to adjust contrast/brightness (random)
22 |   min_contrast: 0.8                     #13 used for contrast/brightness adjustment (random)
23 |   max_contrast: 1.2                     #14 used for contrast/brightness adjustment (random)
24 |   max_brightness_shift: 20              #15 used for contrast/brightness adjustment (random)
25 |   max_color_shift: 20                   #16 color shift (random)
26 |   min_side_min: 224                     #17 used when resizing and randomly cropping; used together with #18, cannot be combined with #19 (random)
27 |   min_side_max: 288                     #18 used when resizing and randomly cropping; used together with #17, cannot be combined with #19 (random)
28 |   #min_side: 200                        #19 crops the image to min_side; cannot be combined with #17/#18 (random)
29 |   affine_min_scale: 0.8                 #20 minimum scale of the affine transform (random)
30 |   affine_max_scale: 1.2                 #21 maximum scale of the affine transform (random)
31 |   random_erasing_low: 0.02              #22 lower bound of the random-erasing area fraction (random)
32 |   random_erasing_high: 0.2              #23 upper bound of the random-erasing area fraction (random)
33 |   random_erasing_ratio: 0.3             #24 parameter that determines the aspect ratio of the erased area (random)
34 |   debug_params: false                   #25 used for debugging, to check whether the augmentation is correct
35 |   # End Added by liugan5 for Image augmentation
36 | }
37 |
38 | Notes on parameters #8–#25:
39 | Probability:
40 | #8: apply_probability, compared against a probability drawn randomly inside the code for each training sample; it decides whether the transforms #9–#24 are applied to that image (a minimal sketch follows below).
41 |
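A minimal C++ sketch of this gate, following the pattern used in data_transformer.cpp (gate_augmentation is a hypothetical helper written here only for illustration, not a function in the patch): each transform draws its own uniform sample in [0, 1) with caffe_rng_uniform and is applied only when the sample exceeds 1 - apply_probability, i.e. each transform fires with probability apply_probability.

#include "caffe/util/math_functions.hpp"

// Hypothetical helper mirroring the per-transform checks in DataTransformer::Transform.
// Returns true with probability apply_probability (e.g. 0.5 in the example above).
static bool gate_augmentation(float apply_probability) {
  float current_prob = 0.f;
  caffe::caffe_rng_uniform(1, 0.f, 1.f, &current_prob);  // one uniform draw in [0, 1)
  return current_prob > (1.f - apply_probability);
}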
42 | Smoothing: requires both smooth_filtering: true and max_smooth > 1
43 | #9: smooth_filtering, whether to apply smoothing
44 | #10: max_smooth, smoothing parameter
45 |
46 | Rotation (never performed together with the affine transform): requires max_rotation_angle > 0
47 | #11: max_rotation_angle, rotation angle (also used as the rotation angle of the affine transform)
48 |
49 | Brightness/contrast adjustment: requires contrast_brightness_adjustment: true, min_contrast > 0,
50 | max_contrast >= min_contrast and max_brightness_shift >= 0
51 | #12: contrast_brightness_adjustment, whether to adjust brightness/contrast
52 | #13: min_contrast, brightness/contrast parameter
53 | #14: max_contrast, brightness/contrast parameter
54 | #15: max_brightness_shift, brightness/contrast parameter
55 |
56 | Color shift: requires max_color_shift > 0
57 | #16: max_color_shift, color shift parameter
58 |
59 | Cropping: two modes whose parameters must not be configured at the same time (similar to the relationship between #4 and #5). In the first mode the crop size is random for every image; in the second mode every image is cropped to a fixed size (which must be smaller than the input image, otherwise an error occurs).
60 | Mode 1: requires min_side_min > 0 and min_side_max > min_side_min
61 | Mode 2: requires min_side > 0
62 | #17: min_side_min, cropping parameter; the cropped image is larger than min_side_min and smaller than min_side_max
63 | #18: min_side_max
64 | #19: min_side, cropping parameter; the cropped size equals min_side
65 |
66 | Affine transform (never performed together with the rotation operation): requires affine_min_scale > 0 and affine_max_scale > affine_min_scale
67 | #20: affine_min_scale, scale parameter of the affine transform; the actual scale is larger than affine_min_scale and smaller than affine_max_scale
68 | #21: affine_max_scale
69 |
70 | Random erasing: requires random_erasing_ratio > 0, random_erasing_high > random_erasing_low and random_erasing_low > 0
71 | #22: random_erasing_low, erased area fraction; the actual erased area fraction is larger than random_erasing_low and smaller than random_erasing_high
72 | #23: random_erasing_high
73 | #24: random_erasing_ratio, determines the width and height (aspect ratio) of the erased rectangle
74 |
75 | Debugging: requires debug_params: true
76 | #25: debug_params, logs the augmentation parameters for inspection
77 |
--------------------------------------------------------------------------------
/data_layer.cpp:
--------------------------------------------------------------------------------
1 | #ifdef USE_OPENCV
2 | #include <opencv2/core/core.hpp>
3 | #endif  // USE_OPENCV
4 | #include <stdint.h>
5 |
6 | #include <vector>
7 |
8 | #include "caffe/data_transformer.hpp"
9 | #include "caffe/layers/data_layer.hpp"
10 | #include "caffe/util/benchmark.hpp"
11 |
12 | namespace caffe {
13 |
14 | template <typename Dtype>
15 | DataLayer<Dtype>::DataLayer(const LayerParameter& param)
16 |   : BasePrefetchingDataLayer<Dtype>(param),
17 |     reader_(param) {
18 | }
19 |
20 | template <typename Dtype>
21 | DataLayer<Dtype>::~DataLayer() {
22 |   this->StopInternalThread();
23 | }
24 |
25 | template <typename Dtype>
26 | void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
27 |       const vector<Blob<Dtype>*>& top) {
28 |   const int batch_size = this->layer_param_.data_param().batch_size();
29 |   // Read a data point, and use it to initialize the top blob.
30 |   Datum& datum = *(reader_.full().peek());
31 |
32 |   // Use data_transformer to infer the expected blob shape from datum.
33 |   vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
34 |   this->transformed_data_.Reshape(top_shape);
35 |   // Reshape top[0] and prefetch_data according to the batch_size.
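  // Note: the augmentation hook added in load_batch (DatumToMat -> CVMatTransform
  // -> MatToDatum) leaves the datum height/width unchanged, because CVMatTransform
  // resizes the image back to its original size before returning. The shape
  // inferred above from the first datum therefore remains valid for augmented batches.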
36 | top_shape[0] = batch_size; 37 | top[0]->Reshape(top_shape); 38 | for (int i = 0; i < this->PREFETCH_COUNT; ++i) { 39 | this->prefetch_[i].data_.Reshape(top_shape); 40 | } 41 | LOG(INFO) << "output data size: " << top[0]->num() << "," 42 | << top[0]->channels() << "," << top[0]->height() << "," 43 | << top[0]->width(); 44 | // label 45 | if (this->output_labels_) { 46 | vector label_shape(1, batch_size); 47 | top[1]->Reshape(label_shape); 48 | for (int i = 0; i < this->PREFETCH_COUNT; ++i) { 49 | this->prefetch_[i].label_.Reshape(label_shape); 50 | } 51 | } 52 | } 53 | 54 | // This function is called on prefetch thread 55 | template 56 | void DataLayer::load_batch(Batch* batch) { 57 | CPUTimer batch_timer; 58 | batch_timer.Start(); 59 | double read_time = 0; 60 | double trans_time = 0; 61 | CPUTimer timer; 62 | CHECK(batch->data_.count()); 63 | CHECK(this->transformed_data_.count()); 64 | 65 | // Reshape according to the first datum of each batch 66 | // on single input batches allows for inputs of varying dimension. 67 | const int batch_size = this->layer_param_.data_param().batch_size(); 68 | Datum& datum = *(reader_.full().peek()); 69 | // Use data_transformer to infer the expected blob shape from datum. 70 | vector top_shape = this->data_transformer_->InferBlobShape(datum); 71 | this->transformed_data_.Reshape(top_shape); 72 | // Reshape batch according to the batch_size. 73 | top_shape[0] = batch_size; 74 | batch->data_.Reshape(top_shape); 75 | 76 | Dtype* top_data = batch->data_.mutable_cpu_data(); 77 | Dtype* top_label = NULL; // suppress warnings about uninitialized variables 78 | 79 | if (this->output_labels_) { 80 | top_label = batch->label_.mutable_cpu_data(); 81 | } 82 | for (int item_id = 0; item_id < batch_size; ++item_id) { 83 | timer.Start(); 84 | // get a datum 85 | Datum& datum = *(reader_.full().pop("Waiting for data")); 86 | read_time += timer.MicroSeconds(); 87 | timer.Start(); 88 | 89 | /* Begin Added by garylau, for lmdb data augmentation, 2017.12.11 */ 90 | int imgH = datum.height(); 91 | int imgW = datum.width(); 92 | cv::Mat cv_img(imgH, imgW, CV_8UC3); 93 | this->data_transformer_->DatumToMat(&datum, cv_img); 94 | this->data_transformer_->CVMatTransform(cv_img); 95 | this->data_transformer_->MatToDatum(cv_img, &datum); 96 | /* End Added by garylau, for lmdb data augmentation, 2017.12.11 */ 97 | 98 | // Apply data transformations (mirror, scale, crop...) 99 | int offset = batch->data_.offset(item_id); 100 | this->transformed_data_.set_cpu_data(top_data + offset); 101 | this->data_transformer_->Transform(datum, &(this->transformed_data_)); 102 | // Copy label. 
103 | if (this->output_labels_) { 104 | top_label[item_id] = datum.label(); 105 | } 106 | trans_time += timer.MicroSeconds(); 107 | 108 | reader_.free().push(const_cast(&datum)); 109 | } 110 | timer.Stop(); 111 | batch_timer.Stop(); 112 | DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; 113 | DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; 114 | DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; 115 | } 116 | 117 | INSTANTIATE_CLASS(DataLayer); 118 | REGISTER_LAYER_CLASS(Data); 119 | 120 | } // namespace caffe 121 | 122 | -------------------------------------------------------------------------------- /data_transformer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_DATA_TRANSFORMER_HPP 2 | #define CAFFE_DATA_TRANSFORMER_HPP 3 | 4 | #include 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/common.hpp" 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | namespace caffe { 11 | 12 | /** 13 | * @brief Applies common transformations to the input data, such as 14 | * scaling, mirroring, substracting the image mean... 15 | */ 16 | template 17 | class DataTransformer { 18 | public: 19 | explicit DataTransformer(const TransformationParameter& param, Phase phase); 20 | virtual ~DataTransformer() {} 21 | 22 | /** 23 | * @brief Initialize the Random number generations if needed by the 24 | * transformation. 25 | */ 26 | void InitRand(); 27 | 28 | /** 29 | * @brief Applies the transformation defined in the data layer's 30 | * transform_param block to the data. 31 | * 32 | * @param datum 33 | * Datum containing the data to be transformed. 34 | * @param transformed_blob 35 | * This is destination blob. It can be part of top blob's data if 36 | * set_cpu_data() is used. See data_layer.cpp for an example. 37 | */ 38 | void Transform(const Datum& datum, Blob* transformed_blob); 39 | 40 | /** 41 | * @brief Applies the transformation defined in the data layer's 42 | * transform_param block to a vector of Datum. 43 | * 44 | * @param datum_vector 45 | * A vector of Datum containing the data to be transformed. 46 | * @param transformed_blob 47 | * This is destination blob. It can be part of top blob's data if 48 | * set_cpu_data() is used. See memory_layer.cpp for an example. 49 | */ 50 | void Transform(const vector & datum_vector, 51 | Blob* transformed_blob); 52 | 53 | #ifdef USE_OPENCV 54 | /** 55 | * @brief Applies the transformation defined in the data layer's 56 | * transform_param block to a vector of Mat. 57 | * 58 | * @param mat_vector 59 | * A vector of Mat containing the data to be transformed. 60 | * @param transformed_blob 61 | * This is destination blob. It can be part of top blob's data if 62 | * set_cpu_data() is used. See memory_layer.cpp for an example. 63 | */ 64 | void Transform(const vector & mat_vector, 65 | Blob* transformed_blob); 66 | 67 | /** 68 | * @brief Applies the transformation defined in the data layer's 69 | * transform_param block to a cv::Mat 70 | * 71 | * @param cv_img 72 | * cv::Mat containing the data to be transformed. 73 | * @param transformed_blob 74 | * This is destination blob. It can be part of top blob's data if 75 | * set_cpu_data() is used. See image_data_layer.cpp for an example. 76 | */ 77 | void Transform(const cv::Mat& cv_img, Blob* transformed_blob); 78 | #endif // USE_OPENCV 79 | 80 | /** 81 | * @brief Applies the same transformation defined in the data layer's 82 | * transform_param block to all the num images in a input_blob. 
83 | * 84 | * @param input_blob 85 | * A Blob containing the data to be transformed. It applies the same 86 | * transformation to all the num images in the blob. 87 | * @param transformed_blob 88 | * This is destination blob, it will contain as many images as the 89 | * input blob. It can be part of top blob's data. 90 | */ 91 | void Transform(Blob* input_blob, Blob* transformed_blob); 92 | 93 | /** 94 | * @brief Infers the shape of transformed_blob will have when 95 | * the transformation is applied to the data. 96 | * 97 | * @param datum 98 | * Datum containing the data to be transformed. 99 | */ 100 | vector InferBlobShape(const Datum& datum); 101 | /** 102 | * @brief Infers the shape of transformed_blob will have when 103 | * the transformation is applied to the data. 104 | * It uses the first element to infer the shape of the blob. 105 | * 106 | * @param datum_vector 107 | * A vector of Datum containing the data to be transformed. 108 | */ 109 | vector InferBlobShape(const vector & datum_vector); 110 | /** 111 | * @brief Infers the shape of transformed_blob will have when 112 | * the transformation is applied to the data. 113 | * It uses the first element to infer the shape of the blob. 114 | * 115 | * @param mat_vector 116 | * A vector of Mat containing the data to be transformed. 117 | */ 118 | #ifdef USE_OPENCV 119 | vector InferBlobShape(const vector & mat_vector); 120 | /** 121 | * @brief Infers the shape of transformed_blob will have when 122 | * the transformation is applied to the data. 123 | * 124 | * @param cv_img 125 | * cv::Mat containing the data to be transformed. 126 | */ 127 | vector InferBlobShape(const cv::Mat& cv_img); 128 | #endif // USE_OPENCV 129 | 130 | protected: 131 | /** 132 | * @brief Generates a random integer from Uniform({0, 1, ..., n-1}). 133 | * 134 | * @param n 135 | * The upperbound (exclusive) value of the random number. 136 | * @return 137 | * A uniformly random integer value from ({0, 1, ..., n-1}). 
138 | */ 139 | virtual int Rand(int n); 140 | 141 | void Transform(const Datum& datum, Dtype* transformed_data); 142 | // Tranformation parameters 143 | TransformationParameter param_; 144 | 145 | /* Begin Added by garylau, for data augmentation, 2017.11.29 */ 146 | void random_crop(cv::Mat& cv_img, int crop_size); 147 | /* End Added by garylau, for data augmentation, 2017.11.29 */ 148 | 149 | shared_ptr rng_; 150 | Phase phase_; 151 | Blob data_mean_; 152 | vector mean_values_; 153 | 154 | /* Begin Added by garylau, for lmdb data augmentation, 2017.12.11 */ 155 | public: 156 | void DatumToMat(const Datum* datum, cv::Mat& cv_img); 157 | void MatToDatum(const cv::Mat& cv_img, Datum* datum); 158 | void CVMatTransform(cv::Mat& cv_img); 159 | /* End Added by garylau, for lmdb data augmentation, 2017.12.11 */ 160 | }; 161 | 162 | } // namespace caffe 163 | 164 | #endif // CAFFE_DATA_TRANSFORMER_HPP_ 165 | -------------------------------------------------------------------------------- /data_transformer.cpp: -------------------------------------------------------------------------------- 1 | #ifdef USE_OPENCV 2 | #include 3 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 10 | #endif // USE_OPENCV 11 | 12 | #include 13 | #include 14 | 15 | #include "caffe/data_transformer.hpp" 16 | #include "caffe/util/io.hpp" 17 | #include "caffe/util/math_functions.hpp" 18 | #include "caffe/util/rng.hpp" 19 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 20 | #include 21 | #define PI 3.14159265358979323846 22 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 23 | 24 | namespace caffe { 25 | 26 | template 27 | DataTransformer::DataTransformer(const TransformationParameter& param, 28 | Phase phase) 29 | : param_(param), phase_(phase) { 30 | // check if we want to use mean_file 31 | if (param_.has_mean_file()) { 32 | CHECK_EQ(param_.mean_value_size(), 0) << 33 | "Cannot specify mean_file and mean_value at the same time"; 34 | const string& mean_file = param.mean_file(); 35 | if (Caffe::root_solver()) { 36 | LOG(INFO) << "Loading mean file from: " << mean_file; 37 | } 38 | BlobProto blob_proto; 39 | ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); 40 | data_mean_.FromProto(blob_proto); 41 | } 42 | // check if we want to use mean_value 43 | if (param_.mean_value_size() > 0) { 44 | CHECK(param_.has_mean_file() == false) << 45 | "Cannot specify mean_file and mean_value at the same time"; 46 | for (int c = 0; c < param_.mean_value_size(); ++c) { 47 | mean_values_.push_back(param_.mean_value(c)); 48 | } 49 | } 50 | /* Begin Added by garylau, for data augmentation, 2017.11.30 */ 51 | // check if we want to use min_side 52 | if (param_.min_side()) 53 | { 54 | CHECK_EQ(param_.min_side_min(), 0) << "Cannot specify min_side and min_side_min & min_side_max at the same time"; 55 | CHECK_EQ(param_.min_side_max(), 0) << "Cannot specify min_side and min_side_min & min_side_max at the same time"; 56 | } 57 | // check if we want to use min_side_min & min_side_max 58 | if (param_.min_side_min() || param_.min_side_max()) 59 | { 60 | CHECK_EQ(param_.min_side(), 0) << "Cannot specify min_side_min & min_side_max and min_side at the same time"; 61 | CHECK_GE(param_.min_side_max(), param_.min_side_min()) << "min_side_max must be greater than (or equals to) min_side_min"; 62 | } 63 | /* End Added by garylau, for data augmentation, 2017.11.30 */ 64 | 
} 65 | 66 | /* 被读取lmdb图片的Transform调用的Transform, garylau */ 67 | template 68 | void DataTransformer::Transform(const Datum& datum, 69 | Dtype* transformed_data) { 70 | const string& data = datum.data(); 71 | const int datum_channels = datum.channels(); 72 | const int datum_height = datum.height(); 73 | const int datum_width = datum.width(); 74 | 75 | const int crop_size = param_.crop_size(); 76 | const Dtype scale = param_.scale(); 77 | const bool do_mirror = param_.mirror() && Rand(2); 78 | const bool has_mean_file = param_.has_mean_file(); 79 | const bool has_uint8 = data.size() > 0; 80 | const bool has_mean_values = mean_values_.size() > 0; 81 | 82 | CHECK_GT(datum_channels, 0); 83 | CHECK_GE(datum_height, crop_size); 84 | CHECK_GE(datum_width, crop_size); 85 | 86 | Dtype* mean = NULL; 87 | if (has_mean_file) { 88 | CHECK_EQ(datum_channels, data_mean_.channels()); 89 | CHECK_EQ(datum_height, data_mean_.height()); 90 | CHECK_EQ(datum_width, data_mean_.width()); 91 | mean = data_mean_.mutable_cpu_data(); 92 | } 93 | if (has_mean_values) { 94 | CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) << 95 | "Specify either 1 mean_value or as many as channels: " << datum_channels; 96 | if (datum_channels > 1 && mean_values_.size() == 1) { 97 | // Replicate the mean_value for simplicity 98 | for (int c = 1; c < datum_channels; ++c) { 99 | mean_values_.push_back(mean_values_[0]); 100 | } 101 | } 102 | } 103 | 104 | int height = datum_height; 105 | int width = datum_width; 106 | 107 | int h_off = 0; 108 | int w_off = 0; 109 | if (crop_size) { 110 | height = crop_size; 111 | width = crop_size; 112 | // We only do random crop when we do training. 113 | if (phase_ == TRAIN) { 114 | h_off = Rand(datum_height - crop_size + 1); 115 | w_off = Rand(datum_width - crop_size + 1); 116 | } else { 117 | h_off = (datum_height - crop_size) / 2; 118 | w_off = (datum_width - crop_size) / 2; 119 | } 120 | } 121 | 122 | Dtype datum_element; 123 | int top_index, data_index; 124 | for (int c = 0; c < datum_channels; ++c) { 125 | for (int h = 0; h < height; ++h) { 126 | for (int w = 0; w < width; ++w) { 127 | data_index = (c * datum_height + h_off + h) * datum_width + w_off + w; 128 | if (do_mirror) { 129 | top_index = (c * height + h) * width + (width - 1 - w); 130 | } else { 131 | top_index = (c * height + h) * width + w; 132 | } 133 | if (has_uint8) { 134 | datum_element = 135 | static_cast(static_cast(data[data_index])); 136 | } else { 137 | datum_element = datum.float_data(data_index); 138 | } 139 | if (has_mean_file) { 140 | transformed_data[top_index] = 141 | (datum_element - mean[data_index]) * scale; 142 | } else { 143 | if (has_mean_values) { 144 | transformed_data[top_index] = 145 | (datum_element - mean_values_[c]) * scale; 146 | } else { 147 | transformed_data[top_index] = datum_element * scale; 148 | } 149 | } 150 | } 151 | } 152 | } 153 | } 154 | 155 | /* 读取lmdb图片所用到的Transform, garylau */ 156 | template 157 | void DataTransformer::Transform(const Datum& datum, 158 | Blob* transformed_blob) { 159 | // If datum is encoded, decoded and transform the cv::image. 160 | if (datum.encoded()) { 161 | #ifdef USE_OPENCV 162 | CHECK(!(param_.force_color() && param_.force_gray())) 163 | << "cannot set both force_color and force_gray"; 164 | cv::Mat cv_img; 165 | if (param_.force_color() || param_.force_gray()) { 166 | // If force_color then decode in color otherwise decode in gray. 
167 | cv_img = DecodeDatumToCVMat(datum, param_.force_color()); 168 | } else { 169 | cv_img = DecodeDatumToCVMatNative(datum); 170 | } 171 | // Transform the cv::image into blob. 172 | return Transform(cv_img, transformed_blob); 173 | #else 174 | LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; 175 | #endif // USE_OPENCV 176 | } else { 177 | if (param_.force_color() || param_.force_gray()) { 178 | LOG(ERROR) << "force_color and force_gray only for encoded datum"; 179 | } 180 | } 181 | 182 | const int crop_size = param_.crop_size(); 183 | const int datum_channels = datum.channels(); 184 | const int datum_height = datum.height(); 185 | const int datum_width = datum.width(); 186 | 187 | // Check dimensions. 188 | const int channels = transformed_blob->channels(); 189 | const int height = transformed_blob->height(); 190 | const int width = transformed_blob->width(); 191 | const int num = transformed_blob->num(); 192 | 193 | CHECK_EQ(channels, datum_channels); 194 | CHECK_LE(height, datum_height); 195 | CHECK_LE(width, datum_width); 196 | CHECK_GE(num, 1); 197 | 198 | if (crop_size) { 199 | CHECK_EQ(crop_size, height); 200 | CHECK_EQ(crop_size, width); 201 | } else { 202 | CHECK_EQ(datum_height, height); 203 | CHECK_EQ(datum_width, width); 204 | } 205 | 206 | Dtype* transformed_data = transformed_blob->mutable_cpu_data(); 207 | Transform(datum, transformed_data); 208 | } 209 | 210 | template 211 | void DataTransformer::Transform(const vector & datum_vector, 212 | Blob* transformed_blob) { 213 | const int datum_num = datum_vector.size(); 214 | const int num = transformed_blob->num(); 215 | const int channels = transformed_blob->channels(); 216 | const int height = transformed_blob->height(); 217 | const int width = transformed_blob->width(); 218 | 219 | CHECK_GT(datum_num, 0) << "There is no datum to add"; 220 | CHECK_LE(datum_num, num) << 221 | "The size of datum_vector must be no greater than transformed_blob->num()"; 222 | Blob uni_blob(1, channels, height, width); 223 | for (int item_id = 0; item_id < datum_num; ++item_id) { 224 | int offset = transformed_blob->offset(item_id); 225 | uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset); 226 | Transform(datum_vector[item_id], &uni_blob); 227 | } 228 | } 229 | 230 | #ifdef USE_OPENCV 231 | template 232 | void DataTransformer::Transform(const vector & mat_vector, 233 | Blob* transformed_blob) { 234 | const int mat_num = mat_vector.size(); 235 | const int num = transformed_blob->num(); 236 | const int channels = transformed_blob->channels(); 237 | const int height = transformed_blob->height(); 238 | const int width = transformed_blob->width(); 239 | 240 | CHECK_GT(mat_num, 0) << "There is no MAT to add"; 241 | CHECK_EQ(mat_num, num) << 242 | "The size of mat_vector must be equals to transformed_blob->num()"; 243 | Blob uni_blob(1, channels, height, width); 244 | for (int item_id = 0; item_id < mat_num; ++item_id) { 245 | int offset = transformed_blob->offset(item_id); 246 | uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset); 247 | Transform(mat_vector[item_id], &uni_blob); 248 | } 249 | } 250 | 251 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 252 | void rotate(cv::Mat& src, int angle) 253 | { 254 | // get rotation matrix for rotating the image around its center 255 | cv::Point2f center(src.cols / 2.0, src.rows / 2.0); 256 | cv::Mat rot = cv::getRotationMatrix2D(center, angle, 1.0); 257 | // determine bounding rectangle 258 | cv::Rect bbox = cv::RotatedRect(center, 
src.size(), angle).boundingRect(); 259 | // adjust transformation matrix 260 | rot.at(0, 2) += bbox.width / 2.0 - center.x; 261 | rot.at(1, 2) += bbox.height / 2.0 - center.y; 262 | cv::warpAffine(src, src, rot, bbox.size()); 263 | } 264 | 265 | template 266 | void DataTransformer::random_crop(cv::Mat& cv_img, int crop_size) 267 | { 268 | int h_off = 0; 269 | int w_off = 0; 270 | const int img_height = cv_img.rows; 271 | const int img_width = cv_img.cols; 272 | 273 | h_off = Rand(img_height - crop_size + 1); 274 | w_off = Rand(img_width - crop_size + 1); 275 | cv::Rect roi(w_off, h_off, crop_size, crop_size); 276 | cv_img = cv_img(roi); 277 | } 278 | 279 | void crop_center(cv::Mat& cv_img, int w, int h) 280 | { 281 | int h_off = 0; 282 | int w_off = 0; 283 | const int img_height = cv_img.rows; 284 | const int img_width = cv_img.cols; 285 | h_off = (img_height - h) / 2; 286 | w_off = (img_width - w) / 2; 287 | cv::Rect roi(w_off, h_off, w, h); 288 | cv_img = cv_img(roi); 289 | } 290 | 291 | void resize(cv::Mat& cv_img, int smallest_side) 292 | { 293 | int cur_width = cv_img.cols; 294 | int cur_height = cv_img.rows; 295 | cv::Size dsize; 296 | if (cur_height <= cur_width) 297 | { 298 | double k = ((double)cur_height) / smallest_side; 299 | int new_size = (int)ceil(cur_width / k); 300 | dsize = cv::Size(new_size, smallest_side); 301 | } 302 | else 303 | { 304 | double k = ((double)cur_width) / smallest_side; 305 | int new_size = (int)ceil(cur_height / k); 306 | dsize = cv::Size(smallest_side, new_size); 307 | } 308 | cv::resize(cv_img, cv_img, dsize); 309 | } 310 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 311 | 312 | /* 读取原始图片所用到的Transform, garylau */ 313 | template 314 | void DataTransformer::Transform(const cv::Mat& img, Blob* transformed_blob) 315 | { 316 | const int crop_size = param_.crop_size(); 317 | // Check dimensions. 
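  // Augmentation order in this function (each augmentation step runs only in the
  // TRAIN phase and only when its uniform draw exceeds 1 - apply_probability):
  //   1. random erasing   2. color shift   3. contrast/brightness   4. smoothing
  //   5. min_side or min_side_min/min_side_max resize and random crop
  //   6. affine transform or rotation (mutually exclusive)
  //   7. resize back to the original size, then the stock Caffe crop / mirror /
  //      mean subtraction / scale.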
318 | const int channels = transformed_blob->channels(); 319 | const int height = transformed_blob->height(); 320 | const int width = transformed_blob->width(); 321 | const int num = transformed_blob->num(); 322 | const Dtype scale = param_.scale(); 323 | const bool has_mean_file = param_.has_mean_file(); 324 | const bool has_mean_values = mean_values_.size() > 0; 325 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 326 | const float apply_prob = 1.f - param_.apply_probability(); 327 | const float max_smooth = param_.max_smooth(); 328 | const int rotation_angle = param_.max_rotation_angle(); 329 | const float min_contrast = param_.min_contrast(); 330 | const float max_contrast = param_.max_contrast(); 331 | const int max_brightness_shift = param_.max_brightness_shift(); 332 | const int max_color_shift = param_.max_color_shift(); 333 | const int min_side_min = param_.min_side_min(); 334 | const int min_side_max = param_.min_side_max(); 335 | const int min_side = param_.min_side(); 336 | const float affine_min_scale = param_.affine_min_scale(); 337 | const float affine_max_scale = param_.affine_max_scale(); 338 | const float random_erasing_low = param_.random_erasing_low(); 339 | const float random_erasing_high = param_.random_erasing_high(); 340 | const float random_erasing_ratio = param_.random_erasing_ratio(); 341 | const bool debug_params = param_.debug_params(); 342 | 343 | const bool do_mirror = param_.mirror() && phase_ == TRAIN && Rand(2); 344 | float current_prob = 0.f; 345 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 346 | const bool do_smooth = param_.smooth_filtering() && phase_ == TRAIN && max_smooth > 1 && current_prob > apply_prob; 347 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 348 | const bool do_rotation = rotation_angle > 0 && current_prob > apply_prob && phase_ == TRAIN; 349 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 350 | const bool do_brightness = param_.contrast_brightness_adjustment() && min_contrast > 0 && max_contrast >= min_contrast 351 | && max_brightness_shift >= 0 && phase_ == TRAIN && current_prob > apply_prob; 352 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 353 | const bool do_color_shift = max_color_shift > 0 && phase_ == TRAIN && current_prob > apply_prob; 354 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 355 | const bool do_resize_to_min_side_min_max = min_side_min > 0 && min_side_max > min_side_min && phase_ == TRAIN && current_prob > apply_prob; 356 | const bool do_resize_to_min_side = min_side > 0 && phase_ == TRAIN && current_prob > apply_prob; 357 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 358 | const bool do_affine = affine_min_scale > 0 && affine_max_scale > affine_min_scale && phase_ == TRAIN && current_prob > apply_prob; 359 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 360 | const bool do_random_erasing = param_.random_erasing_ratio() > 0 && param_.random_erasing_high() > param_.random_erasing_low() 361 | && param_.random_erasing_low() > 0 && phase_ == TRAIN && current_prob > apply_prob; 362 | 363 | cv::Mat cv_img = img; 364 | /* 随机擦除Random-Erasing */ 365 | cv::Scalar erase_mean = cv::mean(cv_img); 366 | cv::Rect erase_rect; 367 | if (do_random_erasing) 368 | { 369 | int area = cv_img.cols * cv_img.rows; 370 | caffe_rng_uniform(1, random_erasing_low, random_erasing_high, ¤t_prob); 371 | float target_area = current_prob * area; 372 | caffe_rng_uniform(1, random_erasing_ratio, 1.f / random_erasing_ratio, ¤t_prob); 373 | float aspect_ratio = current_prob; 374 | int erase_height = int(round(sqrt(target_area * aspect_ratio))); /* 待erase的矩形区域的高 
*/ 375 | int erase_weight = int(round(sqrt(target_area / aspect_ratio))); /* 待erase的矩形区域的宽 */ 376 | if (erase_weight <= cv_img.cols && erase_height <= cv_img.rows) 377 | { 378 | float erase_x = 0; /* 待erase的矩形区域的左上角x坐标 */ 379 | float erase_y = 0; /* 待erase的矩形区域的左上角y坐标 */ 380 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.cols - erase_weight), &erase_x); 381 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.rows - erase_height), &erase_y); 382 | erase_rect = cv::Rect(erase_x, erase_y, erase_weight, erase_height); 383 | if (3 == cv_img.channels()) 384 | { 385 | cv::Mat_ img_test = cv_img; 386 | for (size_t i = erase_x; i < erase_x + erase_weight; i++) 387 | { 388 | for (size_t j = erase_y; j < erase_y + erase_height; j++) 389 | { 390 | img_test(i, j) = cv::Vec3b(erase_mean.val[0], erase_mean.val[1], erase_mean.val[2]); 391 | } 392 | } 393 | } 394 | else 395 | { 396 | cv_img(erase_rect) = erase_mean.val[0]; 397 | } 398 | } 399 | } 400 | 401 | // apply color shift 402 | if (do_color_shift) 403 | { 404 | int b = Rand(max_color_shift + 1); 405 | int g = Rand(max_color_shift + 1); 406 | int r = Rand(max_color_shift + 1); 407 | int sign = Rand(2); 408 | cv::Mat shiftArr = cv_img.clone(); 409 | shiftArr.setTo(cv::Scalar(b, g, r)); 410 | if (sign == 1) 411 | { 412 | cv_img -= shiftArr; 413 | } 414 | else 415 | { 416 | cv_img += shiftArr; 417 | } 418 | } 419 | 420 | // set contrast and brightness 421 | float alpha; 422 | int beta; 423 | if (do_brightness) 424 | { 425 | caffe_rng_uniform(1, min_contrast, max_contrast, &alpha); 426 | beta = Rand(max_brightness_shift * 2 + 1) - max_brightness_shift; 427 | cv_img.convertTo(cv_img, -1, alpha, beta); 428 | } 429 | 430 | // set smoothness 431 | int smooth_param = 0; 432 | int smooth_type = 0; 433 | if (do_smooth) 434 | { 435 | smooth_type = Rand(4); 436 | smooth_param = 1 + 2 * Rand(max_smooth / 2); 437 | switch (smooth_type) 438 | { 439 | case 0: 440 | cv::GaussianBlur(cv_img, cv_img, cv::Size(smooth_param, smooth_param), 0); 441 | break; 442 | case 1: 443 | cv::blur(cv_img, cv_img, cv::Size(smooth_param, smooth_param)); 444 | break; 445 | case 2: 446 | cv::medianBlur(cv_img, cv_img, smooth_param); 447 | break; 448 | case 3: 449 | cv::boxFilter(cv_img, cv_img, -1, cv::Size(smooth_param * 2, smooth_param * 2)); 450 | break; 451 | default: 452 | break; 453 | } 454 | } 455 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 456 | 457 | const int img_channels = cv_img.channels(); 458 | const int img_height = cv_img.rows; 459 | const int img_width = cv_img.cols; 460 | 461 | CHECK_GT(img_channels, 0); 462 | CHECK_GE(img_height, crop_size); 463 | CHECK_GE(img_width, crop_size); 464 | CHECK_EQ(channels, img_channels); 465 | CHECK_LE(height, img_height); 466 | CHECK_LE(width, img_width); 467 | CHECK_GE(num, 1); 468 | CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; 469 | 470 | Dtype* mean = NULL; 471 | if (has_mean_file) { 472 | CHECK_EQ(img_channels, data_mean_.channels()); 473 | CHECK_EQ(img_height, data_mean_.height()); 474 | CHECK_EQ(img_width, data_mean_.width()); 475 | mean = data_mean_.mutable_cpu_data(); 476 | } 477 | if (has_mean_values) { 478 | CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) << 479 | "Specify either 1 mean_value or as many as channels: " << img_channels; 480 | if (img_channels > 1 && mean_values_.size() == 1) { 481 | // Replicate the mean_value for simplicity 482 | for (int c = 1; c < img_channels; ++c) { 483 | mean_values_.push_back(mean_values_[0]); 484 | } 485 | } 486 | } 487 | 488 | /* 
Begin Added by garylau, for data augmentation, 2017.11.22 */ 489 | // resizing and crop according to min side, preserving aspect ratio 490 | if (do_resize_to_min_side) 491 | { 492 | random_crop(cv_img, min_side); 493 | } 494 | if (do_resize_to_min_side_min_max) 495 | { 496 | int min_side_length = min_side_min + Rand(min_side_max - min_side_min + 1); 497 | resize(cv_img, min_side_max); 498 | random_crop(cv_img, min_side_length); 499 | } 500 | /* 仿射变换 */ 501 | float affine_angle = 0.f; 502 | float affine_scale = 0.f; 503 | if (do_affine) 504 | { 505 | cv::Point2f affine_center = cv::Point2f(cv_img.rows / 2, cv_img.cols / 2); 506 | affine_angle = 1.0 * Rand(rotation_angle * 2 + 1) - rotation_angle; 507 | affine_scale = affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f; 508 | cv::Mat affine_matrix = cv::getRotationMatrix2D(affine_center, affine_angle, affine_scale); 509 | cv::Size dize = cv::Size(cv_img.rows * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f), 510 | cv_img.cols * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f)); 511 | cv::warpAffine(cv_img, cv_img, affine_matrix, dize); 512 | } 513 | /* 旋转操作 */ 514 | int current_angle = 0; 515 | if (do_rotation && !do_affine) 516 | { 517 | current_angle = Rand(rotation_angle * 2 + 1) - rotation_angle; 518 | if (current_angle) 519 | { 520 | rotate(cv_img, current_angle); 521 | } 522 | } 523 | 524 | if (debug_params && phase_ == TRAIN) { 525 | LOG(INFO) << "----------------------------------------"; 526 | if (do_smooth) 527 | { 528 | LOG(INFO) << "* parameter for smooth filtering: "; 529 | LOG(INFO) << " smooth type: " << smooth_type << ", smooth param: " << smooth_param; 530 | } 531 | if (do_rotation) 532 | { 533 | LOG(INFO) << "* parameter for rotation: "; 534 | LOG(INFO) << " current rotation angle: " << current_angle; 535 | } 536 | if (do_brightness) 537 | { 538 | LOG(INFO) << "* parameter for contrast adjustment: "; 539 | LOG(INFO) << " alpha: " << alpha << ", beta: " << beta; 540 | } 541 | if (do_color_shift) 542 | { 543 | LOG(INFO) << "* parameter for color shift: "; 544 | LOG(INFO) << "max_color_shift: " << max_color_shift; 545 | } 546 | if (do_resize_to_min_side_min_max) 547 | { 548 | LOG(INFO) << "* parameter for min_side_min_max crop: "; 549 | LOG(INFO) << "min_side_min: " << min_side_min << ", min_side_max: " << min_side_max; 550 | } 551 | if (do_resize_to_min_side) 552 | { 553 | LOG(INFO) << "* parameter for min_side crop: "; 554 | LOG(INFO) << "min_side: " << min_side; 555 | } 556 | if (do_affine) 557 | { 558 | LOG(INFO) << "* parameter for affine transformation: "; 559 | LOG(INFO) << "affine_angle: " << affine_angle << ", affine_scale: " << affine_scale; 560 | } 561 | if (do_random_erasing) 562 | { 563 | LOG(INFO) << "* parameter for random erasing: "; 564 | LOG(INFO) << "erase_rect: " << "x:" << erase_rect.x << ", y:" << erase_rect.y << ", width:" << erase_rect.width << ", height:" << erase_rect.height; 565 | } 566 | } 567 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 568 | 569 | int h_off = 0; 570 | int w_off = 0; 571 | cv::Mat cv_cropped_img = cv_img; 572 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 573 | if (img_width != cv_cropped_img.cols || img_height != cv_cropped_img.rows) 574 | { 575 | cv::resize(cv_cropped_img, cv_cropped_img, cv::Size(img_width, img_height)); 576 | } 577 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 578 | if (crop_size) { 579 | CHECK_EQ(crop_size, height); 580 | 
CHECK_EQ(crop_size, width); 581 | // We only do random crop when we do training. 582 | if (phase_ == TRAIN) { 583 | h_off = Rand(img_height - crop_size + 1); 584 | w_off = Rand(img_width - crop_size + 1); 585 | } else { 586 | h_off = (img_height - crop_size) / 2; 587 | w_off = (img_width - crop_size) / 2; 588 | } 589 | cv::Rect roi(w_off, h_off, crop_size, crop_size); 590 | cv_cropped_img = cv_cropped_img(roi); 591 | } else { 592 | CHECK_EQ(img_height, height); 593 | CHECK_EQ(img_width, width); 594 | } 595 | 596 | CHECK(cv_cropped_img.data); 597 | 598 | Dtype* transformed_data = transformed_blob->mutable_cpu_data(); 599 | int top_index; 600 | for (int h = 0; h < height; ++h) { 601 | const uchar* ptr = cv_cropped_img.ptr(h); 602 | int img_index = 0; 603 | for (int w = 0; w < width; ++w) { 604 | for (int c = 0; c < img_channels; ++c) { 605 | if (do_mirror) { 606 | top_index = (c * height + h) * width + (width - 1 - w); 607 | } else { 608 | top_index = (c * height + h) * width + w; 609 | } 610 | // int top_index = (c * height + h) * width + w; 611 | Dtype pixel = static_cast(ptr[img_index++]); 612 | if (has_mean_file) { 613 | int mean_index = (c * img_height + h_off + h) * img_width + w_off + w; 614 | transformed_data[top_index] = 615 | (pixel - mean[mean_index]) * scale; 616 | } else { 617 | if (has_mean_values) { 618 | transformed_data[top_index] = 619 | (pixel - mean_values_[c]) * scale; 620 | } else { 621 | transformed_data[top_index] = pixel * scale; 622 | } 623 | } 624 | } 625 | } 626 | } 627 | } 628 | #endif // USE_OPENCV 629 | 630 | template 631 | void DataTransformer::Transform(Blob* input_blob, 632 | Blob* transformed_blob) { 633 | const int crop_size = param_.crop_size(); 634 | const int input_num = input_blob->num(); 635 | const int input_channels = input_blob->channels(); 636 | const int input_height = input_blob->height(); 637 | const int input_width = input_blob->width(); 638 | 639 | if (transformed_blob->count() == 0) { 640 | // Initialize transformed_blob with the right shape. 641 | if (crop_size) { 642 | transformed_blob->Reshape(input_num, input_channels, 643 | crop_size, crop_size); 644 | } else { 645 | transformed_blob->Reshape(input_num, input_channels, 646 | input_height, input_width); 647 | } 648 | } 649 | 650 | const int num = transformed_blob->num(); 651 | const int channels = transformed_blob->channels(); 652 | const int height = transformed_blob->height(); 653 | const int width = transformed_blob->width(); 654 | const int size = transformed_blob->count(); 655 | 656 | CHECK_LE(input_num, num); 657 | CHECK_EQ(input_channels, channels); 658 | CHECK_GE(input_height, height); 659 | CHECK_GE(input_width, width); 660 | 661 | 662 | const Dtype scale = param_.scale(); 663 | const bool do_mirror = param_.mirror() && Rand(2); 664 | const bool has_mean_file = param_.has_mean_file(); 665 | const bool has_mean_values = mean_values_.size() > 0; 666 | 667 | int h_off = 0; 668 | int w_off = 0; 669 | if (crop_size) { 670 | CHECK_EQ(crop_size, height); 671 | CHECK_EQ(crop_size, width); 672 | // We only do random crop when we do training. 
673 | if (phase_ == TRAIN) { 674 | h_off = Rand(input_height - crop_size + 1); 675 | w_off = Rand(input_width - crop_size + 1); 676 | } else { 677 | h_off = (input_height - crop_size) / 2; 678 | w_off = (input_width - crop_size) / 2; 679 | } 680 | } else { 681 | CHECK_EQ(input_height, height); 682 | CHECK_EQ(input_width, width); 683 | } 684 | 685 | Dtype* input_data = input_blob->mutable_cpu_data(); 686 | if (has_mean_file) { 687 | CHECK_EQ(input_channels, data_mean_.channels()); 688 | CHECK_EQ(input_height, data_mean_.height()); 689 | CHECK_EQ(input_width, data_mean_.width()); 690 | for (int n = 0; n < input_num; ++n) { 691 | int offset = input_blob->offset(n); 692 | caffe_sub(data_mean_.count(), input_data + offset, 693 | data_mean_.cpu_data(), input_data + offset); 694 | } 695 | } 696 | 697 | if (has_mean_values) { 698 | CHECK(mean_values_.size() == 1 || mean_values_.size() == input_channels) << 699 | "Specify either 1 mean_value or as many as channels: " << input_channels; 700 | if (mean_values_.size() == 1) { 701 | caffe_add_scalar(input_blob->count(), -(mean_values_[0]), input_data); 702 | } else { 703 | for (int n = 0; n < input_num; ++n) { 704 | for (int c = 0; c < input_channels; ++c) { 705 | int offset = input_blob->offset(n, c); 706 | caffe_add_scalar(input_height * input_width, -(mean_values_[c]), 707 | input_data + offset); 708 | } 709 | } 710 | } 711 | } 712 | 713 | Dtype* transformed_data = transformed_blob->mutable_cpu_data(); 714 | 715 | for (int n = 0; n < input_num; ++n) { 716 | int top_index_n = n * channels; 717 | int data_index_n = n * channels; 718 | for (int c = 0; c < channels; ++c) { 719 | int top_index_c = (top_index_n + c) * height; 720 | int data_index_c = (data_index_n + c) * input_height + h_off; 721 | for (int h = 0; h < height; ++h) { 722 | int top_index_h = (top_index_c + h) * width; 723 | int data_index_h = (data_index_c + h) * input_width + w_off; 724 | if (do_mirror) { 725 | int top_index_w = top_index_h + width - 1; 726 | for (int w = 0; w < width; ++w) { 727 | transformed_data[top_index_w-w] = input_data[data_index_h + w]; 728 | } 729 | } else { 730 | for (int w = 0; w < width; ++w) { 731 | transformed_data[top_index_h + w] = input_data[data_index_h + w]; 732 | } 733 | } 734 | } 735 | } 736 | } 737 | if (scale != Dtype(1)) { 738 | DLOG(INFO) << "Scale: " << scale; 739 | caffe_scal(size, scale, transformed_data); 740 | } 741 | } 742 | 743 | template 744 | vector DataTransformer::InferBlobShape(const Datum& datum) { 745 | if (datum.encoded()) { 746 | #ifdef USE_OPENCV 747 | CHECK(!(param_.force_color() && param_.force_gray())) 748 | << "cannot set both force_color and force_gray"; 749 | cv::Mat cv_img; 750 | if (param_.force_color() || param_.force_gray()) { 751 | // If force_color then decode in color otherwise decode in gray. 752 | cv_img = DecodeDatumToCVMat(datum, param_.force_color()); 753 | } else { 754 | cv_img = DecodeDatumToCVMatNative(datum); 755 | } 756 | // InferBlobShape using the cv::image. 757 | return InferBlobShape(cv_img); 758 | #else 759 | LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; 760 | #endif // USE_OPENCV 761 | } 762 | const int crop_size = param_.crop_size(); 763 | const int datum_channels = datum.channels(); 764 | const int datum_height = datum.height(); 765 | const int datum_width = datum.width(); 766 | // Check dimensions. 767 | CHECK_GT(datum_channels, 0); 768 | CHECK_GE(datum_height, crop_size); 769 | CHECK_GE(datum_width, crop_size); 770 | // Build BlobShape. 
771 | vector shape(4); 772 | shape[0] = 1; 773 | shape[1] = datum_channels; 774 | shape[2] = (crop_size)? crop_size: datum_height; 775 | shape[3] = (crop_size)? crop_size: datum_width; 776 | return shape; 777 | } 778 | 779 | template 780 | vector DataTransformer::InferBlobShape( 781 | const vector & datum_vector) { 782 | const int num = datum_vector.size(); 783 | CHECK_GT(num, 0) << "There is no datum to in the vector"; 784 | // Use first datum in the vector to InferBlobShape. 785 | vector shape = InferBlobShape(datum_vector[0]); 786 | // Adjust num to the size of the vector. 787 | shape[0] = num; 788 | return shape; 789 | } 790 | 791 | #ifdef USE_OPENCV 792 | template 793 | vector DataTransformer::InferBlobShape(const cv::Mat& cv_img) { 794 | const int crop_size = param_.crop_size(); 795 | const int img_channels = cv_img.channels(); 796 | const int img_height = cv_img.rows; 797 | const int img_width = cv_img.cols; 798 | // Check dimensions. 799 | CHECK_GT(img_channels, 0); 800 | CHECK_GE(img_height, crop_size); 801 | CHECK_GE(img_width, crop_size); 802 | // Build BlobShape. 803 | vector shape(4); 804 | shape[0] = 1; 805 | shape[1] = img_channels; 806 | shape[2] = (crop_size)? crop_size: img_height; 807 | shape[3] = (crop_size)? crop_size: img_width; 808 | return shape; 809 | } 810 | 811 | template 812 | vector DataTransformer::InferBlobShape( 813 | const vector & mat_vector) { 814 | const int num = mat_vector.size(); 815 | CHECK_GT(num, 0) << "There is no cv_img to in the vector"; 816 | // Use first cv_img in the vector to InferBlobShape. 817 | vector shape = InferBlobShape(mat_vector[0]); 818 | // Adjust num to the size of the vector. 819 | shape[0] = num; 820 | return shape; 821 | } 822 | #endif // USE_OPENCV 823 | 824 | template 825 | void DataTransformer::InitRand() { 826 | const bool needs_rand = param_.mirror() || 827 | (phase_ == TRAIN && param_.crop_size()); 828 | if (needs_rand) { 829 | const unsigned int rng_seed = caffe_rng_rand(); 830 | rng_.reset(new Caffe::RNG(rng_seed)); 831 | } else { 832 | rng_.reset(); 833 | } 834 | } 835 | 836 | template 837 | int DataTransformer::Rand(int n) { 838 | CHECK(rng_); 839 | CHECK_GT(n, 0); 840 | caffe::rng_t* rng = 841 | static_cast(rng_->generator()); 842 | return ((*rng)() % n); 843 | } 844 | 845 | INSTANTIATE_CLASS(DataTransformer); 846 | 847 | /* Begin Added by garylau, for lmdb data augmentation, 2017.12.11 */ 848 | template 849 | void DataTransformer::DatumToMat(const Datum* datum, cv::Mat& cv_img) 850 | { 851 | int datum_channels = datum->channels(); 852 | int datum_height = datum->height(); 853 | int datum_width = datum->width(); 854 | int datum_size = datum_channels * datum_height * datum_width; 855 | 856 | std::string buffer(datum_size, ' '); 857 | buffer = datum->data(); 858 | 859 | for (int h = 0; h < datum_height; ++h) { 860 | uchar* ptr = cv_img.ptr(h); 861 | int img_index = 0; 862 | for (int w = 0; w < datum_width; ++w) { 863 | for (int c = 0; c < datum_channels; ++c) { 864 | int datum_index = (c * datum_height + h) * datum_width + w; 865 | ptr[img_index++] = static_cast(buffer[datum_index]); 866 | } 867 | } 868 | } 869 | } 870 | template 871 | void DataTransformer::MatToDatum(const cv::Mat& cv_img, Datum* datum) 872 | { 873 | CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; 874 | datum->set_channels(cv_img.channels()); 875 | datum->set_height(cv_img.rows); 876 | datum->set_width(cv_img.cols); 877 | datum->clear_data(); 878 | datum->set_encoded(false); 879 | int datum_channels = datum->channels(); 880 
| int datum_height = datum->height(); 881 | int datum_width = datum->width(); 882 | int datum_size = datum_channels * datum_height * datum_width; 883 | std::string buffer(datum_size, ' '); 884 | for (int h = 0; h < datum_height; ++h) { 885 | const uchar* ptr = cv_img.ptr(h); 886 | int img_index = 0; 887 | for (int w = 0; w < datum_width; ++w) { 888 | for (int c = 0; c < datum_channels; ++c) { 889 | int datum_index = (c * datum_height + h) * datum_width + w; 890 | buffer[datum_index] = static_cast(ptr[img_index++]); 891 | } 892 | } 893 | } 894 | datum->set_data(buffer); 895 | } 896 | template 897 | void DataTransformer::CVMatTransform(cv::Mat& in_out_cv_img) 898 | { 899 | const float apply_prob = 1.f - param_.apply_probability(); 900 | const float max_smooth = param_.max_smooth(); 901 | const int rotation_angle = param_.max_rotation_angle(); 902 | const float min_contrast = param_.min_contrast(); 903 | const float max_contrast = param_.max_contrast(); 904 | const int max_brightness_shift = param_.max_brightness_shift(); 905 | const int max_color_shift = param_.max_color_shift(); 906 | const int min_side_min = param_.min_side_min(); 907 | const int min_side_max = param_.min_side_max(); 908 | const int min_side = param_.min_side(); 909 | const float affine_min_scale = param_.affine_min_scale(); 910 | const float affine_max_scale = param_.affine_max_scale(); 911 | const float random_erasing_low = param_.random_erasing_low(); 912 | const float random_erasing_high = param_.random_erasing_high(); 913 | const float random_erasing_ratio = param_.random_erasing_ratio(); 914 | const bool debug_params = param_.debug_params(); 915 | 916 | float current_prob = 0.f; 917 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 918 | const bool do_smooth = param_.smooth_filtering() && phase_ == TRAIN && max_smooth > 1 && current_prob > apply_prob; 919 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 920 | const bool do_rotation = rotation_angle > 0 && current_prob > apply_prob && phase_ == TRAIN; 921 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 922 | const bool do_brightness = param_.contrast_brightness_adjustment() && min_contrast > 0 && max_contrast >= min_contrast 923 | && max_brightness_shift >= 0 && phase_ == TRAIN && current_prob > apply_prob; 924 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 925 | const bool do_color_shift = max_color_shift > 0 && phase_ == TRAIN && current_prob > apply_prob; 926 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 927 | const bool do_resize_to_min_side_min_max = min_side_min > 0 && min_side_max > min_side_min && phase_ == TRAIN && current_prob > apply_prob; 928 | const bool do_resize_to_min_side = min_side > 0 && phase_ == TRAIN && current_prob > apply_prob; 929 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 930 | const bool do_affine = affine_min_scale > 0 && affine_max_scale > affine_min_scale && phase_ == TRAIN && current_prob > apply_prob; 931 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 932 | const bool do_random_erasing = param_.random_erasing_ratio() > 0 && param_.random_erasing_high() > param_.random_erasing_low() 933 | && param_.random_erasing_low() > 0 && phase_ == TRAIN && current_prob > apply_prob; 934 | 935 | cv::Mat cv_img = in_out_cv_img; 936 | /* 随机擦除Random-Erasing */ 937 | cv::Scalar erase_mean = cv::mean(cv_img); 938 | cv::Rect erase_rect; 939 | if (do_random_erasing) 940 | { 941 | int area = cv_img.cols * cv_img.rows; 942 | caffe_rng_uniform(1, random_erasing_low, random_erasing_high, ¤t_prob); 943 | float target_area = current_prob * area; 944 | caffe_rng_uniform(1, 
random_erasing_ratio, 1.f / random_erasing_ratio, ¤t_prob); 945 | float aspect_ratio = current_prob; 946 | int erase_height = int(round(sqrt(target_area * aspect_ratio))); /* 待erase的矩形区域的高 */ 947 | int erase_weight = int(round(sqrt(target_area / aspect_ratio))); /* 待erase的矩形区域的宽 */ 948 | if (erase_weight <= cv_img.cols && erase_height <= cv_img.rows) 949 | { 950 | float erase_x = 0; /* 待erase的矩形区域的左上角x坐标 */ 951 | float erase_y = 0; /* 待erase的矩形区域的左上角y坐标 */ 952 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.cols - erase_weight), &erase_x); 953 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.rows - erase_height), &erase_y); 954 | erase_rect = cv::Rect(erase_x, erase_y, erase_weight, erase_height); 955 | if (3 == cv_img.channels()) 956 | { 957 | cv::Mat_ img_test = cv_img; 958 | for (size_t i = erase_x; i < erase_x + erase_weight; i++) 959 | { 960 | for (size_t j = erase_y; j < erase_y + erase_height; j++) 961 | { 962 | img_test(i, j) = cv::Vec3b(erase_mean.val[0], erase_mean.val[1], erase_mean.val[2]); 963 | } 964 | } 965 | } 966 | else 967 | { 968 | cv_img(erase_rect) = erase_mean.val[0]; 969 | } 970 | } 971 | } 972 | 973 | // apply color shift 974 | if (do_color_shift) 975 | { 976 | int b = Rand(max_color_shift + 1); 977 | int g = Rand(max_color_shift + 1); 978 | int r = Rand(max_color_shift + 1); 979 | int sign = Rand(2); 980 | cv::Mat shiftArr = cv_img.clone(); 981 | shiftArr.setTo(cv::Scalar(b, g, r)); 982 | if (sign == 1) 983 | { 984 | cv_img -= shiftArr; 985 | } 986 | else 987 | { 988 | cv_img += shiftArr; 989 | } 990 | } 991 | 992 | // set contrast and brightness 993 | float alpha; 994 | int beta; 995 | if (do_brightness) 996 | { 997 | caffe_rng_uniform(1, min_contrast, max_contrast, &alpha); 998 | beta = Rand(max_brightness_shift * 2 + 1) - max_brightness_shift; 999 | cv_img.convertTo(cv_img, -1, alpha, beta); 1000 | } 1001 | 1002 | // set smoothness 1003 | int smooth_param = 0; 1004 | int smooth_type = 0; 1005 | if (do_smooth) 1006 | { 1007 | smooth_type = Rand(4); 1008 | smooth_param = 1 + 2 * Rand(max_smooth / 2); 1009 | switch (smooth_type) 1010 | { 1011 | case 0: 1012 | cv::GaussianBlur(cv_img, cv_img, cv::Size(smooth_param, smooth_param), 0); 1013 | break; 1014 | case 1: 1015 | cv::blur(cv_img, cv_img, cv::Size(smooth_param, smooth_param)); 1016 | break; 1017 | case 2: 1018 | cv::medianBlur(cv_img, cv_img, smooth_param); 1019 | break; 1020 | case 3: 1021 | cv::boxFilter(cv_img, cv_img, -1, cv::Size(smooth_param * 2, smooth_param * 2)); 1022 | break; 1023 | default: 1024 | break; 1025 | } 1026 | } 1027 | 1028 | const int img_height = cv_img.rows; 1029 | const int img_width = cv_img.cols; 1030 | 1031 | CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; 1032 | 1033 | // resizing and crop according to min side, preserving aspect ratio 1034 | if (do_resize_to_min_side) 1035 | { 1036 | random_crop(cv_img, min_side); 1037 | } 1038 | if (do_resize_to_min_side_min_max) 1039 | { 1040 | int min_side_length = min_side_min + Rand(min_side_max - min_side_min + 1); 1041 | resize(cv_img, min_side_max); 1042 | random_crop(cv_img, min_side_length); 1043 | } 1044 | /* 仿射变换 */ 1045 | float affine_angle = 0.f; 1046 | float affine_scale = 0.f; 1047 | if (do_affine) 1048 | { 1049 | cv::Point2f affine_center = cv::Point2f(cv_img.rows / 2, cv_img.cols / 2); 1050 | affine_angle = 1.0 * Rand(rotation_angle * 2 + 1) - rotation_angle; 1051 | affine_scale = affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f; 1052 | cv::Mat affine_matrix = 
cv::getRotationMatrix2D(affine_center, affine_angle, affine_scale); 1053 | cv::Size dize = cv::Size(cv_img.rows * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f), 1054 | cv_img.cols * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f)); 1055 | cv::warpAffine(cv_img, cv_img, affine_matrix, dize); 1056 | } 1057 | /* 旋转操作 */ 1058 | int current_angle = 0; 1059 | if (do_rotation && !do_affine) 1060 | { 1061 | current_angle = Rand(rotation_angle * 2 + 1) - rotation_angle; 1062 | if (current_angle) 1063 | { 1064 | rotate(cv_img, current_angle); 1065 | } 1066 | } 1067 | 1068 | if (debug_params && phase_ == TRAIN) { 1069 | LOG(INFO) << "----------------------------------------"; 1070 | if (do_smooth) 1071 | { 1072 | LOG(INFO) << "* parameter for smooth filtering: "; 1073 | LOG(INFO) << " smooth type: " << smooth_type << ", smooth param: " << smooth_param; 1074 | } 1075 | if (do_rotation) 1076 | { 1077 | LOG(INFO) << "* parameter for rotation: "; 1078 | LOG(INFO) << " current rotation angle: " << current_angle; 1079 | } 1080 | if (do_brightness) 1081 | { 1082 | LOG(INFO) << "* parameter for contrast adjustment: "; 1083 | LOG(INFO) << " alpha: " << alpha << ", beta: " << beta; 1084 | } 1085 | if (do_color_shift) 1086 | { 1087 | LOG(INFO) << "* parameter for color shift: "; 1088 | LOG(INFO) << "max_color_shift: " << max_color_shift; 1089 | } 1090 | if (do_resize_to_min_side_min_max) 1091 | { 1092 | LOG(INFO) << "* parameter for min_side_min_max crop: "; 1093 | LOG(INFO) << "min_side_min: " << min_side_min << ", min_side_max: " << min_side_max; 1094 | } 1095 | if (do_resize_to_min_side) 1096 | { 1097 | LOG(INFO) << "* parameter for min_side crop: "; 1098 | LOG(INFO) << "min_side: " << min_side; 1099 | } 1100 | if (do_affine) 1101 | { 1102 | LOG(INFO) << "* parameter for affine transformation: "; 1103 | LOG(INFO) << "affine_angle: " << affine_angle << ", affine_scale: " << affine_scale; 1104 | } 1105 | if (do_random_erasing) 1106 | { 1107 | LOG(INFO) << "* parameter for random erasing: "; 1108 | LOG(INFO) << "erase_rect: " << "x:" << erase_rect.x << ", y:" << erase_rect.y << ", width:" << erase_rect.width << ", height:" << erase_rect.height; 1109 | } 1110 | } 1111 | 1112 | int h_off = 0; 1113 | int w_off = 0; 1114 | if (img_width != cv_img.cols || img_height != cv_img.rows) 1115 | { 1116 | cv::resize(cv_img, cv_img, cv::Size(img_width, img_height)); 1117 | } 1118 | in_out_cv_img = cv_img; 1119 | } 1120 | /* End Added by garylau, for lmdb data augmentation, 2017.12.11 */ 1121 | 1122 | } // namespace caffe 1123 | -------------------------------------------------------------------------------- /caffe.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package caffe; 4 | 5 | // Specifies the shape (dimensions) of a Blob. 6 | message BlobShape { 7 | repeated int64 dim = 1 [packed = true]; 8 | } 9 | 10 | message BlobProto { 11 | optional BlobShape shape = 7; 12 | repeated float data = 5 [packed = true]; 13 | repeated float diff = 6 [packed = true]; 14 | repeated double double_data = 8 [packed = true]; 15 | repeated double double_diff = 9 [packed = true]; 16 | 17 | // 4D dimensions -- deprecated. Use "shape" instead. 
18 | optional int32 num = 1 [default = 0]; 19 | optional int32 channels = 2 [default = 0]; 20 | optional int32 height = 3 [default = 0]; 21 | optional int32 width = 4 [default = 0]; 22 | } 23 | 24 | // The BlobProtoVector is simply a way to pass multiple blobproto instances 25 | // around. 26 | message BlobProtoVector { 27 | repeated BlobProto blobs = 1; 28 | } 29 | 30 | message Datum { 31 | optional int32 channels = 1; 32 | optional int32 height = 2; 33 | optional int32 width = 3; 34 | // the actual image data, in bytes 35 | optional bytes data = 4; 36 | optional int32 label = 5; 37 | // Optionally, the datum could also hold float data. 38 | repeated float float_data = 6; 39 | // If true data contains an encoded image that need to be decoded 40 | optional bool encoded = 7 [default = false]; 41 | } 42 | 43 | message FillerParameter { 44 | // The filler type. 45 | optional string type = 1 [default = 'constant']; 46 | optional float value = 2 [default = 0]; // the value in constant filler 47 | optional float min = 3 [default = 0]; // the min value in uniform filler 48 | optional float max = 4 [default = 1]; // the max value in uniform filler 49 | optional float mean = 5 [default = 0]; // the mean value in Gaussian filler 50 | optional float std = 6 [default = 1]; // the std value in Gaussian filler 51 | // The expected number of non-zero output weights for a given input in 52 | // Gaussian filler -- the default -1 means don't perform sparsification. 53 | optional int32 sparse = 7 [default = -1]; 54 | // Normalize the filler variance by fan_in, fan_out, or their average. 55 | // Applies to 'xavier' and 'msra' fillers. 56 | enum VarianceNorm { 57 | FAN_IN = 0; 58 | FAN_OUT = 1; 59 | AVERAGE = 2; 60 | } 61 | optional VarianceNorm variance_norm = 8 [default = FAN_IN]; 62 | } 63 | 64 | message NetParameter { 65 | optional string name = 1; // consider giving the network a name 66 | // DEPRECATED. See InputParameter. The input blobs to the network. 67 | repeated string input = 3; 68 | // DEPRECATED. See InputParameter. The shape of the input blobs. 69 | repeated BlobShape input_shape = 8; 70 | 71 | // 4D input dimensions -- deprecated. Use "input_shape" instead. 72 | // If specified, for each input blob there should be four 73 | // values specifying the num, channels, height and width of the input blob. 74 | // Thus, there should be a total of (4 * #input) numbers. 75 | repeated int32 input_dim = 4; 76 | 77 | // Whether the network will force every layer to carry out backward operation. 78 | // If set False, then whether to carry out backward is determined 79 | // automatically according to the net structure and learning rates. 80 | optional bool force_backward = 5 [default = false]; 81 | // The current "state" of the network, including the phase, level, and stage. 82 | // Some layers may be included/excluded depending on this state and the states 83 | // specified in the layers' include and exclude fields. 84 | optional NetState state = 6; 85 | 86 | // Print debugging information about results while running Net::Forward, 87 | // Net::Backward, and Net::Update. 88 | optional bool debug_info = 7 [default = false]; 89 | 90 | // The layers that make up the net. Each of their configurations, including 91 | // connectivity and behavior, is specified as a LayerParameter. 92 | repeated LayerParameter layer = 100; // ID 100 so layers are printed last. 93 | 94 | // DEPRECATED: use 'layer' instead. 
95 | repeated V1LayerParameter layers = 2; 96 | } 97 | 98 | // NOTE 99 | // Update the next available ID when you add a new SolverParameter field. 100 | // 101 | // SolverParameter next available ID: 42 (last added: layer_wise_reduce) 102 | message SolverParameter { 103 | ////////////////////////////////////////////////////////////////////////////// 104 | // Specifying the train and test networks 105 | // 106 | // Exactly one train net must be specified using one of the following fields: 107 | // train_net_param, train_net, net_param, net 108 | // One or more test nets may be specified using any of the following fields: 109 | // test_net_param, test_net, net_param, net 110 | // If more than one test net field is specified (e.g., both net and 111 | // test_net are specified), they will be evaluated in the field order given 112 | // above: (1) test_net_param, (2) test_net, (3) net_param/net. 113 | // A test_iter must be specified for each test_net. 114 | // A test_level and/or a test_stage may also be specified for each test_net. 115 | ////////////////////////////////////////////////////////////////////////////// 116 | 117 | // Proto filename for the train net, possibly combined with one or more 118 | // test nets. 119 | optional string net = 24; 120 | // Inline train net param, possibly combined with one or more test nets. 121 | optional NetParameter net_param = 25; 122 | 123 | optional string train_net = 1; // Proto filename for the train net. 124 | repeated string test_net = 2; // Proto filenames for the test nets. 125 | optional NetParameter train_net_param = 21; // Inline train net params. 126 | repeated NetParameter test_net_param = 22; // Inline test net params. 127 | 128 | // The states for the train/test nets. Must be unspecified or 129 | // specified once per net. 130 | // 131 | // By default, train_state will have phase = TRAIN, 132 | // and all test_state's will have phase = TEST. 133 | // Other defaults are set according to the NetState defaults. 134 | optional NetState train_state = 26; 135 | repeated NetState test_state = 27; 136 | 137 | // The number of iterations for each test net. 138 | repeated int32 test_iter = 3; 139 | 140 | // The number of iterations between two testing phases. 141 | optional int32 test_interval = 4 [default = 0]; 142 | optional bool test_compute_loss = 19 [default = false]; 143 | // If true, run an initial test pass before the first iteration, 144 | // ensuring memory availability and printing the starting value of the loss. 145 | optional bool test_initialization = 32 [default = true]; 146 | optional float base_lr = 5; // The base learning rate 147 | // the number of iterations between displaying info. If display = 0, no info 148 | // will be displayed. 149 | optional int32 display = 6; 150 | // Display the loss averaged over the last average_loss iterations 151 | optional int32 average_loss = 33 [default = 1]; 152 | optional int32 max_iter = 7; // the maximum number of iterations 153 | // accumulate gradients over `iter_size` x `batch_size` instances 154 | optional int32 iter_size = 36 [default = 1]; 155 | 156 | // The learning rate decay policy. The currently implemented learning rate 157 | // policies are as follows: 158 | // - fixed: always return base_lr. 
159 | // - step: return base_lr * gamma ^ (floor(iter / step)) 160 | // - exp: return base_lr * gamma ^ iter 161 | // - inv: return base_lr * (1 + gamma * iter) ^ (- power) 162 | // - multistep: similar to step but it allows non uniform steps defined by 163 | // stepvalue 164 | // - poly: the effective learning rate follows a polynomial decay, to be 165 | // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) 166 | // - sigmoid: the effective learning rate follows a sigmod decay 167 | // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) 168 | // 169 | // where base_lr, max_iter, gamma, step, stepvalue and power are defined 170 | // in the solver parameter protocol buffer, and iter is the current iteration. 171 | optional string lr_policy = 8; 172 | optional float gamma = 9; // The parameter to compute the learning rate. 173 | optional float power = 10; // The parameter to compute the learning rate. 174 | optional float momentum = 11; // The momentum value. 175 | optional float weight_decay = 12; // The weight decay. 176 | // regularization types supported: L1 and L2 177 | // controlled by weight_decay 178 | optional string regularization_type = 29 [default = "L2"]; 179 | // the stepsize for learning rate policy "step" 180 | optional int32 stepsize = 13; 181 | // the stepsize for learning rate policy "multistep" 182 | repeated int32 stepvalue = 34; 183 | 184 | // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, 185 | // whenever their actual L2 norm is larger. 186 | optional float clip_gradients = 35 [default = -1]; 187 | 188 | optional int32 snapshot = 14 [default = 0]; // The snapshot interval 189 | optional string snapshot_prefix = 15; // The prefix for the snapshot. 190 | // whether to snapshot diff in the results or not. Snapshotting diff will help 191 | // debugging but the final protocol buffer size will be much larger. 192 | optional bool snapshot_diff = 16 [default = false]; 193 | enum SnapshotFormat { 194 | HDF5 = 0; 195 | BINARYPROTO = 1; 196 | } 197 | optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; 198 | // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. 199 | enum SolverMode { 200 | CPU = 0; 201 | GPU = 1; 202 | } 203 | optional SolverMode solver_mode = 17 [default = GPU]; 204 | // the device_id will that be used in GPU mode. Use device_id = 0 in default. 205 | optional int32 device_id = 18 [default = 0]; 206 | // If non-negative, the seed with which the Solver will initialize the Caffe 207 | // random number generator -- useful for reproducible results. Otherwise, 208 | // (and by default) initialize using a seed derived from the system clock. 209 | optional int64 random_seed = 20 [default = -1]; 210 | 211 | // type of the solver 212 | optional string type = 40 [default = "SGD"]; 213 | 214 | // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam 215 | optional float delta = 31 [default = 1e-8]; 216 | // parameters for the Adam solver 217 | optional float momentum2 = 39 [default = 0.999]; 218 | 219 | // RMSProp decay value 220 | // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) 221 | optional float rms_decay = 38 [default = 0.99]; 222 | 223 | // If true, print information about the state of the net that may help with 224 | // debugging learning problems. 225 | optional bool debug_info = 23 [default = false]; 226 | 227 | // If false, don't save a snapshot after training finishes. 
228 | optional bool snapshot_after_train = 28 [default = true]; 229 | 230 | // DEPRECATED: old solver enum types, use string instead 231 | enum SolverType { 232 | SGD = 0; 233 | NESTEROV = 1; 234 | ADAGRAD = 2; 235 | RMSPROP = 3; 236 | ADADELTA = 4; 237 | ADAM = 5; 238 | } 239 | // DEPRECATED: use type instead of solver_type 240 | optional SolverType solver_type = 30 [default = SGD]; 241 | 242 | // Overlap compute and communication for data parallel training 243 | optional bool layer_wise_reduce = 41 [default = true]; 244 | } 245 | 246 | // A message that stores the solver snapshots 247 | message SolverState { 248 | optional int32 iter = 1; // The current iteration 249 | optional string learned_net = 2; // The file that stores the learned net. 250 | repeated BlobProto history = 3; // The history for sgd solvers 251 | optional int32 current_step = 4 [default = 0]; // The current step for learning rate 252 | } 253 | 254 | enum Phase { 255 | TRAIN = 0; 256 | TEST = 1; 257 | } 258 | 259 | message NetState { 260 | optional Phase phase = 1 [default = TEST]; 261 | optional int32 level = 2 [default = 0]; 262 | repeated string stage = 3; 263 | } 264 | 265 | message NetStateRule { 266 | // Set phase to require the NetState have a particular phase (TRAIN or TEST) 267 | // to meet this rule. 268 | optional Phase phase = 1; 269 | 270 | // Set the minimum and/or maximum levels in which the layer should be used. 271 | // Leave undefined to meet the rule regardless of level. 272 | optional int32 min_level = 2; 273 | optional int32 max_level = 3; 274 | 275 | // Customizable sets of stages to include or exclude. 276 | // The net must have ALL of the specified stages and NONE of the specified 277 | // "not_stage"s to meet the rule. 278 | // (Use multiple NetStateRules to specify conjunctions of stages.) 279 | repeated string stage = 4; 280 | repeated string not_stage = 5; 281 | } 282 | 283 | // Specifies training parameters (multipliers on global learning constants, 284 | // and the name and other settings used for weight sharing). 285 | message ParamSpec { 286 | // The names of the parameter blobs -- useful for sharing parameters among 287 | // layers, but never required otherwise. To share a parameter between two 288 | // layers, give it a (non-empty) name. 289 | optional string name = 1; 290 | 291 | // Whether to require shared weights to have the same shape, or just the same 292 | // count -- defaults to STRICT if unspecified. 293 | optional DimCheckMode share_mode = 2; 294 | enum DimCheckMode { 295 | // STRICT (default) requires that num, channels, height, width each match. 296 | STRICT = 0; 297 | // PERMISSIVE requires only the count (num*channels*height*width) to match. 298 | PERMISSIVE = 1; 299 | } 300 | 301 | // The multiplier on the global learning rate for this parameter. 302 | optional float lr_mult = 3 [default = 1.0]; 303 | 304 | // The multiplier on the global weight decay for this parameter. 305 | optional float decay_mult = 4 [default = 1.0]; 306 | } 307 | 308 | // NOTE 309 | // Update the next available ID when you add a new LayerParameter field. 310 | // 311 | // LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) 312 | message LayerParameter { 313 | optional string name = 1; // the layer name 314 | optional string type = 2; // the layer type 315 | repeated string bottom = 3; // the name of each bottom blob 316 | repeated string top = 4; // the name of each top blob 317 | 318 | // The train / test phase for computation. 
319 | optional Phase phase = 10; 320 | 321 | // The amount of weight to assign each top blob in the objective. 322 | // Each layer assigns a default value, usually of either 0 or 1, 323 | // to each top blob. 324 | repeated float loss_weight = 5; 325 | 326 | // Specifies training parameters (multipliers on global learning constants, 327 | // and the name and other settings used for weight sharing). 328 | repeated ParamSpec param = 6; 329 | 330 | // The blobs containing the numeric parameters of the layer. 331 | repeated BlobProto blobs = 7; 332 | 333 | // Specifies whether to backpropagate to each bottom. If unspecified, 334 | // Caffe will automatically infer whether each input needs backpropagation 335 | // to compute parameter gradients. If set to true for some inputs, 336 | // backpropagation to those inputs is forced; if set false for some inputs, 337 | // backpropagation to those inputs is skipped. 338 | // 339 | // The size must be either 0 or equal to the number of bottoms. 340 | repeated bool propagate_down = 11; 341 | 342 | // Rules controlling whether and when a layer is included in the network, 343 | // based on the current NetState. You may specify a non-zero number of rules 344 | // to include OR exclude, but not both. If no include or exclude rules are 345 | // specified, the layer is always included. If the current NetState meets 346 | // ANY (i.e., one or more) of the specified rules, the layer is 347 | // included/excluded. 348 | repeated NetStateRule include = 8; 349 | repeated NetStateRule exclude = 9; 350 | 351 | // Parameters for data pre-processing. 352 | optional TransformationParameter transform_param = 100; 353 | 354 | // Parameters shared by loss layers. 355 | optional LossParameter loss_param = 101; 356 | 357 | // Layer type-specific parameters. 358 | // 359 | // Note: certain layers may have more than one computational engine 360 | // for their implementation. These layers include an Engine type and 361 | // engine parameter for selecting the implementation. 362 | // The default for the engine is set by the ENGINE switch at compile-time. 
363 | optional AccuracyParameter accuracy_param = 102; 364 | optional ArgMaxParameter argmax_param = 103; 365 | optional BatchNormParameter batch_norm_param = 139; 366 | optional BiasParameter bias_param = 141; 367 | optional ConcatParameter concat_param = 104; 368 | optional ContrastiveLossParameter contrastive_loss_param = 105; 369 | optional ConvolutionParameter convolution_param = 106; 370 | optional CropParameter crop_param = 144; 371 | optional DataParameter data_param = 107; 372 | optional DropoutParameter dropout_param = 108; 373 | optional DummyDataParameter dummy_data_param = 109; 374 | optional EltwiseParameter eltwise_param = 110; 375 | optional ELUParameter elu_param = 140; 376 | optional EmbedParameter embed_param = 137; 377 | optional ExpParameter exp_param = 111; 378 | optional FlattenParameter flatten_param = 135; 379 | optional HDF5DataParameter hdf5_data_param = 112; 380 | optional HDF5OutputParameter hdf5_output_param = 113; 381 | optional HingeLossParameter hinge_loss_param = 114; 382 | optional ImageDataParameter image_data_param = 115; 383 | optional InfogainLossParameter infogain_loss_param = 116; 384 | optional InnerProductParameter inner_product_param = 117; 385 | optional InputParameter input_param = 143; 386 | optional LogParameter log_param = 134; 387 | optional LRNParameter lrn_param = 118; 388 | optional MemoryDataParameter memory_data_param = 119; 389 | optional MVNParameter mvn_param = 120; 390 | optional ParameterParameter parameter_param = 145; 391 | optional PoolingParameter pooling_param = 121; 392 | optional PowerParameter power_param = 122; 393 | optional PReLUParameter prelu_param = 131; 394 | optional PythonParameter python_param = 130; 395 | optional RecurrentParameter recurrent_param = 146; 396 | optional ReductionParameter reduction_param = 136; 397 | optional ReLUParameter relu_param = 123; 398 | optional ReshapeParameter reshape_param = 133; 399 | optional ScaleParameter scale_param = 142; 400 | optional SigmoidParameter sigmoid_param = 124; 401 | optional SoftmaxParameter softmax_param = 125; 402 | optional SPPParameter spp_param = 132; 403 | optional SliceParameter slice_param = 126; 404 | optional TanHParameter tanh_param = 127; 405 | optional ThresholdParameter threshold_param = 128; 406 | optional TileParameter tile_param = 138; 407 | optional WindowDataParameter window_data_param = 129; 408 | } 409 | 410 | // Message that stores parameters used to apply transformation 411 | // to the data layer's data 412 | message TransformationParameter { 413 | // For data pre-processing, we can do simple scaling and subtracting the 414 | // data mean, if provided. Note that the mean subtraction is always carried 415 | // out before scaling. 416 | optional float scale = 1 [default = 1]; 417 | // Specify if we want to randomly mirror data. 418 | optional bool mirror = 2 [default = false]; 419 | // Specify if we would like to randomly crop an image. 420 | optional uint32 crop_size = 3 [default = 0]; 421 | // mean_file and mean_value cannot be specified at the same time 422 | optional string mean_file = 4; 423 | // if specified can be repeated once (would subtract it from all the channels) 424 | // or can be repeated the same number of times as channels 425 | // (would subtract them from the corresponding channel) 426 | repeated float mean_value = 5; 427 | // Force the decoded image to have 3 color channels. 428 | optional bool force_color = 6 [default = false]; 429 | // Force the decoded image to have 1 color channels. 
430 | optional bool force_gray = 7 [default = false]; 431 | 432 | // Begin Added by garylau for Image augmentation, 2017.11.30 433 | optional float apply_probability = 8 [default = 0.5]; 434 | optional bool smooth_filtering = 9 [default = false]; 435 | optional float max_smooth = 10 [default = 6]; 436 | // Specify the angle for doing rotation 437 | optional uint32 max_rotation_angle = 11 [default = 0]; 438 | // Specify the contrast, brightness, smooth and color shift for augmentation 439 | optional bool contrast_brightness_adjustment = 12 [default = false]; 440 | optional float min_contrast = 13 [default = 0.8]; 441 | optional float max_contrast = 14 [default = 1.2]; 442 | optional uint32 max_brightness_shift = 15 [default = 5]; 443 | optional uint32 max_color_shift = 16 [default = 0]; 444 | // Min side resizing, keep aspect ratio 445 | optional uint32 min_side_min = 17 [default = 0]; 446 | optional uint32 min_side_max = 18 [default = 0]; 447 | optional uint32 min_side = 19 [default = 0]; 448 | // affine transformation 449 | optional float affine_min_scale = 20 [default = 0]; 450 | optional float affine_max_scale = 21 [default = 0]; 451 | optional bool debug_params = 22 [default = false]; 452 | // End Added by garylau for Image augmentation, 2017.11.30 453 | } 454 | 455 | // Message that stores parameters shared by loss layers 456 | message LossParameter { 457 | // If specified, ignore instances with the given label. 458 | optional int32 ignore_label = 1; 459 | // How to normalize the loss for loss layers that aggregate across batches, 460 | // spatial dimensions, or other dimensions. Currently only implemented in 461 | // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. 462 | enum NormalizationMode { 463 | // Divide by the number of examples in the batch times spatial dimensions. 464 | // Outputs that receive the ignore label will NOT be ignored in computing 465 | // the normalization factor. 466 | FULL = 0; 467 | // Divide by the total number of output locations that do not take the 468 | // ignore_label. If ignore_label is not set, this behaves like FULL. 469 | VALID = 1; 470 | // Divide by the batch size. 471 | BATCH_SIZE = 2; 472 | // Do not normalize the loss. 473 | NONE = 3; 474 | } 475 | // For historical reasons, the default normalization for 476 | // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. 477 | optional NormalizationMode normalization = 3 [default = VALID]; 478 | // Deprecated. Ignored if normalization is specified. If normalization 479 | // is not specified, then setting this to false will be equivalent to 480 | // normalization = BATCH_SIZE to be consistent with previous behavior. 481 | optional bool normalize = 2; 482 | } 483 | 484 | // Messages that store parameters used by individual layer types follow, in 485 | // alphabetical order. 486 | 487 | message AccuracyParameter { 488 | // When computing accuracy, count as correct by comparing the true label to 489 | // the top k scoring classes. By default, only compare to the top scoring 490 | // class (i.e. argmax). 491 | optional uint32 top_k = 1 [default = 1]; 492 | 493 | // The "label" axis of the prediction blob, whose argmax corresponds to the 494 | // predicted label -- may be negative to index from the end (e.g., -1 for the 495 | // last axis). For example, if axis == 1 and the predictions are 496 | // (N x C x H x W), the label blob is expected to contain N*H*W ground truth 497 | // labels with integer values in {0, 1, ..., C-1}. 
498 | optional int32 axis = 2 [default = 1]; 499 | 500 | // If specified, ignore instances with the given label. 501 | optional int32 ignore_label = 3; 502 | } 503 | 504 | message ArgMaxParameter { 505 | // If true produce pairs (argmax, maxval) 506 | optional bool out_max_val = 1 [default = false]; 507 | optional uint32 top_k = 2 [default = 1]; 508 | // The axis along which to maximise -- may be negative to index from the 509 | // end (e.g., -1 for the last axis). 510 | // By default ArgMaxLayer maximizes over the flattened trailing dimensions 511 | // for each index of the first / num dimension. 512 | optional int32 axis = 3; 513 | } 514 | 515 | message ConcatParameter { 516 | // The axis along which to concatenate -- may be negative to index from the 517 | // end (e.g., -1 for the last axis). Other axes must have the 518 | // same dimension for all the bottom blobs. 519 | // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 520 | optional int32 axis = 2 [default = 1]; 521 | 522 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 523 | optional uint32 concat_dim = 1 [default = 1]; 524 | } 525 | 526 | message BatchNormParameter { 527 | // If false, normalization is performed over the current mini-batch 528 | // and global statistics are accumulated (but not yet used) by a moving 529 | // average. 530 | // If true, those accumulated mean and variance values are used for the 531 | // normalization. 532 | // By default, it is set to false when the network is in the training 533 | // phase and true when the network is in the testing phase. 534 | optional bool use_global_stats = 1; 535 | // What fraction of the moving average remains each iteration? 536 | // Smaller values make the moving average decay faster, giving more 537 | // weight to the recent values. 538 | // Each iteration updates the moving average @f$S_{t-1}@f$ with the 539 | // current mean @f$ Y_t @f$ by 540 | // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ 541 | // is the moving_average_fraction parameter. 542 | optional float moving_average_fraction = 2 [default = .999]; 543 | // Small value to add to the variance estimate so that we don't divide by 544 | // zero. 545 | optional float eps = 3 [default = 1e-5]; 546 | } 547 | 548 | message BiasParameter { 549 | // The first axis of bottom[0] (the first input Blob) along which to apply 550 | // bottom[1] (the second input Blob). May be negative to index from the end 551 | // (e.g., -1 for the last axis). 552 | // 553 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 554 | // top[0] will have the same shape, and bottom[1] may have any of the 555 | // following shapes (for the given value of axis): 556 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 557 | // (axis == 1 == -3) 3; 3x40; 3x40x60 558 | // (axis == 2 == -2) 40; 40x60 559 | // (axis == 3 == -1) 60 560 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 561 | // "axis") -- a scalar bias. 562 | optional int32 axis = 1 [default = 1]; 563 | 564 | // (num_axes is ignored unless just one bottom is given and the bias is 565 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 566 | // number of axes by the second bottom.) 567 | // The number of axes of the input (bottom[0]) covered by the bias 568 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 569 | // Set num_axes := 0, to add a zero-axis Blob: a scalar. 
570 | optional int32 num_axes = 2 [default = 1]; 571 | 572 | // (filler is ignored unless just one bottom is given and the bias is 573 | // a learned parameter of the layer.) 574 | // The initialization for the learned bias parameter. 575 | // Default is the zero (0) initialization, resulting in the BiasLayer 576 | // initially performing the identity operation. 577 | optional FillerParameter filler = 3; 578 | } 579 | 580 | message ContrastiveLossParameter { 581 | // margin for dissimilar pair 582 | optional float margin = 1 [default = 1.0]; 583 | // The first implementation of this cost did not exactly match the cost of 584 | // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 585 | // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 586 | // Hadsell paper. New models should probably use this version. 587 | // legacy_version = true uses (margin - d^2). This is kept to support / 588 | // reproduce existing models and results 589 | optional bool legacy_version = 2 [default = false]; 590 | } 591 | 592 | message ConvolutionParameter { 593 | optional uint32 num_output = 1; // The number of outputs for the layer 594 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 595 | 596 | // Pad, kernel size, and stride are all given as a single value for equal 597 | // dimensions in all spatial dimensions, or once per spatial dimension. 598 | repeated uint32 pad = 3; // The padding size; defaults to 0 599 | repeated uint32 kernel_size = 4; // The kernel size 600 | repeated uint32 stride = 6; // The stride; defaults to 1 601 | // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 602 | // holes. (Kernel dilation is sometimes referred to by its use in the 603 | // algorithme à trous from Holschneider et al. 1987.) 604 | repeated uint32 dilation = 18; // The dilation; defaults to 1 605 | 606 | // For 2D convolution only, the *_h and *_w versions may also be used to 607 | // specify both spatial dimensions. 608 | optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 609 | optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 610 | optional uint32 kernel_h = 11; // The kernel height (2D only) 611 | optional uint32 kernel_w = 12; // The kernel width (2D only) 612 | optional uint32 stride_h = 13; // The stride height (2D only) 613 | optional uint32 stride_w = 14; // The stride width (2D only) 614 | 615 | optional uint32 group = 5 [default = 1]; // The group size for group conv 616 | 617 | optional FillerParameter weight_filler = 7; // The filler for the weight 618 | optional FillerParameter bias_filler = 8; // The filler for the bias 619 | enum Engine { 620 | DEFAULT = 0; 621 | CAFFE = 1; 622 | CUDNN = 2; 623 | } 624 | optional Engine engine = 15 [default = DEFAULT]; 625 | 626 | // The axis to interpret as "channels" when performing convolution. 627 | // Preceding dimensions are treated as independent inputs; 628 | // succeeding dimensions are treated as "spatial". 629 | // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 630 | // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 631 | // groups g>1) filters across the spatial axes (H, W) of the input. 632 | // With (N, C, D, H, W) inputs, and axis == 1, we perform 633 | // N independent 3D convolutions, sliding (C/g)-channels 634 | // filters across the spatial axes (D, H, W) of the input. 
635 | optional int32 axis = 16 [default = 1]; 636 | 637 | // Whether to force use of the general ND convolution, even if a specific 638 | // implementation for blobs of the appropriate number of spatial dimensions 639 | // is available. (Currently, there is only a 2D-specific convolution 640 | // implementation; for input blobs with num_axes != 2, this option is 641 | // ignored and the ND implementation will be used.) 642 | optional bool force_nd_im2col = 17 [default = false]; 643 | } 644 | 645 | message CropParameter { 646 | // To crop, elements of the first bottom are selected to fit the dimensions 647 | // of the second, reference bottom. The crop is configured by 648 | // - the crop `axis` to pick the dimensions for cropping 649 | // - the crop `offset` to set the shift for all/each dimension 650 | // to align the cropped bottom with the reference bottom. 651 | // All dimensions up to but excluding `axis` are preserved, while 652 | // the dimensions including and trailing `axis` are cropped. 653 | // If only one `offset` is set, then all dimensions are offset by this amount. 654 | // Otherwise, the number of offsets must equal the number of cropped axes to 655 | // shift the crop in each dimension accordingly. 656 | // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 657 | // and `axis` may be negative to index from the end (e.g., -1 for the last 658 | // axis). 659 | optional int32 axis = 1 [default = 2]; 660 | repeated uint32 offset = 2; 661 | } 662 | 663 | message DataParameter { 664 | enum DB { 665 | LEVELDB = 0; 666 | LMDB = 1; 667 | } 668 | // Specify the data source. 669 | optional string source = 1; 670 | // Specify the batch size. 671 | optional uint32 batch_size = 4; 672 | // The rand_skip variable is for the data layer to skip a few data points 673 | // to avoid all asynchronous sgd clients to start at the same point. The skip 674 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 675 | // be larger than the number of keys in the database. 676 | // DEPRECATED. Each solver accesses a different subset of the database. 677 | optional uint32 rand_skip = 7 [default = 0]; 678 | optional DB backend = 8 [default = LEVELDB]; 679 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 680 | // simple scaling and subtracting the data mean, if provided. Note that the 681 | // mean subtraction is always carried out before scaling. 682 | optional float scale = 2 [default = 1]; 683 | optional string mean_file = 3; 684 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 685 | // crop an image. 686 | optional uint32 crop_size = 5 [default = 0]; 687 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 688 | // data. 689 | optional bool mirror = 6 [default = false]; 690 | // Force the encoded image to have 3 color channels 691 | optional bool force_encoded_color = 9 [default = false]; 692 | // Prefetch queue (Increase if data feeding bandwidth varies, within the 693 | // limit of device memory for GPU training) 694 | optional uint32 prefetch = 10 [default = 4]; 695 | } 696 | 697 | message DropoutParameter { 698 | optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 699 | } 700 | 701 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random 702 | // (or constant) data generated by "Fillers" (see "message FillerParameter"). 703 | message DummyDataParameter { 704 | // This layer produces N >= 1 top blobs. 
DummyDataParameter must specify 1 or N 705 | // shape fields, and 0, 1 or N data_fillers. 706 | // 707 | // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 708 | // If 1 data_filler is specified, it is applied to all top blobs. If N are 709 | // specified, the ith is applied to the ith top blob. 710 | repeated FillerParameter data_filler = 1; 711 | repeated BlobShape shape = 6; 712 | 713 | // 4D dimensions -- deprecated. Use "shape" instead. 714 | repeated uint32 num = 2; 715 | repeated uint32 channels = 3; 716 | repeated uint32 height = 4; 717 | repeated uint32 width = 5; 718 | } 719 | 720 | message EltwiseParameter { 721 | enum EltwiseOp { 722 | PROD = 0; 723 | SUM = 1; 724 | MAX = 2; 725 | } 726 | optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 727 | repeated float coeff = 2; // blob-wise coefficient for SUM operation 728 | 729 | // Whether to use an asymptotically slower (for >2 inputs) but stabler method 730 | // of computing the gradient for the PROD operation. (No effect for SUM op.) 731 | optional bool stable_prod_grad = 3 [default = true]; 732 | } 733 | 734 | // Message that stores parameters used by ELULayer 735 | message ELUParameter { 736 | // Described in: 737 | // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 738 | // Deep Network Learning by Exponential Linear Units (ELUs). arXiv 739 | optional float alpha = 1 [default = 1]; 740 | } 741 | 742 | // Message that stores parameters used by EmbedLayer 743 | message EmbedParameter { 744 | optional uint32 num_output = 1; // The number of outputs for the layer 745 | // The input is given as integers to be interpreted as one-hot 746 | // vector indices with dimension num_input. Hence num_input should be 747 | // 1 greater than the maximum possible input value. 748 | optional uint32 input_dim = 2; 749 | 750 | optional bool bias_term = 3 [default = true]; // Whether to use a bias term 751 | optional FillerParameter weight_filler = 4; // The filler for the weight 752 | optional FillerParameter bias_filler = 5; // The filler for the bias 753 | 754 | } 755 | 756 | // Message that stores parameters used by ExpLayer 757 | message ExpParameter { 758 | // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 759 | // Or if base is set to the default (-1), base is set to e, 760 | // so y = exp(shift + scale * x). 761 | optional float base = 1 [default = -1.0]; 762 | optional float scale = 2 [default = 1.0]; 763 | optional float shift = 3 [default = 0.0]; 764 | } 765 | 766 | /// Message that stores parameters used by FlattenLayer 767 | message FlattenParameter { 768 | // The first axis to flatten: all preceding axes are retained in the output. 769 | // May be negative to index from the end (e.g., -1 for the last axis). 770 | optional int32 axis = 1 [default = 1]; 771 | 772 | // The last axis to flatten: all following axes are retained in the output. 773 | // May be negative to index from the end (e.g., the default -1 for the last 774 | // axis). 775 | optional int32 end_axis = 2 [default = -1]; 776 | } 777 | 778 | // Message that stores parameters used by HDF5DataLayer 779 | message HDF5DataParameter { 780 | // Specify the data source. 781 | optional string source = 1; 782 | // Specify the batch size. 783 | optional uint32 batch_size = 2; 784 | 785 | // Specify whether to shuffle the data. 
786 | // If shuffle == true, the ordering of the HDF5 files is shuffled, 787 | // and the ordering of data within any given HDF5 file is shuffled, 788 | // but data between different files are not interleaved; all of a file's 789 | // data are output (in a random order) before moving onto another file. 790 | optional bool shuffle = 3 [default = false]; 791 | } 792 | 793 | message HDF5OutputParameter { 794 | optional string file_name = 1; 795 | } 796 | 797 | message HingeLossParameter { 798 | enum Norm { 799 | L1 = 1; 800 | L2 = 2; 801 | } 802 | // Specify the Norm to use L1 or L2 803 | optional Norm norm = 1 [default = L1]; 804 | } 805 | 806 | message ImageDataParameter { 807 | // Specify the data source. 808 | optional string source = 1; 809 | // Specify the batch size. 810 | optional uint32 batch_size = 4 [default = 1]; 811 | // The rand_skip variable is for the data layer to skip a few data points 812 | // to avoid all asynchronous sgd clients to start at the same point. The skip 813 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 814 | // be larger than the number of keys in the database. 815 | optional uint32 rand_skip = 7 [default = 0]; 816 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 817 | optional bool shuffle = 8 [default = false]; 818 | // It will also resize images if new_height or new_width are not zero. 819 | optional uint32 new_height = 9 [default = 0]; 820 | optional uint32 new_width = 10 [default = 0]; 821 | // Specify if the images are color or gray 822 | optional bool is_color = 11 [default = true]; 823 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 824 | // simple scaling and subtracting the data mean, if provided. Note that the 825 | // mean subtraction is always carried out before scaling. 826 | optional float scale = 2 [default = 1]; 827 | optional string mean_file = 3; 828 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 829 | // crop an image. 830 | optional uint32 crop_size = 5 [default = 0]; 831 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 832 | // data. 833 | optional bool mirror = 6 [default = false]; 834 | optional string root_folder = 12 [default = ""]; 835 | } 836 | 837 | message InfogainLossParameter { 838 | // Specify the infogain matrix source. 839 | optional string source = 1; 840 | optional int32 axis = 2 [default = 1]; // axis of prob 841 | } 842 | 843 | message InnerProductParameter { 844 | optional uint32 num_output = 1; // The number of outputs for the layer 845 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 846 | optional FillerParameter weight_filler = 3; // The filler for the weight 847 | optional FillerParameter bias_filler = 4; // The filler for the bias 848 | 849 | // The first axis to be lumped into a single inner product computation; 850 | // all preceding axes are retained in the output. 851 | // May be negative to index from the end (e.g., -1 for the last axis). 852 | optional int32 axis = 5 [default = 1]; 853 | // Specify whether to transpose the weight matrix or not. 854 | // If transpose == true, any operations will be performed on the transpose 855 | // of the weight matrix. The weight matrix itself is not going to be transposed 856 | // but rather the transfer flag of operations will be toggled accordingly. 
857 | optional bool transpose = 6 [default = false]; 858 | } 859 | 860 | message InputParameter { 861 | // This layer produces N >= 1 top blob(s) to be assigned manually. 862 | // Define N shapes to set a shape for each top. 863 | // Define 1 shape to set the same shape for every top. 864 | // Define no shape to defer to reshaping manually. 865 | repeated BlobShape shape = 1; 866 | } 867 | 868 | // Message that stores parameters used by LogLayer 869 | message LogParameter { 870 | // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 871 | // Or if base is set to the default (-1), base is set to e, 872 | // so y = ln(shift + scale * x) = log_e(shift + scale * x) 873 | optional float base = 1 [default = -1.0]; 874 | optional float scale = 2 [default = 1.0]; 875 | optional float shift = 3 [default = 0.0]; 876 | } 877 | 878 | // Message that stores parameters used by LRNLayer 879 | message LRNParameter { 880 | optional uint32 local_size = 1 [default = 5]; 881 | optional float alpha = 2 [default = 1.]; 882 | optional float beta = 3 [default = 0.75]; 883 | enum NormRegion { 884 | ACROSS_CHANNELS = 0; 885 | WITHIN_CHANNEL = 1; 886 | } 887 | optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 888 | optional float k = 5 [default = 1.]; 889 | enum Engine { 890 | DEFAULT = 0; 891 | CAFFE = 1; 892 | CUDNN = 2; 893 | } 894 | optional Engine engine = 6 [default = DEFAULT]; 895 | } 896 | 897 | message MemoryDataParameter { 898 | optional uint32 batch_size = 1; 899 | optional uint32 channels = 2; 900 | optional uint32 height = 3; 901 | optional uint32 width = 4; 902 | } 903 | 904 | message MVNParameter { 905 | // This parameter can be set to false to normalize mean only 906 | optional bool normalize_variance = 1 [default = true]; 907 | 908 | // This parameter can be set to true to perform DNN-like MVN 909 | optional bool across_channels = 2 [default = false]; 910 | 911 | // Epsilon for not dividing by zero while normalizing variance 912 | optional float eps = 3 [default = 1e-9]; 913 | } 914 | 915 | message ParameterParameter { 916 | optional BlobShape shape = 1; 917 | } 918 | 919 | message PoolingParameter { 920 | enum PoolMethod { 921 | MAX = 0; 922 | AVE = 1; 923 | STOCHASTIC = 2; 924 | } 925 | optional PoolMethod pool = 1 [default = MAX]; // The pooling method 926 | // Pad, kernel size, and stride are all given as a single value for equal 927 | // dimensions in height and width or as Y, X pairs. 928 | optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) 929 | optional uint32 pad_h = 9 [default = 0]; // The padding height 930 | optional uint32 pad_w = 10 [default = 0]; // The padding width 931 | optional uint32 kernel_size = 2; // The kernel size (square) 932 | optional uint32 kernel_h = 5; // The kernel height 933 | optional uint32 kernel_w = 6; // The kernel width 934 | optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) 935 | optional uint32 stride_h = 7; // The stride height 936 | optional uint32 stride_w = 8; // The stride width 937 | enum Engine { 938 | DEFAULT = 0; 939 | CAFFE = 1; 940 | CUDNN = 2; 941 | } 942 | optional Engine engine = 11 [default = DEFAULT]; 943 | // If global_pooling then it will pool over the size of the bottom by doing 944 | // kernel_h = bottom->height and kernel_w = bottom->width 945 | optional bool global_pooling = 12 [default = false]; 946 | } 947 | 948 | message PowerParameter { 949 | // PowerLayer computes outputs y = (shift + scale * x) ^ power. 
950 | optional float power = 1 [default = 1.0]; 951 | optional float scale = 2 [default = 1.0]; 952 | optional float shift = 3 [default = 0.0]; 953 | } 954 | 955 | message PythonParameter { 956 | optional string module = 1; 957 | optional string layer = 2; 958 | // This value is set to the attribute `param_str` of the `PythonLayer` object 959 | // in Python before calling the `setup()` method. This could be a number, 960 | // string, dictionary in Python dict format, JSON, etc. You may parse this 961 | // string in `setup` method and use it in `forward` and `backward`. 962 | optional string param_str = 3 [default = '']; 963 | // DEPRECATED 964 | optional bool share_in_parallel = 4 [default = false]; 965 | } 966 | 967 | // Message that stores parameters used by RecurrentLayer 968 | message RecurrentParameter { 969 | // The dimension of the output (and usually hidden state) representation -- 970 | // must be explicitly set to non-zero. 971 | optional uint32 num_output = 1 [default = 0]; 972 | 973 | optional FillerParameter weight_filler = 2; // The filler for the weight 974 | optional FillerParameter bias_filler = 3; // The filler for the bias 975 | 976 | // Whether to enable displaying debug_info in the unrolled recurrent net. 977 | optional bool debug_info = 4 [default = false]; 978 | 979 | // Whether to add as additional inputs (bottoms) the initial hidden state 980 | // blobs, and add as additional outputs (tops) the final timestep hidden state 981 | // blobs. The number of additional bottom/top blobs required depends on the 982 | // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 983 | optional bool expose_hidden = 5 [default = false]; 984 | } 985 | 986 | // Message that stores parameters used by ReductionLayer 987 | message ReductionParameter { 988 | enum ReductionOp { 989 | SUM = 1; 990 | ASUM = 2; 991 | SUMSQ = 3; 992 | MEAN = 4; 993 | } 994 | 995 | optional ReductionOp operation = 1 [default = SUM]; // reduction operation 996 | 997 | // The first axis to reduce to a scalar -- may be negative to index from the 998 | // end (e.g., -1 for the last axis). 999 | // (Currently, only reduction along ALL "tail" axes is supported; reduction 1000 | // of axis M through N, where N < num_axes - 1, is unsupported.) 1001 | // Suppose we have an n-axis bottom Blob with shape: 1002 | // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 1003 | // If axis == m, the output Blob will have shape 1004 | // (d0, d1, d2, ..., d(m-1)), 1005 | // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) 1006 | // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. 1007 | // If axis == 0 (the default), the output Blob always has the empty shape 1008 | // (count 1), performing reduction across the entire input -- 1009 | // often useful for creating new loss functions. 1010 | optional int32 axis = 2 [default = 0]; 1011 | 1012 | optional float coeff = 3 [default = 1.0]; // coefficient for output 1013 | } 1014 | 1015 | // Message that stores parameters used by ReLULayer 1016 | message ReLUParameter { 1017 | // Allow non-zero slope for negative inputs to speed up optimization 1018 | // Described in: 1019 | // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities 1020 | // improve neural network acoustic models. In ICML Workshop on Deep Learning 1021 | // for Audio, Speech, and Language Processing. 
1022 | optional float negative_slope = 1 [default = 0]; 1023 | enum Engine { 1024 | DEFAULT = 0; 1025 | CAFFE = 1; 1026 | CUDNN = 2; 1027 | } 1028 | optional Engine engine = 2 [default = DEFAULT]; 1029 | } 1030 | 1031 | message ReshapeParameter { 1032 | // Specify the output dimensions. If some of the dimensions are set to 0, 1033 | // the corresponding dimension from the bottom layer is used (unchanged). 1034 | // Exactly one dimension may be set to -1, in which case its value is 1035 | // inferred from the count of the bottom blob and the remaining dimensions. 1036 | // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: 1037 | // 1038 | // layer { 1039 | // type: "Reshape" bottom: "input" top: "output" 1040 | // reshape_param { ... } 1041 | // } 1042 | // 1043 | // If "input" is 2D with shape 2 x 8, then the following reshape_param 1044 | // specifications are all equivalent, producing a 3D blob "output" with shape 1045 | // 2 x 2 x 4: 1046 | // 1047 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1048 | // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } 1049 | // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } 1050 | // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } 1051 | // 1052 | optional BlobShape shape = 1; 1053 | 1054 | // axis and num_axes control the portion of the bottom blob's shape that are 1055 | // replaced by (included in) the reshape. By default (axis == 0 and 1056 | // num_axes == -1), the entire bottom blob shape is included in the reshape, 1057 | // and hence the shape field must specify the entire output shape. 1058 | // 1059 | // axis may be non-zero to retain some portion of the beginning of the input 1060 | // shape (and may be negative to index from the end; e.g., -1 to begin the 1061 | // reshape after the last axis, including nothing in the reshape, 1062 | // -2 to include only the last axis, etc.). 1063 | // 1064 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1065 | // Then the following ReshapeLayer specifications are all equivalent, 1066 | // producing a blob "output" with shape 2 x 2 x 4: 1067 | // 1068 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1069 | // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } 1070 | // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } 1071 | // 1072 | // num_axes specifies the extent of the reshape. 1073 | // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on 1074 | // input axes in the range [axis, axis+num_axes]. 1075 | // num_axes may also be -1, the default, to include all remaining axes 1076 | // (starting from axis). 1077 | // 1078 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1079 | // Then the following ReshapeLayer specifications are equivalent, 1080 | // producing a blob "output" with shape 1 x 2 x 8. 1081 | // 1082 | // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } 1083 | // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } 1084 | // reshape_param { shape { dim: 1 } num_axes: 0 } 1085 | // 1086 | // On the other hand, these would produce output blob shape 2 x 1 x 8: 1087 | // 1088 | // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } 1089 | // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } 1090 | // 1091 | optional int32 axis = 2 [default = 0]; 1092 | optional int32 num_axes = 3 [default = -1]; 1093 | } 1094 | 1095 | message ScaleParameter { 1096 | // The first axis of bottom[0] (the first input Blob) along which to apply 1097 | // bottom[1] (the second input Blob). 
May be negative to index from the end 1098 | // (e.g., -1 for the last axis). 1099 | // 1100 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 1101 | // top[0] will have the same shape, and bottom[1] may have any of the 1102 | // following shapes (for the given value of axis): 1103 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 1104 | // (axis == 1 == -3) 3; 3x40; 3x40x60 1105 | // (axis == 2 == -2) 40; 40x60 1106 | // (axis == 3 == -1) 60 1107 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 1108 | // "axis") -- a scalar multiplier. 1109 | optional int32 axis = 1 [default = 1]; 1110 | 1111 | // (num_axes is ignored unless just one bottom is given and the scale is 1112 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 1113 | // number of axes by the second bottom.) 1114 | // The number of axes of the input (bottom[0]) covered by the scale 1115 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 1116 | // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 1117 | optional int32 num_axes = 2 [default = 1]; 1118 | 1119 | // (filler is ignored unless just one bottom is given and the scale is 1120 | // a learned parameter of the layer.) 1121 | // The initialization for the learned scale parameter. 1122 | // Default is the unit (1) initialization, resulting in the ScaleLayer 1123 | // initially performing the identity operation. 1124 | optional FillerParameter filler = 3; 1125 | 1126 | // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but 1127 | // may be more efficient). Initialized with bias_filler (defaults to 0). 1128 | optional bool bias_term = 4 [default = false]; 1129 | optional FillerParameter bias_filler = 5; 1130 | } 1131 | 1132 | message SigmoidParameter { 1133 | enum Engine { 1134 | DEFAULT = 0; 1135 | CAFFE = 1; 1136 | CUDNN = 2; 1137 | } 1138 | optional Engine engine = 1 [default = DEFAULT]; 1139 | } 1140 | 1141 | message SliceParameter { 1142 | // The axis along which to slice -- may be negative to index from the end 1143 | // (e.g., -1 for the last axis). 1144 | // By default, SliceLayer concatenates blobs along the "channels" axis (1). 1145 | optional int32 axis = 3 [default = 1]; 1146 | repeated uint32 slice_point = 2; 1147 | 1148 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 1149 | optional uint32 slice_dim = 1 [default = 1]; 1150 | } 1151 | 1152 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer 1153 | message SoftmaxParameter { 1154 | enum Engine { 1155 | DEFAULT = 0; 1156 | CAFFE = 1; 1157 | CUDNN = 2; 1158 | } 1159 | optional Engine engine = 1 [default = DEFAULT]; 1160 | 1161 | // The axis along which to perform the softmax -- may be negative to index 1162 | // from the end (e.g., -1 for the last axis). 1163 | // Any other axes will be evaluated as independent softmaxes. 1164 | optional int32 axis = 2 [default = 1]; 1165 | } 1166 | 1167 | message TanHParameter { 1168 | enum Engine { 1169 | DEFAULT = 0; 1170 | CAFFE = 1; 1171 | CUDNN = 2; 1172 | } 1173 | optional Engine engine = 1 [default = DEFAULT]; 1174 | } 1175 | 1176 | // Message that stores parameters used by TileLayer 1177 | message TileParameter { 1178 | // The index of the axis to tile. 1179 | optional int32 axis = 1 [default = 1]; 1180 | 1181 | // The number of copies (tiles) of the blob to output. 
1182 |   optional int32 tiles = 2;
1183 | }
1184 | 
1185 | // Message that stores parameters used by ThresholdLayer
1186 | message ThresholdParameter {
1187 |   optional float threshold = 1 [default = 0]; // Strictly positive values
1188 | }
1189 | 
1190 | message WindowDataParameter {
1191 |   // Specify the data source.
1192 |   optional string source = 1;
1193 |   // For data pre-processing, we can do simple scaling and subtracting the
1194 |   // data mean, if provided. Note that the mean subtraction is always carried
1195 |   // out before scaling.
1196 |   optional float scale = 2 [default = 1];
1197 |   optional string mean_file = 3;
1198 |   // Specify the batch size.
1199 |   optional uint32 batch_size = 4;
1200 |   // Specify if we would like to randomly crop an image.
1201 |   optional uint32 crop_size = 5 [default = 0];
1202 |   // Specify if we want to randomly mirror data.
1203 |   optional bool mirror = 6 [default = false];
1204 |   // Foreground (object) overlap threshold
1205 |   optional float fg_threshold = 7 [default = 0.5];
1206 |   // Background (non-object) overlap threshold
1207 |   optional float bg_threshold = 8 [default = 0.5];
1208 |   // Fraction of batch that should be foreground objects
1209 |   optional float fg_fraction = 9 [default = 0.25];
1210 |   // Amount of contextual padding to add around a window
1211 |   // (used only by the window_data_layer)
1212 |   optional uint32 context_pad = 10 [default = 0];
1213 |   // Mode for cropping out a detection window
1214 |   // warp: cropped window is warped to a fixed size and aspect ratio
1215 |   // square: the tightest square around the window is cropped
1216 |   optional string crop_mode = 11 [default = "warp"];
1217 |   // cache_images: will load all images in memory for faster access
1218 |   optional bool cache_images = 12 [default = false];
1219 |   // append root_folder to locate images
1220 |   optional string root_folder = 13 [default = ""];
1221 | }
1222 | 
1223 | message SPPParameter {
1224 |   enum PoolMethod {
1225 |     MAX = 0;
1226 |     AVE = 1;
1227 |     STOCHASTIC = 2;
1228 |   }
1229 |   optional uint32 pyramid_height = 1;
1230 |   optional PoolMethod pool = 2 [default = MAX]; // The pooling method
1231 |   enum Engine {
1232 |     DEFAULT = 0;
1233 |     CAFFE = 1;
1234 |     CUDNN = 2;
1235 |   }
1236 |   optional Engine engine = 6 [default = DEFAULT];
1237 | }
1238 | 
1239 | // DEPRECATED: use LayerParameter.
1240 | message V1LayerParameter {
1241 |   repeated string bottom = 2;
1242 |   repeated string top = 3;
1243 |   optional string name = 4;
1244 |   repeated NetStateRule include = 32;
1245 |   repeated NetStateRule exclude = 33;
1246 |   enum LayerType {
1247 |     NONE = 0;
1248 |     ABSVAL = 35;
1249 |     ACCURACY = 1;
1250 |     ARGMAX = 30;
1251 |     BNLL = 2;
1252 |     CONCAT = 3;
1253 |     CONTRASTIVE_LOSS = 37;
1254 |     CONVOLUTION = 4;
1255 |     DATA = 5;
1256 |     DECONVOLUTION = 39;
1257 |     DROPOUT = 6;
1258 |     DUMMY_DATA = 32;
1259 |     EUCLIDEAN_LOSS = 7;
1260 |     ELTWISE = 25;
1261 |     EXP = 38;
1262 |     FLATTEN = 8;
1263 |     HDF5_DATA = 9;
1264 |     HDF5_OUTPUT = 10;
1265 |     HINGE_LOSS = 28;
1266 |     IM2COL = 11;
1267 |     IMAGE_DATA = 12;
1268 |     INFOGAIN_LOSS = 13;
1269 |     INNER_PRODUCT = 14;
1270 |     LRN = 15;
1271 |     MEMORY_DATA = 29;
1272 |     MULTINOMIAL_LOGISTIC_LOSS = 16;
1273 |     MVN = 34;
1274 |     POOLING = 17;
1275 |     POWER = 26;
1276 |     RELU = 18;
1277 |     SIGMOID = 19;
1278 |     SIGMOID_CROSS_ENTROPY_LOSS = 27;
1279 |     SILENCE = 36;
1280 |     SOFTMAX = 20;
1281 |     SOFTMAX_LOSS = 21;
1282 |     SPLIT = 22;
1283 |     SLICE = 33;
1284 |     TANH = 23;
1285 |     WINDOW_DATA = 24;
1286 |     THRESHOLD = 31;
1287 |   }
1288 |   optional LayerType type = 5;
1289 |   repeated BlobProto blobs = 6;
1290 |   repeated string param = 1001;
1291 |   repeated DimCheckMode blob_share_mode = 1002;
1292 |   enum DimCheckMode {
1293 |     STRICT = 0;
1294 |     PERMISSIVE = 1;
1295 |   }
1296 |   repeated float blobs_lr = 7;
1297 |   repeated float weight_decay = 8;
1298 |   repeated float loss_weight = 35;
1299 |   optional AccuracyParameter accuracy_param = 27;
1300 |   optional ArgMaxParameter argmax_param = 23;
1301 |   optional ConcatParameter concat_param = 9;
1302 |   optional ContrastiveLossParameter contrastive_loss_param = 40;
1303 |   optional ConvolutionParameter convolution_param = 10;
1304 |   optional DataParameter data_param = 11;
1305 |   optional DropoutParameter dropout_param = 12;
1306 |   optional DummyDataParameter dummy_data_param = 26;
1307 |   optional EltwiseParameter eltwise_param = 24;
1308 |   optional ExpParameter exp_param = 41;
1309 |   optional HDF5DataParameter hdf5_data_param = 13;
1310 |   optional HDF5OutputParameter hdf5_output_param = 14;
1311 |   optional HingeLossParameter hinge_loss_param = 29;
1312 |   optional ImageDataParameter image_data_param = 15;
1313 |   optional InfogainLossParameter infogain_loss_param = 16;
1314 |   optional InnerProductParameter inner_product_param = 17;
1315 |   optional LRNParameter lrn_param = 18;
1316 |   optional MemoryDataParameter memory_data_param = 22;
1317 |   optional MVNParameter mvn_param = 34;
1318 |   optional PoolingParameter pooling_param = 19;
1319 |   optional PowerParameter power_param = 21;
1320 |   optional ReLUParameter relu_param = 30;
1321 |   optional SigmoidParameter sigmoid_param = 38;
1322 |   optional SoftmaxParameter softmax_param = 39;
1323 |   optional SliceParameter slice_param = 31;
1324 |   optional TanHParameter tanh_param = 37;
1325 |   optional ThresholdParameter threshold_param = 25;
1326 |   optional WindowDataParameter window_data_param = 20;
1327 |   optional TransformationParameter transform_param = 36;
1328 |   optional LossParameter loss_param = 42;
1329 |   optional V0LayerParameter layer = 1;
1330 | }
1331 | 
1332 | // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
1333 | // in Caffe. We keep this message type around for legacy support.
1334 | message V0LayerParameter {
1335 |   optional string name = 1; // the layer name
1336 |   optional string type = 2; // the string to specify the layer type
1337 | 
1338 |   // Parameters to specify layers with inner products.
1339 |   optional uint32 num_output = 3; // The number of outputs for the layer
1340 |   optional bool biasterm = 4 [default = true]; // whether to have bias terms
1341 |   optional FillerParameter weight_filler = 5; // The filler for the weight
1342 |   optional FillerParameter bias_filler = 6; // The filler for the bias
1343 | 
1344 |   optional uint32 pad = 7 [default = 0]; // The padding size
1345 |   optional uint32 kernelsize = 8; // The kernel size
1346 |   optional uint32 group = 9 [default = 1]; // The group size for group conv
1347 |   optional uint32 stride = 10 [default = 1]; // The stride
1348 |   enum PoolMethod {
1349 |     MAX = 0;
1350 |     AVE = 1;
1351 |     STOCHASTIC = 2;
1352 |   }
1353 |   optional PoolMethod pool = 11 [default = MAX]; // The pooling method
1354 |   optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
1355 | 
1356 |   optional uint32 local_size = 13 [default = 5]; // for local response norm
1357 |   optional float alpha = 14 [default = 1.]; // for local response norm
1358 |   optional float beta = 15 [default = 0.75]; // for local response norm
1359 |   optional float k = 22 [default = 1.];
1360 | 
1361 |   // For data layers, specify the data source
1362 |   optional string source = 16;
1363 |   // For data pre-processing, we can do simple scaling and subtracting the
1364 |   // data mean, if provided. Note that the mean subtraction is always carried
1365 |   // out before scaling.
1366 |   optional float scale = 17 [default = 1];
1367 |   optional string meanfile = 18;
1368 |   // For data layers, specify the batch size.
1369 |   optional uint32 batchsize = 19;
1370 |   // For data layers, specify if we would like to randomly crop an image.
1371 |   optional uint32 cropsize = 20 [default = 0];
1372 |   // For data layers, specify if we want to randomly mirror data.
1373 |   optional bool mirror = 21 [default = false];
1374 | 
1375 |   // The blobs containing the numeric parameters of the layer
1376 |   repeated BlobProto blobs = 50;
1377 |   // The ratio that is multiplied on the global learning rate. If you want to
1378 |   // set the learning ratio for one blob, you need to set it for all blobs.
1379 |   repeated float blobs_lr = 51;
1380 |   // The weight decay that is multiplied on the global weight decay.
1381 |   repeated float weight_decay = 52;
1382 | 
1383 |   // The rand_skip variable is for the data layer to skip a few data points
1384 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
1385 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
1386 |   // be larger than the number of keys in the database.
1387 |   optional uint32 rand_skip = 53 [default = 0];
1388 | 
1389 |   // Fields related to detection (det_*)
1390 |   // foreground (object) overlap threshold
1391 |   optional float det_fg_threshold = 54 [default = 0.5];
1392 |   // background (non-object) overlap threshold
1393 |   optional float det_bg_threshold = 55 [default = 0.5];
1394 |   // Fraction of batch that should be foreground objects
1395 |   optional float det_fg_fraction = 56 [default = 0.25];
1396 | 
1397 |   // optional bool OBSOLETE_can_clobber = 57 [default = true];
1398 | 
1399 |   // Amount of contextual padding to add around a window
1400 |   // (used only by the window_data_layer)
1401 |   optional uint32 det_context_pad = 58 [default = 0];
1402 | 
1403 |   // Mode for cropping out a detection window
1404 |   // warp: cropped window is warped to a fixed size and aspect ratio
1405 |   // square: the tightest square around the window is cropped
1406 |   optional string det_crop_mode = 59 [default = "warp"];
1407 | 
1408 |   // For ReshapeLayer, one needs to specify the new dimensions.
1409 |   optional int32 new_num = 60 [default = 0];
1410 |   optional int32 new_channels = 61 [default = 0];
1411 |   optional int32 new_height = 62 [default = 0];
1412 |   optional int32 new_width = 63 [default = 0];
1413 | 
1414 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
1415 |   // It will also resize images if new_height or new_width are not zero.
1416 |   optional bool shuffle_images = 64 [default = false];
1417 | 
1418 |   // For ConcatLayer, one needs to specify the dimension for concatenation, and
1419 |   // the other dimensions must be the same for all the bottom blobs.
1420 |   // By default it will concatenate blobs along the channels dimension.
1421 |   optional uint32 concat_dim = 65 [default = 1];
1422 | 
1423 |   optional HDF5OutputParameter hdf5_output_param = 1001;
1424 | }
1425 | 
1426 | message PReLUParameter {
1427 |   // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
1428 |   // Surpassing Human-Level Performance on ImageNet Classification, 2015.
1429 | 
1430 |   // Initial value of a_i. Default is a_i=0.25 for all i.
1431 |   optional FillerParameter filler = 1;
1432 |   // Whether or not slope parameters are shared across channels.
1433 |   optional bool channel_shared = 2 [default = false];
1434 | }
1435 | 
--------------------------------------------------------------------------------
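
For reference, the WindowDataParameter message defined above is consumed by Caffe's WindowData layer. A minimal prototxt sketch is shown below; the window-file path and the threshold values are placeholders, and the layer type string "WindowData" plus the window_data_param field name follow standard Caffe naming. Cropping and mirroring for this layer are normally configured through transform_param rather than the deprecated in-message fields.

layer {
  name: "window_data"
  type: "WindowData"
  top: "data"
  top: "label"
  window_data_param {
    source: "train_windows.txt"   # placeholder window file listing images and boxes
    batch_size: 128
    fg_threshold: 0.5             # foreground (object) overlap threshold
    bg_threshold: 0.5             # background (non-object) overlap threshold
    fg_fraction: 0.25             # fraction of each batch drawn from foreground windows
    context_pad: 16               # contextual padding added around each window
    crop_mode: "warp"             # "warp" or "square", as documented above
  }
}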