├── README.md
├── LICENSE
├── transform_param.txt
├── ImageAugmentationParameters
├── data_layer.cpp
├── data_transformer.hpp
├── data_transformer.cpp
└── caffe.proto
/README.md:
--------------------------------------------------------------------------------
1 | # DataAugmentation
2 | Caffe Image Data Augmentation
3 | This data augmentation targets training that reads raw image files (image_data_layer.cpp).
4 | To use it, download the official Caffe from https://github.com/BVLC/caffe and replace the original caffe.proto, data_transformer.cpp and data_transformer.hpp with the versions in this repository.
5 | Configure transform_param in train_val.prototxt following transform_param.txt; the parameters annotated as random are recommended for the train phase only, not for the test/val data.
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 GarryLau
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/transform_param.txt:
--------------------------------------------------------------------------------
1 | transform_param {
2 |   scale: 0.00390625                      #1  normalizes pixel values from [0,255] to [0,1]; scale acts on the image pixel values
3 |   mirror: true                           #2  mirroring
4 |   crop_size: 224                         #3  cropping
5 |   mean_file: "train_mean.binaryproto"    #4  mean file; cannot be used together with #5
6 |   #mean_value: 104                       #5  mean values; cannot be used together with #4
7 |   #mean_value: 117
8 |   #mean_value: 123
9 |   #force_color: false                    #6  force color image
10 |   #force_gray: false                    #7  force grayscale image
11 |
12 |   # Begin Added by garylau for Image augmentation
13 |   apply_probability: 0.5                #8  whether smoothing, brightness and color_shift are applied is probabilistic
14 |   smooth_filtering: true                #9  whether to apply smoothing (random)
15 |   max_smooth: 6                         #10 smoothing parameter (random)
16 |   max_rotation_angle: 30                #11 rotation angle (also used as the rotation angle of the affine transform) (random)
17 |   contrast_brightness_adjustment: true  #12 whether to adjust contrast/brightness (random)
18 |   min_contrast: 0.8                     #13 used for contrast/brightness adjustment (random)
19 |   max_contrast: 1.2                     #14 used for contrast/brightness adjustment (random)
20 |   max_brightness_shift: 20              #15 used for contrast/brightness adjustment (random)
21 |   max_color_shift: 20                   #16 color shift (random)
22 |   min_side_min: 224                     #17 used when resizing and randomly cropping; used together with #18, cannot be combined with #19 (random)
23 |   min_side_max: 288                     #18 used when resizing and randomly cropping; used together with #17, cannot be combined with #19 (random)
24 |   #min_side: 200                        #19 crops the image to min_side; cannot be combined with #17/#18 (random)
25 |   affine_min_scale: 0.8                 #20 minimum scale of the affine transform (random)
26 |   affine_max_scale: 1.2                 #21 maximum scale of the affine transform (random)
27 |   random_erasing_low: 0.02              #22 lower bound of the random-erasing area fraction (random)
28 |   random_erasing_high: 0.2              #23 upper bound of the random-erasing area fraction (random)
29 |   random_erasing_ratio: 0.3             #24 parameter that determines the aspect ratio of the erased area (random)
30 |   debug_params: false                   #25 used for debugging, to check whether the augmentation is correct
31 |   # End Added by garylau for Image augmentation
32 | }
33 |
--------------------------------------------------------------------------------
/ImageAugmentationParameters:
--------------------------------------------------------------------------------
1 | Data augmentation for training pipelines that use raw images as the data source. It mainly applies geometric and color transforms to a cv::Mat; the parameters are configured in the transform_param block of train_val.prototxt.
2 | transform_param currently has 25 parameters in total: #1–#7 come with the original Caffe, while #8–#25 were developed here (#25 is for debugging only and does not transform the image).
3 |
4 | List of the 25 parameters:
5 | transform_param {
6 |   scale: 0.00390625                      #1  normalizes pixel values from [0,255] to [0,1]; scale acts on the image pixel values
7 |   mirror: true                           #2  mirroring
8 |   crop_size: 224                         #3  cropping
9 |   mean_file: "train_mean.binaryproto"    #4  mean file; cannot be used together with #5
10 |   #mean_value: 104                      #5  mean values; cannot be used together with #4
11 |   #mean_value: 117
12 |   #mean_value: 123
13 |   #force_color: false                   #6  force color image
14 |   #force_gray: false                    #7  force grayscale image
15 |
16 |   # Begin Added by liugan5 for Image augmentation
17 |   apply_probability: 0.5                #8  whether the transforms #9–#24 are applied is probabilistic
18 |   smooth_filtering: true                #9  whether to apply smoothing (random)
19 |   max_smooth: 6                         #10 smoothing parameter (random)
20 |   max_rotation_angle: 30                #11 rotation angle (also used as the rotation angle of the affine transform) (random)
21 |   contrast_brightness_adjustment: true  #12 whether to adjust contrast/brightness (random)
22 |   min_contrast: 0.8                     #13 used for contrast/brightness adjustment (random)
23 |   max_contrast: 1.2                     #14 used for contrast/brightness adjustment (random)
24 |   max_brightness_shift: 20              #15 used for contrast/brightness adjustment (random)
25 |   max_color_shift: 20                   #16 color shift (random)
26 |   min_side_min: 224                     #17 used when resizing and randomly cropping; used together with #18, cannot be combined with #19 (random)
27 |   min_side_max: 288                     #18 used when resizing and randomly cropping; used together with #17, cannot be combined with #19 (random)
28 |   #min_side: 200                        #19 crops the image to min_side; cannot be combined with #17/#18 (random)
29 |   affine_min_scale: 0.8                 #20 minimum scale of the affine transform (random)
30 |   affine_max_scale: 1.2                 #21 maximum scale of the affine transform (random)
31 |   random_erasing_low: 0.02              #22 lower bound of the random-erasing area fraction (random)
32 |   random_erasing_high: 0.2              #23 upper bound of the random-erasing area fraction (random)
33 |   random_erasing_ratio: 0.3             #24 parameter that determines the aspect ratio of the erased area (random)
34 |   debug_params: false                   #25 used for debugging, to check whether the augmentation is correct
35 |   # End Added by liugan5 for Image augmentation
36 | }
37 |
38 | Notes on parameters #8–#25:
39 | Probability:
40 | #8: apply_probability, compared against a probability drawn randomly inside the code for each training sample; it decides whether the transforms #9–#24 are applied to that image (a minimal sketch follows below).
41 |
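A minimal C++ sketch of this gate, following the pattern used in data_transformer.cpp (gate_augmentation is a hypothetical helper written here only for illustration, not a function in the patch): each transform draws its own uniform sample in [0, 1) with caffe_rng_uniform and is applied only when the sample exceeds 1 - apply_probability, i.e. each transform fires with probability apply_probability.

#include "caffe/util/math_functions.hpp"

// Hypothetical helper mirroring the per-transform checks in DataTransformer::Transform.
// Returns true with probability apply_probability (e.g. 0.5 in the example above).
static bool gate_augmentation(float apply_probability) {
  float current_prob = 0.f;
  caffe::caffe_rng_uniform(1, 0.f, 1.f, &current_prob);  // one uniform draw in [0, 1)
  return current_prob > (1.f - apply_probability);
}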
42 | Smoothing: requires both smooth_filtering: true and max_smooth > 1
43 | #9: smooth_filtering, whether to apply smoothing
44 | #10: max_smooth, smoothing parameter
45 |
46 | Rotation (never performed together with the affine transform): requires max_rotation_angle > 0
47 | #11: max_rotation_angle, rotation angle (also used as the rotation angle of the affine transform)
48 |
49 | Brightness/contrast adjustment: requires contrast_brightness_adjustment: true, min_contrast > 0,
50 | max_contrast >= min_contrast and max_brightness_shift >= 0
51 | #12: contrast_brightness_adjustment, whether to adjust brightness/contrast
52 | #13: min_contrast, brightness/contrast parameter
53 | #14: max_contrast, brightness/contrast parameter
54 | #15: max_brightness_shift, brightness/contrast parameter
55 |
56 | Color shift: requires max_color_shift > 0
57 | #16: max_color_shift, color shift parameter
58 |
59 | Cropping: two modes whose parameters must not be configured at the same time (similar to the relationship between #4 and #5). In the first mode the crop size is random for every image; in the second mode every image is cropped to a fixed size (which must be smaller than the input image, otherwise an error occurs).
60 | Mode 1: requires min_side_min > 0 and min_side_max > min_side_min
61 | Mode 2: requires min_side > 0
62 | #17: min_side_min, cropping parameter; the cropped image is larger than min_side_min and smaller than min_side_max
63 | #18: min_side_max
64 | #19: min_side, cropping parameter; the cropped size equals min_side
65 |
66 | Affine transform (never performed together with the rotation operation): requires affine_min_scale > 0 and affine_max_scale > affine_min_scale
67 | #20: affine_min_scale, scale parameter of the affine transform; the actual scale is larger than affine_min_scale and smaller than affine_max_scale
68 | #21: affine_max_scale
69 |
70 | Random erasing: requires random_erasing_ratio > 0, random_erasing_high > random_erasing_low and random_erasing_low > 0
71 | #22: random_erasing_low, erased area fraction; the actual erased area fraction is larger than random_erasing_low and smaller than random_erasing_high
72 | #23: random_erasing_high
73 | #24: random_erasing_ratio, determines the width and height (aspect ratio) of the erased rectangle
74 |
75 | Debugging: requires debug_params: true
76 | #25: debug_params, logs the augmentation parameters for inspection
77 |
--------------------------------------------------------------------------------
/data_layer.cpp:
--------------------------------------------------------------------------------
1 | #ifdef USE_OPENCV
2 | #include <opencv2/core/core.hpp>
3 | #endif  // USE_OPENCV
4 | #include <stdint.h>
5 |
6 | #include <vector>
7 |
8 | #include "caffe/data_transformer.hpp"
9 | #include "caffe/layers/data_layer.hpp"
10 | #include "caffe/util/benchmark.hpp"
11 |
12 | namespace caffe {
13 |
14 | template <typename Dtype>
15 | DataLayer<Dtype>::DataLayer(const LayerParameter& param)
16 |   : BasePrefetchingDataLayer<Dtype>(param),
17 |     reader_(param) {
18 | }
19 |
20 | template <typename Dtype>
21 | DataLayer<Dtype>::~DataLayer() {
22 |   this->StopInternalThread();
23 | }
24 |
25 | template <typename Dtype>
26 | void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
27 |       const vector<Blob<Dtype>*>& top) {
28 |   const int batch_size = this->layer_param_.data_param().batch_size();
29 |   // Read a data point, and use it to initialize the top blob.
30 |   Datum& datum = *(reader_.full().peek());
31 |
32 |   // Use data_transformer to infer the expected blob shape from datum.
33 |   vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
34 |   this->transformed_data_.Reshape(top_shape);
35 |   // Reshape top[0] and prefetch_data according to the batch_size.
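  // Note: the augmentation hook added in load_batch (DatumToMat -> CVMatTransform
  // -> MatToDatum) leaves the datum height/width unchanged, because CVMatTransform
  // resizes the image back to its original size before returning. The shape
  // inferred above from the first datum therefore remains valid for augmented batches.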
36 | top_shape[0] = batch_size; 37 | top[0]->Reshape(top_shape); 38 | for (int i = 0; i < this->PREFETCH_COUNT; ++i) { 39 | this->prefetch_[i].data_.Reshape(top_shape); 40 | } 41 | LOG(INFO) << "output data size: " << top[0]->num() << "," 42 | << top[0]->channels() << "," << top[0]->height() << "," 43 | << top[0]->width(); 44 | // label 45 | if (this->output_labels_) { 46 | vector label_shape(1, batch_size); 47 | top[1]->Reshape(label_shape); 48 | for (int i = 0; i < this->PREFETCH_COUNT; ++i) { 49 | this->prefetch_[i].label_.Reshape(label_shape); 50 | } 51 | } 52 | } 53 | 54 | // This function is called on prefetch thread 55 | template 56 | void DataLayer::load_batch(Batch* batch) { 57 | CPUTimer batch_timer; 58 | batch_timer.Start(); 59 | double read_time = 0; 60 | double trans_time = 0; 61 | CPUTimer timer; 62 | CHECK(batch->data_.count()); 63 | CHECK(this->transformed_data_.count()); 64 | 65 | // Reshape according to the first datum of each batch 66 | // on single input batches allows for inputs of varying dimension. 67 | const int batch_size = this->layer_param_.data_param().batch_size(); 68 | Datum& datum = *(reader_.full().peek()); 69 | // Use data_transformer to infer the expected blob shape from datum. 70 | vector top_shape = this->data_transformer_->InferBlobShape(datum); 71 | this->transformed_data_.Reshape(top_shape); 72 | // Reshape batch according to the batch_size. 73 | top_shape[0] = batch_size; 74 | batch->data_.Reshape(top_shape); 75 | 76 | Dtype* top_data = batch->data_.mutable_cpu_data(); 77 | Dtype* top_label = NULL; // suppress warnings about uninitialized variables 78 | 79 | if (this->output_labels_) { 80 | top_label = batch->label_.mutable_cpu_data(); 81 | } 82 | for (int item_id = 0; item_id < batch_size; ++item_id) { 83 | timer.Start(); 84 | // get a datum 85 | Datum& datum = *(reader_.full().pop("Waiting for data")); 86 | read_time += timer.MicroSeconds(); 87 | timer.Start(); 88 | 89 | /* Begin Added by garylau, for lmdb data augmentation, 2017.12.11 */ 90 | int imgH = datum.height(); 91 | int imgW = datum.width(); 92 | cv::Mat cv_img(imgH, imgW, CV_8UC3); 93 | this->data_transformer_->DatumToMat(&datum, cv_img); 94 | this->data_transformer_->CVMatTransform(cv_img); 95 | this->data_transformer_->MatToDatum(cv_img, &datum); 96 | /* End Added by garylau, for lmdb data augmentation, 2017.12.11 */ 97 | 98 | // Apply data transformations (mirror, scale, crop...) 99 | int offset = batch->data_.offset(item_id); 100 | this->transformed_data_.set_cpu_data(top_data + offset); 101 | this->data_transformer_->Transform(datum, &(this->transformed_data_)); 102 | // Copy label. 
103 | if (this->output_labels_) { 104 | top_label[item_id] = datum.label(); 105 | } 106 | trans_time += timer.MicroSeconds(); 107 | 108 | reader_.free().push(const_cast(&datum)); 109 | } 110 | timer.Stop(); 111 | batch_timer.Stop(); 112 | DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; 113 | DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; 114 | DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; 115 | } 116 | 117 | INSTANTIATE_CLASS(DataLayer); 118 | REGISTER_LAYER_CLASS(Data); 119 | 120 | } // namespace caffe 121 | 122 | -------------------------------------------------------------------------------- /data_transformer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_DATA_TRANSFORMER_HPP 2 | #define CAFFE_DATA_TRANSFORMER_HPP 3 | 4 | #include 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/common.hpp" 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | namespace caffe { 11 | 12 | /** 13 | * @brief Applies common transformations to the input data, such as 14 | * scaling, mirroring, substracting the image mean... 15 | */ 16 | template 17 | class DataTransformer { 18 | public: 19 | explicit DataTransformer(const TransformationParameter& param, Phase phase); 20 | virtual ~DataTransformer() {} 21 | 22 | /** 23 | * @brief Initialize the Random number generations if needed by the 24 | * transformation. 25 | */ 26 | void InitRand(); 27 | 28 | /** 29 | * @brief Applies the transformation defined in the data layer's 30 | * transform_param block to the data. 31 | * 32 | * @param datum 33 | * Datum containing the data to be transformed. 34 | * @param transformed_blob 35 | * This is destination blob. It can be part of top blob's data if 36 | * set_cpu_data() is used. See data_layer.cpp for an example. 37 | */ 38 | void Transform(const Datum& datum, Blob* transformed_blob); 39 | 40 | /** 41 | * @brief Applies the transformation defined in the data layer's 42 | * transform_param block to a vector of Datum. 43 | * 44 | * @param datum_vector 45 | * A vector of Datum containing the data to be transformed. 46 | * @param transformed_blob 47 | * This is destination blob. It can be part of top blob's data if 48 | * set_cpu_data() is used. See memory_layer.cpp for an example. 49 | */ 50 | void Transform(const vector & datum_vector, 51 | Blob* transformed_blob); 52 | 53 | #ifdef USE_OPENCV 54 | /** 55 | * @brief Applies the transformation defined in the data layer's 56 | * transform_param block to a vector of Mat. 57 | * 58 | * @param mat_vector 59 | * A vector of Mat containing the data to be transformed. 60 | * @param transformed_blob 61 | * This is destination blob. It can be part of top blob's data if 62 | * set_cpu_data() is used. See memory_layer.cpp for an example. 63 | */ 64 | void Transform(const vector & mat_vector, 65 | Blob* transformed_blob); 66 | 67 | /** 68 | * @brief Applies the transformation defined in the data layer's 69 | * transform_param block to a cv::Mat 70 | * 71 | * @param cv_img 72 | * cv::Mat containing the data to be transformed. 73 | * @param transformed_blob 74 | * This is destination blob. It can be part of top blob's data if 75 | * set_cpu_data() is used. See image_data_layer.cpp for an example. 76 | */ 77 | void Transform(const cv::Mat& cv_img, Blob* transformed_blob); 78 | #endif // USE_OPENCV 79 | 80 | /** 81 | * @brief Applies the same transformation defined in the data layer's 82 | * transform_param block to all the num images in a input_blob. 
83 | * 84 | * @param input_blob 85 | * A Blob containing the data to be transformed. It applies the same 86 | * transformation to all the num images in the blob. 87 | * @param transformed_blob 88 | * This is destination blob, it will contain as many images as the 89 | * input blob. It can be part of top blob's data. 90 | */ 91 | void Transform(Blob* input_blob, Blob* transformed_blob); 92 | 93 | /** 94 | * @brief Infers the shape of transformed_blob will have when 95 | * the transformation is applied to the data. 96 | * 97 | * @param datum 98 | * Datum containing the data to be transformed. 99 | */ 100 | vector InferBlobShape(const Datum& datum); 101 | /** 102 | * @brief Infers the shape of transformed_blob will have when 103 | * the transformation is applied to the data. 104 | * It uses the first element to infer the shape of the blob. 105 | * 106 | * @param datum_vector 107 | * A vector of Datum containing the data to be transformed. 108 | */ 109 | vector InferBlobShape(const vector & datum_vector); 110 | /** 111 | * @brief Infers the shape of transformed_blob will have when 112 | * the transformation is applied to the data. 113 | * It uses the first element to infer the shape of the blob. 114 | * 115 | * @param mat_vector 116 | * A vector of Mat containing the data to be transformed. 117 | */ 118 | #ifdef USE_OPENCV 119 | vector InferBlobShape(const vector & mat_vector); 120 | /** 121 | * @brief Infers the shape of transformed_blob will have when 122 | * the transformation is applied to the data. 123 | * 124 | * @param cv_img 125 | * cv::Mat containing the data to be transformed. 126 | */ 127 | vector InferBlobShape(const cv::Mat& cv_img); 128 | #endif // USE_OPENCV 129 | 130 | protected: 131 | /** 132 | * @brief Generates a random integer from Uniform({0, 1, ..., n-1}). 133 | * 134 | * @param n 135 | * The upperbound (exclusive) value of the random number. 136 | * @return 137 | * A uniformly random integer value from ({0, 1, ..., n-1}). 
138 | */ 139 | virtual int Rand(int n); 140 | 141 | void Transform(const Datum& datum, Dtype* transformed_data); 142 | // Tranformation parameters 143 | TransformationParameter param_; 144 | 145 | /* Begin Added by garylau, for data augmentation, 2017.11.29 */ 146 | void random_crop(cv::Mat& cv_img, int crop_size); 147 | /* End Added by garylau, for data augmentation, 2017.11.29 */ 148 | 149 | shared_ptr rng_; 150 | Phase phase_; 151 | Blob data_mean_; 152 | vector mean_values_; 153 | 154 | /* Begin Added by garylau, for lmdb data augmentation, 2017.12.11 */ 155 | public: 156 | void DatumToMat(const Datum* datum, cv::Mat& cv_img); 157 | void MatToDatum(const cv::Mat& cv_img, Datum* datum); 158 | void CVMatTransform(cv::Mat& cv_img); 159 | /* End Added by garylau, for lmdb data augmentation, 2017.12.11 */ 160 | }; 161 | 162 | } // namespace caffe 163 | 164 | #endif // CAFFE_DATA_TRANSFORMER_HPP_ 165 | -------------------------------------------------------------------------------- /data_transformer.cpp: -------------------------------------------------------------------------------- 1 | #ifdef USE_OPENCV 2 | #include 3 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 10 | #endif // USE_OPENCV 11 | 12 | #include 13 | #include 14 | 15 | #include "caffe/data_transformer.hpp" 16 | #include "caffe/util/io.hpp" 17 | #include "caffe/util/math_functions.hpp" 18 | #include "caffe/util/rng.hpp" 19 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 20 | #include 21 | #define PI 3.14159265358979323846 22 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 23 | 24 | namespace caffe { 25 | 26 | template 27 | DataTransformer::DataTransformer(const TransformationParameter& param, 28 | Phase phase) 29 | : param_(param), phase_(phase) { 30 | // check if we want to use mean_file 31 | if (param_.has_mean_file()) { 32 | CHECK_EQ(param_.mean_value_size(), 0) << 33 | "Cannot specify mean_file and mean_value at the same time"; 34 | const string& mean_file = param.mean_file(); 35 | if (Caffe::root_solver()) { 36 | LOG(INFO) << "Loading mean file from: " << mean_file; 37 | } 38 | BlobProto blob_proto; 39 | ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); 40 | data_mean_.FromProto(blob_proto); 41 | } 42 | // check if we want to use mean_value 43 | if (param_.mean_value_size() > 0) { 44 | CHECK(param_.has_mean_file() == false) << 45 | "Cannot specify mean_file and mean_value at the same time"; 46 | for (int c = 0; c < param_.mean_value_size(); ++c) { 47 | mean_values_.push_back(param_.mean_value(c)); 48 | } 49 | } 50 | /* Begin Added by garylau, for data augmentation, 2017.11.30 */ 51 | // check if we want to use min_side 52 | if (param_.min_side()) 53 | { 54 | CHECK_EQ(param_.min_side_min(), 0) << "Cannot specify min_side and min_side_min & min_side_max at the same time"; 55 | CHECK_EQ(param_.min_side_max(), 0) << "Cannot specify min_side and min_side_min & min_side_max at the same time"; 56 | } 57 | // check if we want to use min_side_min & min_side_max 58 | if (param_.min_side_min() || param_.min_side_max()) 59 | { 60 | CHECK_EQ(param_.min_side(), 0) << "Cannot specify min_side_min & min_side_max and min_side at the same time"; 61 | CHECK_GE(param_.min_side_max(), param_.min_side_min()) << "min_side_max must be greater than (or equals to) min_side_min"; 62 | } 63 | /* End Added by garylau, for data augmentation, 2017.11.30 */ 64 | 
} 65 | 66 | /* 被读取lmdb图片的Transform调用的Transform, garylau */ 67 | template 68 | void DataTransformer::Transform(const Datum& datum, 69 | Dtype* transformed_data) { 70 | const string& data = datum.data(); 71 | const int datum_channels = datum.channels(); 72 | const int datum_height = datum.height(); 73 | const int datum_width = datum.width(); 74 | 75 | const int crop_size = param_.crop_size(); 76 | const Dtype scale = param_.scale(); 77 | const bool do_mirror = param_.mirror() && Rand(2); 78 | const bool has_mean_file = param_.has_mean_file(); 79 | const bool has_uint8 = data.size() > 0; 80 | const bool has_mean_values = mean_values_.size() > 0; 81 | 82 | CHECK_GT(datum_channels, 0); 83 | CHECK_GE(datum_height, crop_size); 84 | CHECK_GE(datum_width, crop_size); 85 | 86 | Dtype* mean = NULL; 87 | if (has_mean_file) { 88 | CHECK_EQ(datum_channels, data_mean_.channels()); 89 | CHECK_EQ(datum_height, data_mean_.height()); 90 | CHECK_EQ(datum_width, data_mean_.width()); 91 | mean = data_mean_.mutable_cpu_data(); 92 | } 93 | if (has_mean_values) { 94 | CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) << 95 | "Specify either 1 mean_value or as many as channels: " << datum_channels; 96 | if (datum_channels > 1 && mean_values_.size() == 1) { 97 | // Replicate the mean_value for simplicity 98 | for (int c = 1; c < datum_channels; ++c) { 99 | mean_values_.push_back(mean_values_[0]); 100 | } 101 | } 102 | } 103 | 104 | int height = datum_height; 105 | int width = datum_width; 106 | 107 | int h_off = 0; 108 | int w_off = 0; 109 | if (crop_size) { 110 | height = crop_size; 111 | width = crop_size; 112 | // We only do random crop when we do training. 113 | if (phase_ == TRAIN) { 114 | h_off = Rand(datum_height - crop_size + 1); 115 | w_off = Rand(datum_width - crop_size + 1); 116 | } else { 117 | h_off = (datum_height - crop_size) / 2; 118 | w_off = (datum_width - crop_size) / 2; 119 | } 120 | } 121 | 122 | Dtype datum_element; 123 | int top_index, data_index; 124 | for (int c = 0; c < datum_channels; ++c) { 125 | for (int h = 0; h < height; ++h) { 126 | for (int w = 0; w < width; ++w) { 127 | data_index = (c * datum_height + h_off + h) * datum_width + w_off + w; 128 | if (do_mirror) { 129 | top_index = (c * height + h) * width + (width - 1 - w); 130 | } else { 131 | top_index = (c * height + h) * width + w; 132 | } 133 | if (has_uint8) { 134 | datum_element = 135 | static_cast(static_cast(data[data_index])); 136 | } else { 137 | datum_element = datum.float_data(data_index); 138 | } 139 | if (has_mean_file) { 140 | transformed_data[top_index] = 141 | (datum_element - mean[data_index]) * scale; 142 | } else { 143 | if (has_mean_values) { 144 | transformed_data[top_index] = 145 | (datum_element - mean_values_[c]) * scale; 146 | } else { 147 | transformed_data[top_index] = datum_element * scale; 148 | } 149 | } 150 | } 151 | } 152 | } 153 | } 154 | 155 | /* 读取lmdb图片所用到的Transform, garylau */ 156 | template 157 | void DataTransformer::Transform(const Datum& datum, 158 | Blob* transformed_blob) { 159 | // If datum is encoded, decoded and transform the cv::image. 160 | if (datum.encoded()) { 161 | #ifdef USE_OPENCV 162 | CHECK(!(param_.force_color() && param_.force_gray())) 163 | << "cannot set both force_color and force_gray"; 164 | cv::Mat cv_img; 165 | if (param_.force_color() || param_.force_gray()) { 166 | // If force_color then decode in color otherwise decode in gray. 
167 | cv_img = DecodeDatumToCVMat(datum, param_.force_color()); 168 | } else { 169 | cv_img = DecodeDatumToCVMatNative(datum); 170 | } 171 | // Transform the cv::image into blob. 172 | return Transform(cv_img, transformed_blob); 173 | #else 174 | LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; 175 | #endif // USE_OPENCV 176 | } else { 177 | if (param_.force_color() || param_.force_gray()) { 178 | LOG(ERROR) << "force_color and force_gray only for encoded datum"; 179 | } 180 | } 181 | 182 | const int crop_size = param_.crop_size(); 183 | const int datum_channels = datum.channels(); 184 | const int datum_height = datum.height(); 185 | const int datum_width = datum.width(); 186 | 187 | // Check dimensions. 188 | const int channels = transformed_blob->channels(); 189 | const int height = transformed_blob->height(); 190 | const int width = transformed_blob->width(); 191 | const int num = transformed_blob->num(); 192 | 193 | CHECK_EQ(channels, datum_channels); 194 | CHECK_LE(height, datum_height); 195 | CHECK_LE(width, datum_width); 196 | CHECK_GE(num, 1); 197 | 198 | if (crop_size) { 199 | CHECK_EQ(crop_size, height); 200 | CHECK_EQ(crop_size, width); 201 | } else { 202 | CHECK_EQ(datum_height, height); 203 | CHECK_EQ(datum_width, width); 204 | } 205 | 206 | Dtype* transformed_data = transformed_blob->mutable_cpu_data(); 207 | Transform(datum, transformed_data); 208 | } 209 | 210 | template 211 | void DataTransformer::Transform(const vector & datum_vector, 212 | Blob* transformed_blob) { 213 | const int datum_num = datum_vector.size(); 214 | const int num = transformed_blob->num(); 215 | const int channels = transformed_blob->channels(); 216 | const int height = transformed_blob->height(); 217 | const int width = transformed_blob->width(); 218 | 219 | CHECK_GT(datum_num, 0) << "There is no datum to add"; 220 | CHECK_LE(datum_num, num) << 221 | "The size of datum_vector must be no greater than transformed_blob->num()"; 222 | Blob uni_blob(1, channels, height, width); 223 | for (int item_id = 0; item_id < datum_num; ++item_id) { 224 | int offset = transformed_blob->offset(item_id); 225 | uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset); 226 | Transform(datum_vector[item_id], &uni_blob); 227 | } 228 | } 229 | 230 | #ifdef USE_OPENCV 231 | template 232 | void DataTransformer::Transform(const vector & mat_vector, 233 | Blob* transformed_blob) { 234 | const int mat_num = mat_vector.size(); 235 | const int num = transformed_blob->num(); 236 | const int channels = transformed_blob->channels(); 237 | const int height = transformed_blob->height(); 238 | const int width = transformed_blob->width(); 239 | 240 | CHECK_GT(mat_num, 0) << "There is no MAT to add"; 241 | CHECK_EQ(mat_num, num) << 242 | "The size of mat_vector must be equals to transformed_blob->num()"; 243 | Blob uni_blob(1, channels, height, width); 244 | for (int item_id = 0; item_id < mat_num; ++item_id) { 245 | int offset = transformed_blob->offset(item_id); 246 | uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset); 247 | Transform(mat_vector[item_id], &uni_blob); 248 | } 249 | } 250 | 251 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 252 | void rotate(cv::Mat& src, int angle) 253 | { 254 | // get rotation matrix for rotating the image around its center 255 | cv::Point2f center(src.cols / 2.0, src.rows / 2.0); 256 | cv::Mat rot = cv::getRotationMatrix2D(center, angle, 1.0); 257 | // determine bounding rectangle 258 | cv::Rect bbox = cv::RotatedRect(center, 
src.size(), angle).boundingRect(); 259 | // adjust transformation matrix 260 | rot.at(0, 2) += bbox.width / 2.0 - center.x; 261 | rot.at(1, 2) += bbox.height / 2.0 - center.y; 262 | cv::warpAffine(src, src, rot, bbox.size()); 263 | } 264 | 265 | template 266 | void DataTransformer::random_crop(cv::Mat& cv_img, int crop_size) 267 | { 268 | int h_off = 0; 269 | int w_off = 0; 270 | const int img_height = cv_img.rows; 271 | const int img_width = cv_img.cols; 272 | 273 | h_off = Rand(img_height - crop_size + 1); 274 | w_off = Rand(img_width - crop_size + 1); 275 | cv::Rect roi(w_off, h_off, crop_size, crop_size); 276 | cv_img = cv_img(roi); 277 | } 278 | 279 | void crop_center(cv::Mat& cv_img, int w, int h) 280 | { 281 | int h_off = 0; 282 | int w_off = 0; 283 | const int img_height = cv_img.rows; 284 | const int img_width = cv_img.cols; 285 | h_off = (img_height - h) / 2; 286 | w_off = (img_width - w) / 2; 287 | cv::Rect roi(w_off, h_off, w, h); 288 | cv_img = cv_img(roi); 289 | } 290 | 291 | void resize(cv::Mat& cv_img, int smallest_side) 292 | { 293 | int cur_width = cv_img.cols; 294 | int cur_height = cv_img.rows; 295 | cv::Size dsize; 296 | if (cur_height <= cur_width) 297 | { 298 | double k = ((double)cur_height) / smallest_side; 299 | int new_size = (int)ceil(cur_width / k); 300 | dsize = cv::Size(new_size, smallest_side); 301 | } 302 | else 303 | { 304 | double k = ((double)cur_width) / smallest_side; 305 | int new_size = (int)ceil(cur_height / k); 306 | dsize = cv::Size(smallest_side, new_size); 307 | } 308 | cv::resize(cv_img, cv_img, dsize); 309 | } 310 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 311 | 312 | /* 读取原始图片所用到的Transform, garylau */ 313 | template 314 | void DataTransformer::Transform(const cv::Mat& img, Blob* transformed_blob) 315 | { 316 | const int crop_size = param_.crop_size(); 317 | // Check dimensions. 
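  // Augmentation order in this function (each augmentation step runs only in the
  // TRAIN phase and only when its uniform draw exceeds 1 - apply_probability):
  //   1. random erasing   2. color shift   3. contrast/brightness   4. smoothing
  //   5. min_side or min_side_min/min_side_max resize and random crop
  //   6. affine transform or rotation (mutually exclusive)
  //   7. resize back to the original size, then the stock Caffe crop / mirror /
  //      mean subtraction / scale.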
318 | const int channels = transformed_blob->channels(); 319 | const int height = transformed_blob->height(); 320 | const int width = transformed_blob->width(); 321 | const int num = transformed_blob->num(); 322 | const Dtype scale = param_.scale(); 323 | const bool has_mean_file = param_.has_mean_file(); 324 | const bool has_mean_values = mean_values_.size() > 0; 325 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 326 | const float apply_prob = 1.f - param_.apply_probability(); 327 | const float max_smooth = param_.max_smooth(); 328 | const int rotation_angle = param_.max_rotation_angle(); 329 | const float min_contrast = param_.min_contrast(); 330 | const float max_contrast = param_.max_contrast(); 331 | const int max_brightness_shift = param_.max_brightness_shift(); 332 | const int max_color_shift = param_.max_color_shift(); 333 | const int min_side_min = param_.min_side_min(); 334 | const int min_side_max = param_.min_side_max(); 335 | const int min_side = param_.min_side(); 336 | const float affine_min_scale = param_.affine_min_scale(); 337 | const float affine_max_scale = param_.affine_max_scale(); 338 | const float random_erasing_low = param_.random_erasing_low(); 339 | const float random_erasing_high = param_.random_erasing_high(); 340 | const float random_erasing_ratio = param_.random_erasing_ratio(); 341 | const bool debug_params = param_.debug_params(); 342 | 343 | const bool do_mirror = param_.mirror() && phase_ == TRAIN && Rand(2); 344 | float current_prob = 0.f; 345 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 346 | const bool do_smooth = param_.smooth_filtering() && phase_ == TRAIN && max_smooth > 1 && current_prob > apply_prob; 347 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 348 | const bool do_rotation = rotation_angle > 0 && current_prob > apply_prob && phase_ == TRAIN; 349 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 350 | const bool do_brightness = param_.contrast_brightness_adjustment() && min_contrast > 0 && max_contrast >= min_contrast 351 | && max_brightness_shift >= 0 && phase_ == TRAIN && current_prob > apply_prob; 352 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 353 | const bool do_color_shift = max_color_shift > 0 && phase_ == TRAIN && current_prob > apply_prob; 354 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 355 | const bool do_resize_to_min_side_min_max = min_side_min > 0 && min_side_max > min_side_min && phase_ == TRAIN && current_prob > apply_prob; 356 | const bool do_resize_to_min_side = min_side > 0 && phase_ == TRAIN && current_prob > apply_prob; 357 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 358 | const bool do_affine = affine_min_scale > 0 && affine_max_scale > affine_min_scale && phase_ == TRAIN && current_prob > apply_prob; 359 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 360 | const bool do_random_erasing = param_.random_erasing_ratio() > 0 && param_.random_erasing_high() > param_.random_erasing_low() 361 | && param_.random_erasing_low() > 0 && phase_ == TRAIN && current_prob > apply_prob; 362 | 363 | cv::Mat cv_img = img; 364 | /* 随机擦除Random-Erasing */ 365 | cv::Scalar erase_mean = cv::mean(cv_img); 366 | cv::Rect erase_rect; 367 | if (do_random_erasing) 368 | { 369 | int area = cv_img.cols * cv_img.rows; 370 | caffe_rng_uniform(1, random_erasing_low, random_erasing_high, ¤t_prob); 371 | float target_area = current_prob * area; 372 | caffe_rng_uniform(1, random_erasing_ratio, 1.f / random_erasing_ratio, ¤t_prob); 373 | float aspect_ratio = current_prob; 374 | int erase_height = int(round(sqrt(target_area * aspect_ratio))); /* 待erase的矩形区域的高 
*/ 375 | int erase_weight = int(round(sqrt(target_area / aspect_ratio))); /* 待erase的矩形区域的宽 */ 376 | if (erase_weight <= cv_img.cols && erase_height <= cv_img.rows) 377 | { 378 | float erase_x = 0; /* 待erase的矩形区域的左上角x坐标 */ 379 | float erase_y = 0; /* 待erase的矩形区域的左上角y坐标 */ 380 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.cols - erase_weight), &erase_x); 381 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.rows - erase_height), &erase_y); 382 | erase_rect = cv::Rect(erase_x, erase_y, erase_weight, erase_height); 383 | if (3 == cv_img.channels()) 384 | { 385 | cv::Mat_ img_test = cv_img; 386 | for (size_t i = erase_x; i < erase_x + erase_weight; i++) 387 | { 388 | for (size_t j = erase_y; j < erase_y + erase_height; j++) 389 | { 390 | img_test(i, j) = cv::Vec3b(erase_mean.val[0], erase_mean.val[1], erase_mean.val[2]); 391 | } 392 | } 393 | } 394 | else 395 | { 396 | cv_img(erase_rect) = erase_mean.val[0]; 397 | } 398 | } 399 | } 400 | 401 | // apply color shift 402 | if (do_color_shift) 403 | { 404 | int b = Rand(max_color_shift + 1); 405 | int g = Rand(max_color_shift + 1); 406 | int r = Rand(max_color_shift + 1); 407 | int sign = Rand(2); 408 | cv::Mat shiftArr = cv_img.clone(); 409 | shiftArr.setTo(cv::Scalar(b, g, r)); 410 | if (sign == 1) 411 | { 412 | cv_img -= shiftArr; 413 | } 414 | else 415 | { 416 | cv_img += shiftArr; 417 | } 418 | } 419 | 420 | // set contrast and brightness 421 | float alpha; 422 | int beta; 423 | if (do_brightness) 424 | { 425 | caffe_rng_uniform(1, min_contrast, max_contrast, &alpha); 426 | beta = Rand(max_brightness_shift * 2 + 1) - max_brightness_shift; 427 | cv_img.convertTo(cv_img, -1, alpha, beta); 428 | } 429 | 430 | // set smoothness 431 | int smooth_param = 0; 432 | int smooth_type = 0; 433 | if (do_smooth) 434 | { 435 | smooth_type = Rand(4); 436 | smooth_param = 1 + 2 * Rand(max_smooth / 2); 437 | switch (smooth_type) 438 | { 439 | case 0: 440 | cv::GaussianBlur(cv_img, cv_img, cv::Size(smooth_param, smooth_param), 0); 441 | break; 442 | case 1: 443 | cv::blur(cv_img, cv_img, cv::Size(smooth_param, smooth_param)); 444 | break; 445 | case 2: 446 | cv::medianBlur(cv_img, cv_img, smooth_param); 447 | break; 448 | case 3: 449 | cv::boxFilter(cv_img, cv_img, -1, cv::Size(smooth_param * 2, smooth_param * 2)); 450 | break; 451 | default: 452 | break; 453 | } 454 | } 455 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 456 | 457 | const int img_channels = cv_img.channels(); 458 | const int img_height = cv_img.rows; 459 | const int img_width = cv_img.cols; 460 | 461 | CHECK_GT(img_channels, 0); 462 | CHECK_GE(img_height, crop_size); 463 | CHECK_GE(img_width, crop_size); 464 | CHECK_EQ(channels, img_channels); 465 | CHECK_LE(height, img_height); 466 | CHECK_LE(width, img_width); 467 | CHECK_GE(num, 1); 468 | CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; 469 | 470 | Dtype* mean = NULL; 471 | if (has_mean_file) { 472 | CHECK_EQ(img_channels, data_mean_.channels()); 473 | CHECK_EQ(img_height, data_mean_.height()); 474 | CHECK_EQ(img_width, data_mean_.width()); 475 | mean = data_mean_.mutable_cpu_data(); 476 | } 477 | if (has_mean_values) { 478 | CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) << 479 | "Specify either 1 mean_value or as many as channels: " << img_channels; 480 | if (img_channels > 1 && mean_values_.size() == 1) { 481 | // Replicate the mean_value for simplicity 482 | for (int c = 1; c < img_channels; ++c) { 483 | mean_values_.push_back(mean_values_[0]); 484 | } 485 | } 486 | } 487 | 488 | /* 
Begin Added by garylau, for data augmentation, 2017.11.22 */ 489 | // resizing and crop according to min side, preserving aspect ratio 490 | if (do_resize_to_min_side) 491 | { 492 | random_crop(cv_img, min_side); 493 | } 494 | if (do_resize_to_min_side_min_max) 495 | { 496 | int min_side_length = min_side_min + Rand(min_side_max - min_side_min + 1); 497 | resize(cv_img, min_side_max); 498 | random_crop(cv_img, min_side_length); 499 | } 500 | /* 仿射变换 */ 501 | float affine_angle = 0.f; 502 | float affine_scale = 0.f; 503 | if (do_affine) 504 | { 505 | cv::Point2f affine_center = cv::Point2f(cv_img.rows / 2, cv_img.cols / 2); 506 | affine_angle = 1.0 * Rand(rotation_angle * 2 + 1) - rotation_angle; 507 | affine_scale = affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f; 508 | cv::Mat affine_matrix = cv::getRotationMatrix2D(affine_center, affine_angle, affine_scale); 509 | cv::Size dize = cv::Size(cv_img.rows * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f), 510 | cv_img.cols * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f)); 511 | cv::warpAffine(cv_img, cv_img, affine_matrix, dize); 512 | } 513 | /* 旋转操作 */ 514 | int current_angle = 0; 515 | if (do_rotation && !do_affine) 516 | { 517 | current_angle = Rand(rotation_angle * 2 + 1) - rotation_angle; 518 | if (current_angle) 519 | { 520 | rotate(cv_img, current_angle); 521 | } 522 | } 523 | 524 | if (debug_params && phase_ == TRAIN) { 525 | LOG(INFO) << "----------------------------------------"; 526 | if (do_smooth) 527 | { 528 | LOG(INFO) << "* parameter for smooth filtering: "; 529 | LOG(INFO) << " smooth type: " << smooth_type << ", smooth param: " << smooth_param; 530 | } 531 | if (do_rotation) 532 | { 533 | LOG(INFO) << "* parameter for rotation: "; 534 | LOG(INFO) << " current rotation angle: " << current_angle; 535 | } 536 | if (do_brightness) 537 | { 538 | LOG(INFO) << "* parameter for contrast adjustment: "; 539 | LOG(INFO) << " alpha: " << alpha << ", beta: " << beta; 540 | } 541 | if (do_color_shift) 542 | { 543 | LOG(INFO) << "* parameter for color shift: "; 544 | LOG(INFO) << "max_color_shift: " << max_color_shift; 545 | } 546 | if (do_resize_to_min_side_min_max) 547 | { 548 | LOG(INFO) << "* parameter for min_side_min_max crop: "; 549 | LOG(INFO) << "min_side_min: " << min_side_min << ", min_side_max: " << min_side_max; 550 | } 551 | if (do_resize_to_min_side) 552 | { 553 | LOG(INFO) << "* parameter for min_side crop: "; 554 | LOG(INFO) << "min_side: " << min_side; 555 | } 556 | if (do_affine) 557 | { 558 | LOG(INFO) << "* parameter for affine transformation: "; 559 | LOG(INFO) << "affine_angle: " << affine_angle << ", affine_scale: " << affine_scale; 560 | } 561 | if (do_random_erasing) 562 | { 563 | LOG(INFO) << "* parameter for random erasing: "; 564 | LOG(INFO) << "erase_rect: " << "x:" << erase_rect.x << ", y:" << erase_rect.y << ", width:" << erase_rect.width << ", height:" << erase_rect.height; 565 | } 566 | } 567 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 568 | 569 | int h_off = 0; 570 | int w_off = 0; 571 | cv::Mat cv_cropped_img = cv_img; 572 | /* Begin Added by garylau, for data augmentation, 2017.11.22 */ 573 | if (img_width != cv_cropped_img.cols || img_height != cv_cropped_img.rows) 574 | { 575 | cv::resize(cv_cropped_img, cv_cropped_img, cv::Size(img_width, img_height)); 576 | } 577 | /* End Added by garylau, for data augmentation, 2017.11.22 */ 578 | if (crop_size) { 579 | CHECK_EQ(crop_size, height); 580 | 
CHECK_EQ(crop_size, width); 581 | // We only do random crop when we do training. 582 | if (phase_ == TRAIN) { 583 | h_off = Rand(img_height - crop_size + 1); 584 | w_off = Rand(img_width - crop_size + 1); 585 | } else { 586 | h_off = (img_height - crop_size) / 2; 587 | w_off = (img_width - crop_size) / 2; 588 | } 589 | cv::Rect roi(w_off, h_off, crop_size, crop_size); 590 | cv_cropped_img = cv_cropped_img(roi); 591 | } else { 592 | CHECK_EQ(img_height, height); 593 | CHECK_EQ(img_width, width); 594 | } 595 | 596 | CHECK(cv_cropped_img.data); 597 | 598 | Dtype* transformed_data = transformed_blob->mutable_cpu_data(); 599 | int top_index; 600 | for (int h = 0; h < height; ++h) { 601 | const uchar* ptr = cv_cropped_img.ptr(h); 602 | int img_index = 0; 603 | for (int w = 0; w < width; ++w) { 604 | for (int c = 0; c < img_channels; ++c) { 605 | if (do_mirror) { 606 | top_index = (c * height + h) * width + (width - 1 - w); 607 | } else { 608 | top_index = (c * height + h) * width + w; 609 | } 610 | // int top_index = (c * height + h) * width + w; 611 | Dtype pixel = static_cast(ptr[img_index++]); 612 | if (has_mean_file) { 613 | int mean_index = (c * img_height + h_off + h) * img_width + w_off + w; 614 | transformed_data[top_index] = 615 | (pixel - mean[mean_index]) * scale; 616 | } else { 617 | if (has_mean_values) { 618 | transformed_data[top_index] = 619 | (pixel - mean_values_[c]) * scale; 620 | } else { 621 | transformed_data[top_index] = pixel * scale; 622 | } 623 | } 624 | } 625 | } 626 | } 627 | } 628 | #endif // USE_OPENCV 629 | 630 | template 631 | void DataTransformer::Transform(Blob* input_blob, 632 | Blob* transformed_blob) { 633 | const int crop_size = param_.crop_size(); 634 | const int input_num = input_blob->num(); 635 | const int input_channels = input_blob->channels(); 636 | const int input_height = input_blob->height(); 637 | const int input_width = input_blob->width(); 638 | 639 | if (transformed_blob->count() == 0) { 640 | // Initialize transformed_blob with the right shape. 641 | if (crop_size) { 642 | transformed_blob->Reshape(input_num, input_channels, 643 | crop_size, crop_size); 644 | } else { 645 | transformed_blob->Reshape(input_num, input_channels, 646 | input_height, input_width); 647 | } 648 | } 649 | 650 | const int num = transformed_blob->num(); 651 | const int channels = transformed_blob->channels(); 652 | const int height = transformed_blob->height(); 653 | const int width = transformed_blob->width(); 654 | const int size = transformed_blob->count(); 655 | 656 | CHECK_LE(input_num, num); 657 | CHECK_EQ(input_channels, channels); 658 | CHECK_GE(input_height, height); 659 | CHECK_GE(input_width, width); 660 | 661 | 662 | const Dtype scale = param_.scale(); 663 | const bool do_mirror = param_.mirror() && Rand(2); 664 | const bool has_mean_file = param_.has_mean_file(); 665 | const bool has_mean_values = mean_values_.size() > 0; 666 | 667 | int h_off = 0; 668 | int w_off = 0; 669 | if (crop_size) { 670 | CHECK_EQ(crop_size, height); 671 | CHECK_EQ(crop_size, width); 672 | // We only do random crop when we do training. 
673 | if (phase_ == TRAIN) { 674 | h_off = Rand(input_height - crop_size + 1); 675 | w_off = Rand(input_width - crop_size + 1); 676 | } else { 677 | h_off = (input_height - crop_size) / 2; 678 | w_off = (input_width - crop_size) / 2; 679 | } 680 | } else { 681 | CHECK_EQ(input_height, height); 682 | CHECK_EQ(input_width, width); 683 | } 684 | 685 | Dtype* input_data = input_blob->mutable_cpu_data(); 686 | if (has_mean_file) { 687 | CHECK_EQ(input_channels, data_mean_.channels()); 688 | CHECK_EQ(input_height, data_mean_.height()); 689 | CHECK_EQ(input_width, data_mean_.width()); 690 | for (int n = 0; n < input_num; ++n) { 691 | int offset = input_blob->offset(n); 692 | caffe_sub(data_mean_.count(), input_data + offset, 693 | data_mean_.cpu_data(), input_data + offset); 694 | } 695 | } 696 | 697 | if (has_mean_values) { 698 | CHECK(mean_values_.size() == 1 || mean_values_.size() == input_channels) << 699 | "Specify either 1 mean_value or as many as channels: " << input_channels; 700 | if (mean_values_.size() == 1) { 701 | caffe_add_scalar(input_blob->count(), -(mean_values_[0]), input_data); 702 | } else { 703 | for (int n = 0; n < input_num; ++n) { 704 | for (int c = 0; c < input_channels; ++c) { 705 | int offset = input_blob->offset(n, c); 706 | caffe_add_scalar(input_height * input_width, -(mean_values_[c]), 707 | input_data + offset); 708 | } 709 | } 710 | } 711 | } 712 | 713 | Dtype* transformed_data = transformed_blob->mutable_cpu_data(); 714 | 715 | for (int n = 0; n < input_num; ++n) { 716 | int top_index_n = n * channels; 717 | int data_index_n = n * channels; 718 | for (int c = 0; c < channels; ++c) { 719 | int top_index_c = (top_index_n + c) * height; 720 | int data_index_c = (data_index_n + c) * input_height + h_off; 721 | for (int h = 0; h < height; ++h) { 722 | int top_index_h = (top_index_c + h) * width; 723 | int data_index_h = (data_index_c + h) * input_width + w_off; 724 | if (do_mirror) { 725 | int top_index_w = top_index_h + width - 1; 726 | for (int w = 0; w < width; ++w) { 727 | transformed_data[top_index_w-w] = input_data[data_index_h + w]; 728 | } 729 | } else { 730 | for (int w = 0; w < width; ++w) { 731 | transformed_data[top_index_h + w] = input_data[data_index_h + w]; 732 | } 733 | } 734 | } 735 | } 736 | } 737 | if (scale != Dtype(1)) { 738 | DLOG(INFO) << "Scale: " << scale; 739 | caffe_scal(size, scale, transformed_data); 740 | } 741 | } 742 | 743 | template 744 | vector DataTransformer::InferBlobShape(const Datum& datum) { 745 | if (datum.encoded()) { 746 | #ifdef USE_OPENCV 747 | CHECK(!(param_.force_color() && param_.force_gray())) 748 | << "cannot set both force_color and force_gray"; 749 | cv::Mat cv_img; 750 | if (param_.force_color() || param_.force_gray()) { 751 | // If force_color then decode in color otherwise decode in gray. 752 | cv_img = DecodeDatumToCVMat(datum, param_.force_color()); 753 | } else { 754 | cv_img = DecodeDatumToCVMatNative(datum); 755 | } 756 | // InferBlobShape using the cv::image. 757 | return InferBlobShape(cv_img); 758 | #else 759 | LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; 760 | #endif // USE_OPENCV 761 | } 762 | const int crop_size = param_.crop_size(); 763 | const int datum_channels = datum.channels(); 764 | const int datum_height = datum.height(); 765 | const int datum_width = datum.width(); 766 | // Check dimensions. 767 | CHECK_GT(datum_channels, 0); 768 | CHECK_GE(datum_height, crop_size); 769 | CHECK_GE(datum_width, crop_size); 770 | // Build BlobShape. 
771 | vector shape(4); 772 | shape[0] = 1; 773 | shape[1] = datum_channels; 774 | shape[2] = (crop_size)? crop_size: datum_height; 775 | shape[3] = (crop_size)? crop_size: datum_width; 776 | return shape; 777 | } 778 | 779 | template 780 | vector DataTransformer::InferBlobShape( 781 | const vector & datum_vector) { 782 | const int num = datum_vector.size(); 783 | CHECK_GT(num, 0) << "There is no datum to in the vector"; 784 | // Use first datum in the vector to InferBlobShape. 785 | vector shape = InferBlobShape(datum_vector[0]); 786 | // Adjust num to the size of the vector. 787 | shape[0] = num; 788 | return shape; 789 | } 790 | 791 | #ifdef USE_OPENCV 792 | template 793 | vector DataTransformer::InferBlobShape(const cv::Mat& cv_img) { 794 | const int crop_size = param_.crop_size(); 795 | const int img_channels = cv_img.channels(); 796 | const int img_height = cv_img.rows; 797 | const int img_width = cv_img.cols; 798 | // Check dimensions. 799 | CHECK_GT(img_channels, 0); 800 | CHECK_GE(img_height, crop_size); 801 | CHECK_GE(img_width, crop_size); 802 | // Build BlobShape. 803 | vector shape(4); 804 | shape[0] = 1; 805 | shape[1] = img_channels; 806 | shape[2] = (crop_size)? crop_size: img_height; 807 | shape[3] = (crop_size)? crop_size: img_width; 808 | return shape; 809 | } 810 | 811 | template 812 | vector DataTransformer::InferBlobShape( 813 | const vector & mat_vector) { 814 | const int num = mat_vector.size(); 815 | CHECK_GT(num, 0) << "There is no cv_img to in the vector"; 816 | // Use first cv_img in the vector to InferBlobShape. 817 | vector shape = InferBlobShape(mat_vector[0]); 818 | // Adjust num to the size of the vector. 819 | shape[0] = num; 820 | return shape; 821 | } 822 | #endif // USE_OPENCV 823 | 824 | template 825 | void DataTransformer::InitRand() { 826 | const bool needs_rand = param_.mirror() || 827 | (phase_ == TRAIN && param_.crop_size()); 828 | if (needs_rand) { 829 | const unsigned int rng_seed = caffe_rng_rand(); 830 | rng_.reset(new Caffe::RNG(rng_seed)); 831 | } else { 832 | rng_.reset(); 833 | } 834 | } 835 | 836 | template 837 | int DataTransformer::Rand(int n) { 838 | CHECK(rng_); 839 | CHECK_GT(n, 0); 840 | caffe::rng_t* rng = 841 | static_cast(rng_->generator()); 842 | return ((*rng)() % n); 843 | } 844 | 845 | INSTANTIATE_CLASS(DataTransformer); 846 | 847 | /* Begin Added by garylau, for lmdb data augmentation, 2017.12.11 */ 848 | template 849 | void DataTransformer::DatumToMat(const Datum* datum, cv::Mat& cv_img) 850 | { 851 | int datum_channels = datum->channels(); 852 | int datum_height = datum->height(); 853 | int datum_width = datum->width(); 854 | int datum_size = datum_channels * datum_height * datum_width; 855 | 856 | std::string buffer(datum_size, ' '); 857 | buffer = datum->data(); 858 | 859 | for (int h = 0; h < datum_height; ++h) { 860 | uchar* ptr = cv_img.ptr(h); 861 | int img_index = 0; 862 | for (int w = 0; w < datum_width; ++w) { 863 | for (int c = 0; c < datum_channels; ++c) { 864 | int datum_index = (c * datum_height + h) * datum_width + w; 865 | ptr[img_index++] = static_cast(buffer[datum_index]); 866 | } 867 | } 868 | } 869 | } 870 | template 871 | void DataTransformer::MatToDatum(const cv::Mat& cv_img, Datum* datum) 872 | { 873 | CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; 874 | datum->set_channels(cv_img.channels()); 875 | datum->set_height(cv_img.rows); 876 | datum->set_width(cv_img.cols); 877 | datum->clear_data(); 878 | datum->set_encoded(false); 879 | int datum_channels = datum->channels(); 880 
| int datum_height = datum->height(); 881 | int datum_width = datum->width(); 882 | int datum_size = datum_channels * datum_height * datum_width; 883 | std::string buffer(datum_size, ' '); 884 | for (int h = 0; h < datum_height; ++h) { 885 | const uchar* ptr = cv_img.ptr(h); 886 | int img_index = 0; 887 | for (int w = 0; w < datum_width; ++w) { 888 | for (int c = 0; c < datum_channels; ++c) { 889 | int datum_index = (c * datum_height + h) * datum_width + w; 890 | buffer[datum_index] = static_cast(ptr[img_index++]); 891 | } 892 | } 893 | } 894 | datum->set_data(buffer); 895 | } 896 | template 897 | void DataTransformer::CVMatTransform(cv::Mat& in_out_cv_img) 898 | { 899 | const float apply_prob = 1.f - param_.apply_probability(); 900 | const float max_smooth = param_.max_smooth(); 901 | const int rotation_angle = param_.max_rotation_angle(); 902 | const float min_contrast = param_.min_contrast(); 903 | const float max_contrast = param_.max_contrast(); 904 | const int max_brightness_shift = param_.max_brightness_shift(); 905 | const int max_color_shift = param_.max_color_shift(); 906 | const int min_side_min = param_.min_side_min(); 907 | const int min_side_max = param_.min_side_max(); 908 | const int min_side = param_.min_side(); 909 | const float affine_min_scale = param_.affine_min_scale(); 910 | const float affine_max_scale = param_.affine_max_scale(); 911 | const float random_erasing_low = param_.random_erasing_low(); 912 | const float random_erasing_high = param_.random_erasing_high(); 913 | const float random_erasing_ratio = param_.random_erasing_ratio(); 914 | const bool debug_params = param_.debug_params(); 915 | 916 | float current_prob = 0.f; 917 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 918 | const bool do_smooth = param_.smooth_filtering() && phase_ == TRAIN && max_smooth > 1 && current_prob > apply_prob; 919 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 920 | const bool do_rotation = rotation_angle > 0 && current_prob > apply_prob && phase_ == TRAIN; 921 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 922 | const bool do_brightness = param_.contrast_brightness_adjustment() && min_contrast > 0 && max_contrast >= min_contrast 923 | && max_brightness_shift >= 0 && phase_ == TRAIN && current_prob > apply_prob; 924 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 925 | const bool do_color_shift = max_color_shift > 0 && phase_ == TRAIN && current_prob > apply_prob; 926 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 927 | const bool do_resize_to_min_side_min_max = min_side_min > 0 && min_side_max > min_side_min && phase_ == TRAIN && current_prob > apply_prob; 928 | const bool do_resize_to_min_side = min_side > 0 && phase_ == TRAIN && current_prob > apply_prob; 929 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 930 | const bool do_affine = affine_min_scale > 0 && affine_max_scale > affine_min_scale && phase_ == TRAIN && current_prob > apply_prob; 931 | caffe_rng_uniform(1, 0.f, 1.f, ¤t_prob); 932 | const bool do_random_erasing = param_.random_erasing_ratio() > 0 && param_.random_erasing_high() > param_.random_erasing_low() 933 | && param_.random_erasing_low() > 0 && phase_ == TRAIN && current_prob > apply_prob; 934 | 935 | cv::Mat cv_img = in_out_cv_img; 936 | /* 随机擦除Random-Erasing */ 937 | cv::Scalar erase_mean = cv::mean(cv_img); 938 | cv::Rect erase_rect; 939 | if (do_random_erasing) 940 | { 941 | int area = cv_img.cols * cv_img.rows; 942 | caffe_rng_uniform(1, random_erasing_low, random_erasing_high, ¤t_prob); 943 | float target_area = current_prob * area; 944 | caffe_rng_uniform(1, 
random_erasing_ratio, 1.f / random_erasing_ratio, ¤t_prob); 945 | float aspect_ratio = current_prob; 946 | int erase_height = int(round(sqrt(target_area * aspect_ratio))); /* 待erase的矩形区域的高 */ 947 | int erase_weight = int(round(sqrt(target_area / aspect_ratio))); /* 待erase的矩形区域的宽 */ 948 | if (erase_weight <= cv_img.cols && erase_height <= cv_img.rows) 949 | { 950 | float erase_x = 0; /* 待erase的矩形区域的左上角x坐标 */ 951 | float erase_y = 0; /* 待erase的矩形区域的左上角y坐标 */ 952 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.cols - erase_weight), &erase_x); 953 | caffe_rng_uniform(1, 0.f, 1.f * (cv_img.rows - erase_height), &erase_y); 954 | erase_rect = cv::Rect(erase_x, erase_y, erase_weight, erase_height); 955 | if (3 == cv_img.channels()) 956 | { 957 | cv::Mat_ img_test = cv_img; 958 | for (size_t i = erase_x; i < erase_x + erase_weight; i++) 959 | { 960 | for (size_t j = erase_y; j < erase_y + erase_height; j++) 961 | { 962 | img_test(i, j) = cv::Vec3b(erase_mean.val[0], erase_mean.val[1], erase_mean.val[2]); 963 | } 964 | } 965 | } 966 | else 967 | { 968 | cv_img(erase_rect) = erase_mean.val[0]; 969 | } 970 | } 971 | } 972 | 973 | // apply color shift 974 | if (do_color_shift) 975 | { 976 | int b = Rand(max_color_shift + 1); 977 | int g = Rand(max_color_shift + 1); 978 | int r = Rand(max_color_shift + 1); 979 | int sign = Rand(2); 980 | cv::Mat shiftArr = cv_img.clone(); 981 | shiftArr.setTo(cv::Scalar(b, g, r)); 982 | if (sign == 1) 983 | { 984 | cv_img -= shiftArr; 985 | } 986 | else 987 | { 988 | cv_img += shiftArr; 989 | } 990 | } 991 | 992 | // set contrast and brightness 993 | float alpha; 994 | int beta; 995 | if (do_brightness) 996 | { 997 | caffe_rng_uniform(1, min_contrast, max_contrast, &alpha); 998 | beta = Rand(max_brightness_shift * 2 + 1) - max_brightness_shift; 999 | cv_img.convertTo(cv_img, -1, alpha, beta); 1000 | } 1001 | 1002 | // set smoothness 1003 | int smooth_param = 0; 1004 | int smooth_type = 0; 1005 | if (do_smooth) 1006 | { 1007 | smooth_type = Rand(4); 1008 | smooth_param = 1 + 2 * Rand(max_smooth / 2); 1009 | switch (smooth_type) 1010 | { 1011 | case 0: 1012 | cv::GaussianBlur(cv_img, cv_img, cv::Size(smooth_param, smooth_param), 0); 1013 | break; 1014 | case 1: 1015 | cv::blur(cv_img, cv_img, cv::Size(smooth_param, smooth_param)); 1016 | break; 1017 | case 2: 1018 | cv::medianBlur(cv_img, cv_img, smooth_param); 1019 | break; 1020 | case 3: 1021 | cv::boxFilter(cv_img, cv_img, -1, cv::Size(smooth_param * 2, smooth_param * 2)); 1022 | break; 1023 | default: 1024 | break; 1025 | } 1026 | } 1027 | 1028 | const int img_height = cv_img.rows; 1029 | const int img_width = cv_img.cols; 1030 | 1031 | CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; 1032 | 1033 | // resizing and crop according to min side, preserving aspect ratio 1034 | if (do_resize_to_min_side) 1035 | { 1036 | random_crop(cv_img, min_side); 1037 | } 1038 | if (do_resize_to_min_side_min_max) 1039 | { 1040 | int min_side_length = min_side_min + Rand(min_side_max - min_side_min + 1); 1041 | resize(cv_img, min_side_max); 1042 | random_crop(cv_img, min_side_length); 1043 | } 1044 | /* 仿射变换 */ 1045 | float affine_angle = 0.f; 1046 | float affine_scale = 0.f; 1047 | if (do_affine) 1048 | { 1049 | cv::Point2f affine_center = cv::Point2f(cv_img.rows / 2, cv_img.cols / 2); 1050 | affine_angle = 1.0 * Rand(rotation_angle * 2 + 1) - rotation_angle; 1051 | affine_scale = affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f; 1052 | cv::Mat affine_matrix = 
cv::getRotationMatrix2D(affine_center, affine_angle, affine_scale); 1053 | cv::Size dize = cv::Size(cv_img.rows * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f), 1054 | cv_img.cols * (affine_min_scale + Rand((affine_max_scale - affine_min_scale) * 10) / 10.f)); 1055 | cv::warpAffine(cv_img, cv_img, affine_matrix, dize); 1056 | } 1057 | /* 旋转操作 */ 1058 | int current_angle = 0; 1059 | if (do_rotation && !do_affine) 1060 | { 1061 | current_angle = Rand(rotation_angle * 2 + 1) - rotation_angle; 1062 | if (current_angle) 1063 | { 1064 | rotate(cv_img, current_angle); 1065 | } 1066 | } 1067 | 1068 | if (debug_params && phase_ == TRAIN) { 1069 | LOG(INFO) << "----------------------------------------"; 1070 | if (do_smooth) 1071 | { 1072 | LOG(INFO) << "* parameter for smooth filtering: "; 1073 | LOG(INFO) << " smooth type: " << smooth_type << ", smooth param: " << smooth_param; 1074 | } 1075 | if (do_rotation) 1076 | { 1077 | LOG(INFO) << "* parameter for rotation: "; 1078 | LOG(INFO) << " current rotation angle: " << current_angle; 1079 | } 1080 | if (do_brightness) 1081 | { 1082 | LOG(INFO) << "* parameter for contrast adjustment: "; 1083 | LOG(INFO) << " alpha: " << alpha << ", beta: " << beta; 1084 | } 1085 | if (do_color_shift) 1086 | { 1087 | LOG(INFO) << "* parameter for color shift: "; 1088 | LOG(INFO) << "max_color_shift: " << max_color_shift; 1089 | } 1090 | if (do_resize_to_min_side_min_max) 1091 | { 1092 | LOG(INFO) << "* parameter for min_side_min_max crop: "; 1093 | LOG(INFO) << "min_side_min: " << min_side_min << ", min_side_max: " << min_side_max; 1094 | } 1095 | if (do_resize_to_min_side) 1096 | { 1097 | LOG(INFO) << "* parameter for min_side crop: "; 1098 | LOG(INFO) << "min_side: " << min_side; 1099 | } 1100 | if (do_affine) 1101 | { 1102 | LOG(INFO) << "* parameter for affine transformation: "; 1103 | LOG(INFO) << "affine_angle: " << affine_angle << ", affine_scale: " << affine_scale; 1104 | } 1105 | if (do_random_erasing) 1106 | { 1107 | LOG(INFO) << "* parameter for random erasing: "; 1108 | LOG(INFO) << "erase_rect: " << "x:" << erase_rect.x << ", y:" << erase_rect.y << ", width:" << erase_rect.width << ", height:" << erase_rect.height; 1109 | } 1110 | } 1111 | 1112 | int h_off = 0; 1113 | int w_off = 0; 1114 | if (img_width != cv_img.cols || img_height != cv_img.rows) 1115 | { 1116 | cv::resize(cv_img, cv_img, cv::Size(img_width, img_height)); 1117 | } 1118 | in_out_cv_img = cv_img; 1119 | } 1120 | /* End Added by garylau, for lmdb data augmentation, 2017.12.11 */ 1121 | 1122 | } // namespace caffe 1123 | -------------------------------------------------------------------------------- /caffe.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package caffe; 4 | 5 | // Specifies the shape (dimensions) of a Blob. 6 | message BlobShape { 7 | repeated int64 dim = 1 [packed = true]; 8 | } 9 | 10 | message BlobProto { 11 | optional BlobShape shape = 7; 12 | repeated float data = 5 [packed = true]; 13 | repeated float diff = 6 [packed = true]; 14 | repeated double double_data = 8 [packed = true]; 15 | repeated double double_diff = 9 [packed = true]; 16 | 17 | // 4D dimensions -- deprecated. Use "shape" instead. 
18 | optional int32 num = 1 [default = 0]; 19 | optional int32 channels = 2 [default = 0]; 20 | optional int32 height = 3 [default = 0]; 21 | optional int32 width = 4 [default = 0]; 22 | } 23 | 24 | // The BlobProtoVector is simply a way to pass multiple blobproto instances 25 | // around. 26 | message BlobProtoVector { 27 | repeated BlobProto blobs = 1; 28 | } 29 | 30 | message Datum { 31 | optional int32 channels = 1; 32 | optional int32 height = 2; 33 | optional int32 width = 3; 34 | // the actual image data, in bytes 35 | optional bytes data = 4; 36 | optional int32 label = 5; 37 | // Optionally, the datum could also hold float data. 38 | repeated float float_data = 6; 39 | // If true data contains an encoded image that need to be decoded 40 | optional bool encoded = 7 [default = false]; 41 | } 42 | 43 | message FillerParameter { 44 | // The filler type. 45 | optional string type = 1 [default = 'constant']; 46 | optional float value = 2 [default = 0]; // the value in constant filler 47 | optional float min = 3 [default = 0]; // the min value in uniform filler 48 | optional float max = 4 [default = 1]; // the max value in uniform filler 49 | optional float mean = 5 [default = 0]; // the mean value in Gaussian filler 50 | optional float std = 6 [default = 1]; // the std value in Gaussian filler 51 | // The expected number of non-zero output weights for a given input in 52 | // Gaussian filler -- the default -1 means don't perform sparsification. 53 | optional int32 sparse = 7 [default = -1]; 54 | // Normalize the filler variance by fan_in, fan_out, or their average. 55 | // Applies to 'xavier' and 'msra' fillers. 56 | enum VarianceNorm { 57 | FAN_IN = 0; 58 | FAN_OUT = 1; 59 | AVERAGE = 2; 60 | } 61 | optional VarianceNorm variance_norm = 8 [default = FAN_IN]; 62 | } 63 | 64 | message NetParameter { 65 | optional string name = 1; // consider giving the network a name 66 | // DEPRECATED. See InputParameter. The input blobs to the network. 67 | repeated string input = 3; 68 | // DEPRECATED. See InputParameter. The shape of the input blobs. 69 | repeated BlobShape input_shape = 8; 70 | 71 | // 4D input dimensions -- deprecated. Use "input_shape" instead. 72 | // If specified, for each input blob there should be four 73 | // values specifying the num, channels, height and width of the input blob. 74 | // Thus, there should be a total of (4 * #input) numbers. 75 | repeated int32 input_dim = 4; 76 | 77 | // Whether the network will force every layer to carry out backward operation. 78 | // If set False, then whether to carry out backward is determined 79 | // automatically according to the net structure and learning rates. 80 | optional bool force_backward = 5 [default = false]; 81 | // The current "state" of the network, including the phase, level, and stage. 82 | // Some layers may be included/excluded depending on this state and the states 83 | // specified in the layers' include and exclude fields. 84 | optional NetState state = 6; 85 | 86 | // Print debugging information about results while running Net::Forward, 87 | // Net::Backward, and Net::Update. 88 | optional bool debug_info = 7 [default = false]; 89 | 90 | // The layers that make up the net. Each of their configurations, including 91 | // connectivity and behavior, is specified as a LayerParameter. 92 | repeated LayerParameter layer = 100; // ID 100 so layers are printed last. 93 | 94 | // DEPRECATED: use 'layer' instead. 
95 | repeated V1LayerParameter layers = 2; 96 | } 97 | 98 | // NOTE 99 | // Update the next available ID when you add a new SolverParameter field. 100 | // 101 | // SolverParameter next available ID: 42 (last added: layer_wise_reduce) 102 | message SolverParameter { 103 | ////////////////////////////////////////////////////////////////////////////// 104 | // Specifying the train and test networks 105 | // 106 | // Exactly one train net must be specified using one of the following fields: 107 | // train_net_param, train_net, net_param, net 108 | // One or more test nets may be specified using any of the following fields: 109 | // test_net_param, test_net, net_param, net 110 | // If more than one test net field is specified (e.g., both net and 111 | // test_net are specified), they will be evaluated in the field order given 112 | // above: (1) test_net_param, (2) test_net, (3) net_param/net. 113 | // A test_iter must be specified for each test_net. 114 | // A test_level and/or a test_stage may also be specified for each test_net. 115 | ////////////////////////////////////////////////////////////////////////////// 116 | 117 | // Proto filename for the train net, possibly combined with one or more 118 | // test nets. 119 | optional string net = 24; 120 | // Inline train net param, possibly combined with one or more test nets. 121 | optional NetParameter net_param = 25; 122 | 123 | optional string train_net = 1; // Proto filename for the train net. 124 | repeated string test_net = 2; // Proto filenames for the test nets. 125 | optional NetParameter train_net_param = 21; // Inline train net params. 126 | repeated NetParameter test_net_param = 22; // Inline test net params. 127 | 128 | // The states for the train/test nets. Must be unspecified or 129 | // specified once per net. 130 | // 131 | // By default, train_state will have phase = TRAIN, 132 | // and all test_state's will have phase = TEST. 133 | // Other defaults are set according to the NetState defaults. 134 | optional NetState train_state = 26; 135 | repeated NetState test_state = 27; 136 | 137 | // The number of iterations for each test net. 138 | repeated int32 test_iter = 3; 139 | 140 | // The number of iterations between two testing phases. 141 | optional int32 test_interval = 4 [default = 0]; 142 | optional bool test_compute_loss = 19 [default = false]; 143 | // If true, run an initial test pass before the first iteration, 144 | // ensuring memory availability and printing the starting value of the loss. 145 | optional bool test_initialization = 32 [default = true]; 146 | optional float base_lr = 5; // The base learning rate 147 | // the number of iterations between displaying info. If display = 0, no info 148 | // will be displayed. 149 | optional int32 display = 6; 150 | // Display the loss averaged over the last average_loss iterations 151 | optional int32 average_loss = 33 [default = 1]; 152 | optional int32 max_iter = 7; // the maximum number of iterations 153 | // accumulate gradients over `iter_size` x `batch_size` instances 154 | optional int32 iter_size = 36 [default = 1]; 155 | 156 | // The learning rate decay policy. The currently implemented learning rate 157 | // policies are as follows: 158 | // - fixed: always return base_lr. 
159 | // - step: return base_lr * gamma ^ (floor(iter / step)) 160 | // - exp: return base_lr * gamma ^ iter 161 | // - inv: return base_lr * (1 + gamma * iter) ^ (- power) 162 | // - multistep: similar to step but it allows non uniform steps defined by 163 | // stepvalue 164 | // - poly: the effective learning rate follows a polynomial decay, to be 165 | // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) 166 | // - sigmoid: the effective learning rate follows a sigmod decay 167 | // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) 168 | // 169 | // where base_lr, max_iter, gamma, step, stepvalue and power are defined 170 | // in the solver parameter protocol buffer, and iter is the current iteration. 171 | optional string lr_policy = 8; 172 | optional float gamma = 9; // The parameter to compute the learning rate. 173 | optional float power = 10; // The parameter to compute the learning rate. 174 | optional float momentum = 11; // The momentum value. 175 | optional float weight_decay = 12; // The weight decay. 176 | // regularization types supported: L1 and L2 177 | // controlled by weight_decay 178 | optional string regularization_type = 29 [default = "L2"]; 179 | // the stepsize for learning rate policy "step" 180 | optional int32 stepsize = 13; 181 | // the stepsize for learning rate policy "multistep" 182 | repeated int32 stepvalue = 34; 183 | 184 | // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, 185 | // whenever their actual L2 norm is larger. 186 | optional float clip_gradients = 35 [default = -1]; 187 | 188 | optional int32 snapshot = 14 [default = 0]; // The snapshot interval 189 | optional string snapshot_prefix = 15; // The prefix for the snapshot. 190 | // whether to snapshot diff in the results or not. Snapshotting diff will help 191 | // debugging but the final protocol buffer size will be much larger. 192 | optional bool snapshot_diff = 16 [default = false]; 193 | enum SnapshotFormat { 194 | HDF5 = 0; 195 | BINARYPROTO = 1; 196 | } 197 | optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; 198 | // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. 199 | enum SolverMode { 200 | CPU = 0; 201 | GPU = 1; 202 | } 203 | optional SolverMode solver_mode = 17 [default = GPU]; 204 | // the device_id will that be used in GPU mode. Use device_id = 0 in default. 205 | optional int32 device_id = 18 [default = 0]; 206 | // If non-negative, the seed with which the Solver will initialize the Caffe 207 | // random number generator -- useful for reproducible results. Otherwise, 208 | // (and by default) initialize using a seed derived from the system clock. 209 | optional int64 random_seed = 20 [default = -1]; 210 | 211 | // type of the solver 212 | optional string type = 40 [default = "SGD"]; 213 | 214 | // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam 215 | optional float delta = 31 [default = 1e-8]; 216 | // parameters for the Adam solver 217 | optional float momentum2 = 39 [default = 0.999]; 218 | 219 | // RMSProp decay value 220 | // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) 221 | optional float rms_decay = 38 [default = 0.99]; 222 | 223 | // If true, print information about the state of the net that may help with 224 | // debugging learning problems. 225 | optional bool debug_info = 23 [default = false]; 226 | 227 | // If false, don't save a snapshot after training finishes. 
228 | optional bool snapshot_after_train = 28 [default = true]; 229 | 230 | // DEPRECATED: old solver enum types, use string instead 231 | enum SolverType { 232 | SGD = 0; 233 | NESTEROV = 1; 234 | ADAGRAD = 2; 235 | RMSPROP = 3; 236 | ADADELTA = 4; 237 | ADAM = 5; 238 | } 239 | // DEPRECATED: use type instead of solver_type 240 | optional SolverType solver_type = 30 [default = SGD]; 241 | 242 | // Overlap compute and communication for data parallel training 243 | optional bool layer_wise_reduce = 41 [default = true]; 244 | } 245 | 246 | // A message that stores the solver snapshots 247 | message SolverState { 248 | optional int32 iter = 1; // The current iteration 249 | optional string learned_net = 2; // The file that stores the learned net. 250 | repeated BlobProto history = 3; // The history for sgd solvers 251 | optional int32 current_step = 4 [default = 0]; // The current step for learning rate 252 | } 253 | 254 | enum Phase { 255 | TRAIN = 0; 256 | TEST = 1; 257 | } 258 | 259 | message NetState { 260 | optional Phase phase = 1 [default = TEST]; 261 | optional int32 level = 2 [default = 0]; 262 | repeated string stage = 3; 263 | } 264 | 265 | message NetStateRule { 266 | // Set phase to require the NetState have a particular phase (TRAIN or TEST) 267 | // to meet this rule. 268 | optional Phase phase = 1; 269 | 270 | // Set the minimum and/or maximum levels in which the layer should be used. 271 | // Leave undefined to meet the rule regardless of level. 272 | optional int32 min_level = 2; 273 | optional int32 max_level = 3; 274 | 275 | // Customizable sets of stages to include or exclude. 276 | // The net must have ALL of the specified stages and NONE of the specified 277 | // "not_stage"s to meet the rule. 278 | // (Use multiple NetStateRules to specify conjunctions of stages.) 279 | repeated string stage = 4; 280 | repeated string not_stage = 5; 281 | } 282 | 283 | // Specifies training parameters (multipliers on global learning constants, 284 | // and the name and other settings used for weight sharing). 285 | message ParamSpec { 286 | // The names of the parameter blobs -- useful for sharing parameters among 287 | // layers, but never required otherwise. To share a parameter between two 288 | // layers, give it a (non-empty) name. 289 | optional string name = 1; 290 | 291 | // Whether to require shared weights to have the same shape, or just the same 292 | // count -- defaults to STRICT if unspecified. 293 | optional DimCheckMode share_mode = 2; 294 | enum DimCheckMode { 295 | // STRICT (default) requires that num, channels, height, width each match. 296 | STRICT = 0; 297 | // PERMISSIVE requires only the count (num*channels*height*width) to match. 298 | PERMISSIVE = 1; 299 | } 300 | 301 | // The multiplier on the global learning rate for this parameter. 302 | optional float lr_mult = 3 [default = 1.0]; 303 | 304 | // The multiplier on the global weight decay for this parameter. 305 | optional float decay_mult = 4 [default = 1.0]; 306 | } 307 | 308 | // NOTE 309 | // Update the next available ID when you add a new LayerParameter field. 310 | // 311 | // LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) 312 | message LayerParameter { 313 | optional string name = 1; // the layer name 314 | optional string type = 2; // the layer type 315 | repeated string bottom = 3; // the name of each bottom blob 316 | repeated string top = 4; // the name of each top blob 317 | 318 | // The train / test phase for computation. 
319 | optional Phase phase = 10; 320 | 321 | // The amount of weight to assign each top blob in the objective. 322 | // Each layer assigns a default value, usually of either 0 or 1, 323 | // to each top blob. 324 | repeated float loss_weight = 5; 325 | 326 | // Specifies training parameters (multipliers on global learning constants, 327 | // and the name and other settings used for weight sharing). 328 | repeated ParamSpec param = 6; 329 | 330 | // The blobs containing the numeric parameters of the layer. 331 | repeated BlobProto blobs = 7; 332 | 333 | // Specifies whether to backpropagate to each bottom. If unspecified, 334 | // Caffe will automatically infer whether each input needs backpropagation 335 | // to compute parameter gradients. If set to true for some inputs, 336 | // backpropagation to those inputs is forced; if set false for some inputs, 337 | // backpropagation to those inputs is skipped. 338 | // 339 | // The size must be either 0 or equal to the number of bottoms. 340 | repeated bool propagate_down = 11; 341 | 342 | // Rules controlling whether and when a layer is included in the network, 343 | // based on the current NetState. You may specify a non-zero number of rules 344 | // to include OR exclude, but not both. If no include or exclude rules are 345 | // specified, the layer is always included. If the current NetState meets 346 | // ANY (i.e., one or more) of the specified rules, the layer is 347 | // included/excluded. 348 | repeated NetStateRule include = 8; 349 | repeated NetStateRule exclude = 9; 350 | 351 | // Parameters for data pre-processing. 352 | optional TransformationParameter transform_param = 100; 353 | 354 | // Parameters shared by loss layers. 355 | optional LossParameter loss_param = 101; 356 | 357 | // Layer type-specific parameters. 358 | // 359 | // Note: certain layers may have more than one computational engine 360 | // for their implementation. These layers include an Engine type and 361 | // engine parameter for selecting the implementation. 362 | // The default for the engine is set by the ENGINE switch at compile-time. 
363 | optional AccuracyParameter accuracy_param = 102; 364 | optional ArgMaxParameter argmax_param = 103; 365 | optional BatchNormParameter batch_norm_param = 139; 366 | optional BiasParameter bias_param = 141; 367 | optional ConcatParameter concat_param = 104; 368 | optional ContrastiveLossParameter contrastive_loss_param = 105; 369 | optional ConvolutionParameter convolution_param = 106; 370 | optional CropParameter crop_param = 144; 371 | optional DataParameter data_param = 107; 372 | optional DropoutParameter dropout_param = 108; 373 | optional DummyDataParameter dummy_data_param = 109; 374 | optional EltwiseParameter eltwise_param = 110; 375 | optional ELUParameter elu_param = 140; 376 | optional EmbedParameter embed_param = 137; 377 | optional ExpParameter exp_param = 111; 378 | optional FlattenParameter flatten_param = 135; 379 | optional HDF5DataParameter hdf5_data_param = 112; 380 | optional HDF5OutputParameter hdf5_output_param = 113; 381 | optional HingeLossParameter hinge_loss_param = 114; 382 | optional ImageDataParameter image_data_param = 115; 383 | optional InfogainLossParameter infogain_loss_param = 116; 384 | optional InnerProductParameter inner_product_param = 117; 385 | optional InputParameter input_param = 143; 386 | optional LogParameter log_param = 134; 387 | optional LRNParameter lrn_param = 118; 388 | optional MemoryDataParameter memory_data_param = 119; 389 | optional MVNParameter mvn_param = 120; 390 | optional ParameterParameter parameter_param = 145; 391 | optional PoolingParameter pooling_param = 121; 392 | optional PowerParameter power_param = 122; 393 | optional PReLUParameter prelu_param = 131; 394 | optional PythonParameter python_param = 130; 395 | optional RecurrentParameter recurrent_param = 146; 396 | optional ReductionParameter reduction_param = 136; 397 | optional ReLUParameter relu_param = 123; 398 | optional ReshapeParameter reshape_param = 133; 399 | optional ScaleParameter scale_param = 142; 400 | optional SigmoidParameter sigmoid_param = 124; 401 | optional SoftmaxParameter softmax_param = 125; 402 | optional SPPParameter spp_param = 132; 403 | optional SliceParameter slice_param = 126; 404 | optional TanHParameter tanh_param = 127; 405 | optional ThresholdParameter threshold_param = 128; 406 | optional TileParameter tile_param = 138; 407 | optional WindowDataParameter window_data_param = 129; 408 | } 409 | 410 | // Message that stores parameters used to apply transformation 411 | // to the data layer's data 412 | message TransformationParameter { 413 | // For data pre-processing, we can do simple scaling and subtracting the 414 | // data mean, if provided. Note that the mean subtraction is always carried 415 | // out before scaling. 416 | optional float scale = 1 [default = 1]; 417 | // Specify if we want to randomly mirror data. 418 | optional bool mirror = 2 [default = false]; 419 | // Specify if we would like to randomly crop an image. 420 | optional uint32 crop_size = 3 [default = 0]; 421 | // mean_file and mean_value cannot be specified at the same time 422 | optional string mean_file = 4; 423 | // if specified can be repeated once (would subtract it from all the channels) 424 | // or can be repeated the same number of times as channels 425 | // (would subtract them from the corresponding channel) 426 | repeated float mean_value = 5; 427 | // Force the decoded image to have 3 color channels. 428 | optional bool force_color = 6 [default = false]; 429 | // Force the decoded image to have 1 color channels. 
430 | optional bool force_gray = 7 [default = false]; 431 | 432 | // Begin Added by garylau for Image augmentation, 2017.11.30 433 | optional float apply_probability = 8 [default = 0.5]; 434 | optional bool smooth_filtering = 9 [default = false]; 435 | optional float max_smooth = 10 [default = 6]; 436 | // Specify the angle for doing rotation 437 | optional uint32 max_rotation_angle = 11 [default = 0]; 438 | // Specify the contrast, brightness, smooth and color shift for augmentation 439 | optional bool contrast_brightness_adjustment = 12 [default = false]; 440 | optional float min_contrast = 13 [default = 0.8]; 441 | optional float max_contrast = 14 [default = 1.2]; 442 | optional uint32 max_brightness_shift = 15 [default = 5]; 443 | optional uint32 max_color_shift = 16 [default = 0]; 444 | // Min side resizing, keep aspect ratio 445 | optional uint32 min_side_min = 17 [default = 0]; 446 | optional uint32 min_side_max = 18 [default = 0]; 447 | optional uint32 min_side = 19 [default = 0]; 448 | // affine transformation 449 | optional float affine_min_scale = 20 [default = 0]; 450 | optional float affine_max_scale = 21 [default = 0]; 451 | optional bool debug_params = 22 [default = false]; 452 | // End Added by garylau for Image augmentation, 2017.11.30 453 | } 454 | 455 | // Message that stores parameters shared by loss layers 456 | message LossParameter { 457 | // If specified, ignore instances with the given label. 458 | optional int32 ignore_label = 1; 459 | // How to normalize the loss for loss layers that aggregate across batches, 460 | // spatial dimensions, or other dimensions. Currently only implemented in 461 | // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. 462 | enum NormalizationMode { 463 | // Divide by the number of examples in the batch times spatial dimensions. 464 | // Outputs that receive the ignore label will NOT be ignored in computing 465 | // the normalization factor. 466 | FULL = 0; 467 | // Divide by the total number of output locations that do not take the 468 | // ignore_label. If ignore_label is not set, this behaves like FULL. 469 | VALID = 1; 470 | // Divide by the batch size. 471 | BATCH_SIZE = 2; 472 | // Do not normalize the loss. 473 | NONE = 3; 474 | } 475 | // For historical reasons, the default normalization for 476 | // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. 477 | optional NormalizationMode normalization = 3 [default = VALID]; 478 | // Deprecated. Ignored if normalization is specified. If normalization 479 | // is not specified, then setting this to false will be equivalent to 480 | // normalization = BATCH_SIZE to be consistent with previous behavior. 481 | optional bool normalize = 2; 482 | } 483 | 484 | // Messages that store parameters used by individual layer types follow, in 485 | // alphabetical order. 486 | 487 | message AccuracyParameter { 488 | // When computing accuracy, count as correct by comparing the true label to 489 | // the top k scoring classes. By default, only compare to the top scoring 490 | // class (i.e. argmax). 491 | optional uint32 top_k = 1 [default = 1]; 492 | 493 | // The "label" axis of the prediction blob, whose argmax corresponds to the 494 | // predicted label -- may be negative to index from the end (e.g., -1 for the 495 | // last axis). For example, if axis == 1 and the predictions are 496 | // (N x C x H x W), the label blob is expected to contain N*H*W ground truth 497 | // labels with integer values in {0, 1, ..., C-1}. 
498 | optional int32 axis = 2 [default = 1]; 499 | 500 | // If specified, ignore instances with the given label. 501 | optional int32 ignore_label = 3; 502 | } 503 | 504 | message ArgMaxParameter { 505 | // If true produce pairs (argmax, maxval) 506 | optional bool out_max_val = 1 [default = false]; 507 | optional uint32 top_k = 2 [default = 1]; 508 | // The axis along which to maximise -- may be negative to index from the 509 | // end (e.g., -1 for the last axis). 510 | // By default ArgMaxLayer maximizes over the flattened trailing dimensions 511 | // for each index of the first / num dimension. 512 | optional int32 axis = 3; 513 | } 514 | 515 | message ConcatParameter { 516 | // The axis along which to concatenate -- may be negative to index from the 517 | // end (e.g., -1 for the last axis). Other axes must have the 518 | // same dimension for all the bottom blobs. 519 | // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 520 | optional int32 axis = 2 [default = 1]; 521 | 522 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 523 | optional uint32 concat_dim = 1 [default = 1]; 524 | } 525 | 526 | message BatchNormParameter { 527 | // If false, normalization is performed over the current mini-batch 528 | // and global statistics are accumulated (but not yet used) by a moving 529 | // average. 530 | // If true, those accumulated mean and variance values are used for the 531 | // normalization. 532 | // By default, it is set to false when the network is in the training 533 | // phase and true when the network is in the testing phase. 534 | optional bool use_global_stats = 1; 535 | // What fraction of the moving average remains each iteration? 536 | // Smaller values make the moving average decay faster, giving more 537 | // weight to the recent values. 538 | // Each iteration updates the moving average @f$S_{t-1}@f$ with the 539 | // current mean @f$ Y_t @f$ by 540 | // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ 541 | // is the moving_average_fraction parameter. 542 | optional float moving_average_fraction = 2 [default = .999]; 543 | // Small value to add to the variance estimate so that we don't divide by 544 | // zero. 545 | optional float eps = 3 [default = 1e-5]; 546 | } 547 | 548 | message BiasParameter { 549 | // The first axis of bottom[0] (the first input Blob) along which to apply 550 | // bottom[1] (the second input Blob). May be negative to index from the end 551 | // (e.g., -1 for the last axis). 552 | // 553 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 554 | // top[0] will have the same shape, and bottom[1] may have any of the 555 | // following shapes (for the given value of axis): 556 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 557 | // (axis == 1 == -3) 3; 3x40; 3x40x60 558 | // (axis == 2 == -2) 40; 40x60 559 | // (axis == 3 == -1) 60 560 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 561 | // "axis") -- a scalar bias. 562 | optional int32 axis = 1 [default = 1]; 563 | 564 | // (num_axes is ignored unless just one bottom is given and the bias is 565 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 566 | // number of axes by the second bottom.) 567 | // The number of axes of the input (bottom[0]) covered by the bias 568 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 569 | // Set num_axes := 0, to add a zero-axis Blob: a scalar. 
570 | optional int32 num_axes = 2 [default = 1]; 571 | 572 | // (filler is ignored unless just one bottom is given and the bias is 573 | // a learned parameter of the layer.) 574 | // The initialization for the learned bias parameter. 575 | // Default is the zero (0) initialization, resulting in the BiasLayer 576 | // initially performing the identity operation. 577 | optional FillerParameter filler = 3; 578 | } 579 | 580 | message ContrastiveLossParameter { 581 | // margin for dissimilar pair 582 | optional float margin = 1 [default = 1.0]; 583 | // The first implementation of this cost did not exactly match the cost of 584 | // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 585 | // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 586 | // Hadsell paper. New models should probably use this version. 587 | // legacy_version = true uses (margin - d^2). This is kept to support / 588 | // reproduce existing models and results 589 | optional bool legacy_version = 2 [default = false]; 590 | } 591 | 592 | message ConvolutionParameter { 593 | optional uint32 num_output = 1; // The number of outputs for the layer 594 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 595 | 596 | // Pad, kernel size, and stride are all given as a single value for equal 597 | // dimensions in all spatial dimensions, or once per spatial dimension. 598 | repeated uint32 pad = 3; // The padding size; defaults to 0 599 | repeated uint32 kernel_size = 4; // The kernel size 600 | repeated uint32 stride = 6; // The stride; defaults to 1 601 | // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 602 | // holes. (Kernel dilation is sometimes referred to by its use in the 603 | // algorithme à trous from Holschneider et al. 1987.) 604 | repeated uint32 dilation = 18; // The dilation; defaults to 1 605 | 606 | // For 2D convolution only, the *_h and *_w versions may also be used to 607 | // specify both spatial dimensions. 608 | optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 609 | optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 610 | optional uint32 kernel_h = 11; // The kernel height (2D only) 611 | optional uint32 kernel_w = 12; // The kernel width (2D only) 612 | optional uint32 stride_h = 13; // The stride height (2D only) 613 | optional uint32 stride_w = 14; // The stride width (2D only) 614 | 615 | optional uint32 group = 5 [default = 1]; // The group size for group conv 616 | 617 | optional FillerParameter weight_filler = 7; // The filler for the weight 618 | optional FillerParameter bias_filler = 8; // The filler for the bias 619 | enum Engine { 620 | DEFAULT = 0; 621 | CAFFE = 1; 622 | CUDNN = 2; 623 | } 624 | optional Engine engine = 15 [default = DEFAULT]; 625 | 626 | // The axis to interpret as "channels" when performing convolution. 627 | // Preceding dimensions are treated as independent inputs; 628 | // succeeding dimensions are treated as "spatial". 629 | // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 630 | // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 631 | // groups g>1) filters across the spatial axes (H, W) of the input. 632 | // With (N, C, D, H, W) inputs, and axis == 1, we perform 633 | // N independent 3D convolutions, sliding (C/g)-channels 634 | // filters across the spatial axes (D, H, W) of the input. 
635 | optional int32 axis = 16 [default = 1]; 636 | 637 | // Whether to force use of the general ND convolution, even if a specific 638 | // implementation for blobs of the appropriate number of spatial dimensions 639 | // is available. (Currently, there is only a 2D-specific convolution 640 | // implementation; for input blobs with num_axes != 2, this option is 641 | // ignored and the ND implementation will be used.) 642 | optional bool force_nd_im2col = 17 [default = false]; 643 | } 644 | 645 | message CropParameter { 646 | // To crop, elements of the first bottom are selected to fit the dimensions 647 | // of the second, reference bottom. The crop is configured by 648 | // - the crop `axis` to pick the dimensions for cropping 649 | // - the crop `offset` to set the shift for all/each dimension 650 | // to align the cropped bottom with the reference bottom. 651 | // All dimensions up to but excluding `axis` are preserved, while 652 | // the dimensions including and trailing `axis` are cropped. 653 | // If only one `offset` is set, then all dimensions are offset by this amount. 654 | // Otherwise, the number of offsets must equal the number of cropped axes to 655 | // shift the crop in each dimension accordingly. 656 | // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 657 | // and `axis` may be negative to index from the end (e.g., -1 for the last 658 | // axis). 659 | optional int32 axis = 1 [default = 2]; 660 | repeated uint32 offset = 2; 661 | } 662 | 663 | message DataParameter { 664 | enum DB { 665 | LEVELDB = 0; 666 | LMDB = 1; 667 | } 668 | // Specify the data source. 669 | optional string source = 1; 670 | // Specify the batch size. 671 | optional uint32 batch_size = 4; 672 | // The rand_skip variable is for the data layer to skip a few data points 673 | // to avoid all asynchronous sgd clients to start at the same point. The skip 674 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 675 | // be larger than the number of keys in the database. 676 | // DEPRECATED. Each solver accesses a different subset of the database. 677 | optional uint32 rand_skip = 7 [default = 0]; 678 | optional DB backend = 8 [default = LEVELDB]; 679 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 680 | // simple scaling and subtracting the data mean, if provided. Note that the 681 | // mean subtraction is always carried out before scaling. 682 | optional float scale = 2 [default = 1]; 683 | optional string mean_file = 3; 684 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 685 | // crop an image. 686 | optional uint32 crop_size = 5 [default = 0]; 687 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 688 | // data. 689 | optional bool mirror = 6 [default = false]; 690 | // Force the encoded image to have 3 color channels 691 | optional bool force_encoded_color = 9 [default = false]; 692 | // Prefetch queue (Increase if data feeding bandwidth varies, within the 693 | // limit of device memory for GPU training) 694 | optional uint32 prefetch = 10 [default = 4]; 695 | } 696 | 697 | message DropoutParameter { 698 | optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 699 | } 700 | 701 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random 702 | // (or constant) data generated by "Fillers" (see "message FillerParameter"). 703 | message DummyDataParameter { 704 | // This layer produces N >= 1 top blobs. 
DummyDataParameter must specify 1 or N 705 | // shape fields, and 0, 1 or N data_fillers. 706 | // 707 | // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 708 | // If 1 data_filler is specified, it is applied to all top blobs. If N are 709 | // specified, the ith is applied to the ith top blob. 710 | repeated FillerParameter data_filler = 1; 711 | repeated BlobShape shape = 6; 712 | 713 | // 4D dimensions -- deprecated. Use "shape" instead. 714 | repeated uint32 num = 2; 715 | repeated uint32 channels = 3; 716 | repeated uint32 height = 4; 717 | repeated uint32 width = 5; 718 | } 719 | 720 | message EltwiseParameter { 721 | enum EltwiseOp { 722 | PROD = 0; 723 | SUM = 1; 724 | MAX = 2; 725 | } 726 | optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 727 | repeated float coeff = 2; // blob-wise coefficient for SUM operation 728 | 729 | // Whether to use an asymptotically slower (for >2 inputs) but stabler method 730 | // of computing the gradient for the PROD operation. (No effect for SUM op.) 731 | optional bool stable_prod_grad = 3 [default = true]; 732 | } 733 | 734 | // Message that stores parameters used by ELULayer 735 | message ELUParameter { 736 | // Described in: 737 | // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 738 | // Deep Network Learning by Exponential Linear Units (ELUs). arXiv 739 | optional float alpha = 1 [default = 1]; 740 | } 741 | 742 | // Message that stores parameters used by EmbedLayer 743 | message EmbedParameter { 744 | optional uint32 num_output = 1; // The number of outputs for the layer 745 | // The input is given as integers to be interpreted as one-hot 746 | // vector indices with dimension num_input. Hence num_input should be 747 | // 1 greater than the maximum possible input value. 748 | optional uint32 input_dim = 2; 749 | 750 | optional bool bias_term = 3 [default = true]; // Whether to use a bias term 751 | optional FillerParameter weight_filler = 4; // The filler for the weight 752 | optional FillerParameter bias_filler = 5; // The filler for the bias 753 | 754 | } 755 | 756 | // Message that stores parameters used by ExpLayer 757 | message ExpParameter { 758 | // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 759 | // Or if base is set to the default (-1), base is set to e, 760 | // so y = exp(shift + scale * x). 761 | optional float base = 1 [default = -1.0]; 762 | optional float scale = 2 [default = 1.0]; 763 | optional float shift = 3 [default = 0.0]; 764 | } 765 | 766 | /// Message that stores parameters used by FlattenLayer 767 | message FlattenParameter { 768 | // The first axis to flatten: all preceding axes are retained in the output. 769 | // May be negative to index from the end (e.g., -1 for the last axis). 770 | optional int32 axis = 1 [default = 1]; 771 | 772 | // The last axis to flatten: all following axes are retained in the output. 773 | // May be negative to index from the end (e.g., the default -1 for the last 774 | // axis). 775 | optional int32 end_axis = 2 [default = -1]; 776 | } 777 | 778 | // Message that stores parameters used by HDF5DataLayer 779 | message HDF5DataParameter { 780 | // Specify the data source. 781 | optional string source = 1; 782 | // Specify the batch size. 783 | optional uint32 batch_size = 2; 784 | 785 | // Specify whether to shuffle the data. 
786 | // If shuffle == true, the ordering of the HDF5 files is shuffled, 787 | // and the ordering of data within any given HDF5 file is shuffled, 788 | // but data between different files are not interleaved; all of a file's 789 | // data are output (in a random order) before moving onto another file. 790 | optional bool shuffle = 3 [default = false]; 791 | } 792 | 793 | message HDF5OutputParameter { 794 | optional string file_name = 1; 795 | } 796 | 797 | message HingeLossParameter { 798 | enum Norm { 799 | L1 = 1; 800 | L2 = 2; 801 | } 802 | // Specify the Norm to use L1 or L2 803 | optional Norm norm = 1 [default = L1]; 804 | } 805 | 806 | message ImageDataParameter { 807 | // Specify the data source. 808 | optional string source = 1; 809 | // Specify the batch size. 810 | optional uint32 batch_size = 4 [default = 1]; 811 | // The rand_skip variable is for the data layer to skip a few data points 812 | // to avoid all asynchronous sgd clients to start at the same point. The skip 813 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 814 | // be larger than the number of keys in the database. 815 | optional uint32 rand_skip = 7 [default = 0]; 816 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 817 | optional bool shuffle = 8 [default = false]; 818 | // It will also resize images if new_height or new_width are not zero. 819 | optional uint32 new_height = 9 [default = 0]; 820 | optional uint32 new_width = 10 [default = 0]; 821 | // Specify if the images are color or gray 822 | optional bool is_color = 11 [default = true]; 823 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 824 | // simple scaling and subtracting the data mean, if provided. Note that the 825 | // mean subtraction is always carried out before scaling. 826 | optional float scale = 2 [default = 1]; 827 | optional string mean_file = 3; 828 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 829 | // crop an image. 830 | optional uint32 crop_size = 5 [default = 0]; 831 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 832 | // data. 833 | optional bool mirror = 6 [default = false]; 834 | optional string root_folder = 12 [default = ""]; 835 | } 836 | 837 | message InfogainLossParameter { 838 | // Specify the infogain matrix source. 839 | optional string source = 1; 840 | optional int32 axis = 2 [default = 1]; // axis of prob 841 | } 842 | 843 | message InnerProductParameter { 844 | optional uint32 num_output = 1; // The number of outputs for the layer 845 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 846 | optional FillerParameter weight_filler = 3; // The filler for the weight 847 | optional FillerParameter bias_filler = 4; // The filler for the bias 848 | 849 | // The first axis to be lumped into a single inner product computation; 850 | // all preceding axes are retained in the output. 851 | // May be negative to index from the end (e.g., -1 for the last axis). 852 | optional int32 axis = 5 [default = 1]; 853 | // Specify whether to transpose the weight matrix or not. 854 | // If transpose == true, any operations will be performed on the transpose 855 | // of the weight matrix. The weight matrix itself is not going to be transposed 856 | // but rather the transfer flag of operations will be toggled accordingly. 
857 | optional bool transpose = 6 [default = false]; 858 | } 859 | 860 | message InputParameter { 861 | // This layer produces N >= 1 top blob(s) to be assigned manually. 862 | // Define N shapes to set a shape for each top. 863 | // Define 1 shape to set the same shape for every top. 864 | // Define no shape to defer to reshaping manually. 865 | repeated BlobShape shape = 1; 866 | } 867 | 868 | // Message that stores parameters used by LogLayer 869 | message LogParameter { 870 | // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 871 | // Or if base is set to the default (-1), base is set to e, 872 | // so y = ln(shift + scale * x) = log_e(shift + scale * x) 873 | optional float base = 1 [default = -1.0]; 874 | optional float scale = 2 [default = 1.0]; 875 | optional float shift = 3 [default = 0.0]; 876 | } 877 | 878 | // Message that stores parameters used by LRNLayer 879 | message LRNParameter { 880 | optional uint32 local_size = 1 [default = 5]; 881 | optional float alpha = 2 [default = 1.]; 882 | optional float beta = 3 [default = 0.75]; 883 | enum NormRegion { 884 | ACROSS_CHANNELS = 0; 885 | WITHIN_CHANNEL = 1; 886 | } 887 | optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 888 | optional float k = 5 [default = 1.]; 889 | enum Engine { 890 | DEFAULT = 0; 891 | CAFFE = 1; 892 | CUDNN = 2; 893 | } 894 | optional Engine engine = 6 [default = DEFAULT]; 895 | } 896 | 897 | message MemoryDataParameter { 898 | optional uint32 batch_size = 1; 899 | optional uint32 channels = 2; 900 | optional uint32 height = 3; 901 | optional uint32 width = 4; 902 | } 903 | 904 | message MVNParameter { 905 | // This parameter can be set to false to normalize mean only 906 | optional bool normalize_variance = 1 [default = true]; 907 | 908 | // This parameter can be set to true to perform DNN-like MVN 909 | optional bool across_channels = 2 [default = false]; 910 | 911 | // Epsilon for not dividing by zero while normalizing variance 912 | optional float eps = 3 [default = 1e-9]; 913 | } 914 | 915 | message ParameterParameter { 916 | optional BlobShape shape = 1; 917 | } 918 | 919 | message PoolingParameter { 920 | enum PoolMethod { 921 | MAX = 0; 922 | AVE = 1; 923 | STOCHASTIC = 2; 924 | } 925 | optional PoolMethod pool = 1 [default = MAX]; // The pooling method 926 | // Pad, kernel size, and stride are all given as a single value for equal 927 | // dimensions in height and width or as Y, X pairs. 928 | optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) 929 | optional uint32 pad_h = 9 [default = 0]; // The padding height 930 | optional uint32 pad_w = 10 [default = 0]; // The padding width 931 | optional uint32 kernel_size = 2; // The kernel size (square) 932 | optional uint32 kernel_h = 5; // The kernel height 933 | optional uint32 kernel_w = 6; // The kernel width 934 | optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) 935 | optional uint32 stride_h = 7; // The stride height 936 | optional uint32 stride_w = 8; // The stride width 937 | enum Engine { 938 | DEFAULT = 0; 939 | CAFFE = 1; 940 | CUDNN = 2; 941 | } 942 | optional Engine engine = 11 [default = DEFAULT]; 943 | // If global_pooling then it will pool over the size of the bottom by doing 944 | // kernel_h = bottom->height and kernel_w = bottom->width 945 | optional bool global_pooling = 12 [default = false]; 946 | } 947 | 948 | message PowerParameter { 949 | // PowerLayer computes outputs y = (shift + scale * x) ^ power. 
950 | optional float power = 1 [default = 1.0]; 951 | optional float scale = 2 [default = 1.0]; 952 | optional float shift = 3 [default = 0.0]; 953 | } 954 | 955 | message PythonParameter { 956 | optional string module = 1; 957 | optional string layer = 2; 958 | // This value is set to the attribute `param_str` of the `PythonLayer` object 959 | // in Python before calling the `setup()` method. This could be a number, 960 | // string, dictionary in Python dict format, JSON, etc. You may parse this 961 | // string in `setup` method and use it in `forward` and `backward`. 962 | optional string param_str = 3 [default = '']; 963 | // DEPRECATED 964 | optional bool share_in_parallel = 4 [default = false]; 965 | } 966 | 967 | // Message that stores parameters used by RecurrentLayer 968 | message RecurrentParameter { 969 | // The dimension of the output (and usually hidden state) representation -- 970 | // must be explicitly set to non-zero. 971 | optional uint32 num_output = 1 [default = 0]; 972 | 973 | optional FillerParameter weight_filler = 2; // The filler for the weight 974 | optional FillerParameter bias_filler = 3; // The filler for the bias 975 | 976 | // Whether to enable displaying debug_info in the unrolled recurrent net. 977 | optional bool debug_info = 4 [default = false]; 978 | 979 | // Whether to add as additional inputs (bottoms) the initial hidden state 980 | // blobs, and add as additional outputs (tops) the final timestep hidden state 981 | // blobs. The number of additional bottom/top blobs required depends on the 982 | // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 983 | optional bool expose_hidden = 5 [default = false]; 984 | } 985 | 986 | // Message that stores parameters used by ReductionLayer 987 | message ReductionParameter { 988 | enum ReductionOp { 989 | SUM = 1; 990 | ASUM = 2; 991 | SUMSQ = 3; 992 | MEAN = 4; 993 | } 994 | 995 | optional ReductionOp operation = 1 [default = SUM]; // reduction operation 996 | 997 | // The first axis to reduce to a scalar -- may be negative to index from the 998 | // end (e.g., -1 for the last axis). 999 | // (Currently, only reduction along ALL "tail" axes is supported; reduction 1000 | // of axis M through N, where N < num_axes - 1, is unsupported.) 1001 | // Suppose we have an n-axis bottom Blob with shape: 1002 | // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 1003 | // If axis == m, the output Blob will have shape 1004 | // (d0, d1, d2, ..., d(m-1)), 1005 | // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) 1006 | // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. 1007 | // If axis == 0 (the default), the output Blob always has the empty shape 1008 | // (count 1), performing reduction across the entire input -- 1009 | // often useful for creating new loss functions. 1010 | optional int32 axis = 2 [default = 0]; 1011 | 1012 | optional float coeff = 3 [default = 1.0]; // coefficient for output 1013 | } 1014 | 1015 | // Message that stores parameters used by ReLULayer 1016 | message ReLUParameter { 1017 | // Allow non-zero slope for negative inputs to speed up optimization 1018 | // Described in: 1019 | // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities 1020 | // improve neural network acoustic models. In ICML Workshop on Deep Learning 1021 | // for Audio, Speech, and Language Processing. 
1022 | optional float negative_slope = 1 [default = 0]; 1023 | enum Engine { 1024 | DEFAULT = 0; 1025 | CAFFE = 1; 1026 | CUDNN = 2; 1027 | } 1028 | optional Engine engine = 2 [default = DEFAULT]; 1029 | } 1030 | 1031 | message ReshapeParameter { 1032 | // Specify the output dimensions. If some of the dimensions are set to 0, 1033 | // the corresponding dimension from the bottom layer is used (unchanged). 1034 | // Exactly one dimension may be set to -1, in which case its value is 1035 | // inferred from the count of the bottom blob and the remaining dimensions. 1036 | // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: 1037 | // 1038 | // layer { 1039 | // type: "Reshape" bottom: "input" top: "output" 1040 | // reshape_param { ... } 1041 | // } 1042 | // 1043 | // If "input" is 2D with shape 2 x 8, then the following reshape_param 1044 | // specifications are all equivalent, producing a 3D blob "output" with shape 1045 | // 2 x 2 x 4: 1046 | // 1047 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1048 | // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } 1049 | // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } 1050 | // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } 1051 | // 1052 | optional BlobShape shape = 1; 1053 | 1054 | // axis and num_axes control the portion of the bottom blob's shape that are 1055 | // replaced by (included in) the reshape. By default (axis == 0 and 1056 | // num_axes == -1), the entire bottom blob shape is included in the reshape, 1057 | // and hence the shape field must specify the entire output shape. 1058 | // 1059 | // axis may be non-zero to retain some portion of the beginning of the input 1060 | // shape (and may be negative to index from the end; e.g., -1 to begin the 1061 | // reshape after the last axis, including nothing in the reshape, 1062 | // -2 to include only the last axis, etc.). 1063 | // 1064 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1065 | // Then the following ReshapeLayer specifications are all equivalent, 1066 | // producing a blob "output" with shape 2 x 2 x 4: 1067 | // 1068 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1069 | // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } 1070 | // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } 1071 | // 1072 | // num_axes specifies the extent of the reshape. 1073 | // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on 1074 | // input axes in the range [axis, axis+num_axes]. 1075 | // num_axes may also be -1, the default, to include all remaining axes 1076 | // (starting from axis). 1077 | // 1078 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1079 | // Then the following ReshapeLayer specifications are equivalent, 1080 | // producing a blob "output" with shape 1 x 2 x 8. 1081 | // 1082 | // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } 1083 | // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } 1084 | // reshape_param { shape { dim: 1 } num_axes: 0 } 1085 | // 1086 | // On the other hand, these would produce output blob shape 2 x 1 x 8: 1087 | // 1088 | // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } 1089 | // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } 1090 | // 1091 | optional int32 axis = 2 [default = 0]; 1092 | optional int32 num_axes = 3 [default = -1]; 1093 | } 1094 | 1095 | message ScaleParameter { 1096 | // The first axis of bottom[0] (the first input Blob) along which to apply 1097 | // bottom[1] (the second input Blob). 
May be negative to index from the end 1098 | // (e.g., -1 for the last axis). 1099 | // 1100 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 1101 | // top[0] will have the same shape, and bottom[1] may have any of the 1102 | // following shapes (for the given value of axis): 1103 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 1104 | // (axis == 1 == -3) 3; 3x40; 3x40x60 1105 | // (axis == 2 == -2) 40; 40x60 1106 | // (axis == 3 == -1) 60 1107 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 1108 | // "axis") -- a scalar multiplier. 1109 | optional int32 axis = 1 [default = 1]; 1110 | 1111 | // (num_axes is ignored unless just one bottom is given and the scale is 1112 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 1113 | // number of axes by the second bottom.) 1114 | // The number of axes of the input (bottom[0]) covered by the scale 1115 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 1116 | // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 1117 | optional int32 num_axes = 2 [default = 1]; 1118 | 1119 | // (filler is ignored unless just one bottom is given and the scale is 1120 | // a learned parameter of the layer.) 1121 | // The initialization for the learned scale parameter. 1122 | // Default is the unit (1) initialization, resulting in the ScaleLayer 1123 | // initially performing the identity operation. 1124 | optional FillerParameter filler = 3; 1125 | 1126 | // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but 1127 | // may be more efficient). Initialized with bias_filler (defaults to 0). 1128 | optional bool bias_term = 4 [default = false]; 1129 | optional FillerParameter bias_filler = 5; 1130 | } 1131 | 1132 | message SigmoidParameter { 1133 | enum Engine { 1134 | DEFAULT = 0; 1135 | CAFFE = 1; 1136 | CUDNN = 2; 1137 | } 1138 | optional Engine engine = 1 [default = DEFAULT]; 1139 | } 1140 | 1141 | message SliceParameter { 1142 | // The axis along which to slice -- may be negative to index from the end 1143 | // (e.g., -1 for the last axis). 1144 | // By default, SliceLayer concatenates blobs along the "channels" axis (1). 1145 | optional int32 axis = 3 [default = 1]; 1146 | repeated uint32 slice_point = 2; 1147 | 1148 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 1149 | optional uint32 slice_dim = 1 [default = 1]; 1150 | } 1151 | 1152 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer 1153 | message SoftmaxParameter { 1154 | enum Engine { 1155 | DEFAULT = 0; 1156 | CAFFE = 1; 1157 | CUDNN = 2; 1158 | } 1159 | optional Engine engine = 1 [default = DEFAULT]; 1160 | 1161 | // The axis along which to perform the softmax -- may be negative to index 1162 | // from the end (e.g., -1 for the last axis). 1163 | // Any other axes will be evaluated as independent softmaxes. 1164 | optional int32 axis = 2 [default = 1]; 1165 | } 1166 | 1167 | message TanHParameter { 1168 | enum Engine { 1169 | DEFAULT = 0; 1170 | CAFFE = 1; 1171 | CUDNN = 2; 1172 | } 1173 | optional Engine engine = 1 [default = DEFAULT]; 1174 | } 1175 | 1176 | // Message that stores parameters used by TileLayer 1177 | message TileParameter { 1178 | // The index of the axis to tile. 1179 | optional int32 axis = 1 [default = 1]; 1180 | 1181 | // The number of copies (tiles) of the blob to output. 
1182 |   optional int32 tiles = 2;
1183 | }
1184 | 
1185 | // Message that stores parameters used by ThresholdLayer
1186 | message ThresholdParameter {
1187 |   optional float threshold = 1 [default = 0]; // Strictly positive values
1188 | }
1189 | 
1190 | message WindowDataParameter {
1191 |   // Specify the data source.
1192 |   optional string source = 1;
1193 |   // For data pre-processing, we can do simple scaling and subtracting the
1194 |   // data mean, if provided. Note that the mean subtraction is always carried
1195 |   // out before scaling.
1196 |   optional float scale = 2 [default = 1];
1197 |   optional string mean_file = 3;
1198 |   // Specify the batch size.
1199 |   optional uint32 batch_size = 4;
1200 |   // Specify if we would like to randomly crop an image.
1201 |   optional uint32 crop_size = 5 [default = 0];
1202 |   // Specify if we want to randomly mirror data.
1203 |   optional bool mirror = 6 [default = false];
1204 |   // Foreground (object) overlap threshold
1205 |   optional float fg_threshold = 7 [default = 0.5];
1206 |   // Background (non-object) overlap threshold
1207 |   optional float bg_threshold = 8 [default = 0.5];
1208 |   // Fraction of batch that should be foreground objects
1209 |   optional float fg_fraction = 9 [default = 0.25];
1210 |   // Amount of contextual padding to add around a window
1211 |   // (used only by the window_data_layer)
1212 |   optional uint32 context_pad = 10 [default = 0];
1213 |   // Mode for cropping out a detection window
1214 |   // warp: cropped window is warped to a fixed size and aspect ratio
1215 |   // square: the tightest square around the window is cropped
1216 |   optional string crop_mode = 11 [default = "warp"];
1217 |   // cache_images: will load all images in memory for faster access
1218 |   optional bool cache_images = 12 [default = false];
1219 |   // append root_folder to locate images
1220 |   optional string root_folder = 13 [default = ""];
1221 | }
1222 | 
1223 | message SPPParameter {
1224 |   enum PoolMethod {
1225 |     MAX = 0;
1226 |     AVE = 1;
1227 |     STOCHASTIC = 2;
1228 |   }
1229 |   optional uint32 pyramid_height = 1;
1230 |   optional PoolMethod pool = 2 [default = MAX]; // The pooling method
1231 |   enum Engine {
1232 |     DEFAULT = 0;
1233 |     CAFFE = 1;
1234 |     CUDNN = 2;
1235 |   }
1236 |   optional Engine engine = 6 [default = DEFAULT];
1237 | }
1238 | 
1239 | // DEPRECATED: use LayerParameter.
1240 | message V1LayerParameter {
1241 |   repeated string bottom = 2;
1242 |   repeated string top = 3;
1243 |   optional string name = 4;
1244 |   repeated NetStateRule include = 32;
1245 |   repeated NetStateRule exclude = 33;
1246 |   enum LayerType {
1247 |     NONE = 0;
1248 |     ABSVAL = 35;
1249 |     ACCURACY = 1;
1250 |     ARGMAX = 30;
1251 |     BNLL = 2;
1252 |     CONCAT = 3;
1253 |     CONTRASTIVE_LOSS = 37;
1254 |     CONVOLUTION = 4;
1255 |     DATA = 5;
1256 |     DECONVOLUTION = 39;
1257 |     DROPOUT = 6;
1258 |     DUMMY_DATA = 32;
1259 |     EUCLIDEAN_LOSS = 7;
1260 |     ELTWISE = 25;
1261 |     EXP = 38;
1262 |     FLATTEN = 8;
1263 |     HDF5_DATA = 9;
1264 |     HDF5_OUTPUT = 10;
1265 |     HINGE_LOSS = 28;
1266 |     IM2COL = 11;
1267 |     IMAGE_DATA = 12;
1268 |     INFOGAIN_LOSS = 13;
1269 |     INNER_PRODUCT = 14;
1270 |     LRN = 15;
1271 |     MEMORY_DATA = 29;
1272 |     MULTINOMIAL_LOGISTIC_LOSS = 16;
1273 |     MVN = 34;
1274 |     POOLING = 17;
1275 |     POWER = 26;
1276 |     RELU = 18;
1277 |     SIGMOID = 19;
1278 |     SIGMOID_CROSS_ENTROPY_LOSS = 27;
1279 |     SILENCE = 36;
1280 |     SOFTMAX = 20;
1281 |     SOFTMAX_LOSS = 21;
1282 |     SPLIT = 22;
1283 |     SLICE = 33;
1284 |     TANH = 23;
1285 |     WINDOW_DATA = 24;
1286 |     THRESHOLD = 31;
1287 |   }
1288 |   optional LayerType type = 5;
1289 |   repeated BlobProto blobs = 6;
1290 |   repeated string param = 1001;
1291 |   repeated DimCheckMode blob_share_mode = 1002;
1292 |   enum DimCheckMode {
1293 |     STRICT = 0;
1294 |     PERMISSIVE = 1;
1295 |   }
1296 |   repeated float blobs_lr = 7;
1297 |   repeated float weight_decay = 8;
1298 |   repeated float loss_weight = 35;
1299 |   optional AccuracyParameter accuracy_param = 27;
1300 |   optional ArgMaxParameter argmax_param = 23;
1301 |   optional ConcatParameter concat_param = 9;
1302 |   optional ContrastiveLossParameter contrastive_loss_param = 40;
1303 |   optional ConvolutionParameter convolution_param = 10;
1304 |   optional DataParameter data_param = 11;
1305 |   optional DropoutParameter dropout_param = 12;
1306 |   optional DummyDataParameter dummy_data_param = 26;
1307 |   optional EltwiseParameter eltwise_param = 24;
1308 |   optional ExpParameter exp_param = 41;
1309 |   optional HDF5DataParameter hdf5_data_param = 13;
1310 |   optional HDF5OutputParameter hdf5_output_param = 14;
1311 |   optional HingeLossParameter hinge_loss_param = 29;
1312 |   optional ImageDataParameter image_data_param = 15;
1313 |   optional InfogainLossParameter infogain_loss_param = 16;
1314 |   optional InnerProductParameter inner_product_param = 17;
1315 |   optional LRNParameter lrn_param = 18;
1316 |   optional MemoryDataParameter memory_data_param = 22;
1317 |   optional MVNParameter mvn_param = 34;
1318 |   optional PoolingParameter pooling_param = 19;
1319 |   optional PowerParameter power_param = 21;
1320 |   optional ReLUParameter relu_param = 30;
1321 |   optional SigmoidParameter sigmoid_param = 38;
1322 |   optional SoftmaxParameter softmax_param = 39;
1323 |   optional SliceParameter slice_param = 31;
1324 |   optional TanHParameter tanh_param = 37;
1325 |   optional ThresholdParameter threshold_param = 25;
1326 |   optional WindowDataParameter window_data_param = 20;
1327 |   optional TransformationParameter transform_param = 36;
1328 |   optional LossParameter loss_param = 42;
1329 |   optional V0LayerParameter layer = 1;
1330 | }
1331 | 
1332 | // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
1333 | // in Caffe. We keep this message type around for legacy support.
1334 | message V0LayerParameter {
1335 |   optional string name = 1; // the layer name
1336 |   optional string type = 2; // the string to specify the layer type
1337 | 
1338 |   // Parameters to specify layers with inner products.
1339 |   optional uint32 num_output = 3; // The number of outputs for the layer
1340 |   optional bool biasterm = 4 [default = true]; // whether to have bias terms
1341 |   optional FillerParameter weight_filler = 5; // The filler for the weight
1342 |   optional FillerParameter bias_filler = 6; // The filler for the bias
1343 | 
1344 |   optional uint32 pad = 7 [default = 0]; // The padding size
1345 |   optional uint32 kernelsize = 8; // The kernel size
1346 |   optional uint32 group = 9 [default = 1]; // The group size for group conv
1347 |   optional uint32 stride = 10 [default = 1]; // The stride
1348 |   enum PoolMethod {
1349 |     MAX = 0;
1350 |     AVE = 1;
1351 |     STOCHASTIC = 2;
1352 |   }
1353 |   optional PoolMethod pool = 11 [default = MAX]; // The pooling method
1354 |   optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
1355 | 
1356 |   optional uint32 local_size = 13 [default = 5]; // for local response norm
1357 |   optional float alpha = 14 [default = 1.]; // for local response norm
1358 |   optional float beta = 15 [default = 0.75]; // for local response norm
1359 |   optional float k = 22 [default = 1.];
1360 | 
1361 |   // For data layers, specify the data source
1362 |   optional string source = 16;
1363 |   // For data pre-processing, we can do simple scaling and subtracting the
1364 |   // data mean, if provided. Note that the mean subtraction is always carried
1365 |   // out before scaling.
1366 |   optional float scale = 17 [default = 1];
1367 |   optional string meanfile = 18;
1368 |   // For data layers, specify the batch size.
1369 |   optional uint32 batchsize = 19;
1370 |   // For data layers, specify if we would like to randomly crop an image.
1371 |   optional uint32 cropsize = 20 [default = 0];
1372 |   // For data layers, specify if we want to randomly mirror data.
1373 |   optional bool mirror = 21 [default = false];
1374 | 
1375 |   // The blobs containing the numeric parameters of the layer
1376 |   repeated BlobProto blobs = 50;
1377 |   // The ratio that is multiplied on the global learning rate. If you want to
1378 |   // set the learning ratio for one blob, you need to set it for all blobs.
1379 |   repeated float blobs_lr = 51;
1380 |   // The weight decay that is multiplied on the global weight decay.
1381 |   repeated float weight_decay = 52;
1382 | 
1383 |   // The rand_skip variable is for the data layer to skip a few data points
1384 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
1385 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
1386 |   // be larger than the number of keys in the database.
1387 |   optional uint32 rand_skip = 53 [default = 0];
1388 | 
1389 |   // Fields related to detection (det_*)
1390 |   // foreground (object) overlap threshold
1391 |   optional float det_fg_threshold = 54 [default = 0.5];
1392 |   // background (non-object) overlap threshold
1393 |   optional float det_bg_threshold = 55 [default = 0.5];
1394 |   // Fraction of batch that should be foreground objects
1395 |   optional float det_fg_fraction = 56 [default = 0.25];
1396 | 
1397 |   // optional bool OBSOLETE_can_clobber = 57 [default = true];
1398 | 
1399 |   // Amount of contextual padding to add around a window
1400 |   // (used only by the window_data_layer)
1401 |   optional uint32 det_context_pad = 58 [default = 0];
1402 | 
1403 |   // Mode for cropping out a detection window
1404 |   // warp: cropped window is warped to a fixed size and aspect ratio
1405 |   // square: the tightest square around the window is cropped
1406 |   optional string det_crop_mode = 59 [default = "warp"];
1407 | 
1408 |   // For ReshapeLayer, one needs to specify the new dimensions.
1409 |   optional int32 new_num = 60 [default = 0];
1410 |   optional int32 new_channels = 61 [default = 0];
1411 |   optional int32 new_height = 62 [default = 0];
1412 |   optional int32 new_width = 63 [default = 0];
1413 | 
1414 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
1415 |   // It will also resize images if new_height or new_width are not zero.
1416 |   optional bool shuffle_images = 64 [default = false];
1417 | 
1418 |   // For ConcatLayer, one needs to specify the dimension for concatenation, and
1419 |   // the other dimensions must be the same for all the bottom blobs.
1420 |   // By default it will concatenate blobs along the channels dimension.
1421 |   optional uint32 concat_dim = 65 [default = 1];
1422 | 
1423 |   optional HDF5OutputParameter hdf5_output_param = 1001;
1424 | }
1425 | 
1426 | message PReLUParameter {
1427 |   // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
1428 |   // Surpassing Human-Level Performance on ImageNet Classification, 2015.
1429 | 
1430 |   // Initial value of a_i. Default is a_i=0.25 for all i.
1431 |   optional FillerParameter filler = 1;
1432 |   // Whether or not slope parameters are shared across channels.
1433 |   optional bool channel_shared = 2 [default = false];
1434 | }
1435 | 
--------------------------------------------------------------------------------
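
For reference, the WindowDataParameter message defined above is consumed by Caffe's WindowData layer. A minimal prototxt sketch is shown below; the window-file path and the threshold values are placeholders, and the layer type string "WindowData" plus the window_data_param field name follow standard Caffe naming. Cropping and mirroring for this layer are normally configured through transform_param rather than the deprecated in-message fields.

layer {
  name: "window_data"
  type: "WindowData"
  top: "data"
  top: "label"
  window_data_param {
    source: "train_windows.txt"   # placeholder window file listing images and boxes
    batch_size: 128
    fg_threshold: 0.5             # foreground (object) overlap threshold
    bg_threshold: 0.5             # background (non-object) overlap threshold
    fg_fraction: 0.25             # fraction of each batch drawn from foreground windows
    context_pad: 16               # contextual padding added around each window
    crop_mode: "warp"             # "warp" or "square", as documented above
  }
}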