├── README.md ├── caffe.proto ├── include ├── conv_dw_layer.hpp ├── coord2heatmap_layer.hpp ├── heatmap2coord.hpp ├── heatmap_loss_layer.hpp └── wing_loss_layer.hpp └── src ├── conv_dw_layer.cpp ├── conv_dw_layer.cu ├── coord2heatmap_layer.cpp ├── coord2heatmap_layer.cu ├── heatmap2coord.cpp ├── heatmap2coord.cu ├── heatmap_loss_layer.cpp ├── heatmap_loss_layer.cu ├── wing_loss_layer.cpp └── wing_loss_layer.cu /README.md: -------------------------------------------------------------------------------- 1 | # caffe-layer 2 | Implementations of some layers for Caffe. 3 | The file `caffe.proto` defines the parameter messages for these layers; each message also needs to be added as a field of `LayerParameter`. 4 | 5 | # wing loss layer 6 | Caffe implementation of **Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks** (CVPR 2018). 7 | 8 | # conv dw layer 9 | Cloned from https://github.com/suzhenghang/MobileNetv2 10 | 11 | # coord2heatmap layer 12 | Turns coordinates into heatmaps. 13 | 14 | # heatmap loss layer 15 | Defines a loss for heatmap regression. 
-------------------------------------------------------------------------------- /caffe.proto: -------------------------------------------------------------------------------- 1 | 2 | message WingLossParameter { 3 | optional float omega = 1 [default = 10]; 4 | optional float epsilon = 2 [default = 2]; 5 | } 6 | 7 | message Coord2heatmapParameter { 8 | optional uint32 height = 1; 9 | optional uint32 width = 2; 10 | optional uint32 num_points = 3; 11 | optional uint32 max_value = 4 [default = 1]; 12 | optional uint32 radius = 5 [default = 5]; 13 | } 14 | 15 | message HeatmapLossParameter { 16 | enum LossType { 17 | CE = 0; 18 | SE = 1; // Square error, actually MSE 19 | } 20 | optional LossType loss_type = 1 [default = CE]; 21 | optional float negative_ratio = 2 [default = 0.001]; 22 | optional float eps = 3 [default = 1e-8]; 23 | optional float grad_clip = 4 [default = 10.0]; 24 | optional float negative_sample_prob = 5 [default = 1.0]; 25 | } -------------------------------------------------------------------------------- /include/conv_dw_layer.hpp: -------------------------------------------------------------------------------- 1 | // Clone from https://github.com/suzhenghang/MobileNetv2 2 | 3 | #ifndef CAFFE_CONV_DW_LAYER_HPP_ 4 | #define CAFFE_CONV_DW_LAYER_HPP_ 5 | 6 | #include 7 | #include "caffe/blob.hpp" 8 | #include "caffe/layer.hpp" 9 | #include "caffe/proto/caffe.pb.h" 10 | 11 | namespace caffe { 12 | 13 | template 14 | class ConvolutionDepthwiseLayer : public Layer { 15 | public: 16 | explicit ConvolutionDepthwiseLayer(const LayerParameter& param) 17 | : Layer(param) {} 18 | virtual void LayerSetUp(const vector*>& bottom, 19 | const vector*>& top); 20 | virtual void Reshape(const vector*>& bottom, 21 | const vector*>& top); 22 | virtual inline int ExactNumBottomBlobs() const { return 1; } 23 | virtual inline int ExactNumTopBlobs() const { return 1; } 24 | virtual inline const char* type() const { return "ConvolutionDepthwise"; } 25 | protected: 26 | 
virtual void Forward_cpu(const vector*>& bottom, 27 | const vector*>& top); 28 | virtual void Forward_gpu(const vector*>& bottom, 29 | const vector*>& top); 30 | virtual void Backward_cpu(const vector*>& top, 31 | const vector& propagate_down, const vector*>& bottom); 32 | virtual void Backward_gpu(const vector*>& top, 33 | const vector& propagate_down, const vector*>& bottom); 34 | unsigned int kernel_h_; 35 | unsigned int kernel_w_; 36 | unsigned int stride_h_; 37 | unsigned int stride_w_; 38 | unsigned int pad_h_; 39 | unsigned int pad_w_; 40 | unsigned int dilation_h_; 41 | unsigned int dilation_w_; 42 | Blob weight_buffer_; 43 | Blob weight_multiplier_; 44 | Blob bias_buffer_; 45 | Blob bias_multiplier_; 46 | }; 47 | 48 | } // namespace caffe 49 | 50 | #endif // CAFFE_CONV_DW_LAYER_HPP_ 51 | -------------------------------------------------------------------------------- /include/coord2heatmap_layer.hpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Convert coordinates into multiple heatmaps. 3 | // Written by ZhouJunr 4 | // ------------------------------------------------------------------ 5 | 6 | #ifndef CAFFE_COORD2HEATMAP_LAYER_HPP_ 7 | #define CAFFE_COORD2HEATMAP_LAYER_HPP_ 8 | 9 | #include 10 | 11 | #include "caffe/layer.hpp" 12 | #include "caffe/blob.hpp" 13 | #include "caffe/common.hpp" 14 | #include "caffe/proto/caffe.pb.h" 15 | 16 | namespace caffe { 17 | 18 | /** 19 | * @brief A layer that turns coordinates into heatmaps. 20 | * coordinates: x_0, y_0, x_1, y_1, ... 21 | * heatmaps: (batch_size, num_points, h, w), all pixels are 0 except 22 | * keypoints, which are 1. NOTE(review): the max_value / radius parameters suggest the peak value and footprint are configurable - confirm against coord2heatmap_layer.cpp. 
23 | */ 24 | template 25 | class Coord2heatmapLayer : public Layer { 26 | public: 27 | explicit Coord2heatmapLayer(const LayerParameter& param) 28 | : Layer(param) {} 29 | virtual void LayerSetUp(const vector*>& bottom, 30 | const vector*>& top); 31 | virtual void Reshape(const vector*>& bottom, 32 | const vector*>& top); 33 | 34 | virtual inline const char* type() const { return "Coord2heatmap"; } 35 | virtual inline int ExactNumBottomBlobs() const { return 1; } 36 | virtual inline int ExactNumTopBlobs() const { return 1; } 37 | 38 | protected: 39 | virtual void Forward_cpu(const vector*>& bottom, 40 | const vector*>& top); 41 | virtual void Forward_gpu(const vector*>& bottom, 42 | const vector*>& top); 43 | virtual void Backward_cpu(const vector*>& top, 44 | const vector& propagate_down, const vector*>& bottom) { 45 | NOT_IMPLEMENTED; 46 | } 47 | 48 | int output_height_; 49 | int output_width_; 50 | int num_points_; 51 | int max_value_; 52 | int radius_; 53 | }; 54 | 55 | } // namespace caffe 56 | 57 | #endif // CAFFE_COORD2HEATMAP_LAYER_HPP_ -------------------------------------------------------------------------------- /include/heatmap2coord.hpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Convert heatmap into coordinates(x, y) and concat along input 3 | // channels. 4 | // Written by ZhouJunr 5 | // ------------------------------------------------------------------ 6 | 7 | #ifndef CAFFE_HEATMAP2COORD_LAYER_HPP_ 8 | #define CAFFE_HEATMAP2COORD_LAYER_HPP_ 9 | 10 | #include 11 | 12 | #include "caffe/layer.hpp" 13 | #include "caffe/blob.hpp" 14 | #include "caffe/common.hpp" 15 | #include "caffe/proto/caffe.pb.h" 16 | 17 | namespace caffe { 18 | 19 | /** 20 | * @brief Turn heatmap into coordinates(x, y). 
Assume input shape 21 | * [n, c, h, w], output shape will be [n, 2*c] 22 | */ 23 | template 24 | class Heatmap2CoordLayer : public Layer { 25 | public: 26 | explicit Heatmap2CoordLayer(const LayerParameter& param) 27 | : Layer(param) {} 28 | virtual void LayerSetUp(const vector*>& bottom, 29 | const vector*>& top); 30 | virtual void Reshape(const vector*>& bottom, 31 | const vector*>& top); 32 | 33 | virtual inline const char* type() const { return "Heatmap2coord"; } 34 | virtual inline int ExactNumBottomBlobs() const { return 1; } 35 | virtual inline int ExactNumTopBlobs() const { return 1; } 36 | 37 | protected: 38 | virtual void Forward_cpu(const vector*>& bottom, 39 | const vector*>& top); 40 | 41 | int num_points_; 42 | }; 43 | 44 | } // namespace caffe 45 | 46 | #endif -------------------------------------------------------------------------------- /include/heatmap_loss_layer.hpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Heatmap Loss layer. 3 | // Written by ZhouJunr 4 | // ------------------------------------------------------------------ 5 | 6 | #ifndef CAFFE_HEATMAP_LOSS_LAYER_HPP_ 7 | #define CAFFE_HEATMAP_LOSS_LAYER_HPP_ 8 | 9 | #include 10 | 11 | #include "caffe/blob.hpp" 12 | #include "caffe/common.hpp" 13 | #include "caffe/layer.hpp" 14 | #include "caffe/layers/loss_layer.hpp" 15 | #include "caffe/proto/caffe.pb.h" 16 | 17 | namespace caffe { 18 | 19 | /** 20 | * @brief Heatmap loss layer. 21 | * Loss layer for heatmap input. Accepts 2 or 3 bottom blobs (see MinBottomBlobs / MaxBottomBlobs). 
22 | */ 23 | 24 | template 25 | class HeatmapLossLayer : public LossLayer { 26 | public: 27 | explicit HeatmapLossLayer(const LayerParameter& param) 28 | : LossLayer(param), diff_() {} 29 | virtual void LayerSetUp(const vector*>& bottom, 30 | const vector*>& top); 31 | virtual void Reshape(const vector*>& bottom, 32 | const vector*>& top); 33 | 34 | virtual inline const char* type() const { return "HeatmapLoss"; } 35 | 36 | virtual inline int ExactNumBottomBlobs() const { return -1; } 37 | virtual inline int MinBottomBlobs() const { return 2; } 38 | virtual inline int MaxBottomBlobs() const { return 3; } 39 | 40 | protected: 41 | virtual void Forward_cpu(const vector*>& bottom, 42 | const vector*>& top); 43 | virtual void Forward_gpu(const vector*>& bottom, 44 | const vector*>& top); 45 | 46 | virtual void Backward_cpu(const vector*>& top, 47 | const vector& propagate_down, const vector*>& bottom); 48 | virtual void Backward_gpu(const vector*>& top, 49 | const vector& propagate_down, const vector*>& bottom); 50 | 51 | Blob diff_; 52 | bool has_weights_; 53 | Dtype negative_ratio_; 54 | Dtype eps_; 55 | Dtype grad_clip_; 56 | float negative_sample_prob_; 57 | Blob rand_mask_; 58 | }; 59 | 60 | 61 | } // namespace caffe 62 | 63 | #endif // CAFFE_HEATMAP_LOSS_LAYER_HPP_ -------------------------------------------------------------------------------- /include/wing_loss_layer.hpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // CVPR 2018 4 | // Written by ZhouJunr 5 | // ------------------------------------------------------------------ 6 | 7 | #ifndef CAFFE_WING_LOSS_LAYERS_HPP_ 8 | #define CAFFE_WING_LOSS_LAYERS_HPP_ 9 | 10 | #include 11 | 12 | #include "caffe/blob.hpp" 13 | #include "caffe/common.hpp" 14 | #include "caffe/layer.hpp" 15 | #include "caffe/layers/loss_layer.hpp" 16 | #include "caffe/proto/caffe.pb.h" 17 | 18 | namespace caffe { 19 | 20 | 
template 21 | class WingLossLayer : public LossLayer { 22 | public: 23 | explicit WingLossLayer(const LayerParameter& param) 24 | : LossLayer(param), diff_() {} 25 | virtual void LayerSetUp(const vector*>& bottom, 26 | const vector*>& top); 27 | virtual void Reshape(const vector*>& bottom, 28 | const vector*>& top); 29 | 30 | virtual inline const char* type() const { return "WingLoss"; } 31 | 32 | virtual inline int ExactNumBottomBlobs() const { return -1; } 33 | virtual inline int MinBottomBlobs() const { return 2; } 34 | virtual inline int MaxBottomBlobs() const { return 3; } 35 | 36 | protected: 37 | virtual void Forward_cpu(const vector*>& bottom, 38 | const vector*>& top); 39 | virtual void Forward_gpu(const vector*>& bottom, 40 | const vector*>& top); 41 | 42 | virtual void Backward_cpu(const vector*>& top, 43 | const vector& propagate_down, const vector*>& bottom); 44 | virtual void Backward_gpu(const vector*>& top, 45 | const vector& propagate_down, const vector*>& bottom); 46 | 47 | Blob diff_; 48 | Blob errors_; 49 | float omega_; 50 | float epsilon_; 51 | float C_; 52 | bool has_weights_; 53 | Dtype norm_value_; 54 | }; 55 | 56 | } // namespace caffe 57 | 58 | #endif // CAFFE_WING_LOSS_LAYERS_HPP_ 59 | -------------------------------------------------------------------------------- /src/conv_dw_layer.cpp: -------------------------------------------------------------------------------- 1 | // Clone from https://github.com/suzhenghang/MobileNetv2 2 | 3 | #include 4 | #include 5 | #include "caffe/filler.hpp" 6 | #include "caffe/layers/conv_dw_layer.hpp" 7 | 8 | namespace caffe { 9 | 10 | template 11 | void ConvolutionDepthwiseLayer::LayerSetUp(const vector*>& bottom, 12 | const vector*>& top) { 13 | ConvolutionParameter conv_param = this->layer_param_.convolution_param(); 14 | if (conv_param.has_kernel_h() && conv_param.has_kernel_w()) { 15 | kernel_h_ = conv_param.kernel_h(); 16 | kernel_w_ = conv_param.kernel_w(); 17 | } else { 18 | if 
(conv_param.kernel_size_size() == 1) 19 | { 20 | kernel_h_ = conv_param.kernel_size(0); 21 | kernel_w_ = conv_param.kernel_size(0); 22 | } 23 | else 24 | { 25 | kernel_h_ = conv_param.kernel_size(0); 26 | kernel_w_ = conv_param.kernel_size(1); 27 | } 28 | } 29 | if (conv_param.has_stride_h() && conv_param.has_stride_w()) { 30 | stride_h_ = conv_param.stride_h(); 31 | stride_w_ = conv_param.stride_w(); 32 | } else { 33 | if (conv_param.stride_size() == 1) 34 | { 35 | stride_h_ = conv_param.stride(0); 36 | stride_w_ = conv_param.stride(0); 37 | } 38 | else 39 | { 40 | stride_h_ = conv_param.stride(0); 41 | stride_w_ = conv_param.stride(1); 42 | } 43 | } 44 | if (conv_param.has_pad_h() && conv_param.has_pad_w()) { 45 | pad_h_ = conv_param.pad_h(); 46 | pad_w_ = conv_param.pad_w(); 47 | } else { 48 | if (conv_param.pad_size() == 1) 49 | { 50 | pad_h_ = conv_param.pad(0); 51 | pad_w_ = conv_param.pad(0); 52 | } 53 | else 54 | { 55 | pad_h_ = conv_param.pad(0); 56 | pad_w_ = conv_param.pad(1); 57 | } 58 | } 59 | if (conv_param.dilation_size() > 0) 60 | { 61 | if (conv_param.dilation_size() == 1) 62 | { 63 | dilation_h_ = conv_param.dilation(0); 64 | dilation_w_ = conv_param.dilation(0); 65 | } 66 | else 67 | { 68 | dilation_h_ = conv_param.dilation(0); 69 | dilation_w_ = conv_param.dilation(1); 70 | } 71 | } 72 | else 73 | { 74 | dilation_h_ = 1; 75 | dilation_w_ = 1; 76 | } 77 | vector weight_shape(4); 78 | weight_shape[0] = bottom[0]->channels(); 79 | weight_shape[1] = 1; 80 | weight_shape[2] = kernel_h_; 81 | weight_shape[3] = kernel_w_; 82 | vector bias_shape; 83 | if (conv_param.bias_term()) 84 | { 85 | bias_shape.push_back(bottom[0]->channels()); 86 | } 87 | if (this->blobs_.size() == 0) { 88 | if (conv_param.bias_term()) { 89 | this->blobs_.resize(2); 90 | } else { 91 | this->blobs_.resize(1); 92 | } 93 | this->blobs_[0].reset(new Blob(weight_shape)); 94 | shared_ptr > weight_filler(GetFiller(conv_param.weight_filler())); 95 | 
weight_filler->Fill(this->blobs_[0].get()); 96 | if (conv_param.bias_term()) { 97 | this->blobs_[1].reset(new Blob(bias_shape)); 98 | shared_ptr > bias_filler(GetFiller(conv_param.bias_filler())); 99 | bias_filler->Fill(this->blobs_[1].get()); 100 | } 101 | } 102 | this->param_propagate_down_.resize(this->blobs_.size(), true); 103 | } 104 | 105 | template 106 | void ConvolutionDepthwiseLayer::Reshape(const vector*>& bottom, 107 | const vector*>& top) { 108 | vector top_shape; 109 | top_shape.push_back(bottom[0]->num()); 110 | top_shape.push_back(bottom[0]->channels()); 111 | top_shape.push_back((bottom[0]->height() + 2 * pad_h_ - (dilation_h_ * (kernel_h_ - 1) + 1)) / stride_h_ + 1); 112 | top_shape.push_back((bottom[0]->width() + 2 * pad_w_ - (dilation_w_ * (kernel_w_ - 1) + 1)) / stride_w_ + 1); 113 | top[0]->Reshape(top_shape); 114 | vector weight_buffer_shape; 115 | weight_buffer_shape.push_back(bottom[0]->channels()); 116 | weight_buffer_shape.push_back(kernel_h_); 117 | weight_buffer_shape.push_back(kernel_w_); 118 | weight_buffer_shape.push_back(bottom[0]->num()); 119 | weight_buffer_shape.push_back(top[0]->height()); 120 | weight_buffer_shape.push_back(top[0]->width()); 121 | weight_buffer_.Reshape(weight_buffer_shape); 122 | vector weight_multiplier_shape; 123 | weight_multiplier_shape.push_back(bottom[0]->num()); 124 | weight_multiplier_shape.push_back(top[0]->height()); 125 | weight_multiplier_shape.push_back(top[0]->width()); 126 | weight_multiplier_.Reshape(weight_multiplier_shape); 127 | caffe_gpu_set(weight_multiplier_.count(), Dtype(1), weight_multiplier_.mutable_gpu_data()); 128 | if (this->layer_param_.convolution_param().bias_term()) 129 | { 130 | vector bias_buffer_shape; 131 | bias_buffer_shape.push_back(bottom[0]->channels()); 132 | bias_buffer_shape.push_back(bottom[0]->num()); 133 | bias_buffer_shape.push_back(top[0]->height()); 134 | bias_buffer_shape.push_back(top[0]->width()); 135 | bias_buffer_.Reshape(bias_buffer_shape); 136 | vector 
bias_multiplier_shape; 137 | bias_multiplier_shape.push_back(bottom[0]->num()); 138 | bias_multiplier_shape.push_back(top[0]->height()); 139 | bias_multiplier_shape.push_back(top[0]->width()); 140 | bias_multiplier_.Reshape(bias_multiplier_shape); 141 | caffe_gpu_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_gpu_data()); 142 | } 143 | } 144 | 145 | template 146 | void ConvolutionDepthwiseLayer::Forward_cpu(const vector*>& bottom, 147 | const vector*>& top) 148 | { 149 | const int num = top[0]->num(); 150 | const int channels = top[0]->channels(); 151 | const int top_height = top[0]->height(); 152 | const int top_width = top[0]->width(); 153 | const int bottom_height = bottom[0]->height(); 154 | const int bottom_width = bottom[0]->width(); 155 | const Dtype* bottom_data = bottom[0]->cpu_data(); 156 | const Dtype* weight_data_base = this->blobs_[0]->cpu_data(); 157 | Dtype* top_data = top[0]->mutable_cpu_data(); 158 | for (int n = 0; n < num; ++n) 159 | { 160 | for (int c = 0; c < channels; ++c) 161 | { 162 | for (int h = 0; h < top_height; ++h) 163 | { 164 | for (int w = 0; w < top_width; ++w) 165 | { 166 | const Dtype* weight_data = weight_data_base + c * kernel_h_ * kernel_w_; 167 | Dtype value = 0; 168 | for (int kh = 0; kh < kernel_h_; ++kh) 169 | { 170 | for (int kw = 0; kw < kernel_w_; ++kw) 171 | { 172 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_; 173 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_; 174 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 175 | { 176 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 177 | value += (*weight_data) * bottom_data[offset]; 178 | } 179 | ++weight_data; 180 | } 181 | } 182 | *top_data++ = value; 183 | } 184 | } 185 | } 186 | } 187 | if (this->layer_param_.convolution_param().bias_term()) 188 | { 189 | top_data = top[0]->mutable_cpu_data(); 190 | for (int n = 0; n < num; ++n) 191 | { 192 | const Dtype* bias_data = 
this->blobs_[1]->cpu_data(); 193 | for (int c = 0; c < channels; ++c) 194 | { 195 | for (int h = 0; h < top_height; ++h) 196 | { 197 | for (int w = 0; w < top_width; ++w) 198 | { 199 | *top_data += *bias_data; 200 | ++top_data; 201 | } 202 | } 203 | ++bias_data; 204 | } 205 | } 206 | } 207 | } 208 | 209 | template 210 | void ConvolutionDepthwiseLayer::Backward_cpu(const vector*>& top, 211 | const vector& propagate_down, const vector*>& bottom) 212 | { 213 | const int num = top[0]->num(); 214 | const int channels = top[0]->channels(); 215 | const int top_height = top[0]->height(); 216 | const int top_width = top[0]->width(); 217 | const int bottom_height = bottom[0]->height(); 218 | const int bottom_width = bottom[0]->width(); 219 | caffe_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_cpu_diff()); 220 | if (this->layer_param_.convolution_param().bias_term() && this->param_propagate_down_[1]) 221 | { 222 | const Dtype* top_diff = top[0]->cpu_diff(); 223 | for (int n = 0; n < num; ++n) 224 | { 225 | Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); 226 | for (int c = 0; c < channels; ++c) 227 | { 228 | for (int h = 0; h < top_height; ++h) 229 | { 230 | for (int w = 0; w < top_width; ++w) 231 | { 232 | *bias_diff += *top_diff; 233 | ++top_diff; 234 | } 235 | } 236 | ++bias_diff; 237 | } 238 | } 239 | } 240 | if (this->param_propagate_down_[0]) 241 | { 242 | const Dtype* top_diff = top[0]->cpu_diff(); 243 | const Dtype* bottom_data = bottom[0]->cpu_data(); 244 | Dtype* weight_diff_base = this->blobs_[0]->mutable_cpu_diff(); 245 | for (int n = 0; n < num; ++n) 246 | { 247 | for (int c = 0; c < channels; ++c) 248 | { 249 | for (int h = 0; h < top_height; ++h) 250 | { 251 | for (int w = 0; w < top_width; ++w) 252 | { 253 | Dtype* weight_diff = weight_diff_base + c * kernel_h_ * kernel_w_; 254 | for (int kh = 0; kh < kernel_h_; ++kh) 255 | { 256 | for (int kw = 0; kw < kernel_w_; ++kw) 257 | { 258 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_; 259 | 
int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_; 260 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 261 | { 262 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 263 | *weight_diff += bottom_data[offset] * (*top_diff); 264 | } 265 | ++weight_diff; 266 | } 267 | } 268 | ++top_diff; 269 | } 270 | } 271 | } 272 | } 273 | } 274 | if (propagate_down[0]) 275 | { 276 | const Dtype* top_diff = top[0]->cpu_diff(); 277 | const Dtype* weight_data_base = this->blobs_[0]->cpu_data(); 278 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); 279 | for (int n = 0; n < num; ++n) 280 | { 281 | for (int c = 0; c < channels; ++c) 282 | { 283 | for (int h = 0; h < top_height; ++h) 284 | { 285 | for (int w = 0; w < top_width; ++w) 286 | { 287 | const Dtype* weight_data = weight_data_base + c * kernel_h_ * kernel_w_; 288 | for (int kh = 0; kh < kernel_h_; ++kh) 289 | { 290 | for (int kw = 0; kw < kernel_w_; ++kw) 291 | { 292 | int h_in = -pad_h_ + h * stride_h_ + kh * dilation_h_; 293 | int w_in = -pad_w_ + w * stride_w_ + kw * dilation_w_; 294 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 295 | { 296 | int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 297 | bottom_diff[offset] += (*weight_data) * (*top_diff); 298 | } 299 | ++weight_data; 300 | } 301 | } 302 | ++top_diff; 303 | } 304 | } 305 | } 306 | } 307 | } 308 | } 309 | 310 | #ifdef CPU_ONLY 311 | STUB_GPU(ConvolutionDepthwiseLayer); 312 | #endif 313 | 314 | INSTANTIATE_CLASS(ConvolutionDepthwiseLayer); 315 | REGISTER_LAYER_CLASS(ConvolutionDepthwise); 316 | 317 | } // namespace caffe 318 | -------------------------------------------------------------------------------- /src/conv_dw_layer.cu: -------------------------------------------------------------------------------- 1 | // Clone from https://github.com/suzhenghang/MobileNetv2 2 | 3 | #include 4 | #include 
"caffe/layers/conv_dw_layer.hpp" 5 | #include "caffe/util/gpu_util.cuh" 6 | 7 | namespace caffe { 8 | 9 | template 10 | __global__ void ConvolutionDepthwiseWeightForward(const int nthreads, 11 | const Dtype* const bottom_data, const Dtype* const weight_data, const int num, const int channels, 12 | const int top_height, const int top_width, const int bottom_height, const int bottom_width, 13 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, 14 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, 15 | Dtype* const top_data) { 16 | CUDA_KERNEL_LOOP(index, nthreads) { 17 | const int n = index / channels / top_height / top_width; 18 | const int c = (index / top_height / top_width) % channels; 19 | const int h = (index / top_width) % top_height; 20 | const int w = index % top_width; 21 | const Dtype* weight = weight_data + c * kernel_h * kernel_w; 22 | Dtype value = 0; 23 | for (int kh = 0; kh < kernel_h; ++kh) 24 | { 25 | for (int kw = 0; kw < kernel_w; ++kw) 26 | { 27 | const int h_in = -pad_h + h * stride_h + kh * dilation_h; 28 | const int w_in = -pad_w + w * stride_w + kw * dilation_w; 29 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 30 | { 31 | const int offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 32 | value += (*weight) * bottom_data[offset]; 33 | } 34 | ++weight; 35 | } 36 | } 37 | top_data[index] = value; 38 | } 39 | } 40 | 41 | template 42 | __global__ void ConvolutionDepthwiseBiasForward(const int nthreads, 43 | const Dtype* const bias_data, const int num, const int channels, 44 | const int top_height, const int top_width, Dtype* const top_data) { 45 | CUDA_KERNEL_LOOP(index, nthreads) { 46 | const int c = (index / top_height / top_width) % channels; 47 | top_data[index] += bias_data[c]; 48 | } 49 | } 50 | 51 | template 52 | void ConvolutionDepthwiseLayer::Forward_gpu(const vector*>& bottom, 53 | const vector*>& top) { 54 | const 
Dtype* bottom_data = bottom[0]->gpu_data(); 55 | Dtype* top_data = top[0]->mutable_gpu_data(); 56 | const Dtype* weight_data = this->blobs_[0]->gpu_data(); 57 | const int count = top[0]->count(); 58 | const int num = top[0]->num(); 59 | const int channels = top[0]->channels(); 60 | const int top_height = top[0]->height(); 61 | const int top_width = top[0]->width(); 62 | const int bottom_height = bottom[0]->height(); 63 | const int bottom_width = bottom[0]->width(); 64 | ConvolutionDepthwiseWeightForward<<>>( 65 | count, bottom_data, weight_data, num, channels, 66 | top_height, top_width, bottom_height, bottom_width, 67 | kernel_h_, kernel_w_, stride_h_, stride_w_, 68 | pad_h_, pad_w_, dilation_h_, dilation_w_, top_data); 69 | if (this->layer_param_.convolution_param().bias_term()) 70 | { 71 | const Dtype* bias_data = this->blobs_[1]->gpu_data(); 72 | ConvolutionDepthwiseBiasForward<<>>( 73 | count, bias_data, num, channels, 74 | top_height, top_width, top_data); 75 | } 76 | } 77 | 78 | template 79 | __global__ void ConvolutionDepthwiseWeightBackward(const int nthreads, 80 | const Dtype* const top_diff, const Dtype* const bottom_data, const int num, const int channels, 81 | const int top_height, const int top_width, const int bottom_height, const int bottom_width, 82 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, 83 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, 84 | Dtype* const buffer_data) { 85 | CUDA_KERNEL_LOOP(index, nthreads) { 86 | const int h = (index / top_width) % top_height; 87 | const int w = index % top_width; 88 | const int kh = (index / kernel_w / num / top_height / top_width) % kernel_h; 89 | const int kw = (index / num / top_height / top_width) % kernel_w; 90 | const int h_in = -pad_h + h * stride_h + kh * dilation_h; 91 | const int w_in = -pad_w + w * stride_w + kw * dilation_w; 92 | if ((h_in >= 0) && (h_in < bottom_height) && (w_in >= 0) && (w_in < bottom_width)) 93 | { 94 | 
const int c = index / kernel_h / kernel_w / num / top_height / top_width; 95 | const int n = (index / top_height / top_width) % num; 96 | const int top_offset = ((n * channels + c) * top_height + h) * top_width + w; 97 | const int bottom_offset = ((n * channels + c) * bottom_height + h_in) * bottom_width + w_in; 98 | buffer_data[index] = top_diff[top_offset] * bottom_data[bottom_offset]; 99 | } 100 | else 101 | { 102 | buffer_data[index] = 0; 103 | } 104 | } 105 | } 106 | 107 | template 108 | __global__ void ConvolutionDepthwiseBottomBackward(const int nthreads, 109 | const Dtype* const top_diff, const Dtype* const weight_data, const int num, const int channels, 110 | const int top_height, const int top_width, const int bottom_height, const int bottom_width, 111 | const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, 112 | const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, 113 | Dtype* const bottom_diff) { 114 | CUDA_KERNEL_LOOP(index, nthreads) { 115 | const int n = index / channels / bottom_height / bottom_width; 116 | const int c = (index / bottom_height / bottom_width) % channels; 117 | const int h = (index / bottom_width) % bottom_height; 118 | const int w = index % bottom_width; 119 | const Dtype* weight = weight_data + c * kernel_h * kernel_w; 120 | Dtype value = 0; 121 | for (int kh = 0; kh < kernel_h; ++kh) 122 | { 123 | for (int kw = 0; kw < kernel_w; ++kw) 124 | { 125 | const int h_out_s = h + pad_h - kh * dilation_h; 126 | const int w_out_s = w + pad_w - kw * dilation_w; 127 | if (((h_out_s % stride_h) == 0) && ((w_out_s % stride_w) == 0)) 128 | { 129 | const int h_out = h_out_s / stride_h; 130 | const int w_out = w_out_s / stride_w; 131 | if ((h_out >= 0) && (h_out < top_height) && (w_out >= 0) && (w_out < top_width)) 132 | { 133 | const int offset = ((n * channels + c) * top_height + h_out) * top_width + w_out; 134 | value += (*weight) * top_diff[offset]; 135 | } 136 | } 137 | ++weight; 138 | } 139 
| } 140 | bottom_diff[index] += value; 141 | } 142 | } 143 | 144 | template 145 | __global__ void ConvolutionDepthwiseBiasBackward(const int nthreads, 146 | const Dtype* const top_diff, const int num, const int channels, 147 | const int top_height, const int top_width, Dtype* const buffer_data) { 148 | CUDA_KERNEL_LOOP(index, nthreads) { 149 | const int c = index / num / top_height / top_width; 150 | const int n = (index / top_height / top_width) % num; 151 | const int h = (index / top_width) % top_height; 152 | const int w = index % top_width; 153 | const int offset = ((n * channels + c) * top_height + h) * top_width + w; 154 | buffer_data[index] = top_diff[offset]; 155 | } 156 | } 157 | 158 | template 159 | void ConvolutionDepthwiseLayer::Backward_gpu(const vector*>& top, 160 | const vector& propagate_down, const vector*>& bottom) { 161 | const Dtype* top_diff = top[0]->gpu_diff(); 162 | const int bottom_count = bottom[0]->count(); 163 | const int num = top[0]->num(); 164 | const int channels = top[0]->channels(); 165 | const int top_height = top[0]->height(); 166 | const int top_width = top[0]->width(); 167 | const int bottom_height = bottom[0]->height(); 168 | const int bottom_width = bottom[0]->width(); 169 | const int length = num * top_height * top_width; 170 | caffe_gpu_set(bottom_count, Dtype(0), bottom[0]->mutable_gpu_diff()); 171 | if (this->layer_param_.convolution_param().bias_term() && this->param_propagate_down_[1]) 172 | { 173 | const int bias_buffer_count = bias_buffer_.count(); 174 | Dtype* bias_buffer_mutable_data = bias_buffer_.mutable_gpu_data(); 175 | ConvolutionDepthwiseBiasBackward<<>>( 176 | bias_buffer_count, top_diff, num, channels, 177 | top_height, top_width, bias_buffer_mutable_data); 178 | const int bias_count = this->blobs_[1]->count(); 179 | const Dtype* bias_buffer_data = bias_buffer_.gpu_data(); 180 | Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); 181 | const Dtype* bias_multiplier_data = bias_multiplier_.gpu_data(); 182 
| caffe_gpu_gemv(CblasNoTrans, bias_count, length, Dtype(1), bias_buffer_data, bias_multiplier_data, Dtype(1), bias_diff); 183 | } 184 | if (this->param_propagate_down_[0]) 185 | { 186 | const int weight_buffer_count = weight_buffer_.count(); 187 | const Dtype* bottom_data = bottom[0]->gpu_data(); 188 | Dtype* weight_buffer_mutable_data = weight_buffer_.mutable_gpu_data(); 189 | ConvolutionDepthwiseWeightBackward<<>>( 190 | weight_buffer_count, top_diff, bottom_data, num, channels, 191 | top_height, top_width, bottom_height, bottom_width, 192 | kernel_h_, kernel_w_, stride_h_, stride_w_, 193 | pad_h_, pad_w_, dilation_h_, dilation_w_, weight_buffer_mutable_data); 194 | const int weight_count = this->blobs_[0]->count(); 195 | const Dtype* weight_buffer_data = weight_buffer_.gpu_data(); 196 | Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff(); 197 | const Dtype* weight_multiplier_data = weight_multiplier_.gpu_data(); 198 | caffe_gpu_gemv(CblasNoTrans, weight_count, length, Dtype(1), weight_buffer_data, weight_multiplier_data, Dtype(1), weight_diff); 199 | } 200 | if (propagate_down[0]) 201 | { 202 | const Dtype* weight_data = this->blobs_[0]->gpu_data(); 203 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); 204 | ConvolutionDepthwiseBottomBackward<<>>( 205 | bottom_count, top_diff, weight_data, num, channels, 206 | top_height, top_width, bottom_height, bottom_width, 207 | kernel_h_, kernel_w_, stride_h_, stride_w_, 208 | pad_h_, pad_w_, dilation_h_, dilation_w_, bottom_diff); 209 | } 210 | } 211 | 212 | INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionDepthwiseLayer); 213 | 214 | } // namespace caffe 215 | -------------------------------------------------------------------------------- /src/coord2heatmap_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "caffe/layers/coord2heatmap_layer.hpp" 4 | 5 | namespace caffe { 6 | 7 | template 8 | void Coord2heatmapLayer::LayerSetUp(const vector*>& bottom, 
9 | const vector*>& top) { 10 | output_height_ = this->layer_param_.coord2heatmap_param().height(); 11 | CHECK_GT(output_height_, 0) << "Coord2heatmapLayer height must be positive."; 12 | output_width_ = this->layer_param_.coord2heatmap_param().width(); 13 | CHECK_GT(output_width_, 0) << "Coord2heatmapLayer width must be positive."; 14 | num_points_ = this->layer_param_.coord2heatmap_param().num_points(); 15 | CHECK_GT(num_points_, 0) << "Coord2heatmapLayer num_points must be positive."; 16 | // LOG(INFO) << bottom[0]->shape_string(); 17 | int bottom_points = bottom[0]->shape(1) / 2; 18 | CHECK_LE(num_points_, bottom_points) << "Coord2heatmapLayer num_points must " 19 | "be less or equal to number of inputs points."; 20 | max_value_ = this->layer_param_.coord2heatmap_param().max_value(); 21 | CHECK_GE(max_value_, 1) << "Coord2heatmapLayer max_value must be greater " 22 | "or equal to 1"; 23 | radius_ = this->layer_param_.coord2heatmap_param().radius(); 24 | if (radius_ != 1) 25 | CHECK_EQ(radius_, 5) << "Only support radius 5, you can set radius to 1" 26 | "to not use gaussian blur."; 27 | } 28 | 29 | template 30 | void Coord2heatmapLayer::Reshape(const vector*>& bottom, 31 | const vector*>& top) { 32 | top[0]->Reshape(bottom[0]->shape(0), num_points_, output_height_, output_width_); 33 | caffe_set(top[0]->count(), Dtype(0), top[0]->mutable_cpu_data()); 34 | } 35 | 36 | template 37 | void Coord2heatmapLayer::Forward_cpu(const vector*>& bottom, 38 | const vector*>& top) { 39 | const Dtype* bottom_data = bottom[0]->cpu_data(); 40 | Dtype* top_data = top[0]->mutable_cpu_data(); 41 | const int batch_size = bottom[0]->shape(0); 42 | for (int b = 0; b < batch_size; ++b) { 43 | for (int c = 0; c < num_points_; c++) { 44 | int tmp = 2 * c * (b + 1); 45 | int x = (int)bottom_data[tmp]; 46 | int y = (int)bottom_data[tmp + 1]; 47 | x = x > (output_width_ - 1) ? output_width_ -1 : x; 48 | y = y > (output_height_ - 1) ? 
output_height_ -1 : y; 49 | if (x > 0 && y > 0) { 50 | top_data[top[0]->offset(b, c, y, x)] = Dtype(max_value_); 51 | } 52 | } 53 | } 54 | } 55 | 56 | #ifdef CPU_ONLY 57 | STUB_GPU_FORWARD(Coord2heatmapLayer, Forward); 58 | #endif 59 | 60 | INSTANTIATE_CLASS(Coord2heatmapLayer); 61 | REGISTER_LAYER_CLASS(Coord2heatmap); 62 | 63 | } // namespace caffe -------------------------------------------------------------------------------- /src/coord2heatmap_layer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "caffe/layers/coord2heatmap_layer.hpp" 5 | 6 | namespace caffe { 7 | 8 | template 9 | __global__ void Coord2heatmapForward(const int n, 10 | const int batch_size, 11 | const int num_points, 12 | const int height, 13 | const int width, 14 | const int max_value, 15 | const int radius, 16 | const Dtype* in, Dtype* out) { 17 | CUDA_KERNEL_LOOP(index, n) { 18 | int c = index % num_points; 19 | int b = index / num_points; 20 | int tmp = 2 * c * (b + 1); 21 | int x = int(in[tmp]); 22 | int y = int(in[tmp + 1]); 23 | x = x > (width - 1) ? width -1 : x; 24 | y = y > (height - 1) ? 
height -1 : y; 25 | if (x > 0 && y > 0) { 26 | out[((b * num_points + c) * height + y) * width + x] = Dtype(max_value); 27 | } 28 | 29 | // No test, and ugly 30 | if (radius == 5) { 31 | out[((b * num_points + c) * height + y-2) * width + x-2] = Dtype(6.6585366) * Dtype(max_value); 32 | out[((b * num_points + c) * height + y+2) * width + x-2] = Dtype(6.6585366) * Dtype(max_value); 33 | out[((b * num_points + c) * height + y-2) * width + x+2] = Dtype(6.6585366) * Dtype(max_value); 34 | out[((b * num_points + c) * height + y+2) * width + x+2] = Dtype(6.6585366) * Dtype(max_value); 35 | 36 | out[((b * num_points + c) * height + y-1) * width + x-2] = Dtype(6.6585366) * 4 * Dtype(max_value); 37 | out[((b * num_points + c) * height + y+1) * width + x-2] = Dtype(6.6585366) * 4 * Dtype(max_value); 38 | out[((b * num_points + c) * height + y-1) * width + x+2] = Dtype(6.6585366) * 4 * Dtype(max_value); 39 | out[((b * num_points + c) * height + y+1) * width + x+2] = Dtype(6.6585366) * 4 * Dtype(max_value); 40 | 41 | out[((b * num_points + c) * height + y-2) * width + x-1] = Dtype(6.6585366) * 4 * Dtype(max_value); 42 | out[((b * num_points + c) * height + y+2) * width + x-1] = Dtype(6.6585366) * 4 * Dtype(max_value); 43 | out[((b * num_points + c) * height + y-2) * width + x+1] = Dtype(6.6585366) * 4 * Dtype(max_value); 44 | out[((b * num_points + c) * height + y+2) * width + x+1] = Dtype(6.6585366) * 4 * Dtype(max_value); 45 | 46 | out[((b * num_points + c) * height + y-2) * width + x] = Dtype(6.6585366) * 7 * Dtype(max_value); 47 | out[((b * num_points + c) * height + y+2) * width + x] = Dtype(6.6585366) * 7 * Dtype(max_value); 48 | out[((b * num_points + c) * height + y) * width + x+2] = Dtype(6.6585366) * 7 * Dtype(max_value); 49 | out[((b * num_points + c) * height + y) * width + x-2] = Dtype(6.6585366) * 7 * Dtype(max_value); 50 | 51 | out[((b * num_points + c) * height + y-1) * width + x-1] = Dtype(6.6585366) * 16 * Dtype(max_value); 52 | out[((b * num_points + c) * 
height + y+1) * width + x-1] = Dtype(6.6585366) * 16 * Dtype(max_value); 53 | out[((b * num_points + c) * height + y-1) * width + x+1] = Dtype(6.6585366) * 16 * Dtype(max_value); 54 | out[((b * num_points + c) * height + y+1) * width + x+1] = Dtype(6.6585366) * 16 * Dtype(max_value); 55 | 56 | out[((b * num_points + c) * height + y-1) * width + x] = Dtype(6.6585366) * 26 * Dtype(max_value); 57 | out[((b * num_points + c) * height + y+1) * width + x] = Dtype(6.6585366) * 26 * Dtype(max_value); 58 | out[((b * num_points + c) * height + y) * width + x+1] = Dtype(6.6585366) * 26 * Dtype(max_value); 59 | out[((b * num_points + c) * height + y) * width + x-1] = Dtype(6.6585366) * 26 * Dtype(max_value); 60 | } 61 | 62 | } 63 | } 64 | 65 | template 66 | void Coord2heatmapLayer::Forward_gpu(const vector*>& bottom, 67 | const vector*>& top) { 68 | const Dtype* bottom_data = bottom[0]->gpu_data(); 69 | Dtype* top_data = top[0]->mutable_gpu_data(); 70 | const int batch_size = bottom[0]->shape(0); 71 | const int count = batch_size * num_points_; 72 | Coord2heatmapForward<<>>( 73 | count, batch_size, num_points_, 74 | output_height_, output_width_, max_value_, radius_, 75 | bottom_data, top_data); 76 | CUDA_POST_KERNEL_CHECK; 77 | } 78 | 79 | INSTANTIATE_LAYER_GPU_FORWARD(Coord2heatmapLayer); 80 | 81 | } -------------------------------------------------------------------------------- /src/heatmap2coord.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "caffe/layers/heatmap2coord.hpp" 5 | 6 | namespace caffe { 7 | 8 | template 9 | void Heatmap2coordLayer::LayerSetup(const vector*>& bottom, 10 | const vector*>& top) { 11 | num_points_ = bottom[0]->shape(1); 12 | } 13 | 14 | template 15 | void Heatmap2coordLayer::Reshape(const vector*>& bottom, 16 | const vector*>& top) { 17 | top[0]->Reshape(bottom[0]->shape(0), bottom[0]->shape(1) * 2); 18 | } 19 | 20 | template 21 | void 
Heatmap2coordLayer::Forward_cpu(const vector*>& bottom, 22 | const vector*>& top) { 23 | const Dtype* bottom_data = bottom[0]->cpu_data(); 24 | Dtype* top_data = top[0]->mutable_cpu_data(); 25 | const int h = bottom[0]->shape(2); 26 | const int w = bottom[0]->shape(3); 27 | int argmax_h = -1; 28 | int argmax_w = -1; 29 | Dtype max_val = std::numberic_limits::min(); 30 | Dtype tmp_val; 31 | // int batch_size = bottom[0]->shape(0); 32 | 33 | for (int bi = 0; bi < batch_size; ++bi) { 34 | for (int p_idx = 0; p_idx < num_points_; ++p_idx) { 35 | for (int hi = 0; hi < h; ++hi) { 36 | for (int wi = 0; wi < w; ++wi) { 37 | tmp_val = bottom_data[bottom_data[0]->offset(bi, p_idx, hi, wi)]; 38 | if ( tmp_val > max_val) { 39 | max_val = tmp_val; 40 | argmax_h = hi; 41 | argmax_w = wi; 42 | } 43 | } 44 | } 45 | // assign x and y 46 | top_data[bi * 2 * num_points_ + p_idx * 2] = argmax_w; 47 | top_data[bi * 2 * num_points_ + p_idx * 2 + 1] = argmax_h; 48 | } 49 | } 50 | } 51 | 52 | #ifdef CPU_ONLY 53 | STUB_GPU_FORWARD(Heatmap2CoordLayer, Forward); 54 | #endif 55 | 56 | INSTANTIATE_CLASS(Heatmap2CoordLayer); 57 | REGISTER_LAYER_CLASS(Heatmap2Coord); 58 | 59 | } -------------------------------------------------------------------------------- /src/heatmap2coord.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "caffe/layers/heatmap2coord.hpp" 5 | 6 | namespace caffe { 7 | 8 | template 9 | __global__ void H2C_kernel(const int n, const Dtype* input, 10 | Dtype* out, const int num_points, const int h, const int w, 11 | Dtype max_val) { 12 | CUDA_KERNEL_LOOP(index, n) { 13 | int bi = index / num_points; 14 | int p_idx = index % num_points; 15 | int argmax_h = -1; 16 | int argmax_w = -1; 17 | Dtype tmp_val = max_val; 18 | for (int hi = 0; hi < h; ++hi) { 19 | for (int wi = 0; wi < w; ++wi) { 20 | tmp_val = input[((bi * num_points + p_idx) * h + hi) * w + wi]; 21 | if ( tmp_val > max_val ) { 22 | max_val = 
tmp_val; 23 | argmax_h = hi; 24 | argmax_w = wi; 25 | } 26 | } 27 | } 28 | // assign x and y 29 | out[bi * 2 * num_points + p_idx * 2] = argmax_w; 30 | out[bi * 2 * num_points + p_idx * 2 + 1] = argmax_h; 31 | } 32 | } 33 | 34 | template 35 | void Heatmap2coordLayer::Forward_gpu(const vector*>& bottom, 36 | const vector*>& top) { 37 | const Dtype* bottom_data = bottom[0]->cpu_data(); 38 | Dtype* top_data = top[0]->mutable_cpu_data(); 39 | const int h = bottom[0]->shape(2); 40 | const int w = bottom[0]->shape(3); 41 | Dtype max_val = std::numberic_limits::min(); 42 | const int n = bottom[0]->count(0, 2); 43 | H2C_kernel<<>>( 44 | n, bottom[0]->gpu_data(), top[0]->mutable_gpu_data(), num_points_, 45 | h, w, max_val); 46 | } 47 | 48 | INSTANTIATE_LAYER_GPU_FUNCS(Heatmap2CoordLayer); 49 | 50 | } -------------------------------------------------------------------------------- /src/heatmap_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | #include "caffe/layers/heatmap_loss_layer.hpp" 2 | 3 | namespace caffe { 4 | 5 | template 6 | void HeatmapLossLayer::LayerSetUp( 7 | const vector*>& bottom, const vector*>& top) { 8 | has_weights_ = (bottom.size() == 3); 9 | negative_ratio_ = (Dtype)this->layer_param_.heatmap_loss_param().negative_ratio(); 10 | eps_ = (Dtype)this->layer_param_.heatmap_loss_param().eps(); 11 | grad_clip_ = (Dtype)this->layer_param_.heatmap_loss_param().grad_clip(); 12 | negative_sample_prob_ = this->layer_param_.heatmap_loss_param().negative_sample_prob(); 13 | CHECK_LE(negative_sample_prob_, 1.); 14 | CHECK_GE(negative_sample_prob_, 0.); 15 | } 16 | 17 | template 18 | void HeatmapLossLayer::Reshape( 19 | const vector*>& bottom, const vector*>& top) { 20 | LossLayer::Reshape(bottom, top); 21 | CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); 22 | CHECK_EQ(bottom[0]->height(), bottom[1]->height()); 23 | CHECK_EQ(bottom[0]->width(), bottom[1]->width()); 24 | if (has_weights_) { 25 | 
CHECK_EQ(bottom[0]->channels(), bottom[2]->shape(1) / 2); 26 | } 27 | diff_.Reshape(bottom[0]->shape()); 28 | // if (negative_sample_prob_ < 0.9999) { 29 | rand_mask_.Reshape(bottom[0]->shape()); 30 | // } 31 | vector loss_shape(0); 32 | top[0]->Reshape(loss_shape); 33 | } 34 | 35 | template 36 | void HeatmapLossLayer::Forward_cpu(const vector*>& bottom, 37 | const vector*>& top) { 38 | NOT_IMPLEMENTED; 39 | } 40 | 41 | template 42 | void HeatmapLossLayer::Backward_cpu(const vector*>& top, 43 | const vector& propagate_down, const vector*>& bottom) { 44 | NOT_IMPLEMENTED; 45 | } 46 | 47 | #ifdef CPU_ONLY 48 | STUB_GPU(HeatmapLossLayer); 49 | #endif 50 | 51 | INSTANTIATE_CLASS(HeatmapLossLayer); 52 | REGISTER_LAYER_CLASS(HeatmapLoss); 53 | 54 | } // namespace caffe -------------------------------------------------------------------------------- /src/heatmap_loss_layer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "caffe/layers/heatmap_loss_layer.hpp" 4 | #include "caffe/util/math_functions.hpp" 5 | 6 | // TODO(ZhouJ) Actually we do not need to calculate loss 7 | // in forward pass. 
8 | 9 | namespace caffe { 10 | 11 | template 12 | __global__ void CE(const int n, const Dtype* gt, const Dtype* pred, 13 | Dtype* out, Dtype negative_ratio, Dtype eps) { 14 | CUDA_KERNEL_LOOP(index, n) { 15 | Dtype gt_ = gt[index]; 16 | Dtype pred_ = pred[index]; 17 | out[index] = gt_ * log(pred_ + eps) + (1 - gt_) * log(1 - pred_ + eps); 18 | if (gt_ == Dtype(0)) { 19 | out[index] *= negative_ratio; 20 | } 21 | out[index] = -out[index]; 22 | } 23 | } 24 | 25 | template 26 | __global__ void CE_mask(const int n, const Dtype* gt, const Dtype* pred, 27 | Dtype* out, const Dtype* mask, Dtype negative_ratio, 28 | int w, int h, int c, Dtype eps) { 29 | CUDA_KERNEL_LOOP(index, n) { 30 | int chn = index / w / h % c; 31 | int batch_idx = index / w / h / c; 32 | Dtype m = mask[batch_idx * c * 2 + 2 * chn]; 33 | if (m == Dtype(0)) { 34 | out[index] = Dtype(0); 35 | } else { 36 | Dtype gt_ = gt[index]; 37 | Dtype pred_ = pred[index]; 38 | out[index] = (gt_ * log(pred_ + eps) + (1 - gt_) * log(1 - pred_ + eps)); 39 | if (gt_ == Dtype(0)) { 40 | out[index] *= negative_ratio; 41 | } 42 | out[index] = -out[index] * m; 43 | } 44 | } 45 | } 46 | 47 | template 48 | __global__ void SE(const int n, const Dtype* gt, const Dtype* pred, 49 | Dtype* out, Dtype negative_ratio, Dtype eps) { 50 | CUDA_KERNEL_LOOP(index, n) { 51 | Dtype gt_ = gt[index]; 52 | Dtype pred_ = pred[index]; 53 | out[index] = (gt_ - pred_) * (gt_ - pred_); 54 | if (gt_ == Dtype(0)) { 55 | out[index] *= negative_ratio; 56 | } 57 | } 58 | } 59 | 60 | template 61 | __global__ void SE_mask(const int n, const Dtype* gt, const Dtype* pred, 62 | Dtype* out, const Dtype* mask, Dtype negative_ratio, 63 | int w, int h, int c, Dtype eps) { 64 | CUDA_KERNEL_LOOP(index, n) { 65 | int chn = index / w / h % c; 66 | int batch_idx = index / w / h / c; 67 | Dtype m = mask[batch_idx * c * 2 + 2 * chn]; 68 | if (m == Dtype(0)) { 69 | out[index] = Dtype(0); 70 | } else { 71 | Dtype gt_ = gt[index]; 72 | Dtype pred_ = pred[index]; 73 | 
out[index] = (gt_ - pred_) * (gt_ - pred_); 74 | if (gt_ == Dtype(0)) { 75 | out[index] *= negative_ratio; 76 | } 77 | out[index] *= m; 78 | } 79 | } 80 | } 81 | 82 | template 83 | __global__ void bp_CE(const int n, const Dtype* gt, const Dtype* pred, 84 | Dtype* out, Dtype negative_ratio, Dtype eps, Dtype grad_clip) { 85 | CUDA_KERNEL_LOOP(index, n) { 86 | Dtype gt_ = gt[index]; 87 | Dtype pred_ = pred[index]; 88 | out[index] = (1 - gt_) / (1 - pred_ + eps) - gt_ / (eps + pred_); 89 | if (gt_ == Dtype(0)) { 90 | out[index] *= negative_ratio; 91 | } 92 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 93 | out[index] = out[index] > 0 ? grad_clip : -grad_clip; 94 | } 95 | } 96 | 97 | template 98 | __global__ void bp_SE(const int n, const Dtype* gt, const Dtype* pred, 99 | Dtype* out, Dtype negative_ratio, Dtype eps, Dtype grad_clip) { 100 | CUDA_KERNEL_LOOP(index, n) { 101 | Dtype gt_ = gt[index]; 102 | Dtype pred_ = pred[index]; 103 | out[index] = 2 * (pred_ - gt_); 104 | if (gt_ == Dtype(0)) { 105 | out[index] *= negative_ratio; 106 | } 107 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 108 | out[index] = out[index] > 0 ? grad_clip : -grad_clip; 109 | } 110 | } 111 | 112 | template 113 | __global__ void bp_CE_mask(const int n, const Dtype* gt, const Dtype* pred, 114 | Dtype* out, const Dtype* mask, Dtype negative_ratio, 115 | int w, int h, int c, Dtype eps, Dtype grad_clip) { 116 | CUDA_KERNEL_LOOP(index, n) { 117 | int chn = index / w / h % c; 118 | int batch_idx = index / w / h / c; 119 | Dtype m = mask[batch_idx * c * 2 + 2 * chn]; 120 | if (Dtype(0) == m) { 121 | out[index] = Dtype(0); 122 | } else { 123 | Dtype gt_ = gt[index]; 124 | Dtype pred_ = pred[index]; 125 | out[index] = (1 - gt_) / (1 - pred_ + eps) - gt_ / (pred_ + eps); 126 | if (gt_ == Dtype(0)) { 127 | out[index] *= negative_ratio; 128 | } 129 | out[index] *= m; 130 | } 131 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 132 | out[index] = out[index] > 0 ? 
grad_clip : -grad_clip; 133 | } 134 | } 135 | 136 | template 137 | __global__ void bp_SE_mask(const int n, const Dtype* gt, const Dtype* pred, 138 | Dtype* out, const Dtype* mask, Dtype negative_ratio, 139 | int w, int h, int c, Dtype eps, Dtype grad_clip) { 140 | CUDA_KERNEL_LOOP(index, n) { 141 | int chn = index / w / h % c; 142 | int batch_idx = index / w / h / c; 143 | Dtype m = mask[batch_idx * c * 2 + 2 * chn]; 144 | if (Dtype(0) == m) { 145 | out[index] = Dtype(0); 146 | } else { 147 | Dtype gt_ = gt[index]; 148 | Dtype pred_ = pred[index]; 149 | out[index] = 2 * (pred_ - gt_); 150 | if (gt_ == Dtype(0)) { 151 | out[index] *= negative_ratio; 152 | } 153 | out[index] *= m; 154 | } 155 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 156 | out[index] = out[index] > 0 ? grad_clip : -grad_clip; 157 | } 158 | } 159 | 160 | template 161 | __global__ void bp_CE_ns(const int n, const Dtype* gt, const Dtype* pred, 162 | Dtype* out, Dtype negative_ratio, Dtype eps, Dtype grad_clip, 163 | const float* r_mask, float threshold) { 164 | CUDA_KERNEL_LOOP(index, n) { 165 | Dtype gt_ = gt[index]; 166 | Dtype pred_ = pred[index]; 167 | out[index] = (1 - gt_) / (1 - pred_ + eps) - gt_ / (eps + pred_); 168 | if (gt_ == Dtype(0)) { 169 | if (r_mask[index] > threshold) { 170 | out[index] = Dtype(0); 171 | } else { 172 | out[index] *= negative_ratio; 173 | } 174 | } 175 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 176 | out[index] = out[index] > 0 ? 
grad_clip : -grad_clip; 177 | } 178 | } 179 | 180 | template 181 | __global__ void bp_SE_ns(const int n, const Dtype* gt, const Dtype* pred, 182 | Dtype* out, Dtype negative_ratio, Dtype eps, Dtype grad_clip, 183 | const float* r_mask, float threshold) { 184 | CUDA_KERNEL_LOOP(index, n) { 185 | Dtype gt_ = gt[index]; 186 | Dtype pred_ = pred[index]; 187 | out[index] = 2 * (pred_ - gt_); 188 | if (gt_ == Dtype(0)) { 189 | if (r_mask[index] > threshold) { 190 | out[index] = Dtype(0); 191 | } else { 192 | out[index] *= negative_ratio; 193 | } 194 | } 195 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 196 | out[index] = out[index] > 0 ? grad_clip : -grad_clip; 197 | } 198 | } 199 | 200 | template 201 | __global__ void bp_CE_mask_ns(const int n, const Dtype* gt, const Dtype* pred, 202 | Dtype* out, const Dtype* mask, Dtype negative_ratio, 203 | int w, int h, int c, Dtype eps, Dtype grad_clip, 204 | const float* r_mask, float threshold) { 205 | CUDA_KERNEL_LOOP(index, n) { 206 | int chn = index / w / h % c; 207 | int batch_idx = index / w / h / c; 208 | Dtype m = mask[batch_idx * c * 2 + 2 * chn]; 209 | if (Dtype(0) == m) { 210 | out[index] = Dtype(0); 211 | } else { 212 | Dtype gt_ = gt[index]; 213 | Dtype pred_ = pred[index]; 214 | out[index] = (1 - gt_) / (1 - pred_ + eps) - gt_ / (pred_ + eps); 215 | if (gt_ == Dtype(0)) { 216 | if (r_mask[index] > threshold) { 217 | out[index] = Dtype(0); 218 | } else { 219 | out[index] *= negative_ratio; 220 | } 221 | } 222 | out[index] *= m; 223 | } 224 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 225 | out[index] = out[index] > 0 ? 
grad_clip : -grad_clip; 226 | } 227 | } 228 | 229 | template 230 | __global__ void bp_SE_mask_ns(const int n, const Dtype* gt, const Dtype* pred, 231 | Dtype* out, const Dtype* mask, Dtype negative_ratio, 232 | int w, int h, int c, Dtype eps, Dtype grad_clip, 233 | const float* r_mask, float threshold) { 234 | CUDA_KERNEL_LOOP(index, n) { 235 | int chn = index / w / h % c; 236 | int batch_idx = index / w / h / c; 237 | Dtype m = mask[batch_idx * c * 2 + 2 * chn]; 238 | if (Dtype(0) == m) { 239 | out[index] = Dtype(0); 240 | } else { 241 | Dtype gt_ = gt[index]; 242 | Dtype pred_ = pred[index]; 243 | out[index] = 2 * (pred_ - gt_); 244 | if (gt_ == Dtype(0)) { 245 | if (r_mask[index] > threshold) { 246 | out[index] = Dtype(0); 247 | } else { 248 | out[index] *= negative_ratio; 249 | } 250 | } 251 | out[index] *= m; 252 | } 253 | if (grad_clip > 0 && abs(out[index]) > grad_clip) 254 | out[index] = out[index] > 0 ? grad_clip : -grad_clip; 255 | } 256 | } 257 | 258 | template 259 | void HeatmapLossLayer::Forward_gpu(const vector*>& bottom, 260 | const vector*>& top) { 261 | int count = diff_.count(); 262 | Dtype loss; 263 | switch (this->layer_param_.heatmap_loss_param().loss_type()) { 264 | case HeatmapLossParameter_LossType_CE: 265 | if (has_weights_) { 266 | CE_mask<<>>( 267 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 268 | diff_.mutable_gpu_data(), 269 | bottom[2]->gpu_data(), 270 | negative_ratio_, bottom[0]->shape(3), 271 | bottom[0]->shape(2), bottom[0]->shape(1), eps_); 272 | CUDA_POST_KERNEL_CHECK; 273 | } else { 274 | CE<<>>( 275 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 276 | diff_.mutable_gpu_data(), negative_ratio_, eps_); 277 | CUDA_POST_KERNEL_CHECK; 278 | } 279 | 280 | caffe_gpu_asum(count, diff_.gpu_data(), &loss); 281 | top[0]->mutable_cpu_data()[0] = loss / (bottom[0]->count(2, 4)); 282 | break; 283 | 284 | case HeatmapLossParameter_LossType_SE: 285 | if (has_weights_) { 286 | SE_mask<<>>( 287 | count, bottom[0]->gpu_data(), 
bottom[1]->gpu_data(), 288 | diff_.mutable_gpu_data(), 289 | bottom[2]->gpu_data(), 290 | negative_ratio_, bottom[0]->shape(3), 291 | bottom[0]->shape(2), bottom[0]->shape(1), eps_); 292 | CUDA_POST_KERNEL_CHECK; 293 | } else { 294 | SE<<>>( 295 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 296 | diff_.mutable_gpu_data(), negative_ratio_, eps_); 297 | CUDA_POST_KERNEL_CHECK; 298 | } 299 | 300 | caffe_gpu_asum(count, diff_.gpu_data(), &loss); 301 | top[0]->mutable_cpu_data()[0] = loss / (bottom[0]->count(2, 4)); 302 | break; 303 | } 304 | } 305 | 306 | template 307 | void HeatmapLossLayer::Backward_gpu(const vector*>& top, 308 | const vector& propagate_down, const vector*>& bottom) { 309 | int count = bottom[0]->count(); 310 | if (negative_sample_prob_ < 0.999) { 311 | caffe_rng_uniform(count, float(0.), float(1.), rand_mask_.mutable_cpu_data()); 312 | } 313 | switch (this->layer_param_.heatmap_loss_param().loss_type()) { 314 | case HeatmapLossParameter_LossType_CE: 315 | if (negative_sample_prob_ > 0.999) { 316 | if (has_weights_) { 317 | bp_CE_mask<<>>( 318 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 319 | bottom[1]->mutable_gpu_diff(), bottom[2]->gpu_data(), 320 | negative_ratio_, bottom[0]->shape(3), 321 | bottom[0]->shape(2), bottom[0]->shape(1), eps_, grad_clip_); 322 | CUDA_POST_KERNEL_CHECK; 323 | } else { 324 | bp_CE<<>>( 325 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 326 | bottom[1]->mutable_gpu_diff(), 327 | negative_ratio_, eps_, grad_clip_); 328 | CUDA_POST_KERNEL_CHECK; 329 | } 330 | } else { 331 | if (has_weights_) { 332 | bp_CE_mask_ns<<>>( 333 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 334 | bottom[1]->mutable_gpu_diff(), bottom[2]->gpu_data(), 335 | negative_ratio_, bottom[0]->shape(3), 336 | bottom[0]->shape(2), bottom[0]->shape(1), eps_, grad_clip_, 337 | rand_mask_.gpu_data(), negative_sample_prob_); 338 | CUDA_POST_KERNEL_CHECK; 339 | } else { 340 | bp_CE_ns<<>>( 341 | count, bottom[0]->gpu_data(), 
bottom[1]->gpu_data(), 342 | bottom[1]->mutable_gpu_diff(), 343 | negative_ratio_, eps_, grad_clip_, 344 | rand_mask_.gpu_data(), negative_sample_prob_); 345 | CUDA_POST_KERNEL_CHECK; 346 | } 347 | } 348 | break; 349 | 350 | case HeatmapLossParameter_LossType_SE: 351 | if (negative_sample_prob_ > 0.999) { 352 | if (has_weights_) { 353 | bp_SE_mask<<>>( 354 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 355 | bottom[1]->mutable_gpu_diff(), bottom[2]->gpu_data(), 356 | negative_ratio_, bottom[0]->shape(3), 357 | bottom[0]->shape(2), bottom[0]->shape(1), eps_, grad_clip_); 358 | CUDA_POST_KERNEL_CHECK; 359 | } else { 360 | bp_SE<<>>( 361 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 362 | bottom[1]->mutable_gpu_diff(), 363 | negative_ratio_, eps_, grad_clip_); 364 | CUDA_POST_KERNEL_CHECK; 365 | } 366 | } else { 367 | if (has_weights_) { 368 | bp_SE_mask_ns<<>>( 369 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 370 | bottom[1]->mutable_gpu_diff(), bottom[2]->gpu_data(), 371 | negative_ratio_, bottom[0]->shape(3), 372 | bottom[0]->shape(2), bottom[0]->shape(1), eps_, grad_clip_, 373 | rand_mask_.gpu_data(), negative_sample_prob_); 374 | CUDA_POST_KERNEL_CHECK; 375 | } else { 376 | bp_SE_ns<<>>( 377 | count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 378 | bottom[1]->mutable_gpu_diff(), 379 | negative_ratio_, eps_, grad_clip_, 380 | rand_mask_.gpu_data(), negative_sample_prob_); 381 | CUDA_POST_KERNEL_CHECK; 382 | } 383 | } 384 | break; 385 | } 386 | caffe_gpu_scal( 387 | bottom[1]->count(), 388 | Dtype(1) / bottom[1]->count(2, 4), 389 | bottom[1]->mutable_gpu_diff()); 390 | } 391 | 392 | INSTANTIATE_LAYER_GPU_FUNCS(HeatmapLossLayer); 393 | 394 | } -------------------------------------------------------------------------------- /src/wing_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // CVPR 2018 4 | // Written by 
ZhouJunr 5 | // ------------------------------------------------------------------ 6 | 7 | #include "caffe/layers/wing_loss_layer.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void WingLossLayer::LayerSetUp( 13 | const vector*>& bottom, const vector*>& top) { 14 | has_weights_ = (bottom.size() == 3); 15 | omega_ = this->layer_param_.wing_loss_param().omega(); 16 | epsilon_ = this->layer_param_.wing_loss_param().epsilon(); 17 | C_ = omega_ * (1 - log(1 + omega_ / epsilon_)); 18 | } 19 | 20 | template 21 | void WingLossLayer::Reshape( 22 | const vector*>& bottom, const vector*>& top) { 23 | LossLayer::Reshape(bottom, top); 24 | CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); 25 | CHECK_EQ(bottom[0]->height(), bottom[1]->height()); 26 | CHECK_EQ(bottom[0]->width(), bottom[1]->width()); 27 | if (has_weights_) { 28 | CHECK_EQ(bottom[0]->channels(), bottom[2]->channels()); 29 | CHECK_EQ(bottom[0]->height(), bottom[2]->height()); 30 | CHECK_EQ(bottom[0]->width(), bottom[2]->width()); 31 | } 32 | diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), 33 | bottom[0]->height(), bottom[0]->width()); 34 | errors_.Reshape(bottom[0]->num(), bottom[0]->channels(), 35 | bottom[0]->height(), bottom[0]->width()); 36 | } 37 | 38 | template 39 | void WingLossLayer::Forward_cpu(const vector*>& bottom, 40 | const vector*>& top) { 41 | NOT_IMPLEMENTED; 42 | } 43 | 44 | template 45 | void WingLossLayer::Backward_cpu(const vector*>& top, 46 | const vector& propagate_down, const vector*>& bottom) { 47 | NOT_IMPLEMENTED; 48 | } 49 | 50 | #ifdef CPU_ONLY 51 | STUB_GPU(WingLossLayer); 52 | #endif 53 | 54 | INSTANTIATE_CLASS(WingLossLayer); 55 | REGISTER_LAYER_CLASS(WingLoss); 56 | 57 | } // namespace caffe 58 | -------------------------------------------------------------------------------- /src/wing_loss_layer.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // 
CVPR 2018 4 | // Written by ZhouJunr 5 | // ------------------------------------------------------------------ 6 | 7 | #include "caffe/layers/wing_loss_layer.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | __global__ void WingForward(const int n, const Dtype* in, Dtype* out, 13 | const float omega, const float epsilon, const float C) { 14 | // w * ln( 1 + |x| / e), |x| \lt w 15 | // |x| - C, otherwise 16 | CUDA_KERNEL_LOOP(index, n) { 17 | Dtype val = in[index]; 18 | Dtype abs_val = abs(val); 19 | if (abs_val < omega) { 20 | out[index] = omega * log(1 + abs_val / epsilon); 21 | } else { 22 | out[index] = abs_val - C; 23 | } 24 | } 25 | } 26 | 27 | template 28 | void WingLossLayer::Forward_gpu(const vector*>& bottom, 29 | const vector*>& top) { 30 | int count = bottom[0]->count(); 31 | caffe_gpu_sub( 32 | count, 33 | bottom[0]->gpu_data(), 34 | bottom[1]->gpu_data(), 35 | diff_.mutable_gpu_data()); // d := b0 - b1 36 | if (has_weights_) { 37 | caffe_gpu_mul( 38 | count, 39 | bottom[2]->gpu_data(), 40 | diff_.gpu_data(), 41 | diff_.mutable_gpu_data()); // d := w * (b0 - b1) 42 | } 43 | WingForward<<>>( 44 | count, diff_.gpu_data(), errors_.mutable_gpu_data(), 45 | omega_, epsilon_, C_); 46 | CUDA_POST_KERNEL_CHECK; 47 | 48 | Dtype loss; 49 | caffe_gpu_asum(count, errors_.gpu_data(), &loss); 50 | 51 | if (has_weights_) { 52 | //normalize the loss 53 | caffe_gpu_asum(bottom[2]->count(), bottom[2]->gpu_data(), &norm_value_); 54 | } else { 55 | norm_value_ = Dtype(1) * bottom[0]->num(); 56 | } 57 | top[0]->mutable_cpu_data()[0] = loss / norm_value_; 58 | } 59 | 60 | template 61 | __global__ void WingBackward(const int n, const Dtype* in, Dtype* out, 62 | const float omega, const float epsilon, const float C) { 63 | // f'(x) = sign(x) * w / (1 + |x|/e) / e, if |x| < C 64 | // = sign(x), otherwise 65 | CUDA_KERNEL_LOOP(index, n) { 66 | Dtype val = in[index]; 67 | Dtype abs_val = abs(val); 68 | Dtype sign = (Dtype(0) < val) - (val < Dtype(0)); 69 | if (abs_val < 
omega) { 70 | out[index] = sign * omega / (1 + abs_val / epsilon) / epsilon; 71 | } else { 72 | out[index] = sign; 73 | } 74 | } 75 | } 76 | 77 | template 78 | void WingLossLayer::Backward_gpu(const vector*>& top, 79 | const vector& propagate_down, const vector*>& bottom) { 80 | int count = diff_.count(); 81 | WingBackward<<>>( 82 | count, diff_.gpu_data(), diff_.mutable_gpu_data(), 83 | omega_, epsilon_, C_); 84 | if (has_weights_) { 85 | caffe_gpu_mul( 86 | count, 87 | bottom[2]->gpu_data(), 88 | diff_.gpu_data(), 89 | diff_.mutable_gpu_data()); 90 | } 91 | CUDA_POST_KERNEL_CHECK; 92 | 93 | for (int i = 0; i < 2; ++i) { 94 | if (propagate_down[i]) { 95 | const Dtype sign = (i == 0) ? 1 : -1; 96 | const Dtype alpha = sign * top[0]->cpu_diff()[0] / norm_value_; 97 | caffe_gpu_axpby( 98 | bottom[i]->count(), // count 99 | alpha, // alpha 100 | diff_.gpu_data(), // x 101 | Dtype(0), // beta 102 | bottom[i]->mutable_gpu_diff()); // y 103 | } 104 | } 105 | } 106 | 107 | INSTANTIATE_LAYER_GPU_FUNCS(WingLossLayer); 108 | 109 | } // namespace caffe 110 | --------------------------------------------------------------------------------