├── README.md
├── weighted_softmax_loss_layer.cpp
└── weighted_softmax_loss_layer.cu

/README.md:
--------------------------------------------------------------------------------
# Weighted_Softmax_Loss
Weighted Softmax Loss Layer for Caffe

Usage:

(1) In caffe.proto, modify the message below to add two parameters: pos_mult (the weight multiplier for the chosen class) and pos_cid (the class index of that class):

```
// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
message SoftmaxParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];

  // The axis along which to perform the softmax -- may be negative to index
  // from the end (e.g., -1 for the last axis).
  // Any other axes will be evaluated as independent softmaxes.
  optional int32 axis = 2 [default = 1];
  optional float pos_mult = 3 [default = 1];
  optional int32 pos_cid = 4 [default = 1];
}
```

(2) In include/caffe/loss_layers.hpp, add the following:
```
/**
 * @brief A weighted version of SoftmaxWithLossLayer.
 *
 * TODO: Add description. Add the formulation in math.
 */
template <typename Dtype>
class WeightedSoftmaxWithLossLayer : public LossLayer<Dtype> {
 public:
  /**
   * @param param provides LossParameter loss_param, with options:
   *  - ignore_label (optional)
   *    Specify a label value that should be ignored when computing the loss.
   *  - normalize (optional, default true)
   *    If true, the loss is normalized by the number of (nonignored) labels
   *    present; otherwise the loss is simply summed over spatial locations.
   */
  explicit WeightedSoftmaxWithLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "WeightedSoftmaxWithLoss"; }
  virtual inline int ExactNumBottomBlobs() const { return -1; }
  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return -1; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 2; }

 protected:
  /// @copydoc WeightedSoftmaxWithLossLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /**
   * @brief Computes the softmax loss error gradient w.r.t. the predictions.
   *
   * Gradients cannot be computed with respect to the label inputs (bottom[1]),
   * so this method ignores bottom[1] and requires !propagate_down[1], crashing
   * if propagate_down[1] is set.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
   *      This Blob's diff will simply contain the loss_weight * @f$ \lambda @f$,
   *      as @f$ \lambda @f$ is the coefficient of this layer's output
   *      @f$\ell_i@f$ in the overall Net loss
   *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
   *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
   *      (*Assuming that this top Blob is not used as a bottom (input) by any
   *      other layer of the Net.)
   * @param propagate_down see Layer::Backward.
   *      propagate_down[1] must be false as we can't compute gradients with
   *      respect to the labels.
   * @param bottom input Blob vector (length 2)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the predictions @f$ x @f$; Backward computes diff
   *      @f$ \frac{\partial E}{\partial x} @f$
   *   -# @f$ (N \times 1 \times 1 \times 1) @f$
   *      the labels -- ignored as we can't compute their error gradients
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// The internal SoftmaxLayer used to map predictions to a distribution.
  shared_ptr<Layer<Dtype> > softmax_layer_;
  /// prob stores the output probability predictions from the SoftmaxLayer.
  Blob<Dtype> prob_;
  /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward
  vector<Blob<Dtype>*> softmax_bottom_vec_;
  /// top vector holder used in call to the underlying SoftmaxLayer::Forward
  vector<Blob<Dtype>*> softmax_top_vec_;
  /// Whether to ignore instances with a certain label.
  bool has_ignore_label_;
  /// The label indicating that an instance should be ignored.
  int ignore_label_;
  /// Whether to normalize the loss by the total number of values present
  /// (otherwise just by the batch size).
  bool normalize_;
  int softmax_axis_, outer_num_, inner_num_;

  float pos_mult_;
  int pos_cid_;
};
```
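
The header above leaves the formulation as a TODO. As a sketch reconstructed from the forward and backward code in this repository (the notation is mine, not part of the original docs): for predictions `x`, labels `y`, and softmax probabilities `p`, the layer computes

```
w_i = \begin{cases} \text{pos\_mult}, & y_i = \text{pos\_cid} \\ 1, & \text{otherwise} \end{cases}
\qquad
E = -\frac{1}{N} \sum_i w_i \, \log \hat{p}_{i, y_i},
\qquad
\hat{p}_{i,k} = \frac{\exp(x_{i,k})}{\sum_{k'} \exp(x_{i,k'})},
\qquad
\frac{\partial E}{\partial x_{i,k}} = \frac{w_i}{N} \left( \hat{p}_{i,k} - \mathbb{1}[k = y_i] \right)
```

where `N` is the number of non-ignored labels when `normalize` is true, and `outer_num_` (the batch size in the usual axis = 1 case) otherwise; ignored labels contribute neither loss nor gradient.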

(3) Add the two files weighted_softmax_loss_layer.cpp and weighted_softmax_loss_layer.cu (the versions below) to the src/caffe/layers folder, then rebuild Caffe so the regenerated proto parameters and the new layer are compiled in.

(4) Use the layer in your training prototxt as follows (this example boosts class 1, counting from 0, with a weight multiplier of 2.0):
```
layer {
  name: "loss"
  type: "WeightedSoftmaxWithLoss"
  bottom: "fc_end"
  bottom: "label"
  top: "loss"
  softmax_param {
    pos_cid: 1
    pos_mult: 2.0
  }
}
```
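
For intuition, here is a minimal, self-contained C++ sketch (hypothetical; not part of this repository and independent of Caffe) that mirrors the weighting applied in `Forward_cpu` and `Backward_cpu` on a toy two-sample, three-class problem:

```
// weighted_softmax_demo.cpp -- standalone sketch, independent of Caffe.
// Mirrors the weighting rule: w = (label == pos_cid) ? pos_mult : 1.
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const float pos_mult = 2.0f;  // weight multiplier (softmax_param.pos_mult)
  const int pos_cid = 1;        // boosted class index (softmax_param.pos_cid)

  // Two samples, three classes: raw scores and ground-truth labels.
  std::vector<std::vector<float> > scores = {{2.0f, 0.5f, 0.1f},
                                             {0.2f, 1.5f, 0.3f}};
  std::vector<int> labels = {0, 1};

  float loss = 0.0f;
  for (size_t i = 0; i < scores.size(); ++i) {
    // Softmax over the class scores.
    float sum = 0.0f;
    std::vector<float> prob(scores[i].size());
    for (size_t k = 0; k < scores[i].size(); ++k) sum += std::exp(scores[i][k]);
    for (size_t k = 0; k < scores[i].size(); ++k) prob[k] = std::exp(scores[i][k]) / sum;

    // Weighted negative log-likelihood, as in Forward_cpu.
    const float w = (labels[i] == pos_cid) ? pos_mult : 1.0f;
    loss -= w * std::log(std::max(prob[labels[i]], FLT_MIN));

    // Gradient w.r.t. the scores, as in Backward_cpu: w * (prob - one_hot).
    for (size_t k = 0; k < prob.size(); ++k) {
      const float grad = w * (prob[k] - (static_cast<int>(k) == labels[i] ? 1.0f : 0.0f));
      std::printf("sample %zu class %zu grad %+.4f\n", i, k, grad);
    }
  }
  std::printf("normalized loss: %.4f\n", loss / scores.size());
  return 0;
}
```

With pos_cid = 1 and pos_mult = 2.0, the second sample (label 1) contributes twice the loss and twice the gradient it would under the plain softmax loss.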

--------------------------------------------------------------------------------
/weighted_softmax_loss_layer.cpp:
--------------------------------------------------------------------------------
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  LayerParameter softmax_param(this->layer_param_);
  softmax_param.set_type("Softmax");
  softmax_layer_ = LayerRegistry<Dtype>::CreateLayer(softmax_param);
  softmax_bottom_vec_.clear();
  softmax_bottom_vec_.push_back(bottom[0]);
  softmax_top_vec_.clear();
  softmax_top_vec_.push_back(&prob_);
  softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
  pos_mult_ = this->layer_param_.softmax_param().pos_mult();
  pos_cid_ = this->layer_param_.softmax_param().pos_cid();

  LOG(INFO) << "mult: " << pos_mult_ << ", id: " << pos_cid_;

  has_ignore_label_ =
      this->layer_param_.loss_param().has_ignore_label();
  if (has_ignore_label_) {
    ignore_label_ = this->layer_param_.loss_param().ignore_label();
  }
  normalize_ = this->layer_param_.loss_param().normalize();
}

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_);
  softmax_axis_ =
      bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis());
  outer_num_ = bottom[0]->count(0, softmax_axis_);
  inner_num_ = bottom[0]->count(softmax_axis_ + 1);
  LOG(INFO) << "outer_num_: " << outer_num_ << ", inner_num_: " << inner_num_;

  CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count())
      << "Number of labels must match number of predictions; "
      << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), "
      << "label count (number of labels) must be N*H*W, "
      << "with integer values in {0, 1, ..., C-1}.";
  if (top.size() >= 2) {
    // softmax output
    top[1]->ReshapeLike(*bottom[0]);
  }
}

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the softmax prob values.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.cpu_data();
  const Dtype* label = bottom[1]->cpu_data();

  int dim = prob_.count() / outer_num_;
  int count = 0;
  Dtype loss = 0;
  LOG(INFO) << "dim:" << dim;

  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; j++) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, prob_.shape(softmax_axis_));
      Dtype w = (label_value == pos_cid_) ? pos_mult_ : 1;
      loss -= w * log(std::max(prob_data[i * dim + label_value * inner_num_ + j],
                               Dtype(FLT_MIN)));
      ++count;
    }
  }
  if (normalize_) {
    top[0]->mutable_cpu_data()[0] = loss / count;
  } else {
    top[0]->mutable_cpu_data()[0] = loss / outer_num_;
  }
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->cpu_data();

    int dim = prob_.count() / outer_num_;

    int count = 0;
    for (int i = 0; i < outer_num_; ++i) {
      for (int j = 0; j < inner_num_; ++j) {
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;
          Dtype w = (label_value == pos_cid_) ? pos_mult_ : 1;
          // Scale the gradients of all channels at this spatial position by w.
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] *= w;
          }
          ++count;
        }
      }
    }
    // Scale gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (normalize_) {
      caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
    } else {
      caffe_scal(prob_.count(), loss_weight / outer_num_, bottom_diff);
    }
  }
}


#ifdef CPU_ONLY
STUB_GPU(WeightedSoftmaxWithLossLayer);
#endif

INSTANTIATE_CLASS(WeightedSoftmaxWithLossLayer);
REGISTER_LAYER_CLASS(WeightedSoftmaxWithLoss);

}  // namespace caffe

--------------------------------------------------------------------------------
/weighted_softmax_loss_layer.cu:
--------------------------------------------------------------------------------
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
__global__ void WeightedSoftmaxLossForwardGPU(const int nthreads,
          const Dtype* prob_data, const Dtype* label, Dtype* loss,
          const Dtype pos_mult_, const int pos_cid_,
          const int num, const int dim, const int spatial_dim,
          const bool has_ignore_label_, const int ignore_label_,
          Dtype* counts) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int n = index / spatial_dim;
    const int s = index % spatial_dim;
    const int label_value = static_cast<int>(label[n * spatial_dim + s]);
    Dtype w = (label_value == pos_cid_) ? pos_mult_ : 1;
    if (has_ignore_label_ && label_value == ignore_label_) {
      loss[index] = 0;
      counts[index] = 0;
    } else {
      loss[index] = -w * log(max(prob_data[n * dim + label_value * spatial_dim + s],
                                 Dtype(FLT_MIN)));
      counts[index] = 1;
    }
  }
}

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.gpu_data();
  const Dtype* label = bottom[1]->gpu_data();

  const int dim = prob_.count() / outer_num_;
  const int nthreads = outer_num_ * inner_num_;
  // Since this memory is not used for anything until it is overwritten
  // on the backward pass, we use it here to avoid having to allocate new GPU
  // memory to accumulate intermediate results in the kernel.
  Dtype* loss_data = bottom[0]->mutable_gpu_diff();
  // Similarly, this memory is never used elsewhere, and thus we can use it
  // to avoid having to allocate additional GPU memory.
  Dtype* counts = prob_.mutable_gpu_diff();
  // NOLINT_NEXT_LINE(whitespace/operators)
  WeightedSoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
      CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data,
      pos_mult_, pos_cid_,
      outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
  Dtype loss;
  caffe_gpu_asum(nthreads, loss_data, &loss);
  if (normalize_) {
    Dtype count;
    caffe_gpu_asum(nthreads, counts, &count);
    loss /= count;
  } else {
    loss /= outer_num_;
  }
  top[0]->mutable_cpu_data()[0] = loss;
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}

template <typename Dtype>
__global__ void WeightedSoftmaxLossBackwardGPU(const int nthreads, const Dtype* top,
          const Dtype* label, Dtype* bottom_diff,
          const Dtype pos_mult_, const int pos_cid_,
          const int num, const int dim,
          const int spatial_dim, const bool has_ignore_label_,
          const int ignore_label_, Dtype* counts) {
  const int channels = dim / spatial_dim;

  CUDA_KERNEL_LOOP(index, nthreads) {
    const int n = index / spatial_dim;
    const int s = index % spatial_dim;
    const int label_value = static_cast<int>(label[n * spatial_dim + s]);
    Dtype w = (label_value == pos_cid_) ? pos_mult_ : 1;

    if (has_ignore_label_ && label_value == ignore_label_) {
      for (int c = 0; c < channels; ++c) {
        bottom_diff[n * dim + c * spatial_dim + s] = 0;
      }
      counts[index] = 0;
    } else {
      bottom_diff[n * dim + label_value * spatial_dim + s] -= 1;
      counts[index] = 1;
      for (int c = 0; c < channels; ++c) {
        bottom_diff[n * dim + c * spatial_dim + s] *= w;
      }
    }
  }
}

template <typename Dtype>
void WeightedSoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const Dtype* prob_data = prob_.gpu_data();
    const Dtype* top_data = top[0]->gpu_data();
    caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->gpu_data();

    const int dim = prob_.count() / outer_num_;
    const int nthreads = outer_num_ * inner_num_;
    // Since this memory is never used for anything else,
    // we use it to avoid allocating new GPU memory.
    Dtype* counts = prob_.mutable_gpu_diff();
    // NOLINT_NEXT_LINE(whitespace/operators)
    WeightedSoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
        CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff,
        pos_mult_, pos_cid_,
        outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    if (normalize_) {
      Dtype count;
      caffe_gpu_asum(nthreads, counts, &count);
      caffe_gpu_scal(prob_.count(), loss_weight / count, bottom_diff);
    } else {
      caffe_gpu_scal(prob_.count(), loss_weight / outer_num_, bottom_diff);
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(WeightedSoftmaxWithLossLayer);

}  // namespace caffe
--------------------------------------------------------------------------------