├── .gitmodules
├── .gitignore
├── train.py
├── forward_all.py
├── lib
│   ├── pylayer.py
│   ├── balance_cross_entropy_loss_layer.cu
│   ├── balance_cross_entropy_loss_layer.hpp
│   ├── balance_cross_entropy_loss_layer.cpp
│   └── test_balance_cross_entropy_loss_layer.cpp
├── README.md
└── model
    ├── hed.py
    ├── rcf.py
    ├── h1.py
    └── h2.py

/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "caffe"]
2 | 	path = caffe
3 | 	url = https://github.com/bvlc/caffe
4 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.caffemodel
2 | *.solverstate
3 | *.pyc
4 | data/
5 | *~
6 | *.pt
7 | *.prototxt
8 | *.log
9 | .ipynb_checkpoints/
10 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import sys, os, argparse
4 | from os.path import isfile, join, isdir
5 | sys.path.insert(0, 'model')
6 | sys.path.insert(0, 'lib')
7 | parser = argparse.ArgumentParser(description='Training hed.')
8 | parser.add_argument('--gpu', type=int, help='gpu ID', default=0)
9 | parser.add_argument('--solver', type=str, help='solver', default='model/hed_solver.pt')
10 | parser.add_argument('--weights', type=str, help='base model', default='model/vgg16convs.caffemodel')
11 | parser.add_argument('--caffe', type=str, help='caffe directory', default='caffe')
12 | args = parser.parse_args()
13 | sys.path.insert(0, join(args.caffe, 'python'))  # pycaffe must be on the path before importing caffe
14 | import caffe
15 | assert isfile(args.weights) and isfile(args.solver)
16 | caffe.set_mode_gpu()
17 | caffe.set_device(args.gpu)
18 | if not isdir('snapshot'):
19 |     os.makedirs('snapshot')
20 | solver = caffe.SGDSolver(args.solver)
21 | solver.net.copy_from(args.weights)
22 | for p in solver.net.params:
23 |     param = solver.net.params[p]
24 |     for i in range(len(param)):
25 |         print p, "param[%d]: mean=%.5f, std=%.5f" % (i, param[i].data.mean(), \
26 |             param[i].data.std())
27 | solver.solve()
28 | 
--------------------------------------------------------------------------------
/forward_all.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import scipy.misc
4 | import cv2
5 | import scipy.io
6 | import os, sys, argparse
7 | from os.path import join, splitext, split, isfile
8 | parser = argparse.ArgumentParser(description='Forward all testing images.')
9 | parser.add_argument('--model', type=str, default='snapshot/hed_pretrained_bsds.caffemodel')
10 | parser.add_argument('--net', type=str, default='model/hed_test.pt')
11 | parser.add_argument('--output', type=str, default='sigmoid_fuse') # output blob to save
12 | parser.add_argument('--gpu', type=int, default=0)
13 | parser.add_argument('--ms', type=bool, default=True) # use multi-scale testing (note: any non-empty string parses as True)
14 | parser.add_argument('--savemat', type=bool, default=False) # whether to also save a .mat file
15 | args = parser.parse_args()
16 | sys.path.insert(0, 'caffe/python')
17 | import caffe
18 | def forward(data):
19 |     assert data.ndim == 3
20 |     data -= np.array((104.00698793,116.66876762,122.67891434))
21 |     data = data.transpose((2, 0, 1))
22 |     net.blobs['data'].reshape(1, *data.shape)
23 |     net.blobs['data'].data[...] = data
24 |     return net.forward()
25 | assert isfile(args.model) and isfile(args.net), 'file does not exist'
26 | caffe.set_mode_gpu()
27 | caffe.set_device(args.gpu)
28 | 
29 | net = caffe.Net(args.net, args.model, caffe.TEST)
30 | test_dir = 'data/HED-BSDS/test/' # test images directory
31 | save_dir = join('data/edge-results/', splitext(split(args.model)[1])[0]) # directory to save results
32 | if args.ms:
33 |     save_dir = save_dir + '_multiscale'
34 | if not os.path.exists(save_dir):
35 |     os.makedirs(save_dir)
36 | imgs = [i for i in os.listdir(test_dir) if '.jpg' in i]
37 | nimgs = len(imgs)
38 | print "Total: %d images" % nimgs
39 | for i in range(nimgs):
40 |     img = imgs[i]
41 |     img = cv2.imread(join(test_dir, img)).astype(np.float32)
42 |     if img.ndim == 2:
43 |         img = img[:, :, np.newaxis]
44 |         img = np.repeat(img, 3, 2)
45 |     h, w, _ = img.shape
46 |     edge = np.zeros((h, w), np.float32)
47 |     if args.ms:
48 |         scales = [0.5, 1, 1.5]
49 |     else:
50 |         scales = [1]
51 |     for s in scales:
52 |         h1, w1 = int(s * h), int(s * w)
53 |         img1 = cv2.resize(img, (w1, h1), interpolation=cv2.INTER_CUBIC).astype(np.float32)
54 |         edge1 = np.squeeze(forward(img1)[args.output][0, 0, :, :])
55 |         edge += cv2.resize(edge1, (w, h), interpolation=cv2.INTER_CUBIC).astype(np.float32)
56 |     edge /= len(scales)
57 |     fn, ext = splitext(imgs[i])
58 |     if args.savemat:
59 |         scipy.io.savemat(join(save_dir, fn), dict({'edge': edge / edge.max()}), appendmat=True)
60 |     scipy.misc.imsave(join(save_dir, fn + '.png'), edge / edge.max())
61 |     print "Saving to '" + join(save_dir, imgs[i][0:-4]) + "', Processing %d of %d..." % (i + 1, nimgs)
62 | 
63 | 
--------------------------------------------------------------------------------
/lib/pylayer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Code written by KAI ZHAO (http://kaiz.xyz)
4 | import caffe
5 | import numpy as np
6 | from os.path import join, isfile
7 | import random, cv2
8 | 
9 | class ImageLabelmapDataLayer(caffe.Layer):
10 |     """
11 |     Python data layer
12 |     """
13 |     def setup(self, bottom, top):
14 |         params = eval(self.param_str)
15 |         self.root = params['root']
16 |         self.source = params['source']
17 |         self.shuffle = bool(params['shuffle'])
18 |         self.mean = np.array(params['mean'], dtype=np.float32)
19 |         assert self.mean.size == 1 or self.mean.size == 3, "mean.size != 1 and mean.size != 3"
20 |         if params.has_key('ignore_label'):
21 |             self.ignore_label = np.float32(params['ignore_label'])
22 |         else:
23 |             self.ignore_label = None
24 |         with open(join(self.root, self.source), 'r') as f:
25 |             self.filelist = f.readlines()
26 |         if self.shuffle:
27 |             random.shuffle(self.filelist)
28 |         self.idx = 0
29 |         top[0].reshape(1, 3, 100, 100) # img
30 |         top[1].reshape(1, 1, 100, 100) # lb
31 | 
32 |     def reshape(self, bottom, top):
33 |         """
34 |         Tops are reshaped per image in forward().
35 |         """
36 | 
37 |     def forward(self, bottom, top):
38 |         """
39 |         Load the next image / label pair.
40 |         """
41 |         [imgfn, lbfn] = self.filelist[self.idx].split()
42 |         [imgfn, lbfn] = join(self.root, imgfn), join(self.root, lbfn)
43 |         assert isfile(imgfn) and isfile(lbfn), "File does not exist!"
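        # (Preprocessing below, for reference: subtract the per-channel BGR mean,
        # convert HWC -> NCHW, and binarize the label map at a threshold of 125;
        # pixels in (0, 125) are optionally set to ignore_label so that they
        # contribute neither positive nor negative loss.)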
44 |         img = cv2.imread(imgfn).astype(np.float32)
45 |         lb = cv2.imread(lbfn, 0).astype(np.float32)
46 |         if img.ndim == 2:
47 |             img = img[:,:,np.newaxis]
48 |             img = np.repeat(img, 3, 2)
49 |         img -= self.mean
50 |         img = np.transpose(img, (2, 0, 1))
51 |         img = img[np.newaxis, :, :, :]
52 |         assert lb.ndim == 2, "lb.ndim = %d"%lb.ndim
53 |         h, w = lb.shape
54 |         assert img.shape[2] == h and img.shape[3] == w, "Image and GT shape mismatch."
55 |         lb = lb[np.newaxis, np.newaxis, :, :]
56 |         thres = 125
57 |         if self.ignore_label is not None:
58 |             lb[np.logical_and(lb < thres, lb != 0)] = self.ignore_label
59 |             lb[lb >= thres] = 1
60 |         else:
61 |             lb[lb < thres] = 0
62 |             lb[lb != 0] = 1
63 |         if np.count_nonzero(lb) == 0:
64 |             print "Warning: all-zero label map!"
65 |         top[0].reshape(1, 3, h, w)
66 |         top[1].reshape(1, 1, h, w)
67 |         top[0].data[...] = img
68 |         top[1].data[...] = lb
69 |         if self.idx == len(self.filelist)-1:
70 |             # we've reached the end, restart.
71 |             print "Restarting data prefetching from the start."
72 |             random.shuffle(self.filelist)
73 |             self.idx = 0
74 |         else:
75 |             self.idx = self.idx + 1
76 | 
77 |     def backward(self, top, propagate_down, bottom):
78 |         """
79 |         The data layer does not back-propagate.
80 |         """
81 |         pass
82 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Reimplementation of [HED](https://github.com/s9xie/hed) based on the official version of caffe
2 | 
3 | ### For training:
4 | 1. Clone this repository with `git clone https://github.com/zeakey/hed --recursive`; we assume your source code directory is `$HED`;
5 | 
6 | 2. Download the [training data](http://vcl.ucsd.edu/hed/HED-BSDS.tar) from the [original](https://github.com/s9xie/hed) repo, and extract it to `$HED/data/`;
7 | 
8 | 3. Build caffe with `bash $HED/build.sh`; this will first copy the reimplemented loss layer into the caffe folder;
9 | 
10 | 4. Download the [initial model](http://zhaok-data.oss-cn-shanghai.aliyuncs.com/caffe-model/vgg16convs.caffemodel) and put it
11 | into `$HED/model/`;
12 | 
13 | 5. Generate the network prototxts with `python model/hed.py`;
14 | 
15 | 6. Start training with `cd $HED && python train.py --gpu GPU-ID 2>&1 | tee hed.log`.
16 | 
17 | ### For testing:
18 | 1. Download the [pretrained model](http://data.kaiz.xyz/edges/my_hed_pretrained_bsds.caffemodel) into `$HED/snapshot/`;
19 | 
20 | 2. Generate the testing network prototxt with `python $HED/model/hed.py` (this will generate the training network prototxt as well);
21 | 
22 | 3. Run `cd $HED && python forward_all.py`;
23 | 
24 | ### Performance evaluation
25 | I achieved ODS=0.779 on the [BSDS500](https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/resources.html)
26 | dataset, which is close to the 0.78 reported by HED. You can train your own model and evaluate it with this
27 | [code](https://github.com/zeakey/edgeval).
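
For a quick sanity check on a single image (instead of the full `forward_all.py` loop over the test set), the sketch below mirrors what `forward_all.py` does; it assumes the default paths used by the scripts above and the `sigmoid_fuse` output defined in `model/hed.py`:

```python
import sys
sys.path.insert(0, 'caffe/python')
import caffe
import cv2
import numpy as np

caffe.set_mode_gpu()
caffe.set_device(0)
net = caffe.Net('model/hed_test.pt', 'snapshot/hed_pretrained_bsds.caffemodel', caffe.TEST)

im = cv2.imread('data/HED-BSDS/test/3063.jpg').astype(np.float32)
im -= np.array((104.00698793, 116.66876762, 122.67891434))  # BGR mean, as in forward_all.py
im = im.transpose((2, 0, 1))[np.newaxis, ...]               # HWC -> NCHW
net.blobs['data'].reshape(*im.shape)
net.blobs['data'].data[...] = im
edge = net.forward()['sigmoid_fuse'][0, 0]                  # fused edge probability map
cv2.imwrite('3063_edge.png', (255 * edge / edge.max()).astype(np.uint8))
```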
28 | 29 | ### Pretrained models and detection results: 30 | | [Orig-HED](https://github.com/s9xie/hed) | [My-HED](https://github.com/zeakey/hed) | 31 | | ------------- | ------------- | 32 | | [Pretrained model](http://vcl.ucsd.edu/hed/hed_pretrained_bsds.caffemodel) | [Pretrained model](http://data.kaiz.xyz/edges/my_hed_pretrained_bsds.caffemodel) | 33 | | [BSDS results](http://data.kaiz.xyz/edges/detection_results/hed_pretrained_bsds.tar) | [BSDS results](http://data.kaiz.xyz/edges/detection_results/my_hed_pretrained_bsds.tar) | 34 | | [Evaluation results](http://vcl.ucsd.edu/hed/eval_results.tar) | [Evaluation results](http://data.kaiz.xyz/edges/my_hed_pretrained_bsds-eval.tar) | 35 | 36 | All detection results on the BSDS500 testing set and the pretrained models are provided. 37 | For example, the detected results of '3063.jpg' by the original [HED](https://github.com/s9xie/hed) and my 38 | implementation are shown below: 39 | 40 | 41 | 42 | ![](http://data.kaiz.xyz/edges/detection_results/hed_pretrained_bsds/3063.png?x-oss-process=image/auto-orient,1/resize,h_250) 43 | 44 | 45 | 46 | ![](http://data.kaiz.xyz/edges/detection_results/my_hed_bsds/3063.png?x-oss-process=image/auto-orient,1/resize,h_250) 47 | 48 | You can preview results of all other images by replacing the filename in the above url. 49 | ___ 50 | By [KAI ZHAO](http://kaiz.xyz) 51 | 52 | -------------------------------------------------------------------------------- /lib/balance_cross_entropy_loss_layer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "caffe/layers/balance_cross_entropy_loss_layer.hpp" 4 | #include "caffe/util/math_functions.hpp" 5 | 6 | namespace caffe { 7 | 8 | 9 | template 10 | __global__ void BalanceCrossEntropyLossForwardGPU(const int nthreads, 11 | const Dtype* input_data, const Dtype* target, Dtype* loss, 12 | const bool has_ignore_label_, const int ignore_label_, 13 | Dtype* counts) { 14 | CUDA_KERNEL_LOOP(i, nthreads) { 15 | const int target_value = static_cast(target[i]); 16 | if (has_ignore_label_ && target_value == ignore_label_) { 17 | loss[i] = 0; 18 | counts[i] = 0; 19 | } else { 20 | loss[i] = input_data[i] * (target[i] - (input_data[i] >= 0)) - 21 | log(1 + exp(input_data[i] - 2 * input_data[i] * 22 | (input_data[i] >= 0))); 23 | counts[i] = 1; 24 | } 25 | } 26 | } 27 | 28 | template 29 | __global__ void BalanceCrossEntropyLossIgnoreDiffGPU(const int count, 30 | const int ignore_label, const Dtype* target, Dtype* diff) { 31 | CUDA_KERNEL_LOOP(i, count) { 32 | const int target_value = static_cast(target[i]); 33 | if (target_value == ignore_label) { 34 | diff[i] = 0; 35 | } 36 | } 37 | } 38 | 39 | 40 | template 41 | void BalanceCrossEntropyLossLayer::Forward_gpu( 42 | const vector*>& bottom, const vector*>& top) { 43 | Forward_cpu(bottom, top); return; 44 | // The forward pass computes the sigmoid outputs. 45 | sigmoid_bottom_vec_[0] = bottom[0]; 46 | sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); 47 | // Compute the loss (negative log likelihood) 48 | const int count = bottom[0]->count(); 49 | // Stable version of loss computation from input data 50 | const Dtype* input_data = bottom[0]->gpu_data(); 51 | const Dtype* target = bottom[1]->gpu_data(); 52 | // Since this memory is not used for anything until it is overwritten 53 | // on the backward pass, we use it here to avoid having to allocate new GPU 54 | // memory to accumulate intermediate results in the kernel. 
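  // (Note: with the early "Forward_cpu(bottom, top); return;" above, this GPU branch is
  // currently bypassed; the class-balancing weights are only applied in the CPU implementation.)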
55 | Dtype* loss_data = bottom[0]->mutable_gpu_diff(); 56 | Dtype* count_data = bottom[1]->mutable_gpu_diff(); 57 | Dtype valid_count; 58 | // NOLINT_NEXT_LINE(whitespace/operators) 59 | BalanceCrossEntropyLossForwardGPU<<>>(count, input_data, target, loss_data, 61 | has_ignore_label_, ignore_label_, count_data); 62 | // Only launch another CUDA kernel if we actually need the valid count. 63 | if (normalization_ == LossParameter_NormalizationMode_VALID && 64 | has_ignore_label_) { 65 | caffe_gpu_asum(count, count_data, &valid_count); 66 | } else { 67 | valid_count = count; 68 | } 69 | Dtype loss; 70 | caffe_gpu_asum(count, loss_data, &loss); 71 | normalizer_ = get_normalizer(normalization_, valid_count); 72 | top[0]->mutable_cpu_data()[0] = loss / normalizer_; 73 | } 74 | 75 | template 76 | void BalanceCrossEntropyLossLayer::Backward_gpu( 77 | const vector*>& top, const vector& propagate_down, 78 | const vector*>& bottom) { 79 | Backward_cpu(top, propagate_down, bottom); return; 80 | if (propagate_down[1]) { 81 | LOG(FATAL) << this->type() 82 | << " Layer cannot backpropagate to label inputs."; 83 | } 84 | if (propagate_down[0]) { 85 | // First, compute the diff 86 | const int count = bottom[0]->count(); 87 | const Dtype* sigmoid_output_data = sigmoid_output_->gpu_data(); 88 | const Dtype* target = bottom[1]->gpu_data(); 89 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); 90 | caffe_copy(count, sigmoid_output_data, bottom_diff); 91 | caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff); 92 | // Zero out gradient of ignored targets. 93 | if (has_ignore_label_) { 94 | // NOLINT_NEXT_LINE(whitespace/operators) 95 | BalanceCrossEntropyLossIgnoreDiffGPU<<>>(count, ignore_label_, target, bottom_diff); 97 | } 98 | // Scale down gradient 99 | Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_; 100 | caffe_gpu_scal(count, loss_weight, bottom_diff); 101 | } 102 | } 103 | 104 | INSTANTIATE_LAYER_GPU_FUNCS(BalanceCrossEntropyLossLayer); 105 | 106 | } // namespace caffe 107 | -------------------------------------------------------------------------------- /lib/balance_cross_entropy_loss_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_BALANCE_CROSS_ENTROPY_LOSS_LAYER_HPP_ 2 | #define CAFFE_BALANCE_CROSS_ENTROPY_LOSS_LAYER_HPP_ 3 | 4 | #include 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/layer.hpp" 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | #include "caffe/layers/loss_layer.hpp" 11 | #include "caffe/layers/sigmoid_layer.hpp" 12 | 13 | namespace caffe { 14 | 15 | /** 16 | * @brief Computes the cross-entropy (logistic) loss @f$ 17 | * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ 18 | * p_n \log \hat{p}_n + 19 | * (1 - p_n) \log(1 - \hat{p}_n) 20 | * \right] 21 | * @f$, often used for predicting targets interpreted as probabilities. 22 | * 23 | * This layer is implemented rather than separate 24 | * SigmoidLayer + CrossEntropyLayer 25 | * as its gradient computation is more numerically stable. 26 | * At test time, this layer can be replaced simply by a SigmoidLayer. 27 | * 28 | * @param bottom input Blob vector (length 2) 29 | * -# @f$ (N \times C \times H \times W) @f$ 30 | * the scores @f$ x \in [-\infty, +\infty]@f$, 31 | * which this layer maps to probability predictions 32 | * @f$ \hat{p}_n = \sigma(x_n) \in [0, 1] @f$ 33 | * using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer). 
34 | * -# @f$ (N \times C \times H \times W) @f$ 35 | * the targets @f$ y \in [0, 1] @f$ 36 | * @param top output Blob vector (length 1) 37 | * -# @f$ (1 \times 1 \times 1 \times 1) @f$ 38 | * the computed cross-entropy loss: @f$ 39 | * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ 40 | * p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) 41 | * \right] 42 | * @f$ 43 | */ 44 | template 45 | class BalanceCrossEntropyLossLayer : public LossLayer { 46 | public: 47 | explicit BalanceCrossEntropyLossLayer(const LayerParameter& param) 48 | : LossLayer(param), 49 | sigmoid_layer_(new SigmoidLayer(param)), 50 | sigmoid_output_(new Blob()) {} 51 | virtual void LayerSetUp(const vector*>& bottom, 52 | const vector*>& top); 53 | virtual void Reshape(const vector*>& bottom, 54 | const vector*>& top); 55 | 56 | virtual inline const char* type() const { return "BalanceCrossEntropyLoss"; } 57 | 58 | protected: 59 | /// @copydoc BalanceCrossEntropyLossLayer 60 | virtual void Forward_cpu(const vector*>& bottom, 61 | const vector*>& top); 62 | virtual void Forward_gpu(const vector*>& bottom, 63 | const vector*>& top); 64 | 65 | /** 66 | * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the 67 | * predictions. 68 | * 69 | * Gradients cannot be computed with respect to the target inputs (bottom[1]), 70 | * so this method ignores bottom[1] and requires !propagate_down[1], crashing 71 | * if propagate_down[1] is set. 72 | * 73 | * @param top output Blob vector (length 1), providing the error gradient with 74 | * respect to the outputs 75 | * -# @f$ (1 \times 1 \times 1 \times 1) @f$ 76 | * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, 77 | * as @f$ \lambda @f$ is the coefficient of this layer's output 78 | * @f$\ell_i@f$ in the overall Net loss 79 | * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence 80 | * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. 81 | * (*Assuming that this top Blob is not used as a bottom (input) by any 82 | * other layer of the Net.) 83 | * @param propagate_down see Layer::Backward. 84 | * propagate_down[1] must be false as gradient computation with respect 85 | * to the targets is not implemented. 86 | * @param bottom input Blob vector (length 2) 87 | * -# @f$ (N \times C \times H \times W) @f$ 88 | * the predictions @f$x@f$; Backward computes diff 89 | * @f$ \frac{\partial E}{\partial x} = 90 | * \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n) 91 | * @f$ 92 | * -# @f$ (N \times 1 \times 1 \times 1) @f$ 93 | * the labels -- ignored as we can't compute their error gradients 94 | */ 95 | virtual void Backward_cpu(const vector*>& top, 96 | const vector& propagate_down, const vector*>& bottom); 97 | virtual void Backward_gpu(const vector*>& top, 98 | const vector& propagate_down, const vector*>& bottom); 99 | 100 | /// Read the normalization mode parameter and compute the normalizer based 101 | /// on the blob size. If normalization_mode is VALID, the count of valid 102 | /// outputs will be read from valid_count, unless it is -1 in which case 103 | /// all outputs are assumed to be valid. 104 | virtual Dtype get_normalizer( 105 | LossParameter_NormalizationMode normalization_mode, int valid_count); 106 | 107 | /// The internal SigmoidLayer used to map predictions to probabilities. 108 | shared_ptr > sigmoid_layer_; 109 | /// sigmoid_output stores the output of the SigmoidLayer. 
110 | shared_ptr > sigmoid_output_; 111 | /// bottom vector holder to call the underlying SigmoidLayer::Forward 112 | vector*> sigmoid_bottom_vec_; 113 | /// top vector holder to call the underlying SigmoidLayer::Forward 114 | vector*> sigmoid_top_vec_; 115 | 116 | /// Whether to ignore instances with a certain label. 117 | bool has_ignore_label_; 118 | /// The label indicating that an instance should be ignored. 119 | int ignore_label_; 120 | /// How to normalize the loss. 121 | LossParameter_NormalizationMode normalization_; 122 | Dtype normalizer_; 123 | int outer_num_, inner_num_; 124 | std::vector count_; 125 | }; 126 | 127 | } // namespace caffe 128 | 129 | #endif // CAFFE_BALANCE_CROSS_ENTROPY_LOSS_LAYER_HPP_ 130 | -------------------------------------------------------------------------------- /lib/balance_cross_entropy_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "caffe/layers/balance_cross_entropy_loss_layer.hpp" 5 | #include "caffe/util/math_functions.hpp" 6 | 7 | namespace caffe { 8 | 9 | template 10 | void BalanceCrossEntropyLossLayer::LayerSetUp( 11 | const vector*>& bottom, const vector*>& top) { 12 | LossLayer::LayerSetUp(bottom, top); 13 | sigmoid_bottom_vec_.clear(); 14 | sigmoid_bottom_vec_.push_back(bottom[0]); 15 | sigmoid_top_vec_.clear(); 16 | sigmoid_top_vec_.push_back(sigmoid_output_.get()); 17 | sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_); 18 | count_.resize(2); 19 | has_ignore_label_ = 20 | this->layer_param_.loss_param().has_ignore_label(); 21 | if (has_ignore_label_) { 22 | ignore_label_ = this->layer_param_.loss_param().ignore_label(); 23 | } 24 | if (this->layer_param_.loss_param().has_normalization()) { 25 | normalization_ = this->layer_param_.loss_param().normalization(); 26 | } else if (this->layer_param_.loss_param().has_normalize()) { 27 | normalization_ = this->layer_param_.loss_param().normalize() ? 
28 | LossParameter_NormalizationMode_VALID : 29 | LossParameter_NormalizationMode_BATCH_SIZE; 30 | } else { 31 | normalization_ = LossParameter_NormalizationMode_BATCH_SIZE; 32 | } 33 | } 34 | 35 | template 36 | void BalanceCrossEntropyLossLayer::Reshape( 37 | const vector*>& bottom, const vector*>& top) { 38 | LossLayer::Reshape(bottom, top); 39 | outer_num_ = bottom[0]->shape(0); // batch size 40 | inner_num_ = bottom[0]->count(1); // instance size: |output| == |target| 41 | CHECK_EQ(bottom[0]->count(), bottom[1]->count()) << 42 | "BALANCE_CROSS_ENTROPY_LOSS layer inputs must have the same count."; 43 | sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_); 44 | } 45 | 46 | template 47 | Dtype BalanceCrossEntropyLossLayer::get_normalizer( 48 | LossParameter_NormalizationMode normalization_mode, int valid_count) { 49 | Dtype normalizer; 50 | switch (normalization_mode) { 51 | case LossParameter_NormalizationMode_FULL: 52 | normalizer = Dtype(outer_num_ * inner_num_); 53 | break; 54 | case LossParameter_NormalizationMode_VALID: 55 | if (valid_count == -1) { 56 | normalizer = Dtype(outer_num_ * inner_num_); 57 | } else { 58 | normalizer = Dtype(valid_count); 59 | } 60 | break; 61 | case LossParameter_NormalizationMode_BATCH_SIZE: 62 | normalizer = Dtype(outer_num_); 63 | break; 64 | case LossParameter_NormalizationMode_NONE: 65 | normalizer = Dtype(1); 66 | break; 67 | default: 68 | LOG(FATAL) << "Unknown normalization mode: " 69 | << LossParameter_NormalizationMode_Name(normalization_mode); 70 | } 71 | // Some users will have no labels for some examples in order to 'turn off' a 72 | // particular loss in a multi-task setup. The max prevents NaNs in that case. 73 | return std::max(Dtype(1.0), normalizer); 74 | } 75 | 76 | template 77 | void BalanceCrossEntropyLossLayer::Forward_cpu( 78 | const vector*>& bottom, const vector*>& top) { 79 | // The forward pass computes the sigmoid outputs. 
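  // Class-balanced cross-entropy as in HED: positive and negative pixels are accumulated
  // separately below, then each class is weighted by the fraction of the opposite class,
  // i.e. loss = (|Y-|/|Y|) * loss_pos + (|Y+|/|Y|) * loss_neg, so that the sparse edge
  // pixels are not swamped by the background.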
80 | sigmoid_bottom_vec_[0] = bottom[0]; 81 | sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); 82 | // Compute the loss (negative log likelihood) 83 | // Stable version of loss computation from input data 84 | const Dtype* input_data = bottom[0]->cpu_data(); 85 | const Dtype* target = bottom[1]->cpu_data(); 86 | Dtype loss = 0, loss_pos = 0, loss_neg = 0; 87 | count_[0] = 0; count_[1] = 0; 88 | for (int i = 0; i < bottom[0]->count(); ++i) { 89 | const int target_value = static_cast(target[i]); 90 | if (has_ignore_label_ && target_value == ignore_label_) { 91 | continue; 92 | } 93 | if (target[i] == 1) { 94 | loss_pos -= input_data[i] * (target[i] - (input_data[i] >= 0)) - 95 | log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); 96 | count_[1]++; 97 | } else if (target[i] == 0) { 98 | loss_neg -= input_data[i] * (target[i] - (input_data[i] >= 0)) - 99 | log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); 100 | count_[0]++; 101 | } else { 102 | LOG(FATAL)<<"Unknown target value: " << target[i]; 103 | } 104 | } 105 | loss += loss_pos * count_[0] / (count_[0]+count_[1]); 106 | loss += loss_neg * count_[1] / (count_[0]+count_[1]); 107 | normalizer_ = get_normalizer(normalization_, count_[0]+count_[1]); 108 | top[0]->mutable_cpu_data()[0] = loss / normalizer_; 109 | } 110 | 111 | template 112 | void BalanceCrossEntropyLossLayer::Backward_cpu( 113 | const vector*>& top, const vector& propagate_down, 114 | const vector*>& bottom) { 115 | if (propagate_down[1]) { 116 | LOG(FATAL) << this->type() 117 | << " Layer cannot backpropagate to label inputs."; 118 | } 119 | if (propagate_down[0]) { 120 | // First, compute the diff 121 | const int count = bottom[0]->count(); 122 | const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data(); 123 | const Dtype* target = bottom[1]->cpu_data(); 124 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); 125 | caffe_sub(count, sigmoid_output_data, target, bottom_diff); 126 | for (int i = 0; i < count; ++i) { 127 | // Zero out gradient of ignored targets. 
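      // The remaining diffs are then re-weighted per class, mirroring the forward pass:
      // negative pixels are scaled by |Y+|/|Y| and positive pixels by |Y-|/|Y|.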
128 | if (has_ignore_label_ && target[i] == ignore_label_) { 129 | bottom_diff[i] = 0; continue; 130 | } 131 | if (target[i] == 0) { 132 | bottom_diff[i] *= count_[1] / (count_[0]+count_[1]); 133 | } else if (target[i] == 1) { 134 | bottom_diff[i] *= count_[0] / (count_[0]+count_[1]); 135 | } else { 136 | LOG(FATAL)<<"Unknown target value: "<cpu_diff()[0] / normalizer_; 141 | caffe_scal(count, loss_weight, bottom_diff); 142 | } 143 | } 144 | 145 | #ifdef CPU_ONLY 146 | STUB_GPU(BalanceCrossEntropyLossLayer); 147 | #endif 148 | 149 | INSTANTIATE_CLASS(BalanceCrossEntropyLossLayer); 150 | REGISTER_LAYER_CLASS(BalanceCrossEntropyLoss); 151 | 152 | } // namespace caffe 153 | -------------------------------------------------------------------------------- /model/hed.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.insert(0, 'caffe/python') 3 | import caffe 4 | from caffe import layers as L, params as P 5 | from caffe.coord_map import crop 6 | import numpy as np 7 | from math import ceil 8 | 9 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 10 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 11 | num_output=nout, pad=pad, weight_filler=dict(type='xavier'), 12 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 13 | return conv, L.ReLU(conv, in_place=True) 14 | 15 | def max_pool(bottom, ks=2, stride=2): 16 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 17 | 18 | def conv1x1(bottom, name, lr=1): 19 | return L.Convolution(bottom, name=name, kernel_size=1,num_output=1,# weight_filler=dict(type='xavier'), 20 | param=[dict(lr_mult=0.01*lr, decay_mult=1), dict(lr_mult=0.02*lr, decay_mult=0)]) 21 | 22 | def upsample(bottom, stride): 23 | s, k, pad = stride, 2 * stride, int(ceil(stride-1)/2) 24 | name = "upsample%d"%s 25 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=1, 26 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear")), 27 | param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]) 28 | 29 | def net(split): 30 | n = caffe.NetSpec() 31 | if split=='train': 32 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 33 | data_params['root'] = 'data/HED-BSDS' 34 | data_params['source'] = "train_pair.lst" 35 | data_params['shuffle'] = True 36 | data_params['ignore_label'] = -1 # ignore label 37 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 38 | param_str=str(data_params)) 39 | loss_param = dict(normalize=False) 40 | if data_params.has_key('ignore_label'): 41 | loss_param['ignore_label'] = data_params['ignore_label'] 42 | elif split == 'test': 43 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,500,500]))) 44 | else: 45 | raise Exception("Invalid phase") 46 | 47 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 48 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 49 | n.pool1 = max_pool(n.relu1_2) 50 | 51 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 52 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 53 | n.pool2 = max_pool(n.relu2_2) 54 | 55 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 56 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 57 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 58 | n.pool3 = max_pool(n.relu3_3) 59 | 60 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 61 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 62 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 
512) 63 | n.pool4 = max_pool(n.relu4_3) 64 | 65 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 66 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 67 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 68 | 69 | # DSN1 70 | n.score_dsn1=conv1x1(n.conv1_2, 'score-dsn1', lr=1) 71 | n.upscore_dsn1 = crop(n.score_dsn1, n.data) 72 | if split=='train': 73 | n.loss1 = L.BalanceCrossEntropyLoss(n.upscore_dsn1, n.label, loss_param=loss_param) 74 | else: 75 | n.sigmoid_dsn1 = L.Sigmoid(n.upscore_dsn1) 76 | # DSN2 77 | n.score_dsn2 = conv1x1(n.conv2_2, 'score-dsn2') 78 | n.score_dsn2_up = upsample(n.score_dsn2, stride=2) 79 | n.upscore_dsn2 = crop(n.score_dsn2_up, n.data) 80 | if split=='train': 81 | n.loss2 = L.BalanceCrossEntropyLoss(n.upscore_dsn2, n.label, loss_param=loss_param) 82 | else: 83 | n.sigmoid_dsn2 = L.Sigmoid(n.upscore_dsn2) 84 | # DSN3 85 | n.score_dsn3=conv1x1(n.conv3_3, 'score-dsn3') 86 | n.score_dsn3_up = upsample(n.score_dsn3, stride=4) 87 | n.upscore_dsn3 = crop(n.score_dsn3_up, n.data) 88 | if split=='train': 89 | n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=loss_param) 90 | else: 91 | n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3) 92 | # DSN4 93 | n.score_dsn4 = conv1x1(n.conv4_3, 'score-dsn4') 94 | n.score_dsn4_up = upsample(n.score_dsn4, stride=8) 95 | n.upscore_dsn4 = crop(n.score_dsn4_up, n.data) 96 | if split=='train': 97 | n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=loss_param) 98 | else: 99 | n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4) 100 | # DSN5 101 | n.score_dsn5=conv1x1(n.conv5_3, 'score-dsn5') 102 | n.score_dsn5_up = upsample(n.score_dsn5, stride=16) 103 | n.upscore_dsn5 = crop(n.score_dsn5_up, n.data) 104 | if split=='train': 105 | n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=loss_param) 106 | elif split=='test': 107 | n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5) 108 | # concat and fuse 109 | n.concat_upscore = L.Concat(n.upscore_dsn1, 110 | n.upscore_dsn2, 111 | n.upscore_dsn3, 112 | n.upscore_dsn4, 113 | n.upscore_dsn5, 114 | name='concat', concat_param=dict({'concat_dim':1})) 115 | n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', 116 | num_output=1, kernel_size=1, 117 | param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)], 118 | weight_filler=dict(type='constant', value=0.2)) 119 | if split=='test': 120 | n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse) 121 | else: 122 | n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param) 123 | return n.to_proto() 124 | 125 | def make_net(): 126 | with open('model/hed_train.pt', 'w') as f: 127 | f.write(str(net('train'))) 128 | with open('model/hed_test.pt', 'w') as f: 129 | f.write(str(net('test'))) 130 | def make_solver(): 131 | sp = {} 132 | sp['net'] = '"model/hed_train.pt"' 133 | sp['base_lr'] = '0.000001' 134 | sp['lr_policy'] = '"step"' 135 | sp['momentum'] = '0.9' 136 | sp['weight_decay'] = '0.0002' 137 | sp['iter_size'] = '10' 138 | sp['stepsize'] = '10000' 139 | sp['display'] = '10' 140 | sp['snapshot'] = '2000' 141 | sp['snapshot_prefix'] = '"snapshot/hed"' 142 | sp['gamma'] = '0.1' 143 | sp['max_iter'] = '40000' 144 | sp['solver_mode'] = 'GPU' 145 | f = open('model/hed_solver.pt', 'w') 146 | for k, v in sorted(sp.items()): 147 | if not(type(v) is str): 148 | raise TypeError('All solver parameters must be strings') 149 | f.write('%s: %s\n'%(k, v)) 150 | f.close() 151 | 152 | def make_all(): 153 | 
make_net() 154 | make_solver() 155 | 156 | if __name__ == '__main__': 157 | make_all() 158 | -------------------------------------------------------------------------------- /lib/test_balance_cross_entropy_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "gtest/gtest.h" 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/common.hpp" 8 | #include "caffe/filler.hpp" 9 | #include "caffe/layers/balance_cross_entropy_loss_layer.hpp" 10 | 11 | #include "caffe/test/test_caffe_main.hpp" 12 | #include "caffe/test/test_gradient_check_util.hpp" 13 | 14 | namespace caffe { 15 | 16 | template 17 | class BalanceCrossEntropyLossLayerTest : public MultiDeviceTest { 18 | typedef typename TypeParam::Dtype Dtype; 19 | 20 | protected: 21 | BalanceCrossEntropyLossLayerTest() 22 | : blob_bottom_data_(new Blob(1, 1, 3, 3)), 23 | blob_bottom_targets_(new Blob(1, 1, 3, 3)), 24 | blob_top_loss_(new Blob()) { 25 | // Fill the data vector 26 | FillerParameter data_filler_param; 27 | data_filler_param.set_std(1); 28 | GaussianFiller data_filler(data_filler_param); 29 | data_filler.Fill(blob_bottom_data_); 30 | blob_bottom_vec_.push_back(blob_bottom_data_); 31 | // Fill the targets vector 32 | FillerParameter targets_filler_param; 33 | targets_filler_param.set_min(0); 34 | targets_filler_param.set_max(1); 35 | UniformFiller targets_filler(targets_filler_param); 36 | targets_filler.Fill(blob_bottom_targets_); 37 | for (int i=0; icount(); ++i) { 38 | if (blob_bottom_targets_->cpu_data()[i] >= 0.5) 39 | blob_bottom_targets_->mutable_cpu_data()[i] = 1; 40 | else 41 | blob_bottom_targets_->mutable_cpu_data()[i] = 0; 42 | } 43 | blob_bottom_vec_.push_back(blob_bottom_targets_); 44 | blob_top_vec_.push_back(blob_top_loss_); 45 | } 46 | virtual ~BalanceCrossEntropyLossLayerTest() { 47 | delete blob_bottom_data_; 48 | delete blob_bottom_targets_; 49 | delete blob_top_loss_; 50 | } 51 | 52 | Dtype BalanceCrossEntropyLossReference(const int count, const int num, 53 | const Dtype* input, 54 | const Dtype* target) { 55 | Dtype loss = 0; 56 | for (int i = 0; i < count; ++i) { 57 | const Dtype prediction = 1 / (1 + exp(-input[i])); 58 | EXPECT_LE(prediction, 1); 59 | EXPECT_GE(prediction, 0); 60 | EXPECT_LE(target[i], 1); 61 | EXPECT_GE(target[i], 0); 62 | loss -= target[i] * log(prediction + (target[i] == Dtype(0))); 63 | loss -= (1 - target[i]) * log(1 - prediction + (target[i] == Dtype(1))); 64 | } 65 | return loss / num; 66 | } 67 | 68 | void TestForward() { 69 | LayerParameter layer_param; 70 | const Dtype kLossWeight = 3.7; 71 | layer_param.add_loss_weight(kLossWeight); 72 | FillerParameter data_filler_param; 73 | data_filler_param.set_std(1); 74 | GaussianFiller data_filler(data_filler_param); 75 | FillerParameter targets_filler_param; 76 | targets_filler_param.set_min(0.0); 77 | targets_filler_param.set_max(1.0); 78 | UniformFiller targets_filler(targets_filler_param); 79 | Dtype eps = 2e-2; 80 | for (int i = 0; i < 100; ++i) { 81 | // Fill the data vector 82 | data_filler.Fill(this->blob_bottom_data_); 83 | // Fill the targets vector 84 | targets_filler.Fill(this->blob_bottom_targets_); 85 | for (int k=0; kcount(); ++k){ 86 | if (blob_bottom_targets_->cpu_data()[k] <= 0.5) 87 | blob_bottom_targets_->mutable_cpu_data()[k] = 0; 88 | else 89 | blob_bottom_targets_->mutable_cpu_data()[k] = 1; 90 | } 91 | BalanceCrossEntropyLossLayer layer(layer_param); 92 | layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); 93 | Dtype layer_loss = 
94 | layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); 95 | const int count = this->blob_bottom_data_->count(); 96 | const int num = this->blob_bottom_data_->num(); 97 | const Dtype* blob_bottom_data = this->blob_bottom_data_->cpu_data(); 98 | const Dtype* blob_bottom_targets = 99 | this->blob_bottom_targets_->cpu_data(); 100 | Dtype reference_loss = kLossWeight * BalanceCrossEntropyLossReference( 101 | count, num, blob_bottom_data, blob_bottom_targets); 102 | EXPECT_NEAR(reference_loss, layer_loss, eps) << "debug: trial #" << i; 103 | } 104 | } 105 | 106 | Blob* const blob_bottom_data_; 107 | Blob* const blob_bottom_targets_; 108 | Blob* const blob_top_loss_; 109 | vector*> blob_bottom_vec_; 110 | vector*> blob_top_vec_; 111 | }; 112 | 113 | TYPED_TEST_CASE(BalanceCrossEntropyLossLayerTest, TestDtypesAndDevices); 114 | 115 | // TYPED_TEST(BalanceCrossEntropyLossLayerTest, TestBalanceCrossEntropyLoss) { 116 | // this->TestForward(); 117 | // } 118 | 119 | TYPED_TEST(BalanceCrossEntropyLossLayerTest, TestGradient) { 120 | typedef typename TypeParam::Dtype Dtype; 121 | LayerParameter layer_param; 122 | const Dtype kLossWeight = 3.7; 123 | layer_param.add_loss_weight(kLossWeight); 124 | BalanceCrossEntropyLossLayer layer(layer_param); 125 | layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); 126 | GradientChecker checker(1e-2, 1e-2, 1701); 127 | checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, 128 | this->blob_top_vec_, 0); 129 | } 130 | 131 | TYPED_TEST(BalanceCrossEntropyLossLayerTest, TestIgnoreGradient) { 132 | typedef typename TypeParam::Dtype Dtype; 133 | FillerParameter data_filler_param; 134 | data_filler_param.set_std(1); 135 | GaussianFiller data_filler(data_filler_param); 136 | data_filler.Fill(this->blob_bottom_data_); 137 | LayerParameter layer_param; 138 | LossParameter* loss_param = layer_param.mutable_loss_param(); 139 | loss_param->set_ignore_label(-1); 140 | Dtype* target = this->blob_bottom_targets_->mutable_cpu_data(); 141 | const int count = this->blob_bottom_targets_->count(); 142 | // Ignore half of targets, then check that diff of this half is zero, 143 | // while the other half is nonzero. 
144 | caffe_set(count / 2, Dtype(-1), target); 145 | BalanceCrossEntropyLossLayer layer(layer_param); 146 | layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); 147 | layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); 148 | vector propagate_down(2); 149 | propagate_down[0] = true; 150 | propagate_down[1] = false; 151 | layer.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); 152 | const Dtype* diff = this->blob_bottom_data_->cpu_diff(); 153 | for (int i = 0; i < count / 2; ++i) { 154 | EXPECT_FLOAT_EQ(diff[i], 0.); 155 | EXPECT_NE(diff[i + count / 2], 0.); 156 | } 157 | } 158 | 159 | 160 | } // namespace caffe 161 | -------------------------------------------------------------------------------- /model/rcf.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.insert(0, 'caffe/python') 3 | import caffe 4 | from caffe import layers as L, params as P 5 | from caffe.coord_map import crop 6 | import numpy as np 7 | from math import ceil 8 | 9 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 10 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 11 | num_output=nout, pad=pad, weight_filler=dict(type='xavier'), 12 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 13 | return conv, L.ReLU(conv, in_place=True) 14 | 15 | def max_pool(bottom, ks=2, stride=2): 16 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 17 | 18 | def conv1x1(bottom, lr=[0.01, 1, 0.02, 0], wf=dict(type="constant")): 19 | return L.Convolution(bottom, kernel_size=1,num_output=1, weight_filler=wf, 20 | param=[dict(lr_mult=lr[0], decay_mult=lr[1]), dict(lr_mult=lr[2], decay_mult=lr[3])]) 21 | 22 | def upsample(bottom, stride): 23 | s, k, pad = stride, 2 * stride, int(ceil(stride-1)/2) 24 | name = "upsample%d"%s 25 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=1, 26 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear")), 27 | param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]) 28 | 29 | def net(split): 30 | n = caffe.NetSpec() 31 | # loss_param = dict(normalization=P.Loss.VALID) 32 | loss_param = dict(normalize=False) 33 | if split=='train': 34 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 35 | data_params['root'] = 'data/HED-BSDS_PASCAL' 36 | data_params['source'] = "bsds_pascal_train_pair.lst" 37 | data_params['shuffle'] = True 38 | data_params['ignore_label'] = -1 39 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 40 | param_str=str(data_params)) 41 | if data_params.has_key('ignore_label'): 42 | loss_param['ignore_label'] = int(data_params['ignore_label']) 43 | elif split == 'test': 44 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,500,500]))) 45 | else: 46 | raise Exception("Invalid phase") 47 | 48 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 49 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 50 | n.pool1 = max_pool(n.relu1_2) 51 | 52 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 53 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 54 | n.pool2 = max_pool(n.relu2_2) 55 | 56 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 57 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 58 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 59 | n.pool3 = max_pool(n.relu3_3) 60 | 61 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 62 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 
512) 63 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 64 | n.pool4 = max_pool(n.relu4_3) 65 | 66 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 67 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 68 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 69 | 70 | # DSN1 71 | n.w1_1 = conv1x1(n.conv1_1, lr=[0.1, 1, 0.2, 0]) 72 | n.w1_2 = conv1x1(n.conv1_2, lr=[0.1, 1, 0.2, 0]) 73 | n.fuse1 = L.Eltwise(n.w1_1, n.w1_2, operation=P.Eltwise.SUM) 74 | n.score_dsn1 = conv1x1(n.fuse1, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 75 | n.upscore_dsn1 = crop(n.score_dsn1, n.data) 76 | if split=='train': 77 | n.loss1 = L.BalanceCrossEntropyLoss(n.upscore_dsn1, n.label, loss_param=loss_param) 78 | if split=='test': 79 | n.sigmoid_dsn1 = L.Sigmoid(n.upscore_dsn1) 80 | # DSN2 81 | n.w2_1 = conv1x1(n.conv2_1, lr=[0.1, 1, 0.2, 0]) 82 | n.w2_2 = conv1x1(n.conv2_1, lr=[0.1, 1, 0.2, 0]) 83 | n.fuse2 = L.Eltwise(n.w2_1, n.w2_2, operation=P.Eltwise.SUM) 84 | n.score_dsn2 = conv1x1(n.fuse2, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 85 | n.score_dsn2_up = upsample(n.score_dsn2, stride=2) 86 | n.upscore_dsn2 = crop(n.score_dsn2_up, n.data) 87 | if split=='train': 88 | n.loss2 = L.BalanceCrossEntropyLoss(n.upscore_dsn2, n.label, loss_param=loss_param) 89 | if split=='test': 90 | n.sigmoid_dsn2 = L.Sigmoid(n.upscore_dsn2) 91 | # DSN3 92 | n.w3_1 = conv1x1(n.conv3_1, lr=[0.1, 1, 0.2, 0]) 93 | n.w3_2 = conv1x1(n.conv3_2, lr=[0.1, 1, 0.2, 0]) 94 | n.w3_3 = conv1x1(n.conv3_3, lr=[0.1, 1, 0.2, 0]) 95 | n.fuse3 = L.Eltwise(n.w3_1, n.w3_2, n.w3_3, operation=P.Eltwise.SUM) 96 | n.score_dsn3 = conv1x1(n.fuse3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 97 | n.score_dsn3_up = upsample(n.score_dsn3, stride=4) 98 | n.upscore_dsn3 = crop(n.score_dsn3_up, n.data) 99 | if split=='train': 100 | n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=loss_param) 101 | if split=='test': 102 | n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3) 103 | # DSN4 104 | n.w4_1 = conv1x1(n.conv4_1, lr=[0.1, 1, 0.2, 0]) 105 | n.w4_2 = conv1x1(n.conv4_2, lr=[0.1, 1, 0.2, 0]) 106 | n.w4_3 = conv1x1(n.conv4_3, lr=[0.1, 1, 0.2, 0]) 107 | n.fuse4 = L.Eltwise(n.w4_1, n.w4_2, n.w4_3, operation=P.Eltwise.SUM) 108 | n.score_dsn4 = conv1x1(n.fuse4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 109 | n.score_dsn4_up = upsample(n.score_dsn4, stride=8) 110 | n.upscore_dsn4 = crop(n.score_dsn4_up, n.data) 111 | if split=='train': 112 | n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=loss_param) 113 | if split=='test': 114 | n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4) 115 | # DSN5 116 | n.w5_1 = conv1x1(n.conv5_1, lr=[0.1, 1, 0.2, 0]) 117 | n.w5_2 = conv1x1(n.conv5_2, lr=[0.1, 1, 0.2, 0]) 118 | n.w5_3 = conv1x1(n.conv5_3, lr=[0.1, 1, 0.2, 0]) 119 | n.fuse5 = L.Eltwise(n.w5_1, n.w5_2, n.w5_3, operation=P.Eltwise.SUM) 120 | n.score_dsn5 = conv1x1(n.fuse5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 121 | n.score_dsn5_up = upsample(n.score_dsn5, stride=16) 122 | n.upscore_dsn5 = crop(n.score_dsn5_up, n.data) 123 | if split=='train': 124 | n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=loss_param) 125 | elif split=='test': 126 | n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5) 127 | # concat and fuse 128 | n.concat_upscore = L.Concat(n.upscore_dsn1, 129 | n.upscore_dsn2, 130 | n.upscore_dsn3, 131 | n.upscore_dsn4, 132 | n.upscore_dsn5, 133 | name='concat', 
concat_param=dict({'concat_dim':1})) 134 | n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', 135 | num_output=1, kernel_size=1, 136 | param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)], 137 | weight_filler=dict(type='constant', value=0.2)) 138 | if split=='test': 139 | n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse) 140 | if split=='train': 141 | n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param) 142 | return n.to_proto() 143 | 144 | def make_net(): 145 | with open('model/rcf_train.pt', 'w') as f: 146 | f.write(str(net('train'))) 147 | with open('model/rcf_test.pt', 'w') as f: 148 | f.write(str(net('test'))) 149 | def make_solver(): 150 | sp = {} 151 | sp['net'] = '"model/rcf_train.pt"' 152 | sp['base_lr'] = '0.000001' 153 | sp['lr_policy'] = '"step"' 154 | sp['momentum'] = '0.9' 155 | sp['weight_decay'] = '0.0002' 156 | sp['iter_size'] = '10' 157 | sp['stepsize'] = '20000' 158 | sp['display'] = '10' 159 | sp['snapshot'] = '2000' 160 | sp['snapshot_prefix'] = '"snapshot/rcf"' 161 | sp['gamma'] = '0.1' 162 | sp['max_iter'] = '40000' 163 | sp['solver_mode'] = 'GPU' 164 | f = open('model/rcf_solver.pt', 'w') 165 | for k, v in sorted(sp.items()): 166 | if not(type(v) is str): 167 | raise TypeError('All solver parameters must be strings') 168 | f.write('%s: %s\n'%(k, v)) 169 | f.close() 170 | 171 | def make_all(): 172 | make_net() 173 | make_solver() 174 | 175 | if __name__ == '__main__': 176 | make_all() 177 | -------------------------------------------------------------------------------- /model/h1.py: -------------------------------------------------------------------------------- 1 | import sys, os, argparse 2 | from os.path import join, isdir, isfile, split 3 | sys.path.insert(0, 'caffe/python') 4 | import caffe 5 | from caffe import layers as L, params as P 6 | from caffe.coord_map import crop 7 | import numpy as np 8 | from math import ceil 9 | parser = argparse.ArgumentParser(description='Training hed.') 10 | parser.add_argument('--nfeat', type=int, help='number features', default=1) 11 | parser.add_argument('--cat', type=str, help='cat or elt-sum', default='False') 12 | parser.add_argument('--bias', type=bool, default=True) 13 | args = parser.parse_args() 14 | def str2bool(str1): 15 | str1 = str(str1).lower() 16 | if "false" in str1 or "0" in str1: 17 | return False 18 | else: 19 | return True 20 | args.cat = str2bool(args.cat) 21 | tmp_dir = 'tmp' 22 | if not isdir(tmp_dir): 23 | os.makedirs(tmp_dir) 24 | 25 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 26 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 27 | num_output=nout, pad=pad, weight_filler=dict(type='msra'), 28 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 29 | return conv, L.ReLU(conv, in_place=True) 30 | 31 | def max_pool(bottom, ks=2, stride=2): 32 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 33 | 34 | def conv1x1(bottom, nout=1, lr=[0.01, 1, 0.02, 0], wf=dict(type="constant")): 35 | if args.bias: 36 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 37 | param=[dict(lr_mult=lr[0], decay_mult=lr[1]), dict(lr_mult=lr[2], decay_mult=lr[3])]) 38 | else: 39 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 40 | bias_term=False, param=[dict(lr_mult=lr[0], decay_mult=lr[1])]) 41 | 42 | def upsample(bottom, stride, nout=1, name=None): 43 | s, k, pad = stride, 2 * 
stride, int(ceil(stride-1)/2) 44 | if not name: 45 | name = "upsample%d"%s 46 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=nout, bias_term=False, 47 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear"), group=nout), 48 | param=[dict(lr_mult=0, decay_mult=0)]) 49 | 50 | def net(split): 51 | n = caffe.NetSpec() 52 | loss_param = dict(normalization=P.Loss.VALID) 53 | # loss_param = dict(normalize=False) 54 | if split=='train': 55 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 56 | #data_params['root'] = 'data/HED-BSDS_PASCAL' 57 | data_params['root'] = 'data/PASCAL-Context-Edge/' 58 | data_params['source'] = "train_pair.lst" 59 | data_params['shuffle'] = True 60 | #data_params['ignore_label'] = -1 61 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 62 | param_str=str(data_params)) 63 | if data_params.has_key('ignore_label'): 64 | loss_param['ignore_label'] = int(data_params['ignore_label']) 65 | elif split == 'test': 66 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,200,200]))) 67 | else: 68 | raise Exception("Invalid phase") 69 | 70 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 71 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 72 | n.pool1 = max_pool(n.relu1_2) 73 | 74 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 75 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 76 | n.pool2 = max_pool(n.relu2_2) 77 | 78 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 79 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 80 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 81 | n.pool3 = max_pool(n.relu3_3) 82 | 83 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 84 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 85 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 86 | n.pool4 = max_pool(n.relu4_3) 87 | 88 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 89 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 90 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 91 | ## w1 92 | n.w1_1top = conv1x1(n.conv1_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 93 | n.w1_2top = conv1x1(n.conv1_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 94 | ## w2 95 | n.w2_1top = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 96 | n.w2_2top = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 97 | n.w2_1down = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 98 | n.w2_2down = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 99 | ## w3 100 | n.w3_1top = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 101 | n.w3_2top = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 102 | n.w3_3top = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 103 | n.w3_1down = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 104 | n.w3_2down = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 105 | n.w3_3down = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 106 | ## w4 107 | n.w4_1top = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], 
wf=dict(type='gaussian', std=0.001)) 108 | n.w4_2top = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 109 | n.w4_3top = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 110 | n.w4_1down = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 111 | n.w4_2down = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 112 | n.w4_3down = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 113 | ## w5 114 | n.w5_1down = conv1x1(n.conv5_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 115 | n.w5_2down = conv1x1(n.conv5_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 116 | n.w5_3down = conv1x1(n.conv5_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 117 | 118 | ## upsample wx_xdown 119 | n.w2_1down_up = upsample(n.w2_1down, nout=args.nfeat, stride=2, name='upsample2_1') 120 | n.w2_2down_up = upsample(n.w2_2down, nout=args.nfeat, stride=2, name='upsample2_2') 121 | 122 | n.w3_1down_up = upsample(n.w3_1down, nout=args.nfeat, stride=2, name='upsample3_1') 123 | n.w3_2down_up = upsample(n.w3_2down, nout=args.nfeat, stride=2, name='upsample3_2') 124 | n.w3_3down_up = upsample(n.w3_3down, nout=args.nfeat, stride=2, name='upsample3_3') 125 | 126 | n.w4_1down_up = upsample(n.w4_1down, nout=args.nfeat, stride=2, name='upsample4_1') 127 | n.w4_2down_up = upsample(n.w4_2down, nout=args.nfeat, stride=2, name='upsample4_2') 128 | n.w4_3down_up = upsample(n.w4_3down, nout=args.nfeat, stride=2, name='upsample4_3') 129 | 130 | n.w5_1down_up = upsample(n.w5_1down, nout=args.nfeat, stride=2, name='upsample5_1') 131 | n.w5_2down_up = upsample(n.w5_2down, nout=args.nfeat, stride=2, name='upsample5_2') 132 | n.w5_3down_up = upsample(n.w5_3down, nout=args.nfeat, stride=2, name='upsample5_3') 133 | 134 | ## crop wx_xdown_up 135 | n.w2_1down_up_crop = crop(n.w2_1down_up, n.w1_1top) 136 | n.w2_2down_up_crop = crop(n.w2_2down_up, n.w1_1top) 137 | 138 | n.w3_1down_up_crop = crop(n.w3_1down_up, n.w2_1top) 139 | n.w3_2down_up_crop = crop(n.w3_2down_up, n.w2_1top) 140 | n.w3_3down_up_crop = crop(n.w3_3down_up, n.w2_1top) 141 | 142 | n.w4_1down_up_crop = crop(n.w4_1down_up, n.w3_1top) 143 | n.w4_2down_up_crop = crop(n.w4_2down_up, n.w3_1top) 144 | n.w4_3down_up_crop = crop(n.w4_3down_up, n.w3_1top) 145 | 146 | n.w5_1down_up_crop = crop(n.w5_1down_up, n.w4_1top) 147 | n.w5_2down_up_crop = crop(n.w5_2down_up, n.w4_1top) 148 | n.w5_3down_up_crop = crop(n.w5_3down_up, n.w4_1top) 149 | ## fuse 150 | if args.cat: 151 | n.h1s1_2 = L.Concat(n.w1_1top, n.w1_2top, n.w2_1down_up_crop, n.w2_2down_up_crop) 152 | n.h1s2_3 = L.Concat(n.w2_1top, n.w2_2top, n.w3_1down_up_crop, n.w3_2down_up_crop, n.w3_3down_up_crop) 153 | n.h1s3_4 = L.Concat(n.w3_1top, n.w3_2top, n.w3_3top, \ 154 | n.w4_1down_up_crop, n.w4_2down_up_crop, n.w4_3down_up_crop) 155 | n.h1s4_5 = L.Concat(n.w4_1top, n.w4_2top, n.w4_3top, \ 156 | n.w5_1down_up_crop, n.w5_2down_up_crop, n.w5_3down_up_crop) 157 | # n.h1s1_2 = conv1x1(n.h1s1_2cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 158 | # n.h1s2_3 = conv1x1(n.h1s2_3cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 159 | # n.h1s3_4 = conv1x1(n.h1s3_4cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 160 | # n.h1s4_5 = conv1x1(n.h1s4_5cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 
161 | else: 162 | n.h1s1_2 = L.Eltwise(n.w1_1top, n.w1_2top, n.w2_1down_up_crop, n.w2_2down_up_crop) 163 | n.h1s2_3 = L.Eltwise(n.w2_1top, n.w2_2top, n.w3_1down_up_crop, n.w3_2down_up_crop, n.w3_3down_up_crop) 164 | n.h1s3_4 = L.Eltwise(n.w3_1top, n.w3_2top, n.w3_3top, \ 165 | n.w4_1down_up_crop, n.w4_2down_up_crop, n.w4_3down_up_crop) 166 | n.h1s4_5 = L.Eltwise(n.w4_1top, n.w4_2top, n.w4_3top, \ 167 | n.w5_1down_up_crop, n.w5_2down_up_crop, n.w5_3down_up_crop) 168 | ## score h1sx_x 169 | n.score_h1s1_2 = conv1x1(n.h1s1_2, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 170 | n.score_h1s2_3 = conv1x1(n.h1s2_3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 171 | n.score_h1s3_4 = conv1x1(n.h1s3_4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 172 | n.score_h1s4_5 = conv1x1(n.h1s4_5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 173 | ## upsample score 174 | n.upscore_h1s2_3 = upsample(n.score_h1s2_3, stride=2, name='upscore_h1s2_3') 175 | n.upscore_h1s3_4 = upsample(n.score_h1s3_4, stride=4, name='upscore_h1s3_4') 176 | n.upscore_h1s4_5 = upsample(n.score_h1s4_5, stride=8, name='upscore_h1s4_5') 177 | ## crop upscore_h1sx_x 178 | n.crop_h1s1_2 = crop(n.score_h1s1_2, n.data) 179 | n.crop_h1s2_3 = crop(n.upscore_h1s2_3, n.data) 180 | n.crop_h1s3_4 = crop(n.upscore_h1s3_4, n.data) 181 | n.crop_h1s4_5 = crop(n.upscore_h1s4_5, n.data) 182 | ## fuse 183 | n.h1_concat = L.Concat(n.crop_h1s1_2, 184 | n.crop_h1s2_3, 185 | n.crop_h1s3_4, 186 | n.crop_h1s4_5, 187 | concat_param=dict({'concat_dim':1})) 188 | n.h1_fuse = conv1x1(n.h1_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=float(1)/4)) 189 | if split == 'train': 190 | n.loss_h1s1_2 = L.BalanceCrossEntropyLoss(n.crop_h1s1_2, n.label, loss_param=loss_param) 191 | n.loss_h1s2_3 = L.BalanceCrossEntropyLoss(n.crop_h1s2_3, n.label, loss_param=loss_param) 192 | n.loss_h1s3_4 = L.BalanceCrossEntropyLoss(n.crop_h1s3_4, n.label, loss_param=loss_param) 193 | n.loss_h1s4_5 = L.BalanceCrossEntropyLoss(n.crop_h1s4_5, n.label, loss_param=loss_param) 194 | n.loss_h1_fuse = L.BalanceCrossEntropyLoss(n.h1_fuse, n.label, loss_param=loss_param) 195 | else: 196 | n.sigmoid_h1s1_2 = L.Sigmoid(n.crop_h1s1_2) 197 | n.sigmoid_h1s2_3 = L.Sigmoid(n.crop_h1s2_3) 198 | n.sigmoid_h1s3_4 = L.Sigmoid(n.crop_h1s3_4) 199 | n.sigmoid_h1s4_5 = L.Sigmoid(n.crop_h1s4_5) 200 | n.sigmoid_h1_fuse = L.Sigmoid(n.h1_fuse) 201 | return n.to_proto() 202 | 203 | def make_net(): 204 | fpath = join(tmp_dir, "h1feat%d_train.pt"%args.nfeat) 205 | with open(fpath, 'w') as f: 206 | f.write(str(net('train'))) 207 | fpath = join(tmp_dir, "h1feat%d_test.pt"%args.nfeat) 208 | with open(fpath, 'w') as f: 209 | f.write(str(net('test'))) 210 | def make_solver(): 211 | sp = {} 212 | fpath = join(tmp_dir, "h1feat%d_train.pt"%args.nfeat) 213 | sp['net'] = '"' + fpath + '"' 214 | sp['base_lr'] = '0.01' 215 | sp['lr_policy'] = '"step"' 216 | sp['momentum'] = '0.9' 217 | sp['weight_decay'] = '0.0002' 218 | sp['iter_size'] = '10' 219 | sp['stepsize'] = '20000' 220 | sp['display'] = '10' 221 | sp['snapshot'] = '2000' 222 | sp['snapshot_prefix'] = '"snapshot/h1feat%d"'%args.nfeat 223 | sp['gamma'] = '0.1' 224 | sp['max_iter'] = '40000' 225 | sp['solver_mode'] = 'GPU' 226 | fpath = join(tmp_dir, "h1feat%d_solver.pt"%args.nfeat) 227 | f = open(fpath, 'w') 228 | for k, v in sorted(sp.items()): 229 | if not(type(v) is str): 230 | raise TypeError('All solver parameters must be strings') 231 | f.write('%s: %s\n'%(k, v)) 232 | f.close() 233 | 234 | def 
make_all(): 235 | make_net() 236 | make_solver() 237 | 238 | if __name__ == '__main__': 239 | make_all() 240 | -------------------------------------------------------------------------------- /model/h2.py: -------------------------------------------------------------------------------- 1 | import sys, os, argparse 2 | from os.path import join, isdir, isfile, split 3 | sys.path.insert(0, 'caffe/python') 4 | import caffe 5 | from caffe import layers as L, params as P 6 | from caffe.coord_map import crop 7 | import numpy as np 8 | from math import ceil 9 | parser = argparse.ArgumentParser(description='Training hed.') 10 | parser.add_argument('--nfeat', type=int, help='number features', default=11) 11 | parser.add_argument('--bias', type=bool, default=True) 12 | args = parser.parse_args() 13 | tmp_dir = 'tmp' 14 | if not isdir(tmp_dir): 15 | os.makedirs(tmp_dir) 16 | 17 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 18 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 19 | num_output=nout, pad=pad, weight_filler=dict(type='msra'), 20 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 21 | return conv, L.ReLU(conv, in_place=True) 22 | 23 | def max_pool(bottom, ks=2, stride=2): 24 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 25 | 26 | def conv1x1(bottom, nout=1, lr=[0.01, 1, 0.02, 0], wf=dict(type="constant")): 27 | if args.bias: 28 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 29 | param=[dict(lr_mult=lr[0], decay_mult=lr[1]), dict(lr_mult=lr[2], decay_mult=lr[3])]) 30 | else: 31 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 32 | bias_term=False, param=[dict(lr_mult=lr[0], decay_mult=lr[1])]) 33 | 34 | def upsample(bottom, stride, nout=1, name=None): 35 | s, k, pad = stride, 2 * stride, int(ceil(stride-1)/2) 36 | if not name: 37 | name = "upsample%d"%s 38 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=nout, bias_term=False, 39 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear"), group=nout), 40 | param=[dict(lr_mult=0, decay_mult=0)]) 41 | 42 | def net(split): 43 | n = caffe.NetSpec() 44 | # loss_param = dict(normalization=P.Loss.VALID) 45 | loss_param = dict(normalize=False) 46 | if split=='train': 47 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 48 | data_params['root'] = 'data/HED-BSDS_PASCAL' 49 | data_params['source'] = "bsds_pascal_train_pair.lst" 50 | data_params['shuffle'] = True 51 | data_params['ignore_label'] = -1 52 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 53 | param_str=str(data_params)) 54 | if data_params.has_key('ignore_label'): 55 | loss_param['ignore_label'] = int(data_params['ignore_label']) 56 | elif split == 'test': 57 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,200,200]))) 58 | else: 59 | raise Exception("Invalid phase") 60 | 61 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 62 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 63 | n.pool1 = max_pool(n.relu1_2) 64 | 65 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 66 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 67 | n.pool2 = max_pool(n.relu2_2) 68 | 69 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 70 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 71 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 72 | n.pool3 = max_pool(n.relu3_3) 73 | 74 | n.conv4_1, n.relu4_1 = 
conv_relu(n.pool3, 512) 75 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 76 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 77 | n.pool4 = max_pool(n.relu4_3) 78 | 79 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 80 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 81 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 82 | ## w1 83 | n.w1_1top = conv1x1(n.conv1_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 84 | n.w1_2top = conv1x1(n.conv1_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 85 | ## w2 86 | n.w2_1top = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 87 | n.w2_2top = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 88 | n.w2_1down = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 89 | n.w2_2down = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 90 | ## w3 91 | n.w3_1top = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 92 | n.w3_2top = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 93 | n.w3_3top = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 94 | n.w3_1down = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 95 | n.w3_2down = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 96 | n.w3_3down = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 97 | ## w4 98 | n.w4_1top = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 99 | n.w4_2top = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 100 | n.w4_3top = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 101 | n.w4_1down = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 102 | n.w4_2down = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 103 | n.w4_3down = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 104 | ## w5 105 | n.w5_1down = conv1x1(n.conv5_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 106 | n.w5_2down = conv1x1(n.conv5_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 107 | n.w5_3down = conv1x1(n.conv5_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 108 | 109 | ## upsample wx_xdown 110 | n.w2_1down_up = upsample(n.w2_1down, nout=args.nfeat, stride=2, name='upsample2_1') 111 | n.w2_2down_up = upsample(n.w2_2down, nout=args.nfeat, stride=2, name='upsample2_2') 112 | 113 | n.w3_1down_up = upsample(n.w3_1down, nout=args.nfeat, stride=2, name='upsample3_1') 114 | n.w3_2down_up = upsample(n.w3_2down, nout=args.nfeat, stride=2, name='upsample3_2') 115 | n.w3_3down_up = upsample(n.w3_3down, nout=args.nfeat, stride=2, name='upsample3_3') 116 | 117 | n.w4_1down_up = upsample(n.w4_1down, nout=args.nfeat, stride=2, name='upsample4_1') 118 | n.w4_2down_up = upsample(n.w4_2down, nout=args.nfeat, stride=2, name='upsample4_2') 119 | n.w4_3down_up = upsample(n.w4_3down, nout=args.nfeat, stride=2, 
name='upsample4_3') 120 | 121 | n.w5_1down_up = upsample(n.w5_1down, nout=args.nfeat, stride=2, name='upsample5_1') 122 | n.w5_2down_up = upsample(n.w5_2down, nout=args.nfeat, stride=2, name='upsample5_2') 123 | n.w5_3down_up = upsample(n.w5_3down, nout=args.nfeat, stride=2, name='upsample5_3') 124 | 125 | ## crop wx_xdown_up 126 | n.w2_1down_up_crop = crop(n.w2_1down_up, n.w1_1top) 127 | n.w2_2down_up_crop = crop(n.w2_2down_up, n.w1_1top) 128 | 129 | n.w3_1down_up_crop = crop(n.w3_1down_up, n.w2_1top) 130 | n.w3_2down_up_crop = crop(n.w3_2down_up, n.w2_1top) 131 | n.w3_3down_up_crop = crop(n.w3_3down_up, n.w2_1top) 132 | 133 | n.w4_1down_up_crop = crop(n.w4_1down_up, n.w3_1top) 134 | n.w4_2down_up_crop = crop(n.w4_2down_up, n.w3_1top) 135 | n.w4_3down_up_crop = crop(n.w4_3down_up, n.w3_1top) 136 | 137 | n.w5_1down_up_crop = crop(n.w5_1down_up, n.w4_1top) 138 | n.w5_2down_up_crop = crop(n.w5_2down_up, n.w4_1top) 139 | n.w5_3down_up_crop = crop(n.w5_3down_up, n.w4_1top) 140 | 141 | ## fuse 142 | n.h1s1_2 = L.Eltwise(n.w1_1top, n.w1_2top, n.w2_1down_up_crop, n.w2_2down_up_crop) 143 | 144 | n.h1s2_3 = L.Eltwise(n.w2_1top, n.w2_2top, n.w3_1down_up_crop, n.w3_2down_up_crop, n.w3_3down_up_crop) 145 | 146 | n.h1s3_4 = L.Eltwise(n.w3_1top, n.w3_2top, n.w3_3top, \ 147 | n.w4_1down_up_crop, n.w4_2down_up_crop, n.w4_3down_up_crop) 148 | 149 | n.h1s4_5 = L.Eltwise(n.w4_1top, n.w4_2top, n.w4_3top, \ 150 | n.w5_1down_up_crop, n.w5_2down_up_crop, n.w5_3down_up_crop) 151 | 152 | ## score h1sx_x 153 | n.score_h1s1_2 = conv1x1(n.h1s1_2, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 154 | n.score_h1s2_3 = conv1x1(n.h1s2_3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 155 | n.score_h1s3_4 = conv1x1(n.h1s3_4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 156 | n.score_h1s4_5 = conv1x1(n.h1s4_5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 157 | ## upsample score 158 | n.upscore_h1s2_3 = upsample(n.score_h1s2_3, stride=2, name='upscore_h1s2_3') 159 | n.upscore_h1s3_4 = upsample(n.score_h1s3_4, stride=4, name='upscore_h1s3_4') 160 | n.upscore_h1s4_5 = upsample(n.score_h1s4_5, stride=8, name='upscore_h1s4_5') 161 | ## crop upscore_h1sx_x 162 | n.crop_h1s1_2 = crop(n.score_h1s1_2, n.data) 163 | n.crop_h1s2_3 = crop(n.upscore_h1s2_3, n.data) 164 | n.crop_h1s3_4 = crop(n.upscore_h1s3_4, n.data) 165 | n.crop_h1s4_5 = crop(n.upscore_h1s4_5, n.data) 166 | ## fuse 167 | n.h1_concat = L.Concat(n.crop_h1s1_2, 168 | n.crop_h1s2_3, 169 | n.crop_h1s3_4, 170 | n.crop_h1s4_5, 171 | concat_param=dict({'concat_dim':1})) 172 | n.h1_fuse = conv1x1(n.h1_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=float(1)/4)) 173 | if split == 'train': 174 | n.loss_h1s1_2 = L.BalanceCrossEntropyLoss(n.crop_h1s1_2, n.label, loss_param=loss_param) 175 | n.loss_h1s2_3 = L.BalanceCrossEntropyLoss(n.crop_h1s2_3, n.label, loss_param=loss_param) 176 | n.loss_h1s3_4 = L.BalanceCrossEntropyLoss(n.crop_h1s3_4, n.label, loss_param=loss_param) 177 | n.loss_h1s4_5 = L.BalanceCrossEntropyLoss(n.crop_h1s4_5, n.label, loss_param=loss_param) 178 | n.loss_h1_fuse = L.BalanceCrossEntropyLoss(n.h1_fuse, n.label, loss_param=loss_param) 179 | else: 180 | n.sigmoid_h1s1_2 = L.Sigmoid(n.crop_h1s1_2) 181 | n.sigmoid_h1s2_3 = L.Sigmoid(n.crop_h1s2_3) 182 | n.sigmoid_h1s3_4 = L.Sigmoid(n.crop_h1s3_4) 183 | n.sigmoid_h1s4_5 = L.Sigmoid(n.crop_h1s4_5) 184 | n.sigmoid_h1_fuse = L.Sigmoid(n.h1_fuse) 185 | ## H2: conv h1sx_x for H2 fusing 186 | n.h1s1_2top = conv1x1(n.h1s1_2, nout=args.nfeat, lr=[0.1,
1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 187 | n.h1s2_3top = conv1x1(n.h1s2_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 188 | n.h1s2_3down = conv1x1(n.h1s2_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 189 | n.h1s3_4top = conv1x1(n.h1s3_4, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 190 | n.h1s3_4down = conv1x1(n.h1s3_4, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 191 | n.h1s4_5down = conv1x1(n.h1s4_5, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 192 | ## upsample H2 193 | n.h1s2_3upsample = upsample(n.h1s2_3down, nout=args.nfeat, stride=2, name='upsample_h1s2_3') 194 | n.h1s3_4upsample = upsample(n.h1s3_4down, nout=args.nfeat, stride=2, name='upsample_h1s3_4') 195 | n.h1s4_5upsample = upsample(n.h1s4_5down, nout=args.nfeat, stride=2, name='upsample_h1s4_5') 196 | ## Crop H2 197 | n.h1s2_3crop = crop(n.h1s2_3upsample, n.h1s1_2top) 198 | n.h1s3_4crop = crop(n.h1s3_4upsample, n.h1s2_3top) 199 | n.h1s4_5crop = crop(n.h1s4_5upsample, n.h1s3_4top) 200 | ## fuse H2 201 | n.h2s1_2_3 = L.Eltwise(n.h1s1_2top, n.h1s2_3crop) 202 | n.h2s2_3_4 = L.Eltwise(n.h1s2_3top, n.h1s3_4crop) 203 | n.h2s3_4_5 = L.Eltwise(n.h1s3_4top, n.h1s4_5crop) 204 | ## score H2 205 | n.score_h2s1_2_3 = conv1x1(n.h2s1_2_3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 206 | n.score_h2s2_3_4 = conv1x1(n.h2s2_3_4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 207 | n.score_h2s3_4_5 = conv1x1(n.h2s3_4_5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 208 | ## upsample H2 score 209 | n.score_h2s2_3_4upsample = upsample(n.score_h2s2_3_4, stride=2, name='upscore_h2s2_3_4') 210 | n.score_h2s3_4_5upsample = upsample(n.score_h2s3_4_5, stride=4, name='upscore_h2s3_4_5') 211 | ## Crop H2 score 212 | n.score_h2s1_2_3crop = crop(n.score_h2s1_2_3, n.data) 213 | n.score_h2s2_3_4crop = crop(n.score_h2s2_3_4upsample, n.data) 214 | n.score_h2s3_4_5crop = crop(n.score_h2s3_4_5upsample, n.data) 215 | # concat H2 216 | n.h2_concat = L.Concat(n.score_h2s1_2_3crop, n.score_h2s2_3_4crop, n.score_h2s3_4_5crop,\ 217 | concat_param=dict({'concat_dim':1})) 218 | n.h2_fuse = conv1x1(n.h2_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=0.333)) 219 | if split == 'train': 220 | n.loss_h2s1_2_3 = L.BalanceCrossEntropyLoss(n.score_h2s1_2_3crop, n.label, loss_param=loss_param) 221 | n.loss_h2s2_3_4 = L.BalanceCrossEntropyLoss(n.score_h2s2_3_4crop, n.label, loss_param=loss_param) 222 | n.loss_h2s3_4_5 = L.BalanceCrossEntropyLoss(n.score_h2s3_4_5crop, n.label, loss_param=loss_param) 223 | n.loss_h2_fuse = L.BalanceCrossEntropyLoss(n.h2_fuse, n.label, loss_param=loss_param) 224 | else: 225 | n.sigmoid_h2s1_2_3 = L.Sigmoid(n.score_h2s1_2_3crop) 226 | n.sigmoid_h2s2_3_4 = L.Sigmoid(n.score_h2s2_3_4crop) 227 | n.sigmoid_h2s3_4_5 = L.Sigmoid(n.score_h2s3_4_5crop) 228 | n.sigmoid_h2_fuse = L.Sigmoid(n.h2_fuse) 229 | # Concat H1 and H2 230 | n.h1h2_concat = L.Concat(n.score_h2s1_2_3crop, n.score_h2s2_3_4crop, n.score_h2s3_4_5crop, 231 | n.crop_h1s1_2, n.crop_h1s2_3, n.crop_h1s3_4, n.crop_h1s4_5, 232 | concat_param=dict({'concat_dim': 1})) 233 | n.h1h2_fuse = conv1x1(n.h1h2_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=float(1)/7)) 234 | if split == 'train': 235 | n.loss_h1h2_fuse = L.BalanceCrossEntropyLoss(n.h1h2_fuse, n.label, loss_param=loss_param) 236 | else: 237 | n.sigmoid_h1h2_fuse = L.Sigmoid(n.h1h2_fuse) 238 | return 
n.to_proto() 239 | 240 | def make_net(): 241 | fpath = join(tmp_dir, "h2feat%d_train.pt"%args.nfeat) 242 | with open(fpath, 'w') as f: 243 | f.write(str(net('train'))) 244 | fpath = join(tmp_dir, "h2feat%d_test.pt"%args.nfeat) 245 | with open(fpath, 'w') as f: 246 | f.write(str(net('test'))) 247 | def make_solver(): 248 | sp = {} 249 | fpath = join(tmp_dir, "h2feat%d_train.pt"%args.nfeat) 250 | sp['net'] = '"' + fpath + '"' 251 | sp['base_lr'] = '0.000001' 252 | sp['lr_policy'] = '"step"' 253 | sp['momentum'] = '0.9' 254 | sp['weight_decay'] = '0.0002' 255 | sp['iter_size'] = '10' 256 | sp['stepsize'] = '20000' 257 | sp['display'] = '10' 258 | sp['snapshot'] = '2000' 259 | sp['snapshot_prefix'] = '"snapshot/h2feat%d"'%args.nfeat 260 | sp['gamma'] = '0.1' 261 | sp['max_iter'] = '40000' 262 | sp['solver_mode'] = 'GPU' 263 | fpath = join(tmp_dir, "h2feat%d_solver.pt"%args.nfeat) 264 | f = open(fpath, 'w') 265 | for k, v in sorted(sp.items()): 266 | if not(type(v) is str): 267 | raise TypeError('All solver parameters must be strings') 268 | f.write('%s: %s\n'%(k, v)) 269 | f.close() 270 | 271 | def make_all(): 272 | make_net() 273 | make_solver() 274 | 275 | if __name__ == '__main__': 276 | make_all() 277 | --------------------------------------------------------------------------------
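A minimal usage sketch, not a file from the repository: it assumes model/h2.py is invoked from the repository root (so the relative paths 'caffe/python' and 'tmp' resolve) with --nfeat left at its default of 11, drives make_all() through the command line, and then lists the generated prototxt and solver files. The subprocess-based invocation and the listing loop are illustrative assumptions, not part of the original code.

# hypothetical helper, Python 2 style to match the repository
import glob
import subprocess

# run model/h2.py, which calls make_all() and writes
# tmp/h2feat11_train.pt, tmp/h2feat11_test.pt and tmp/h2feat11_solver.pt
subprocess.check_call(['python', 'model/h2.py', '--nfeat', '11'])

# list whatever was actually written under tmp/
for fpath in sorted(glob.glob('tmp/h2feat11_*.pt')):
    print fpath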