├── .gitmodules
├── .gitignore
├── train.py
├── forward_all.py
├── lib
│   ├── pylayer.py
│   ├── balance_cross_entropy_loss_layer.cu
│   ├── balance_cross_entropy_loss_layer.hpp
│   ├── balance_cross_entropy_loss_layer.cpp
│   └── test_balance_cross_entropy_loss_layer.cpp
├── README.md
└── model
    ├── hed.py
    ├── rcf.py
    ├── h1.py
    └── h2.py

/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "caffe"]
2 | 	path = caffe
3 | 	url = https://github.com/bvlc/caffe
4 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.caffemodel
2 | *.solverstate
3 | *.pyc
4 | data/
5 | *~
6 | *.pt
7 | *.prototxt
8 | *.log
9 | .ipynb_checkpoints/
10 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import sys, os, argparse
4 | from os.path import isfile, join, isdir
5 | sys.path.insert(0, 'model')
6 | sys.path.insert(0, 'lib')
7 | parser = argparse.ArgumentParser(description='Training hed.')
8 | parser.add_argument('--gpu', type=int, help='gpu ID', default=0)
9 | parser.add_argument('--solver', type=str, help='solver', default='model/hed_solver.pt')
10 | parser.add_argument('--weights', type=str, help='base model', default='model/vgg16convs.caffemodel')
11 | parser.add_argument('--caffe', type=str, help='caffe directory', default='caffe')
12 | args = parser.parse_args()
13 | sys.path.insert(0, join(args.caffe, 'python'))  # pycaffe must be on the path before importing caffe
14 | import caffe
15 | assert isfile(args.weights) and isfile(args.solver)
16 | caffe.set_mode_gpu()
17 | caffe.set_device(args.gpu)
18 | if not isdir('snapshot'):
19 |     os.makedirs('snapshot')
20 | solver = caffe.SGDSolver(args.solver)
21 | solver.net.copy_from(args.weights)
22 | for p in solver.net.params:
23 |     param = solver.net.params[p]
24 |     for i in range(len(param)):
25 |         print p, "param[%d]: mean=%.5f, std=%.5f" % (i, param[i].data.mean(), \
26 |             param[i].data.std())
27 | solver.solve()
28 | 
--------------------------------------------------------------------------------
/forward_all.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | import scipy.misc
4 | import cv2
5 | import scipy.io
6 | import os, sys, argparse
7 | from os.path import join, splitext, split, isfile
8 | parser = argparse.ArgumentParser(description='Forward all testing images.')
9 | parser.add_argument('--model', type=str, default='snapshot/hed_pretrained_bsds.caffemodel')
10 | parser.add_argument('--net', type=str, default='model/hed_test.pt')
11 | parser.add_argument('--output', type=str, default='sigmoid_fuse') # output blob to save
12 | parser.add_argument('--gpu', type=int, default=0)
13 | parser.add_argument('--ms', type=bool, default=True) # use multi-scale testing (note: any non-empty string parses as True)
14 | parser.add_argument('--savemat', type=bool, default=False) # whether to also save a .mat file
15 | args = parser.parse_args()
16 | sys.path.insert(0, 'caffe/python')
17 | import caffe
18 | def forward(data):
19 |     assert data.ndim == 3
20 |     data -= np.array((104.00698793,116.66876762,122.67891434))
21 |     data = data.transpose((2, 0, 1))
22 |     net.blobs['data'].reshape(1, *data.shape)
23 |     net.blobs['data'].data[...] = data
24 |     return net.forward()
25 | assert isfile(args.model) and isfile(args.net), 'file does not exist'
26 | caffe.set_mode_gpu()
27 | caffe.set_device(args.gpu)
28 | 
29 | net = caffe.Net(args.net, args.model, caffe.TEST)
30 | test_dir = 'data/HED-BSDS/test/' # test images directory
31 | save_dir = join('data/edge-results/', splitext(split(args.model)[1])[0]) # directory to save results
32 | if args.ms:
33 |     save_dir = save_dir + '_multiscale'
34 | if not os.path.exists(save_dir):
35 |     os.makedirs(save_dir)
36 | imgs = [i for i in os.listdir(test_dir) if '.jpg' in i]
37 | nimgs = len(imgs)
38 | print "Total: %d images" % nimgs
39 | for i in range(nimgs):
40 |     img = imgs[i]
41 |     img = cv2.imread(join(test_dir, img)).astype(np.float32)
42 |     if img.ndim == 2:
43 |         img = img[:, :, np.newaxis]
44 |         img = np.repeat(img, 3, 2)
45 |     h, w, _ = img.shape
46 |     edge = np.zeros((h, w), np.float32)
47 |     if args.ms:
48 |         scales = [0.5, 1, 1.5]
49 |     else:
50 |         scales = [1]
51 |     for s in scales:
52 |         h1, w1 = int(s * h), int(s * w)
53 |         img1 = cv2.resize(img, (w1, h1), interpolation=cv2.INTER_CUBIC).astype(np.float32)
54 |         edge1 = np.squeeze(forward(img1)[args.output][0, 0, :, :])
55 |         edge += cv2.resize(edge1, (w, h), interpolation=cv2.INTER_CUBIC).astype(np.float32)
56 |     edge /= len(scales)
57 |     fn, ext = splitext(imgs[i])
58 |     if args.savemat:
59 |         scipy.io.savemat(join(save_dir, fn), dict({'edge': edge / edge.max()}), appendmat=True)
60 |     scipy.misc.imsave(join(save_dir, fn + '.png'), edge / edge.max())
61 |     print "Saving to '" + join(save_dir, imgs[i][0:-4]) + "', Processing %d of %d..." % (i + 1, nimgs)
62 | 
63 | 
--------------------------------------------------------------------------------
/lib/pylayer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Code written by KAI ZHAO (http://kaiz.xyz)
4 | import caffe
5 | import numpy as np
6 | from os.path import join, isfile
7 | import random, cv2
8 | 
9 | class ImageLabelmapDataLayer(caffe.Layer):
10 |     """
11 |     Python data layer
12 |     """
13 |     def setup(self, bottom, top):
14 |         params = eval(self.param_str)
15 |         self.root = params['root']
16 |         self.source = params['source']
17 |         self.shuffle = bool(params['shuffle'])
18 |         self.mean = np.array(params['mean'], dtype=np.float32)
19 |         assert self.mean.size == 1 or self.mean.size == 3, "mean.size != 1 and mean.size != 3"
20 |         if params.has_key('ignore_label'):
21 |             self.ignore_label = np.float32(params['ignore_label'])
22 |         else:
23 |             self.ignore_label = None
24 |         with open(join(self.root, self.source), 'r') as f:
25 |             self.filelist = f.readlines()
26 |         if self.shuffle:
27 |             random.shuffle(self.filelist)
28 |         self.idx = 0
29 |         top[0].reshape(1, 3, 100, 100) # img
30 |         top[1].reshape(1, 1, 100, 100) # lb
31 | 
32 |     def reshape(self, bottom, top):
33 |         """
34 |         Tops are reshaped per image in forward().
35 |         """
36 | 
37 |     def forward(self, bottom, top):
38 |         """
39 |         Load the next image / label pair.
40 |         """
41 |         [imgfn, lbfn] = self.filelist[self.idx].split()
42 |         [imgfn, lbfn] = join(self.root, imgfn), join(self.root, lbfn)
43 |         assert isfile(imgfn) and isfile(lbfn), "File does not exist!"
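        # (Preprocessing below, for reference: subtract the per-channel BGR mean,
        # convert HWC -> NCHW, and binarize the label map at a threshold of 125;
        # pixels in (0, 125) are optionally set to ignore_label so that they
        # contribute neither positive nor negative loss.)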
44 |         img = cv2.imread(imgfn).astype(np.float32)
45 |         lb = cv2.imread(lbfn, 0).astype(np.float32)
46 |         if img.ndim == 2:
47 |             img = img[:,:,np.newaxis]
48 |             img = np.repeat(img, 3, 2)
49 |         img -= self.mean
50 |         img = np.transpose(img, (2, 0, 1))
51 |         img = img[np.newaxis, :, :, :]
52 |         assert lb.ndim == 2, "lb.ndim = %d"%lb.ndim
53 |         h, w = lb.shape
54 |         assert img.shape[2] == h and img.shape[3] == w, "Image and GT shape mismatch."
55 |         lb = lb[np.newaxis, np.newaxis, :, :]
56 |         thres = 125
57 |         if self.ignore_label is not None:
58 |             lb[np.logical_and(lb < thres, lb != 0)] = self.ignore_label
59 |             lb[lb >= thres] = 1
60 |         else:
61 |             lb[lb < thres] = 0
62 |             lb[lb != 0] = 1
63 |         if np.count_nonzero(lb) == 0:
64 |             print "Warning: all-zero label map!"
65 |         top[0].reshape(1, 3, h, w)
66 |         top[1].reshape(1, 1, h, w)
67 |         top[0].data[...] = img
68 |         top[1].data[...] = lb
69 |         if self.idx == len(self.filelist)-1:
70 |             # we've reached the end, restart.
71 |             print "Restarting data prefetching from the start."
72 |             random.shuffle(self.filelist)
73 |             self.idx = 0
74 |         else:
75 |             self.idx = self.idx + 1
76 | 
77 |     def backward(self, top, propagate_down, bottom):
78 |         """
79 |         The data layer does not back-propagate.
80 |         """
81 |         pass
82 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Reimplementation of [HED](https://github.com/s9xie/hed) based on the official version of caffe
2 | 
3 | ### For training:
4 | 1. Clone this repository with `git clone https://github.com/zeakey/hed --recursive`; we assume your source code directory is `$HED`;
5 | 
6 | 2. Download the [training data](http://vcl.ucsd.edu/hed/HED-BSDS.tar) from the [original](https://github.com/s9xie/hed) repo, and extract it to `$HED/data/`;
7 | 
8 | 3. Build caffe with `bash $HED/build.sh`; this will first copy the reimplemented loss layer into the caffe folder;
9 | 
10 | 4. Download the [initial model](http://zhaok-data.oss-cn-shanghai.aliyuncs.com/caffe-model/vgg16convs.caffemodel) and put it
11 | into `$HED/model/`;
12 | 
13 | 5. Generate the network prototxts with `python model/hed.py`;
14 | 
15 | 6. Start training with `cd $HED && python train.py --gpu GPU-ID 2>&1 | tee hed.log`.
16 | 
17 | ### For testing:
18 | 1. Download the [pretrained model](http://data.kaiz.xyz/edges/my_hed_pretrained_bsds.caffemodel) into `$HED/snapshot/`;
19 | 
20 | 2. Generate the testing network prototxt with `python $HED/model/hed.py` (this will generate the training network prototxt as well);
21 | 
22 | 3. Run `cd $HED && python forward_all.py`;
23 | 
24 | ### Performance evaluation
25 | I achieved ODS=0.779 on the [BSDS500](https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/resources.html)
26 | dataset, which is close to the 0.78 reported by HED. You can train your own model and evaluate it with this
27 | [code](https://github.com/zeakey/edgeval).
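
For a quick sanity check on a single image (instead of the full `forward_all.py` loop over the test set), the sketch below mirrors what `forward_all.py` does; it assumes the default paths used by the scripts above and the `sigmoid_fuse` output defined in `model/hed.py`:

```python
import sys
sys.path.insert(0, 'caffe/python')
import caffe
import cv2
import numpy as np

caffe.set_mode_gpu()
caffe.set_device(0)
net = caffe.Net('model/hed_test.pt', 'snapshot/hed_pretrained_bsds.caffemodel', caffe.TEST)

im = cv2.imread('data/HED-BSDS/test/3063.jpg').astype(np.float32)
im -= np.array((104.00698793, 116.66876762, 122.67891434))  # BGR mean, as in forward_all.py
im = im.transpose((2, 0, 1))[np.newaxis, ...]               # HWC -> NCHW
net.blobs['data'].reshape(*im.shape)
net.blobs['data'].data[...] = im
edge = net.forward()['sigmoid_fuse'][0, 0]                  # fused edge probability map
cv2.imwrite('3063_edge.png', (255 * edge / edge.max()).astype(np.uint8))
```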
28 | 29 | ### Pretrained models and detection results: 30 | | [Orig-HED](https://github.com/s9xie/hed) | [My-HED](https://github.com/zeakey/hed) | 31 | | ------------- | ------------- | 32 | | [Pretrained model](http://vcl.ucsd.edu/hed/hed_pretrained_bsds.caffemodel) | [Pretrained model](http://data.kaiz.xyz/edges/my_hed_pretrained_bsds.caffemodel) | 33 | | [BSDS results](http://data.kaiz.xyz/edges/detection_results/hed_pretrained_bsds.tar) | [BSDS results](http://data.kaiz.xyz/edges/detection_results/my_hed_pretrained_bsds.tar) | 34 | | [Evaluation results](http://vcl.ucsd.edu/hed/eval_results.tar) | [Evaluation results](http://data.kaiz.xyz/edges/my_hed_pretrained_bsds-eval.tar) | 35 | 36 | All detection results on the BSDS500 testing set and the pretrained models are provided. 37 | For example, the detected results of '3063.jpg' by the original [HED](https://github.com/s9xie/hed) and my 38 | implementation are shown below: 39 | 40 | 41 | 42 | ![](http://data.kaiz.xyz/edges/detection_results/hed_pretrained_bsds/3063.png?x-oss-process=image/auto-orient,1/resize,h_250) 43 | 44 | 45 | 46 | ![](http://data.kaiz.xyz/edges/detection_results/my_hed_bsds/3063.png?x-oss-process=image/auto-orient,1/resize,h_250) 47 | 48 | You can preview results of all other images by replacing the filename in the above url. 49 | ___ 50 | By [KAI ZHAO](http://kaiz.xyz) 51 | 52 | -------------------------------------------------------------------------------- /lib/balance_cross_entropy_loss_layer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "caffe/layers/balance_cross_entropy_loss_layer.hpp" 4 | #include "caffe/util/math_functions.hpp" 5 | 6 | namespace caffe { 7 | 8 | 9 | template 10 | __global__ void BalanceCrossEntropyLossForwardGPU(const int nthreads, 11 | const Dtype* input_data, const Dtype* target, Dtype* loss, 12 | const bool has_ignore_label_, const int ignore_label_, 13 | Dtype* counts) { 14 | CUDA_KERNEL_LOOP(i, nthreads) { 15 | const int target_value = static_cast(target[i]); 16 | if (has_ignore_label_ && target_value == ignore_label_) { 17 | loss[i] = 0; 18 | counts[i] = 0; 19 | } else { 20 | loss[i] = input_data[i] * (target[i] - (input_data[i] >= 0)) - 21 | log(1 + exp(input_data[i] - 2 * input_data[i] * 22 | (input_data[i] >= 0))); 23 | counts[i] = 1; 24 | } 25 | } 26 | } 27 | 28 | template 29 | __global__ void BalanceCrossEntropyLossIgnoreDiffGPU(const int count, 30 | const int ignore_label, const Dtype* target, Dtype* diff) { 31 | CUDA_KERNEL_LOOP(i, count) { 32 | const int target_value = static_cast(target[i]); 33 | if (target_value == ignore_label) { 34 | diff[i] = 0; 35 | } 36 | } 37 | } 38 | 39 | 40 | template 41 | void BalanceCrossEntropyLossLayer::Forward_gpu( 42 | const vector*>& bottom, const vector*>& top) { 43 | Forward_cpu(bottom, top); return; 44 | // The forward pass computes the sigmoid outputs. 45 | sigmoid_bottom_vec_[0] = bottom[0]; 46 | sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); 47 | // Compute the loss (negative log likelihood) 48 | const int count = bottom[0]->count(); 49 | // Stable version of loss computation from input data 50 | const Dtype* input_data = bottom[0]->gpu_data(); 51 | const Dtype* target = bottom[1]->gpu_data(); 52 | // Since this memory is not used for anything until it is overwritten 53 | // on the backward pass, we use it here to avoid having to allocate new GPU 54 | // memory to accumulate intermediate results in the kernel. 
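  // (Note: with the early "Forward_cpu(bottom, top); return;" above, this GPU branch is
  // currently bypassed; the class-balancing weights are only applied in the CPU implementation.)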
55 | Dtype* loss_data = bottom[0]->mutable_gpu_diff(); 56 | Dtype* count_data = bottom[1]->mutable_gpu_diff(); 57 | Dtype valid_count; 58 | // NOLINT_NEXT_LINE(whitespace/operators) 59 | BalanceCrossEntropyLossForwardGPU<<>>(count, input_data, target, loss_data, 61 | has_ignore_label_, ignore_label_, count_data); 62 | // Only launch another CUDA kernel if we actually need the valid count. 63 | if (normalization_ == LossParameter_NormalizationMode_VALID && 64 | has_ignore_label_) { 65 | caffe_gpu_asum(count, count_data, &valid_count); 66 | } else { 67 | valid_count = count; 68 | } 69 | Dtype loss; 70 | caffe_gpu_asum(count, loss_data, &loss); 71 | normalizer_ = get_normalizer(normalization_, valid_count); 72 | top[0]->mutable_cpu_data()[0] = loss / normalizer_; 73 | } 74 | 75 | template 76 | void BalanceCrossEntropyLossLayer::Backward_gpu( 77 | const vector*>& top, const vector& propagate_down, 78 | const vector*>& bottom) { 79 | Backward_cpu(top, propagate_down, bottom); return; 80 | if (propagate_down[1]) { 81 | LOG(FATAL) << this->type() 82 | << " Layer cannot backpropagate to label inputs."; 83 | } 84 | if (propagate_down[0]) { 85 | // First, compute the diff 86 | const int count = bottom[0]->count(); 87 | const Dtype* sigmoid_output_data = sigmoid_output_->gpu_data(); 88 | const Dtype* target = bottom[1]->gpu_data(); 89 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); 90 | caffe_copy(count, sigmoid_output_data, bottom_diff); 91 | caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff); 92 | // Zero out gradient of ignored targets. 93 | if (has_ignore_label_) { 94 | // NOLINT_NEXT_LINE(whitespace/operators) 95 | BalanceCrossEntropyLossIgnoreDiffGPU<<>>(count, ignore_label_, target, bottom_diff); 97 | } 98 | // Scale down gradient 99 | Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_; 100 | caffe_gpu_scal(count, loss_weight, bottom_diff); 101 | } 102 | } 103 | 104 | INSTANTIATE_LAYER_GPU_FUNCS(BalanceCrossEntropyLossLayer); 105 | 106 | } // namespace caffe 107 | -------------------------------------------------------------------------------- /lib/balance_cross_entropy_loss_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_BALANCE_CROSS_ENTROPY_LOSS_LAYER_HPP_ 2 | #define CAFFE_BALANCE_CROSS_ENTROPY_LOSS_LAYER_HPP_ 3 | 4 | #include 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/layer.hpp" 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | #include "caffe/layers/loss_layer.hpp" 11 | #include "caffe/layers/sigmoid_layer.hpp" 12 | 13 | namespace caffe { 14 | 15 | /** 16 | * @brief Computes the cross-entropy (logistic) loss @f$ 17 | * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ 18 | * p_n \log \hat{p}_n + 19 | * (1 - p_n) \log(1 - \hat{p}_n) 20 | * \right] 21 | * @f$, often used for predicting targets interpreted as probabilities. 22 | * 23 | * This layer is implemented rather than separate 24 | * SigmoidLayer + CrossEntropyLayer 25 | * as its gradient computation is more numerically stable. 26 | * At test time, this layer can be replaced simply by a SigmoidLayer. 27 | * 28 | * @param bottom input Blob vector (length 2) 29 | * -# @f$ (N \times C \times H \times W) @f$ 30 | * the scores @f$ x \in [-\infty, +\infty]@f$, 31 | * which this layer maps to probability predictions 32 | * @f$ \hat{p}_n = \sigma(x_n) \in [0, 1] @f$ 33 | * using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer). 
34 | * -# @f$ (N \times C \times H \times W) @f$ 35 | * the targets @f$ y \in [0, 1] @f$ 36 | * @param top output Blob vector (length 1) 37 | * -# @f$ (1 \times 1 \times 1 \times 1) @f$ 38 | * the computed cross-entropy loss: @f$ 39 | * E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ 40 | * p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) 41 | * \right] 42 | * @f$ 43 | */ 44 | template 45 | class BalanceCrossEntropyLossLayer : public LossLayer { 46 | public: 47 | explicit BalanceCrossEntropyLossLayer(const LayerParameter& param) 48 | : LossLayer(param), 49 | sigmoid_layer_(new SigmoidLayer(param)), 50 | sigmoid_output_(new Blob()) {} 51 | virtual void LayerSetUp(const vector*>& bottom, 52 | const vector*>& top); 53 | virtual void Reshape(const vector*>& bottom, 54 | const vector*>& top); 55 | 56 | virtual inline const char* type() const { return "BalanceCrossEntropyLoss"; } 57 | 58 | protected: 59 | /// @copydoc BalanceCrossEntropyLossLayer 60 | virtual void Forward_cpu(const vector*>& bottom, 61 | const vector*>& top); 62 | virtual void Forward_gpu(const vector*>& bottom, 63 | const vector*>& top); 64 | 65 | /** 66 | * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the 67 | * predictions. 68 | * 69 | * Gradients cannot be computed with respect to the target inputs (bottom[1]), 70 | * so this method ignores bottom[1] and requires !propagate_down[1], crashing 71 | * if propagate_down[1] is set. 72 | * 73 | * @param top output Blob vector (length 1), providing the error gradient with 74 | * respect to the outputs 75 | * -# @f$ (1 \times 1 \times 1 \times 1) @f$ 76 | * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, 77 | * as @f$ \lambda @f$ is the coefficient of this layer's output 78 | * @f$\ell_i@f$ in the overall Net loss 79 | * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence 80 | * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. 81 | * (*Assuming that this top Blob is not used as a bottom (input) by any 82 | * other layer of the Net.) 83 | * @param propagate_down see Layer::Backward. 84 | * propagate_down[1] must be false as gradient computation with respect 85 | * to the targets is not implemented. 86 | * @param bottom input Blob vector (length 2) 87 | * -# @f$ (N \times C \times H \times W) @f$ 88 | * the predictions @f$x@f$; Backward computes diff 89 | * @f$ \frac{\partial E}{\partial x} = 90 | * \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n) 91 | * @f$ 92 | * -# @f$ (N \times 1 \times 1 \times 1) @f$ 93 | * the labels -- ignored as we can't compute their error gradients 94 | */ 95 | virtual void Backward_cpu(const vector*>& top, 96 | const vector& propagate_down, const vector*>& bottom); 97 | virtual void Backward_gpu(const vector*>& top, 98 | const vector& propagate_down, const vector*>& bottom); 99 | 100 | /// Read the normalization mode parameter and compute the normalizer based 101 | /// on the blob size. If normalization_mode is VALID, the count of valid 102 | /// outputs will be read from valid_count, unless it is -1 in which case 103 | /// all outputs are assumed to be valid. 104 | virtual Dtype get_normalizer( 105 | LossParameter_NormalizationMode normalization_mode, int valid_count); 106 | 107 | /// The internal SigmoidLayer used to map predictions to probabilities. 108 | shared_ptr > sigmoid_layer_; 109 | /// sigmoid_output stores the output of the SigmoidLayer. 
110 | shared_ptr > sigmoid_output_; 111 | /// bottom vector holder to call the underlying SigmoidLayer::Forward 112 | vector*> sigmoid_bottom_vec_; 113 | /// top vector holder to call the underlying SigmoidLayer::Forward 114 | vector*> sigmoid_top_vec_; 115 | 116 | /// Whether to ignore instances with a certain label. 117 | bool has_ignore_label_; 118 | /// The label indicating that an instance should be ignored. 119 | int ignore_label_; 120 | /// How to normalize the loss. 121 | LossParameter_NormalizationMode normalization_; 122 | Dtype normalizer_; 123 | int outer_num_, inner_num_; 124 | std::vector count_; 125 | }; 126 | 127 | } // namespace caffe 128 | 129 | #endif // CAFFE_BALANCE_CROSS_ENTROPY_LOSS_LAYER_HPP_ 130 | -------------------------------------------------------------------------------- /lib/balance_cross_entropy_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "caffe/layers/balance_cross_entropy_loss_layer.hpp" 5 | #include "caffe/util/math_functions.hpp" 6 | 7 | namespace caffe { 8 | 9 | template 10 | void BalanceCrossEntropyLossLayer::LayerSetUp( 11 | const vector*>& bottom, const vector*>& top) { 12 | LossLayer::LayerSetUp(bottom, top); 13 | sigmoid_bottom_vec_.clear(); 14 | sigmoid_bottom_vec_.push_back(bottom[0]); 15 | sigmoid_top_vec_.clear(); 16 | sigmoid_top_vec_.push_back(sigmoid_output_.get()); 17 | sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_); 18 | count_.resize(2); 19 | has_ignore_label_ = 20 | this->layer_param_.loss_param().has_ignore_label(); 21 | if (has_ignore_label_) { 22 | ignore_label_ = this->layer_param_.loss_param().ignore_label(); 23 | } 24 | if (this->layer_param_.loss_param().has_normalization()) { 25 | normalization_ = this->layer_param_.loss_param().normalization(); 26 | } else if (this->layer_param_.loss_param().has_normalize()) { 27 | normalization_ = this->layer_param_.loss_param().normalize() ? 
28 | LossParameter_NormalizationMode_VALID : 29 | LossParameter_NormalizationMode_BATCH_SIZE; 30 | } else { 31 | normalization_ = LossParameter_NormalizationMode_BATCH_SIZE; 32 | } 33 | } 34 | 35 | template 36 | void BalanceCrossEntropyLossLayer::Reshape( 37 | const vector*>& bottom, const vector*>& top) { 38 | LossLayer::Reshape(bottom, top); 39 | outer_num_ = bottom[0]->shape(0); // batch size 40 | inner_num_ = bottom[0]->count(1); // instance size: |output| == |target| 41 | CHECK_EQ(bottom[0]->count(), bottom[1]->count()) << 42 | "BALANCE_CROSS_ENTROPY_LOSS layer inputs must have the same count."; 43 | sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_); 44 | } 45 | 46 | template 47 | Dtype BalanceCrossEntropyLossLayer::get_normalizer( 48 | LossParameter_NormalizationMode normalization_mode, int valid_count) { 49 | Dtype normalizer; 50 | switch (normalization_mode) { 51 | case LossParameter_NormalizationMode_FULL: 52 | normalizer = Dtype(outer_num_ * inner_num_); 53 | break; 54 | case LossParameter_NormalizationMode_VALID: 55 | if (valid_count == -1) { 56 | normalizer = Dtype(outer_num_ * inner_num_); 57 | } else { 58 | normalizer = Dtype(valid_count); 59 | } 60 | break; 61 | case LossParameter_NormalizationMode_BATCH_SIZE: 62 | normalizer = Dtype(outer_num_); 63 | break; 64 | case LossParameter_NormalizationMode_NONE: 65 | normalizer = Dtype(1); 66 | break; 67 | default: 68 | LOG(FATAL) << "Unknown normalization mode: " 69 | << LossParameter_NormalizationMode_Name(normalization_mode); 70 | } 71 | // Some users will have no labels for some examples in order to 'turn off' a 72 | // particular loss in a multi-task setup. The max prevents NaNs in that case. 73 | return std::max(Dtype(1.0), normalizer); 74 | } 75 | 76 | template 77 | void BalanceCrossEntropyLossLayer::Forward_cpu( 78 | const vector*>& bottom, const vector*>& top) { 79 | // The forward pass computes the sigmoid outputs. 
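  // Class-balanced cross-entropy as in HED: positive and negative pixels are accumulated
  // separately below, then each class is weighted by the fraction of the opposite class,
  // i.e. loss = (|Y-|/|Y|) * loss_pos + (|Y+|/|Y|) * loss_neg, so that the sparse edge
  // pixels are not swamped by the background.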
80 | sigmoid_bottom_vec_[0] = bottom[0]; 81 | sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); 82 | // Compute the loss (negative log likelihood) 83 | // Stable version of loss computation from input data 84 | const Dtype* input_data = bottom[0]->cpu_data(); 85 | const Dtype* target = bottom[1]->cpu_data(); 86 | Dtype loss = 0, loss_pos = 0, loss_neg = 0; 87 | count_[0] = 0; count_[1] = 0; 88 | for (int i = 0; i < bottom[0]->count(); ++i) { 89 | const int target_value = static_cast(target[i]); 90 | if (has_ignore_label_ && target_value == ignore_label_) { 91 | continue; 92 | } 93 | if (target[i] == 1) { 94 | loss_pos -= input_data[i] * (target[i] - (input_data[i] >= 0)) - 95 | log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); 96 | count_[1]++; 97 | } else if (target[i] == 0) { 98 | loss_neg -= input_data[i] * (target[i] - (input_data[i] >= 0)) - 99 | log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); 100 | count_[0]++; 101 | } else { 102 | LOG(FATAL)<<"Unknown target value: " << target[i]; 103 | } 104 | } 105 | loss += loss_pos * count_[0] / (count_[0]+count_[1]); 106 | loss += loss_neg * count_[1] / (count_[0]+count_[1]); 107 | normalizer_ = get_normalizer(normalization_, count_[0]+count_[1]); 108 | top[0]->mutable_cpu_data()[0] = loss / normalizer_; 109 | } 110 | 111 | template 112 | void BalanceCrossEntropyLossLayer::Backward_cpu( 113 | const vector*>& top, const vector& propagate_down, 114 | const vector*>& bottom) { 115 | if (propagate_down[1]) { 116 | LOG(FATAL) << this->type() 117 | << " Layer cannot backpropagate to label inputs."; 118 | } 119 | if (propagate_down[0]) { 120 | // First, compute the diff 121 | const int count = bottom[0]->count(); 122 | const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data(); 123 | const Dtype* target = bottom[1]->cpu_data(); 124 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); 125 | caffe_sub(count, sigmoid_output_data, target, bottom_diff); 126 | for (int i = 0; i < count; ++i) { 127 | // Zero out gradient of ignored targets. 
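      // The remaining diffs are then re-weighted per class, mirroring the forward pass:
      // negative pixels are scaled by |Y+|/|Y| and positive pixels by |Y-|/|Y|.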
128 | if (has_ignore_label_ && target[i] == ignore_label_) { 129 | bottom_diff[i] = 0; continue; 130 | } 131 | if (target[i] == 0) { 132 | bottom_diff[i] *= count_[1] / (count_[0]+count_[1]); 133 | } else if (target[i] == 1) { 134 | bottom_diff[i] *= count_[0] / (count_[0]+count_[1]); 135 | } else { 136 | LOG(FATAL)<<"Unknown target value: "<cpu_diff()[0] / normalizer_; 141 | caffe_scal(count, loss_weight, bottom_diff); 142 | } 143 | } 144 | 145 | #ifdef CPU_ONLY 146 | STUB_GPU(BalanceCrossEntropyLossLayer); 147 | #endif 148 | 149 | INSTANTIATE_CLASS(BalanceCrossEntropyLossLayer); 150 | REGISTER_LAYER_CLASS(BalanceCrossEntropyLoss); 151 | 152 | } // namespace caffe 153 | -------------------------------------------------------------------------------- /model/hed.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.insert(0, 'caffe/python') 3 | import caffe 4 | from caffe import layers as L, params as P 5 | from caffe.coord_map import crop 6 | import numpy as np 7 | from math import ceil 8 | 9 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 10 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 11 | num_output=nout, pad=pad, weight_filler=dict(type='xavier'), 12 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 13 | return conv, L.ReLU(conv, in_place=True) 14 | 15 | def max_pool(bottom, ks=2, stride=2): 16 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 17 | 18 | def conv1x1(bottom, name, lr=1): 19 | return L.Convolution(bottom, name=name, kernel_size=1,num_output=1,# weight_filler=dict(type='xavier'), 20 | param=[dict(lr_mult=0.01*lr, decay_mult=1), dict(lr_mult=0.02*lr, decay_mult=0)]) 21 | 22 | def upsample(bottom, stride): 23 | s, k, pad = stride, 2 * stride, int(ceil(stride-1)/2) 24 | name = "upsample%d"%s 25 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=1, 26 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear")), 27 | param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]) 28 | 29 | def net(split): 30 | n = caffe.NetSpec() 31 | if split=='train': 32 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 33 | data_params['root'] = 'data/HED-BSDS' 34 | data_params['source'] = "train_pair.lst" 35 | data_params['shuffle'] = True 36 | data_params['ignore_label'] = -1 # ignore label 37 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 38 | param_str=str(data_params)) 39 | loss_param = dict(normalize=False) 40 | if data_params.has_key('ignore_label'): 41 | loss_param['ignore_label'] = data_params['ignore_label'] 42 | elif split == 'test': 43 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,500,500]))) 44 | else: 45 | raise Exception("Invalid phase") 46 | 47 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 48 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 49 | n.pool1 = max_pool(n.relu1_2) 50 | 51 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 52 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 53 | n.pool2 = max_pool(n.relu2_2) 54 | 55 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 56 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 57 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 58 | n.pool3 = max_pool(n.relu3_3) 59 | 60 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 61 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 62 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 
512) 63 | n.pool4 = max_pool(n.relu4_3) 64 | 65 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 66 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 67 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 68 | 69 | # DSN1 70 | n.score_dsn1=conv1x1(n.conv1_2, 'score-dsn1', lr=1) 71 | n.upscore_dsn1 = crop(n.score_dsn1, n.data) 72 | if split=='train': 73 | n.loss1 = L.BalanceCrossEntropyLoss(n.upscore_dsn1, n.label, loss_param=loss_param) 74 | else: 75 | n.sigmoid_dsn1 = L.Sigmoid(n.upscore_dsn1) 76 | # DSN2 77 | n.score_dsn2 = conv1x1(n.conv2_2, 'score-dsn2') 78 | n.score_dsn2_up = upsample(n.score_dsn2, stride=2) 79 | n.upscore_dsn2 = crop(n.score_dsn2_up, n.data) 80 | if split=='train': 81 | n.loss2 = L.BalanceCrossEntropyLoss(n.upscore_dsn2, n.label, loss_param=loss_param) 82 | else: 83 | n.sigmoid_dsn2 = L.Sigmoid(n.upscore_dsn2) 84 | # DSN3 85 | n.score_dsn3=conv1x1(n.conv3_3, 'score-dsn3') 86 | n.score_dsn3_up = upsample(n.score_dsn3, stride=4) 87 | n.upscore_dsn3 = crop(n.score_dsn3_up, n.data) 88 | if split=='train': 89 | n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=loss_param) 90 | else: 91 | n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3) 92 | # DSN4 93 | n.score_dsn4 = conv1x1(n.conv4_3, 'score-dsn4') 94 | n.score_dsn4_up = upsample(n.score_dsn4, stride=8) 95 | n.upscore_dsn4 = crop(n.score_dsn4_up, n.data) 96 | if split=='train': 97 | n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=loss_param) 98 | else: 99 | n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4) 100 | # DSN5 101 | n.score_dsn5=conv1x1(n.conv5_3, 'score-dsn5') 102 | n.score_dsn5_up = upsample(n.score_dsn5, stride=16) 103 | n.upscore_dsn5 = crop(n.score_dsn5_up, n.data) 104 | if split=='train': 105 | n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=loss_param) 106 | elif split=='test': 107 | n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5) 108 | # concat and fuse 109 | n.concat_upscore = L.Concat(n.upscore_dsn1, 110 | n.upscore_dsn2, 111 | n.upscore_dsn3, 112 | n.upscore_dsn4, 113 | n.upscore_dsn5, 114 | name='concat', concat_param=dict({'concat_dim':1})) 115 | n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', 116 | num_output=1, kernel_size=1, 117 | param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)], 118 | weight_filler=dict(type='constant', value=0.2)) 119 | if split=='test': 120 | n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse) 121 | else: 122 | n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param) 123 | return n.to_proto() 124 | 125 | def make_net(): 126 | with open('model/hed_train.pt', 'w') as f: 127 | f.write(str(net('train'))) 128 | with open('model/hed_test.pt', 'w') as f: 129 | f.write(str(net('test'))) 130 | def make_solver(): 131 | sp = {} 132 | sp['net'] = '"model/hed_train.pt"' 133 | sp['base_lr'] = '0.000001' 134 | sp['lr_policy'] = '"step"' 135 | sp['momentum'] = '0.9' 136 | sp['weight_decay'] = '0.0002' 137 | sp['iter_size'] = '10' 138 | sp['stepsize'] = '10000' 139 | sp['display'] = '10' 140 | sp['snapshot'] = '2000' 141 | sp['snapshot_prefix'] = '"snapshot/hed"' 142 | sp['gamma'] = '0.1' 143 | sp['max_iter'] = '40000' 144 | sp['solver_mode'] = 'GPU' 145 | f = open('model/hed_solver.pt', 'w') 146 | for k, v in sorted(sp.items()): 147 | if not(type(v) is str): 148 | raise TypeError('All solver parameters must be strings') 149 | f.write('%s: %s\n'%(k, v)) 150 | f.close() 151 | 152 | def make_all(): 153 | 
make_net() 154 | make_solver() 155 | 156 | if __name__ == '__main__': 157 | make_all() 158 | -------------------------------------------------------------------------------- /lib/test_balance_cross_entropy_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "gtest/gtest.h" 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/common.hpp" 8 | #include "caffe/filler.hpp" 9 | #include "caffe/layers/balance_cross_entropy_loss_layer.hpp" 10 | 11 | #include "caffe/test/test_caffe_main.hpp" 12 | #include "caffe/test/test_gradient_check_util.hpp" 13 | 14 | namespace caffe { 15 | 16 | template 17 | class BalanceCrossEntropyLossLayerTest : public MultiDeviceTest { 18 | typedef typename TypeParam::Dtype Dtype; 19 | 20 | protected: 21 | BalanceCrossEntropyLossLayerTest() 22 | : blob_bottom_data_(new Blob(1, 1, 3, 3)), 23 | blob_bottom_targets_(new Blob(1, 1, 3, 3)), 24 | blob_top_loss_(new Blob()) { 25 | // Fill the data vector 26 | FillerParameter data_filler_param; 27 | data_filler_param.set_std(1); 28 | GaussianFiller data_filler(data_filler_param); 29 | data_filler.Fill(blob_bottom_data_); 30 | blob_bottom_vec_.push_back(blob_bottom_data_); 31 | // Fill the targets vector 32 | FillerParameter targets_filler_param; 33 | targets_filler_param.set_min(0); 34 | targets_filler_param.set_max(1); 35 | UniformFiller targets_filler(targets_filler_param); 36 | targets_filler.Fill(blob_bottom_targets_); 37 | for (int i=0; icount(); ++i) { 38 | if (blob_bottom_targets_->cpu_data()[i] >= 0.5) 39 | blob_bottom_targets_->mutable_cpu_data()[i] = 1; 40 | else 41 | blob_bottom_targets_->mutable_cpu_data()[i] = 0; 42 | } 43 | blob_bottom_vec_.push_back(blob_bottom_targets_); 44 | blob_top_vec_.push_back(blob_top_loss_); 45 | } 46 | virtual ~BalanceCrossEntropyLossLayerTest() { 47 | delete blob_bottom_data_; 48 | delete blob_bottom_targets_; 49 | delete blob_top_loss_; 50 | } 51 | 52 | Dtype BalanceCrossEntropyLossReference(const int count, const int num, 53 | const Dtype* input, 54 | const Dtype* target) { 55 | Dtype loss = 0; 56 | for (int i = 0; i < count; ++i) { 57 | const Dtype prediction = 1 / (1 + exp(-input[i])); 58 | EXPECT_LE(prediction, 1); 59 | EXPECT_GE(prediction, 0); 60 | EXPECT_LE(target[i], 1); 61 | EXPECT_GE(target[i], 0); 62 | loss -= target[i] * log(prediction + (target[i] == Dtype(0))); 63 | loss -= (1 - target[i]) * log(1 - prediction + (target[i] == Dtype(1))); 64 | } 65 | return loss / num; 66 | } 67 | 68 | void TestForward() { 69 | LayerParameter layer_param; 70 | const Dtype kLossWeight = 3.7; 71 | layer_param.add_loss_weight(kLossWeight); 72 | FillerParameter data_filler_param; 73 | data_filler_param.set_std(1); 74 | GaussianFiller data_filler(data_filler_param); 75 | FillerParameter targets_filler_param; 76 | targets_filler_param.set_min(0.0); 77 | targets_filler_param.set_max(1.0); 78 | UniformFiller targets_filler(targets_filler_param); 79 | Dtype eps = 2e-2; 80 | for (int i = 0; i < 100; ++i) { 81 | // Fill the data vector 82 | data_filler.Fill(this->blob_bottom_data_); 83 | // Fill the targets vector 84 | targets_filler.Fill(this->blob_bottom_targets_); 85 | for (int k=0; kcount(); ++k){ 86 | if (blob_bottom_targets_->cpu_data()[k] <= 0.5) 87 | blob_bottom_targets_->mutable_cpu_data()[k] = 0; 88 | else 89 | blob_bottom_targets_->mutable_cpu_data()[k] = 1; 90 | } 91 | BalanceCrossEntropyLossLayer layer(layer_param); 92 | layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); 93 | Dtype layer_loss = 
94 | layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); 95 | const int count = this->blob_bottom_data_->count(); 96 | const int num = this->blob_bottom_data_->num(); 97 | const Dtype* blob_bottom_data = this->blob_bottom_data_->cpu_data(); 98 | const Dtype* blob_bottom_targets = 99 | this->blob_bottom_targets_->cpu_data(); 100 | Dtype reference_loss = kLossWeight * BalanceCrossEntropyLossReference( 101 | count, num, blob_bottom_data, blob_bottom_targets); 102 | EXPECT_NEAR(reference_loss, layer_loss, eps) << "debug: trial #" << i; 103 | } 104 | } 105 | 106 | Blob* const blob_bottom_data_; 107 | Blob* const blob_bottom_targets_; 108 | Blob* const blob_top_loss_; 109 | vector*> blob_bottom_vec_; 110 | vector*> blob_top_vec_; 111 | }; 112 | 113 | TYPED_TEST_CASE(BalanceCrossEntropyLossLayerTest, TestDtypesAndDevices); 114 | 115 | // TYPED_TEST(BalanceCrossEntropyLossLayerTest, TestBalanceCrossEntropyLoss) { 116 | // this->TestForward(); 117 | // } 118 | 119 | TYPED_TEST(BalanceCrossEntropyLossLayerTest, TestGradient) { 120 | typedef typename TypeParam::Dtype Dtype; 121 | LayerParameter layer_param; 122 | const Dtype kLossWeight = 3.7; 123 | layer_param.add_loss_weight(kLossWeight); 124 | BalanceCrossEntropyLossLayer layer(layer_param); 125 | layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); 126 | GradientChecker checker(1e-2, 1e-2, 1701); 127 | checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, 128 | this->blob_top_vec_, 0); 129 | } 130 | 131 | TYPED_TEST(BalanceCrossEntropyLossLayerTest, TestIgnoreGradient) { 132 | typedef typename TypeParam::Dtype Dtype; 133 | FillerParameter data_filler_param; 134 | data_filler_param.set_std(1); 135 | GaussianFiller data_filler(data_filler_param); 136 | data_filler.Fill(this->blob_bottom_data_); 137 | LayerParameter layer_param; 138 | LossParameter* loss_param = layer_param.mutable_loss_param(); 139 | loss_param->set_ignore_label(-1); 140 | Dtype* target = this->blob_bottom_targets_->mutable_cpu_data(); 141 | const int count = this->blob_bottom_targets_->count(); 142 | // Ignore half of targets, then check that diff of this half is zero, 143 | // while the other half is nonzero. 
144 | caffe_set(count / 2, Dtype(-1), target); 145 | BalanceCrossEntropyLossLayer layer(layer_param); 146 | layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); 147 | layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); 148 | vector propagate_down(2); 149 | propagate_down[0] = true; 150 | propagate_down[1] = false; 151 | layer.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); 152 | const Dtype* diff = this->blob_bottom_data_->cpu_diff(); 153 | for (int i = 0; i < count / 2; ++i) { 154 | EXPECT_FLOAT_EQ(diff[i], 0.); 155 | EXPECT_NE(diff[i + count / 2], 0.); 156 | } 157 | } 158 | 159 | 160 | } // namespace caffe 161 | -------------------------------------------------------------------------------- /model/rcf.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | sys.path.insert(0, 'caffe/python') 3 | import caffe 4 | from caffe import layers as L, params as P 5 | from caffe.coord_map import crop 6 | import numpy as np 7 | from math import ceil 8 | 9 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 10 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 11 | num_output=nout, pad=pad, weight_filler=dict(type='xavier'), 12 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 13 | return conv, L.ReLU(conv, in_place=True) 14 | 15 | def max_pool(bottom, ks=2, stride=2): 16 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 17 | 18 | def conv1x1(bottom, lr=[0.01, 1, 0.02, 0], wf=dict(type="constant")): 19 | return L.Convolution(bottom, kernel_size=1,num_output=1, weight_filler=wf, 20 | param=[dict(lr_mult=lr[0], decay_mult=lr[1]), dict(lr_mult=lr[2], decay_mult=lr[3])]) 21 | 22 | def upsample(bottom, stride): 23 | s, k, pad = stride, 2 * stride, int(ceil(stride-1)/2) 24 | name = "upsample%d"%s 25 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=1, 26 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear")), 27 | param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]) 28 | 29 | def net(split): 30 | n = caffe.NetSpec() 31 | # loss_param = dict(normalization=P.Loss.VALID) 32 | loss_param = dict(normalize=False) 33 | if split=='train': 34 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 35 | data_params['root'] = 'data/HED-BSDS_PASCAL' 36 | data_params['source'] = "bsds_pascal_train_pair.lst" 37 | data_params['shuffle'] = True 38 | data_params['ignore_label'] = -1 39 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 40 | param_str=str(data_params)) 41 | if data_params.has_key('ignore_label'): 42 | loss_param['ignore_label'] = int(data_params['ignore_label']) 43 | elif split == 'test': 44 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,500,500]))) 45 | else: 46 | raise Exception("Invalid phase") 47 | 48 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 49 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 50 | n.pool1 = max_pool(n.relu1_2) 51 | 52 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 53 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 54 | n.pool2 = max_pool(n.relu2_2) 55 | 56 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 57 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 58 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 59 | n.pool3 = max_pool(n.relu3_3) 60 | 61 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 62 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 
512) 63 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 64 | n.pool4 = max_pool(n.relu4_3) 65 | 66 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 67 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 68 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 69 | 70 | # DSN1 71 | n.w1_1 = conv1x1(n.conv1_1, lr=[0.1, 1, 0.2, 0]) 72 | n.w1_2 = conv1x1(n.conv1_2, lr=[0.1, 1, 0.2, 0]) 73 | n.fuse1 = L.Eltwise(n.w1_1, n.w1_2, operation=P.Eltwise.SUM) 74 | n.score_dsn1 = conv1x1(n.fuse1, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 75 | n.upscore_dsn1 = crop(n.score_dsn1, n.data) 76 | if split=='train': 77 | n.loss1 = L.BalanceCrossEntropyLoss(n.upscore_dsn1, n.label, loss_param=loss_param) 78 | if split=='test': 79 | n.sigmoid_dsn1 = L.Sigmoid(n.upscore_dsn1) 80 | # DSN2 81 | n.w2_1 = conv1x1(n.conv2_1, lr=[0.1, 1, 0.2, 0]) 82 | n.w2_2 = conv1x1(n.conv2_1, lr=[0.1, 1, 0.2, 0]) 83 | n.fuse2 = L.Eltwise(n.w2_1, n.w2_2, operation=P.Eltwise.SUM) 84 | n.score_dsn2 = conv1x1(n.fuse2, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 85 | n.score_dsn2_up = upsample(n.score_dsn2, stride=2) 86 | n.upscore_dsn2 = crop(n.score_dsn2_up, n.data) 87 | if split=='train': 88 | n.loss2 = L.BalanceCrossEntropyLoss(n.upscore_dsn2, n.label, loss_param=loss_param) 89 | if split=='test': 90 | n.sigmoid_dsn2 = L.Sigmoid(n.upscore_dsn2) 91 | # DSN3 92 | n.w3_1 = conv1x1(n.conv3_1, lr=[0.1, 1, 0.2, 0]) 93 | n.w3_2 = conv1x1(n.conv3_2, lr=[0.1, 1, 0.2, 0]) 94 | n.w3_3 = conv1x1(n.conv3_3, lr=[0.1, 1, 0.2, 0]) 95 | n.fuse3 = L.Eltwise(n.w3_1, n.w3_2, n.w3_3, operation=P.Eltwise.SUM) 96 | n.score_dsn3 = conv1x1(n.fuse3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 97 | n.score_dsn3_up = upsample(n.score_dsn3, stride=4) 98 | n.upscore_dsn3 = crop(n.score_dsn3_up, n.data) 99 | if split=='train': 100 | n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label, loss_param=loss_param) 101 | if split=='test': 102 | n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3) 103 | # DSN4 104 | n.w4_1 = conv1x1(n.conv4_1, lr=[0.1, 1, 0.2, 0]) 105 | n.w4_2 = conv1x1(n.conv4_2, lr=[0.1, 1, 0.2, 0]) 106 | n.w4_3 = conv1x1(n.conv4_3, lr=[0.1, 1, 0.2, 0]) 107 | n.fuse4 = L.Eltwise(n.w4_1, n.w4_2, n.w4_3, operation=P.Eltwise.SUM) 108 | n.score_dsn4 = conv1x1(n.fuse4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 109 | n.score_dsn4_up = upsample(n.score_dsn4, stride=8) 110 | n.upscore_dsn4 = crop(n.score_dsn4_up, n.data) 111 | if split=='train': 112 | n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label, loss_param=loss_param) 113 | if split=='test': 114 | n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4) 115 | # DSN5 116 | n.w5_1 = conv1x1(n.conv5_1, lr=[0.1, 1, 0.2, 0]) 117 | n.w5_2 = conv1x1(n.conv5_2, lr=[0.1, 1, 0.2, 0]) 118 | n.w5_3 = conv1x1(n.conv5_3, lr=[0.1, 1, 0.2, 0]) 119 | n.fuse5 = L.Eltwise(n.w5_1, n.w5_2, n.w5_3, operation=P.Eltwise.SUM) 120 | n.score_dsn5 = conv1x1(n.fuse5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 121 | n.score_dsn5_up = upsample(n.score_dsn5, stride=16) 122 | n.upscore_dsn5 = crop(n.score_dsn5_up, n.data) 123 | if split=='train': 124 | n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label, loss_param=loss_param) 125 | elif split=='test': 126 | n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5) 127 | # concat and fuse 128 | n.concat_upscore = L.Concat(n.upscore_dsn1, 129 | n.upscore_dsn2, 130 | n.upscore_dsn3, 131 | n.upscore_dsn4, 132 | n.upscore_dsn5, 133 | name='concat', 
concat_param=dict({'concat_dim':1})) 134 | n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting', 135 | num_output=1, kernel_size=1, 136 | param=[dict(lr_mult=0.001, decay_mult=1), dict(lr_mult=0.002, decay_mult=0)], 137 | weight_filler=dict(type='constant', value=0.2)) 138 | if split=='test': 139 | n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse) 140 | if split=='train': 141 | n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label, loss_param=loss_param) 142 | return n.to_proto() 143 | 144 | def make_net(): 145 | with open('model/rcf_train.pt', 'w') as f: 146 | f.write(str(net('train'))) 147 | with open('model/rcf_test.pt', 'w') as f: 148 | f.write(str(net('test'))) 149 | def make_solver(): 150 | sp = {} 151 | sp['net'] = '"model/rcf_train.pt"' 152 | sp['base_lr'] = '0.000001' 153 | sp['lr_policy'] = '"step"' 154 | sp['momentum'] = '0.9' 155 | sp['weight_decay'] = '0.0002' 156 | sp['iter_size'] = '10' 157 | sp['stepsize'] = '20000' 158 | sp['display'] = '10' 159 | sp['snapshot'] = '2000' 160 | sp['snapshot_prefix'] = '"snapshot/rcf"' 161 | sp['gamma'] = '0.1' 162 | sp['max_iter'] = '40000' 163 | sp['solver_mode'] = 'GPU' 164 | f = open('model/rcf_solver.pt', 'w') 165 | for k, v in sorted(sp.items()): 166 | if not(type(v) is str): 167 | raise TypeError('All solver parameters must be strings') 168 | f.write('%s: %s\n'%(k, v)) 169 | f.close() 170 | 171 | def make_all(): 172 | make_net() 173 | make_solver() 174 | 175 | if __name__ == '__main__': 176 | make_all() 177 | -------------------------------------------------------------------------------- /model/h1.py: -------------------------------------------------------------------------------- 1 | import sys, os, argparse 2 | from os.path import join, isdir, isfile, split 3 | sys.path.insert(0, 'caffe/python') 4 | import caffe 5 | from caffe import layers as L, params as P 6 | from caffe.coord_map import crop 7 | import numpy as np 8 | from math import ceil 9 | parser = argparse.ArgumentParser(description='Training hed.') 10 | parser.add_argument('--nfeat', type=int, help='number features', default=1) 11 | parser.add_argument('--cat', type=str, help='cat or elt-sum', default='False') 12 | parser.add_argument('--bias', type=bool, default=True) 13 | args = parser.parse_args() 14 | def str2bool(str1): 15 | str1 = str(str1).lower() 16 | if "false" in str1 or "0" in str1: 17 | return False 18 | else: 19 | return True 20 | args.cat = str2bool(args.cat) 21 | tmp_dir = 'tmp' 22 | if not isdir(tmp_dir): 23 | os.makedirs(tmp_dir) 24 | 25 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 26 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 27 | num_output=nout, pad=pad, weight_filler=dict(type='msra'), 28 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 29 | return conv, L.ReLU(conv, in_place=True) 30 | 31 | def max_pool(bottom, ks=2, stride=2): 32 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 33 | 34 | def conv1x1(bottom, nout=1, lr=[0.01, 1, 0.02, 0], wf=dict(type="constant")): 35 | if args.bias: 36 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 37 | param=[dict(lr_mult=lr[0], decay_mult=lr[1]), dict(lr_mult=lr[2], decay_mult=lr[3])]) 38 | else: 39 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 40 | bias_term=False, param=[dict(lr_mult=lr[0], decay_mult=lr[1])]) 41 | 42 | def upsample(bottom, stride, nout=1, name=None): 43 | s, k, pad = stride, 2 * 
stride, int(ceil(stride-1)/2) 44 | if not name: 45 | name = "upsample%d"%s 46 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=nout, bias_term=False, 47 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear"), group=nout), 48 | param=[dict(lr_mult=0, decay_mult=0)]) 49 | 50 | def net(split): 51 | n = caffe.NetSpec() 52 | loss_param = dict(normalization=P.Loss.VALID) 53 | # loss_param = dict(normalize=False) 54 | if split=='train': 55 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 56 | #data_params['root'] = 'data/HED-BSDS_PASCAL' 57 | data_params['root'] = 'data/PASCAL-Context-Edge/' 58 | data_params['source'] = "train_pair.lst" 59 | data_params['shuffle'] = True 60 | #data_params['ignore_label'] = -1 61 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 62 | param_str=str(data_params)) 63 | if data_params.has_key('ignore_label'): 64 | loss_param['ignore_label'] = int(data_params['ignore_label']) 65 | elif split == 'test': 66 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,200,200]))) 67 | else: 68 | raise Exception("Invalid phase") 69 | 70 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 71 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 72 | n.pool1 = max_pool(n.relu1_2) 73 | 74 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 75 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 76 | n.pool2 = max_pool(n.relu2_2) 77 | 78 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 79 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 80 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 81 | n.pool3 = max_pool(n.relu3_3) 82 | 83 | n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512) 84 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 85 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 86 | n.pool4 = max_pool(n.relu4_3) 87 | 88 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 89 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 90 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 91 | ## w1 92 | n.w1_1top = conv1x1(n.conv1_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 93 | n.w1_2top = conv1x1(n.conv1_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 94 | ## w2 95 | n.w2_1top = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 96 | n.w2_2top = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 97 | n.w2_1down = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 98 | n.w2_2down = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 99 | ## w3 100 | n.w3_1top = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 101 | n.w3_2top = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 102 | n.w3_3top = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 103 | n.w3_1down = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 104 | n.w3_2down = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 105 | n.w3_3down = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 106 | ## w4 107 | n.w4_1top = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], 
wf=dict(type='gaussian', std=0.001)) 108 | n.w4_2top = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 109 | n.w4_3top = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 110 | n.w4_1down = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 111 | n.w4_2down = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 112 | n.w4_3down = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 113 | ## w5 114 | n.w5_1down = conv1x1(n.conv5_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 115 | n.w5_2down = conv1x1(n.conv5_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 116 | n.w5_3down = conv1x1(n.conv5_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 117 | 118 | ## upsample wx_xdown 119 | n.w2_1down_up = upsample(n.w2_1down, nout=args.nfeat, stride=2, name='upsample2_1') 120 | n.w2_2down_up = upsample(n.w2_2down, nout=args.nfeat, stride=2, name='upsample2_2') 121 | 122 | n.w3_1down_up = upsample(n.w3_1down, nout=args.nfeat, stride=2, name='upsample3_1') 123 | n.w3_2down_up = upsample(n.w3_2down, nout=args.nfeat, stride=2, name='upsample3_2') 124 | n.w3_3down_up = upsample(n.w3_3down, nout=args.nfeat, stride=2, name='upsample3_3') 125 | 126 | n.w4_1down_up = upsample(n.w4_1down, nout=args.nfeat, stride=2, name='upsample4_1') 127 | n.w4_2down_up = upsample(n.w4_2down, nout=args.nfeat, stride=2, name='upsample4_2') 128 | n.w4_3down_up = upsample(n.w4_3down, nout=args.nfeat, stride=2, name='upsample4_3') 129 | 130 | n.w5_1down_up = upsample(n.w5_1down, nout=args.nfeat, stride=2, name='upsample5_1') 131 | n.w5_2down_up = upsample(n.w5_2down, nout=args.nfeat, stride=2, name='upsample5_2') 132 | n.w5_3down_up = upsample(n.w5_3down, nout=args.nfeat, stride=2, name='upsample5_3') 133 | 134 | ## crop wx_xdown_up 135 | n.w2_1down_up_crop = crop(n.w2_1down_up, n.w1_1top) 136 | n.w2_2down_up_crop = crop(n.w2_2down_up, n.w1_1top) 137 | 138 | n.w3_1down_up_crop = crop(n.w3_1down_up, n.w2_1top) 139 | n.w3_2down_up_crop = crop(n.w3_2down_up, n.w2_1top) 140 | n.w3_3down_up_crop = crop(n.w3_3down_up, n.w2_1top) 141 | 142 | n.w4_1down_up_crop = crop(n.w4_1down_up, n.w3_1top) 143 | n.w4_2down_up_crop = crop(n.w4_2down_up, n.w3_1top) 144 | n.w4_3down_up_crop = crop(n.w4_3down_up, n.w3_1top) 145 | 146 | n.w5_1down_up_crop = crop(n.w5_1down_up, n.w4_1top) 147 | n.w5_2down_up_crop = crop(n.w5_2down_up, n.w4_1top) 148 | n.w5_3down_up_crop = crop(n.w5_3down_up, n.w4_1top) 149 | ## fuse 150 | if args.cat: 151 | n.h1s1_2 = L.Concat(n.w1_1top, n.w1_2top, n.w2_1down_up_crop, n.w2_2down_up_crop) 152 | n.h1s2_3 = L.Concat(n.w2_1top, n.w2_2top, n.w3_1down_up_crop, n.w3_2down_up_crop, n.w3_3down_up_crop) 153 | n.h1s3_4 = L.Concat(n.w3_1top, n.w3_2top, n.w3_3top, \ 154 | n.w4_1down_up_crop, n.w4_2down_up_crop, n.w4_3down_up_crop) 155 | n.h1s4_5 = L.Concat(n.w4_1top, n.w4_2top, n.w4_3top, \ 156 | n.w5_1down_up_crop, n.w5_2down_up_crop, n.w5_3down_up_crop) 157 | # n.h1s1_2 = conv1x1(n.h1s1_2cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 158 | # n.h1s2_3 = conv1x1(n.h1s2_3cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 159 | # n.h1s3_4 = conv1x1(n.h1s3_4cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 160 | # n.h1s4_5 = conv1x1(n.h1s4_5cat, lr=[0.01, 1, 0.02, 0], wf=dict(type='constant', value=1)) 
161 | else: 162 | n.h1s1_2 = L.Eltwise(n.w1_1top, n.w1_2top, n.w2_1down_up_crop, n.w2_2down_up_crop) 163 | n.h1s2_3 = L.Eltwise(n.w2_1top, n.w2_2top, n.w3_1down_up_crop, n.w3_2down_up_crop, n.w3_3down_up_crop) 164 | n.h1s3_4 = L.Eltwise(n.w3_1top, n.w3_2top, n.w3_3top, \ 165 | n.w4_1down_up_crop, n.w4_2down_up_crop, n.w4_3down_up_crop) 166 | n.h1s4_5 = L.Eltwise(n.w4_1top, n.w4_2top, n.w4_3top, \ 167 | n.w5_1down_up_crop, n.w5_2down_up_crop, n.w5_3down_up_crop) 168 | ## score h1sx_x 169 | n.score_h1s1_2 = conv1x1(n.h1s1_2, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 170 | n.score_h1s2_3 = conv1x1(n.h1s2_3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 171 | n.score_h1s3_4 = conv1x1(n.h1s3_4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 172 | n.score_h1s4_5 = conv1x1(n.h1s4_5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.01)) 173 | ## upsample score 174 | n.upscore_h1s2_3 = upsample(n.score_h1s2_3, stride=2, name='upscore_h1s2_3') 175 | n.upscore_h1s3_4 = upsample(n.score_h1s3_4, stride=4, name='upscore_h1s3_4') 176 | n.upscore_h1s4_5 = upsample(n.score_h1s4_5, stride=8, name='upscore_h1s4_5') 177 | ## crop upscore_h1sx_x 178 | n.crop_h1s1_2 = crop(n.score_h1s1_2, n.data) 179 | n.crop_h1s2_3 = crop(n.upscore_h1s2_3, n.data) 180 | n.crop_h1s3_4 = crop(n.upscore_h1s3_4, n.data) 181 | n.crop_h1s4_5 = crop(n.upscore_h1s4_5, n.data) 182 | ## fuse 183 | n.h1_concat = L.Concat(n.crop_h1s1_2, 184 | n.crop_h1s2_3, 185 | n.crop_h1s3_4, 186 | n.crop_h1s4_5, 187 | concat_param=dict({'concat_dim':1})) 188 | n.h1_fuse = conv1x1(n.h1_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=float(1)/4)) 189 | if split == 'train': 190 | n.loss_h1s1_2 = L.BalanceCrossEntropyLoss(n.crop_h1s1_2, n.label, loss_param=loss_param) 191 | n.loss_h1s2_3 = L.BalanceCrossEntropyLoss(n.crop_h1s2_3, n.label, loss_param=loss_param) 192 | n.loss_h1s3_4 = L.BalanceCrossEntropyLoss(n.crop_h1s3_4, n.label, loss_param=loss_param) 193 | n.loss_h1s4_5 = L.BalanceCrossEntropyLoss(n.crop_h1s4_5, n.label, loss_param=loss_param) 194 | n.loss_h1_fuse = L.BalanceCrossEntropyLoss(n.h1_fuse, n.label, loss_param=loss_param) 195 | else: 196 | n.sigmoid_h1s1_2 = L.Sigmoid(n.crop_h1s1_2) 197 | n.sigmoid_h1s2_3 = L.Sigmoid(n.crop_h1s2_3) 198 | n.sigmoid_h1s3_4 = L.Sigmoid(n.crop_h1s3_4) 199 | n.sigmoid_h1s4_5 = L.Sigmoid(n.crop_h1s4_5) 200 | n.sigmoid_h1_fuse = L.Sigmoid(n.h1_fuse) 201 | return n.to_proto() 202 | 203 | def make_net(): 204 | fpath = join(tmp_dir, "h1feat%d_train.pt"%args.nfeat) 205 | with open(fpath, 'w') as f: 206 | f.write(str(net('train'))) 207 | fpath = join(tmp_dir, "h1feat%d_test.pt"%args.nfeat) 208 | with open(fpath, 'w') as f: 209 | f.write(str(net('test'))) 210 | def make_solver(): 211 | sp = {} 212 | fpath = join(tmp_dir, "h1feat%d_train.pt"%args.nfeat) 213 | sp['net'] = '"' + fpath + '"' 214 | sp['base_lr'] = '0.01' 215 | sp['lr_policy'] = '"step"' 216 | sp['momentum'] = '0.9' 217 | sp['weight_decay'] = '0.0002' 218 | sp['iter_size'] = '10' 219 | sp['stepsize'] = '20000' 220 | sp['display'] = '10' 221 | sp['snapshot'] = '2000' 222 | sp['snapshot_prefix'] = '"snapshot/h1feat%d"'%args.nfeat 223 | sp['gamma'] = '0.1' 224 | sp['max_iter'] = '40000' 225 | sp['solver_mode'] = 'GPU' 226 | fpath = join(tmp_dir, "h1feat%d_solver.pt"%args.nfeat) 227 | f = open(fpath, 'w') 228 | for k, v in sorted(sp.items()): 229 | if not(type(v) is str): 230 | raise TypeError('All solver parameters must be strings') 231 | f.write('%s: %s\n'%(k, v)) 232 | f.close() 233 | 234 | def 
make_all(): 235 | make_net() 236 | make_solver() 237 | 238 | if __name__ == '__main__': 239 | make_all() 240 | -------------------------------------------------------------------------------- /model/h2.py: -------------------------------------------------------------------------------- 1 | import sys, os, argparse 2 | from os.path import join, isdir, isfile, split 3 | sys.path.insert(0, 'caffe/python') 4 | import caffe 5 | from caffe import layers as L, params as P 6 | from caffe.coord_map import crop 7 | import numpy as np 8 | from math import ceil 9 | parser = argparse.ArgumentParser(description='Training hed.') 10 | parser.add_argument('--nfeat', type=int, help='number features', default=11) 11 | parser.add_argument('--bias', type=bool, default=True) 12 | args = parser.parse_args() 13 | tmp_dir = 'tmp' 14 | if not isdir(tmp_dir): 15 | os.makedirs(tmp_dir) 16 | 17 | def conv_relu(bottom, nout, ks=3, stride=1, pad=1, mult=[1,1,2,0]): 18 | conv = L.Convolution(bottom, kernel_size=ks, stride=stride, 19 | num_output=nout, pad=pad, weight_filler=dict(type='msra'), 20 | param=[dict(lr_mult=mult[0], decay_mult=mult[1]), dict(lr_mult=mult[2], decay_mult=mult[3])]) 21 | return conv, L.ReLU(conv, in_place=True) 22 | 23 | def max_pool(bottom, ks=2, stride=2): 24 | return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride) 25 | 26 | def conv1x1(bottom, nout=1, lr=[0.01, 1, 0.02, 0], wf=dict(type="constant")): 27 | if args.bias: 28 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 29 | param=[dict(lr_mult=lr[0], decay_mult=lr[1]), dict(lr_mult=lr[2], decay_mult=lr[3])]) 30 | else: 31 | return L.Convolution(bottom, kernel_size=1, num_output=nout, weight_filler=wf, 32 | bias_term=False, param=[dict(lr_mult=lr[0], decay_mult=lr[1])]) 33 | 34 | def upsample(bottom, stride, nout=1, name=None): 35 | s, k, pad = stride, 2 * stride, int(ceil(stride-1)/2) 36 | if not name: 37 | name = "upsample%d"%s 38 | return L.Deconvolution(bottom, name=name, convolution_param=dict(num_output=nout, bias_term=False, 39 | kernel_size=k, stride=s, pad=pad, weight_filler = dict(type="bilinear"), group=nout), 40 | param=[dict(lr_mult=0, decay_mult=0)]) 41 | 42 | def net(split): 43 | n = caffe.NetSpec() 44 | # loss_param = dict(normalization=P.Loss.VALID) 45 | loss_param = dict(normalize=False) 46 | if split=='train': 47 | data_params = dict(mean=(104.00699, 116.66877, 122.67892)) 48 | data_params['root'] = 'data/HED-BSDS_PASCAL' 49 | data_params['source'] = "bsds_pascal_train_pair.lst" 50 | data_params['shuffle'] = True 51 | data_params['ignore_label'] = -1 52 | n.data, n.label = L.Python(module='pylayer', layer='ImageLabelmapDataLayer', ntop=2, \ 53 | param_str=str(data_params)) 54 | if data_params.has_key('ignore_label'): 55 | loss_param['ignore_label'] = int(data_params['ignore_label']) 56 | elif split == 'test': 57 | n.data = L.Input(name = 'data', input_param=dict(shape=dict(dim=[1,3,200,200]))) 58 | else: 59 | raise Exception("Invalid phase") 60 | 61 | n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1) 62 | n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64) 63 | n.pool1 = max_pool(n.relu1_2) 64 | 65 | n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128) 66 | n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128) 67 | n.pool2 = max_pool(n.relu2_2) 68 | 69 | n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256) 70 | n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256) 71 | n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256) 72 | n.pool3 = max_pool(n.relu3_3) 73 | 74 | n.conv4_1, n.relu4_1 = 
conv_relu(n.pool3, 512) 75 | n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512) 76 | n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512) 77 | n.pool4 = max_pool(n.relu4_3) 78 | 79 | n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512, mult=[100,1,200,0]) 80 | n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512, mult=[100,1,200,0]) 81 | n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512, mult=[100,1,200,0]) 82 | ## w1 83 | n.w1_1top = conv1x1(n.conv1_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 84 | n.w1_2top = conv1x1(n.conv1_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 85 | ## w2 86 | n.w2_1top = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 87 | n.w2_2top = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 88 | n.w2_1down = conv1x1(n.conv2_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 89 | n.w2_2down = conv1x1(n.conv2_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 90 | ## w3 91 | n.w3_1top = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 92 | n.w3_2top = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 93 | n.w3_3top = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 94 | n.w3_1down = conv1x1(n.conv3_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 95 | n.w3_2down = conv1x1(n.conv3_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 96 | n.w3_3down = conv1x1(n.conv3_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 97 | ## w4 98 | n.w4_1top = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 99 | n.w4_2top = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 100 | n.w4_3top = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 101 | n.w4_1down = conv1x1(n.conv4_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 102 | n.w4_2down = conv1x1(n.conv4_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 103 | n.w4_3down = conv1x1(n.conv4_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 104 | ## w5 105 | n.w5_1down = conv1x1(n.conv5_1, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 106 | n.w5_2down = conv1x1(n.conv5_2, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 107 | n.w5_3down = conv1x1(n.conv5_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 108 | 109 | ## upsample wx_xdown 110 | n.w2_1down_up = upsample(n.w2_1down, nout=args.nfeat, stride=2, name='upsample2_1') 111 | n.w2_2down_up = upsample(n.w2_2down, nout=args.nfeat, stride=2, name='upsample2_2') 112 | 113 | n.w3_1down_up = upsample(n.w3_1down, nout=args.nfeat, stride=2, name='upsample3_1') 114 | n.w3_2down_up = upsample(n.w3_2down, nout=args.nfeat, stride=2, name='upsample3_2') 115 | n.w3_3down_up = upsample(n.w3_3down, nout=args.nfeat, stride=2, name='upsample3_3') 116 | 117 | n.w4_1down_up = upsample(n.w4_1down, nout=args.nfeat, stride=2, name='upsample4_1') 118 | n.w4_2down_up = upsample(n.w4_2down, nout=args.nfeat, stride=2, name='upsample4_2') 119 | n.w4_3down_up = upsample(n.w4_3down, nout=args.nfeat, stride=2, 
name='upsample4_3') 120 | 121 | n.w5_1down_up = upsample(n.w5_1down, nout=args.nfeat, stride=2, name='upsample5_1') 122 | n.w5_2down_up = upsample(n.w5_2down, nout=args.nfeat, stride=2, name='upsample5_2') 123 | n.w5_3down_up = upsample(n.w5_3down, nout=args.nfeat, stride=2, name='upsample5_3') 124 | 125 | ## crop wx_xdown_up 126 | n.w2_1down_up_crop = crop(n.w2_1down_up, n.w1_1top) 127 | n.w2_2down_up_crop = crop(n.w2_2down_up, n.w1_1top) 128 | 129 | n.w3_1down_up_crop = crop(n.w3_1down_up, n.w2_1top) 130 | n.w3_2down_up_crop = crop(n.w3_2down_up, n.w2_1top) 131 | n.w3_3down_up_crop = crop(n.w3_3down_up, n.w2_1top) 132 | 133 | n.w4_1down_up_crop = crop(n.w4_1down_up, n.w3_1top) 134 | n.w4_2down_up_crop = crop(n.w4_2down_up, n.w3_1top) 135 | n.w4_3down_up_crop = crop(n.w4_3down_up, n.w3_1top) 136 | 137 | n.w5_1down_up_crop = crop(n.w5_1down_up, n.w4_1top) 138 | n.w5_2down_up_crop = crop(n.w5_2down_up, n.w4_1top) 139 | n.w5_3down_up_crop = crop(n.w5_3down_up, n.w4_1top) 140 | 141 | ## fuse 142 | n.h1s1_2 = L.Eltwise(n.w1_1top, n.w1_2top, n.w2_1down_up_crop, n.w2_2down_up_crop) 143 | 144 | n.h1s2_3 = L.Eltwise(n.w2_1top, n.w2_2top, n.w3_1down_up_crop, n.w3_2down_up_crop, n.w3_3down_up_crop) 145 | 146 | n.h1s3_4 = L.Eltwise(n.w3_1top, n.w3_2top, n.w3_3top, \ 147 | n.w4_1down_up_crop, n.w4_2down_up_crop, n.w4_3down_up_crop) 148 | 149 | n.h1s4_5 = L.Eltwise(n.w4_1top, n.w4_2top, n.w4_3top, \ 150 | n.w5_1down_up_crop, n.w5_2down_up_crop, n.w5_3down_up_crop) 151 | 152 | ## score h1sx_x 153 | n.score_h1s1_2 = conv1x1(n.h1s1_2, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 154 | n.score_h1s2_3 = conv1x1(n.h1s2_3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 155 | n.score_h1s3_4 = conv1x1(n.h1s3_4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 156 | n.score_h1s4_5 = conv1x1(n.h1s4_5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 157 | ## upsample score 158 | n.upscore_h1s2_3 = upsample(n.score_h1s2_3, stride=2, name='upscore_h1s2_3') 159 | n.upscore_h1s3_4 = upsample(n.score_h1s3_4, stride=4, name='upscore_h1s3_4') 160 | n.upscore_h1s4_5 = upsample(n.score_h1s4_5, stride=8, name='upscore_h1s4_5') 161 | ## crop upscore_h1sx_x 162 | n.crop_h1s1_2 = crop(n.score_h1s1_2, n.data) 163 | n.crop_h1s2_3 = crop(n.upscore_h1s2_3, n.data) 164 | n.crop_h1s3_4 = crop(n.upscore_h1s3_4, n.data) 165 | n.crop_h1s4_5 = crop(n.upscore_h1s4_5, n.data) 166 | ## fuse 167 | n.h1_concat = L.Concat(n.crop_h1s1_2, 168 | n.crop_h1s2_3, 169 | n.crop_h1s3_4, 170 | n.crop_h1s4_5, 171 | concat_param=dict({'concat_dim':1})) 172 | n.h1_fuse = conv1x1(n.h1_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=float(1)/4)) 173 | if split == 'train': 174 | n.loss_h1s1_2 = L.BalanceCrossEntropyLoss(n.crop_h1s1_2, n.label, loss_param=loss_param) 175 | n.loss_h1s2_3 = L.BalanceCrossEntropyLoss(n.crop_h1s2_3, n.label, loss_param=loss_param) 176 | n.loss_h1s3_4 = L.BalanceCrossEntropyLoss(n.crop_h1s3_4, n.label, loss_param=loss_param) 177 | n.loss_h1s4_5 = L.BalanceCrossEntropyLoss(n.crop_h1s4_5, n.label, loss_param=loss_param) 178 | n.loss_h1_fuse = L.BalanceCrossEntropyLoss(n.h1_fuse, n.label, loss_param=loss_param) 179 | else: 180 | n.sigmoid_h1s1_2 = L.Sigmoid(n.crop_h1s1_2) 181 | n.sigmoid_h1s2_3 = L.Sigmoid(n.crop_h1s2_3) 182 | n.sigmoid_h1s3_4 = L.Sigmoid(n.crop_h1s3_4) 183 | n.sigmoid_h1s4_5 = L.Sigmoid(n.crop_h1s4_5) 184 | n.sigmoid_h1_fuse = L.Sigmoid(n.h1_fuse) 185 | ## H2: conv h1sx_x for H2 fusing 186 | n.h1s1_2top = conv1x1(n.h1s1_2, nout=args.nfeat, lr=[0.1,
1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 187 | n.h1s2_3top = conv1x1(n.h1s2_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 188 | n.h1s2_3down = conv1x1(n.h1s2_3, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 189 | n.h1s3_4top = conv1x1(n.h1s3_4, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 190 | n.h1s3_4down = conv1x1(n.h1s3_4, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 191 | n.h1s4_5down = conv1x1(n.h1s4_5, nout=args.nfeat, lr=[0.1, 1, 0.2, 0], wf=dict(type='gaussian', std=0.001)) 192 | ## upsample H2 193 | n.h1s2_3upsample = upsample(n.h1s2_3down, nout=args.nfeat, stride=2, name='upsample_h1s2_3') 194 | n.h1s3_4upsample = upsample(n.h1s3_4down, nout=args.nfeat, stride=2, name='upsample_h1s3_4') 195 | n.h1s4_5upsample = upsample(n.h1s4_5down, nout=args.nfeat, stride=2, name='upsample_h1s4_5') 196 | ## Crop H2 197 | n.h1s2_3crop = crop(n.h1s2_3upsample, n.h1s1_2top) 198 | n.h1s3_4crop = crop(n.h1s3_4upsample, n.h1s2_3top) 199 | n.h1s4_5crop = crop(n.h1s4_5upsample, n.h1s3_4top) 200 | ## fuse H2 201 | n.h2s1_2_3 = L.Eltwise(n.h1s1_2top, n.h1s2_3crop) 202 | n.h2s2_3_4 = L.Eltwise(n.h1s2_3top, n.h1s3_4crop) 203 | n.h2s3_4_5 = L.Eltwise(n.h1s3_4top, n.h1s4_5crop) 204 | ## score H2 205 | n.score_h2s1_2_3 = conv1x1(n.h2s1_2_3, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 206 | n.score_h2s2_3_4 = conv1x1(n.h2s2_3_4, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 207 | n.score_h2s3_4_5 = conv1x1(n.h2s3_4_5, lr=[0.01, 1, 0.02, 0], wf=dict(type='gaussian', std=0.001)) 208 | ## upsample H2 score 209 | n.score_h2s2_3_4upsample = upsample(n.score_h2s2_3_4, stride=2, name='upscore_h2s2_3_4') 210 | n.score_h2s3_4_5upsample = upsample(n.score_h2s3_4_5, stride=4, name='upscore_h2s3_4_5') 211 | ## Crop H2 score 212 | n.score_h2s1_2_3crop = crop(n.score_h2s1_2_3, n.data) 213 | n.score_h2s2_3_4crop = crop(n.score_h2s2_3_4upsample, n.data) 214 | n.score_h2s3_4_5crop = crop(n.score_h2s3_4_5upsample, n.data) 215 | # concat H2 216 | n.h2_concat = L.Concat(n.score_h2s1_2_3crop, n.score_h2s2_3_4crop, n.score_h2s3_4_5crop,\ 217 | concat_param=dict({'concat_dim':1})) 218 | n.h2_fuse = conv1x1(n.h2_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=0.333)) 219 | if split == 'train': 220 | n.loss_h2s1_2_3 = L.BalanceCrossEntropyLoss(n.score_h2s1_2_3crop, n.label, loss_param=loss_param) 221 | n.loss_h2s2_3_4 = L.BalanceCrossEntropyLoss(n.score_h2s2_3_4crop, n.label, loss_param=loss_param) 222 | n.loss_h2s3_4_5 = L.BalanceCrossEntropyLoss(n.score_h2s3_4_5crop, n.label, loss_param=loss_param) 223 | n.loss_h2_fuse = L.BalanceCrossEntropyLoss(n.h2_fuse, n.label, loss_param=loss_param) 224 | else: 225 | n.sigmoid_h2s1_2_3 = L.Sigmoid(n.score_h2s1_2_3crop) 226 | n.sigmoid_h2s2_3_4 = L.Sigmoid(n.score_h2s2_3_4crop) 227 | n.sigmoid_h2s3_4_5 = L.Sigmoid(n.score_h2s3_4_5crop) 228 | n.sigmoid_h2_fuse = L.Sigmoid(n.h2_fuse) 229 | # Concat H1 and H2 230 | n.h1h2_concat = L.Concat(n.score_h2s1_2_3crop, n.score_h2s2_3_4crop, n.score_h2s3_4_5crop, 231 | n.crop_h1s1_2, n.crop_h1s2_3, n.crop_h1s3_4, n.crop_h1s4_5, 232 | concat_param=dict({'concat_dim': 1})) 233 | n.h1h2_fuse = conv1x1(n.h1h2_concat, lr=[0.001, 1, 0.002, 0], wf=dict(type='constant', value=float(1)/7)) 234 | if split == 'train': 235 | n.loss_h1h2_fuse = L.BalanceCrossEntropyLoss(n.h1h2_fuse, n.label, loss_param=loss_param) 236 | else: 237 | n.sigmoid_h1h2_fuse = L.Sigmoid(n.h1h2_fuse) 238 | return 
n.to_proto() 239 | 240 | def make_net(): 241 | fpath = join(tmp_dir, "h2feat%d_train.pt"%args.nfeat) 242 | with open(fpath, 'w') as f: 243 | f.write(str(net('train'))) 244 | fpath = join(tmp_dir, "h2feat%d_test.pt"%args.nfeat) 245 | with open(fpath, 'w') as f: 246 | f.write(str(net('test'))) 247 | def make_solver(): 248 | sp = {} 249 | fpath = join(tmp_dir, "h2feat%d_train.pt"%args.nfeat) 250 | sp['net'] = '"' + fpath + '"' 251 | sp['base_lr'] = '0.000001' 252 | sp['lr_policy'] = '"step"' 253 | sp['momentum'] = '0.9' 254 | sp['weight_decay'] = '0.0002' 255 | sp['iter_size'] = '10' 256 | sp['stepsize'] = '20000' 257 | sp['display'] = '10' 258 | sp['snapshot'] = '2000' 259 | sp['snapshot_prefix'] = '"snapshot/h2feat%d"'%args.nfeat 260 | sp['gamma'] = '0.1' 261 | sp['max_iter'] = '40000' 262 | sp['solver_mode'] = 'GPU' 263 | fpath = join(tmp_dir, "h2feat%d_solver.pt"%args.nfeat) 264 | f = open(fpath, 'w') 265 | for k, v in sorted(sp.items()): 266 | if not(type(v) is str): 267 | raise TypeError('All solver parameters must be strings') 268 | f.write('%s: %s\n'%(k, v)) 269 | f.close() 270 | 271 | def make_all(): 272 | make_net() 273 | make_solver() 274 | 275 | if __name__ == '__main__': 276 | make_all() 277 | --------------------------------------------------------------------------------
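A minimal usage sketch, not a file from the repository: it assumes model/h2.py is invoked from the repository root (so the relative paths 'caffe/python' and 'tmp' resolve) with --nfeat left at its default of 11, drives make_all() through the command line, and then lists the generated prototxt and solver files. The subprocess-based invocation and the listing loop are illustrative assumptions, not part of the original code.

# hypothetical helper, Python 2 style to match the repository
import glob
import subprocess

# run model/h2.py, which calls make_all() and writes
# tmp/h2feat11_train.pt, tmp/h2feat11_test.pt and tmp/h2feat11_solver.pt
subprocess.check_call(['python', 'model/h2.py', '--nfeat', '11'])

# list whatever was actually written under tmp/
for fpath in sorted(glob.glob('tmp/h2feat11_*.pt')):
    print fpath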