├── README.md ├── caffe ├── examples │ └── mpelu │ │ ├── 15layer_imagenet │ │ ├── layer15_elu_taylor_BNoScale.prototxt │ │ ├── layer15_lrelu_gauss_BNoScale_slope0.1.prototxt │ │ ├── layer15_lrelu_gauss_BNoScale_slope0.25.prototxt │ │ ├── layer15_lrelu_gauss_BNoScale_slope0.5.prototxt │ │ ├── layer15_lrelu_gauss_BNoScale_slope1.prototxt │ │ ├── layer15_mpelu_gauss_BNoScale_alpha0.25_decay1_beta1_decay1.prototxt │ │ ├── layer15_mpelu_gauss_BNoScale_alpha0_decay1_beta1_decay1.prototxt │ │ ├── layer15_mpelu_gauss_BNoScale_alpha1_decay1_beta1_decay1.prototxt │ │ ├── layer15_mpelu_taylor_BNoScale_alpha0.25_decay2_beta1_decay2.prototxt │ │ ├── layer15_mpelu_taylor_BNoScale_alpha0_decay2_beta1_decay2.prototxt │ │ ├── layer15_mpelu_taylor_BNoScale_alpha1_decay1_beta1_decay1.prototxt │ │ ├── layer15_prelu_gauss_BNoScale_alpha0.25_decay1.prototxt │ │ ├── layer15_prelu_gauss_BNoScale_alpha0_decay0.prototxt │ │ ├── layer15_prelu_gauss_BNoScale_alpha0_decay1_better_than_decay0.prototxt │ │ ├── layer15_prelu_gauss_BNoScale_alpha1_decay1.prototxt │ │ ├── layer15_prelu_msra_BNoScale_alpha0.25_decay1.prototxt │ │ ├── layer15_prelu_msra_BNoScale_alpha0_decay1.prototxt │ │ ├── layer15_prelu_msra_BNoScale_alpha1_decay1.prototxt │ │ ├── layer15_relu_gauss_BNoScale.prototxt │ │ ├── layer15_relu_msra_BNoScale.prototxt │ │ ├── solver_elu.prototxt │ │ ├── solver_lrelu.prototxt │ │ ├── solver_mpelu.prototxt │ │ ├── solver_prelu.prototxt │ │ └── solver_relu.prototxt │ │ ├── 30layer_imagenet │ │ ├── README.md │ │ ├── layer30_elu_taylor_FAN_IN_withoutBN.prototxt │ │ ├── layer30_mpelu_gauss_withoutBN.prototxt │ │ ├── layer30_mpelu_taylor_FAN_IN_withoutBN.prototxt │ │ ├── layer30_mpelu_taylor_FAN_IN_withoutBN_alpha0.25_decay1_beta1_decay1.prototxt │ │ ├── layer30_prelu_msra_FAN_IN_withoutBN.prototxt │ │ ├── layer30_relu_msra_FAN_IN_withoutBN.prototxt │ │ ├── solver_elu.prototxt │ │ ├── solver_mpelu.prototxt │ │ ├── solver_prelu.prototxt │ │ └── solver_relu.prototxt │ │ ├── 50layer_imagenet │ │ ├── README.MD │ │ ├── layer52_elu_lsuv_withoutBN.prototxt │ │ ├── layer52_elu_taylor_FAN_IN_withoutBN.prototxt │ │ ├── lsuv_init_python2.py │ │ ├── lsuv_init_python3.py │ │ └── solver_elu.prototxt │ │ └── network_in_network_cifar10 │ │ ├── README.MD │ │ ├── nin_elu_gauss.prototxt │ │ ├── nin_m2pelu_gauss.prototxt │ │ ├── nin_mpelu_gauss.prototxt │ │ ├── nin_prelu_gauss.prototxt │ │ ├── nin_relu_gauss.prototxt │ │ └── train.sh ├── include │ ├── filler.hpp │ └── layers │ │ └── m2pelu_layer.hpp └── src │ └── caffe │ ├── layers │ ├── m2pelu_layer.cpp │ └── m2pelu_layer.cu │ └── proto │ └── caffe.proto ├── examples └── mnist_mpelu.py ├── mpelu_nopre_resnet ├── models │ └── mpelu-preactivation-nopre.lua └── train.lua ├── pytorch ├── mpelu.cpp ├── mpelu.h ├── mpelu.py ├── mpelu_kernel.cu └── setup.py └── torch ├── extra ├── cunn │ └── lib │ │ └── THCUNN │ │ ├── MPELU.cu │ │ ├── SPELU.cu │ │ └── generic │ │ ├── MPELU.cu │ │ ├── SPELU.cu │ │ └── THCUNN.h └── nn │ ├── MPELU.lua │ ├── SPELU.lua │ ├── init.lua │ └── lib │ └── THNN │ ├── generic │ ├── MPELU.c │ ├── SPELU.c │ └── THNN.h │ └── init.c └── models └── MPELU-NoPre-ResNet.jpg /README.md: -------------------------------------------------------------------------------- 1 | ## Updates 2 | - November 2, 2023: Added support for Mixed Precision 3 | - March 14, 2023: Added support for PyTorch (latest for pytorch 2.1.0) 4 | 5 | ## Code-for-MPELU 6 | Code for Improving Deep Neural Network with Multiple Parametric Exponential Linear Units, [arXiv:1606.00305](https://arxiv.org/abs/1606.00305) 7 
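In brief, MPELU keeps the identity for positive inputs and applies a learnable exponential to non-positive ones: f(x) = x for x > 0 and f(x) = alpha * (exp(beta * x) - 1) otherwise, so alpha = 0 recovers ReLU, a small beta gives a PReLU-like linear slope of alpha * beta, and alpha = beta = 1 recovers ELU. Below is a minimal functional sketch for illustration only; the layers in this repo implement a CUDA version with learnable (optionally per-channel) alpha and beta:

```
import torch

def mpelu(x, alpha=1.0, beta=1.0):
    # MPELU(x) = x                           if x > 0
    #          = alpha * (exp(beta * x) - 1)  otherwise
    # alpha = 0 -> ReLU; alpha = 1, beta = 1 -> ELU; small beta -> PReLU-like slope alpha*beta.
    return torch.where(x > 0, x, alpha * (torch.exp(beta * x) - 1))
```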
| 8 | The main contributions are: 9 | 10 | - A new activation function, MPELU, which is a unified form of ReLU, PReLU and ELU. 11 | - A weight initialization method for both ReLU-like and ELU-like networks. When used with a ReLU network, it reduces to Kaiming initialization. 12 | - A network architecture that is more effective than the original Pre-/ResNet. 13 | 14 | #### Citation 15 | ``` 16 | @article{LI201811, 17 | title = "Improving deep neural network with Multiple Parametric Exponential Linear Units", 18 | journal = "Neurocomputing", 19 | volume = "301", 20 | pages = "11 - 24", 21 | year = "2018", 22 | issn = "0925-2312", 23 | doi = "https://doi.org/10.1016/j.neucom.2018.01.084", 24 | author = "Yang Li and Chunxiao Fan and Yong Li and Qiong Wu and Yue Ming" 25 | } 26 | ``` 27 | 28 | ## Deep MPELU residual architecture 29 | 30 | MPELU nopre bottleneck architecture: 31 | 32 | ![img](torch/models/MPELU-NoPre-ResNet.jpg) 33 | 34 | ## Experiments on CIFAR-10/100 35 | 36 | MPELU is initialized with alpha = 0.25 or 1 and beta = 1. The learning rate multipliers of alpha and beta are 5. The weight decay multipliers of alpha and beta are 5 or 10. The results are reported as best (mean ± std). 37 | 38 | MPELU nopre ResNet | depth | #params | CIFAR-10 | CIFAR-100 39 | -------|:--------:|:--------:|:--------:|:--------:| 40 | alpha = 1; beta = 1 | 164 | 1.696M | 4.58 (4.67 ± 0.06) | 21.35 (21.78 ± 0.33) 41 | alpha = 1; beta = 1 | 1001 | 10.28M | 3.63 (3.78 ± 0.09) | 18.96 (19.08 ± 0.16) 42 | alpha = 0.25; beta = 1 | 164 | 1.696M | 4.43 (4.53 ± 0.12) | 21.69 (21.88 ± 0.19) 43 | alpha = 0.25; beta = 1 | 1001 | 10.28M | **3.57 (3.71 ± 0.11)** | **18.81 (18.98 ± 0.19)** 44 | 45 | The experiments in the paper were conducted in Torch7, but we also provide `pytorch` and `caffe` implementations. If you want to use the Torch7 version to replicate our results, please follow the steps below: 46 | 47 | 1. Install [fb.resnet.torch](https://github.com/facebook/fb.resnet.torch) 48 | 2. Follow our instructions to install MPELU in Torch7. 49 | 3. Copy the files in `mpelu_nopre_resnet` to `fb.resnet.torch` and overwrite the original files. 50 | 4. Run the following command to train a 1001-layer MPELU nopre ResNet: 51 | 52 | ``` 53 | th main.lua -netType mpelu-preactivation-nopre -depth 1001 -batchSize 64 -nGPU 2 -nThreads 12 -dataset cifar10 -nEpochs 300 -shortcutType B -shareGradInput false -optnet true | tee checkpoints/log.txt 54 | ``` 55 | 56 | ## Installation 57 | We now provide [PyTorch](https://pytorch.org/), [Caffe](https://github.com/BVLC/caffe) and [Torch7](http://torch.ch/) (deprecated) implementations. 58 | 59 | ### PyTorch 60 | 61 | The PyTorch version is implemented in CUDA for fast computation. The code has been tested on Ubuntu 20.04 with CUDA 11.6. The implementation is isolated from your PyTorch installation and does not modify any other Python packages on your system; it can be installed and uninstalled independently with `pip` and used alongside your existing PyTorch library without interfering with its functionality. You may integrate it into your projects as needed. 62 | 63 | 1) `cd ./pytorch` 64 | 65 | 2) `pip install .` 66 | 67 | ### Caffe: 68 | 69 | 1) Download the latest `caffe` from [https://github.com/BVLC/caffe](https://github.com/BVLC/caffe) 70 | 71 | 2) Move `caffe/*` of this repo into the `caffe` directory and follow the [instructions](http://caffe.berkeleyvision.org/installation.html) to compile. 
72 | 73 | ### Torch7: 74 | 75 | 1) Update `torch` to the latest version. This is necessary because of [#346](https://github.com/torch/cunn/pull/346). 76 | 77 | 2) Move `torch/extra` in this repo to the official torch directory and overwrite the corresponding files. 78 | 79 | 3) Run the following commands to compile the new layers: 80 | 81 | ``` 82 | cd torch/extra/nn/ 83 | luarocks make rocks/nn-scm-1.rockspec 84 | cd ../cunn/ 85 | luarocks make rocks/cunn-scm-1.rockspec 86 | ``` 87 | 88 | ## Usage 89 | ### PyTorch 90 | 91 | Examples: 92 | ``` 93 | # install MPELU first, then 94 | python examples/mnist_mpelu.py 95 | ``` 96 | 97 | To use the MPELU module in a neural network, import it from the `mpelu` module and use it like any regular PyTorch module in your network definition. 98 | 99 | For example, assuming the MPELU module is defined in a file called `mpelu.py`, you can use it in a network as follows: 100 | 101 | ``` 102 | import torch 103 | from mpelu import MPELU 104 | 105 | class MyNet(torch.nn.Module): 106 | def __init__(self): 107 | super(MyNet, self).__init__() 108 | 109 | self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1) 110 | self.mpelu1 = MPELU(16) 111 | self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1) 112 | self.mpelu2 = MPELU(32) 113 | self.fc = torch.nn.Linear(32 * 8 * 8, 10) 114 | 115 | def forward(self, x): 116 | x = self.conv1(x) 117 | x = self.mpelu1(x) 118 | x = self.conv2(x) 119 | x = self.mpelu2(x) 120 | x = x.view(-1, 32 * 8 * 8) 121 | x = self.fc(x) 122 | return x 123 | ``` 124 | 125 | ### Caffe: 126 | 127 | **MPELU**: 128 | In Caffe, MPELU is provided as the `M2PELU` layer, where the `2` stands for the two parameters, alpha and beta, which are both initialized to 1 by default. 129 | To use this layer, simply replace `type: "ReLU"` with `type: "M2PELU"` in the network definition files. 130 | 131 | **Taylor filler**: 132 | First, replace the keyword `gaussian` or `MSRA` with `taylor` in the `weight_filler` field. Then, add two new lines to specify the values of `alpha` and `beta`: 133 | 134 | ``` 135 | weight_filler { 136 | type: "taylor" 137 | alpha: 1 138 | beta: 1 139 | } 140 | ``` 141 | See the examples for details. 142 | 143 | 144 | ### Torch7 145 | 146 | We implemented two activation functions, `SPELU` and `MPELU`, where `SPELU` is a trimmed version of MPELU and can also be seen as a learnable `ELU`. 147 | 148 | ``` 149 | nn.SPELU(alpha=1, nOutputPlane=0) 150 | nn.MPELU(alpha=1, beta=1, nOutputPlane=0) 151 | ``` 152 | 153 | - When `nOutputPlane = 0`, the `channel-shared` version is used. 154 | - When `nOutputPlane` is set to the number of feature maps, the `channel-wise` version is used. 155 | 156 | To set the learning rate and weight decay multipliers for `MPELU`, use the `nnlr` package. 157 | 158 | ``` 159 | $ luarocks install nnlr 160 | ``` 161 | 162 | ``` 163 | require 'nnlr' 164 | 165 | nn.MPELU(alpha, beta, channels):learningRate('weight', lr_alpha):weightDecay('weight', wd_alpha) 166 | :learningRate('bias', lr_beta):weightDecay('bias', wd_beta) 167 | ``` 168 | 169 | **Taylor filler**: Please check our examples in `mpelu_nopre_resnet`. 
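For the PyTorch implementation, the analogous learning rate and weight decay multipliers can be set with standard optimizer parameter groups instead of `nnlr`. The following is only a hedged sketch: it assumes the MPELU module registers its learnable parameters under names containing `alpha` and `beta`, so check the actual parameter names in `pytorch/mpelu.py` and adjust accordingly (the 5x values mirror the multipliers used in the CIFAR experiments above):

```
import torch
from mpelu import MPELU  # provided by the pip package built from ./pytorch

model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, kernel_size=3, padding=1),
    MPELU(16),
)

# Assumption: MPELU parameters are named with "alpha"/"beta"; adjust if needed.
mpelu_params = [p for n, p in model.named_parameters() if "alpha" in n or "beta" in n]
base_params = [p for n, p in model.named_parameters() if "alpha" not in n and "beta" not in n]

optimizer = torch.optim.SGD(
    [
        {"params": base_params},
        # e.g. 5x the base learning rate and weight decay for alpha/beta
        {"params": mpelu_params, "lr": 0.5, "weight_decay": 5e-4},
    ],
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
)
```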
170 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/15layer_imagenet/solver_elu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer15_elu_taylor_BNoScale.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer15_elu_taylor_BNoScale" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/15layer_imagenet/solver_lrelu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer15_lrelu_gauss_BN.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | base_lr: 0.01 5 | lr_policy: "multistep" 6 | gamma: 0.1 7 | stepvalue: 100000 8 | stepvalue: 600000 9 | display: 20 10 | max_iter: 750000 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | snapshot: 10000 14 | snapshot_prefix: "layer15_lrelu_gauss_BNoScale" 15 | solver_mode: GPU 16 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/15layer_imagenet/solver_mpelu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer15_m2pelu_taylor_BN_alpha0_decay1_beta1_decay1.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer15_m2pelu_taylor_BN_alpha0_decay1_beta1_decay1" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/15layer_imagenet/solver_prelu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer15_prelu_msra_BNoScale_alpha0.25_decay1.prototxt" 2 | #test_iter: 1000 3 | #test_interval: 1000 4 | #test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer15_prelu_msra_BNoScale_alpha0.25_decay1" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/15layer_imagenet/solver_relu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer15_relu_msra_BNoScale.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer15_relu_msra_BNoScale" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/30layer_imagenet/README.md: -------------------------------------------------------------------------------- 1 | # Comparison of Convergence among activation functions 2 | 3 | 30-layer network without BatchNorm 4 | 5 | | Mode 
| ReLU | PReLU | ELU | MPELU | 6 | | --------|:----:|:-----:|:---:|:-----:| 7 | | AVERAGE | X | X | Y | Y | 8 | | FAN_IN | X | X | Y | Y | 9 | | FAN_OUT | X | Y | Y | Y | 10 | 11 | 12 | - X: fail to converge 13 | - Y: converge -------------------------------------------------------------------------------- /caffe/examples/mpelu/30layer_imagenet/layer30_relu_msra_FAN_IN_withoutBN.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: true 12 | crop_size: 224 13 | mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" 14 | } 15 | # mean pixel / channel-wise mean instead of mean image 16 | # transform_param { 17 | # crop_size: 224 18 | # mean_value: 104 19 | # mean_value: 117 20 | # mean_value: 123 21 | # mirror: true 22 | # } 23 | data_param { 24 | source: "../../Datasets/ILSVRC2012/ilsvrc12_train_lmdb" 25 | batch_size: 64 26 | backend: LMDB 27 | } 28 | image_data_param { 29 | shuffle: true 30 | } 31 | } 32 | layer { 33 | name: "data" 34 | type: "Data" 35 | top: "data" 36 | top: "label" 37 | include { 38 | phase: TEST 39 | } 40 | transform_param { 41 | mirror: false 42 | crop_size: 224 43 | mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" 44 | } 45 | # mean pixel / channel-wise mean instead of mean image 46 | # transform_param { 47 | # crop_size: 227 48 | # mean_value: 104 49 | # mean_value: 117 50 | # mean_value: 123 51 | # mirror: false 52 | # } 53 | data_param { 54 | source: "../../Datasets/ILSVRC2012/ilsvrc12_val_lmdb" 55 | batch_size: 50 56 | backend: LMDB 57 | } 58 | image_data_param { 59 | shuffle: true 60 | } 61 | } 62 | layer { 63 | name: "conv1" 64 | type: "Convolution" 65 | bottom: "data" 66 | top: "conv1" 67 | param { 68 | lr_mult: 1 69 | decay_mult: 1 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | convolution_param { 76 | num_output: 64 77 | kernel_size: 7 78 | stride: 2 79 | weight_filler { 80 | type: "msra" 81 | variance_norm: FAN_IN 82 | } 83 | bias_filler { 84 | type: "constant" 85 | value: 0 86 | } 87 | } 88 | } 89 | layer { 90 | name: "relu_conv1" 91 | type: "ReLU" 92 | bottom: "conv1" 93 | top: "conv1" 94 | } 95 | layer { 96 | name: "pool1" 97 | type: "Pooling" 98 | bottom: "conv1" 99 | top: "pool1" 100 | pooling_param { 101 | pool: MAX 102 | kernel_size: 3 103 | stride: 3 104 | } 105 | } 106 | layer { 107 | name: "conv2" 108 | type: "Convolution" 109 | bottom: "pool1" 110 | top: "conv2" 111 | param { 112 | lr_mult: 1 113 | decay_mult: 1 114 | } 115 | param { 116 | lr_mult: 2 117 | decay_mult: 0 118 | } 119 | convolution_param { 120 | num_output: 128 121 | kernel_size: 2 122 | weight_filler { 123 | type: "msra" 124 | variance_norm: FAN_IN 125 | } 126 | bias_filler { 127 | type: "constant" 128 | value: 0 129 | } 130 | } 131 | } 132 | layer { 133 | name: "relu_conv2" 134 | type: "ReLU" 135 | bottom: "conv2" 136 | top: "conv2" 137 | } 138 | layer { 139 | name: "conv3" 140 | type: "Convolution" 141 | bottom: "conv2" 142 | top: "conv3" 143 | param { 144 | lr_mult: 1 145 | decay_mult: 1 146 | } 147 | param { 148 | lr_mult: 2 149 | decay_mult: 0 150 | } 151 | convolution_param { 152 | num_output: 128 153 | kernel_size: 2 154 | pad_h: 1 155 | pad_w: 1 156 | weight_filler { 157 | type: "msra" 158 | variance_norm: FAN_IN 159 | } 160 | bias_filler { 161 | type: "constant" 162 | value: 0 163 | } 164 | } 165 | } 166 | layer { 167 | name: "relu_conv3" 168 | 
type: "ReLU" 169 | bottom: "conv3" 170 | top: "conv3" 171 | } 172 | layer { 173 | name: "conv4" 174 | type: "Convolution" 175 | bottom: "conv3" 176 | top: "conv4" 177 | param { 178 | lr_mult: 1 179 | decay_mult: 1 180 | } 181 | param { 182 | lr_mult: 2 183 | decay_mult: 0 184 | } 185 | convolution_param { 186 | num_output: 128 187 | kernel_size: 2 188 | weight_filler { 189 | type: "msra" 190 | variance_norm: FAN_IN 191 | } 192 | bias_filler { 193 | type: "constant" 194 | value: 0 195 | } 196 | } 197 | } 198 | layer { 199 | name: "relu_conv4" 200 | type: "ReLU" 201 | bottom: "conv4" 202 | top: "conv4" 203 | } 204 | layer { 205 | name: "conv5" 206 | type: "Convolution" 207 | bottom: "conv4" 208 | top: "conv5" 209 | param { 210 | lr_mult: 1 211 | decay_mult: 1 212 | } 213 | param { 214 | lr_mult: 2 215 | decay_mult: 0 216 | } 217 | convolution_param { 218 | num_output: 128 219 | kernel_size: 2 220 | pad: 1 221 | weight_filler { 222 | type: "msra" 223 | variance_norm: FAN_IN 224 | } 225 | bias_filler { 226 | type: "constant" 227 | value: 0 228 | } 229 | } 230 | } 231 | layer { 232 | name: "relu_conv5" 233 | type: "ReLU" 234 | bottom: "conv5" 235 | top: "conv5" 236 | } 237 | layer { 238 | name: "pool2" 239 | type: "Pooling" 240 | bottom: "conv5" 241 | top: "pool2" 242 | pooling_param { 243 | pool: MAX 244 | kernel_size: 2 245 | stride: 2 246 | } 247 | } 248 | layer { 249 | name: "conv6" 250 | type: "Convolution" 251 | bottom: "pool2" 252 | top: "conv6" 253 | param { 254 | lr_mult: 1 255 | decay_mult: 1 256 | } 257 | param { 258 | lr_mult: 2 259 | decay_mult: 0 260 | } 261 | convolution_param { 262 | num_output: 256 263 | kernel_size: 2 264 | weight_filler { 265 | type: "msra" 266 | variance_norm: FAN_IN 267 | } 268 | bias_filler { 269 | type: "constant" 270 | value: 0 271 | } 272 | } 273 | } 274 | layer { 275 | name: "relu_conv6" 276 | type: "ReLU" 277 | bottom: "conv6" 278 | top: "conv6" 279 | } 280 | layer { 281 | name: "conv7" 282 | type: "Convolution" 283 | bottom: "conv6" 284 | top: "conv7" 285 | param { 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | param { 290 | lr_mult: 2 291 | decay_mult: 0 292 | } 293 | convolution_param { 294 | num_output: 256 295 | kernel_size: 2 296 | pad:1 297 | weight_filler { 298 | type: "msra" 299 | variance_norm: FAN_IN 300 | } 301 | bias_filler { 302 | type: "constant" 303 | value: 0 304 | } 305 | } 306 | } 307 | layer { 308 | name: "relu_conv7" 309 | type: "ReLU" 310 | bottom: "conv7" 311 | top: "conv7" 312 | } 313 | layer { 314 | name: "conv8" 315 | type: "Convolution" 316 | bottom: "conv7" 317 | top: "conv8" 318 | param { 319 | lr_mult: 1 320 | decay_mult: 1 321 | } 322 | param { 323 | lr_mult: 2 324 | decay_mult: 0 325 | } 326 | convolution_param { 327 | num_output: 256 328 | kernel_size: 2 329 | weight_filler { 330 | type: "msra" 331 | variance_norm: FAN_IN 332 | } 333 | bias_filler { 334 | type: "constant" 335 | value: 0 336 | } 337 | } 338 | } 339 | layer { 340 | name: "relu_conv8" 341 | type: "ReLU" 342 | bottom: "conv8" 343 | top: "conv8" 344 | } 345 | layer { 346 | name: "conv9" 347 | type: "Convolution" 348 | bottom: "conv8" 349 | top: "conv9" 350 | param { 351 | lr_mult: 1 352 | decay_mult: 1 353 | } 354 | param { 355 | lr_mult: 2 356 | decay_mult: 0 357 | } 358 | convolution_param { 359 | num_output: 256 360 | kernel_size: 2 361 | pad:1 362 | weight_filler { 363 | type: "msra" 364 | variance_norm: FAN_IN 365 | } 366 | bias_filler { 367 | type: "constant" 368 | value: 0 369 | } 370 | } 371 | } 372 | layer { 373 | name: "relu_conv9" 374 | type: 
"ReLU" 375 | bottom: "conv9" 376 | top: "conv9" 377 | } 378 | layer { 379 | name: "conv10" 380 | type: "Convolution" 381 | bottom: "conv9" 382 | top: "conv10" 383 | param { 384 | lr_mult: 1 385 | decay_mult: 1 386 | } 387 | param { 388 | lr_mult: 2 389 | decay_mult: 0 390 | } 391 | convolution_param { 392 | num_output: 256 393 | kernel_size: 2 394 | weight_filler { 395 | type: "msra" 396 | variance_norm: FAN_IN 397 | } 398 | bias_filler { 399 | type: "constant" 400 | value: 0 401 | } 402 | } 403 | } 404 | layer { 405 | name: "relu_conv10" 406 | type: "ReLU" 407 | bottom: "conv10" 408 | top: "conv10" 409 | } 410 | layer { 411 | name: "conv11" 412 | type: "Convolution" 413 | bottom: "conv10" 414 | top: "conv11" 415 | param { 416 | lr_mult: 1 417 | decay_mult: 1 418 | } 419 | param { 420 | lr_mult: 2 421 | decay_mult: 0 422 | } 423 | convolution_param { 424 | num_output: 256 425 | kernel_size: 2 426 | pad: 1 427 | weight_filler { 428 | type: "msra" 429 | variance_norm: FAN_IN 430 | } 431 | bias_filler { 432 | type: "constant" 433 | value: 0 434 | } 435 | } 436 | } 437 | layer { 438 | name: "relu_conv11" 439 | type: "ReLU" 440 | bottom: "conv11" 441 | top: "conv11" 442 | } 443 | layer { 444 | name: "conv12" 445 | type: "Convolution" 446 | bottom: "conv11" 447 | top: "conv12" 448 | param { 449 | lr_mult: 1 450 | decay_mult: 1 451 | } 452 | param { 453 | lr_mult: 2 454 | decay_mult: 0 455 | } 456 | convolution_param { 457 | num_output: 256 458 | kernel_size: 2 459 | pad: 1 460 | weight_filler { 461 | type: "msra" 462 | variance_norm: FAN_IN 463 | } 464 | bias_filler { 465 | type: "constant" 466 | value: 0 467 | } 468 | } 469 | } 470 | layer { 471 | name: "relu_conv12" 472 | type: "ReLU" 473 | bottom: "conv12" 474 | top: "conv12" 475 | } 476 | layer { 477 | name: "conv13" 478 | type: "Convolution" 479 | bottom: "conv12" 480 | top: "conv13" 481 | param { 482 | lr_mult: 1 483 | decay_mult: 1 484 | } 485 | param { 486 | lr_mult: 2 487 | decay_mult: 0 488 | } 489 | convolution_param { 490 | num_output: 256 491 | kernel_size: 2 492 | pad: 1 493 | weight_filler { 494 | type: "msra" 495 | variance_norm: FAN_IN 496 | } 497 | bias_filler { 498 | type: "constant" 499 | value: 0 500 | } 501 | } 502 | } 503 | layer { 504 | name: "relu_conv13" 505 | type: "ReLU" 506 | bottom: "conv13" 507 | top: "conv13" 508 | } 509 | layer { 510 | name: "conv14" 511 | type: "Convolution" 512 | bottom: "conv13" 513 | top: "conv14" 514 | param { 515 | lr_mult: 1 516 | decay_mult: 1 517 | } 518 | param { 519 | lr_mult: 2 520 | decay_mult: 0 521 | } 522 | convolution_param { 523 | num_output: 256 524 | kernel_size: 2 525 | pad: 1 526 | weight_filler { 527 | type: "msra" 528 | variance_norm: FAN_IN 529 | } 530 | bias_filler { 531 | type: "constant" 532 | value: 0 533 | } 534 | } 535 | } 536 | layer { 537 | name: "relu_conv14" 538 | type: "ReLU" 539 | bottom: "conv14" 540 | top: "conv14" 541 | } 542 | layer { 543 | name: "conv15" 544 | type: "Convolution" 545 | bottom: "conv14" 546 | top: "conv15" 547 | param { 548 | lr_mult: 1 549 | decay_mult: 1 550 | } 551 | param { 552 | lr_mult: 2 553 | decay_mult: 0 554 | } 555 | convolution_param { 556 | num_output: 256 557 | kernel_size: 2 558 | pad: 1 559 | weight_filler { 560 | type: "msra" 561 | variance_norm: FAN_IN 562 | } 563 | bias_filler { 564 | type: "constant" 565 | value: 0 566 | } 567 | } 568 | } 569 | layer { 570 | name: "relu_conv15" 571 | type: "ReLU" 572 | bottom: "conv15" 573 | top: "conv15" 574 | } 575 | layer { 576 | name: "conv16" 577 | type: "Convolution" 578 | bottom: 
"conv15" 579 | top: "conv16" 580 | param { 581 | lr_mult: 1 582 | decay_mult: 1 583 | } 584 | param { 585 | lr_mult: 2 586 | decay_mult: 0 587 | } 588 | convolution_param { 589 | num_output: 256 590 | kernel_size: 2 591 | pad: 1 592 | weight_filler { 593 | type: "msra" 594 | variance_norm: FAN_IN 595 | } 596 | bias_filler { 597 | type: "constant" 598 | value: 0 599 | } 600 | } 601 | } 602 | layer { 603 | name: "relu_conv16" 604 | type: "ReLU" 605 | bottom: "conv16" 606 | top: "conv16" 607 | } 608 | layer { 609 | name: "conv17" 610 | type: "Convolution" 611 | bottom: "conv16" 612 | top: "conv17" 613 | param { 614 | lr_mult: 1 615 | decay_mult: 1 616 | } 617 | param { 618 | lr_mult: 2 619 | decay_mult: 0 620 | } 621 | convolution_param { 622 | num_output: 256 623 | kernel_size: 2 624 | pad: 1 625 | weight_filler { 626 | type: "msra" 627 | variance_norm: FAN_IN 628 | } 629 | bias_filler { 630 | type: "constant" 631 | value: 0 632 | } 633 | } 634 | } 635 | layer { 636 | name: "relu_conv17" 637 | type: "ReLU" 638 | bottom: "conv17" 639 | top: "conv17" 640 | } 641 | layer { 642 | name: "conv18" 643 | type: "Convolution" 644 | bottom: "conv17" 645 | top: "conv18" 646 | param { 647 | lr_mult: 1 648 | decay_mult: 1 649 | } 650 | param { 651 | lr_mult: 2 652 | decay_mult: 0 653 | } 654 | convolution_param { 655 | num_output: 256 656 | kernel_size: 2 657 | pad: 1 658 | weight_filler { 659 | type: "msra" 660 | variance_norm: FAN_IN 661 | } 662 | bias_filler { 663 | type: "constant" 664 | value: 0 665 | } 666 | } 667 | } 668 | layer { 669 | name: "relu_conv18" 670 | type: "ReLU" 671 | bottom: "conv18" 672 | top: "conv18" 673 | } 674 | layer { 675 | name: "conv19" 676 | type: "Convolution" 677 | bottom: "conv18" 678 | top: "conv19" 679 | param { 680 | lr_mult: 1 681 | decay_mult: 1 682 | } 683 | param { 684 | lr_mult: 2 685 | decay_mult: 0 686 | } 687 | convolution_param { 688 | num_output: 256 689 | kernel_size: 2 690 | pad: 1 691 | weight_filler { 692 | type: "msra" 693 | variance_norm: FAN_IN 694 | } 695 | bias_filler { 696 | type: "constant" 697 | value: 0 698 | } 699 | } 700 | } 701 | layer { 702 | name: "relu_conv19" 703 | type: "ReLU" 704 | bottom: "conv19" 705 | top: "conv19" 706 | } 707 | layer { 708 | name: "conv20" 709 | type: "Convolution" 710 | bottom: "conv19" 711 | top: "conv20" 712 | param { 713 | lr_mult: 1 714 | decay_mult: 1 715 | } 716 | param { 717 | lr_mult: 2 718 | decay_mult: 0 719 | } 720 | convolution_param { 721 | num_output: 256 722 | kernel_size: 2 723 | pad: 1 724 | weight_filler { 725 | type: "msra" 726 | variance_norm: FAN_IN 727 | } 728 | bias_filler { 729 | type: "constant" 730 | value: 0 731 | } 732 | } 733 | } 734 | layer { 735 | name: "relu_conv20" 736 | type: "ReLU" 737 | bottom: "conv20" 738 | top: "conv20" 739 | } 740 | layer { 741 | name: "conv21" 742 | type: "Convolution" 743 | bottom: "conv20" 744 | top: "conv21" 745 | param { 746 | lr_mult: 1 747 | decay_mult: 1 748 | } 749 | param { 750 | lr_mult: 2 751 | decay_mult: 0 752 | } 753 | convolution_param { 754 | num_output: 256 755 | kernel_size: 2 756 | pad: 1 757 | weight_filler { 758 | type: "msra" 759 | variance_norm: FAN_IN 760 | } 761 | bias_filler { 762 | type: "constant" 763 | value: 0 764 | } 765 | } 766 | } 767 | layer { 768 | name: "relu_conv21" 769 | type: "ReLU" 770 | bottom: "conv21" 771 | top: "conv21" 772 | } 773 | layer { 774 | name: "conv22" 775 | type: "Convolution" 776 | bottom: "conv21" 777 | top: "conv22" 778 | param { 779 | lr_mult: 1 780 | decay_mult: 1 781 | } 782 | param { 783 | lr_mult: 
2 784 | decay_mult: 0 785 | } 786 | convolution_param { 787 | num_output: 256 788 | kernel_size: 2 789 | pad: 1 790 | weight_filler { 791 | type: "msra" 792 | variance_norm: FAN_IN 793 | } 794 | bias_filler { 795 | type: "constant" 796 | value: 0 797 | } 798 | } 799 | } 800 | layer { 801 | name: "relu_conv22" 802 | type: "ReLU" 803 | bottom: "conv22" 804 | top: "conv22" 805 | } 806 | layer { 807 | name: "conv23" 808 | type: "Convolution" 809 | bottom: "conv22" 810 | top: "conv23" 811 | param { 812 | lr_mult: 1 813 | decay_mult: 1 814 | } 815 | param { 816 | lr_mult: 2 817 | decay_mult: 0 818 | } 819 | convolution_param { 820 | num_output: 256 821 | kernel_size: 2 822 | pad: 1 823 | weight_filler { 824 | type: "msra" 825 | variance_norm: FAN_IN 826 | } 827 | bias_filler { 828 | type: "constant" 829 | value: 0 830 | } 831 | } 832 | } 833 | layer { 834 | name: "relu_conv23" 835 | type: "ReLU" 836 | bottom: "conv23" 837 | top: "conv23" 838 | } 839 | layer { 840 | name: "conv24" 841 | type: "Convolution" 842 | bottom: "conv23" 843 | top: "conv24" 844 | param { 845 | lr_mult: 1 846 | decay_mult: 1 847 | } 848 | param { 849 | lr_mult: 2 850 | decay_mult: 0 851 | } 852 | convolution_param { 853 | num_output: 256 854 | kernel_size: 2 855 | pad: 1 856 | weight_filler { 857 | type: "msra" 858 | variance_norm: FAN_IN 859 | } 860 | bias_filler { 861 | type: "constant" 862 | value: 0 863 | } 864 | } 865 | } 866 | layer { 867 | name: "relu_conv24" 868 | type: "ReLU" 869 | bottom: "conv24" 870 | top: "conv24" 871 | } 872 | layer { 873 | name: "conv25" 874 | type: "Convolution" 875 | bottom: "conv24" 876 | top: "conv25" 877 | param { 878 | lr_mult: 1 879 | decay_mult: 1 880 | } 881 | param { 882 | lr_mult: 2 883 | decay_mult: 0 884 | } 885 | convolution_param { 886 | num_output: 256 887 | kernel_size: 2 888 | pad: 1 889 | weight_filler { 890 | type: "msra" 891 | variance_norm: FAN_IN 892 | } 893 | bias_filler { 894 | type: "constant" 895 | value: 0 896 | } 897 | } 898 | } 899 | layer { 900 | name: "relu_conv25" 901 | type: "ReLU" 902 | bottom: "conv25" 903 | top: "conv25" 904 | } 905 | layer { 906 | name: "conv26" 907 | type: "Convolution" 908 | bottom: "conv25" 909 | top: "conv26" 910 | param { 911 | lr_mult: 1 912 | decay_mult: 1 913 | } 914 | param { 915 | lr_mult: 2 916 | decay_mult: 0 917 | } 918 | convolution_param { 919 | num_output: 256 920 | kernel_size: 2 921 | pad: 1 922 | weight_filler { 923 | type: "msra" 924 | variance_norm: FAN_IN 925 | } 926 | bias_filler { 927 | type: "constant" 928 | value: 0 929 | } 930 | } 931 | } 932 | layer { 933 | name: "relu_conv26" 934 | type: "ReLU" 935 | bottom: "conv26" 936 | top: "conv26" 937 | } 938 | layer { 939 | name: "conv27" 940 | type: "Convolution" 941 | bottom: "conv26" 942 | top: "conv27" 943 | param { 944 | lr_mult: 1 945 | decay_mult: 1 946 | } 947 | param { 948 | lr_mult: 2 949 | decay_mult: 0 950 | } 951 | convolution_param { 952 | num_output: 256 953 | kernel_size: 2 954 | pad: 1 955 | weight_filler { 956 | type: "msra" 957 | variance_norm: FAN_IN 958 | } 959 | bias_filler { 960 | type: "constant" 961 | value: 0 962 | } 963 | } 964 | } 965 | layer { 966 | name: "relu_conv27" 967 | type: "ReLU" 968 | bottom: "conv27" 969 | top: "conv27" 970 | } 971 | layer { 972 | name: "spp" 973 | type: "SPP" 974 | bottom: "conv27" 975 | top: "spp" 976 | spp_param { 977 | pyramid_height: 4 978 | } 979 | } 980 | layer { 981 | name: "fc1" 982 | type: "InnerProduct" 983 | bottom: "spp" 984 | top: "fc1" 985 | param { 986 | lr_mult: 1 987 | decay_mult: 1 988 | } 989 | 
param { 990 | lr_mult: 2 991 | decay_mult: 0 992 | } 993 | inner_product_param { 994 | num_output: 4096 995 | weight_filler { 996 | type: "gaussian" 997 | std: 0.01 998 | } 999 | bias_filler { 1000 | type: "constant" 1001 | value: 0 1002 | } 1003 | } 1004 | } 1005 | layer { 1006 | name: "relu_fc1" 1007 | type: "ReLU" 1008 | bottom: "fc1" 1009 | top: "fc1" 1010 | } 1011 | layer { 1012 | name: "dropout1" 1013 | type: "Dropout" 1014 | bottom: "fc1" 1015 | top: "dropout1" 1016 | dropout_param { 1017 | dropout_ratio: 0.5 1018 | } 1019 | } 1020 | layer { 1021 | name: "fc2" 1022 | type: "InnerProduct" 1023 | bottom: "dropout1" 1024 | top: "fc2" 1025 | param { 1026 | lr_mult: 1 1027 | decay_mult: 1 1028 | } 1029 | param { 1030 | lr_mult: 2 1031 | decay_mult: 0 1032 | } 1033 | inner_product_param { 1034 | num_output: 4096 1035 | weight_filler { 1036 | type: "gaussian" 1037 | std: 0.01 1038 | } 1039 | bias_filler { 1040 | type: "constant" 1041 | value: 0 1042 | } 1043 | } 1044 | } 1045 | layer { 1046 | name: "relu_fc2" 1047 | type: "ReLU" 1048 | bottom: "fc2" 1049 | top: "fc2" 1050 | } 1051 | layer { 1052 | name: "dropout2" 1053 | type: "Dropout" 1054 | bottom: "fc2" 1055 | top: "dropout2" 1056 | dropout_param { 1057 | dropout_ratio: 0.5 1058 | } 1059 | } 1060 | layer { 1061 | name: "fc3" 1062 | type: "InnerProduct" 1063 | bottom: "dropout2" 1064 | top: "fc3" 1065 | param { 1066 | lr_mult: 1 1067 | decay_mult: 1 1068 | } 1069 | param { 1070 | lr_mult: 2 1071 | decay_mult: 0 1072 | } 1073 | inner_product_param { 1074 | num_output: 1000 1075 | weight_filler { 1076 | type: "gaussian" 1077 | std: 0.001 1078 | } 1079 | bias_filler { 1080 | type: "constant" 1081 | value: 0 1082 | } 1083 | } 1084 | } 1085 | layer { 1086 | name: "top-1" 1087 | type: "Accuracy" 1088 | bottom: "fc3" 1089 | bottom: "label" 1090 | top: "top-1" 1091 | } 1092 | layer { 1093 | name: "top-5" 1094 | type: "Accuracy" 1095 | bottom: "fc3" 1096 | bottom: "label" 1097 | top: "top-5" 1098 | accuracy_param { 1099 | top_k: 5 1100 | } 1101 | } 1102 | layer { 1103 | name: "loss" 1104 | type: "SoftmaxWithLoss" 1105 | bottom: "fc3" 1106 | bottom: "label" 1107 | top: "loss" 1108 | } 1109 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/30layer_imagenet/solver_elu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer30_elu_taylor_FAN_IN_withoutBN.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer30_elu_taylor_FAN_IN_withoutBN" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/30layer_imagenet/solver_mpelu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer30_mpelu_taylor_FAN_IN_withoutBN.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer30_mpelu_taylor_FAN_IN_withoutBN" 16 | solver_mode: GPU 17 | 
-------------------------------------------------------------------------------- /caffe/examples/mpelu/30layer_imagenet/solver_prelu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer30_prelu_msra_FAN_IN_withoutBN.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer30_prelu_msra_FAN_IN_withoutBN" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/30layer_imagenet/solver_relu.prototxt: -------------------------------------------------------------------------------- 1 | net: "layer30_relu_msra_FAN_IN_withoutBN.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer30_relu_msra_FAN_IN_withoutBN" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/50layer_imagenet/README.MD: -------------------------------------------------------------------------------- 1 | # Comparison of weight initialization 2 | 3 | The 52-layer network initialized with the `taylor` filler is able to converge without Batch Normalization. 4 | 5 | Run the following command to check: 6 | 7 | ``` 8 | build/tools/caffe train -solver examples/mpelu/50layer_imagenet/solver_elu.prototxt 9 | ``` 10 | 11 | As a counterpart, LSUV ([arXiv:1511.06422](http://arxiv.org/abs/1511.06422)) can also be used to initialize an ELU network. The code, which is included in this repo, is available in [LSUVinit](https://github.com/ducha-aiki/LSUVinit). However, 12 | in our experiments, LSUV made the 52-layer network (and also the 30-layer ELU network) explode within several iterations. 14 | To initialize an ELU network with LSUV, first run the following command to compute the weights: 15 | 16 | ``` 17 | python examples/mpelu/50layer_imagenet/lsuv_init_python3.py examples/mpelu/50layer_imagenet/layer52_elu_lsuv_withoutBN.prototxt elu.lsuv OrthonormalLSUV 18 | ``` 19 | The computed LSUV weights will be saved in the file `elu.lsuv`. 20 | 21 | Then, fine-tune the network from the LSUV weights `elu.lsuv`. 
22 | 23 | ``` 24 | build/tools/caffe train -solver examples/mpelu/50layer_imagenet/solver_elu.prototxt -weights elu.lsuv 25 | ``` -------------------------------------------------------------------------------- /caffe/examples/mpelu/50layer_imagenet/lsuv_init_python2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import os 4 | import sys 5 | 6 | class bcolors: 7 | LINE = '\033[95m' 8 | OKBLUE = '\033[94m' 9 | OKGREEN = '\033[92m' 10 | WARNING = '\033[93m' 11 | FAIL = '\033[91m' 12 | ENDC = '\033[0m' 13 | BOLD = '\033[1m' 14 | UNDERLINE = '\033[4m' 15 | 16 | 17 | for arg in sys.argv: 18 | if arg == 'help' or arg == 'HELP' or arg == '-help' or arg == '--help' or arg == '-h' or arg == '--h' or arg == '/h' or arg == '/help' or arg == '/H' or arg == '/HELP': 19 | print (bcolors.LINE + """____________________________________________ 20 | 21 | """ + bcolors.ENDC + """By $CAFFE_ROOT we mean Caffe's installation folder. 22 | Place this script into """+ bcolors.OKGREEN +"""$CAFFE_ROOT/tools/extra/"""+ bcolors.ENDC +""" 23 | 24 | Use it as: 25 | """+ bcolors.OKGREEN +"""python $CAFFE_ROOT/tools/extra/lsuv-init.py /path/to/solver.prototxt /path/to/initialised.caffemodel LSUV 26 | """+ bcolors.ENDC +"""or 27 | """+ bcolors.OKGREEN +"""python $CAFFE_ROOT/tools/extra/lsuv-init.py /path/to/solver.prototxt /path/to/initialised.caffemodel Orthonormal noFetch gpu 28 | """+ bcolors.ENDC +""" 29 | """+ bcolors.OKBLUE +"""initialised.caffemodel"""+ bcolors.ENDC +""" is where the initialised model will be saved to. If such file already exists, it will be loaded and the initialisation distortion will be applied to it instead. 30 | """+ bcolors.OKBLUE +"""noFetch"""+ bcolors.ENDC +""" is an optional parameter for not loading existing "initialised.caffemodel" file. 31 | 32 | It's highly recommended to """+ bcolors.BOLD + bcolors.UNDERLINE +"""USE LARGE BATCHES"""+ bcolors.ENDC +""" - set them in appropriate *.prototxt - when running LSUV. Obviously, the more different your data is, the bigger the need for larger batches. For 99% of us large batches are easier to get on the CPU using RAM and swapping, which is why CPU is the default platform for computing LSUV. 33 | """+ bcolors.OKBLUE +"""gpu"""+ bcolors.ENDC +""" is an optional parameter for computing on GPU (the first one of them - "device #0" - if you have several) instead of CPU. You will be limited by your GPU's ram size then, but the LSUV init computation is likely to finish much faster. 34 | 35 | """+ bcolors.OKBLUE +"""LSUV"""+ bcolors.ENDC +""" scientific paper can be found at http://arxiv.org/abs/1511.06422 36 | """+ bcolors.OKBLUE +"""Orthonormal"""+ bcolors.ENDC +""" is a different initialisation type, which is pretty cool too. http://arxiv.org/abs/1312.6120 37 | 38 | """+ bcolors.BOLD + bcolors.FAIL +"""NOTE!"""+ bcolors.ENDC +""" 39 | * stands for anything 40 | Name your """+ bcolors.WARNING +"""activation layers"""+ bcolors.ENDC +""" as """+ bcolors.OKBLUE +"""*_act*"""+ bcolors.ENDC +""", or """+ bcolors.OKBLUE +"""*_ACT*"""+ bcolors.ENDC +""" 41 | Name your """+ bcolors.WARNING +"""batch normalization layers"""+ bcolors.ENDC +""" as """+ bcolors.OKBLUE +"""*BN*"""+ bcolors.ENDC +""", or """+ bcolors.OKBLUE +"""*bn*"""+ bcolors.ENDC +""" 42 | - so that the script wouldn't try to process stuff like """+ bcolors.WARNING +"""PReLU activation layers"""+ bcolors.ENDC +""" and get """+ bcolors.FAIL +"""stuck"""+ bcolors.ENDC +""". 
This algorithm can only process fully-connected and convolutional layers. Not their activations. 43 | (That doesn't mean that you can't use PReLU. Just name them as *_act*) 44 | 45 | """+ bcolors.LINE +"""____________________________________________ 46 | """ + bcolors.ENDC) 47 | sys.exit() 48 | 49 | 50 | from pylab import * 51 | import random 52 | import numpy as np 53 | caffe_root_dir=os.path.dirname(os.path.realpath(__file__)) 54 | caffe_root_dir+='/../../python' 55 | sys.path.insert(0, caffe_root_dir) 56 | import caffe 57 | # Orthonorm init code is taked from Lasagne 58 | # https://github.com/Lasagne/Lasagne/blob/master/lasagne/init.py 59 | 60 | def svd_orthonormal(shape): 61 | if len(shape) < 2: 62 | raise RuntimeError("Only shapes of length 2 or more are supported.") 63 | flat_shape = (shape[0], np.prod(shape[1:])) 64 | a = standard_normal(flat_shape) 65 | u, _, v = np.linalg.svd(a, full_matrices=False) 66 | q = u if u.shape == flat_shape else v 67 | q = q.reshape(shape) 68 | return q 69 | 70 | noFetch = False 71 | for arg in sys.argv: 72 | if arg == 'noFetch': 73 | noFetch = True 74 | 75 | 76 | if __name__ == '__main__': 77 | if len (sys.argv) < 4: 78 | raise RuntimeError('Usage: python ' + sys.argv[0] + ' path_to_solver path_to_save_model mode') 79 | solver_path = str(sys.argv[1]) 80 | init_path = str(sys.argv[2]) 81 | init_mode = str(sys.argv[3]) 82 | margin = 0.02; 83 | max_iter = 20; 84 | needed_variance = 1.0 85 | var_before_relu_if_inplace=True 86 | mode_check=False; 87 | if init_mode == 'Orthonormal': 88 | mode_check=True 89 | elif init_mode == 'LSUV': 90 | mode_check=True 91 | elif init_mode == 'OrthonormalLSUV': 92 | mode_check=True 93 | else: 94 | raise RuntimeError('Unknown mode. Try Orthonormal or LSUV or OrthonormalLSUV') 95 | 96 | caffe.set_mode_cpu() 97 | for arg in sys.argv: 98 | if arg == 'gpu': 99 | caffe.set_mode_gpu() 100 | 101 | solver = caffe.SGDSolver(solver_path) 102 | if os.path.isfile(init_path) and not noFetch: 103 | print("Loading") 104 | try: 105 | solver.net.copy_from(init_path) 106 | except: 107 | print('Failed to load weights from ', init_path) 108 | 109 | for k,v in solver.net.params.iteritems(): 110 | if ('BN' in k) or ('bn' in k): 111 | print('Skipping BatchNorm (*BN* name) layer') 112 | continue; 113 | if ('elu' in k) or ('ELU' in k): 114 | print('Skipping activation ELU layer') 115 | continue; 116 | try: 117 | print(k, v[0].data.shape) 118 | except: 119 | print('Skipping layer ', k, ' as it has no parameters to initialize') 120 | continue 121 | if 'Orthonormal' in init_mode: 122 | weights=svd_orthonormal(v[0].data[:].shape) 123 | solver.net.params[k][0].data[:]=weights#* sqrt(2.0/(1.0+neg_slope*neg_slope)); 124 | else: 125 | weights=solver.net.params[k][0].data[:] 126 | 127 | if 'LSUV' in init_mode: 128 | if var_before_relu_if_inplace: 129 | solver.net.forward(end=k) 130 | else: 131 | solver.net.forward() 132 | 133 | v = solver.net.blobs[k] 134 | var1 = np.var(v.data[:]) 135 | mean1 = np.mean(v.data[:]); 136 | print(k,'var = ', var1,'mean = ', mean1) 137 | sys.stdout.flush() 138 | iter_num = 0; 139 | while (abs(needed_variance - var1) > margin): 140 | weights = solver.net.params[k][0].data[:] 141 | solver.net.params[k][0].data[:] = weights / sqrt(var1); 142 | if var_before_relu_if_inplace: 143 | solver.net.forward(end=k) 144 | else: 145 | solver.net.forward() 146 | v = solver.net.blobs[k]; 147 | var1 = np.var(v.data[:]); 148 | mean1= np.mean(v.data[:]); 149 | print(k,'var = ', var1,'mean = ', mean1) 150 | sys.stdout.flush() 151 | iter_num+=1; 152 
| if iter_num > max_iter: 153 | print('Could not converge in ', iter_num, ' iterations, go to next layer') 154 | break; 155 | print("Initialization finished!") 156 | solver.net.forward() 157 | for k,v in solver.net.blobs.iteritems(): 158 | try: 159 | print(k,v.data[:].shape, ' var = ', np.var(v.data[:]), ' mean = ', np.mean(v.data[:])) 160 | except: 161 | print('Skiping layer', k) 162 | 163 | print("Saving model...") 164 | solver.net.save(init_path) 165 | print("Finished. Model saved to", init_path) 166 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/50layer_imagenet/lsuv_init_python3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import os 4 | import sys 5 | 6 | class bcolors: 7 | LINE = '\033[95m' 8 | OKBLUE = '\033[94m' 9 | OKGREEN = '\033[92m' 10 | WARNING = '\033[93m' 11 | FAIL = '\033[91m' 12 | ENDC = '\033[0m' 13 | BOLD = '\033[1m' 14 | UNDERLINE = '\033[4m' 15 | 16 | 17 | for arg in sys.argv: 18 | if arg == 'help' or arg == 'HELP' or arg == '-help' or arg == '--help' or arg == '-h' or arg == '--h' or arg == '/h' or arg == '/help' or arg == '/H' or arg == '/HELP': 19 | print (bcolors.LINE + """____________________________________________ 20 | 21 | """ + bcolors.ENDC + """By $CAFFE_ROOT we mean Caffe's installation folder. 22 | Place this script into """+ bcolors.OKGREEN +"""$CAFFE_ROOT/tools/extra/"""+ bcolors.ENDC +""" 23 | 24 | Use it as: 25 | """+ bcolors.OKGREEN +"""python $CAFFE_ROOT/tools/extra/lsuv-init.py /path/to/solver.prototxt /path/to/initialised.caffemodel LSUV 26 | """+ bcolors.ENDC +"""or 27 | """+ bcolors.OKGREEN +"""python $CAFFE_ROOT/tools/extra/lsuv-init.py /path/to/solver.prototxt /path/to/initialised.caffemodel Orthonormal noFetch gpu 28 | """+ bcolors.ENDC +""" 29 | """+ bcolors.OKBLUE +"""initialised.caffemodel"""+ bcolors.ENDC +""" is where the initialised model will be saved to. If such file already exists, it will be loaded and the initialisation distortion will be applied to it instead. 30 | """+ bcolors.OKBLUE +"""noFetch"""+ bcolors.ENDC +""" is an optional parameter for not loading existing "initialised.caffemodel" file. 31 | 32 | It's highly recommended to """+ bcolors.BOLD + bcolors.UNDERLINE +"""USE LARGE BATCHES"""+ bcolors.ENDC +""" - set them in appropriate *.prototxt - when running LSUV. Obviously, the more different your data is, the bigger the need for larger batches. For 99% of us large batches are easier to get on the CPU using RAM and swapping, which is why CPU is the default platform for computing LSUV. 33 | """+ bcolors.OKBLUE +"""gpu"""+ bcolors.ENDC +""" is an optional parameter for computing on GPU (the first one of them - "device #0" - if you have several) instead of CPU. You will be limited by your GPU's ram size then, but the LSUV init computation is likely to finish much faster. 34 | 35 | """+ bcolors.OKBLUE +"""LSUV"""+ bcolors.ENDC +""" scientific paper can be found at http://arxiv.org/abs/1511.06422 36 | """+ bcolors.OKBLUE +"""Orthonormal"""+ bcolors.ENDC +""" is a different initialisation type, which is pretty cool too. 
http://arxiv.org/abs/1312.6120 37 | 38 | """+ bcolors.BOLD + bcolors.FAIL +"""NOTE!"""+ bcolors.ENDC +""" 39 | * stands for anything 40 | Name your """+ bcolors.WARNING +"""activation layers"""+ bcolors.ENDC +""" as """+ bcolors.OKBLUE +"""*_act*"""+ bcolors.ENDC +""", or """+ bcolors.OKBLUE +"""*_ACT*"""+ bcolors.ENDC +""" 41 | Name your """+ bcolors.WARNING +"""batch normalization layers"""+ bcolors.ENDC +""" as """+ bcolors.OKBLUE +"""*BN*"""+ bcolors.ENDC +""", or """+ bcolors.OKBLUE +"""*bn*"""+ bcolors.ENDC +""" 42 | - so that the script wouldn't try to process stuff like """+ bcolors.WARNING +"""PReLU activation layers"""+ bcolors.ENDC +""" and get """+ bcolors.FAIL +"""stuck"""+ bcolors.ENDC +""". This algorithm can only process fully-connected and convolutional layers. Not their activations. 43 | (That doesn't mean that you can't use PReLU. Just name them as *_act*) 44 | 45 | """+ bcolors.LINE +"""____________________________________________ 46 | """ + bcolors.ENDC) 47 | sys.exit() 48 | 49 | 50 | from pylab import * 51 | import random 52 | import numpy as np 53 | caffe_root_dir=os.path.dirname(os.path.realpath(__file__)) 54 | caffe_root_dir+='/../../python' 55 | sys.path.insert(0, caffe_root_dir) 56 | import caffe 57 | # Orthonorm init code is taked from Lasagne 58 | # https://github.com/Lasagne/Lasagne/blob/master/lasagne/init.py 59 | 60 | def svd_orthonormal(shape): 61 | if len(shape) < 2: 62 | raise RuntimeError("Only shapes of length 2 or more are supported.") 63 | flat_shape = (shape[0], np.prod(shape[1:])) 64 | a = standard_normal(flat_shape) 65 | u, _, v = np.linalg.svd(a, full_matrices=False) 66 | q = u if u.shape == flat_shape else v 67 | q = q.reshape(shape) 68 | return q 69 | 70 | noFetch = False 71 | for arg in sys.argv: 72 | if arg == 'noFetch': 73 | noFetch = True 74 | 75 | 76 | if __name__ == '__main__': 77 | if len (sys.argv) < 4: 78 | raise RuntimeError('Usage: python ' + sys.argv[0] + ' path_to_solver path_to_save_model mode') 79 | solver_path = str(sys.argv[1]) 80 | init_path = str(sys.argv[2]) 81 | init_mode = str(sys.argv[3]) 82 | margin = 0.02; 83 | max_iter = 20; 84 | needed_variance = 1.0 85 | var_before_relu_if_inplace=True 86 | mode_check=False; 87 | if init_mode == 'Orthonormal': 88 | mode_check=True 89 | elif init_mode == 'LSUV': 90 | mode_check=True 91 | elif init_mode == 'OrthonormalLSUV': 92 | mode_check=True 93 | else: 94 | raise RuntimeError('Unknown mode. 
Try Orthonormal or LSUV or OrthonormalLSUV') 95 | 96 | caffe.set_mode_cpu() 97 | for arg in sys.argv: 98 | if arg == 'gpu': 99 | caffe.set_mode_gpu() 100 | 101 | solver = caffe.SGDSolver(solver_path) 102 | if os.path.isfile(init_path) and not noFetch: 103 | print("Loading") 104 | try: 105 | solver.net.copy_from(init_path) 106 | except: 107 | print('Failed to load weights from ', init_path) 108 | 109 | for k,v in solver.net.params.items(): 110 | if ('BN' in k) or ('bn' in k): 111 | print('Skipping BatchNorm (*BN* name) layer') 112 | continue; 113 | if ('elu' in k) or ('ELU' in k): 114 | print('Skipping activation ELU layer') 115 | continue; 116 | try: 117 | print(k, v[0].data.shape) 118 | except: 119 | print('Skipping layer ', k, ' as it has no parameters to initialize') 120 | continue 121 | if 'Orthonormal' in init_mode: 122 | weights=svd_orthonormal(v[0].data[:].shape) 123 | solver.net.params[k][0].data[:]=weights#* sqrt(2.0/(1.0+neg_slope*neg_slope)); 124 | else: 125 | weights=solver.net.params[k][0].data[:] 126 | 127 | if 'LSUV' in init_mode: 128 | if var_before_relu_if_inplace: 129 | solver.net.forward(end=k) 130 | else: 131 | solver.net.forward() 132 | 133 | v = solver.net.blobs[k] 134 | var1 = np.var(v.data[:]) 135 | mean1 = np.mean(v.data[:]); 136 | print(k,'var = ', var1,'mean = ', mean1) 137 | sys.stdout.flush() 138 | iter_num = 0; 139 | while (abs(needed_variance - var1) > margin): 140 | weights = solver.net.params[k][0].data[:] 141 | solver.net.params[k][0].data[:] = weights / sqrt(var1); 142 | if var_before_relu_if_inplace: 143 | solver.net.forward(end=k) 144 | else: 145 | solver.net.forward() 146 | v = solver.net.blobs[k]; 147 | var1 = np.var(v.data[:]); 148 | mean1= np.mean(v.data[:]); 149 | print(k,'var = ', var1,'mean = ', mean1) 150 | sys.stdout.flush() 151 | iter_num+=1; 152 | if iter_num > max_iter: 153 | print('Could not converge in ', iter_num, ' iterations, go to next layer') 154 | break; 155 | print("Initialization finished!") 156 | solver.net.forward() 157 | for k,v in solver.net.blobs.items(): 158 | try: 159 | print(k,v.data[:].shape, ' var = ', np.var(v.data[:]), ' mean = ', np.mean(v.data[:])) 160 | except: 161 | print('Skiping layer', k) 162 | 163 | print("Saving model...") 164 | solver.net.save(init_path) 165 | print("Finished. Model saved to", init_path) 166 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/50layer_imagenet/solver_elu.prototxt: -------------------------------------------------------------------------------- 1 | net: "examples/mpelu/50layer_imagenet/layer52_elu_taylor_FAN_IN_withoutBN.prototxt" 2 | test_iter: 1000 3 | test_interval: 1000 4 | test_initialization: false 5 | base_lr: 0.01 6 | lr_policy: "multistep" 7 | gamma: 0.1 8 | stepvalue: 100000 9 | stepvalue: 600000 10 | display: 20 11 | max_iter: 750000 12 | momentum: 0.9 13 | weight_decay: 0.0005 14 | snapshot: 10000 15 | snapshot_prefix: "layer52_elu_taylor_FAN_IN_withoutBN" 16 | solver_mode: GPU 17 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/network_in_network_cifar10/README.MD: -------------------------------------------------------------------------------- 1 | # Comparison of activation functions on the CIFAR10 2 | 3 | # Dataset 4 | The experiments are performed on the preprocessed `cifar10` dataset which can be downloaded from [Model Zoo](https://gist.github.com/mavenlin/e56253735ef32c3c296d) 5 | 6 | # Usage: 7 | Run `train.sh` in the `CAFFE_ROOT`. 
For example: 8 | 9 | ``` 10 | $ examples/mpelu/network_in_network_cifar10/train.sh 11 | ``` 12 | 13 | This script will run each experiment 5 times and store the outputs in time-related folders. -------------------------------------------------------------------------------- /caffe/examples/mpelu/network_in_network_cifar10/nin_elu_gauss.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | data_param { 8 | source: "../../Datasets/cifar-10-nin/cifar-train-leveldb" 9 | batch_size: 128 10 | } 11 | transform_param { 12 | crop_size: 28 13 | mirror: true 14 | } 15 | image_data_param { 16 | shuffle: true 17 | } 18 | include: { phase: TRAIN } 19 | } 20 | layer { 21 | name: "cifar" 22 | type: "Data" 23 | top: "data" 24 | top: "label" 25 | data_param { 26 | source: "../../Datasets/cifar-10-nin/cifar-test-leveldb" 27 | batch_size: 100 28 | } 29 | transform_param { 30 | crop_size: 28 31 | mirror: false 32 | } 33 | include: { phase: TEST } 34 | } 35 | layer { 36 | name: "conv1" 37 | type: "Convolution" 38 | bottom: "data" 39 | top: "conv1" 40 | param { 41 | lr_mult: 1 42 | decay_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | decay_mult: 0 47 | } 48 | convolution_param { 49 | num_output: 192 50 | pad: 2 51 | kernel_size: 5 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.05 55 | } 56 | bias_filler { 57 | type: "constant" 58 | value: 0 59 | } 60 | } 61 | } 62 | layer { 63 | name: "elu_conv1" 64 | type: "ELU" 65 | bottom: "conv1" 66 | top: "elu_conv1" 67 | elu_param { 68 | alpha: 1 69 | } 70 | } 71 | layer { 72 | name: "cccp1" 73 | type: "Convolution" 74 | bottom: "elu_conv1" 75 | top: "cccp1" 76 | param { 77 | lr_mult: 1 78 | decay_mult: 1 79 | } 80 | param { 81 | lr_mult: 2 82 | decay_mult: 0 83 | } 84 | convolution_param { 85 | num_output: 160 86 | kernel_size: 1 87 | weight_filler { 88 | type: "gaussian" 89 | std: 0.05 90 | } 91 | bias_filler { 92 | type: "constant" 93 | value: 0 94 | } 95 | } 96 | } 97 | layer { 98 | name: "elu_cccp1" 99 | type: "ELU" 100 | bottom: "cccp1" 101 | top: "elu_cccp1" 102 | elu_param { 103 | alpha: 1 104 | } 105 | } 106 | layer { 107 | name: "cccp2" 108 | type: "Convolution" 109 | bottom: "elu_cccp1" 110 | top: "cccp2" 111 | param { 112 | lr_mult: 1 113 | decay_mult: 1 114 | } 115 | param { 116 | lr_mult: 2 117 | decay_mult: 0 118 | } 119 | convolution_param { 120 | num_output: 96 121 | kernel_size: 1 122 | weight_filler { 123 | type: "gaussian" 124 | std: 0.05 125 | } 126 | bias_filler { 127 | type: "constant" 128 | value: 0 129 | } 130 | } 131 | } 132 | layer { 133 | name: "elu_cccp2" 134 | type: "ELU" 135 | bottom: "cccp2" 136 | top: "elu_cccp2" 137 | elu_param { 138 | alpha: 1 139 | } 140 | } 141 | layer { 142 | name: "pool1" 143 | type: "Pooling" 144 | bottom: "elu_cccp2" 145 | top: "pool1" 146 | pooling_param { 147 | pool: MAX 148 | kernel_size: 3 149 | stride: 2 150 | } 151 | } 152 | layer { 153 | name: "dropout1" 154 | type: "Dropout" 155 | bottom: "pool1" 156 | top: "dropout1" 157 | dropout_param { 158 | dropout_ratio: 0.5 159 | } 160 | } 161 | layer { 162 | name: "conv2" 163 | type: "Convolution" 164 | bottom: "dropout1" 165 | top: "conv2" 166 | param { 167 | lr_mult: 1 168 | decay_mult: 1 169 | } 170 | param { 171 | lr_mult: 2 172 | decay_mult: 0 173 | } 174 | convolution_param { 175 | num_output: 192 176 | pad: 2 177 | kernel_size: 5 178 | weight_filler { 179 | type: "gaussian" 180 | std: 0.05 181 | } 182 | 
bias_filler { 183 | type: "constant" 184 | value: 0 185 | } 186 | } 187 | } 188 | layer { 189 | name: "elu_conv2" 190 | type: "ELU" 191 | bottom: "conv2" 192 | top: "elu_conv2" 193 | elu_param { 194 | alpha: 1 195 | } 196 | } 197 | layer { 198 | name: "cccp3" 199 | type: "Convolution" 200 | bottom: "elu_conv2" 201 | top: "cccp3" 202 | param { 203 | lr_mult: 1 204 | decay_mult: 1 205 | } 206 | param { 207 | lr_mult: 2 208 | decay_mult: 0 209 | } 210 | convolution_param { 211 | num_output: 192 212 | kernel_size: 1 213 | weight_filler { 214 | type: "gaussian" 215 | std: 0.05 216 | } 217 | bias_filler { 218 | type: "constant" 219 | value: 0 220 | } 221 | } 222 | } 223 | layer { 224 | name: "elu_cccp3" 225 | type: "ELU" 226 | bottom: "cccp3" 227 | top: "elu_cccp3" 228 | elu_param { 229 | alpha: 1 230 | } 231 | } 232 | layer { 233 | name: "cccp4" 234 | type: "Convolution" 235 | bottom: "elu_cccp3" 236 | top: "cccp4" 237 | param { 238 | lr_mult: 1 239 | decay_mult: 1 240 | } 241 | param { 242 | lr_mult: 2 243 | decay_mult: 0 244 | } 245 | convolution_param { 246 | num_output: 192 247 | kernel_size: 1 248 | weight_filler { 249 | type: "gaussian" 250 | std: 0.05 251 | } 252 | bias_filler { 253 | type: "constant" 254 | value: 0 255 | } 256 | } 257 | } 258 | layer { 259 | name: "elu_cccp4" 260 | type: "ELU" 261 | bottom: "cccp4" 262 | top: "elu_cccp4" 263 | elu_param { 264 | alpha: 1 265 | } 266 | } 267 | layer { 268 | name: "pool2" 269 | type: "Pooling" 270 | bottom: "elu_cccp4" 271 | top: "pool2" 272 | pooling_param { 273 | pool: AVE 274 | kernel_size: 3 275 | stride: 2 276 | } 277 | } 278 | layer { 279 | name: "dropout2" 280 | type: "Dropout" 281 | bottom: "pool2" 282 | top: "dropout2" 283 | dropout_param { 284 | dropout_ratio: 0.5 285 | } 286 | } 287 | layer { 288 | name: "conv3" 289 | type: "Convolution" 290 | bottom: "dropout2" 291 | top: "conv3" 292 | param { 293 | lr_mult: 1 294 | decay_mult: 1 295 | } 296 | param { 297 | lr_mult: 2 298 | decay_mult: 0 299 | } 300 | convolution_param { 301 | num_output: 192 302 | pad: 1 303 | kernel_size: 3 304 | weight_filler { 305 | type: "gaussian" 306 | std: 0.05 307 | } 308 | bias_filler { 309 | type: "constant" 310 | value: 0 311 | } 312 | } 313 | } 314 | layer { 315 | name: "elu_conv3" 316 | type: "ELU" 317 | bottom: "conv3" 318 | top: "elu_conv3" 319 | elu_param { 320 | alpha: 1 321 | } 322 | } 323 | layer { 324 | name: "cccp5" 325 | type: "Convolution" 326 | bottom: "elu_conv3" 327 | top: "cccp5" 328 | param { 329 | lr_mult: 1 330 | decay_mult: 1 331 | } 332 | param { 333 | lr_mult: 2 334 | decay_mult: 0 335 | } 336 | convolution_param { 337 | num_output: 192 338 | kernel_size: 1 339 | weight_filler { 340 | type: "gaussian" 341 | std: 0.05 342 | } 343 | bias_filler { 344 | type: "constant" 345 | value: 0 346 | } 347 | } 348 | } 349 | layer { 350 | name: "elu_cccp5" 351 | type: "ELU" 352 | bottom: "cccp5" 353 | top: "elu_cccp5" 354 | elu_param { 355 | alpha: 1 356 | } 357 | } 358 | layer { 359 | name: "cccp6" 360 | type: "Convolution" 361 | bottom: "elu_cccp5" 362 | top: "cccp6" 363 | param { 364 | lr_mult: 0.1 365 | decay_mult: 1 366 | } 367 | param { 368 | lr_mult: 0.1 369 | decay_mult: 0 370 | } 371 | convolution_param { 372 | num_output: 10 373 | kernel_size: 1 374 | weight_filler { 375 | type: "gaussian" 376 | std: 0.05 377 | } 378 | bias_filler { 379 | type: "constant" 380 | value: 0 381 | } 382 | } 383 | } 384 | layer { 385 | name: "elu_cccp6" 386 | type: "ELU" 387 | bottom: "cccp6" 388 | top: "elu_cccp6" 389 | elu_param { 390 | alpha: 1 391 | 
} 392 | } 393 | layer { 394 | name: "pool3" 395 | type: "Pooling" 396 | bottom: "elu_cccp6" 397 | top: "pool3" 398 | pooling_param { 399 | pool: AVE 400 | kernel_size: 7 401 | stride: 1 402 | } 403 | } 404 | layer { 405 | name: "accuracy" 406 | type: "Accuracy" 407 | bottom: "pool3" 408 | bottom: "label" 409 | top: "accuracy" 410 | } 411 | layer { 412 | name: "loss" 413 | type: "SoftmaxWithLoss" 414 | bottom: "pool3" 415 | bottom: "label" 416 | top: "loss" 417 | } 418 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/network_in_network_cifar10/nin_m2pelu_gauss.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | data_param { 8 | source: "/home/liyang/Datasets/CIFAR/CIFAR-10/cifar-train-leveldb" 9 | batch_size: 128 10 | } 11 | transform_param { 12 | crop_size: 28 13 | mirror: true 14 | } 15 | image_data_param { 16 | shuffle: true 17 | } 18 | include: { phase: TRAIN } 19 | } 20 | layer { 21 | name: "cifar" 22 | type: "Data" 23 | top: "data" 24 | top: "label" 25 | data_param { 26 | source: "/home/liyang/Datasets/CIFAR/CIFAR-10/cifar-test-leveldb" 27 | batch_size: 100 28 | } 29 | transform_param { 30 | crop_size: 28 31 | mirror: false 32 | } 33 | include: { phase: TEST } 34 | } 35 | layer { 36 | name: "conv1" 37 | type: "Convolution" 38 | bottom: "data" 39 | top: "conv1" 40 | param { 41 | lr_mult: 1 42 | decay_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | decay_mult: 0 47 | } 48 | convolution_param { 49 | num_output: 192 50 | pad: 2 51 | kernel_size: 5 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.05 55 | } 56 | bias_filler { 57 | type: "constant" 58 | value: 0 59 | } 60 | } 61 | } 62 | layer { 63 | name: "mpelu_conv1" 64 | type: "M2PELU" 65 | bottom: "conv1" 66 | top: "mpelu_conv1" 67 | param { 68 | lr_mult: 2 69 | decay_mult: 5 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 5 74 | } 75 | m2pelu_param { 76 | alpha_filler { 77 | type: "constant" 78 | value: 0.25 79 | } 80 | beta_filler { 81 | type: "constant" 82 | value: 1 83 | } 84 | } 85 | } 86 | layer { 87 | name: "cccp1" 88 | type: "Convolution" 89 | bottom: "mpelu_conv1" 90 | top: "cccp1" 91 | param { 92 | lr_mult: 1 93 | decay_mult: 1 94 | } 95 | param { 96 | lr_mult: 2 97 | decay_mult: 0 98 | } 99 | convolution_param { 100 | num_output: 160 101 | kernel_size: 1 102 | weight_filler { 103 | type: "gaussian" 104 | std: 0.05 105 | } 106 | bias_filler { 107 | type: "constant" 108 | value: 0 109 | } 110 | } 111 | } 112 | layer { 113 | name: "mpelu_cccp1" 114 | type: "M2PELU" 115 | bottom: "cccp1" 116 | top: "mpelu_cccp1" 117 | param { 118 | lr_mult: 2 119 | decay_mult: 5 120 | } 121 | param { 122 | lr_mult: 2 123 | decay_mult: 5 124 | } 125 | m2pelu_param { 126 | alpha_filler { 127 | type: "constant" 128 | value: 0.25 129 | } 130 | beta_filler { 131 | type: "constant" 132 | value: 1 133 | } 134 | } 135 | } 136 | layer { 137 | name: "cccp2" 138 | type: "Convolution" 139 | bottom: "mpelu_cccp1" 140 | top: "cccp2" 141 | param { 142 | lr_mult: 1 143 | decay_mult: 1 144 | } 145 | param { 146 | lr_mult: 2 147 | decay_mult: 0 148 | } 149 | convolution_param { 150 | num_output: 96 151 | kernel_size: 1 152 | weight_filler { 153 | type: "gaussian" 154 | std: 0.05 155 | } 156 | bias_filler { 157 | type: "constant" 158 | value: 0 159 | } 160 | } 161 | } 162 | layer { 163 | name: "mpelu_cccp2" 164 | type: "M2PELU" 165 | bottom: "cccp2" 166 
| top: "mpelu_cccp2" 167 | param { 168 | lr_mult: 2 169 | decay_mult: 5 170 | } 171 | param { 172 | lr_mult: 2 173 | decay_mult: 5 174 | } 175 | m2pelu_param { 176 | alpha_filler { 177 | type: "constant" 178 | value: 0.25 179 | } 180 | beta_filler { 181 | type: "constant" 182 | value: 1 183 | } 184 | } 185 | } 186 | layer { 187 | name: "pool1" 188 | type: "Pooling" 189 | bottom: "mpelu_cccp2" 190 | top: "pool1" 191 | pooling_param { 192 | pool: MAX 193 | kernel_size: 3 194 | stride: 2 195 | } 196 | } 197 | layer { 198 | name: "dropout1" 199 | type: "Dropout" 200 | bottom: "pool1" 201 | top: "dropout1" 202 | dropout_param { 203 | dropout_ratio: 0.5 204 | } 205 | } 206 | layer { 207 | name: "conv2" 208 | type: "Convolution" 209 | bottom: "dropout1" 210 | top: "conv2" 211 | param { 212 | lr_mult: 1 213 | decay_mult: 1 214 | } 215 | param { 216 | lr_mult: 2 217 | decay_mult: 0 218 | } 219 | convolution_param { 220 | num_output: 192 221 | pad: 2 222 | kernel_size: 5 223 | weight_filler { 224 | type: "gaussian" 225 | std: 0.05 226 | } 227 | bias_filler { 228 | type: "constant" 229 | value: 0 230 | } 231 | } 232 | } 233 | layer { 234 | name: "mpelu_conv2" 235 | type: "M2PELU" 236 | bottom: "conv2" 237 | top: "mpelu_conv2" 238 | param { 239 | lr_mult: 2 240 | decay_mult: 5 241 | } 242 | param { 243 | lr_mult: 2 244 | decay_mult: 5 245 | } 246 | m2pelu_param { 247 | alpha_filler { 248 | type: "constant" 249 | value: 0.25 250 | } 251 | beta_filler { 252 | type: "constant" 253 | value: 1 254 | } 255 | } 256 | } 257 | layer { 258 | name: "cccp3" 259 | type: "Convolution" 260 | bottom: "mpelu_conv2" 261 | top: "cccp3" 262 | param { 263 | lr_mult: 1 264 | decay_mult: 1 265 | } 266 | param { 267 | lr_mult: 2 268 | decay_mult: 0 269 | } 270 | convolution_param { 271 | num_output: 192 272 | kernel_size: 1 273 | weight_filler { 274 | type: "gaussian" 275 | std: 0.05 276 | } 277 | bias_filler { 278 | type: "constant" 279 | value: 0 280 | } 281 | } 282 | } 283 | layer { 284 | name: "mpelu_cccp3" 285 | type: "M2PELU" 286 | bottom: "cccp3" 287 | top: "mpelu_cccp3" 288 | param { 289 | lr_mult: 2 290 | decay_mult: 5 291 | } 292 | param { 293 | lr_mult: 2 294 | decay_mult: 5 295 | } 296 | m2pelu_param { 297 | alpha_filler { 298 | type: "constant" 299 | value: 0.25 300 | } 301 | beta_filler { 302 | type: "constant" 303 | value: 1 304 | } 305 | } 306 | } 307 | layer { 308 | name: "cccp4" 309 | type: "Convolution" 310 | bottom: "mpelu_cccp3" 311 | top: "cccp4" 312 | param { 313 | lr_mult: 1 314 | decay_mult: 1 315 | } 316 | param { 317 | lr_mult: 2 318 | decay_mult: 0 319 | } 320 | convolution_param { 321 | num_output: 192 322 | kernel_size: 1 323 | weight_filler { 324 | type: "gaussian" 325 | std: 0.05 326 | } 327 | bias_filler { 328 | type: "constant" 329 | value: 0 330 | } 331 | } 332 | } 333 | layer { 334 | name: "mpelu_cccp4" 335 | type: "M2PELU" 336 | bottom: "cccp4" 337 | top: "mpelu_cccp4" 338 | param { 339 | lr_mult: 2 340 | decay_mult: 5 341 | } 342 | param { 343 | lr_mult: 2 344 | decay_mult: 5 345 | } 346 | m2pelu_param { 347 | alpha_filler { 348 | type: "constant" 349 | value: 0.25 350 | } 351 | beta_filler { 352 | type: "constant" 353 | value: 1 354 | } 355 | } 356 | } 357 | layer { 358 | name: "pool2" 359 | type: "Pooling" 360 | bottom: "mpelu_cccp4" 361 | top: "pool2" 362 | pooling_param { 363 | pool: AVE 364 | kernel_size: 3 365 | stride: 2 366 | } 367 | } 368 | layer { 369 | name: "dropout2" 370 | type: "Dropout" 371 | bottom: "pool2" 372 | top: "dropout2" 373 | dropout_param { 374 | dropout_ratio: 
0.5 375 | } 376 | } 377 | layer { 378 | name: "conv3" 379 | type: "Convolution" 380 | bottom: "dropout2" 381 | top: "conv3" 382 | param { 383 | lr_mult: 1 384 | decay_mult: 1 385 | } 386 | param { 387 | lr_mult: 2 388 | decay_mult: 0 389 | } 390 | convolution_param { 391 | num_output: 192 392 | pad: 1 393 | kernel_size: 3 394 | weight_filler { 395 | type: "gaussian" 396 | std: 0.05 397 | } 398 | bias_filler { 399 | type: "constant" 400 | value: 0 401 | } 402 | } 403 | } 404 | layer { 405 | name: "mpelu_conv3" 406 | type: "M2PELU" 407 | bottom: "conv3" 408 | top: "mpelu_conv3" 409 | param { 410 | lr_mult: 2 411 | decay_mult: 5 412 | } 413 | param { 414 | lr_mult: 2 415 | decay_mult: 5 416 | } 417 | m2pelu_param { 418 | alpha_filler { 419 | type: "constant" 420 | value: 0.25 421 | } 422 | beta_filler { 423 | type: "constant" 424 | value: 1 425 | } 426 | } 427 | } 428 | layer { 429 | name: "cccp5" 430 | type: "Convolution" 431 | bottom: "mpelu_conv3" 432 | top: "cccp5" 433 | param { 434 | lr_mult: 1 435 | decay_mult: 1 436 | } 437 | param { 438 | lr_mult: 2 439 | decay_mult: 0 440 | } 441 | convolution_param { 442 | num_output: 192 443 | kernel_size: 1 444 | weight_filler { 445 | type: "gaussian" 446 | std: 0.05 447 | } 448 | bias_filler { 449 | type: "constant" 450 | value: 0 451 | } 452 | } 453 | } 454 | layer { 455 | name: "mpelu_cccp5" 456 | type: "M2PELU" 457 | bottom: "cccp5" 458 | top: "mpelu_cccp5" 459 | param { 460 | lr_mult: 2 461 | decay_mult: 5 462 | } 463 | param { 464 | lr_mult: 2 465 | decay_mult: 5 466 | } 467 | m2pelu_param { 468 | alpha_filler { 469 | type: "constant" 470 | value: 0.25 471 | } 472 | beta_filler { 473 | type: "constant" 474 | value: 1 475 | } 476 | } 477 | } 478 | layer { 479 | name: "cccp6" 480 | type: "Convolution" 481 | bottom: "mpelu_cccp5" 482 | top: "cccp6" 483 | param { 484 | lr_mult: 0.1 485 | decay_mult: 1 486 | } 487 | param { 488 | lr_mult: 0.1 489 | decay_mult: 0 490 | } 491 | convolution_param { 492 | num_output: 10 493 | kernel_size: 1 494 | weight_filler { 495 | type: "gaussian" 496 | std: 0.05 497 | } 498 | bias_filler { 499 | type: "constant" 500 | value: 0 501 | } 502 | } 503 | } 504 | layer { 505 | name: "mpelu_cccp6" 506 | type: "M2PELU" 507 | bottom: "cccp6" 508 | top: "mpelu_cccp6" 509 | param { 510 | lr_mult: 2 511 | decay_mult: 5 512 | } 513 | param { 514 | lr_mult: 2 515 | decay_mult: 5 516 | } 517 | m2pelu_param { 518 | alpha_filler { 519 | type: "constant" 520 | value: 0.25 521 | } 522 | beta_filler { 523 | type: "constant" 524 | value: 1 525 | } 526 | } 527 | } 528 | layer { 529 | name: "pool3" 530 | type: "Pooling" 531 | bottom: "mpelu_cccp6" 532 | top: "pool3" 533 | pooling_param { 534 | pool: AVE 535 | kernel_size: 7 536 | stride: 1 537 | } 538 | } 539 | layer { 540 | name: "accuracy" 541 | type: "Accuracy" 542 | bottom: "pool3" 543 | bottom: "label" 544 | top: "accuracy" 545 | } 546 | layer { 547 | name: "loss" 548 | type: "SoftmaxWithLoss" 549 | bottom: "pool3" 550 | bottom: "label" 551 | top: "loss" 552 | } 553 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/network_in_network_cifar10/nin_mpelu_gauss.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | data_param { 8 | source: "../../Datasets/cifar-10-NIN/cifar-train-leveldb" 9 | batch_size: 128 10 | } 11 | transform_param { 12 | crop_size: 28 13 | mirror: true 14 | } 15 | 
image_data_param { 16 | shuffle: true 17 | } 18 | include: { phase: TRAIN } 19 | } 20 | layer { 21 | name: "cifar" 22 | type: "Data" 23 | top: "data" 24 | top: "label" 25 | data_param { 26 | source: "../../Datasets/cifar-10-NIN/cifar-test-leveldb" 27 | batch_size: 100 28 | } 29 | transform_param { 30 | crop_size: 28 31 | mirror: false 32 | } 33 | include: { phase: TEST } 34 | } 35 | layer { 36 | name: "conv1" 37 | type: "Convolution" 38 | bottom: "data" 39 | top: "conv1" 40 | param { 41 | lr_mult: 1 42 | decay_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | decay_mult: 0 47 | } 48 | convolution_param { 49 | num_output: 192 50 | pad: 2 51 | kernel_size: 5 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.05 55 | } 56 | bias_filler { 57 | type: "constant" 58 | value: 0 59 | } 60 | } 61 | } 62 | layer { 63 | name: "mpelu_conv1" 64 | type: "MPELU" 65 | bottom: "conv1" 66 | top: "mpelu_conv1" 67 | param { 68 | lr_mult: 2 69 | decay_mult: 0 70 | } 71 | param { 72 | lr_mult: 2 73 | decay_mult: 0 74 | } 75 | param { 76 | lr_mult: 0 77 | decay_mult: 0 78 | } 79 | mpelu_param { 80 | alpha_filler { 81 | type: "constant" 82 | value: 1 83 | } 84 | beta_filler { 85 | type: "constant" 86 | value: 1 87 | } 88 | gamma_filler { 89 | type: "constant" 90 | value: 0 91 | } 92 | } 93 | } 94 | layer { 95 | name: "cccp1" 96 | type: "Convolution" 97 | bottom: "mpelu_conv1" 98 | top: "cccp1" 99 | param { 100 | lr_mult: 1 101 | decay_mult: 1 102 | } 103 | param { 104 | lr_mult: 2 105 | decay_mult: 0 106 | } 107 | convolution_param { 108 | num_output: 160 109 | kernel_size: 1 110 | weight_filler { 111 | type: "gaussian" 112 | std: 0.05 113 | } 114 | bias_filler { 115 | type: "constant" 116 | value: 0 117 | } 118 | } 119 | } 120 | layer { 121 | name: "mpelu_cccp1" 122 | type: "MPELU" 123 | bottom: "cccp1" 124 | top: "mpelu_cccp1" 125 | param { 126 | lr_mult: 2 127 | decay_mult: 0 128 | } 129 | param { 130 | lr_mult: 2 131 | decay_mult: 0 132 | } 133 | param { 134 | lr_mult: 0 135 | decay_mult: 0 136 | } 137 | mpelu_param { 138 | alpha_filler { 139 | type: "constant" 140 | value: 1 141 | } 142 | beta_filler { 143 | type: "constant" 144 | value: 1 145 | } 146 | gamma_filler { 147 | type: "constant" 148 | value: 0 149 | } 150 | } 151 | } 152 | layer { 153 | name: "cccp2" 154 | type: "Convolution" 155 | bottom: "mpelu_cccp1" 156 | top: "cccp2" 157 | param { 158 | lr_mult: 1 159 | decay_mult: 1 160 | } 161 | param { 162 | lr_mult: 2 163 | decay_mult: 0 164 | } 165 | convolution_param { 166 | num_output: 96 167 | kernel_size: 1 168 | weight_filler { 169 | type: "gaussian" 170 | std: 0.05 171 | } 172 | bias_filler { 173 | type: "constant" 174 | value: 0 175 | } 176 | } 177 | } 178 | layer { 179 | name: "mpelu_cccp2" 180 | type: "MPELU" 181 | bottom: "cccp2" 182 | top: "mpelu_cccp2" 183 | param { 184 | lr_mult: 2 185 | decay_mult: 0 186 | } 187 | param { 188 | lr_mult: 2 189 | decay_mult: 0 190 | } 191 | param { 192 | lr_mult: 0 193 | decay_mult: 0 194 | } 195 | mpelu_param { 196 | alpha_filler { 197 | type: "constant" 198 | value: 1 199 | } 200 | beta_filler { 201 | type: "constant" 202 | value: 1 203 | } 204 | gamma_filler { 205 | type: "constant" 206 | value: 0 207 | } 208 | } 209 | } 210 | layer { 211 | name: "pool1" 212 | type: "Pooling" 213 | bottom: "mpelu_cccp2" 214 | top: "pool1" 215 | pooling_param { 216 | pool: MAX 217 | kernel_size: 3 218 | stride: 2 219 | } 220 | } 221 | layer { 222 | name: "dropout1" 223 | type: "Dropout" 224 | bottom: "pool1" 225 | top: "dropout1" 226 | dropout_param { 227 | 
dropout_ratio: 0.5 228 | } 229 | } 230 | layer { 231 | name: "conv2" 232 | type: "Convolution" 233 | bottom: "dropout1" 234 | top: "conv2" 235 | param { 236 | lr_mult: 1 237 | decay_mult: 1 238 | } 239 | param { 240 | lr_mult: 2 241 | decay_mult: 0 242 | } 243 | convolution_param { 244 | num_output: 192 245 | pad: 2 246 | kernel_size: 5 247 | weight_filler { 248 | type: "gaussian" 249 | std: 0.05 250 | } 251 | bias_filler { 252 | type: "constant" 253 | value: 0 254 | } 255 | } 256 | } 257 | layer { 258 | name: "mpelu_conv2" 259 | type: "MPELU" 260 | bottom: "conv2" 261 | top: "mpelu_conv2" 262 | param { 263 | lr_mult: 2 264 | decay_mult: 0 265 | } 266 | param { 267 | lr_mult: 2 268 | decay_mult: 0 269 | } 270 | param { 271 | lr_mult: 0 272 | decay_mult: 0 273 | } 274 | mpelu_param { 275 | alpha_filler { 276 | type: "constant" 277 | value: 1 278 | } 279 | beta_filler { 280 | type: "constant" 281 | value: 1 282 | } 283 | gamma_filler { 284 | type: "constant" 285 | value: 0 286 | } 287 | } 288 | } 289 | layer { 290 | name: "cccp3" 291 | type: "Convolution" 292 | bottom: "mpelu_conv2" 293 | top: "cccp3" 294 | param { 295 | lr_mult: 1 296 | decay_mult: 1 297 | } 298 | param { 299 | lr_mult: 2 300 | decay_mult: 0 301 | } 302 | convolution_param { 303 | num_output: 192 304 | kernel_size: 1 305 | weight_filler { 306 | type: "gaussian" 307 | std: 0.05 308 | } 309 | bias_filler { 310 | type: "constant" 311 | value: 0 312 | } 313 | } 314 | } 315 | layer { 316 | name: "mpelu_cccp3" 317 | type: "MPELU" 318 | bottom: "cccp3" 319 | top: "mpelu_cccp3" 320 | param { 321 | lr_mult: 2 322 | decay_mult: 0 323 | } 324 | param { 325 | lr_mult: 2 326 | decay_mult: 0 327 | } 328 | param { 329 | lr_mult: 0 330 | decay_mult: 0 331 | } 332 | mpelu_param { 333 | alpha_filler { 334 | type: "constant" 335 | value: 1 336 | } 337 | beta_filler { 338 | type: "constant" 339 | value: 1 340 | } 341 | gamma_filler { 342 | type: "constant" 343 | value: 0 344 | } 345 | } 346 | } 347 | layer { 348 | name: "cccp4" 349 | type: "Convolution" 350 | bottom: "mpelu_cccp3" 351 | top: "cccp4" 352 | param { 353 | lr_mult: 1 354 | decay_mult: 1 355 | } 356 | param { 357 | lr_mult: 2 358 | decay_mult: 0 359 | } 360 | convolution_param { 361 | num_output: 192 362 | kernel_size: 1 363 | weight_filler { 364 | type: "gaussian" 365 | std: 0.05 366 | } 367 | bias_filler { 368 | type: "constant" 369 | value: 0 370 | } 371 | } 372 | } 373 | layer { 374 | name: "mpelu_cccp4" 375 | type: "MPELU" 376 | bottom: "cccp4" 377 | top: "mpelu_cccp4" 378 | param { 379 | lr_mult: 2 380 | decay_mult: 0 381 | } 382 | param { 383 | lr_mult: 2 384 | decay_mult: 0 385 | } 386 | param { 387 | lr_mult: 0 388 | decay_mult: 0 389 | } 390 | mpelu_param { 391 | alpha_filler { 392 | type: "constant" 393 | value: 1 394 | } 395 | beta_filler { 396 | type: "constant" 397 | value: 1 398 | } 399 | gamma_filler { 400 | type: "constant" 401 | value: 0 402 | } 403 | } 404 | } 405 | layer { 406 | name: "pool2" 407 | type: "Pooling" 408 | bottom: "mpelu_cccp4" 409 | top: "pool2" 410 | pooling_param { 411 | pool: AVE 412 | kernel_size: 3 413 | stride: 2 414 | } 415 | } 416 | layer { 417 | name: "dropout2" 418 | type: "Dropout" 419 | bottom: "pool2" 420 | top: "dropout2" 421 | dropout_param { 422 | dropout_ratio: 0.5 423 | } 424 | } 425 | layer { 426 | name: "conv3" 427 | type: "Convolution" 428 | bottom: "dropout2" 429 | top: "conv3" 430 | param { 431 | lr_mult: 1 432 | decay_mult: 1 433 | } 434 | param { 435 | lr_mult: 2 436 | decay_mult: 0 437 | } 438 | convolution_param { 439 | 
num_output: 192 440 | pad: 1 441 | kernel_size: 3 442 | weight_filler { 443 | type: "gaussian" 444 | std: 0.05 445 | } 446 | bias_filler { 447 | type: "constant" 448 | value: 0 449 | } 450 | } 451 | } 452 | layer { 453 | name: "mpelu_conv3" 454 | type: "MPELU" 455 | bottom: "conv3" 456 | top: "mpelu_conv3" 457 | param { 458 | lr_mult: 2 459 | decay_mult: 0 460 | } 461 | param { 462 | lr_mult: 2 463 | decay_mult: 0 464 | } 465 | param { 466 | lr_mult: 0 467 | decay_mult: 0 468 | } 469 | mpelu_param { 470 | alpha_filler { 471 | type: "constant" 472 | value: 1 473 | } 474 | beta_filler { 475 | type: "constant" 476 | value: 1 477 | } 478 | gamma_filler { 479 | type: "constant" 480 | value: 0 481 | } 482 | } 483 | } 484 | layer { 485 | name: "cccp5" 486 | type: "Convolution" 487 | bottom: "mpelu_conv3" 488 | top: "cccp5" 489 | param { 490 | lr_mult: 1 491 | decay_mult: 1 492 | } 493 | param { 494 | lr_mult: 2 495 | decay_mult: 0 496 | } 497 | convolution_param { 498 | num_output: 192 499 | kernel_size: 1 500 | weight_filler { 501 | type: "gaussian" 502 | std: 0.05 503 | } 504 | bias_filler { 505 | type: "constant" 506 | value: 0 507 | } 508 | } 509 | } 510 | layer { 511 | name: "mpelu_cccp5" 512 | type: "MPELU" 513 | bottom: "cccp5" 514 | top: "mpelu_cccp5" 515 | param { 516 | lr_mult: 2 517 | decay_mult: 0 518 | } 519 | param { 520 | lr_mult: 2 521 | decay_mult: 0 522 | } 523 | param { 524 | lr_mult: 0 525 | decay_mult: 0 526 | } 527 | mpelu_param { 528 | alpha_filler { 529 | type: "constant" 530 | value: 1 531 | } 532 | beta_filler { 533 | type: "constant" 534 | value: 1 535 | } 536 | gamma_filler { 537 | type: "constant" 538 | value: 0 539 | } 540 | } 541 | } 542 | layer { 543 | name: "cccp6" 544 | type: "Convolution" 545 | bottom: "mpelu_cccp5" 546 | top: "cccp6" 547 | param { 548 | lr_mult: 0.1 549 | decay_mult: 1 550 | } 551 | param { 552 | lr_mult: 0.1 553 | decay_mult: 0 554 | } 555 | convolution_param { 556 | num_output: 10 557 | kernel_size: 1 558 | weight_filler { 559 | type: "gaussian" 560 | std: 0.05 561 | } 562 | bias_filler { 563 | type: "constant" 564 | value: 0 565 | } 566 | } 567 | } 568 | layer { 569 | name: "mpelu_cccp6" 570 | type: "MPELU" 571 | bottom: "cccp6" 572 | top: "mpelu_cccp6" 573 | param { 574 | lr_mult: 2 575 | decay_mult: 0 576 | } 577 | param { 578 | lr_mult: 2 579 | decay_mult: 0 580 | } 581 | param { 582 | lr_mult: 0 583 | decay_mult: 0 584 | } 585 | mpelu_param { 586 | alpha_filler { 587 | type: "constant" 588 | value: 1 589 | } 590 | beta_filler { 591 | type: "constant" 592 | value: 1 593 | } 594 | gamma_filler { 595 | type: "constant" 596 | value: 0 597 | } 598 | } 599 | } 600 | layer { 601 | name: "pool3" 602 | type: "Pooling" 603 | bottom: "mpelu_cccp6" 604 | top: "pool3" 605 | pooling_param { 606 | pool: AVE 607 | kernel_size: 7 608 | stride: 1 609 | } 610 | } 611 | layer { 612 | name: "accuracy" 613 | type: "Accuracy" 614 | bottom: "pool3" 615 | bottom: "label" 616 | top: "accuracy" 617 | } 618 | layer { 619 | name: "loss" 620 | type: "SoftmaxWithLoss" 621 | bottom: "pool3" 622 | bottom: "label" 623 | top: "loss" 624 | } 625 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/network_in_network_cifar10/nin_prelu_gauss.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | data_param { 8 | # source: "../../Datasets/cifar-10-nin/cifar-train-leveldb" 9 | 
batch_size: 128 10 | } 11 | transform_param { 12 | crop_size: 28 13 | mirror: true 14 | } 15 | image_data_param { 16 | shuffle: true 17 | } 18 | include: { phase: TRAIN } 19 | } 20 | layer { 21 | name: "cifar" 22 | type: "Data" 23 | top: "data" 24 | top: "label" 25 | data_param { 26 | source: "../../Datasets/cifar-10-nin/cifar-test-leveldb" 27 | batch_size: 100 28 | } 29 | transform_param { 30 | crop_size: 28 31 | mirror: false 32 | } 33 | include: { phase: TEST } 34 | } 35 | layer { 36 | name: "conv1" 37 | type: "Convolution" 38 | bottom: "data" 39 | top: "conv1" 40 | param { 41 | lr_mult: 1 42 | decay_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | decay_mult: 0 47 | } 48 | convolution_param { 49 | num_output: 192 50 | pad: 2 51 | kernel_size: 5 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.05 55 | } 56 | bias_filler { 57 | type: "constant" 58 | value: 0 59 | } 60 | } 61 | } 62 | layer { 63 | name: "prelu_conv1" 64 | type: "PReLU" 65 | bottom: "conv1" 66 | top: "prelu_conv1" 67 | prelu_param { 68 | filler { 69 | type: "constant" 70 | value: 0.25 71 | } 72 | channel_shared : false 73 | } 74 | } 75 | layer { 76 | name: "cccp1" 77 | type: "Convolution" 78 | bottom: "prelu_conv1" 79 | top: "cccp1" 80 | param { 81 | lr_mult: 1 82 | decay_mult: 1 83 | } 84 | param { 85 | lr_mult: 2 86 | decay_mult: 0 87 | } 88 | convolution_param { 89 | num_output: 160 90 | group: 1 91 | kernel_size: 1 92 | weight_filler { 93 | type: "gaussian" 94 | std: 0.05 95 | } 96 | bias_filler { 97 | type: "constant" 98 | value: 0 99 | } 100 | } 101 | } 102 | layer { 103 | name: "prelu_cccp1" 104 | type: "PReLU" 105 | bottom: "cccp1" 106 | top: "prelu_cccp1" 107 | prelu_param { 108 | filler { 109 | type: "constant" 110 | value: 0.25 111 | } 112 | channel_shared : false 113 | } 114 | } 115 | layer { 116 | name: "cccp2" 117 | type: "Convolution" 118 | bottom: "prelu_cccp1" 119 | top: "cccp2" 120 | param { 121 | lr_mult: 1 122 | decay_mult: 1 123 | } 124 | param { 125 | lr_mult: 2 126 | decay_mult: 0 127 | } 128 | convolution_param { 129 | num_output: 96 130 | group: 1 131 | kernel_size: 1 132 | weight_filler { 133 | type: "gaussian" 134 | std: 0.05 135 | } 136 | bias_filler { 137 | type: "constant" 138 | value: 0 139 | } 140 | } 141 | } 142 | layer { 143 | name: "prelu_cccp2" 144 | type: "PReLU" 145 | bottom: "cccp2" 146 | top: "prelu_cccp2" 147 | prelu_param { 148 | filler { 149 | type: "constant" 150 | value: 0.25 151 | } 152 | channel_shared : false 153 | } 154 | } 155 | layer { 156 | name: "pool1" 157 | type: "Pooling" 158 | bottom: "prelu_cccp2" 159 | top: "pool1" 160 | pooling_param { 161 | pool: MAX 162 | kernel_size: 3 163 | stride: 2 164 | } 165 | } 166 | layer { 167 | name: "drop3" 168 | type: "Dropout" 169 | bottom: "pool1" 170 | top: "dropout1" 171 | dropout_param { 172 | dropout_ratio: 0.5 173 | } 174 | } 175 | layer { 176 | name: "conv2" 177 | type: "Convolution" 178 | bottom: "dropout1" 179 | top: "conv2" 180 | param { 181 | lr_mult: 1 182 | decay_mult: 1 183 | } 184 | param { 185 | lr_mult: 2 186 | decay_mult: 0 187 | } 188 | convolution_param { 189 | num_output: 192 190 | pad: 2 191 | kernel_size: 5 192 | weight_filler { 193 | type: "gaussian" 194 | std: 0.05 195 | } 196 | bias_filler { 197 | type: "constant" 198 | value: 0 199 | } 200 | } 201 | } 202 | layer { 203 | name: "prelu_conv2" 204 | type: "PReLU" 205 | bottom: "conv2" 206 | top: "prelu_conv2" 207 | prelu_param { 208 | filler { 209 | type: "constant" 210 | value: 0.25 211 | } 212 | channel_shared : false 213 | } 214 | } 215 | layer { 216 
| name: "cccp3" 217 | type: "Convolution" 218 | bottom: "prelu_conv2" 219 | top: "cccp3" 220 | param { 221 | lr_mult: 1 222 | decay_mult: 1 223 | } 224 | param { 225 | lr_mult: 2 226 | decay_mult: 0 227 | } 228 | convolution_param { 229 | num_output: 192 230 | group: 1 231 | kernel_size: 1 232 | weight_filler { 233 | type: "gaussian" 234 | std: 0.05 235 | } 236 | bias_filler { 237 | type: "constant" 238 | value: 0 239 | } 240 | } 241 | } 242 | layer { 243 | name: "prelu_cccp3" 244 | type: "PReLU" 245 | bottom: "cccp3" 246 | top: "prelu_cccp3" 247 | prelu_param { 248 | filler { 249 | type: "constant" 250 | value: 0.25 251 | } 252 | channel_shared : false 253 | } 254 | } 255 | layer { 256 | name: "cccp4" 257 | type: "Convolution" 258 | bottom: "prelu_cccp3" 259 | top: "cccp4" 260 | param { 261 | lr_mult: 1 262 | decay_mult: 1 263 | } 264 | param { 265 | lr_mult: 2 266 | decay_mult: 0 267 | } 268 | convolution_param { 269 | num_output: 192 270 | group: 1 271 | kernel_size: 1 272 | weight_filler { 273 | type: "gaussian" 274 | std: 0.05 275 | } 276 | bias_filler { 277 | type: "constant" 278 | value: 0 279 | } 280 | } 281 | } 282 | layer { 283 | name: "prelu_cccp4" 284 | type: "PReLU" 285 | bottom: "cccp4" 286 | top: "prelu_cccp4" 287 | prelu_param { 288 | filler { 289 | type: "constant" 290 | value: 0.25 291 | } 292 | channel_shared : false 293 | } 294 | } 295 | layer { 296 | name: "pool2" 297 | type: "Pooling" 298 | bottom: "prelu_cccp4" 299 | top: "pool2" 300 | pooling_param { 301 | pool: AVE 302 | kernel_size: 3 303 | stride: 2 304 | } 305 | } 306 | layer { 307 | name: "drop6" 308 | type: "Dropout" 309 | bottom: "pool2" 310 | top: "dropout2" 311 | dropout_param { 312 | dropout_ratio: 0.5 313 | } 314 | } 315 | layer { 316 | name: "conv3" 317 | type: "Convolution" 318 | bottom: "dropout2" 319 | top: "conv3" 320 | param { 321 | lr_mult: 1 322 | decay_mult: 1 323 | } 324 | param { 325 | lr_mult: 2 326 | decay_mult: 0 327 | } 328 | convolution_param { 329 | num_output: 192 330 | pad: 1 331 | kernel_size: 3 332 | weight_filler { 333 | type: "gaussian" 334 | std: 0.05 335 | } 336 | bias_filler { 337 | type: "constant" 338 | value: 0 339 | } 340 | } 341 | } 342 | layer { 343 | name: "prelu_conv3" 344 | type: "PReLU" 345 | bottom: "conv3" 346 | top: "prelu_conv3" 347 | prelu_param { 348 | filler { 349 | type: "constant" 350 | value: 0.25 351 | } 352 | channel_shared : false 353 | } 354 | } 355 | layer { 356 | name: "cccp5" 357 | type: "Convolution" 358 | bottom: "prelu_conv3" 359 | top: "cccp5" 360 | param { 361 | lr_mult: 1 362 | decay_mult: 1 363 | } 364 | param { 365 | lr_mult: 2 366 | decay_mult: 0 367 | } 368 | convolution_param { 369 | num_output: 192 370 | group: 1 371 | kernel_size: 1 372 | weight_filler { 373 | type: "gaussian" 374 | std: 0.05 375 | } 376 | bias_filler { 377 | type: "constant" 378 | value: 0 379 | } 380 | } 381 | } 382 | layer { 383 | name: "prelu_cccp5" 384 | type: "PReLU" 385 | bottom: "cccp5" 386 | top: "prelu_cccp5" 387 | prelu_param { 388 | filler { 389 | type: "constant" 390 | value: 0.25 391 | } 392 | channel_shared : false 393 | } 394 | } 395 | layer { 396 | name: "cccp6" 397 | type: "Convolution" 398 | bottom: "prelu_cccp5" 399 | top: "cccp6" 400 | param { 401 | lr_mult: 0.1 402 | decay_mult: 1 403 | } 404 | param { 405 | lr_mult: 0.1 406 | decay_mult: 0 407 | } 408 | convolution_param { 409 | num_output: 10 410 | group: 1 411 | kernel_size: 1 412 | weight_filler { 413 | type: "gaussian" 414 | std: 0.05 415 | } 416 | bias_filler { 417 | type: "constant" 418 | value: 
0 419 | } 420 | } 421 | } 422 | layer { 423 | name: "prelu_cccp6" 424 | type: "PReLU" 425 | bottom: "cccp6" 426 | top: "prelu_cccp6" 427 | prelu_param { 428 | filler { 429 | type: "constant" 430 | value: 0.25 431 | } 432 | channel_shared : false 433 | } 434 | } 435 | layer { 436 | name: "pool3" 437 | type: "Pooling" 438 | bottom: "prelu_cccp6" 439 | top: "pool3" 440 | pooling_param { 441 | pool: AVE 442 | kernel_size: 7 443 | stride: 1 444 | } 445 | } 446 | layer { 447 | name: "accuracy" 448 | type: "Accuracy" 449 | bottom: "pool3" 450 | bottom: "label" 451 | top: "accuracy" 452 | } 453 | layer { 454 | name: "loss" 455 | type: "SoftmaxWithLoss" 456 | bottom: "pool3" 457 | bottom: "label" 458 | top: "loss" 459 | } 460 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/network_in_network_cifar10/nin_relu_gauss.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | data_param { 8 | source: "../../Datasets/cifar-10-NIN/cifar-train-leveldb" 9 | batch_size: 128 10 | } 11 | transform_param { 12 | crop_size: 28 13 | mirror: true 14 | } 15 | image_data_param { 16 | shuffle: true 17 | } 18 | include: { phase: TRAIN } 19 | } 20 | layer { 21 | name: "cifar" 22 | type: "Data" 23 | top: "data" 24 | top: "label" 25 | data_param { 26 | source: "../../Datasets/cifar-10-NIN/cifar-test-leveldb" 27 | batch_size: 100 28 | } 29 | transform_param { 30 | crop_size: 28 31 | mirror: false 32 | } 33 | include: { phase: TEST } 34 | } 35 | layer { 36 | name: "conv1" 37 | type: "Convolution" 38 | bottom: "data" 39 | top: "conv1" 40 | param { 41 | lr_mult: 1 42 | decay_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | decay_mult: 0 47 | } 48 | convolution_param { 49 | num_output: 192 50 | pad: 2 51 | kernel_size: 5 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.05 55 | } 56 | bias_filler { 57 | type: "constant" 58 | value: 0 59 | } 60 | } 61 | } 62 | layer { 63 | name: "relu_conv1" 64 | type: "ReLU" 65 | bottom: "conv1" 66 | top: "relu_conv1" 67 | } 68 | layer { 69 | name: "cccp1" 70 | type: "Convolution" 71 | bottom: "relu_conv1" 72 | top: "cccp1" 73 | param { 74 | lr_mult: 1 75 | decay_mult: 1 76 | } 77 | param { 78 | lr_mult: 2 79 | decay_mult: 0 80 | } 81 | convolution_param { 82 | num_output: 160 83 | kernel_size: 1 84 | weight_filler { 85 | type: "gaussian" 86 | std: 0.05 87 | } 88 | bias_filler { 89 | type: "constant" 90 | value: 0 91 | } 92 | } 93 | } 94 | layer { 95 | name: "relu_cccp1" 96 | type: "ReLU" 97 | bottom: "cccp1" 98 | top: "relu_cccp1" 99 | } 100 | layer { 101 | name: "cccp2" 102 | type: "Convolution" 103 | bottom: "relu_cccp1" 104 | top: "cccp2" 105 | param { 106 | lr_mult: 1 107 | decay_mult: 1 108 | } 109 | param { 110 | lr_mult: 2 111 | decay_mult: 0 112 | } 113 | convolution_param { 114 | num_output: 96 115 | kernel_size: 1 116 | weight_filler { 117 | type: "gaussian" 118 | std: 0.05 119 | } 120 | bias_filler { 121 | type: "constant" 122 | value: 0 123 | } 124 | } 125 | } 126 | layer { 127 | name: "relu_cccp2" 128 | type: "ReLU" 129 | bottom: "cccp2" 130 | top: "relu_cccp2" 131 | } 132 | layer { 133 | name: "pool1" 134 | type: "Pooling" 135 | bottom: "relu_cccp2" 136 | top: "pool1" 137 | pooling_param { 138 | pool: MAX 139 | kernel_size: 3 140 | stride: 2 141 | } 142 | } 143 | layer { 144 | name: "dropout1" 145 | type: "Dropout" 146 | bottom: "pool1" 147 | top: "dropout1" 148 | dropout_param 
{ 149 | dropout_ratio: 0.5 150 | } 151 | } 152 | layer { 153 | name: "conv2" 154 | type: "Convolution" 155 | bottom: "dropout1" 156 | top: "conv2" 157 | param { 158 | lr_mult: 1 159 | decay_mult: 1 160 | } 161 | param { 162 | lr_mult: 2 163 | decay_mult: 0 164 | } 165 | convolution_param { 166 | num_output: 192 167 | pad: 2 168 | kernel_size: 5 169 | weight_filler { 170 | type: "gaussian" 171 | std: 0.05 172 | } 173 | bias_filler { 174 | type: "constant" 175 | value: 0 176 | } 177 | } 178 | } 179 | layer { 180 | name: "relu_conv2" 181 | type: "ReLU" 182 | bottom: "conv2" 183 | top: "relu_conv2" 184 | } 185 | layer { 186 | name: "cccp3" 187 | type: "Convolution" 188 | bottom: "relu_conv2" 189 | top: "cccp3" 190 | param { 191 | lr_mult: 1 192 | decay_mult: 1 193 | } 194 | param { 195 | lr_mult: 2 196 | decay_mult: 0 197 | } 198 | convolution_param { 199 | num_output: 192 200 | kernel_size: 1 201 | weight_filler { 202 | type: "gaussian" 203 | std: 0.05 204 | } 205 | bias_filler { 206 | type: "constant" 207 | value: 0 208 | } 209 | } 210 | } 211 | layer { 212 | name: "relu_cccp3" 213 | type: "ReLU" 214 | bottom: "cccp3" 215 | top: "relu_cccp3" 216 | } 217 | layer { 218 | name: "cccp4" 219 | type: "Convolution" 220 | bottom: "relu_cccp3" 221 | top: "cccp4" 222 | param { 223 | lr_mult: 1 224 | decay_mult: 1 225 | } 226 | param { 227 | lr_mult: 2 228 | decay_mult: 0 229 | } 230 | convolution_param { 231 | num_output: 192 232 | kernel_size: 1 233 | weight_filler { 234 | type: "gaussian" 235 | std: 0.05 236 | } 237 | bias_filler { 238 | type: "constant" 239 | value: 0 240 | } 241 | } 242 | } 243 | layer { 244 | name: "relu_cccp4" 245 | type: "ReLU" 246 | bottom: "cccp4" 247 | top: "relu_cccp4" 248 | } 249 | layer { 250 | name: "pool2" 251 | type: "Pooling" 252 | bottom: "relu_cccp4" 253 | top: "pool2" 254 | pooling_param { 255 | pool: AVE 256 | kernel_size: 3 257 | stride: 2 258 | } 259 | } 260 | layer { 261 | name: "dropout2" 262 | type: "Dropout" 263 | bottom: "pool2" 264 | top: "dropout2" 265 | dropout_param { 266 | dropout_ratio: 0.5 267 | } 268 | } 269 | layer { 270 | name: "conv3" 271 | type: "Convolution" 272 | bottom: "dropout2" 273 | top: "conv3" 274 | param { 275 | lr_mult: 1 276 | decay_mult: 1 277 | } 278 | param { 279 | lr_mult: 2 280 | decay_mult: 0 281 | } 282 | convolution_param { 283 | num_output: 192 284 | pad: 1 285 | kernel_size: 3 286 | weight_filler { 287 | type: "gaussian" 288 | std: 0.05 289 | } 290 | bias_filler { 291 | type: "constant" 292 | value: 0 293 | } 294 | } 295 | } 296 | layer { 297 | name: "relu_conv3" 298 | type: "ReLU" 299 | bottom: "conv3" 300 | top: "relu_conv3" 301 | } 302 | layer { 303 | name: "cccp5" 304 | type: "Convolution" 305 | bottom: "relu_conv3" 306 | top: "cccp5" 307 | param { 308 | lr_mult: 1 309 | decay_mult: 1 310 | } 311 | param { 312 | lr_mult: 2 313 | decay_mult: 0 314 | } 315 | convolution_param { 316 | num_output: 192 317 | kernel_size: 1 318 | weight_filler { 319 | type: "gaussian" 320 | std: 0.05 321 | } 322 | bias_filler { 323 | type: "constant" 324 | value: 0 325 | } 326 | } 327 | } 328 | layer { 329 | name: "relu_cccp5" 330 | type: "ReLU" 331 | bottom: "cccp5" 332 | top: "relu_cccp5" 333 | } 334 | layer { 335 | name: "cccp6" 336 | type: "Convolution" 337 | bottom: "relu_cccp5" 338 | top: "cccp6" 339 | param { 340 | lr_mult: 0.1 341 | decay_mult: 1 342 | } 343 | param { 344 | lr_mult: 0.1 345 | decay_mult: 0 346 | } 347 | convolution_param { 348 | num_output: 10 349 | kernel_size: 1 350 | weight_filler { 351 | type: "gaussian" 352 | 
std: 0.05 353 | } 354 | bias_filler { 355 | type: "constant" 356 | value: 0 357 | } 358 | } 359 | } 360 | layer { 361 | name: "relu_cccp6" 362 | type: "ReLU" 363 | bottom: "cccp6" 364 | top: "relu_cccp6" 365 | } 366 | layer { 367 | name: "pool3" 368 | type: "Pooling" 369 | bottom: "relu_cccp6" 370 | top: "pool3" 371 | pooling_param { 372 | pool: AVE 373 | kernel_size: 7 374 | stride: 1 375 | } 376 | } 377 | layer { 378 | name: "accuracy" 379 | type: "Accuracy" 380 | bottom: "pool3" 381 | bottom: "label" 382 | top: "accuracy" 383 | } 384 | layer { 385 | name: "loss" 386 | type: "SoftmaxWithLoss" 387 | bottom: "pool3" 388 | bottom: "label" 389 | top: "loss" 390 | } 391 | -------------------------------------------------------------------------------- /caffe/examples/mpelu/network_in_network_cifar10/train.sh: -------------------------------------------------------------------------------- 1 | xelu="mpelu" 2 | init="gauss" 3 | lr="0.1" 4 | directory="trained_models" 5 | dataset="cifar10" 6 | unified="y" 7 | # solver="solver_$xelu.prototxt" 8 | 9 | #for j in 1 2; do 10 | for i in 1 2 3 4 5; 11 | do 12 | # generating solver.prototxt 13 | # ----------------------------- 14 | # train_val_dprelu_msra_thresh-15-part1-train 15 | # train_val_dprelu_msra_thresh-15-part2-test 16 | # if [ $i = 10 ]; 17 | # then 18 | # echo "Switch activation function from ${xelu} to \c" 19 | # xelu="prelu" 20 | # echo "${xelu}" 21 | # fi 22 | 23 | if [ "${unified}" = "y" ]; 24 | then 25 | echo "net: \"models/network_in_network/nin_${xelu}_${init}.prototxt\"" > solver.prototxt 26 | else 27 | echo "train_net: \"models/network_in_network/train_val_${xelu}_${init}.prototxt\"" > solver.prototxt 28 | echo "test_net: \"models/network_in_network/train_val_${xelu}_${init}.prototxt\"" >> solver.prototxt 29 | fi 30 | echo "test_iter: 100" >> solver.prototxt 31 | echo "test_interval: 1000" >> solver.prototxt 32 | echo "test_initialization: false" >> solver.prototxt 33 | echo "base_lr: ${lr}" >> solver.prototxt 34 | echo "momentum: 0.9" >> solver.prototxt 35 | echo "weight_decay: 0.0001" >> solver.prototxt 36 | echo "lr_policy: \"multistep\"" >> solver.prototxt 37 | echo "gamma: 0.1" >> solver.prototxt 38 | echo "stepvalue: 100000" >> solver.prototxt 39 | echo "display: 100" >> solver.prototxt 40 | echo "max_iter: 120000" >> solver.prototxt 41 | echo "snapshot: 10000" >> solver.prototxt 42 | echo "snapshot_prefix: \"${xelu}_${init}\"" >> solver.prototxt 43 | echo "solver_mode: GPU" >> solver.prototxt 44 | # ------------------------------ 45 | echo 46 | cat solver.prototxt 47 | echo 48 | echo "Training" 49 | 50 | echo "training network ${i}" 51 | time build/tools/caffe train --solver=solver.prototxt -gpu 0 \ 52 | > nin_${xelu}_${init}_${lr}_${i}.txt 2>&1 53 | now=$(date +"%Y%m%d_%H_%M") 54 | mkdir $directory 55 | mv nin_${xelu}_${init}_${lr}_${i}.txt ${directory}/nin_${xelu}_${init}_${lr}_${now}.txt 56 | mv ${xelu}_${init}_* ${directory}/ 57 | mv $directory nin_${xelu}_${init}_${lr}_${dataset}_${now} 58 | echo "network ${i} done!"; 59 | echo 60 | done 61 | #done 62 | echo "Training is complete!" 63 | -------------------------------------------------------------------------------- /caffe/include/filler.hpp: -------------------------------------------------------------------------------- 1 | // Fillers are random number generators that fill a blob using the specified 2 | // algorithm. The expectation is that they are only going to be used during 3 | // initialization time and will not involve any GPUs.
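// ---------------------------------------------------------------------------
// Editor's sketch (not part of the original header): a tiny standalone program
// that reproduces the standard deviations the fillers below draw from, using
// the same fan-in convention (count / num) and the Taylor formula
// std = sqrt(2 / (n * (1 + alpha^2 * beta^2))) implemented further down.
// The function names and the example 3x3x64 layer shape are illustrative
// assumptions only; compile separately, e.g. with g++ -std=c++11.
#include <cmath>
#include <cstdio>

static double xavier_scale(double n) { return std::sqrt(3.0 / n); }  // U(-scale, scale)
static double msra_std(double n)     { return std::sqrt(2.0 / n); }  // N(0, std^2)
static double taylor_std(double n, double alpha, double beta) {
  return std::sqrt(2.0 / n / (1.0 + alpha * alpha * beta * beta));   // N(0, std^2)
}

int main() {
  const double fan_in = 3 * 3 * 64;  // a 3x3 convolution over 64 input channels
  std::printf("xavier scale       : %.4f\n", xavier_scale(fan_in));          // ~0.0722
  std::printf("msra std           : %.4f\n", msra_std(fan_in));              // ~0.0589
  std::printf("taylor std (a=b=1) : %.4f\n", taylor_std(fan_in, 1.0, 1.0));  // ~0.0417, i.e. msra / sqrt(2)
  std::printf("taylor std (a=0)   : %.4f\n", taylor_std(fan_in, 0.0, 1.0));  // equals msra (ReLU case)
  return 0;
}
// ---------------------------------------------------------------------------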
4 | 5 | #ifndef CAFFE_FILLER_HPP 6 | #define CAFFE_FILLER_HPP 7 | 8 | #include 9 | 10 | #include "caffe/blob.hpp" 11 | #include "caffe/proto/caffe.pb.h" 12 | #include "caffe/syncedmem.hpp" 13 | #include "caffe/util/math_functions.hpp" 14 | 15 | namespace caffe { 16 | 17 | /// @brief Fills a Blob with constant or randomly-generated data. 18 | template 19 | class Filler { 20 | public: 21 | explicit Filler(const FillerParameter& param) : filler_param_(param) {} 22 | virtual ~Filler() {} 23 | virtual void Fill(Blob* blob) = 0; 24 | protected: 25 | FillerParameter filler_param_; 26 | }; // class Filler 27 | 28 | 29 | /// @brief Fills a Blob with constant values @f$ x = 0 @f$. 30 | template 31 | class ConstantFiller : public Filler { 32 | public: 33 | explicit ConstantFiller(const FillerParameter& param) 34 | : Filler(param) {} 35 | virtual void Fill(Blob* blob) { 36 | Dtype* data = blob->mutable_cpu_data(); 37 | const int count = blob->count(); 38 | const Dtype value = this->filler_param_.value(); 39 | CHECK(count); 40 | for (int i = 0; i < count; ++i) { 41 | data[i] = value; 42 | } 43 | CHECK_EQ(this->filler_param_.sparse(), -1) 44 | << "Sparsity not supported by this Filler."; 45 | } 46 | }; 47 | 48 | /// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$. 49 | template 50 | class UniformFiller : public Filler { 51 | public: 52 | explicit UniformFiller(const FillerParameter& param) 53 | : Filler(param) {} 54 | virtual void Fill(Blob* blob) { 55 | CHECK(blob->count()); 56 | caffe_rng_uniform(blob->count(), Dtype(this->filler_param_.min()), 57 | Dtype(this->filler_param_.max()), blob->mutable_cpu_data()); 58 | CHECK_EQ(this->filler_param_.sparse(), -1) 59 | << "Sparsity not supported by this Filler."; 60 | } 61 | }; 62 | 63 | /// @brief Fills a Blob with Gaussian-distributed values @f$ x = a @f$. 64 | template 65 | class GaussianFiller : public Filler { 66 | public: 67 | explicit GaussianFiller(const FillerParameter& param) 68 | : Filler(param) {} 69 | virtual void Fill(Blob* blob) { 70 | Dtype* data = blob->mutable_cpu_data(); 71 | CHECK(blob->count()); 72 | caffe_rng_gaussian(blob->count(), Dtype(this->filler_param_.mean()), 73 | Dtype(this->filler_param_.std()), blob->mutable_cpu_data()); 74 | int sparse = this->filler_param_.sparse(); 75 | CHECK_GE(sparse, -1); 76 | if (sparse >= 0) { 77 | // Sparse initialization is implemented for "weight" blobs; i.e. matrices. 78 | // These have num == channels == 1; width is number of inputs; height is 79 | // number of outputs. The 'sparse' variable specifies the mean number 80 | // of non-zero input weights for a given output. 81 | CHECK_GE(blob->num_axes(), 1); 82 | const int num_outputs = blob->shape(0); 83 | Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs); 84 | rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int))); 85 | int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); 86 | caffe_rng_bernoulli(blob->count(), non_zero_probability, mask); 87 | for (int i = 0; i < blob->count(); ++i) { 88 | data[i] *= mask[i]; 89 | } 90 | } 91 | } 92 | 93 | protected: 94 | shared_ptr rand_vec_; 95 | }; 96 | 97 | /** @brief Fills a Blob with values @f$ x \in [0, 1] @f$ 98 | * such that @f$ \forall i \sum_j x_{ij} = 1 @f$. 
99 | */ 100 | template 101 | class PositiveUnitballFiller : public Filler { 102 | public: 103 | explicit PositiveUnitballFiller(const FillerParameter& param) 104 | : Filler(param) {} 105 | virtual void Fill(Blob* blob) { 106 | Dtype* data = blob->mutable_cpu_data(); 107 | DCHECK(blob->count()); 108 | caffe_rng_uniform(blob->count(), 0, 1, blob->mutable_cpu_data()); 109 | // We expect the filler to not be called very frequently, so we will 110 | // just use a simple implementation 111 | int dim = blob->count() / blob->num(); 112 | CHECK(dim); 113 | for (int i = 0; i < blob->num(); ++i) { 114 | Dtype sum = 0; 115 | for (int j = 0; j < dim; ++j) { 116 | sum += data[i * dim + j]; 117 | } 118 | for (int j = 0; j < dim; ++j) { 119 | data[i * dim + j] /= sum; 120 | } 121 | } 122 | CHECK_EQ(this->filler_param_.sparse(), -1) 123 | << "Sparsity not supported by this Filler."; 124 | } 125 | }; 126 | 127 | /** 128 | * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is 129 | * set inversely proportional to number of incoming nodes, outgoing 130 | * nodes, or their average. 131 | * 132 | * A Filler based on the paper [Bengio and Glorot 2010]: Understanding 133 | * the difficulty of training deep feedforward neuralnetworks. 134 | * 135 | * It fills the incoming matrix by randomly sampling uniform data from [-scale, 136 | * scale] where scale = sqrt(3 / n) where n is the fan_in, fan_out, or their 137 | * average, depending on the variance_norm option. You should make sure the 138 | * input blob has shape (num, a, b, c) where a * b * c = fan_in and num * b * c 139 | * = fan_out. Note that this is currently not the case for inner product layers. 140 | * 141 | * TODO(dox): make notation in above comment consistent with rest & use LaTeX. 142 | */ 143 | template 144 | class XavierFiller : public Filler { 145 | public: 146 | explicit XavierFiller(const FillerParameter& param) 147 | : Filler(param) {} 148 | virtual void Fill(Blob* blob) { 149 | CHECK(blob->count()); 150 | int fan_in = blob->count() / blob->num(); 151 | int fan_out = blob->count() / blob->channels(); 152 | Dtype n = fan_in; // default to fan_in 153 | if (this->filler_param_.variance_norm() == 154 | FillerParameter_VarianceNorm_AVERAGE) { 155 | n = (fan_in + fan_out) / Dtype(2); 156 | } else if (this->filler_param_.variance_norm() == 157 | FillerParameter_VarianceNorm_FAN_OUT) { 158 | n = fan_out; 159 | } 160 | Dtype scale = sqrt(Dtype(3) / n); 161 | caffe_rng_uniform(blob->count(), -scale, scale, 162 | blob->mutable_cpu_data()); 163 | CHECK_EQ(this->filler_param_.sparse(), -1) 164 | << "Sparsity not supported by this Filler."; 165 | } 166 | }; 167 | 168 | /** 169 | * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where 170 | * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming 171 | * nodes, outgoing nodes, or their average. 172 | * 173 | * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically 174 | * accounts for ReLU nonlinearities. 175 | * 176 | * Aside: for another perspective on the scaling factor, see the derivation of 177 | * [Saxe, McClelland, and Ganguli 2013 (v3)]. 178 | * 179 | * It fills the incoming matrix by randomly sampling Gaussian data with std = 180 | * sqrt(2 / n) where n is the fan_in, fan_out, or their average, depending on 181 | * the variance_norm option. You should make sure the input blob has shape (num, 182 | * a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that this 183 | * is currently not the case for inner product layers. 
184 | */ 185 | template 186 | class MSRAFiller : public Filler { 187 | public: 188 | explicit MSRAFiller(const FillerParameter& param) 189 | : Filler(param) {} 190 | virtual void Fill(Blob* blob) { 191 | CHECK(blob->count()); 192 | int fan_in = blob->count() / blob->num(); 193 | int fan_out = blob->count() / blob->channels(); 194 | Dtype n = fan_in; // default to fan_in 195 | if (this->filler_param_.variance_norm() == 196 | FillerParameter_VarianceNorm_AVERAGE) { 197 | n = (fan_in + fan_out) / Dtype(2); 198 | } else if (this->filler_param_.variance_norm() == 199 | FillerParameter_VarianceNorm_FAN_OUT) { 200 | n = fan_out; 201 | } 202 | Dtype std = sqrt(Dtype(2) / n); 203 | caffe_rng_gaussian(blob->count(), Dtype(0), std, 204 | blob->mutable_cpu_data()); 205 | CHECK_EQ(this->filler_param_.sparse(), -1) 206 | << "Sparsity not supported by this Filler."; 207 | } 208 | }; 209 | 210 | /** 211 | * Taylor filler can be used to initialize ReLU/PReLU/ELU/MPELU networks. 212 | * Parameters alpha and beta need to be given in Convolutional layer. 213 | * layer { 214 | * name: "conv", type: "Convolution" 215 | * bottom: "{data}" top: "{conv}" 216 | * convolution_param { 217 | * kernel_size: 3 218 | * num_output: 256 219 | * weight_filler: { 220 | * type: "taylor" 221 | * alpha: 1 222 | * beta: 1 223 | * } 224 | * } 225 | * } 226 | * For ReLU, alpha: 0 227 | * For PReLU, alpha: the initial slope of PReLU, beta: 1 228 | * For ELU, alpha: alpha used in ELU, beta: 1 229 | */ 230 | template 231 | class TaylorFiller : public Filler { 232 | public: 233 | explicit TaylorFiller(const FillerParameter& param) 234 | : Filler(param) {} 235 | virtual void Fill(Blob* blob) { 236 | CHECK(blob->count()); 237 | int fan_in = blob->count() / blob->num(); 238 | int fan_out = blob->count() / blob->channels(); 239 | Dtype n = fan_in; // default to fan_in 240 | if (this->filler_param_.variance_norm() == 241 | FillerParameter_VarianceNorm_AVERAGE) { 242 | n = (fan_in + fan_out) / Dtype(2); 243 | } else if (this->filler_param_.variance_norm() == 244 | FillerParameter_VarianceNorm_FAN_OUT) { 245 | n = fan_out; 246 | } 247 | float alpha = this->filler_param_.alpha(); 248 | float beta = this->filler_param_.beta(); 249 | 250 | Dtype std = sqrt(Dtype(2) / n / ( 1 + alpha*alpha*beta*beta )); 251 | LOG(INFO) << "The std of weights in this layer is: " << std; 252 | caffe_rng_gaussian(blob->count(), Dtype(0), std, 253 | blob->mutable_cpu_data()); 254 | CHECK_EQ(this->filler_param_.sparse(), -1) 255 | << "Sparsity not supported by this Filler."; 256 | } 257 | }; 258 | 259 | 260 | /*! 261 | @brief Fills a Blob with coefficients for bilinear interpolation. 262 | 263 | A common use case is with the DeconvolutionLayer acting as upsampling. 264 | You can upsample a feature map with shape of (B, C, H, W) by any integer factor 265 | using the following proto. 266 | \code 267 | layer { 268 | name: "upsample", type: "Deconvolution" 269 | bottom: "{{bottom_name}}" top: "{{top_name}}" 270 | convolution_param { 271 | kernel_size: {{2 * factor - factor % 2}} stride: {{factor}} 272 | num_output: {{C}} group: {{C}} 273 | pad: {{ceil((factor - 1) / 2.)}} 274 | weight_filler: { type: "bilinear" } bias_term: false 275 | } 276 | param { lr_mult: 0 decay_mult: 0 } 277 | } 278 | \endcode 279 | Please use this by replacing `{{}}` with your values. By specifying 280 | `num_output: {{C}} group: {{C}}`, it behaves as 281 | channel-wise convolution. 
The filter shape of this deconvolution layer will be 282 | (C, 1, K, K) where K is `kernel_size`, and this filler will set a (K, K) 283 | interpolation kernel for every channel of the filter identically. The resulting 284 | shape of the top feature map will be (B, C, factor * H, factor * W). 285 | Note that the learning rate and the 286 | weight decay are set to 0 in order to keep coefficient values of bilinear 287 | interpolation unchanged during training. If you apply this to an image, this 288 | operation is equivalent to the following call in Python with Scikit.Image. 289 | \code{.py} 290 | out = skimage.transform.rescale(img, factor, mode='constant', cval=0) 291 | \endcode 292 | */ 293 | template 294 | class BilinearFiller : public Filler { 295 | public: 296 | explicit BilinearFiller(const FillerParameter& param) 297 | : Filler(param) {} 298 | virtual void Fill(Blob* blob) { 299 | CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim."; 300 | CHECK_EQ(blob->width(), blob->height()) << "Filter must be square"; 301 | Dtype* data = blob->mutable_cpu_data(); 302 | int f = ceil(blob->width() / 2.); 303 | float c = (2 * f - 1 - f % 2) / (2. * f); 304 | for (int i = 0; i < blob->count(); ++i) { 305 | float x = i % blob->width(); 306 | float y = (i / blob->width()) % blob->height(); 307 | data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c)); 308 | } 309 | CHECK_EQ(this->filler_param_.sparse(), -1) 310 | << "Sparsity not supported by this Filler."; 311 | } 312 | }; 313 | 314 | /** 315 | * @brief Get a specific filler from the specification given in FillerParameter. 316 | * 317 | * Ideally this would be replaced by a factory pattern, but we will leave it 318 | * this way for now. 319 | */ 320 | template 321 | Filler* GetFiller(const FillerParameter& param) { 322 | const std::string& type = param.type(); 323 | if (type == "constant") { 324 | return new ConstantFiller(param); 325 | } else if (type == "gaussian") { 326 | return new GaussianFiller(param); 327 | } else if (type == "positive_unitball") { 328 | return new PositiveUnitballFiller(param); 329 | } else if (type == "uniform") { 330 | return new UniformFiller(param); 331 | } else if (type == "xavier") { 332 | return new XavierFiller(param); 333 | } else if (type == "msra") { 334 | return new MSRAFiller(param); 335 | } else if (type == "bilinear") { 336 | return new BilinearFiller(param); 337 | } else if (type == "taylor") { 338 | return new TaylorFiller(param); 339 | } else { 340 | CHECK(false) << "Unknown filler name: " << param.type(); 341 | } 342 | return (Filler*)(NULL); 343 | } 344 | 345 | } // namespace caffe 346 | 347 | #endif // CAFFE_FILLER_HPP_ 348 | -------------------------------------------------------------------------------- /caffe/include/layers/m2pelu_layer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CAFFE_M2PELU_LAYER_HPP_ 2 | #define CAFFE_M2PELU_LAYER_HPP_ 3 | 4 | #include 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/layer.hpp" 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | #include "caffe/layers/neuron_layer.hpp" 11 | 12 | namespace caffe { 13 | 14 | /** 15 | * @brief Parameterized Rectified Linear Unit non-linearity @f$ 16 | * y_i = \max(0, x_i) + a_i \min(0, x_i) 17 | * @f$. The differences from ReLULayer are 1) negative slopes are 18 | * learnable though backprop and 2) negative slopes can vary across 19 | * channels. The number of axes of input blob should be greater than or 20 | * equal to 2. The 1st axis (0-based) is seen as channels. 
21 | */ 22 | template <typename Dtype> 23 | class M2PELULayer : public NeuronLayer<Dtype> { 24 | public: 25 | /** 26 | * @param param provides M2PELUParameter m2pelu_param, 27 | * with M2PELULayer options: 28 | * - alpha_filler, beta_filler (\b optional, FillerParameter, 29 | * default {'type': constant 'value': 1}). 30 | * - channel_shared (\b optional, default false). 31 | * alpha and beta are shared across channels. 32 | */ 33 | explicit M2PELULayer(const LayerParameter& param) 34 | : NeuronLayer<Dtype>(param) {} 35 | 36 | virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, 37 | const vector<Blob<Dtype>*>& top); 38 | 39 | virtual void Reshape(const vector<Blob<Dtype>*>& bottom, 40 | const vector<Blob<Dtype>*>& top); 41 | 42 | virtual inline const char* type() const { return "M2PELU"; } 43 | 44 | protected: 45 | /** 46 | * @param bottom input Blob vector (length 1) 47 | * -# @f$ (N \times C \times ...) @f$ 48 | * the inputs @f$ x @f$ 49 | * @param top output Blob vector (length 1) 50 | * -# @f$ (N \times C \times ...) @f$ 51 | * the computed outputs for each channel @f$i@f$ @f$ 52 | * y_i = \max(0, x_i) + \alpha_i (e^{\beta_i \min(0, x_i)} - 1) 53 | * @f$. 54 | */ 55 | virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, 56 | const vector<Blob<Dtype>*>& top); 57 | virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, 58 | const vector<Blob<Dtype>*>& top); 59 | 60 | /** 61 | * @brief Computes the error gradient w.r.t. the M2PELU inputs. 62 | * 63 | * @param top output Blob vector (length 1), providing the error gradient with 64 | * respect to the outputs 65 | * -# @f$ (N \times C \times ...) @f$ 66 | * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ 67 | * with respect to computed outputs @f$ y @f$ 68 | * @param propagate_down see Layer::Backward. 69 | * @param bottom input Blob vector (length 1) 70 | * -# @f$ (N \times C \times ...) @f$ 71 | * the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their 72 | * diff with gradients @f$ 73 | * \frac{\partial E}{\partial x_i} = \left\{ 74 | * \begin{array}{lr} 75 | * \alpha_i \beta_i e^{\beta_i x_i} \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i \le 0 \\ 76 | * \frac{\partial E}{\partial y_i} & \mathrm{if} \; x_i > 0 77 | * \end{array} \right. 78 | * @f$. 79 | * If param_propagate_down_[0] is true, it fills the alpha diff with gradients 80 | * @f$ \frac{\partial E}{\partial \alpha_i} = \sum_{x_i \le 0} (e^{\beta_i x_i} - 1) \frac{\partial E}{\partial y_i} @f$, 81 | * and if param_propagate_down_[1] is true, the beta diff with gradients 82 | * @f$ \frac{\partial E}{\partial \beta_i} = \sum_{x_i \le 0} \alpha_i x_i e^{\beta_i x_i} \frac{\partial E}{\partial y_i} @f$. 83 | * Positive inputs contribute only to the input gradient; their contribution 84 | * to the @f$ \alpha @f$ and @f$ \beta @f$ gradients is zero (see Backward_cpu 85 | * in m2pelu_layer.cpp). 86 | *
87 | */ 88 | virtual void Backward_cpu(const vector*>& top, 89 | const vector& propagate_down, const vector*>& bottom); 90 | virtual void Backward_gpu(const vector*>& top, 91 | const vector& propagate_down, const vector*>& bottom); 92 | 93 | bool channel_shared_; 94 | Blob multiplier_; // dot multiplier for backward computation of params 95 | Blob backward_buff_alpha; // temporary buffer for backward computation 96 | Blob backward_buff_beta; 97 | Blob bottom_memory_; // memory for in-place computation 98 | }; 99 | 100 | } // namespace caffe 101 | 102 | #endif // CAFFE_M2PELU_LAYER_HPP_ 103 | -------------------------------------------------------------------------------- /caffe/src/caffe/layers/m2pelu_layer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "caffe/filler.hpp" 5 | 6 | #include "caffe/layers/neuron_layer.hpp" 7 | #include "caffe/layers/m2pelu_layer.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void M2PELULayer::LayerSetUp(const vector*>& bottom, 13 | const vector*>& top) { 14 | CHECK_GE(bottom[0]->num_axes(), 2) 15 | << "Number of axes of bottom blob must be >=2."; 16 | M2PELUParameter m2pelu_param = this->layer_param().m2pelu_param(); 17 | int channels = bottom[0]->channels(); 18 | channel_shared_ = m2pelu_param.channel_shared(); 19 | 20 | if (this->blobs_.size() > 0) { 21 | LOG(INFO) << "Skipping parameter initialization"; 22 | } else { 23 | this->blobs_.resize(2); 24 | if (channel_shared_) { 25 | this->blobs_[0].reset(new Blob(vector(0))); 26 | this->blobs_[1].reset(new Blob(vector(0))); 27 | } else { 28 | this->blobs_[0].reset(new Blob(vector(1, channels))); 29 | this->blobs_[1].reset(new Blob(vector(1, channels))); 30 | } 31 | shared_ptr > filler; 32 | if (m2pelu_param.has_alpha_filler()) { 33 | filler.reset(GetFiller(m2pelu_param.alpha_filler())); 34 | } else { 35 | FillerParameter filler_param; 36 | filler_param.set_type("constant"); 37 | filler_param.set_value(1); 38 | filler.reset(GetFiller(filler_param)); 39 | } 40 | filler->Fill(this->blobs_[0].get()); 41 | if (m2pelu_param.has_beta_filler()) { 42 | filler.reset(GetFiller(m2pelu_param.beta_filler())); 43 | } else { 44 | FillerParameter filler_param; 45 | filler_param.set_type("constant"); 46 | filler_param.set_value(1.0); 47 | filler.reset(GetFiller(filler_param)); 48 | } 49 | filler->Fill(this->blobs_[1].get()); 50 | } 51 | if (channel_shared_) { 52 | CHECK_EQ(this->blobs_[0]->count(), 1) 53 | << "Alpha size is inconsistent with prototxt config"; 54 | CHECK_EQ(this->blobs_[1]->count(), 1) 55 | << "Beta size is inconsistent with prototxt config"; 56 | } else { 57 | CHECK_EQ(this->blobs_[0]->count(), channels) 58 | << "Alpha size is inconsistent with prototxt config"; 59 | CHECK_EQ(this->blobs_[1]->count(), channels) 60 | << "Beta size is inconsistent with prototxt config"; 61 | } 62 | 63 | // Propagate gradients to the parameters (as directed by backward pass). 
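  // blobs_[0] stores alpha and blobs_[1] stores beta: a single element each when
  // channel_shared_ is true, otherwise one element per channel (see the CHECKs above).
  // Unless alpha_filler / beta_filler are specified in the prototxt, both default to
  // constant 1. multiplier_ and the two backward_buff_* blobs below hold
  // channels * inner-dimension elements and are only needed on the GPU path, where
  // Backward_gpu reduces the per-element parameter gradients with dot/gemv calls.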
64 | this->param_propagate_down_.resize(this->blobs_.size(), true); 65 | multiplier_.Reshape(vector(1, bottom[0]->count(1))); 66 | backward_buff_alpha.Reshape(vector(1, bottom[0]->count(1))); 67 | backward_buff_beta.Reshape(vector(1, bottom[0]->count(1))); 68 | caffe_set(multiplier_.count(), Dtype(1), multiplier_.mutable_cpu_data()); 69 | } 70 | 71 | template 72 | void M2PELULayer::Reshape(const vector*>& bottom, 73 | const vector*>& top) { 74 | CHECK_GE(bottom[0]->num_axes(), 2) 75 | << "Number of axes of bottom blob must be >=2."; 76 | top[0]->ReshapeLike(*bottom[0]); 77 | if (bottom[0] == top[0]) { 78 | // For in-place computation 79 | bottom_memory_.ReshapeLike(*bottom[0]); 80 | } 81 | // CHECK_NE(bottom[0], top[0]) << "***In-place computation is not allowed, because top will be used in the backward pass.***"; 82 | } 83 | 84 | template 85 | void M2PELULayer::Forward_cpu(const vector*>& bottom, 86 | const vector*>& top) { 87 | const Dtype* bottom_data = bottom[0]->cpu_data(); 88 | Dtype* top_data = top[0]->mutable_cpu_data(); 89 | const int count = bottom[0]->count(); 90 | const int dim = bottom[0]->count(2); 91 | const int channels = bottom[0]->channels(); 92 | 93 | const Dtype* alpha = this->blobs_[0]->cpu_data(); 94 | const Dtype* beta = this->blobs_[1]->cpu_data(); 95 | 96 | // CHECK_NE(bottom[0], top[0]) << "***In-place computation is not allowed, because top will be used in the backward pass.***"; 97 | // For in-place computation 98 | if (bottom[0] == top[0]) { 99 | caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data()); 100 | } 101 | 102 | // if channel_shared, channel index in the following computation becomes 103 | // always zero. 104 | const int div_factor = channel_shared_ ? channels : 1; 105 | for (int i = 0; i < count; ++i) { 106 | int c = (i / dim) % channels / div_factor; 107 | top_data[i] = std::max(bottom_data[i], Dtype(0)) 108 | + alpha[c] * ( exp(beta[c]*std::min(bottom_data[i], Dtype(0))) - 1); 109 | } 110 | } 111 | 112 | template 113 | void M2PELULayer::Backward_cpu(const vector*>& top, 114 | const vector& propagate_down, 115 | const vector*>& bottom) { 116 | const Dtype* bottom_data = bottom[0]->cpu_data(); 117 | const Dtype* top_diff = top[0]->cpu_diff(); 118 | const Dtype* top_data = top[0]->cpu_data(); 119 | 120 | const Dtype* alpha = this->blobs_[0]->cpu_data(); 121 | const Dtype* beta = this->blobs_[1]->cpu_data(); 122 | 123 | const int count = bottom[0]->count(); 124 | const int dim = bottom[0]->count(2); 125 | const int channels = bottom[0]->channels(); 126 | 127 | // CHECK_NE(bottom[0], top[0]) 128 | // << "***In-place computation is not allowed, because top will be used in the backward pass.***"; 129 | // For in-place computation 130 | if (top[0] == bottom[0]) { 131 | bottom_data = bottom_memory_.cpu_data(); 132 | } 133 | 134 | // if channel_shared, channel index in the following computation becomes 135 | // always zero. 136 | const int div_factor = channel_shared_ ? channels : 1; 137 | 138 | // Propagte to param alpha 139 | if (this->param_propagate_down_[0]) { 140 | Dtype* alpha_diff = this->blobs_[0]->mutable_cpu_diff(); 141 | Dtype* beta_diff = this->blobs_[1]->mutable_cpu_diff(); 142 | Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); 143 | for (int i = 0; i < count; ++i) { 144 | int c = (i / dim) % channels / div_factor; 145 | // CHECK_NE(alpha, 0) << "alpha can not be zero in the backward pass in M2PELU" 146 | alpha_diff[c] += top_diff[i] * ( (bottom_data[i] <= 0)? 
(exp(beta[c]*bottom_data[i]) - 1 ): Dtype(0) ); 147 | // alpha_diff[c] += top_diff[i] * (bottom_data[i] <= 0) * (exp(beta[c]*bottom_data[i] + gamma[c]) - exp(gamma[c]) ); 148 | beta_diff[c] += top_diff[i] * ( bottom_data[i]*( top_data[i] + alpha[c] ) * (bottom_data[i] <= 0) ); 149 | // beta_diff[c] += top_diff[i] * alpha[c] * bottom_data[i] * exp(beta[c] * bottom_data[i] + gamma[c]) * (bottom_data[i] <= 0); 150 | // if (bottom_data[i] == 0) CHECK_EQ(top_data[i], 0) << "gamma diff: top is not zero when bottom is."; 151 | bottom_diff[i] = top_diff[i] * ( (bottom_data[i] > 0) + beta[c]*( top_data[i] + alpha[c]) * (bottom_data[i] <= 0) ); 152 | } 153 | } 154 | 155 | // // Propagate to bottom 156 | // if (propagate_down[0]) { 157 | // Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); 158 | // for (int i = 0; i < count; ++i) { 159 | // int c = (i / dim) % channels / div_factor; 160 | // bottom_diff[i] = top_diff[i] * ( (bottom_data[i] > 0) + beta[c]*( top_data[i] + alpha[c]*exp(gamma[c])) * (bottom_data[i] <= 0) ); 161 | // // bottom_diff[i] = top_diff[i] * ( (bottom_data[i] > 0) + alpha[c] * beta[c] * exp(beta[c]*bottom_data[i] + gamma[c])*(bottom_data[i] <= 0) ); 162 | // // if (bottom_data[i] == 0) CHECK_EQ(top_data[i], 0) << "input diff: top is not zero when bottom is."; 163 | // } 164 | // } 165 | } 166 | 167 | 168 | #ifdef CPU_ONLY 169 | STUB_GPU(M2PELULayer); 170 | #endif 171 | 172 | INSTANTIATE_CLASS(M2PELULayer); 173 | REGISTER_LAYER_CLASS(M2PELU); 174 | 175 | } // namespace caffe 176 | -------------------------------------------------------------------------------- /caffe/src/caffe/layers/m2pelu_layer.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "caffe/layers/neuron_layer.hpp" 5 | #include "caffe/layers/m2pelu_layer.hpp" 6 | 7 | namespace caffe { 8 | 9 | // CUDA kernele for forward 10 | template 11 | __global__ void M2PELUForward(const int n, const int channels, const int dim, 12 | const Dtype* in, Dtype* out, const int div_factor, const Dtype* alpha, const Dtype* beta) { 13 | CUDA_KERNEL_LOOP(index, n) { 14 | int c = (index / dim) % channels / div_factor; 15 | out[index] = in[index] > 0 ? 
in[index] : ( exp(beta[c]*in[index] ) - 1 )* alpha[c]; 16 | } 17 | } 18 | 19 | // CUDA kernel for bottom backward 20 | // template 21 | // __global__ void M2PELUBackward(const int n, const int channels, const int dim, 22 | // const Dtype* in_diff, const Dtype* in_data, Dtype* out_diff, 23 | // const int div_factor, 24 | // const Dtype* top_data, const Dtype* alpha, const Dtype* beta, const Dtype* gamma) { 25 | // CUDA_KERNEL_LOOP(index, n) { 26 | // int c = (index / dim) % channels / div_factor; 27 | // out_diff[index] = in_diff[index] * ((in_data[index] > 0) 28 | // + (in_data[index] <= 0) * beta[c] * ( top_data[index] + alpha[c]*exp(gamma[c]) ) ); 29 | // } 30 | // } 31 | 32 | // CUDA kernel for element-wise parameter backward 33 | template 34 | __global__ void M2PELUParamBackward(const int n, 35 | const int rows, const int rowPitch, const Dtype* in_diff, 36 | const Dtype* in_data, Dtype* out_diff_alpha, Dtype* out_diff_beta, 37 | const int channels, const int dim, const int div_factor, const Dtype* top_data, Dtype* out_diff, 38 | const Dtype* alpha, const Dtype* beta ) { 39 | CUDA_KERNEL_LOOP(index, n) { 40 | int c = (index / dim) % channels / div_factor; 41 | out_diff_alpha[index] = in_diff[index] * ( exp(beta[c]*in_data[index] ) - 1 ) * (in_data[index] <= 0); 42 | out_diff_beta[index] = in_diff[index] * ( in_data[index] * (top_data[index] + alpha[c] ) ) * (in_data[index] <= 0); 43 | out_diff[index] = in_diff[index] * ((in_data[index] > 0) + (in_data[index] <= 0) * beta[c] * ( top_data[index] + alpha[c] ) ); 44 | for ( int k = 1; k < rows; k++ ) { 45 | int tmp_index = index + k*rowPitch; 46 | out_diff_alpha[index] += in_diff[tmp_index] * ( exp(beta[c] * in_data[tmp_index] ) - 1 )* (in_data[tmp_index] <= 0); 47 | out_diff_beta[index] += in_diff[tmp_index] * ( in_data[tmp_index] * (top_data[tmp_index] + alpha[c] ) ) * (in_data[tmp_index] <= 0); 48 | out_diff[tmp_index] = in_diff[tmp_index] * ((in_data[tmp_index] > 0) + (in_data[tmp_index] <= 0) * beta[c] * ( top_data[tmp_index] + alpha[c] ) ); 49 | } 50 | } 51 | } 52 | 53 | template 54 | void M2PELULayer::Forward_gpu(const vector*>& bottom, 55 | const vector*>& top) { 56 | const Dtype* bottom_data = bottom[0]->gpu_data(); 57 | Dtype* top_data = top[0]->mutable_gpu_data(); 58 | const int count = bottom[0]->count(); 59 | const int dim = bottom[0]->count(2); 60 | const int channels = bottom[0]->channels(); 61 | const int div_factor = channel_shared_ ? channels : 1; 62 | 63 | const Dtype* alpha = this->blobs_[0]->gpu_data(); 64 | const Dtype* beta = this->blobs_[1]->gpu_data(); 65 | 66 | // For in-place computation 67 | if (top[0] == bottom[0]) { 68 | // exit(0); 69 | caffe_copy(count, bottom_data, bottom_memory_.mutable_gpu_data()); 70 | } 71 | 72 | // NOLINT_NEXT_LINE(whitespace/operators) 73 | M2PELUForward<<>>( 74 | count, channels, dim, bottom_data, top_data, div_factor, alpha, beta); 75 | CUDA_POST_KERNEL_CHECK; 76 | } 77 | 78 | template 79 | void M2PELULayer::Backward_gpu(const vector*>& top, 80 | const vector& propagate_down, 81 | const vector*>& bottom) { 82 | const Dtype* bottom_data = bottom[0]->gpu_data(); 83 | const Dtype* top_diff = top[0]->gpu_diff(); 84 | const Dtype* top_data = top[0]->gpu_data(); 85 | const int count = bottom[0]->count(); 86 | const int dim = bottom[0]->count(2); 87 | const int channels = bottom[0]->channels(); 88 | const int div_factor = channel_shared_ ? 
channels : 1; 89 | 90 | const Dtype* alpha = this->blobs_[0]->gpu_data(); 91 | const Dtype* beta = this->blobs_[1]->gpu_data(); 92 | 93 | // For in-place computation 94 | if (top[0] == bottom[0]) { 95 | // exit(0); 96 | bottom_data = bottom_memory_.gpu_data(); 97 | } 98 | 99 | // Propagate to param alpha 100 | // Since to write bottom diff will affect top diff if top and bottom blobs 101 | // are identical (in-place computaion), we first compute param backward to 102 | // keep top_diff unchanged. 103 | // if (this->param_propagate_down_[0]) { 104 | if (1) { 105 | Dtype* alpha_diff = this->blobs_[0]->mutable_gpu_diff(); 106 | Dtype* beta_diff = this->blobs_[1]->mutable_gpu_diff(); 107 | Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); 108 | int cdim = channels * dim; 109 | 110 | // compute element-wise diff 111 | // NOLINT_NEXT_LINE(whitespace/operators) 112 | M2PELUParamBackward<<>>( 114 | cdim, bottom[0]->num(), top[0]->offset(1), top_diff , 115 | bottom_data , 116 | backward_buff_alpha.mutable_gpu_diff(), backward_buff_beta.mutable_gpu_diff(), 117 | channels, dim, div_factor, top_data, bottom_diff, alpha, beta); 118 | CUDA_POST_KERNEL_CHECK; 119 | if (channel_shared_) { 120 | Dtype dsum_alpha; 121 | Dtype dsum_beta; 122 | caffe_gpu_dot(channels * dim, backward_buff_alpha.gpu_diff(), 123 | multiplier_.gpu_data(), &dsum_alpha); 124 | caffe_gpu_dot(channels * dim, backward_buff_beta.gpu_diff(), 125 | multiplier_.gpu_data(), &dsum_beta); 126 | caffe_gpu_add_scalar(this->blobs_[0]->count(), Dtype(dsum_alpha), alpha_diff); 127 | caffe_gpu_add_scalar(this->blobs_[1]->count(), Dtype(dsum_beta), beta_diff ); 128 | } else { 129 | caffe_gpu_gemv(CblasNoTrans, channels, dim, 1., 130 | backward_buff_alpha.gpu_diff(), multiplier_.gpu_data(), 1., 131 | alpha_diff); 132 | caffe_gpu_gemv(CblasNoTrans, channels, dim, 1., 133 | backward_buff_beta.gpu_diff(), multiplier_.gpu_data(), 1., 134 | beta_diff); 135 | } 136 | } 137 | 138 | // Propagate to bottom 139 | // if (propagate_down[0]) { 140 | // Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); 141 | // // const Dtype* slope_data = this->blobs_[0]->gpu_data(); 142 | // // int div_factor = channel_shared_ ? 
channels : 1; 143 | // // NOLINT_NEXT_LINE(whitespace/operators) 144 | // M2PELUBackward<<>>( 146 | // count, channels, dim, top_diff, bottom_data, bottom_diff, div_factor, 147 | // top_data, alpha, beta, gamma); 148 | // CUDA_POST_KERNEL_CHECK; 149 | // } 150 | } 151 | 152 | 153 | INSTANTIATE_LAYER_GPU_FUNCS(M2PELULayer); 154 | 155 | 156 | } // namespace caffe 157 | -------------------------------------------------------------------------------- /examples/mnist_mpelu.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.optim as optim 7 | from torchvision import datasets, transforms 8 | from torch.optim.lr_scheduler import StepLR 9 | import sys 10 | import os 11 | sys.path.insert(0, os.path.abspath('./pytorch')) 12 | from mpelu import MPELU 13 | 14 | class Net(nn.Module): 15 | def __init__(self): 16 | super(Net, self).__init__() 17 | self.conv1 = nn.Conv2d(1, 32, 3, 1) 18 | self.mpelu1 = MPELU(32) 19 | self.conv2 = nn.Conv2d(32, 64, 3, 1) 20 | self.mpelu2 = MPELU(64) 21 | self.dropout1 = nn.Dropout(0.25) 22 | self.dropout2 = nn.Dropout(0.5) 23 | self.fc1 = nn.Linear(9216, 128) 24 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # Global Average Pooling Layer 25 | self.fc2 = nn.Linear(64, 10) # fc2 now takes 64 input channels instead of 128 26 | self.fc2 = nn.Linear(128, 10) 27 | 28 | def forward(self, x): 29 | x = self.conv1(x) 30 | # x = F.relu(x) 31 | x = self.mpelu1(x) 32 | x = self.conv2(x) 33 | # x = F.relu(x) 34 | x = self.mpelu2(x) 35 | x = F.max_pool2d(x, 2) 36 | x = self.dropout1(x) 37 | 38 | # x = self.avgpool(x) # Apply Global Average Pooling 39 | # x = torch.flatten(x, 1) 40 | # x = self.dropout2(x) 41 | # x = self.fc2(x) 42 | # output = F.log_softmax(x, dim=1) 43 | 44 | 45 | x = torch.flatten(x, 1) 46 | x = self.fc1(x) 47 | x = F.relu(x) 48 | x = self.dropout2(x) 49 | x = self.fc2(x) 50 | output = F.log_softmax(x, dim=1) 51 | 52 | return output 53 | 54 | 55 | def train(args, model, device, train_loader, optimizer, epoch): 56 | model.train() 57 | for batch_idx, (data, target) in enumerate(train_loader): 58 | data, target = data.to(device), target.to(device) 59 | optimizer.zero_grad() 60 | output = model(data) 61 | loss = F.nll_loss(output, target) 62 | loss.backward() 63 | optimizer.step() 64 | if batch_idx % args.log_interval == 0: 65 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 66 | epoch, batch_idx * len(data), len(train_loader.dataset), 67 | 100. * batch_idx / len(train_loader), loss.item())) 68 | if args.dry_run: 69 | break 70 | 71 | 72 | def test(model, device, test_loader): 73 | model.eval() 74 | test_loss = 0 75 | correct = 0 76 | print('mpelu parameters: ', model.mpelu1.a.mean().item()) 77 | with torch.no_grad(): 78 | for data, target in test_loader: 79 | data, target = data.to(device), target.to(device) 80 | output = model(data) 81 | test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss 82 | pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability 83 | correct += pred.eq(target.view_as(pred)).sum().item() 84 | 85 | test_loss /= len(test_loader.dataset) 86 | 87 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 88 | test_loss, correct, len(test_loader.dataset), 89 | 100. 
* correct / len(test_loader.dataset))) 90 | 91 | 92 | def main(): 93 | # Training settings 94 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 95 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 96 | help='input batch size for training (default: 64)') 97 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 98 | help='input batch size for testing (default: 1000)') 99 | parser.add_argument('--epochs', type=int, default=14, metavar='N', 100 | help='number of epochs to train (default: 14)') 101 | parser.add_argument('--lr', type=float, default=1.0, metavar='LR', 102 | help='learning rate (default: 1.0)') 103 | parser.add_argument('--gamma', type=float, default=0.7, metavar='M', 104 | help='Learning rate step gamma (default: 0.7)') 105 | parser.add_argument('--no-cuda', action='store_true', default=False, 106 | help='disables CUDA training') 107 | parser.add_argument('--no-mps', action='store_true', default=False, 108 | help='disables macOS GPU training') 109 | parser.add_argument('--dry-run', action='store_true', default=False, 110 | help='quickly check a single pass') 111 | parser.add_argument('--seed', type=int, default=1, metavar='S', 112 | help='random seed (default: 1)') 113 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 114 | help='how many batches to wait before logging training status') 115 | parser.add_argument('--save-model', action='store_true', default=False, 116 | help='For Saving the current Model') 117 | args = parser.parse_args() 118 | use_cuda = not args.no_cuda and torch.cuda.is_available() 119 | use_mps = not args.no_mps and torch.backends.mps.is_available() 120 | 121 | torch.manual_seed(args.seed) 122 | 123 | if use_cuda: 124 | device = torch.device("cuda") 125 | elif use_mps: 126 | device = torch.device("mps") 127 | else: 128 | device = torch.device("cpu") 129 | 130 | train_kwargs = {'batch_size': args.batch_size} 131 | test_kwargs = {'batch_size': args.test_batch_size} 132 | if use_cuda: 133 | cuda_kwargs = {'num_workers': 1, 134 | 'pin_memory': True, 135 | 'shuffle': True} 136 | train_kwargs.update(cuda_kwargs) 137 | test_kwargs.update(cuda_kwargs) 138 | 139 | transform=transforms.Compose([ 140 | transforms.ToTensor(), 141 | transforms.Normalize((0.1307,), (0.3081,)) 142 | ]) 143 | dataset1 = datasets.MNIST('./data', train=True, download=True, 144 | transform=transform) 145 | dataset2 = datasets.MNIST('./data', train=False, 146 | transform=transform) 147 | train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) 148 | test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) 149 | 150 | model = Net().to(device) 151 | optimizer = optim.Adadelta(model.parameters(), lr=args.lr) 152 | 153 | scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) 154 | for epoch in range(1, args.epochs + 1): 155 | train(args, model, device, train_loader, optimizer, epoch) 156 | test(model, device, test_loader) 157 | scheduler.step() 158 | 159 | if args.save_model: 160 | torch.save(model.state_dict(), "mnist_cnn.pt") 161 | 162 | 163 | if __name__ == '__main__': 164 | main() 165 | -------------------------------------------------------------------------------- /mpelu_nopre_resnet/models/mpelu-preactivation-nopre.lua: -------------------------------------------------------------------------------- 1 | -- ************************************************************************ 2 | -- The implementation of MPELU nopre ResNet, arXiv:1606.00305 
(https://arxiv.org/abs/1606.00305). 3 | -- This code is modified from pre-ResNet (https://github.com/KaimingHe/resnet-1k-layers) 4 | -- and fb.resnet.torch (https://github.com/facebook/fb.resnet.torch) 5 | -- ************************************************************************ 6 | 7 | local nn = require 'nn' 8 | require 'cunn' 9 | require 'nnlr' 10 | 11 | local Convolution = cudnn.SpatialConvolution 12 | local Avg = cudnn.SpatialAveragePooling 13 | local Max = nn.SpatialMaxPooling 14 | local SBatchNorm = nn.SpatialBatchNormalization 15 | local alpha = 0.25 16 | local beta = 1 17 | local lr_a = 5 18 | local wd_a = 10 19 | local lr_b = 5 20 | local wd_b = 10 21 | 22 | function MPELU(channels) 23 | local MPELU = nn.MPELU(alpha, beta, channels):learningRate('weight', lr_a):weightDecay('weight', wd_a) 24 | :learningRate('bias', lr_b):weightDecay('bias', wd_b) 25 | 26 | return MPELU 27 | end 28 | 29 | local function createModel(opt) 30 | local depth = opt.depth 31 | 32 | local function bottleneck(nInputPlane, nOutputPlane, stride) 33 | 34 | local nBottleneckPlane = nOutputPlane / 4 35 | 36 | if nInputPlane == nOutputPlane then -- most Residual Units have this shape 37 | local convs = nn.Sequential() 38 | -- conv1x1 39 | convs:add(Convolution(nInputPlane,nBottleneckPlane,1,1,stride,stride,0,0)) 40 | convs:add(SBatchNorm(nBottleneckPlane)) 41 | convs:add(MPELU(nBottleneckPlane)) 42 | -- conv3x3 43 | convs:add(Convolution(nBottleneckPlane,nBottleneckPlane,3,3,1,1,1,1)) 44 | convs:add(SBatchNorm(nBottleneckPlane)) 45 | convs:add(MPELU(nBottleneckPlane)) 46 | -- conv1x1 47 | convs:add(Convolution(nBottleneckPlane,nOutputPlane,1,1,1,1,0,0)) 48 | 49 | local shortcut = nn.Identity() 50 | 51 | return nn.Sequential() 52 | :add(nn.ConcatTable() 53 | :add(convs) 54 | :add(shortcut)) 55 | :add(nn.CAddTable(true)) 56 | else 57 | local block = nn.Sequential() 58 | local convs = nn.Sequential() 59 | -- conv1x1 60 | convs:add(Convolution(nInputPlane,nBottleneckPlane,1,1,stride,stride,0,0)) 61 | convs:add(SBatchNorm(nBottleneckPlane)) 62 | convs:add(MPELU(nBottleneckPlane)) 63 | -- conv3x3 64 | convs:add(Convolution(nBottleneckPlane,nBottleneckPlane,3,3,1,1,1,1)) 65 | convs:add(SBatchNorm(nBottleneckPlane)) 66 | convs:add(MPELU(nBottleneckPlane)) 67 | -- conv1x1 68 | convs:add(Convolution(nBottleneckPlane,nOutputPlane,1,1,1,1,0,0)) 69 | 70 | local shortcut = nn.Sequential() 71 | shortcut:add(Convolution(nInputPlane,nOutputPlane,1,1,stride,stride,0,0)) 72 | 73 | return block 74 | :add(nn.ConcatTable() 75 | :add(convs) 76 | :add(shortcut)) 77 | :add(nn.CAddTable(true)) 78 | end 79 | end 80 | 81 | -- Stacking Residual Units on the same stage 82 | local function layer(block, nInputPlane, nOutputPlane, count, stride) 83 | local s = nn.Sequential() 84 | 85 | s:add(block(nInputPlane, nOutputPlane, stride)) 86 | for i=2,count do 87 | s:add(block(nOutputPlane, nOutputPlane, 1)) 88 | end 89 | return s 90 | end 91 | 92 | local model = nn.Sequential() 93 | if opt.dataset == 'cifar10' then 94 | -- Model type specifies number of layers for CIFAR-10 model 95 | assert((depth - 2) % 9 == 0, 'depth should be 9n+2 (e.g., 164 or 1001 in the paper)') 96 | local n = (depth - 2) / 9 97 | print(' | ResNet-' .. depth .. 
' CIFAR-10') 98 | 99 | -- The new ResNet-164 and ResNet-1001 in [a] 100 | local nStages = {16, 64, 128, 256} 101 | 102 | model:add(Convolution(3,nStages[1],3,3,1,1,1,1)) 103 | model:add(SBatchNorm(nStages[1])) 104 | model:add(MPELU(nStages[1])) 105 | model:add(layer(bottleneck, nStages[1], nStages[2], n, 1)) -- Stage 1 (spatial size: 32x32) 106 | model:add(layer(bottleneck, nStages[2], nStages[3], n, 2)) -- Stage 2 (spatial size: 16x16) 107 | model:add(layer(bottleneck, nStages[3], nStages[4], n, 2)) -- Stage 3 (spatial size: 8x8) 108 | model:add(SBatchNorm(nStages[4])) 109 | model:add(MPELU(nStages[4])) 110 | model:add(Avg(8, 8, 1, 1)) 111 | model:add(nn.View(nStages[4]):setNumInputDims(3)) 112 | model:add(nn.Linear(nStages[4], 10)) 113 | elseif opt.dataset == 'cifar100' then 114 | -- Model type specifies number of layers for CIFAR-10 model 115 | assert((depth - 2) % 9 == 0, 'depth should be 9n+2 (e.g., 164 or 1001 in the paper)') 116 | local n = (depth - 2) / 9 117 | print(' | ResNet-' .. depth .. ' CIFAR-100') 118 | 119 | -- The new ResNet-164 and ResNet-1001 in [a] 120 | local nStages = {16, 64, 128, 256} 121 | 122 | model:add(Convolution(3,nStages[1],3,3,1,1,1,1)) 123 | model:add(SBatchNorm(nStages[1])) 124 | model:add(MPELU(nStages[1])) 125 | model:add(layer(bottleneck, nStages[1], nStages[2], n, 1)) -- Stage 1 (spatial size: 32x32) 126 | model:add(layer(bottleneck, nStages[2], nStages[3], n, 2)) -- Stage 2 (spatial size: 16x16) 127 | model:add(layer(bottleneck, nStages[3], nStages[4], n, 2)) -- Stage 3 (spatial size: 8x8) 128 | model:add(SBatchNorm(nStages[4])) 129 | model:add(MPELU(nStages[4])) 130 | model:add(Avg(8, 8, 1, 1)) 131 | model:add(nn.View(nStages[4]):setNumInputDims(3)) 132 | model:add(nn.Linear(nStages[4], 100)) 133 | else 134 | error('invalid dataset: ' .. opt.dataset) 135 | end 136 | 137 | local function ConvInit(name) 138 | for k,v in pairs(model:findModules(name)) do 139 | local n = v.kW*v.kH*v.nOutputPlane 140 | v.weight:normal(0,math.sqrt(2/n/(1 + alpha*alpha*beta*beta))) -- Taylor initialization for MPELU networks 141 | if cudnn.version >= 4000 then 142 | v.bias = nil 143 | v.gradBias = nil 144 | else 145 | v.bias:zero() 146 | end 147 | end 148 | end 149 | local function BNInit(name) 150 | for k,v in pairs(model:findModules(name)) do 151 | v.weight:fill(1) 152 | v.bias:zero() 153 | end 154 | end 155 | 156 | ConvInit('cudnn.SpatialConvolution') 157 | ConvInit('nn.SpatialConvolution') 158 | BNInit('fbnn.SpatialBatchNormalization') 159 | BNInit('cudnn.SpatialBatchNormalization') 160 | BNInit('nn.SpatialBatchNormalization') 161 | for k,v in pairs(model:findModules('nn.Linear')) do 162 | v.bias:zero() 163 | end 164 | model:type(opt.tensorType) 165 | 166 | if opt.cudnn == 'deterministic' then 167 | model:apply(function(m) 168 | if m.setMode then m:setMode(1,1,1) end 169 | end) 170 | end 171 | 172 | model:get(1).gradInput = nil 173 | 174 | return model 175 | end 176 | 177 | return createModel 178 | -------------------------------------------------------------------------------- /mpelu_nopre_resnet/train.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
8 | -- 9 | -- The training loop and learning rate schedule 10 | -- 11 | 12 | local optim = require 'optim' 13 | require 'nnlr' 14 | 15 | local M = {} 16 | local Trainer = torch.class('resnet.Trainer', M) 17 | 18 | function Trainer:__init(model, criterion, opt, optimState) 19 | self.model = model 20 | self.criterion = criterion 21 | self.optimState = optimState or { 22 | learningRate = opt.LR, 23 | learningRateDecay = 0.0, 24 | momentum = opt.momentum, 25 | nesterov = true, 26 | dampening = 0.0, 27 | weightDecay = opt.weightDecay, 28 | } 29 | self.opt = opt 30 | self.params, self.gradParams = model:getParameters() 31 | end 32 | 33 | function Trainer:train(epoch, dataloader) 34 | -- Trains the model for a single epoch 35 | self.optimState.learningRate = self:learningRate(epoch) 36 | 37 | -- Use multiple learning rate and weight decay like caffe. -- coldmooon 38 | ------------------------------------------------------------- 39 | learning_rates, weight_decays = self.model:getOptimConfig(self.optimState.learningRate, self.opt.weightDecay) 40 | 41 | self.optimState.learningRates = learning_rates 42 | self.optimState.weightDecay = nil 43 | self.optimState.weightDecays = weight_decays 44 | 45 | ------------------------------------------------------------- 46 | 47 | local timer = torch.Timer() 48 | local dataTimer = torch.Timer() 49 | 50 | local function feval() 51 | return self.criterion.output, self.gradParams 52 | end 53 | 54 | local trainSize = dataloader:size() 55 | local top1Sum, top5Sum, lossSum = 0.0, 0.0, 0.0 56 | local N = 0 57 | 58 | print('=> Training epoch # ' .. epoch) 59 | print('learning rate: ' .. self.optimState.learningRate) 60 | -- set the batch norm to training mode 61 | self.model:training() 62 | for n, sample in dataloader:run() do 63 | local dataTime = dataTimer:time().real 64 | 65 | -- Copy input and target to the GPU 66 | self:copyInputs(sample) 67 | -- print(self.input) 68 | local output = self.model:forward(self.input):float() 69 | local batchSize = output:size(1) 70 | local loss = self.criterion:forward(self.model.output, self.target) 71 | 72 | self.model:zeroGradParameters() 73 | self.criterion:backward(self.model.output, self.target) 74 | self.model:backward(self.input, self.criterion.gradInput) 75 | 76 | optim.sgd(feval, self.params, self.optimState) 77 | 78 | local top1, top5 = self:computeScore(output, sample.target, 1) 79 | top1Sum = top1Sum + top1*batchSize 80 | top5Sum = top5Sum + top5*batchSize 81 | lossSum = lossSum + loss*batchSize 82 | N = N + batchSize 83 | 84 | print((' | Epoch: [%d][%d/%d] Time %.3f Data %.3f Err %1.4f top1 %7.3f top5 %7.3f'):format( 85 | epoch, n, trainSize, timer:time().real, dataTime, loss, top1, top5)) 86 | 87 | -- check that the storage didn't get changed do to an unfortunate getParameters call 88 | assert(self.params:storage() == self.model:parameters()[1]:storage()) 89 | 90 | timer:reset() 91 | dataTimer:reset() 92 | end 93 | 94 | return top1Sum / N, top5Sum / N, lossSum / N 95 | end 96 | 97 | function Trainer:test(epoch, dataloader) 98 | -- Computes the top-1 and top-5 err on the validation set 99 | 100 | local timer = torch.Timer() 101 | local dataTimer = torch.Timer() 102 | local size = dataloader:size() 103 | 104 | local nCrops = self.opt.tenCrop and 10 or 1 105 | local top1Sum, top5Sum = 0.0, 0.0 106 | local N = 0 107 | 108 | self.model:evaluate() 109 | for n, sample in dataloader:run() do 110 | local dataTime = dataTimer:time().real 111 | 112 | -- Copy input and target to the GPU 113 | self:copyInputs(sample) 114 | 115 | 
local output = self.model:forward(self.input):float() 116 | local batchSize = output:size(1) / nCrops 117 | local loss = self.criterion:forward(self.model.output, self.target) 118 | 119 | local top1, top5 = self:computeScore(output, sample.target, nCrops) 120 | top1Sum = top1Sum + top1*batchSize 121 | top5Sum = top5Sum + top5*batchSize 122 | N = N + batchSize 123 | 124 | print((' | Test: [%d][%d/%d] Time %.3f Data %.3f top1 %7.3f (%7.3f) top5 %7.3f (%7.3f)'):format( 125 | epoch, n, size, timer:time().real, dataTime, top1, top1Sum / N, top5, top5Sum / N)) 126 | 127 | timer:reset() 128 | dataTimer:reset() 129 | end 130 | self.model:training() 131 | 132 | print((' * Finished epoch # %d top1: %7.3f top5: %7.3f\n'):format( 133 | epoch, top1Sum / N, top5Sum / N)) 134 | 135 | return top1Sum / N, top5Sum / N 136 | end 137 | 138 | function Trainer:computeScore(output, target, nCrops) 139 | if nCrops > 1 then 140 | -- Sum over crops 141 | output = output:view(output:size(1) / nCrops, nCrops, output:size(2)) 142 | --:exp() 143 | :sum(2):squeeze(2) 144 | end 145 | 146 | -- Coputes the top1 and top5 error rate 147 | local batchSize = output:size(1) 148 | 149 | local _ , predictions = output:float():topk(5, 2, true, true) -- descending 150 | 151 | -- Find which predictions match the target 152 | local correct = predictions:eq( 153 | target:long():view(batchSize, 1):expandAs(predictions)) 154 | 155 | -- Top-1 score 156 | local top1 = 1.0 - (correct:narrow(2, 1, 1):sum() / batchSize) 157 | 158 | -- Top-5 score, if there are at least 5 classes 159 | local len = math.min(5, correct:size(2)) 160 | local top5 = 1.0 - (correct:narrow(2, 1, len):sum() / batchSize) 161 | 162 | return top1 * 100, top5 * 100 163 | end 164 | 165 | function Trainer:copyInputs(sample) 166 | -- Copies the input to a CUDA tensor, if using 1 GPU, or to pinned memory, 167 | -- if using DataParallelTable. 
The target is always copied to a CUDA tensor 168 | self.input = self.input or (self.opt.nGPU == 1 169 | and torch.CudaTensor() 170 | or cutorch.createCudaHostTensor()) 171 | self.target = self.target or (torch.CudaLongTensor and torch.CudaLongTensor()or torch.CudaTensor()) 172 | self.input:resize(sample.input:size()):copy(sample.input) 173 | self.target:resize(sample.target:size()):copy(sample.target) 174 | end 175 | 176 | function Trainer:learningRate(epoch) 177 | -- Training schedule 178 | local decay = 0 179 | if self.opt.dataset == 'imagenet' then 180 | decay = math.floor((epoch - 1) / 30) 181 | elseif self.opt.dataset == 'cifar10' then 182 | -- decay = epoch >= 122 and 2 or epoch >= 81 and 1 or 0 183 | -- lr_mult = epoch >= 150 and 10 or epoch >= 122 and 100 or epoch >= 81 and 10 or 1 184 | decay = epoch >= 225 and 2 or epoch >= 150 and 1 or 0 185 | elseif self.opt.dataset == 'cifar100' then 186 | -- decay = epoch >= 122 and 2 or epoch >= 81 and 1 or 0 187 | decay = epoch >= 225 and 2 or epoch >= 150 and 1 or 0 188 | end 189 | 190 | -- use lr_mult like caffe: 191 | -- for k,v in pairs(self.model:findModules('nn.SPELU')) do 192 | -- v.__weightLearningRate = lr_mult 193 | -- end 194 | -- for k,v in pairs(self.model:findModules('nn.PReLU')) do 195 | -- v.__weightLearningRate = lr_mult 196 | -- end 197 | for k,v in pairs(self.model:findModules('nn.MPELU')) do 198 | v.__weightLearningRate = lr_mult 199 | end 200 | 201 | return self.opt.LR * math.pow(0.1, decay) 202 | end 203 | 204 | return M.Trainer 205 | -------------------------------------------------------------------------------- /pytorch/mpelu.cpp: -------------------------------------------------------------------------------- 1 | #include "mpelu.h" 2 | 3 | torch::Tensor mpelu_forward( 4 | torch::Tensor input, 5 | torch::Tensor a, 6 | torch::Tensor b 7 | ) { 8 | CHECK_INPUT(input); 9 | CHECK_INPUT(a); 10 | CHECK_INPUT(b); 11 | 12 | return mpelu_forward_cuda(input, a, b); 13 | } 14 | 15 | void mpelu_backward( 16 | const torch::Tensor& input, 17 | const torch::Tensor& a, 18 | const torch::Tensor& b, 19 | const torch::Tensor& output, 20 | const torch::Tensor& grad_output, 21 | torch::Tensor& grad_input, 22 | torch::Tensor& grad_a, 23 | torch::Tensor& grad_b 24 | ) { 25 | CHECK_INPUT(input); 26 | CHECK_INPUT(a); 27 | CHECK_INPUT(b); 28 | CHECK_INPUT(output); 29 | CHECK_INPUT(grad_output); 30 | CHECK_INPUT(grad_input); 31 | CHECK_INPUT(grad_a); 32 | CHECK_INPUT(grad_b); 33 | 34 | mpelu_backward_cuda(input, a, b, output, grad_output, grad_input, grad_a, grad_b); 35 | } 36 | 37 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 38 | m.def("mpelu_forward", &mpelu_forward); 39 | m.def("mpelu_backward", &mpelu_backward); 40 | } 41 | 42 | -------------------------------------------------------------------------------- /pytorch/mpelu.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor") 4 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 5 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 6 | 7 | torch::Tensor mpelu_forward_cuda( 8 | const torch::Tensor input, 9 | const torch::Tensor a, 10 | const torch::Tensor b 11 | ); 12 | 13 | 14 | void mpelu_backward_cuda( 15 | const torch::Tensor& input, 16 | const torch::Tensor& a, 17 | const torch::Tensor& b, 18 | const torch::Tensor& output, 19 | const torch::Tensor& grad_output, 20 | torch::Tensor& grad_input, 21 | torch::Tensor& grad_a, 
22 | torch::Tensor& grad_b 23 | ); 24 | -------------------------------------------------------------------------------- /pytorch/mpelu.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import mpelu_cuda 3 | from torch.cuda.amp import custom_fwd, custom_bwd 4 | 5 | class MPELUFunction(torch.autograd.Function): 6 | @staticmethod 7 | @custom_fwd 8 | def forward(ctx, input, alpha, beta): 9 | output = mpelu_cuda.mpelu_forward(input, alpha.to(input.dtype), beta.to(input.dtype)) 10 | ctx.save_for_backward(input, alpha, beta, output) 11 | 12 | return output 13 | 14 | 15 | @staticmethod 16 | @custom_bwd 17 | def backward(ctx, grad_output): 18 | input, alpha, beta, output = ctx.saved_tensors 19 | alpha = alpha.to(input.dtype) 20 | beta = beta.to(input.dtype) 21 | grad_input = torch.zeros_like(input) 22 | grad_a = torch.zeros_like(alpha) 23 | grad_b = torch.zeros_like(beta) 24 | 25 | mpelu_cuda.mpelu_backward(input, alpha, beta, output, grad_output.contiguous(), grad_input.contiguous(), grad_a.contiguous(), grad_b.contiguous()) 26 | 27 | return grad_input, grad_a, grad_b 28 | 29 | 30 | class MPELU(torch.nn.Module): 31 | def __init__(self, num_channels): 32 | super(MPELU, self).__init__() 33 | self.alpha = torch.nn.Parameter(torch.Tensor(num_channels)) 34 | self.beta = torch.nn.Parameter(torch.Tensor(num_channels)) 35 | self.reset_parameters() 36 | 37 | def reset_parameters(self): 38 | torch.nn.init.constant_(self.alpha, 0.25) 39 | torch.nn.init.ones_(self.beta) 40 | 41 | def forward(self, input): 42 | return MPELUFunction.apply(input, self.alpha, self.beta) -------------------------------------------------------------------------------- /pytorch/mpelu_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | template 5 | __global__ void mpelu_forward_cuda_kernel( 6 | const torch::PackedTensorAccessor input, 7 | const torch::PackedTensorAccessor a, 8 | const torch::PackedTensorAccessor b, 9 | torch::PackedTensorAccessor output, 10 | const int width, const int height 11 | ) { 12 | const int batch_idx = blockIdx.z; 13 | const int channel_idx = blockIdx.y; 14 | const int pixel_idx = blockIdx.x * blockDim.x + threadIdx.x; 15 | 16 | if (pixel_idx < width * height) { 17 | int x = pixel_idx % width; 18 | int y = pixel_idx / width; 19 | 20 | scalar_t in_val = input[batch_idx][channel_idx][y][x]; 21 | if (in_val < 0) { 22 | output[batch_idx][channel_idx][y][x] = a[channel_idx] * (exp(b[channel_idx] * in_val) - 1); 23 | } else { 24 | output[batch_idx][channel_idx][y][x] = in_val; 25 | } 26 | } 27 | } 28 | 29 | /* 30 | =============== Solution 1 for atomicAdd not support for (c10::Half *, c10::Half) =============== 31 | adapted from https://github.com/torch/cutorch/blob/master/lib/THC/THCAtomics.cuh 32 | https://forums.developer.nvidia.com/t/atomicadd-not-overloaded-for-c10-half/204474/2 33 | __device__ __forceinline__ void atomicAdd(c10::Half* address, c10::Half val) { 34 | unsigned int *address_as_ui = reinterpret_cast(reinterpret_cast(address) - (reinterpret_cast(address) & 2)); 35 | unsigned int old = *address_as_ui; 36 | unsigned int assumed; 37 | 38 | do { 39 | assumed = old; 40 | unsigned short hsum = reinterpret_cast(address) & 2 ? (old >> 16) : (old & 0xffff); 41 | hsum += val; 42 | old = reinterpret_cast(address) & 2 43 | ? 
(old & 0xffff) | (hsum << 16) 44 | : (old & 0xffff0000) | hsum; 45 | old = atomicCAS(address_as_ui, assumed, old); 46 | 47 | // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) 48 | } while (assumed != old); 49 | } 50 | =============== End: Solution 1 for atomicAdd not support for (c10::Half *, c10::Half) =============== 51 | 52 | =============== Solution 2 for atomicAdd not support for (c10::Half *, c10::Half) =============== 53 | https://discuss.pytorch.org/t/c10-half-float-type-support-for-atomicadd/137628/2 54 | Use gpuAtomicAdd rather than atomicAdd: 55 | https://github.com/pytorch/pytorch/blob/085e2f7bddc45f859fcdb786926d60d709b2daa0/aten/src/ATen/cuda/Atomic.cuh#L181-L190 56 | =============== End: Solution 2 for atomicAdd not support for (c10::Half *, c10::Half) =============== 57 | */ 58 | 59 | template 60 | __global__ void mpelu_backward_cuda_kernel( 61 | const torch::PackedTensorAccessor input, 62 | const torch::PackedTensorAccessor a, 63 | const torch::PackedTensorAccessor b, 64 | const torch::PackedTensorAccessor output, 65 | const torch::PackedTensorAccessor grad_output, 66 | torch::PackedTensorAccessor grad_input, 67 | torch::PackedTensorAccessor grad_a, 68 | torch::PackedTensorAccessor grad_b, 69 | const int width, const int height 70 | ) { 71 | const int batch_idx = blockIdx.z; 72 | const int channel_idx = blockIdx.y; 73 | const int pixel_idx = blockIdx.x * blockDim.x + threadIdx.x; 74 | 75 | if (pixel_idx < width * height) { 76 | int x = pixel_idx % width; 77 | int y = pixel_idx / width; 78 | 79 | const scalar_t inp = input[batch_idx][channel_idx][y][x]; 80 | const scalar_t oup = output[batch_idx][channel_idx][y][x]; 81 | const scalar_t grad_out = grad_output[batch_idx][channel_idx][y][x]; 82 | 83 | atomicAdd(&grad_a[channel_idx], grad_out * (inp <= 0) * (oup / a[channel_idx])); 84 | atomicAdd(&grad_b[channel_idx], grad_out * (inp <= 0) * inp * (oup + a[channel_idx])); 85 | grad_input[batch_idx][channel_idx][y][x] = grad_out * ( (inp > 0) + (inp <= 0) * b[channel_idx] * (oup + a[channel_idx])); 86 | } 87 | } 88 | 89 | 90 | // =================================================================== 91 | 92 | torch::Tensor mpelu_forward_cuda( 93 | const torch::Tensor input, 94 | const torch::Tensor a, 95 | const torch::Tensor b 96 | ){ 97 | 98 | torch::Tensor output = torch::zeros_like(input); 99 | 100 | const int threads_per_block = 256; 101 | const int batch_size = input.size(0); 102 | const int num_channels = input.size(1); 103 | const int height = input.size(2); 104 | const int width = input.size(3); 105 | 106 | dim3 threadsPerBlock(threads_per_block); 107 | dim3 numBlocks( 108 | (width * height + threads_per_block - 1) / threads_per_block, 109 | num_channels, 110 | batch_size 111 | ); 112 | 113 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.type(), "mpelu_forward_cuda", ([&] { 114 | mpelu_forward_cuda_kernel<<>>( 115 | input.packed_accessor(), 116 | a.packed_accessor(), 117 | b.packed_accessor(), 118 | output.packed_accessor(), 119 | width, height 120 | ); 121 | })); 122 | 123 | return output; 124 | } 125 | 126 | void mpelu_backward_cuda( 127 | const torch::Tensor& input, 128 | const torch::Tensor& a, 129 | const torch::Tensor& b, 130 | const torch::Tensor& output, 131 | const torch::Tensor& grad_output, 132 | torch::Tensor& grad_input, 133 | torch::Tensor& grad_a, 134 | torch::Tensor& grad_b 135 | ){ 136 | 137 | grad_input.zero_(); 138 | grad_a.zero_(); 139 | grad_b.zero_(); 140 | 141 | const int batch_size = grad_output.size(0); 142 | const int 
num_channels = grad_output.size(1); 143 | const int height = grad_output.size(2); 144 | const int width = grad_output.size(3); 145 | 146 | const int threads_per_block = 256; 147 | dim3 threadsPerBlock(threads_per_block); 148 | dim3 numBlocks( 149 | (width * height + threads_per_block - 1) / threads_per_block, 150 | num_channels, 151 | batch_size 152 | ); 153 | 154 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad_output.type(), "mpelu_backward_cuda", ([&] { 155 | mpelu_backward_cuda_kernel<<>>( 156 | input.packed_accessor(), 157 | a.packed_accessor(), 158 | b.packed_accessor(), 159 | output.packed_accessor(), 160 | grad_output.packed_accessor(), 161 | grad_input.packed_accessor(), 162 | grad_a.packed_accessor(), 163 | grad_b.packed_accessor(), 164 | width, height 165 | ); 166 | })); 167 | 168 | }; -------------------------------------------------------------------------------- /pytorch/setup.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os.path as osp 3 | from setuptools import setup 4 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 5 | 6 | 7 | ROOT_DIR = osp.dirname(osp.abspath(__file__)) 8 | include_dirs = [osp.join(ROOT_DIR, "include")] 9 | 10 | sources = glob.glob('*.cpp')+glob.glob('*.cu') 11 | 12 | 13 | setup( 14 | name='mpelu_cuda', 15 | version='1.6', 16 | ext_modules=[ 17 | CUDAExtension( 18 | name='mpelu_cuda', 19 | sources=sources, 20 | include_dirs=include_dirs, 21 | extra_compile_args={ 22 | 'cxx': ['-O2'], 23 | 'nvcc': ['-O2', '-G', '-lineinfo'] # Add debug flags here 24 | } 25 | ) 26 | ], 27 | cmdclass={ 28 | 'build_ext': BuildExtension 29 | } 30 | ) 31 | -------------------------------------------------------------------------------- /torch/extra/cunn/lib/THCUNN/MPELU.cu: -------------------------------------------------------------------------------- 1 | #include "THCUNN.h" 2 | #include "THCHalf.h" 3 | #include "THCHalfAutoNumerics.cuh" 4 | #include 5 | 6 | #include "common.h" 7 | 8 | template 9 | struct MPELUUpdateOutput 10 | { 11 | T* weight_; 12 | T* bias_; 13 | MPELUUpdateOutput(T* weight, T* bias) 14 | : weight_(weight), bias_(bias) 15 | {} 16 | 17 | __device__ __forceinline__ void operator()(T *out, T *in) 18 | { 19 | T x = *in; 20 | *out = (x > 0) ? x : weight_[0] * (exp(bias_[0] * x) - 1); 21 | } 22 | }; 23 | 24 | template 25 | __global__ void mpeluForward(T *output, const T *input, const T *weight, const T *bias, int n, int nElemsPerSample, int mapSize) 26 | { 27 | CUDA_KERNEL_LOOP(i, n) 28 | { 29 | int positionInSample = i % nElemsPerSample; 30 | int mapNumber = positionInSample / mapSize; 31 | output[i] = input[i] > 0 ? input[i] : (exp(bias[mapNumber] * input[i]) - 1) * weight[mapNumber]; 32 | } 33 | } 34 | 35 | template 36 | struct MPELUUpdateGradInput 37 | { 38 | T *weight_; 39 | T *bias_; 40 | MPELUUpdateGradInput(T *weight, T *bias) 41 | : weight_(weight), bias_(bias) 42 | {} 43 | 44 | __device__ __forceinline__ void operator()(T *gradInput, T *gradOutput, T *input) 45 | { 46 | *gradInput = *input > 0 ? *gradOutput : *gradOutput * ( *weight_ * *bias_ * exp(*bias_ * *input) ); 47 | } 48 | }; 49 | 50 | template 51 | __global__ void mpeluBackward( 52 | T *gradInput, 53 | const T *input, 54 | const T *weight, const T *bias, 55 | const T *gradOutput, 56 | int n, int nElemsPerSample, int mapSize) 57 | { 58 | CUDA_KERNEL_LOOP(i, n) 59 | { 60 | int positionInSample = i % nElemsPerSample; 61 | int mapNumber = positionInSample / mapSize; 62 | gradInput[i] = input[i] > 0 ? 
gradOutput[i] : gradOutput[i] * weight[mapNumber] * bias[mapNumber] * exp(bias[mapNumber] * input[i]); 63 | } 64 | } 65 | 66 | template 67 | struct MPELUAccGradParametersShared 68 | { 69 | T *weight_; 70 | T *bias_; 71 | MPELUAccGradParametersShared(T *weight, T *bias) 72 | : weight_(weight), bias_(bias) 73 | {} 74 | 75 | __device__ __forceinline__ void operator()(T *gradInput, T *input, T *gradOutput) 76 | { 77 | *gradInput = (exp(*bias_ * *input) - 1) * (*gradOutput) * (*input <= 0); 78 | } 79 | }; 80 | 81 | template 82 | struct MPELUAccGradParametersSharedForBeta 83 | { 84 | T *weight_; 85 | T *bias_; 86 | MPELUAccGradParametersSharedForBeta(T *weight, T *bias) 87 | : weight_(weight), bias_(bias) 88 | {} 89 | 90 | __device__ __forceinline__ void operator()(T *gradInput, T *input, T *gradOutput) 91 | { 92 | *gradInput = *weight_ * exp(*bias_ * *input) * *input * (*gradOutput) * (*input <= 0); 93 | } 94 | }; 95 | 96 | template 97 | struct MPELUAccGradParameters 98 | { 99 | T scale; 100 | T *weight_; 101 | T *bias_; 102 | MPELUAccGradParameters(T scale, T *weight, T *bias) 103 | : scale(scale), weight_(weight), bias_(bias) 104 | {} 105 | 106 | __device__ __forceinline__ void operator()(T *gradInput, T *input, T *gradOutput) 107 | { 108 | *gradInput = (exp(*bias_ * *input) - 1) * (*gradOutput) * scale * (*input <= 0); 109 | } 110 | }; 111 | 112 | template 113 | struct MPELUAccGradParametersForBeta 114 | { 115 | T scale; 116 | T *weight_; 117 | T *bias_; 118 | MPELUAccGradParametersForBeta(T scale, T *weight, T *bias) 119 | : scale(scale), weight_(weight), bias_(bias) 120 | {} 121 | 122 | __device__ __forceinline__ void operator()(T *gradInput, T *input, T *gradOutput) 123 | { 124 | *gradInput = *weight_ * exp(*bias_ * *input) * *input * (*gradOutput) * scale * (*input <= 0); 125 | } 126 | }; 127 | 128 | template 129 | struct MPELUAccGradParameters1to1 130 | { 131 | T scale; 132 | T *weight_; 133 | T *bias_; 134 | MPELUAccGradParameters1to1(T scale, T *weight, T *bias) 135 | : scale(scale), weight_(weight), bias_(bias) 136 | {} 137 | 138 | __device__ __forceinline__ void operator()(T *gradWeight, T *input, T *gradOutput) 139 | { 140 | *gradWeight += (exp(*bias_ * *input) - 1) * (*gradOutput) * scale * (*input <= 0); 141 | } 142 | }; 143 | 144 | template 145 | struct MPELUAccGradParameters1to1ForBeta 146 | { 147 | T scale; 148 | T *weight_; 149 | T *bias_; 150 | MPELUAccGradParameters1to1ForBeta(T scale, T *weight, T *bias) 151 | : scale(scale), weight_(weight), bias_(bias) 152 | {} 153 | 154 | __device__ __forceinline__ void operator()(T *gradBias, T *input, T *gradOutput) 155 | { 156 | *gradBias += *weight_ * exp(*bias_ * *input) * *input * (*gradOutput) * scale * (*input <= 0); 157 | } 158 | }; 159 | 160 | #include "generic/MPELU.cu" 161 | #include "THCGenerateFloatTypes.h" 162 | -------------------------------------------------------------------------------- /torch/extra/cunn/lib/THCUNN/SPELU.cu: -------------------------------------------------------------------------------- 1 | #include "THCUNN.h" 2 | #include "THCHalf.h" 3 | #include "THCHalfAutoNumerics.cuh" 4 | #include 5 | 6 | #include "common.h" 7 | 8 | template 9 | struct SPELUUpdateOutput 10 | { 11 | T* weight_; 12 | 13 | SPELUUpdateOutput(T* weight) 14 | : weight_(weight) 15 | {} 16 | 17 | __device__ __forceinline__ void operator()(T *out, T *in) 18 | { 19 | T x = *in; 20 | *out = (x > 0) ? 
x : weight_[0] * (exp(x) - 1);
21 | }
22 | };
23 | 
24 | template <typename T>
25 | __global__ void speluForward(T *output, const T *input, const T *weight, int n, int nElemsPerSample, int mapSize)
26 | {
27 | CUDA_KERNEL_LOOP(i, n)
28 | {
29 | int positionInSample = i % nElemsPerSample;
30 | int mapNumber = positionInSample / mapSize;
31 | output[i] = input[i] > 0 ? input[i] : (exp(input[i]) - 1) * weight[mapNumber];
32 | }
33 | }
34 | 
35 | template <typename T>
36 | struct SPELUUpdateGradInput
37 | {
38 | T *weight_;
39 | 
40 | SPELUUpdateGradInput(T *weight)
41 | : weight_(weight)
42 | {}
43 | 
44 | __device__ __forceinline__ void operator()(T *gradInput, T *gradOutput, T *input)
45 | {
46 | *gradInput = *input > 0 ? *gradOutput : *gradOutput * *weight_ * exp(*input);
47 | }
48 | };
49 | 
50 | template <typename T>
51 | __global__ void speluBackward(
52 | T *gradInput,
53 | const T *input,
54 | const T *weight,
55 | const T *gradOutput,
56 | int n, int nElemsPerSample, int mapSize)
57 | {
58 | CUDA_KERNEL_LOOP(i, n)
59 | {
60 | int positionInSample = i % nElemsPerSample;
61 | int mapNumber = positionInSample / mapSize;
62 | gradInput[i] = input[i] > 0 ? gradOutput[i] : gradOutput[i] * weight[mapNumber] * exp(input[i]);
63 | }
64 | }
65 | 
66 | template <typename T>
67 | struct SPELUAccGradParametersShared
68 | {
69 | __device__ __forceinline__ void operator()(T *gradInput, T *input, T *gradOutput)
70 | {
71 | *gradInput = (exp(*input) - 1) * (*gradOutput) * (*input <= 0);
72 | }
73 | };
74 | 
75 | template <typename T>
76 | struct SPELUAccGradParameters
77 | {
78 | T scale;
79 | 
80 | SPELUAccGradParameters(T scale)
81 | : scale(scale)
82 | {}
83 | 
84 | __device__ __forceinline__ void operator()(T *gradInput, T *input, T *gradOutput)
85 | {
86 | *gradInput = (exp(*input) - 1) * (*gradOutput) * scale * (*input <= 0);
87 | }
88 | };
89 | 
90 | template <typename T>
91 | struct SPELUAccGradParameters1to1
92 | {
93 | T scale;
94 | 
95 | SPELUAccGradParameters1to1(T scale)
96 | : scale(scale)
97 | {}
98 | 
99 | __device__ __forceinline__ void operator()(T *gradWeight, T *input, T *gradOutput)
100 | {
101 | *gradWeight += (exp(*input) - 1) * (*gradOutput) * scale * (*input <= 0);
102 | }
103 | };
104 | 
105 | #include "generic/SPELU.cu"
106 | #include "THCGenerateFloatTypes.h"
107 | 
-------------------------------------------------------------------------------- /torch/extra/cunn/lib/THCUNN/generic/MPELU.cu: --------------------------------------------------------------------------------
1 | #ifndef THC_GENERIC_FILE
2 | #define THC_GENERIC_FILE "generic/MPELU.cu"
3 | #else
4 | 
5 | void THNN_(MPELU_updateOutput)(
6 | THCState *state,
7 | THCTensor *input,
8 | THCTensor *output,
9 | THCTensor *weight,
10 | THCTensor *bias,
11 | long nOutputPlane)
12 | {
13 | THCTensor_(resizeAs)(state, output, input);
14 | 
15 | weight = THCTensor_(newContiguous)(state, weight);
16 | real *w = THCTensor_(data)(state, weight);
17 | bias = THCTensor_(newContiguous)(state, bias);
18 | real *b = THCTensor_(data)(state, bias);
19 | if (nOutputPlane == 0)
20 | {
21 | THC_pointwiseApply2(state, output, input, MPELUUpdateOutput<real>(w, b));
22 | }
23 | else
24 | {
25 | int ndim = THCTensor_(nDimension)(state, input);
26 | input = THCTensor_(newContiguous)(state, input);
27 | 
28 | int n = THCTensor_(nElement)(state, input);
29 | if (input->size[ndim > 1] != nOutputPlane)
30 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[ndim > 1]);
31 | 
32 | int mapSize = 1;
33 | for (int d = 2; d < ndim; d++) {
34 | mapSize *= input->size[d];
35 | }
36 | int nElemsPerSample = nOutputPlane * mapSize;
37 | mpeluForward<<<GET_BLOCKS(n), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state)>>>(
38 | THCTensor_(data)(state, output),
39 | THCTensor_(data)(state, input),
40 | w, b,
41 | n, nElemsPerSample, mapSize
42 | );
43 | THCudaCheck(cudaGetLastError());
44 | THCTensor_(free)(state, input);
45 | }
46 | 
47 | THCTensor_(free)(state, weight);
48 | THCTensor_(free)(state, bias);
49 | }
50 | 
51 | void THNN_(MPELU_updateGradInput)(
52 | THCState *state,
53 | THCTensor *input,
54 | THCTensor *gradOutput,
55 | THCTensor *gradInput,
56 | THCTensor *weight,
57 | THCTensor *bias,
58 | long nOutputPlane)
59 | {
60 | THCUNN_check_nElement(state, input, gradOutput);
61 | THCTensor_(resizeAs)(state, gradInput, input);
62 | 
63 | weight = THCTensor_(newContiguous)(state, weight);
64 | real *w = THCTensor_(data)(state, weight);
65 | bias = THCTensor_(newContiguous)(state, bias);
66 | real *b = THCTensor_(data)(state, bias);
67 | if (nOutputPlane == 0)
68 | {
69 | THC_pointwiseApply3(state, gradInput, gradOutput, input, MPELUUpdateGradInput<real>(w, b));
70 | }
71 | else
72 | {
73 | int ndim = THCTensor_(nDimension)(state, input);
74 | input = THCTensor_(newContiguous)(state, input);
75 | gradOutput = THCTensor_(newContiguous)(state, gradOutput);
76 | 
77 | int n = THCTensor_(nElement)(state, input);
78 | if (input->size[ndim > 1] != nOutputPlane)
79 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[ndim > 1]);
80 | 
81 | int mapSize = 1;
82 | for (int d = 2; d < ndim; d++) {
83 | mapSize *= input->size[d];
84 | }
85 | int nElemsPerSample = nOutputPlane * mapSize;
86 | mpeluBackward<<<GET_BLOCKS(n), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state)>>>(
87 | THCTensor_(data)(state, gradInput),
88 | THCTensor_(data)(state, input),
89 | w, b,
90 | THCTensor_(data)(state, gradOutput),
91 | n, nElemsPerSample, mapSize
92 | );
93 | THCudaCheck(cudaGetLastError());
94 | THCTensor_(free)(state, input);
95 | THCTensor_(free)(state, gradOutput);
96 | }
97 | 
98 | THCTensor_(free)(state, weight);
99 | THCTensor_(free)(state, bias);
100 | }
101 | 
102 | void THNN_(MPELU_accGradParameters)(
103 | THCState *state,
104 | THCTensor *input,
105 | THCTensor *gradOutput,
106 | THCTensor *gradInput,
107 | THCTensor *weight,
108 | THCTensor *gradWeight,
109 | THCTensor *gradWeightBuf,
110 | THCTensor *gradWeightBuf2,
111 | THCTensor *bias,
112 | THCTensor *gradBias,
113 | THCTensor *gradBiasBuf,
114 | THCTensor *gradBiasBuf2,
115 | long nOutputPlane,
116 | accreal scale_)
117 | {
118 | real scale = ScalarConvert<accreal, real>::to(scale_);
119 | THCUNN_check_nElement(state, input, gradOutput);
120 | 
121 | THCTensor *weighT = THCTensor_(newContiguous)(state, weight);
122 | real *w = THCTensor_(data)(state, weighT);
123 | THCTensor *biaS = THCTensor_(newContiguous)(state, bias);
124 | real *b = THCTensor_(data)(state, biaS);
125 | 
126 | // use grad input for temporary storage, then call updateGradInput again
127 | if (nOutputPlane == 0)
128 | {
129 | THC_pointwiseApply3(state, gradInput, input, gradOutput, MPELUAccGradParametersShared<real>(w, b));
130 | 
131 | // introduces a sync point
132 | real sum = ScalarConvert<accreal, real>::to(THCTensor_(sumall)(state, gradInput));
133 | real t = THCTensor_(get1d)(state, gradWeight, 0);
134 | THCTensor_(set1d)(state, gradWeight, 0, t + sum * scale);
135 | 
136 | THC_pointwiseApply3(state, gradInput, input, gradOutput, MPELUAccGradParametersSharedForBeta<real>(w, b));
137 | 
138 | // introduces a sync point
139 | sum = ScalarConvert<accreal, real>::to(THCTensor_(sumall)(state, gradInput));
140 | t = THCTensor_(get1d)(state, gradBias, 0);
141 | THCTensor_(set1d)(state, gradBias, 0, t + sum * scale);
142 | 
143 | // restore gradInput
144 | THNN_(MPELU_updateGradInput)(state, input, gradOutput, gradInput, weight, bias, nOutputPlane);
145 | }
146 | else
147 | {
148 | int ndim = THCTensor_(nDimension)(state, input);
149 | 
150 | if (ndim == 1)
151 | {
152 | THC_pointwiseApply3(state, gradWeight, input, gradOutput, MPELUAccGradParameters1to1<real>(scale, w, b));
153 | THC_pointwiseApply3(state, gradBias, input, gradOutput, MPELUAccGradParameters1to1ForBeta<real>(scale, w, b));
154 | }
155 | else
156 | {
157 | // THCTensor gradInputP;
158 | // THCTensor_(freeCopyTo)(state, gradInput, gradInputP);
159 | THCTensor *gradInputPartnar = THCTensor_(newContiguous)(state, gradInput);
160 | // THCTensor_(resizeAs)(state, gradInputPartnar, gradInput);
161 | // THCTensor_(resizeNd)(state, gradInputPartnar, input->nDimension, input->size, NULL);
162 | 
163 | THC_pointwiseApply3(state, gradInput, input, gradOutput, MPELUAccGradParameters<real>(scale, w, b));
164 | THC_pointwiseApply3(state, gradInputPartnar, input, gradOutput, MPELUAccGradParametersForBeta<real>(scale, w, b));
165 | 
166 | THCTensor *sumbuf = gradWeightBuf2;
167 | THCTensor_(resizeAs)(state, gradWeightBuf, gradWeight);
168 | 
169 | THCTensor *sumbufBeta = gradBiasBuf2;
170 | THCTensor_(resizeAs)(state, gradBiasBuf, gradBias);
171 | 
172 | if (ndim == 2)
173 | {
174 | THCTensor_(sum)(state, gradWeightBuf, gradInput, 0, 1);
175 | THCTensor_(cadd)(state, gradWeight, gradWeight, scale, gradWeightBuf);
176 | 
177 | THCTensor_(sum)(state, gradBiasBuf, gradInputPartnar, 0, 1);
178 | THCTensor_(cadd)(state, gradBias, gradBias, scale, gradBiasBuf);
179 | }
180 | else
181 | {
182 | THCTensor *buffer = THCTensor_(newContiguous)(state, gradInput);
183 | THCTensor *bufferBeta = THCTensor_(newContiguous)(state, gradInputPartnar);
184 | long size3 = 1;
185 | for (int d = 2; d < ndim; d++) {
186 | size3 *= input->size[d];
187 | }
188 | THCTensor_(resize3d)(state, buffer, input->size[0], nOutputPlane, size3);
189 | THCTensor_(resize2d)(state, sumbuf, input->size[0], nOutputPlane);
190 | THCTensor_(sum)(state, sumbuf, buffer, 2, 1);
191 | THCTensor_(sum)(state, gradWeightBuf, sumbuf, 0, 1);
192 | THCTensor_(cadd)(state, gradWeight, gradWeight, scale, gradWeightBuf);
193 | THCTensor_(free)(state, buffer);
194 | 
195 | THCTensor_(resize3d)(state, bufferBeta, input->size[0], nOutputPlane, size3);
196 | THCTensor_(resize2d)(state, sumbufBeta, input->size[0], nOutputPlane);
197 | THCTensor_(sum)(state, sumbufBeta, bufferBeta, 2, 1);
198 | THCTensor_(sum)(state, gradBiasBuf, sumbufBeta, 0, 1);
199 | THCTensor_(cadd)(state, gradBias, gradBias, scale, gradBiasBuf);
200 | THCTensor_(free)(state, bufferBeta);
201 | }
202 | 
203 | // restore gradInput
204 | THNN_(MPELU_updateGradInput)(state, input, gradOutput, gradInput, weight, bias, nOutputPlane);
205 | }
206 | }
207 | }
208 | 
209 | #endif
210 | 
-------------------------------------------------------------------------------- /torch/extra/cunn/lib/THCUNN/generic/SPELU.cu: --------------------------------------------------------------------------------
1 | #ifndef THC_GENERIC_FILE
2 | #define THC_GENERIC_FILE "generic/SPELU.cu"
3 | #else
4 | 
5 | void THNN_(SPELU_updateOutput)(
6 | THCState *state,
7 | THCTensor *input,
8 | THCTensor *output,
9 | THCTensor *weight,
10 | long nOutputPlane)
11 | {
12 | THCTensor_(resizeAs)(state, output, input);
13 | 
14 | weight = THCTensor_(newContiguous)(state, weight);
15 | real *w = THCTensor_(data)(state, weight);
16 | 
17 | if (nOutputPlane == 0)
18 | {
19 | THC_pointwiseApply2(state, output, input, SPELUUpdateOutput<real>(w));
20 | }
21 | else
22 | {
23 | int ndim = THCTensor_(nDimension)(state, input);
24 | input = THCTensor_(newContiguous)(state, input);
25 | 
26 | int n = THCTensor_(nElement)(state, input);
27 | if (input->size[ndim > 1] != nOutputPlane)
28 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[ndim > 1]);
29 | 
30 | int mapSize = 1;
31 | for (int d = 2; d < ndim; d++) {
32 | mapSize *= input->size[d];
33 | }
34 | int nElemsPerSample = nOutputPlane * mapSize;
35 | speluForward<<<GET_BLOCKS(n), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state)>>>(
36 | THCTensor_(data)(state, output),
37 | THCTensor_(data)(state, input),
38 | w,
39 | n, nElemsPerSample, mapSize
40 | );
41 | THCudaCheck(cudaGetLastError());
42 | THCTensor_(free)(state, input);
43 | }
44 | 
45 | THCTensor_(free)(state, weight);
46 | }
47 | 
48 | void THNN_(SPELU_updateGradInput)(
49 | THCState *state,
50 | THCTensor *input,
51 | THCTensor *gradOutput,
52 | THCTensor *gradInput,
53 | THCTensor *weight,
54 | long nOutputPlane)
55 | {
56 | THCUNN_check_nElement(state, input, gradOutput);
57 | THCTensor_(resizeAs)(state, gradInput, input);
58 | 
59 | weight = THCTensor_(newContiguous)(state, weight);
60 | real *w = THCTensor_(data)(state, weight);
61 | if (nOutputPlane == 0)
62 | {
63 | THC_pointwiseApply3(state, gradInput, gradOutput, input, SPELUUpdateGradInput<real>(w));
64 | }
65 | else
66 | {
67 | int ndim = THCTensor_(nDimension)(state, input);
68 | input = THCTensor_(newContiguous)(state, input);
69 | gradOutput = THCTensor_(newContiguous)(state, gradOutput);
70 | 
71 | int n = THCTensor_(nElement)(state, input);
72 | if (input->size[ndim > 1] != nOutputPlane)
73 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[ndim > 1]);
74 | 
75 | int mapSize = 1;
76 | for (int d = 2; d < ndim; d++) {
77 | mapSize *= input->size[d];
78 | }
79 | int nElemsPerSample = nOutputPlane * mapSize;
80 | speluBackward<<<GET_BLOCKS(n), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state)>>>(
81 | THCTensor_(data)(state, gradInput),
82 | THCTensor_(data)(state, input),
83 | w,
84 | THCTensor_(data)(state, gradOutput),
85 | n, nElemsPerSample, mapSize
86 | );
87 | THCudaCheck(cudaGetLastError());
88 | THCTensor_(free)(state, input);
89 | THCTensor_(free)(state, gradOutput);
90 | }
91 | THCTensor_(free)(state, weight);
92 | }
93 | 
94 | void THNN_(SPELU_accGradParameters)(
95 | THCState *state,
96 | THCTensor *input,
97 | THCTensor *gradOutput,
98 | THCTensor *gradInput,
99 | THCTensor *weight,
100 | THCTensor *gradWeight,
101 | THCTensor *gradWeightBuf,
102 | THCTensor *gradWeightBuf2,
103 | long nOutputPlane,
104 | accreal scale_)
105 | {
106 | real scale = ScalarConvert<accreal, real>::to(scale_);
107 | THCUNN_check_nElement(state, input, gradOutput);
108 | // use grad input for temporary storage, then call updateGradInput again
109 | 
110 | if (nOutputPlane == 0)
111 | {
112 | THC_pointwiseApply3(state, gradInput, input, gradOutput, SPELUAccGradParametersShared<real>());
113 | 
114 | // introduces a sync point
115 | real sum = ScalarConvert<accreal, real>::to(THCTensor_(sumall)(state, gradInput));
116 | real w = THCTensor_(get1d)(state, gradWeight, 0);
117 | THCTensor_(set1d)(state, gradWeight, 0, w + sum * scale);
118 | 
119 | // restore gradInput
120 | THNN_(SPELU_updateGradInput)(state, input, gradOutput, gradInput, weight, nOutputPlane);
121 | }
122 | else
123 | {
124 | int ndim = THCTensor_(nDimension)(state, input);
125 | 
126 | if (ndim == 1)
127 | {
128 | THC_pointwiseApply3(state, gradWeight, input, gradOutput, SPELUAccGradParameters1to1<real>(scale));
129 | }
130 | else
131 | {
132 | THC_pointwiseApply3(state, gradInput, input, gradOutput, SPELUAccGradParameters<real>(scale));
133 | THCTensor *sumbuf = gradWeightBuf2;
134 | THCTensor_(resizeAs)(state, gradWeightBuf, gradWeight);
135 | 
136 | if (ndim == 2)
137 | {
138 | THCTensor_(sum)(state, gradWeightBuf, gradInput, 0, 1);
139 | THCTensor_(cadd)(state, gradWeight, gradWeight, scale, gradWeightBuf);
140 | }
141 | else
142 | {
143 | THCTensor *buffer = THCTensor_(newContiguous)(state, gradInput);
144 | long size3 = 1;
145 | for (int d = 2; d < ndim; d++) {
146 | size3 *= input->size[d];
147 | }
148 | THCTensor_(resize3d)(state, buffer, input->size[0], nOutputPlane, size3);
149 | THCTensor_(resize2d)(state, sumbuf, input->size[0], nOutputPlane);
150 | THCTensor_(sum)(state, sumbuf, buffer, 2, 1);
151 | THCTensor_(sum)(state, gradWeightBuf, sumbuf, 0, 1);
152 | THCTensor_(cadd)(state, gradWeight, gradWeight, scale, gradWeightBuf);
153 | THCTensor_(free)(state, buffer);
154 | }
155 | 
156 | // restore gradInput
157 | THNN_(SPELU_updateGradInput)(state, input, gradOutput, gradInput, weight, nOutputPlane);
158 | }
159 | }
160 | }
161 | 
162 | #endif
163 | 
-------------------------------------------------------------------------------- /torch/extra/nn/MPELU.lua: --------------------------------------------------------------------------------
1 | local MPELU, parent = torch.class('nn.MPELU','nn.Module')
2 | 
3 | function MPELU:__init(alpha, beta, nOutputPlane)
4 | parent.__init(self)
5 | -- if no argument provided, use shared model (weight is scalar)
6 | self.nOutputPlane = nOutputPlane or 0
7 | local a = alpha or 1
8 | local b = beta or 1
9 | self.weight = torch.Tensor(nOutputPlane or 1):fill(a)
10 | self.gradWeight = 
torch.Tensor(nOutputPlane or 1) 11 | self.bias = torch.Tensor(nOutputPlane or 1):fill(b) 12 | self.gradBias = torch.Tensor(nOutputPlane or 1) 13 | end 14 | 15 | function MPELU:updateOutput(input) 16 | input.THNN.MPELU_updateOutput( 17 | input:cdata(), 18 | self.output:cdata(), 19 | self.weight:cdata(), 20 | self.bias:cdata(), 21 | self.nOutputPlane 22 | ) 23 | return self.output 24 | end 25 | 26 | function MPELU:updateGradInput(input, gradOutput) 27 | input.THNN.MPELU_updateGradInput( 28 | input:cdata(), 29 | gradOutput:cdata(), 30 | self.gradInput:cdata(), 31 | self.weight:cdata(), 32 | self.bias:cdata(), 33 | self.nOutputPlane 34 | ) 35 | return self.gradInput 36 | end 37 | 38 | function MPELU:accGradParameters(input, gradOutput, scale) 39 | self.gradWeightBuf = self.gradWeightBuf or input.new() 40 | self.gradWeightBuf2 = self.gradWeightBuf2 or input.new() 41 | self.gradBiasBuf = self.gradBiasBuf or input.new() 42 | self.gradBiasBuf2 = self.gradBiasBuf2 or input.new() 43 | input.THNN.MPELU_accGradParameters( 44 | input:cdata(), 45 | gradOutput:cdata(), 46 | self.gradInput:cdata(), 47 | self.weight:cdata(), 48 | self.gradWeight:cdata(), 49 | self.gradWeightBuf:cdata(), 50 | self.gradWeightBuf2:cdata(), 51 | self.bias:cdata(), 52 | self.gradBias:cdata(), 53 | self.gradBiasBuf:cdata(), 54 | self.gradBiasBuf2:cdata(), 55 | self.nOutputPlane, 56 | scale or 1 57 | ) 58 | return self.gradWeight, self.gradBias 59 | end 60 | 61 | function MPELU:clearState() 62 | nn.utils.clear(self, 'gradWeightBuf', 'gradWeightBuf2') 63 | nn.utils.clear(self, 'gradBiasBuf', 'gradBiasBuf2') 64 | return parent.clearState(self) 65 | end 66 | 67 | function MPELU:__tostring__() 68 | s = string.format('%s(alpha=%f, beta=%f, channels=%d)', torch.type(self), self.weight[1], self.bias[1], self.nOutputPlane) 69 | return s 70 | end -------------------------------------------------------------------------------- /torch/extra/nn/SPELU.lua: -------------------------------------------------------------------------------- 1 | local SPELU, parent = torch.class('nn.SPELU','nn.Module') 2 | 3 | function SPELU:__init(alpha, nOutputPlane) 4 | parent.__init(self) 5 | -- if no argument provided, use shared model (weight is scalar) 6 | self.nOutputPlane = nOutputPlane or 0 7 | local a = alpha or 1 8 | self.weight = torch.Tensor(nOutputPlane or 1):fill(a) 9 | self.gradWeight = torch.Tensor(nOutputPlane or 1) 10 | end 11 | 12 | function SPELU:updateOutput(input) 13 | input.THNN.SPELU_updateOutput( 14 | input:cdata(), 15 | self.output:cdata(), 16 | self.weight:cdata(), 17 | self.nOutputPlane 18 | ) 19 | return self.output 20 | end 21 | 22 | function SPELU:updateGradInput(input, gradOutput) 23 | input.THNN.SPELU_updateGradInput( 24 | input:cdata(), 25 | gradOutput:cdata(), 26 | self.gradInput:cdata(), 27 | self.weight:cdata(), 28 | self.nOutputPlane 29 | ) 30 | return self.gradInput 31 | end 32 | 33 | function SPELU:accGradParameters(input, gradOutput, scale) 34 | self.gradWeightBuf = self.gradWeightBuf or input.new() 35 | self.gradWeightBuf2 = self.gradWeightBuf2 or input.new() 36 | input.THNN.SPELU_accGradParameters( 37 | input:cdata(), 38 | gradOutput:cdata(), 39 | self.gradInput:cdata(), 40 | self.weight:cdata(), 41 | self.gradWeight:cdata(), 42 | self.gradWeightBuf:cdata(), 43 | self.gradWeightBuf2:cdata(), 44 | self.nOutputPlane, 45 | scale or 1 46 | ) 47 | return self.gradWeight 48 | end 49 | 50 | function SPELU:clearState() 51 | nn.utils.clear(self, 'gradWeightBuf', 'gradWeightBuf2') 52 | return parent.clearState(self) 53 | end 54 | 
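For quick orientation, here is a minimal usage sketch (not a file from the repository; the layer shapes are illustrative) showing how the nn.MPELU and nn.SPELU modules defined above plug into a Torch7 model once the THNN/THCUNN bindings in this tree are built:

-- Usage sketch (illustrative): MPELU with one (alpha, beta) pair per output plane,
-- followed by SPELU in shared-parameter mode (nOutputPlane defaults to 0).
require 'nn'

local model = nn.Sequential()
model:add(nn.SpatialConvolution(3, 16, 3, 3, 1, 1, 1, 1))
model:add(nn.MPELU(1, 1, 16))   -- alpha = 1, beta = 1, 16 channels
model:add(nn.SpatialConvolution(16, 16, 3, 3, 1, 1, 1, 1))
model:add(nn.SPELU(0.25))       -- shared alpha = 0.25

local output = model:forward(torch.randn(4, 3, 32, 32))
print(model)                     -- MPELU's __tostring__ reports alpha, beta and channels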
-------------------------------------------------------------------------------- /torch/extra/nn/init.lua: -------------------------------------------------------------------------------- 1 | require('torch') 2 | 3 | nn = {} -- define the global nn table 4 | 5 | require('nn.THNN') 6 | 7 | require('nn.utils') 8 | 9 | 10 | require('nn.ErrorMessages') 11 | require('nn.Module') 12 | 13 | require('nn.Container') 14 | require('nn.Concat') 15 | require('nn.Parallel') 16 | require('nn.Sequential') 17 | require('nn.DepthConcat') 18 | 19 | require('nn.Decorator') 20 | require('nn.Bottle') 21 | require('nn.WeightNorm') 22 | require('nn.DontCast') 23 | require('nn.NaN') 24 | require('nn.Profile') 25 | 26 | require('nn.Linear') 27 | require('nn.LinearWeightNorm') 28 | require('nn.Bilinear') 29 | require('nn.PartialLinear') 30 | require('nn.SparseLinear') 31 | require('nn.IndexLinear') 32 | require('nn.Reshape') 33 | require('nn.View') 34 | require('nn.Contiguous') 35 | require('nn.Select') 36 | require('nn.Narrow') 37 | require('nn.Index') 38 | require('nn.Squeeze') 39 | require('nn.Unsqueeze') 40 | require('nn.Replicate') 41 | require('nn.Transpose') 42 | require('nn.BatchNormalization') 43 | require('nn.LayerNormalization') 44 | require('nn.Padding') 45 | require('nn.GradientReversal') 46 | require('nn.MaskedSelect') 47 | 48 | require('nn.Copy') 49 | require('nn.Min') 50 | require('nn.Max') 51 | require('nn.Sum') 52 | require('nn.Mean') 53 | require('nn.CMul') 54 | require('nn.Mul') 55 | require('nn.MulConstant') 56 | require('nn.CAdd') 57 | require('nn.Add') 58 | require('nn.AddConstant') 59 | require('nn.Constant') 60 | require('nn.Dropout') 61 | require('nn.SpatialDropout') 62 | require('nn.VolumetricDropout') 63 | require('nn.WhiteNoise') 64 | require('nn.OneHot') 65 | require('nn.PrintSize') 66 | require('nn.ZeroGrad') 67 | 68 | require('nn.CAddTable') 69 | require('nn.CDivTable') 70 | require('nn.CMulTable') 71 | require('nn.CSubTable') 72 | require('nn.CMaxTable') 73 | require('nn.CMinTable') 74 | require('nn.CAddTensorTable') 75 | 76 | require('nn.Euclidean') 77 | require('nn.WeightedEuclidean') 78 | require('nn.PairwiseDistance') 79 | require('nn.CosineDistance') 80 | require('nn.DotProduct') 81 | require('nn.Normalize') 82 | require('nn.Cosine') 83 | require('nn.Kmeans') 84 | 85 | require('nn.Exp') 86 | require('nn.Log') 87 | require('nn.HardTanh') 88 | require('nn.Clamp') 89 | require('nn.LogSigmoid') 90 | require('nn.LogSoftMax') 91 | require('nn.Sigmoid') 92 | require('nn.SoftMax') 93 | require('nn.SoftMin') 94 | require('nn.SoftPlus') 95 | require('nn.SoftSign') 96 | require('nn.Tanh') 97 | require('nn.TanhShrink') 98 | require('nn.Abs') 99 | require('nn.Power') 100 | require('nn.Square') 101 | require('nn.Sqrt') 102 | require('nn.HardShrink') 103 | require('nn.SoftShrink') 104 | require('nn.Threshold') 105 | require('nn.Maxout') 106 | require('nn.ReLU') 107 | require('nn.ReLU6') 108 | require('nn.PReLU') 109 | require('nn.CReLU') 110 | require('nn.LeakyReLU') 111 | require('nn.SpatialSoftMax') 112 | require('nn.SpatialLogSoftMax') 113 | require('nn.RReLU') 114 | require('nn.ELU') 115 | require('nn.GatedLinearUnit') 116 | 117 | require('nn.LookupTable') 118 | require('nn.SpatialConvolution') 119 | require('nn.SpatialConvolutionLocal') 120 | require('nn.SpatialFullConvolution') 121 | require('nn.SpatialFullConvolutionMap') 122 | require('nn.SpatialConvolutionMM') 123 | require('nn.SpatialDepthWiseConvolution') 124 | require('nn.SpatialConvolutionMap') 125 | 
require('nn.SpatialDilatedConvolution') 126 | require('nn.SpatialSubSampling') 127 | require('nn.SpatialMaxPooling') 128 | require('nn.SpatialDilatedMaxPooling') 129 | require('nn.SpatialMaxUnpooling') 130 | require('nn.SpatialFractionalMaxPooling') 131 | require('nn.SpatialLPPooling') 132 | require('nn.SpatialAveragePooling') 133 | require('nn.SpatialAdaptiveMaxPooling') 134 | require('nn.SpatialAdaptiveAveragePooling') 135 | require('nn.TemporalConvolution') 136 | require('nn.TemporalSubSampling') 137 | require('nn.TemporalMaxPooling') 138 | require('nn.TemporalDynamicKMaxPooling') 139 | require('nn.TemporalRowConvolution') 140 | require('nn.SpatialSubtractiveNormalization') 141 | require('nn.SpatialDivisiveNormalization') 142 | require('nn.SpatialContrastiveNormalization') 143 | require('nn.SpatialCrossMapLRN') 144 | require('nn.SpatialZeroPadding') 145 | require('nn.SpatialReflectionPadding') 146 | require('nn.SpatialReplicationPadding') 147 | require('nn.SpatialUpSamplingNearest') 148 | require('nn.SpatialUpSamplingBilinear') 149 | require('nn.SpatialBatchNormalization') 150 | 151 | require('nn.VolumetricConvolution') 152 | require('nn.VolumetricFullConvolution') 153 | require('nn.VolumetricDilatedConvolution') 154 | require('nn.VolumetricMaxPooling') 155 | require('nn.VolumetricDilatedMaxPooling') 156 | require('nn.VolumetricFractionalMaxPooling') 157 | require('nn.VolumetricMaxUnpooling') 158 | require('nn.VolumetricAveragePooling') 159 | require('nn.VolumetricBatchNormalization') 160 | require('nn.VolumetricReplicationPadding') 161 | 162 | require('nn.GPU') 163 | 164 | require('nn.ParallelTable') 165 | require('nn.Identity') 166 | require('nn.ConcatTable') 167 | require('nn.SplitTable') 168 | require('nn.JoinTable') 169 | require('nn.SelectTable') 170 | require('nn.MixtureTable') 171 | require('nn.CriterionTable') 172 | require('nn.FlattenTable') 173 | require('nn.NarrowTable') 174 | require('nn.MapTable') 175 | require('nn.ZipTable') 176 | require('nn.ZipTableOneToMany') 177 | require('nn.Collapse') 178 | require('nn.Convert') 179 | 180 | require('nn.Criterion') 181 | require('nn.MSECriterion') 182 | require('nn.SpatialAutoCropMSECriterion') 183 | require('nn.SmoothL1Criterion') 184 | require('nn.MarginCriterion') 185 | require('nn.SoftMarginCriterion') 186 | require('nn.AbsCriterion') 187 | require('nn.ClassNLLCriterion') 188 | require('nn.SpatialClassNLLCriterion') 189 | require('nn.ClassSimplexCriterion') 190 | require('nn.DistKLDivCriterion') 191 | require('nn.MultiCriterion') 192 | require('nn.L1HingeEmbeddingCriterion') 193 | require('nn.HingeEmbeddingCriterion') 194 | require('nn.CosineEmbeddingCriterion') 195 | require('nn.MarginRankingCriterion') 196 | require('nn.MultiMarginCriterion') 197 | require('nn.MultiLabelMarginCriterion') 198 | require('nn.MultiLabelSoftMarginCriterion') 199 | require('nn.L1Cost') 200 | require('nn.L1Penalty') 201 | require('nn.WeightedMSECriterion') 202 | require('nn.BCECriterion') 203 | require('nn.CrossEntropyCriterion') 204 | require('nn.ParallelCriterion') 205 | require('nn.DistanceRatioCriterion') 206 | require('nn.ModuleCriterion') 207 | 208 | require('nn.PixelShuffle') 209 | 210 | require('nn.StochasticGradient') 211 | 212 | require('nn.MM') 213 | require('nn.MV') 214 | 215 | require('nn.Jacobian') 216 | require('nn.SparseJacobian') 217 | require('nn.hessian') 218 | require('nn.test') 219 | 220 | require('nn.SPELU') 221 | require('nn.MPELU') 222 | 223 | return nn 224 | 
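Because init.lua pulls in nn.Jacobian alongside nn.SPELU and nn.MPELU, the analytic gradients coded in the THNN/THCUNN kernels can be spot-checked against finite differences. A small sketch of such a check on the CPU path (the nn.Jacobian helpers are assumed to behave as in stock torch/nn; the error thresholds are illustrative):

-- Gradient spot-check sketch for nn.MPELU (illustrative thresholds).
require 'nn'

local module = nn.MPELU(1, 1, 8)           -- per-channel alpha and beta
local input  = torch.randn(2, 8, 5, 5)

-- max |analytic - numeric| for gradInput, alpha (weight) and beta (bias)
local errInput = nn.Jacobian.testJacobian(module, input)
local errAlpha = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight)
local errBeta  = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias)
assert(errInput < 1e-5 and errAlpha < 1e-5 and errBeta < 1e-5, 'MPELU gradient check failed')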
-------------------------------------------------------------------------------- /torch/extra/nn/lib/THNN/generic/MPELU.c: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/MPELU.c" 3 | #else 4 | 5 | void THNN_(MPELU_updateOutput)( 6 | THNNState *state, 7 | THTensor *input, 8 | THTensor *output, 9 | THTensor *weight, 10 | THTensor *bias, 11 | THIndex_t nOutputPlane) 12 | { 13 | THTensor_(resizeAs)(output, input); 14 | 15 | if (nOutputPlane == 0) 16 | { 17 | // handle shared parameter case 18 | real w = *THTensor_(data)(weight); 19 | real b = *THTensor_(data)(bias); 20 | TH_TENSOR_APPLY2(real, output, real, input, 21 | *output_data = (*input_data > 0) ? *input_data : w * ( exp(b * *input_data) - 1); 22 | ); 23 | } 24 | else 25 | { 26 | input = THTensor_(newContiguous)(input); 27 | long bs = 1, ks = 1; 28 | { 29 | long input_ndim = THTensor_(nDimension)(input); 30 | if (input->size[input_ndim > 1] != nOutputPlane) 31 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); 32 | 33 | if (input_ndim > 1) { 34 | bs = input->size[0]; 35 | for (int d = 2; d < input_ndim; d++) { 36 | ks *= input->size[d]; 37 | } 38 | } 39 | } 40 | 41 | real *output_data = THTensor_(data)(output); 42 | real *input_data = THTensor_(data)(input); 43 | real *weight_data = THTensor_(data)(weight); 44 | real *bias_data = THTensor_(data)(bias); 45 | THIndex_t i, j, k; 46 | #pragma omp parallel for private(j,k) 47 | for (i = 0; i < bs; ++i) 48 | { 49 | real* n_input_data = input_data + i*nOutputPlane*ks; 50 | real* n_output_data = output_data + i*nOutputPlane*ks; 51 | for (j = 0; j < nOutputPlane; ++j) 52 | { 53 | for (k = 0; k < ks; ++k) 54 | n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : weight_data[j] * (exp(bias_data[j] * n_input_data[k]) - 1); 55 | n_input_data += ks; 56 | n_output_data += ks; 57 | } 58 | } 59 | THTensor_(free)(input); 60 | } 61 | } 62 | 63 | void THNN_(MPELU_updateGradInput)( 64 | THNNState *state, 65 | THTensor *input, 66 | THTensor *gradOutput, 67 | THTensor *gradInput, 68 | THTensor *weight, 69 | THTensor *bias, 70 | THIndex_t nOutputPlane) 71 | { 72 | THNN_CHECK_NELEMENT(input, gradOutput); 73 | THTensor_(resizeAs)(gradInput, input); 74 | 75 | if (nOutputPlane == 0) 76 | { 77 | real w = THTensor_(data)(weight)[0]; 78 | real b = THTensor_(data)(bias)[0]; 79 | TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, 80 | if ((*input_data) > 0) 81 | *gradInput_data = *gradOutput_data; 82 | else 83 | *gradInput_data = w * b * (exp(b * *input_data)) * (*gradOutput_data); 84 | ); 85 | } 86 | else 87 | { 88 | input = THTensor_(newContiguous)(input); 89 | gradOutput = THTensor_(newContiguous)(gradOutput); 90 | weight = THTensor_(newContiguous)(weight); 91 | bias = THTensor_(newContiguous)(bias); 92 | const real *input_data = THTensor_(data)(input); 93 | const real *gradOutput_data = THTensor_(data)(gradOutput); 94 | const real *weight_data = THTensor_(data)(weight); 95 | const real *bias_data = THTensor_(data)(bias); 96 | real *gradInput_data = THTensor_(data)(gradInput); 97 | 98 | long bs = 1, ks = 1; 99 | { 100 | long input_ndim = THTensor_(nDimension)(input); 101 | if (input->size[input_ndim > 1] != nOutputPlane) 102 | THError("Wrong number of input planes. 
Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); 103 | 104 | if (input_ndim > 1) { 105 | bs = input->size[0]; 106 | for (int d = 2; d < input_ndim; d++) { 107 | ks *= input->size[d]; 108 | } 109 | } 110 | } 111 | 112 | THIndex_t i, j, k; 113 | #pragma omp parallel for private(j,k) 114 | for (i = 0; i < bs; ++i) 115 | { 116 | const real *n_input_data = input_data + i*nOutputPlane*ks; 117 | const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks; 118 | real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks; 119 | 120 | for (j = 0; j < nOutputPlane; ++j) 121 | { 122 | real w = weight_data[j]; 123 | real b = bias_data[j]; 124 | for (k = 0; k < ks; ++k) 125 | { 126 | if (n_input_data[k] > 0) 127 | n_gradInput_data[k] = n_gradOutput_data[k]; 128 | else 129 | n_gradInput_data[k] = n_gradOutput_data[k] * w * b * (exp(b * n_input_data[k])); 130 | } 131 | n_input_data += ks; 132 | n_gradInput_data += ks; 133 | n_gradOutput_data += ks; 134 | } 135 | } 136 | THTensor_(free)(input); 137 | THTensor_(free)(gradOutput); 138 | THTensor_(free)(weight); 139 | THTensor_(free)(bias); 140 | } 141 | } 142 | 143 | void THNN_(MPELU_accGradParameters)( 144 | THNNState *state, 145 | THTensor *input, 146 | THTensor *gradOutput, 147 | THTensor *gradInput, 148 | THTensor *weight, 149 | THTensor *gradWeight, 150 | THTensor *gradWeightBuf, 151 | THTensor *gradWeightBuf2, 152 | THTensor *bias, 153 | THTensor *gradBias, 154 | THTensor *gradBiasBuf, 155 | THTensor *gradBiasBuf2, 156 | THIndex_t nOutputPlane, 157 | accreal scale_) 158 | { 159 | real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); 160 | THNN_CHECK_NELEMENT(input, gradOutput); 161 | 162 | if (nOutputPlane == 0) 163 | { 164 | real w = *THTensor_(data)(weight); 165 | real b = *THTensor_(data)(bias); 166 | real *gradWeight_data = THTensor_(data)(gradWeight); 167 | real *gradBias_data = THTensor_(data)(gradBias); 168 | real sum = 0; 169 | real sumforb = 0; 170 | TH_TENSOR_APPLY2(real, input, real, gradOutput, 171 | if ((*input_data) <= 0) { 172 | sum += (exp(b * *input_data) - 1) * (*gradOutput_data); 173 | sumforb += (w * exp(b * *input_data) * *input_data) * (*gradOutput_data); 174 | } 175 | ); 176 | gradWeight_data[0] += scale * sum; 177 | gradBias_data[0] += scale * sumforb; 178 | } 179 | else 180 | { 181 | THArgCheck(THTensor_(isContiguous)(gradWeight), 6, "gradWeight needs to be contiguous"); 182 | THArgCheck(THTensor_(isContiguous)(gradBias), 6, "gradWeight needs to be contiguous"); 183 | input = THTensor_(newContiguous)(input); 184 | gradOutput = THTensor_(newContiguous)(gradOutput); 185 | weight = THTensor_(newContiguous)(weight); 186 | bias = THTensor_(newContiguous)(bias); 187 | long bs = 1, ks = 1; 188 | { 189 | long input_ndim = THTensor_(nDimension)(input); 190 | if (input->size[input_ndim > 1] != nOutputPlane) 191 | THError("Wrong number of input planes. 
Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); 192 | 193 | if (input_ndim > 1) { 194 | bs = input->size[0]; 195 | for (int d = 2; d < input_ndim; d++) { 196 | ks *= input->size[d]; 197 | } 198 | } 199 | } 200 | 201 | const real *input_data = THTensor_(data)(input); 202 | const real *gradOutput_data = THTensor_(data)(gradOutput); 203 | const real *weight_data = THTensor_(data)(weight); 204 | real *gradWeight_data = THTensor_(data)(gradWeight); 205 | 206 | const real *bias_data = THTensor_(data)(bias); 207 | real *gradBias_data = THTensor_(data)(gradBias); 208 | 209 | THIndex_t i, j, k; 210 | for (i = 0; i < bs; ++i) 211 | { 212 | const real *n_input_data = input_data + i*nOutputPlane*ks; 213 | const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks; 214 | 215 | for (j = 0; j < nOutputPlane; ++j) 216 | { 217 | real sum = 0; 218 | real sumforb = 0; 219 | for (k = 0; k < ks; ++k) 220 | if (n_input_data[k] <= 0) { 221 | sum += n_gradOutput_data[k] * (exp(bias_data[j] * n_input_data[k]) - 1); 222 | sumforb += n_gradOutput_data[k] * weight_data[j] * (exp(bias_data[j] * n_input_data[k]) * n_input_data[k]); 223 | } 224 | gradWeight_data[j] += scale * sum; 225 | gradBias_data[j] += scale * sumforb; 226 | n_input_data += ks; 227 | n_gradOutput_data += ks; 228 | } 229 | } 230 | THTensor_(free)(input); 231 | THTensor_(free)(gradOutput); 232 | THTensor_(free)(weight); 233 | THTensor_(free)(bias); 234 | } 235 | } 236 | 237 | #endif 238 | -------------------------------------------------------------------------------- /torch/extra/nn/lib/THNN/generic/SPELU.c: -------------------------------------------------------------------------------- 1 | #ifndef TH_GENERIC_FILE 2 | #define TH_GENERIC_FILE "generic/SPELU.c" 3 | #else 4 | 5 | void THNN_(SPELU_updateOutput)( 6 | THNNState *state, 7 | THTensor *input, 8 | THTensor *output, 9 | THTensor *weight, 10 | THIndex_t nOutputPlane) 11 | { 12 | THTensor_(resizeAs)(output, input); 13 | 14 | if (nOutputPlane == 0) 15 | { 16 | // handle shared parameter case 17 | real w = *THTensor_(data)(weight); 18 | TH_TENSOR_APPLY2(real, output, real, input, 19 | *output_data = (*input_data > 0) ? *input_data : w*( exp(*input_data) - 1); 20 | ); 21 | } 22 | else 23 | { 24 | input = THTensor_(newContiguous)(input); 25 | long bs = 1, ks = 1; 26 | { 27 | long input_ndim = THTensor_(nDimension)(input); 28 | if (input->size[input_ndim > 1] != nOutputPlane) 29 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); 30 | 31 | if (input_ndim > 1) { 32 | bs = input->size[0]; 33 | for (int d = 2; d < input_ndim; d++) { 34 | ks *= input->size[d]; 35 | } 36 | } 37 | } 38 | 39 | real *output_data = THTensor_(data)(output); 40 | real *input_data = THTensor_(data)(input); 41 | real *weight_data = THTensor_(data)(weight); 42 | THIndex_t i, j, k; 43 | #pragma omp parallel for private(j,k) 44 | for (i = 0; i < bs; ++i) 45 | { 46 | real* n_input_data = input_data + i*nOutputPlane*ks; 47 | real* n_output_data = output_data + i*nOutputPlane*ks; 48 | for (j = 0; j < nOutputPlane; ++j) 49 | { 50 | for (k = 0; k < ks; ++k) 51 | n_output_data[k] = (n_input_data[k] > 0) ? 
n_input_data[k] : weight_data[j] * (exp(n_input_data[k]) - 1); 52 | n_input_data += ks; 53 | n_output_data += ks; 54 | } 55 | } 56 | THTensor_(free)(input); 57 | } 58 | } 59 | 60 | void THNN_(SPELU_updateGradInput)( 61 | THNNState *state, 62 | THTensor *input, 63 | THTensor *gradOutput, 64 | THTensor *gradInput, 65 | THTensor *weight, 66 | THIndex_t nOutputPlane) 67 | { 68 | THNN_CHECK_NELEMENT(input, gradOutput); 69 | THTensor_(resizeAs)(gradInput, input); 70 | 71 | if (nOutputPlane == 0) 72 | { 73 | real w = THTensor_(data)(weight)[0]; 74 | TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, 75 | if ((*input_data) > 0) 76 | *gradInput_data = *gradOutput_data; 77 | else 78 | *gradInput_data = w * (exp(*input_data)) * (*gradOutput_data); 79 | ); 80 | } 81 | else 82 | { 83 | input = THTensor_(newContiguous)(input); 84 | gradOutput = THTensor_(newContiguous)(gradOutput); 85 | weight = THTensor_(newContiguous)(weight); 86 | const real *input_data = THTensor_(data)(input); 87 | const real *gradOutput_data = THTensor_(data)(gradOutput); 88 | const real *weight_data = THTensor_(data)(weight); 89 | real *gradInput_data = THTensor_(data)(gradInput); 90 | 91 | long bs = 1, ks = 1; 92 | { 93 | long input_ndim = THTensor_(nDimension)(input); 94 | if (input->size[input_ndim > 1] != nOutputPlane) 95 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); 96 | 97 | if (input_ndim > 1) { 98 | bs = input->size[0]; 99 | for (int d = 2; d < input_ndim; d++) { 100 | ks *= input->size[d]; 101 | } 102 | } 103 | } 104 | 105 | THIndex_t i, j, k; 106 | #pragma omp parallel for private(j,k) 107 | for (i = 0; i < bs; ++i) 108 | { 109 | const real *n_input_data = input_data + i*nOutputPlane*ks; 110 | const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks; 111 | real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks; 112 | 113 | for (j = 0; j < nOutputPlane; ++j) 114 | { 115 | real w = weight_data[j]; 116 | for (k = 0; k < ks; ++k) 117 | { 118 | if (n_input_data[k] > 0) 119 | n_gradInput_data[k] = n_gradOutput_data[k]; 120 | else 121 | n_gradInput_data[k] = n_gradOutput_data[k] * w * (exp(n_input_data[k])); 122 | } 123 | n_input_data += ks; 124 | n_gradInput_data += ks; 125 | n_gradOutput_data += ks; 126 | } 127 | } 128 | THTensor_(free)(input); 129 | THTensor_(free)(gradOutput); 130 | THTensor_(free)(weight); 131 | } 132 | } 133 | 134 | void THNN_(SPELU_accGradParameters)( 135 | THNNState *state, 136 | THTensor *input, 137 | THTensor *gradOutput, 138 | THTensor *gradInput, 139 | THTensor *weight, 140 | THTensor *gradWeight, 141 | THTensor *gradWeightBuf, 142 | THTensor *gradWeightBuf2, 143 | THIndex_t nOutputPlane, 144 | accreal scale_) 145 | { 146 | real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); 147 | THNN_CHECK_NELEMENT(input, gradOutput); 148 | 149 | if (nOutputPlane == 0) 150 | { 151 | real *gradWeight_data = THTensor_(data)(gradWeight); 152 | real sum = 0; 153 | TH_TENSOR_APPLY2(real, input, real, gradOutput, 154 | if ((*input_data) <= 0) 155 | sum += (exp(*input_data) - 1) * (*gradOutput_data); 156 | ); 157 | gradWeight_data[0] += scale * sum; 158 | } 159 | else 160 | { 161 | THArgCheck(THTensor_(isContiguous)(gradWeight), 6, "gradWeight needs to be contiguous"); 162 | input = THTensor_(newContiguous)(input); 163 | gradOutput = THTensor_(newContiguous)(gradOutput); 164 | weight = THTensor_(newContiguous)(weight); 165 | long bs = 1, ks = 1; 166 | { 167 | long input_ndim = THTensor_(nDimension)(input); 168 | if 
(input->size[input_ndim > 1] != nOutputPlane) 169 | THError("Wrong number of input planes. Expected %d but got %d.", nOutputPlane, input->size[input_ndim > 1]); 170 | 171 | if (input_ndim > 1) { 172 | bs = input->size[0]; 173 | for (int d = 2; d < input_ndim; d++) { 174 | ks *= input->size[d]; 175 | } 176 | } 177 | } 178 | 179 | const real *input_data = THTensor_(data)(input); 180 | const real *gradOutput_data = THTensor_(data)(gradOutput); 181 | const real *weight_data = THTensor_(data)(weight); 182 | real *gradWeight_data = THTensor_(data)(gradWeight); 183 | 184 | THIndex_t i, j, k; 185 | for (i = 0; i < bs; ++i) 186 | { 187 | const real *n_input_data = input_data + i*nOutputPlane*ks; 188 | const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks; 189 | 190 | for (j = 0; j < nOutputPlane; ++j) 191 | { 192 | real sum = 0; 193 | for (k = 0; k < ks; ++k) 194 | if (n_input_data[k] <= 0) 195 | sum += n_gradOutput_data[k] * (exp(n_input_data[k]) - 1); 196 | gradWeight_data[j] += scale * sum; 197 | n_input_data += ks; 198 | n_gradOutput_data += ks; 199 | } 200 | } 201 | THTensor_(free)(input); 202 | THTensor_(free)(gradOutput); 203 | THTensor_(free)(weight); 204 | } 205 | } 206 | 207 | #endif 208 | -------------------------------------------------------------------------------- /torch/extra/nn/lib/THNN/init.c: -------------------------------------------------------------------------------- 1 | #include "TH.h" 2 | #include "THNN.h" 3 | 4 | #define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) 5 | #define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) 6 | 7 | #define THNN_CHECK_SHAPE(I1, I2) \ 8 | if (I1 != NULL && I2 != NULL && !THTensor_(isSameSizeAs)(I1, I2)) \ 9 | { \ 10 | THDescBuff s1 = THTensor_(sizeDesc)(I1); \ 11 | THDescBuff s2 = THTensor_(sizeDesc)(I2); \ 12 | THError(#I1 " and " #I2 " shapes do not match: " \ 13 | #I1 " %s, " #I2 " %s", s1.str, s2.str); \ 14 | } 15 | 16 | #define THNN_CHECK_SHAPE_INDICES(I1, I2) \ 17 | THLongStorage *size2 = THLongTensor_newSizeOf(I2); \ 18 | if (I1 != NULL && I2 != NULL && !THTensor_(isSize)(I1, size2)) \ 19 | { \ 20 | THDescBuff s1 = THTensor_(sizeDesc)(I1); \ 21 | THDescBuff s2 = THLongTensor_sizeDesc(I2); \ 22 | THLongStorage_free(size2); \ 23 | THError(#I1 " and " #I2 " shapes do not match: " \ 24 | #I1 " %s, " #I2 " %s", s1.str, s2.str); \ 25 | } else { \ 26 | THLongStorage_free(size2); \ 27 | } 28 | 29 | #define THNN_CHECK_NELEMENT(I1, I2) \ 30 | if (I1 != NULL && I2 != NULL ) { \ 31 | ptrdiff_t n1 = THTensor_(nElement)(I1); \ 32 | ptrdiff_t n2 = THTensor_(nElement)(I2); \ 33 | if (n1 != n2) \ 34 | { \ 35 | THDescBuff s1 = THTensor_(sizeDesc)(I1); \ 36 | THDescBuff s2 = THTensor_(sizeDesc)(I2); \ 37 | THError(#I1 " and " #I2 " have different number of elements: " \ 38 | #I1 "%s has %ld elements, while " \ 39 | #I2 "%s has %ld elements", s1.str, n1, s2.str, n2); \ 40 | } \ 41 | } 42 | 43 | #define THNN_CHECK_DIM_SIZE(T, DIM, DIM_SIZE, SIZE) \ 44 | if (THTensor_(nDimension)(T) != DIM || \ 45 | THTensor_(size)(T, DIM_SIZE) != SIZE) { \ 46 | THDescBuff s1 = THTensor_(sizeDesc)(T); \ 47 | THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ 48 | " but got " #T " to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ 49 | } 50 | 51 | #define THNN_CHECK_DIM_SIZE_INDICES(T, DIM, DIM_SIZE, SIZE) \ 52 | if (THIndexTensor_(nDimension)(T) != DIM || \ 53 | THIndexTensor_(size)(T, DIM_SIZE) != SIZE) { \ 54 | THDescBuff s1 = THIndexTensor_(sizeDesc)(T); \ 55 | THError("Need " #T " of dimension %d and " #T ".size[%d] == %d" \ 56 | " but got " #T " 
to be of shape: %s", DIM, DIM_SIZE, SIZE, s1.str); \ 57 | } 58 | 59 | #define THNN_ARGCHECK(COND, ARG, T, FORMAT) \ 60 | if (!(COND)) { \ 61 | THDescBuff s1 = THTensor_(sizeDesc)(T); \ 62 | THArgCheck(COND, ARG, FORMAT, s1.str); \ 63 | } 64 | 65 | #include "generic/Abs.c" 66 | #include "THGenerateFloatTypes.h" 67 | 68 | #include "generic/AbsCriterion.c" 69 | #include "THGenerateFloatTypes.h" 70 | 71 | #include "generic/BCECriterion.c" 72 | #include "THGenerateFloatTypes.h" 73 | 74 | #include "generic/ClassNLLCriterion.c" 75 | #include "THGenerateFloatTypes.h" 76 | 77 | #include "generic/SpatialClassNLLCriterion.c" 78 | #include "THGenerateFloatTypes.h" 79 | 80 | #include "generic/DistKLDivCriterion.c" 81 | #include "THGenerateFloatTypes.h" 82 | 83 | #include "generic/ELU.c" 84 | #include "THGenerateFloatTypes.h" 85 | 86 | #include "generic/HardShrink.c" 87 | #include "THGenerateFloatTypes.h" 88 | 89 | #include "generic/HardTanh.c" 90 | #include "THGenerateFloatTypes.h" 91 | 92 | #include "generic/GatedLinearUnit.c" 93 | #include "THGenerateFloatTypes.h" 94 | 95 | #include "generic/L1Cost.c" 96 | #include "THGenerateFloatTypes.h" 97 | 98 | #include "generic/LeakyReLU.c" 99 | #include "THGenerateFloatTypes.h" 100 | 101 | #include "generic/FusedRNNKernel.c" 102 | #include "THGenerateFloatTypes.h" 103 | 104 | #include "generic/LogSigmoid.c" 105 | #include "THGenerateFloatTypes.h" 106 | 107 | #include "generic/LogSoftMax.c" 108 | #include "THGenerateFloatTypes.h" 109 | 110 | #include "generic/LookupTable.c" 111 | #include "THGenerateFloatTypes.h" 112 | 113 | #include "generic/MSECriterion.c" 114 | #include "THGenerateFloatTypes.h" 115 | 116 | #include "generic/MarginCriterion.c" 117 | #include "THGenerateFloatTypes.h" 118 | 119 | #include "generic/SoftMarginCriterion.c" 120 | #include "THGenerateFloatTypes.h" 121 | 122 | #include "generic/MultiLabelMarginCriterion.c" 123 | #include "THGenerateFloatTypes.h" 124 | 125 | #include "generic/MultiMarginCriterion.c" 126 | #include "THGenerateFloatTypes.h" 127 | 128 | #include "generic/Linear.c" 129 | #include "THGenerateFloatTypes.h" 130 | 131 | #include "generic/PReLU.c" 132 | #include "THGenerateFloatTypes.h" 133 | 134 | #include "generic/RReLU.c" 135 | #include "THGenerateFloatTypes.h" 136 | 137 | #include "generic/Sigmoid.c" 138 | #include "THGenerateFloatTypes.h" 139 | 140 | #include "generic/SmoothL1Criterion.c" 141 | #include "THGenerateFloatTypes.h" 142 | 143 | #include "generic/SoftMax.c" 144 | #include "THGenerateFloatTypes.h" 145 | 146 | #include "generic/SoftPlus.c" 147 | #include "THGenerateFloatTypes.h" 148 | 149 | #include "generic/SoftShrink.c" 150 | #include "THGenerateFloatTypes.h" 151 | 152 | #include "generic/SparseLinear.c" 153 | #include "THGenerateFloatTypes.h" 154 | 155 | #include "generic/IndexLinear.c" 156 | #include "THGenerateFloatTypes.h" 157 | 158 | #include "generic/Sqrt.c" 159 | #include "THGenerateFloatTypes.h" 160 | 161 | #include "generic/Square.c" 162 | #include "THGenerateFloatTypes.h" 163 | 164 | #include "generic/Tanh.c" 165 | #include "THGenerateFloatTypes.h" 166 | 167 | #include "generic/Threshold.c" 168 | #include "THGenerateFloatTypes.h" 169 | 170 | #include "generic/TemporalConvolution.c" 171 | #include "THGenerateFloatTypes.h" 172 | 173 | #include "generic/TemporalSubSampling.c" 174 | #include "THGenerateFloatTypes.h" 175 | 176 | #include "generic/TemporalMaxPooling.c" 177 | #include "THGenerateFloatTypes.h" 178 | 179 | #include "generic/TemporalRowConvolution.c" 180 | #include "THGenerateFloatTypes.h" 181 
| 182 | #include "generic/BatchNormalization.c" 183 | #include "THGenerateFloatTypes.h" 184 | 185 | #include "generic/unfold.c" 186 | #include "THGenerateFloatTypes.h" 187 | 188 | #include "generic/SpatialConvolutionMap.c" 189 | #include "THGenerateFloatTypes.h" 190 | 191 | #include "generic/SpatialConvolutionMM.c" 192 | #include "THGenerateFloatTypes.h" 193 | 194 | #include "generic/SpatialDepthWiseConvolution.c" 195 | #include "THGenerateFloatTypes.h" 196 | 197 | #include "generic/SpatialConvolutionLocal.c" 198 | #include "THGenerateFloatTypes.h" 199 | 200 | #include "generic/SpatialFullConvolution.c" 201 | #include "THGenerateFloatTypes.h" 202 | 203 | #include "generic/SpatialFullConvolutionMap.c" 204 | #include "THGenerateFloatTypes.h" 205 | 206 | #include "generic/SpatialDilatedConvolution.c" 207 | #include "THGenerateFloatTypes.h" 208 | 209 | #include "generic/SpatialAdaptiveMaxPooling.c" 210 | #include "THGenerateFloatTypes.h" 211 | 212 | #include "generic/SpatialAdaptiveAveragePooling.c" 213 | #include "THGenerateFloatTypes.h" 214 | 215 | #include "generic/SpatialAveragePooling.c" 216 | #include "THGenerateFloatTypes.h" 217 | 218 | #include "generic/SpatialFractionalMaxPooling.c" 219 | #include "THGenerateFloatTypes.h" 220 | 221 | #include "generic/SpatialMaxPooling.c" 222 | #include "THGenerateFloatTypes.h" 223 | 224 | #include "generic/SpatialDilatedMaxPooling.c" 225 | #include "THGenerateFloatTypes.h" 226 | 227 | #include "generic/SpatialMaxUnpooling.c" 228 | #include "THGenerateFloatTypes.h" 229 | 230 | #include "generic/SpatialSubSampling.c" 231 | #include "THGenerateFloatTypes.h" 232 | 233 | #include "generic/SpatialUpSamplingNearest.c" 234 | #include "THGenerateFloatTypes.h" 235 | 236 | #include "generic/SpatialUpSamplingBilinear.c" 237 | #include "THGenerateFloatTypes.h" 238 | 239 | #include "generic/SpatialGridSamplerBilinear.c" 240 | #include "THGenerateFloatTypes.h" 241 | 242 | #include "generic/VolumetricAveragePooling.c" 243 | #include "THGenerateFloatTypes.h" 244 | 245 | #include "generic/VolumetricConvolution.c" 246 | #include "THGenerateFloatTypes.h" 247 | 248 | #include "generic/VolumetricConvolutionMM.c" 249 | #include "THGenerateFloatTypes.h" 250 | 251 | #include "generic/VolumetricFullConvolution.c" 252 | #include "THGenerateFloatTypes.h" 253 | 254 | #include "generic/VolumetricDilatedConvolution.c" 255 | #include "THGenerateFloatTypes.h" 256 | 257 | #include "generic/VolumetricMaxPooling.c" 258 | #include "THGenerateFloatTypes.h" 259 | 260 | #include "generic/VolumetricDilatedMaxPooling.c" 261 | #include "THGenerateFloatTypes.h" 262 | 263 | #include "generic/VolumetricFractionalMaxPooling.c" 264 | #include "THGenerateFloatTypes.h" 265 | 266 | #include "generic/VolumetricMaxUnpooling.c" 267 | #include "THGenerateFloatTypes.h" 268 | 269 | #include "generic/SpatialReflectionPadding.c" 270 | #include "THGenerateFloatTypes.h" 271 | 272 | #include "generic/SpatialReplicationPadding.c" 273 | #include "THGenerateFloatTypes.h" 274 | 275 | #include "generic/VolumetricReplicationPadding.c" 276 | #include "THGenerateFloatTypes.h" 277 | 278 | #include "generic/VolumetricUpSamplingNearest.c" 279 | #include "THGenerateFloatTypes.h" 280 | 281 | #include "generic/VolumetricUpSamplingTrilinear.c" 282 | #include "THGenerateFloatTypes.h" 283 | 284 | #include "generic/SPELU.c" 285 | #include "THGenerateFloatTypes.h" 286 | 287 | #include "generic/MPELU.c" 288 | #include "THGenerateFloatTypes.h" -------------------------------------------------------------------------------- 
/torch/models/MPELU-NoPre-ResNet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Coldmooon/Code-for-MPELU/46d137ab0e2cd568fbaf28d65a661f8d1b9fb163/torch/models/MPELU-NoPre-ResNet.jpg --------------------------------------------------------------------------------
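As a closing reference, the activation that all of the kernels above implement is MPELU(x) = x for x > 0 and alpha * (exp(beta * x) - 1) otherwise; SPELU is the special case beta = 1. The following plain-Torch CPU sketch mirrors the forward pass and its input gradient for shared scalar alpha and beta, and can serve as an independent check of the C/CUDA code (it is an illustrative re-implementation, not a file from the repository):

-- Element-wise MPELU reference in plain tensor ops (shared alpha, beta).
local function mpelu_forward(x, alpha, beta)
  local pos = torch.gt(x, 0):typeAs(x)               -- 1 where x > 0, else 0
  local neg = torch.add(torch.mul(pos, -1), 1)       -- 1 where x <= 0, else 0
  local expterm = torch.exp(torch.mul(x, beta)):add(-1):mul(alpha)
  return torch.cmul(pos, x):add(torch.cmul(neg, expterm))
end

local function mpelu_backward(x, gradOutput, alpha, beta)
  local pos = torch.gt(x, 0):typeAs(x)
  local neg = torch.add(torch.mul(pos, -1), 1)
  -- d/dx [ alpha * (exp(beta * x) - 1) ] = alpha * beta * exp(beta * x)
  local dneg = torch.exp(torch.mul(x, beta)):mul(alpha * beta)
  return torch.cmul(gradOutput, pos:add(torch.cmul(neg, dneg)))
end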