├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── example_prepare_resnet_cub200.sh ├── prepare-finetuning-batchscript.py ├── prepare-finetuning-vgg16.ipynb └── pretrained_models ├── caffe ├── download_caffe_model_link.txt └── train_val.prototxt ├── resnet50_cvgj ├── download_resnet50_cvgj_model_link.txt └── train_val.prototxt └── vgg16 ├── download_vgg16_model_link.txt └── train_val.prototxt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # caffe models 104 | *.caffemodel 105 | caffe_reference_imagenet_model 106 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "caffe_pp2"] 2 | path = caffe_pp2 3 | url = git@github.com:cvjena/caffe_pp2.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, Computer Vision Group Jena 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Alpha pooling for fine-grained recognition 2 | This repository contains code for our International Conference on Computer Vision publication ``[Generalized Orderless Pooling Performs Implicit Salient Matching](http://openaccess.thecvf.com/content_iccv_2017/html/Simon_Generalized_Orderless_Pooling_ICCV_2017_paper.html)''. It contains scripts for fine-tuning a pre-trained VGG16 model with our presented alpha-pooling approach. 
3 | 4 | ## Abstract of the paper 5 | Most recent CNN architectures use average pooling as a final feature encoding step. In the field of fine-grained recognition, however, recent global representations like bilinear pooling offer improved performance. In this paper, we generalize average and bilinear pooling to "alpha-pooling", allowing for learning the pooling strategy during training. In addition, we present a novel way to visualize decisions made by these approaches. We identify parts of training images having the highest influence on the prediction of a given test image. This allows for justifying decisions to users and also for analyzing the influence of semantic parts. For example, we can show that the higher capacity VGG16 model focuses much more on the bird's head than, e.g., the lower-capacity VGG-M model when recognizing fine-grained bird categories. Both contributions allow us to analyze the difference when moving between average and bilinear pooling. In addition, experiments show that our generalized approach can outperform both across a variety of standard datasets. 6 | 7 | ## Getting started 8 | You need our custom caffe located at [https://github.com/cvjena/caffe_pp2](https://github.com/cvjena/caffe_pp2), which has our own SignedPowerLayer with learnable power as well as a [spatial transformer layer](https://github.com/daerduoCarey/SpatialTransformerLayer) used for on-the-fly image resizing and a [compact bilinear layer](https://github.com/gy20073/compact_bilinear_pooling) for computing the outer product in an efficient manner. Please clone and compile caffe_pp2 as well as its python interface. We use python 3 in all our experiments. 9 | 10 | ## Preparation of the dataset 11 | We use an ImageData layer in our experiments. This layer is required in order to use the scripts provided here. Hence you will need a list of train images and a list of test images. 
Each file should contain the path to the respective images relative to `--image_root` and the label as an integer, separated by a space. This means the files should look like 12 | 13 | ``` 14 | /path/to/dataset/class1/image1.jpg 1 15 | /path/to/dataset/class1/image2.jpg 1 16 | /path/to/dataset/class2/image1.jpg 2 17 | /path/to/dataset/class2/image2.jpg 2 18 | ``` 19 | 20 | The paths to these files are used in the following scripts and are called *train_imagelist* and *val_imagelist*. 21 | 22 | ## How to learn an alpha-pooling model 23 | We provide a batch script and a Jupyter notebook to prepare the fine-tuning. 24 | The usage of the batch script is described in the --help message: 25 | 26 | usage: prepare-finetuning-batchscript.py [-h] [--init_weights INIT_WEIGHTS] 27 | [--label LABEL] [--gpu_id GPU_ID] 28 | [--num_classes NUM_CLASSES] 29 | [--image_root IMAGE_ROOT] 30 | train_imagelist val_imagelist 31 | 32 | Prepare fine-tuning of multiscale alpha pooling. The working directory should 33 | contain train_val.prototxt of vgg16. The models will be created in the 34 | subfolders. 35 | 36 | positional arguments: 37 | train_imagelist Path to imagelist containing the training images. Each 38 | line should contain the path to an image followed by a 39 | space and the class ID. 40 | val_imagelist Path to imagelist containing the validation images. 41 | Each line should contain the path to an image followed 42 | by a space and the class ID. 43 | 44 | optional arguments: 45 | -h, --help show this help message and exit 46 | --init_weights INIT_WEIGHTS 47 | Path to the pre-trained vgg16 model 48 | --label LABEL Label of the created output folder 49 | --gpu_id GPU_ID ID of the GPU to use 50 | --num_classes NUM_CLASSES 51 | Number of object categories 52 | --image_root IMAGE_ROOT 53 | Image root folder, used to set the root_folder 54 | parameter of the ImageData layer of caffe. 55 | 56 | The usage of the notebook is explained in its comments.
Please note that gamma in the scripts refers to alpha in the paper due to last-minute renaming of the approach before submission. 57 | 58 | The scripts prepare the prototxt and solver for learning the model. In addition, they also learn the last classification layer already. After the preparation, you can fine-tune the network using the created ft.solver file in the finetuning subfolder. *Please note that our implementation only supports GPU computation, as the SignedPowerLayer in caffe_pp2 has only a GPU implementation at the moment.* 59 | 60 | 61 | ## How to learn another architecture 62 | The code shows the fine-tuning preparation for VGG16. If you want to learn another model, you will need a train_val.prototxt, which has two ImageData layers. It is probably best to take your existing train_val.prototxt and replace your data layers with the ImageData layers of our VGG16 train_val.prototxt. Our script does not support LMDB or any other types of layers, but could probably be adapted for it. After these adjustments, you might also need to adjust the notebook or prepare-finetuning-batchscript.py, depending on what you are using. 63 | 64 | Feel free to try any other model, for example our caffe implementation of ResNet50 from https://github.com/cvjena/cnn-models/tree/master/ResNet_preact/ResNet50_cvgj 65 | 66 | ## Accuracy 67 | With VGG16 and a resolution of 224 and 560 pixels on the smaller side of the image, you should achieve the 85.3% top-1 accuracy reported in the paper.
Complete list of results: 68 | 69 | |Dataset|CUB200-2011|Aircraft|40 actions| 70 | |---|---|---|---| 71 | |classes / images| 200 / 12k | 89 / 10k |40 / 9.5k| 72 | |Previous| 81.0% [24]| 72.5% [6]| 72.0% [36]| 73 | ||82.0% [17]| 78.0% [22] |80.9% [4]| 74 | ||84.5% [34] |80.7% [13]| 81.7% [22]| 75 | |Special case: bilinear [19] |84.1%| 84.1% |-| 76 | |Learned strategy (Ours)| 85.3% |85.5% |86.0%| 77 | 78 | Note: running the training longer than the predefined number of iterations leads to a higher accuracy and is necessary to reproduce the paper results. 79 | 80 | ## Citation 81 | Please cite the corresponding ICCV 2017 publication if our models helped your research: 82 | 83 | ``` 84 | @inproceedings{Simon17_GOP, 85 | title = {Generalized orderless pooling performs implicit salient matching}, 86 | booktitle = {International Conference on Computer Vision (ICCV)}, 87 | author = {Marcel Simon and Yang Gao and Trevor Darrell and Joachim Denzler and Erik Rodner}, 88 | year = {2017}, 89 | } 90 | ``` 91 | 92 | ### License and support 93 | The code is released under the BSD 2-clause license, allowing both academic and commercial use. I would appreciate it if you give credit to this work by citing our paper in academic works and referencing this GitHub repository in commercial works. If you need any support, please open an issue or contact [Marcel Simon](https://marcelsimon.com/).
94 | -------------------------------------------------------------------------------- /example_prepare_resnet_cub200.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python prepare-finetuning-batchscript.py \ 3 | --init_weights /path/to/pretrained_models/resnet50/ft_iter_320000.caffemodel \ 4 | --tag cub200 \ 5 | --gpu_id 0 \ 6 | --num_classes 201 \ 7 | --image_root /path/to/cub200/images/ \ 8 | --chop_off_layer last_relu \ 9 | --train_batch_size 8 \ 10 | --architecture resnet50 \ 11 | /path/to/cub200/train_images.txt \ 12 | /path/to/cub200/test_images.txt 13 | 14 | -------------------------------------------------------------------------------- /prepare-finetuning-batchscript.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import caffe 3 | import h5py 4 | import numpy as np 5 | import os 6 | import google.protobuf 7 | import google.protobuf.text_format 8 | import uuid 9 | import pyprind 10 | import argparse 11 | import random 12 | 13 | 14 | parser = argparse.ArgumentParser(description='Prepare fine-tuning of multiscale alpha pooling. The working directory should contain train_val.prototxt of vgg16. The models will be created in the subfolders.') 15 | parser.add_argument('train_imagelist', type=str, help='Path to imagelist containing the training images. Each line should contain the path to an image followed by a space and the class ID.') 16 | parser.add_argument('val_imagelist', type=str, help='Path to imagelist containing the validation images. 
Each line should contain the path to an image followed by a space and the class ID.') 17 | parser.add_argument('--init_weights', type=str, help='Path to the pre-trained vgg16 model', default='./pretrained_models/vgg16/vgg16_imagenet.caffemodel') 18 | parser.add_argument('--tag', type=str, help='Tag of the created output folder', default='nolabel') 19 | parser.add_argument('--gpu_id', type=int, help='ID of the GPU to use', default=0) 20 | parser.add_argument('--num_classes', type=int, help='Number of object categories', default=1000) 21 | parser.add_argument('--image_root', type=str, help='Image root folder, used to set the root_folder parameter of the ImageData layer of caffe.', default='/') 22 | parser.add_argument('--architecture', type=str, help='CNN architecture to use as basis. Should be a folder name present in the ./pretrained_models/ directory. Should contain a prepared train_val.prototxt.', default='vgg16') 23 | parser.add_argument('--chop_off_layer', type=str, help='Layer in the selected CNN architecture to compute the alpha pooling features from.', default='relu5_3') 24 | parser.add_argument('--train_batch_size', type=int, help='Batch size in training. Should be between 1 and 8, as we will use iter_size to achieve an effective batch size of 8. For network with batch norm, a batch size of 4 or greater is required to avoid divergence and 8 is recommended if you have enough GPU memory.', default=8) 25 | parser.add_argument('--resolutions', nargs='+', type=int, default=[224,560], help='The input size of the different multi-scale branches.') 26 | parser.add_argument('--crop_size', type=int, default=None, help='The crop size of the augmented input image. Should be at least as high as the maximum of --resolutions' ) 27 | parser.add_argument('--augmentation_resize', nargs=2, type=int, default=[560,640], help='Images are randomly resized before cropping to the minimal and maximal length of the smaller side. You can provide the minimal and maximal length here. 
Should be larger than --crop_size.') 28 | args = parser.parse_args() 29 | 30 | # Some other parameters, usually you don't need to change this 31 | initial_alpha = 2.0 32 | chop_off_layer = args.chop_off_layer 33 | resize_size = args.augmentation_resize 34 | if args.crop_size is None: 35 | crop_size = resize_size[0] 36 | else: 37 | crop_size = args.crop_size 38 | resolutions = args.resolutions 39 | prefix_template = 'res%i/' 40 | num_classes = args.num_classes 41 | init_weights = os.path.abspath(args.init_weights) 42 | 43 | caffe.set_device(args.gpu_id) 44 | caffe.set_mode_gpu() 45 | 46 | # Create parameter files 47 | # Net 48 | netparams_in = caffe.proto.caffe_pb2.NetParameter() 49 | protofile = os.getcwd() + '/pretrained_models/' + args.architecture +'/train_val.prototxt' 50 | google.protobuf.text_format.Merge(open(protofile).read(),netparams_in) 51 | 52 | # In[3]: 53 | 54 | # Change to working dir 55 | working_dir = 'finetuning/%s_%s_%s'%(args.architecture, args.tag, str(uuid.uuid4())) 56 | try: os.makedirs(working_dir) 57 | except: pass 58 | os.chdir(working_dir) 59 | 60 | assert(args.chop_off_layer in [l.name for l in netparams_in.layer]), 'Chop off layer not found. 
I can only find the layers {}'.format([l.name for l in netparams_in.layer]) 61 | 62 | # Prepare data layer 63 | lyr = netparams_in.layer 64 | lyr[0].image_data_param.source = args.train_imagelist 65 | lyr[0].image_data_param.root_folder = args.image_root 66 | lyr[0].image_data_param.batch_size = args.train_batch_size 67 | [lyr[0].image_data_param.smaller_side_size.append(0) for _ in range(2-len(lyr[0].image_data_param.smaller_side_size))] 68 | lyr[0].type = 'ImageData' 69 | 70 | lyr[1].image_data_param.source = args.val_imagelist 71 | lyr[1].image_data_param.root_folder = args.image_root 72 | lyr[1].image_data_param.batch_size = 1 73 | lyr[1].type = 'ImageData' 74 | 75 | # Write out init prototxt with correct paths for copying 76 | open('original.prototxt','w').write(google.protobuf.text_format.MessageToString(netparams_in)) 77 | 78 | lyr[0].transform_param.crop_size = crop_size 79 | lyr[0].image_data_param.smaller_side_size[0] = resize_size[0] 80 | lyr[0].image_data_param.smaller_side_size[1] = resize_size[1] 81 | 82 | lyr[1].transform_param.crop_size = crop_size 83 | [lyr[1].image_data_param.smaller_side_size.append(0) for _ in range(2-len(lyr[1].image_data_param.smaller_side_size))] 84 | lyr[1].image_data_param.smaller_side_size[0] = crop_size 85 | lyr[1].image_data_param.smaller_side_size[1] = crop_size 86 | 87 | # Add batch norm 88 | netparams = caffe.proto.caffe_pb2.NetParameter() 89 | netparams.name = netparams_in.name 90 | 91 | alpha_outputs = [] 92 | 93 | 94 | # Input layers 95 | for idx, l in enumerate(netparams_in.layer): 96 | if l.type in ['ImageData', 'Data']: 97 | netparams.layer.add() 98 | netparams.layer[-1].MergeFrom(l) 99 | 100 | for idx, l in enumerate(netparams_in.layer): 101 | if l.type in ['ImageData', 'Data']: 102 | netparams.layer.add() 103 | netparams.layer[-1].name = 'zeros' 104 | netparams.layer[-1].type = 'DummyData' 105 | netparams.layer[-1].top.append('zeros') 106 | netparams.layer[-1].dummy_data_param.shape.add() 107 | 
netparams.layer[-1].dummy_data_param.shape[0].dim.extend([l.image_data_param.batch_size,1]) 108 | netparams.layer[-1].include.add() 109 | netparams.layer[-1].include[0].phase = l.include[0].phase 110 | 111 | 112 | # In[9]: 113 | 114 | 115 | for res_idx, res in enumerate(resolutions): 116 | prefix = prefix_template%res 117 | netparams.layer.add() 118 | netparams.layer[-1].name = prefix + netparams_in.layer[0].top[0] 119 | netparams.layer[-1].type = 'SpatialTransformer' 120 | netparams.layer[-1].bottom.append(netparams_in.layer[0].top[0]) 121 | netparams.layer[-1].bottom.append('zeros') 122 | netparams.layer[-1].top.append(netparams.layer[-1].name) 123 | netparams.layer[-1].st_param.theta_1_1 = 1 124 | netparams.layer[-1].st_param.theta_1_2 = 0 125 | netparams.layer[-1].st_param.theta_1_3 = 0 126 | netparams.layer[-1].st_param.theta_2_1 = 0 127 | netparams.layer[-1].st_param.theta_2_2 = 1 128 | #netparams.layer[-1].st_param.theta_2_3 = 0 129 | netparams.layer[-1].st_param.to_compute_dU = False 130 | netparams.layer[-1].st_param.output_H = res; 131 | netparams.layer[-1].st_param.output_W = res; 132 | 133 | 134 | # In[10]: 135 | 136 | 137 | for res_idx, res in enumerate(resolutions): 138 | for idx, l in enumerate(netparams_in.layer): 139 | if l.type in ['ImageData', 'Data']: 140 | continue 141 | netparams.layer.add() 142 | netparams.layer[-1].MergeFrom(l) 143 | prefix = prefix_template%res 144 | netparams.layer[-1].name = prefix + netparams.layer[-1].name 145 | for i in range(len(l.top)): 146 | netparams.layer[-1].top[i] = prefix + netparams.layer[-1].top[i] 147 | for i in range(len(l.bottom)): 148 | netparams.layer[-1].bottom[i] = prefix + netparams.layer[-1].bottom[i] 149 | for param_idx, p in enumerate(netparams.layer[-1].param): 150 | p.name = '%s_param%i'%(l.name,param_idx) 151 | 152 | if l.name == chop_off_layer: 153 | break 154 | 155 | # Add alpha layer 156 | netparams.layer.add() 157 | netparams.layer[-1].name = prefix + 'alpha_power' 158 | 
netparams.layer[-1].type = 'SignedPower' 159 | netparams.layer[-1].bottom.append(netparams.layer[-2].top[0]) 160 | netparams.layer[-1].top.append(netparams.layer[-1].name) 161 | netparams.layer[-1].power_param.power = initial_alpha - 1 162 | netparams.layer[-1].param.add() 163 | netparams.layer[-1].param[0].name = 'alpha_power' 164 | netparams.layer[-1].param[0].lr_mult = 10 165 | netparams.layer[-1].param[0].decay_mult = 0 166 | 167 | # Add outer product layer 168 | netparams.layer.add() 169 | netparams.layer[-1].name = prefix + 'outer_product' 170 | netparams.layer[-1].type = 'CompactBilinear' 171 | netparams.layer[-1].bottom.append(netparams.layer[-3].top[0]) 172 | netparams.layer[-1].bottom.append(netparams.layer[-2].top[0]) 173 | netparams.layer[-1].top.append(netparams.layer[-1].name) 174 | netparams.layer[-1].compact_bilinear_param.num_output = 8192 175 | 176 | alpha_outputs.append(netparams.layer[-1].top[0]) 177 | 178 | 179 | # In[11]: 180 | 181 | 182 | if len(alpha_outputs)>1: 183 | netparams.layer.add() 184 | netparams.layer[-1].name = 'sum' 185 | netparams.layer[-1].type = 'Eltwise' 186 | for alpha_out in alpha_outputs: 187 | netparams.layer[-1].bottom.append(alpha_out) 188 | netparams.layer[-1].top.append(netparams.layer[-1].name) 189 | 190 | if True: 191 | netparams.layer.add() 192 | netparams.layer[-1].name = 'root' 193 | netparams.layer[-1].type = 'SignedPower' 194 | netparams.layer[-1].bottom.append(netparams.layer[-2].name) 195 | netparams.layer[-1].top.append(netparams.layer[-1].name) 196 | netparams.layer[-1].power_param.power = 0.5 #1.0 / (gamma) 197 | netparams.layer[-1].param.add() 198 | netparams.layer[-1].param[0].lr_mult = 0 199 | netparams.layer[-1].param[0].decay_mult = 0 200 | 201 | if False: 202 | # Add reshape for global bn 203 | netparams.layer.add() 204 | netparams.layer[-1].name = 'final_dropout' 205 | netparams.layer[-1].type = 'Dropout' 206 | netparams.layer[-1].bottom.append(netparams.layer[-2].top[0]) 207 | 
netparams.layer[-1].top.append(netparams.layer[-1].name) 208 | netparams.layer[-1].dropout_param.dropout_ratio = 0.5 209 | 210 | if True: 211 | netparams.layer.add() 212 | netparams.layer[-1].name = 'l2' 213 | netparams.layer[-1].type = 'L2Normalize' 214 | netparams.layer[-1].bottom.append(netparams.layer[-2].top[0]) 215 | netparams.layer[-1].top.append(netparams.layer[-1].name) 216 | 217 | # fc8 218 | netparams.layer.add() 219 | netparams.layer[-1].name = 'fc8_ft' 220 | netparams.layer[-1].type = 'InnerProduct' 221 | netparams.layer[-1].bottom.append(netparams.layer[-2].top[0]) 222 | netparams.layer[-1].top.append(netparams.layer[-1].name) 223 | netparams.layer[-1].inner_product_param.num_output = num_classes 224 | [netparams.layer[-1].param.add() for _ in range(2)] 225 | netparams.layer[-1].param[0].lr_mult = 1 226 | netparams.layer[-1].param[0].decay_mult = 1 227 | netparams.layer[-1].param[1].lr_mult = 2 228 | netparams.layer[-1].param[1].decay_mult = 2 229 | 230 | # Accuracy 231 | netparams.layer.add() 232 | netparams.layer[-1].name = 'loss' 233 | netparams.layer[-1].type = 'SoftmaxWithLoss' 234 | netparams.layer[-1].bottom.append(netparams.layer[-2].top[0]) 235 | netparams.layer[-1].bottom.append('label') 236 | netparams.layer[-1].top.append(netparams.layer[-1].name) 237 | 238 | # Softmax 239 | netparams.layer.add() 240 | netparams.layer[-1].name = 'Accuracy' 241 | netparams.layer[-1].type = 'Accuracy' 242 | netparams.layer[-1].bottom.append(netparams.layer[-3].top[0]) 243 | netparams.layer[-1].bottom.append('label') 244 | netparams.layer[-1].top.append(netparams.layer[-1].name) 245 | netparams.layer[-1].include.add() 246 | netparams.layer[-1].include[0].phase = 1 247 | 248 | for l in netparams.layer: 249 | if l.type == 'BatchNorm': 250 | #l.batch_norm_param.use_global_mean_in_training = False 251 | l.batch_norm_param.moving_average_fraction = 0.997 252 | 253 | num_images = [len([None for _ in open(netparams.layer[i].image_data_param.source,'r')]) for i in 
[0,1]] 254 | iter_per_epoch = int(num_images[0]/32) 255 | assert iter_per_epoch>0 256 | 257 | # Solver 258 | solverfile = 'ft.solver' 259 | params = caffe.proto.caffe_pb2.SolverParameter() 260 | params.net = u'ft.prototxt' 261 | params.test_iter.append(int(len([None for _ in open(netparams.layer[1].image_data_param.source,'rt')]) / lyr[1].image_data_param.batch_size)) 262 | params.test_interval = 10000 263 | params.test_initialization = True 264 | params.base_lr = 0.001 265 | params.display = 100 266 | params.max_iter = 200 * iter_per_epoch 267 | params.lr_policy = "fixed" 268 | params.power = 1 269 | #params.stepsize = 100000 270 | #params.gamma = 0.1 271 | #params.momentum = 0.9 272 | params.weight_decay = 0.0005 273 | params.snapshot = 10000 274 | #params.random_seed = 0 275 | params.snapshot_prefix = "ft" 276 | params.net = "ft.prototxt" 277 | params.iter_size = int(8/lyr[0].image_data_param.batch_size) 278 | #params.type = "Nesterov" 279 | assert params.iter_size > 0 280 | open(solverfile,'w').write(google.protobuf.text_format.MessageToString(params)) 281 | open(params.net,'w').write(google.protobuf.text_format.MessageToString(netparams)) 282 | 283 | net_origin = caffe.Net('original.prototxt', init_weights, caffe.TEST) 284 | net_target = caffe.Net('ft.prototxt',caffe.TEST) 285 | 286 | for origin_param in net_origin.params.keys(): 287 | for res in resolutions: 288 | prefix = prefix_template%res 289 | target_param = prefix + origin_param 290 | if target_param in net_target.params: 291 | for idx in range(len(net_origin.params[origin_param])): 292 | #print('Copying %s[%i] to %s[%i]'%(origin_param, idx, target_param, idx)) 293 | net_target.params[target_param][idx].data[...] 
= net_origin.params[origin_param][idx].data 294 | 295 | net_target.save('model_init') 296 | del net_origin 297 | del net_target 298 | 299 | 300 | #Calc the features 301 | def calc_features(net, n_images, blobs): 302 | n_images = int(0.6*n_images) 303 | batchsize = net.blobs['data'].data.shape[0] 304 | feats = dict() 305 | for blob in blobs: 306 | out_shape = list(net.blobs[blob].data.shape) 307 | out_shape[0] = n_images 308 | print('Will allocate {:.2f} GiB of memory'.format(np.prod(out_shape)*2/1024/1024/1024)) 309 | feats[blob] = np.zeros(tuple(out_shape),dtype=np.float16 if not blob=='label' else np.int32) 310 | print('Need %.3f GiB'%(np.sum([x.nbytes for x in feats.values()])/1024/1024/1024)) 311 | 312 | for it in pyprind.prog_bar(range(0,n_images,batchsize),update_interval=10, stream=sys.stderr): 313 | net.forward() 314 | for blob in blobs: 315 | feats[blob][it:it+batchsize,...] = net.blobs[blob].data[:feats[blob][it:it+batchsize,...].shape[0],...] 316 | 317 | return [feats[blob] for blob in blobs] 318 | 319 | last_blob = [l.bottom[0] for l in netparams.layer if l.type == 'InnerProduct'][-1] 320 | 321 | solver = caffe.get_solver('ft.solver') 322 | solver.net.copy_from('model_init') 323 | train_feats,train_labels = calc_features(solver.net,num_images[0],[last_blob,'label']) 324 | del solver 325 | 326 | try: 327 | f = h5py.File('features.h5', "w") 328 | dset = f.create_dataset("feats", train_feats.shape, dtype='float16', compression="gzip", compression_opts=1) 329 | dset[...] = train_feats 330 | dset = f.create_dataset("labels", train_labels.shape, dtype='int32', compression="gzip", compression_opts=1) 331 | dset[...] 
= train_labels 332 | f.close() 333 | except e: 334 | pass 335 | 336 | 337 | 338 | netparams_fixed = caffe.proto.caffe_pb2.NetParameter() 339 | netparams_fixed.layer.add() 340 | netparams_fixed.layer[-1].name = 'data' 341 | netparams_fixed.layer[-1].type = 'Input' 342 | netparams_fixed.layer[-1].top.append(last_blob) 343 | netparams_fixed.layer[-1].input_param.shape.add() 344 | netparams_fixed.layer[-1].input_param.shape[0].dim.extend((32,) + train_feats.shape[1:]) 345 | 346 | netparams_fixed.layer.add() 347 | netparams_fixed.layer[-1].name = 'label' 348 | netparams_fixed.layer[-1].type = 'Input' 349 | netparams_fixed.layer[-1].top.append('label') 350 | netparams_fixed.layer[-1].input_param.shape.add() 351 | netparams_fixed.layer[-1].input_param.shape[0].dim.extend((32,)) 352 | # Add all layers after fc8 353 | approached_fc8 = False 354 | for l in netparams.layer: 355 | if l.name == 'fc8_ft': 356 | l.param[0].lr_mult = 1 357 | l.param[0].decay_mult = 1 358 | l.param[1].lr_mult = 1 359 | l.param[1].decay_mult = 1 360 | l.inner_product_param.weight_filler.std = 0.0001 361 | l.inner_product_param.bias_filler.value = 0 362 | approached_fc8 = approached_fc8 or l.name == 'fc8_ft' 363 | if approached_fc8: 364 | netparams_fixed.layer.add() 365 | netparams_fixed.layer[-1].MergeFrom(l) 366 | 367 | 368 | # In[42]: 369 | iter_per_epoch = int(iter_per_epoch) 370 | # Solver 371 | solverfile = 'ft_fixed.solver' 372 | params = caffe.proto.caffe_pb2.SolverParameter() 373 | params.net = u'ft_fixed.prototxt' 374 | #params.test_iter.append(1450) 375 | #params.test_interval = 1000 376 | params.test_initialization = False 377 | params.base_lr = 1 378 | params.display = 100 379 | params.max_iter = 360 * iter_per_epoch 380 | params.lr_policy = "multistep" 381 | params.stepvalue.extend([ep * iter_per_epoch for ep in [120,180,240,300]]) 382 | #params.power = 1 383 | #params.stepsize = 100000 384 | params.gamma = 0.25 385 | params.momentum = 0.9 386 | params.weight_decay = 0.000005 387 | 
params.snapshot = 10000000 388 | #params.random_seed = 0 389 | params.snapshot_prefix = "ft_fixed" 390 | params.iter_size = 1 391 | assert params.iter_size > 0 392 | open(solverfile,'w').write(google.protobuf.text_format.MessageToString(params)) 393 | open(params.net,'w').write(google.protobuf.text_format.MessageToString(netparams_fixed)) 394 | 395 | solver = caffe.get_solver('ft_fixed.solver') 396 | 397 | # Train 398 | for it in pyprind.prog_bar(range(params.max_iter), stream=sys.stderr): 399 | train_ids = random.sample(range(train_feats.shape[0]),32) 400 | solver.net.blobs[last_blob].data[...] = train_feats[train_ids,...] 401 | solver.net.blobs['label'].data[...] = train_labels[train_ids] 402 | solver.step(1) 403 | 404 | solver.net.save('model_lr') 405 | del solver 406 | 407 | solver = caffe.get_solver('ft.solver') 408 | solver.net.copy_from('model_init') 409 | solver.net.copy_from('model_lr') 410 | solver.net.save('model_lr') 411 | -------------------------------------------------------------------------------- /prepare-finetuning-vgg16.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Fine-tuning an $\\alpha$-pooling model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "We use a custom caffe framework, which implements a SignedPowerLayer. Please make sure to clone and make it before using this script and add the path to caffe in the following box." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "%load_ext autoreload\n", 24 | "%autoreload 2\n", 25 | "import sys\n", 26 | "from sklearn.svm import LinearSVC\n", 27 | "from sklearn.linear_model import LogisticRegression\n", 28 | "sys.path.append('/home/simon/Research/lib/caffe/python')\n", 29 | "sys.path.append('/home/simon/Research/finegrained/src/part_model_layer/part_autoencoder')\n", 30 | "sys.path.append('/home/simon/Research/generic/src/bilinear_logm/')\n", 31 | "\n", 32 | "import caffe\n", 33 | "import scipy.misc\n", 34 | "import h5py\n", 35 | "import scipy.io\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "import numpy as np\n", 38 | "import glob\n", 39 | "import time\n", 40 | "%matplotlib inline \n", 41 | "import os\n", 42 | "import matplotlib\n", 43 | "from sklearn.metrics import confusion_matrix\n", 44 | "import google.protobuf\n", 45 | "import uuid\n", 46 | "import pyprind\n", 47 | "import random\n", 48 | "import google.protobuf.text_format\n", 49 | "caffe.set_device(0)\n", 50 | "caffe.set_mode_gpu()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "The following box contains most things you might want to adjust" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Initial value for alpha, called gamma in this file\n", 67 | "gamma = 2.0\n", 68 | "chop_off_layer = 'relu5_3'\n", 69 | "# Resize images to this size before cropping for data augmentation\n", 70 | "resize_size = 640\n", 71 | "# Actual crop size\n", 72 | "crop_size = 560\n", 73 | "# The resolutions to extract alpha-pooling features from\n", 74 | "resolutions = [224,560]\n", 75 | "prefix_template = 'res%i/'\n", 76 | "# Number of object classes\n", 77 | "num_classes = 201\n", 78 | "init_model = './vgg16-training/vgg16_imagenet.caffemodel'" 79 | ] 80 | }, 81 | { 82 
| "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "# Create parameter files\n", 88 | "# Net\n", 89 | "netparams_in = caffe.proto.caffe_pb2.NetParameter()\n", 90 | "protofile = './vgg16-training/train_val.prototxt'\n", 91 | "google.protobuf.text_format.Merge(open(protofile).read(),netparams_in)\n", 92 | "\n", 93 | "# Solver\n", 94 | "params = caffe.proto.caffe_pb2.SolverParameter()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Change to working dir\n", 104 | "working_dir = 'finetuning/finetuning_%s'%(str(uuid.uuid4()))\n", 105 | "try: os.makedirs(working_dir) \n", 106 | "except: pass\n", 107 | "os.chdir(working_dir)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### Add second branch" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "In this section, we take a prepared prototxt and adjust it for our needs. You might want to adjust the path to the image data here." 
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Prepare data layer\n", 131 | "lyr = netparams_in.layer\n", 132 | "lyr[0].image_data_param.source = '/home/simon/Datasets/CUB_200_2011/train_images.txt'\n", 133 | "lyr[0].image_data_param.root_folder = '/home/simon/Datasets/CUB_200_2011/images/'\n", 134 | "lyr[0].image_data_param.batch_size = 8\n", 135 | "lyr[0].image_data_param.smaller_side_size[0] = resize_size\n", 136 | "#lyr[0].image_data_param.smaller_side_size[1] = crop_size\n", 137 | "lyr[0].transform_param.crop_size = crop_size\n", 138 | "lyr[0].type = 'ImageData'\n", 139 | "\n", 140 | "lyr[1].image_data_param.source = '/home/simon/Datasets/CUB_200_2011/test_images.txt'\n", 141 | "lyr[1].image_data_param.root_folder = '/home/simon/Datasets/CUB_200_2011/images/'\n", 142 | "lyr[1].image_data_param.batch_size = 1\n", 143 | "lyr[1].image_data_param.smaller_side_size[0] = resize_size\n", 144 | "#lyr[1].image_data_param.smaller_side_size[1] = crop_size\n", 145 | "lyr[1].transform_param.crop_size = crop_size\n", 146 | "lyr[1].type = 'ImageData'" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "# Add batch norm\n", 156 | "netparams = caffe.proto.caffe_pb2.NetParameter()\n", 157 | "netparams.name = netparams_in.name\n", 158 | "\n", 159 | "bilinear_outputs = []" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# Input layers\n", 169 | "for idx, l in enumerate(netparams_in.layer):\n", 170 | " if l.type in ['ImageData', 'Data']:\n", 171 | " netparams.layer.add()\n", 172 | " netparams.layer[-1].MergeFrom(l)\n", 173 | "\n", 174 | "for idx, l in enumerate(netparams_in.layer):\n", 175 | " if l.type in ['ImageData', 'Data']:\n", 176 | " netparams.layer.add()\n", 177 | " 
netparams.layer[-1].name = 'zeros'\n", 178 | " netparams.layer[-1].type = 'DummyData'\n", 179 | " netparams.layer[-1].top.append('zeros')\n", 180 | " netparams.layer[-1].dummy_data_param.shape.add()\n", 181 | " netparams.layer[-1].dummy_data_param.shape[0].dim.extend([l.image_data_param.batch_size,1])\n", 182 | " netparams.layer[-1].include.add()\n", 183 | " netparams.layer[-1].include[0].phase = l.include[0].phase" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# Resize layers\n", 193 | "for res_idx, res in enumerate(resolutions):\n", 194 | " prefix = prefix_template%res \n", 195 | " netparams.layer.add()\n", 196 | " netparams.layer[-1].name = prefix + netparams_in.layer[0].top[0]\n", 197 | " netparams.layer[-1].type = 'SpatialTransformer'\n", 198 | " netparams.layer[-1].bottom.append(netparams_in.layer[0].top[0])\n", 199 | " netparams.layer[-1].bottom.append('zeros')\n", 200 | " netparams.layer[-1].top.append(netparams.layer[-1].name)\n", 201 | " netparams.layer[-1].st_param.theta_1_1 = 1\n", 202 | " netparams.layer[-1].st_param.theta_1_2 = 0\n", 203 | " netparams.layer[-1].st_param.theta_1_3 = 0\n", 204 | " netparams.layer[-1].st_param.theta_2_1 = 0\n", 205 | " netparams.layer[-1].st_param.theta_2_2 = 1\n", 206 | " #netparams.layer[-1].st_param.theta_2_3 = 0\n", 207 | " netparams.layer[-1].st_param.to_compute_dU = False\n", 208 | " netparams.layer[-1].st_param.output_H = res;\n", 209 | " netparams.layer[-1].st_param.output_W = res;" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "# for each resolution\n", 219 | "for res_idx, res in enumerate(resolutions):\n", 220 | " # Add all layers before chop_off\n", 221 | " for idx, l in enumerate(netparams_in.layer):\n", 222 | " if l.type in ['ImageData', 'Data']:\n", 223 | " continue\n", 224 | " 
netparams.layer.add()\n", 225 | " netparams.layer[-1].MergeFrom(l)\n", 226 | " prefix = prefix_template%res \n", 227 | " netparams.layer[-1].name = prefix + netparams.layer[-1].name \n", 228 | " for i in range(len(l.top)):\n", 229 | " netparams.layer[-1].top[i] = prefix + netparams.layer[-1].top[i]\n", 230 | " for i in range(len(l.bottom)):\n", 231 | " netparams.layer[-1].bottom[i] = prefix + netparams.layer[-1].bottom[i]\n", 232 | " for param_idx, p in enumerate(netparams.layer[-1].param):\n", 233 | " p.name = '%s_param%i'%(l.name,param_idx)\n", 234 | "\n", 235 | " if l.name == chop_off_layer:\n", 236 | " break\n", 237 | "\n", 238 | " # Add gamma layer\n", 239 | " netparams.layer.add()\n", 240 | " netparams.layer[-1].name = prefix + 'gamma_power'\n", 241 | " netparams.layer[-1].type = 'SignedPower'\n", 242 | " netparams.layer[-1].bottom.append(netparams.layer[-2].top[0])\n", 243 | " netparams.layer[-1].top.append(netparams.layer[-1].name)\n", 244 | " netparams.layer[-1].power_param.power = gamma - 1\n", 245 | " netparams.layer[-1].param.add()\n", 246 | " netparams.layer[-1].param[0].name = 'gamma_power'\n", 247 | " netparams.layer[-1].param[0].lr_mult = 10\n", 248 | " netparams.layer[-1].param[0].decay_mult = 0\n", 249 | "\n", 250 | " # Add bilinear layers \n", 251 | " netparams.layer.add()\n", 252 | " netparams.layer[-1].name = prefix + 'bilinear'\n", 253 | " netparams.layer[-1].type = 'CompactBilinear'\n", 254 | " netparams.layer[-1].bottom.append(netparams.layer[-3].top[0])\n", 255 | " netparams.layer[-1].bottom.append(netparams.layer[-2].top[0])\n", 256 | " netparams.layer[-1].top.append(netparams.layer[-1].name)\n", 257 | " netparams.layer[-1].compact_bilinear_param.num_output = 8192\n", 258 | "\n", 259 | " bilinear_outputs.append(netparams.layer[-1].top[0])" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# Normalization layers\n", 269 | "if 
len(bilinear_outputs)>1:\n", 270 | " netparams.layer.add()\n", 271 | " netparams.layer[-1].name = 'bilinear_sum'\n", 272 | " netparams.layer[-1].type = 'Eltwise'\n", 273 | " for bi_out in bilinear_outputs:\n", 274 | " netparams.layer[-1].bottom.append(bi_out)\n", 275 | " netparams.layer[-1].top.append(netparams.layer[-1].name)\n", 276 | "\n", 277 | "if True:\n", 278 | " netparams.layer.add()\n", 279 | " netparams.layer[-1].name = 'bilinear_gamma_root'\n", 280 | " netparams.layer[-1].type = 'SignedPower'\n", 281 | " netparams.layer[-1].bottom.append(netparams.layer[-2].name)\n", 282 | " netparams.layer[-1].top.append(netparams.layer[-1].name)\n", 283 | " netparams.layer[-1].power_param.power = 0.5 #1.0 / (gamma)\n", 284 | " netparams.layer[-1].param.add()\n", 285 | " netparams.layer[-1].param[0].lr_mult = 0\n", 286 | " netparams.layer[-1].param[0].decay_mult = 0\n", 287 | "\n", 288 | "if True:\n", 289 | " netparams.layer.add()\n", 290 | " netparams.layer[-1].name = 'bilinear_l2'\n", 291 | " netparams.layer[-1].type = 'L2Normalize'\n", 292 | " netparams.layer[-1].bottom.append(netparams.layer[-2].top[0])\n", 293 | " netparams.layer[-1].top.append(netparams.layer[-1].name)\n", 294 | "\n", 295 | "# fc8\n", 296 | "netparams.layer.add()\n", 297 | "netparams.layer[-1].name = 'fc8_ft'\n", 298 | "netparams.layer[-1].type = 'InnerProduct'\n", 299 | "netparams.layer[-1].bottom.append(netparams.layer[-2].top[0])\n", 300 | "netparams.layer[-1].top.append(netparams.layer[-1].name) \n", 301 | "netparams.layer[-1].inner_product_param.num_output = num_classes\n", 302 | "[netparams.layer[-1].param.add() for _ in range(2)]\n", 303 | "netparams.layer[-1].param[0].lr_mult = 1\n", 304 | "netparams.layer[-1].param[0].decay_mult = 1\n", 305 | "netparams.layer[-1].param[1].lr_mult = 2\n", 306 | "netparams.layer[-1].param[1].decay_mult = 2\n", 307 | "\n", 308 | "# Accuracy\n", 309 | "netparams.layer.add()\n", 310 | "netparams.layer[-1].name = 'loss'\n", 311 | "netparams.layer[-1].type = 
'SoftmaxWithLoss'\n", 312 | "netparams.layer[-1].bottom.append(netparams.layer[-2].top[0])\n", 313 | "netparams.layer[-1].bottom.append('label')\n", 314 | "netparams.layer[-1].top.append(netparams.layer[-1].name) \n", 315 | "\n", 316 | "# Softmax\n", 317 | "netparams.layer.add()\n", 318 | "netparams.layer[-1].name = 'Accuracy'\n", 319 | "netparams.layer[-1].type = 'Accuracy'\n", 320 | "netparams.layer[-1].bottom.append(netparams.layer[-3].top[0])\n", 321 | "netparams.layer[-1].bottom.append('label')\n", 322 | "netparams.layer[-1].top.append(netparams.layer[-1].name) \n", 323 | "netparams.layer[-1].include.add()\n", 324 | "netparams.layer[-1].include[0].phase = 1" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "# Learning rates and decays and so on\n", 334 | "for l in netparams.layer:\n", 335 | " if l.type in ['InnerProduct','Convolution','Scale']:\n", 336 | " [l.param.add() for _ in range(2 - len(l.param))]\n", 337 | " l.param[0].lr_mult = 1\n", 338 | " l.param[0].decay_mult = 1\n", 339 | " l.param[1].lr_mult = 2\n", 340 | " l.param[1].decay_mult = 2\n", 341 | " if l.type in ['InnerProduct']:\n", 342 | " l.inner_product_param.weight_filler.type = \"gaussian\"\n", 343 | " l.inner_product_param.weight_filler.ClearField('std')\n", 344 | " l.inner_product_param.weight_filler.std = 0.01\n", 345 | " l.inner_product_param.bias_filler.type = \"constant\"\n", 346 | " l.inner_product_param.bias_filler.value = 0.0\n", 347 | " if l.name in ['fc8_ft']:\n", 348 | " l.inner_product_param.weight_filler.type = \"gaussian\"\n", 349 | " l.inner_product_param.weight_filler.std = 0.000000001\n", 350 | " l.inner_product_param.bias_filler.type = \"constant\"\n", 351 | " l.inner_product_param.bias_filler.value = 0.01\n", 352 | " if l.type in ['Convolution']:\n", 353 | " l.convolution_param.weight_filler.type = \"gaussian\"\n", 354 | " 
l.convolution_param.weight_filler.ClearField('std')\n", 355 | "        l.convolution_param.weight_filler.std = 0.01\n", 356 | "        l.convolution_param.bias_filler.type = \"constant\"\n", 357 | "        l.convolution_param.bias_filler.value = 0.0\n", 358 | "    if l.type == \"BatchNorm\":\n", 359 | "        l.param[0].lr_mult = 0\n", 360 | "        l.param[1].lr_mult = 0\n", 361 | "        l.param[2].lr_mult = 0\n", 362 | "        l.batch_norm_param.ClearField('use_global_stats')\n", 363 | "#     if l.name in ['fc6','fc7']:\n", 364 | "#         l.inner_product_param.num_output = 2048" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "# Solver for fine-tuning\n", 374 | "solverfile = 'ft.solver'\n", 375 | "params = caffe.proto.caffe_pb2.SolverParameter()\n", 376 | "params.net = u'ft.prototxt'\n", 377 | "params.test_iter.append(int(len([None for _ in open(netparams.layer[1].image_data_param.source,'rt')]) / lyr[0].image_data_param.batch_size))\n", 378 | "params.test_interval = 10000\n", 379 | "params.test_initialization = True\n", 380 | "params.base_lr = 0.001\n", 381 | "params.display = 100\n", 382 | "params.max_iter = 1000000\n", 383 | "params.lr_policy = \"fixed\"\n", 384 | "params.power = 1\n", 385 | "#params.stepsize = 100000\n", 386 | "#params.gamma = 0.1\n", 387 | "#params.momentum = 0.9\n", 388 | "params.weight_decay = 0.0005\n", 389 | "params.snapshot = 10000\n", 390 | "#params.random_seed = 0\n", 391 | "params.snapshot_prefix = \"ft\"\n", 392 | "params.net = \"ft.prototxt\"\n", 393 | "params.iter_size = int(8/lyr[0].image_data_param.batch_size)\n", 394 | "#params.type = \"Nesterov\"\n", 395 | "assert params.iter_size > 0\n", 396 | "open(solverfile,'w').write(google.protobuf.text_format.MessageToString(params))\n", 397 | "open(params.net,'w').write(google.protobuf.text_format.MessageToString(netparams))" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "Copy the 
weights from the pre-trained model" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "os.getcwd()" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": null, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "net_origin = caffe.Net('../../'+protofile, '../../'+init_model, caffe.TEST)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "net_target = caffe.Net('ft.prototxt',caffe.TEST)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "metadata": { 438 | "scrolled": true 439 | }, 440 | "outputs": [], 441 | "source": [ 442 | "for origin_param in net_origin.params.keys():\n", 443 | " for res in resolutions:\n", 444 | " prefix = prefix_template%res\n", 445 | " target_param = prefix + origin_param\n", 446 | " if target_param in net_target.params:\n", 447 | " for idx in range(len(net_origin.params[origin_param])):\n", 448 | " #print('Copying %s[%i] to %s[%i]'%(origin_param, idx, target_param, idx))\n", 449 | " net_target.params[target_param][idx].data[...] 
= net_origin.params[origin_param][idx].data" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": null, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "if False: net_target.copy_from(init_model)" 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": null, 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "net_target.save('model_init')" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "del net_origin\n", 477 | "del net_target" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": {}, 483 | "source": [ 484 | "### Caffe LR init" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "To speed everything up, we calculate features for each image and learn only the classifier with it" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "#Calc the features\n", 501 | "def calc_features(net, n_images, blobs):\n", 502 | " batchsize = net.blobs['data'].data.shape[0]\n", 503 | " feats = dict()\n", 504 | " for blob in blobs:\n", 505 | " out_shape = list(net.blobs[blob].data.shape)\n", 506 | " out_shape[0] = n_images\n", 507 | " feats[blob] = np.zeros(tuple(out_shape),dtype=np.float16 if not blob=='label' else np.int32)\n", 508 | " print('Need %.3f GiB'%(np.sum([x.nbytes for x in feats.values()])/1024/1024/1024))\n", 509 | " \n", 510 | " for it in pyprind.prog_bar(range(0,n_images,batchsize),update_interval=10):\n", 511 | " net.forward()\n", 512 | " for blob in blobs:\n", 513 | " feats[blob][it:it+batchsize,...] 
= net.blobs[blob].data[:feats[blob][it:it+batchsize,...].shape[0],...]\n", 514 | " \n", 515 | " return [feats[blob] for blob in blobs]" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [ 524 | "num_images = [len([None for _ in open(netparams.layer[i].image_data_param.source,'r')]) for i in [0,1]]" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": {}, 531 | "outputs": [], 532 | "source": [ 533 | "last_blob = [l.bottom[0] for l in netparams.layer if l.type == 'InnerProduct'][-1]\n", 534 | "last_blob" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": { 541 | "scrolled": true 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "solver = caffe.get_solver('ft.solver')\n", 546 | "solver.net.copy_from('model_init')\n", 547 | "train_feats,train_labels = calc_features(solver.net,num_images[0],[last_blob,'label'])\n", 548 | "del solver" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": {}, 555 | "outputs": [], 556 | "source": [ 557 | "if False:\n", 558 | " solver = caffe.get_solver('ft.solver')\n", 559 | " solver.test_nets[0].copy_from('model_init')\n", 560 | " val_feats,val_labels = calc_features(solver.test_nets[0],num_images[1],[last_blob, 'label'])\n", 561 | " del solver.test_nets[0]\n", 562 | " del solver" 563 | ] 564 | }, 565 | { 566 | "cell_type": "code", 567 | "execution_count": null, 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "netparams_fixed = caffe.proto.caffe_pb2.NetParameter()\n", 572 | "netparams_fixed.layer.add()\n", 573 | "netparams_fixed.layer[-1].name = 'data'\n", 574 | "netparams_fixed.layer[-1].type = 'Input'\n", 575 | "netparams_fixed.layer[-1].top.append(last_blob)\n", 576 | "netparams_fixed.layer[-1].input_param.shape.add()\n", 577 | 
"netparams_fixed.layer[-1].input_param.shape[0].dim.extend((32,) + train_feats.shape[1:])\n", 578 | "\n", 579 | "netparams_fixed.layer.add()\n", 580 | "netparams_fixed.layer[-1].name = 'label'\n", 581 | "netparams_fixed.layer[-1].type = 'Input'\n", 582 | "netparams_fixed.layer[-1].top.append('label')\n", 583 | "netparams_fixed.layer[-1].input_param.shape.add()\n", 584 | "netparams_fixed.layer[-1].input_param.shape[0].dim.extend((32,))\n", 585 | "# Add all layers after fc8\n", 586 | "approached_fc8 = False\n", 587 | "for l in netparams.layer:\n", 588 | " if l.name == 'fc8_ft':\n", 589 | " l.param[0].lr_mult = 1\n", 590 | " l.param[0].decay_mult = 1\n", 591 | " l.param[1].lr_mult = 1\n", 592 | " l.param[1].decay_mult = 1\n", 593 | " l.inner_product_param.weight_filler.std = 0.0001\n", 594 | " l.inner_product_param.bias_filler.value = 0\n", 595 | " approached_fc8 = approached_fc8 or l.name == 'fc8_ft'\n", 596 | " if approached_fc8:\n", 597 | " netparams_fixed.layer.add()\n", 598 | " netparams_fixed.layer[-1].MergeFrom(l)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "# Solver\n", 608 | "solverfile = 'ft_fixed.solver'\n", 609 | "params = caffe.proto.caffe_pb2.SolverParameter()\n", 610 | "params.net = u'ft_fixed.prototxt'\n", 611 | "#params.test_iter.append(1450)\n", 612 | "#params.test_interval = 1000\n", 613 | "params.test_initialization = False\n", 614 | "params.base_lr = 1\n", 615 | "params.display = 100\n", 616 | "params.max_iter = 60000\n", 617 | "params.lr_policy = \"multistep\"\n", 618 | "params.stepvalue.extend([20000,30000,40000,50000])\n", 619 | "#params.power = 1\n", 620 | "#params.stepsize = 100000\n", 621 | "params.gamma = 0.25\n", 622 | "params.momentum = 0.9\n", 623 | "params.weight_decay = 0.000005\n", 624 | "params.snapshot = 10000000\n", 625 | "#params.random_seed = 0\n", 626 | "params.snapshot_prefix = \"ft_fixed\"\n", 627 | "params.iter_size = 
1\n", 628 | "assert params.iter_size > 0\n", 629 | "open(solverfile,'w').write(google.protobuf.text_format.MessageToString(params))\n", 630 | "open(params.net,'w').write(google.protobuf.text_format.MessageToString(netparams_fixed))" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": null, 636 | "metadata": {}, 637 | "outputs": [], 638 | "source": [ 639 | "solver = caffe.get_solver('ft_fixed.solver')" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": {}, 646 | "outputs": [], 647 | "source": [ 648 | "# Train\n", 649 | "for it in pyprind.prog_bar(range(params.max_iter)):\n", 650 | " train_ids = random.sample(range(train_feats.shape[0]),32)\n", 651 | " solver.net.blobs[last_blob].data[...] = train_feats[train_ids,...]\n", 652 | " solver.net.blobs['label'].data[...] = train_labels[train_ids]\n", 653 | " solver.step(1)" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": null, 659 | "metadata": {}, 660 | "outputs": [], 661 | "source": [ 662 | "solver.net.save('model_lr')" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "del solver" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": null, 677 | "metadata": {}, 678 | "outputs": [], 679 | "source": [ 680 | "solver = caffe.get_solver('ft.solver')\n", 681 | "solver.net.copy_from('model_init')\n", 682 | "solver.net.copy_from('model_lr')\n", 683 | "solver.net.save('model_lr')\n", 684 | "del solver" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": {}, 691 | "outputs": [], 692 | "source": [ 693 | "if False:\n", 694 | " import sklearn\n", 695 | "\n", 696 | " model = sklearn.linear_model.LogisticRegression(C=1000, solver='lbfgs',multi_class='multinomial', max_iter = 10000, tol = 1e-10)\n", 697 | " %time 
model.fit(train_feats.reshape(train_feats.shape[0],-1), train_labels)\n", 698 | " print(\"LR Accuracy is \")\n", 699 | " print(model.score(val_feats.reshape(val_feats.shape[0],-1), val_labels))" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": {}, 706 | "outputs": [], 707 | "source": [] 708 | } 709 | ], 710 | "metadata": { 711 | "anaconda-cloud": {}, 712 | "kernelspec": { 713 | "display_name": "Python 3", 714 | "language": "python", 715 | "name": "python3" 716 | }, 717 | "language_info": { 718 | "codemirror_mode": { 719 | "name": "ipython", 720 | "version": 3 721 | }, 722 | "file_extension": ".py", 723 | "mimetype": "text/x-python", 724 | "name": "python", 725 | "nbconvert_exporter": "python", 726 | "pygments_lexer": "ipython3", 727 | "version": "3.6.3" 728 | } 729 | }, 730 | "nbformat": 4, 731 | "nbformat_minor": 1 732 | } 733 | -------------------------------------------------------------------------------- /pretrained_models/caffe/download_caffe_model_link.txt: -------------------------------------------------------------------------------- 1 | Please see the model in the official repo https://github.com/BVLC/caffe/tree/master/models/bvlc_reference_caffenet 2 | -------------------------------------------------------------------------------- /pretrained_models/caffe/train_val.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "data" 3 | type: "ImageData" 4 | top: "data" 5 | top: "label" 6 | transform_param { 7 | mirror: true 8 | crop_size: 224 9 | mean_value: 104 10 | mean_value: 117 11 | mean_value: 123 12 | } 13 | image_data_param { 14 | source: "/home/simon/Datasets/CUB_200_2011/train_images.txt" 15 | root_folder: "/home/simon/Datasets/CUB_200_2011/images/" 16 | batch_size: 16 17 | shuffle: true 18 | smaller_side_size: 560 19 | smaller_side_size: 640 20 | } 21 | include: { phase: TRAIN } 22 | } 23 | layer { 24 | name: "data" 25 | type: "ImageData" 
26 | top: "data" 27 | top: "label" 28 | transform_param { 29 | mirror: false 30 | crop_size: 224 31 | mean_value: 104 32 | mean_value: 117 33 | mean_value: 123 34 | } 35 | image_data_param { 36 | source: "/home/simon/Datasets/CUB_200_2011/test_images.txt" 37 | root_folder: "/home/simon/Datasets/CUB_200_2011/images/" 38 | batch_size: 2 39 | shuffle: true 40 | smaller_side_size: 560 41 | } 42 | include: { phase: TEST } 43 | } 44 | layer { 45 | name: "conv1" 46 | type: "Convolution" 47 | bottom: "data" 48 | top: "conv1" 49 | param { 50 | lr_mult: 1 51 | decay_mult: 1 52 | } 53 | param { 54 | lr_mult: 2 55 | decay_mult: 0 56 | } 57 | convolution_param { 58 | num_output: 96 59 | kernel_size: 11 60 | stride: 4 61 | weight_filler { 62 | type: "gaussian" 63 | std: 0.01 64 | } 65 | bias_filler { 66 | type: "constant" 67 | value: 0 68 | } 69 | } 70 | } 71 | layer { 72 | name: "relu1" 73 | type: "ReLU" 74 | bottom: "conv1" 75 | top: "conv1" 76 | } 77 | layer { 78 | name: "pool1" 79 | type: "Pooling" 80 | bottom: "conv1" 81 | top: "pool1" 82 | pooling_param { 83 | pool: MAX 84 | kernel_size: 3 85 | stride: 2 86 | } 87 | } 88 | layer { 89 | name: "norm1" 90 | type: "LRN" 91 | bottom: "pool1" 92 | top: "norm1" 93 | lrn_param { 94 | local_size: 5 95 | alpha: 0.0001 96 | beta: 0.75 97 | } 98 | } 99 | layer { 100 | name: "conv2" 101 | type: "Convolution" 102 | bottom: "norm1" 103 | top: "conv2" 104 | param { 105 | lr_mult: 1 106 | decay_mult: 1 107 | } 108 | param { 109 | lr_mult: 2 110 | decay_mult: 0 111 | } 112 | convolution_param { 113 | num_output: 256 114 | pad: 2 115 | kernel_size: 5 116 | group: 2 117 | weight_filler { 118 | type: "gaussian" 119 | std: 0.01 120 | } 121 | bias_filler { 122 | type: "constant" 123 | value: 1 124 | } 125 | } 126 | } 127 | layer { 128 | name: "relu2" 129 | type: "ReLU" 130 | bottom: "conv2" 131 | top: "conv2" 132 | } 133 | layer { 134 | name: "pool2" 135 | type: "Pooling" 136 | bottom: "conv2" 137 | top: "pool2" 138 | pooling_param { 139 | pool: 
MAX 140 | kernel_size: 3 141 | stride: 2 142 | } 143 | } 144 | layer { 145 | name: "norm2" 146 | type: "LRN" 147 | bottom: "pool2" 148 | top: "norm2" 149 | lrn_param { 150 | local_size: 5 151 | alpha: 0.0001 152 | beta: 0.75 153 | } 154 | } 155 | layer { 156 | name: "conv3" 157 | type: "Convolution" 158 | bottom: "norm2" 159 | top: "conv3" 160 | param { 161 | lr_mult: 1 162 | decay_mult: 1 163 | } 164 | param { 165 | lr_mult: 2 166 | decay_mult: 0 167 | } 168 | convolution_param { 169 | num_output: 384 170 | pad: 1 171 | kernel_size: 3 172 | weight_filler { 173 | type: "gaussian" 174 | std: 0.01 175 | } 176 | bias_filler { 177 | type: "constant" 178 | value: 0 179 | } 180 | } 181 | } 182 | layer { 183 | name: "relu3" 184 | type: "ReLU" 185 | bottom: "conv3" 186 | top: "conv3" 187 | } 188 | layer { 189 | name: "conv4" 190 | type: "Convolution" 191 | bottom: "conv3" 192 | top: "conv4" 193 | param { 194 | lr_mult: 1 195 | decay_mult: 1 196 | } 197 | param { 198 | lr_mult: 2 199 | decay_mult: 0 200 | } 201 | convolution_param { 202 | num_output: 384 203 | pad: 1 204 | kernel_size: 3 205 | group: 2 206 | weight_filler { 207 | type: "gaussian" 208 | std: 0.01 209 | } 210 | bias_filler { 211 | type: "constant" 212 | value: 1 213 | } 214 | } 215 | } 216 | layer { 217 | name: "relu4" 218 | type: "ReLU" 219 | bottom: "conv4" 220 | top: "conv4" 221 | } 222 | layer { 223 | name: "conv5" 224 | type: "Convolution" 225 | bottom: "conv4" 226 | top: "conv5" 227 | param { 228 | lr_mult: 1 229 | decay_mult: 1 230 | } 231 | param { 232 | lr_mult: 2 233 | decay_mult: 0 234 | } 235 | convolution_param { 236 | num_output: 256 237 | pad: 1 238 | kernel_size: 3 239 | group: 2 240 | weight_filler { 241 | type: "gaussian" 242 | std: 0.01 243 | } 244 | bias_filler { 245 | type: "constant" 246 | value: 1 247 | } 248 | } 249 | } 250 | layer { 251 | name: "relu5" 252 | type: "ReLU" 253 | bottom: "conv5" 254 | top: "conv5" 255 | } 256 | layer { 257 | name: "pool5" 258 | type: "Pooling" 259 | 
bottom: "conv5" 260 | top: "pool5" 261 | pooling_param { 262 | pool: MAX 263 | kernel_size: 3 264 | stride: 2 265 | } 266 | } 267 | layer { 268 | name: "fc6" 269 | type: "InnerProduct" 270 | bottom: "pool5" 271 | top: "fc6" 272 | param { 273 | lr_mult: 1 274 | decay_mult: 1 275 | } 276 | param { 277 | lr_mult: 2 278 | decay_mult: 0 279 | } 280 | inner_product_param { 281 | num_output: 4096 282 | weight_filler { 283 | type: "gaussian" 284 | std: 0.005 285 | } 286 | bias_filler { 287 | type: "constant" 288 | value: 1 289 | } 290 | } 291 | } 292 | layer { 293 | name: "relu6" 294 | type: "ReLU" 295 | bottom: "fc6" 296 | top: "fc6" 297 | } 298 | layer { 299 | name: "drop6" 300 | type: "Dropout" 301 | bottom: "fc6" 302 | top: "fc6" 303 | dropout_param { 304 | dropout_ratio: 0.5 305 | } 306 | } 307 | layer { 308 | name: "fc7" 309 | type: "InnerProduct" 310 | bottom: "fc6" 311 | top: "fc7" 312 | param { 313 | lr_mult: 1 314 | decay_mult: 1 315 | } 316 | param { 317 | lr_mult: 2 318 | decay_mult: 0 319 | } 320 | inner_product_param { 321 | num_output: 4096 322 | weight_filler { 323 | type: "gaussian" 324 | std: 0.005 325 | } 326 | bias_filler { 327 | type: "constant" 328 | value: 1 329 | } 330 | } 331 | } 332 | layer { 333 | name: "relu7" 334 | type: "ReLU" 335 | bottom: "fc7" 336 | top: "fc7" 337 | } 338 | layer { 339 | name: "drop7" 340 | type: "Dropout" 341 | bottom: "fc7" 342 | top: "fc7" 343 | dropout_param { 344 | dropout_ratio: 0.5 345 | } 346 | } 347 | layer { 348 | name: "fc8_flickr" 349 | type: "InnerProduct" 350 | bottom: "fc7" 351 | top: "fc8_flickr" 352 | param { 353 | lr_mult: 10 354 | decay_mult: 1 355 | } 356 | param { 357 | lr_mult: 20 358 | decay_mult: 0 359 | } 360 | inner_product_param { 361 | num_output: 20 362 | weight_filler { 363 | type: "gaussian" 364 | std: 0.01 365 | } 366 | bias_filler { 367 | type: "constant" 368 | value: 0 369 | } 370 | } 371 | } 372 | layer { 373 | name: "loss" 374 | type: "SoftmaxWithLoss" 375 | bottom: "fc8_flickr" 376 | 
bottom: "label" 377 | } 378 | layer { 379 | name: "accuracy" 380 | type: "Accuracy" 381 | bottom: "fc8_flickr" 382 | bottom: "label" 383 | top: "accuracy" 384 | include { 385 | phase: TEST 386 | } 387 | } 388 | -------------------------------------------------------------------------------- /pretrained_models/resnet50_cvgj/download_resnet50_cvgj_model_link.txt: -------------------------------------------------------------------------------- 1 | ResNet 50 model used in the paper were taken from https://github.com/cvjena/cnn-models/tree/master/ResNet_preact/ResNet50_cvgj 2 | Please refer to the download link there 3 | -------------------------------------------------------------------------------- /pretrained_models/resnet50_cvgj/train_val.prototxt: -------------------------------------------------------------------------------- 1 | layer { 2 | name: "data" 3 | type: "ImageData" 4 | top: "data" 5 | top: "label" 6 | transform_param { 7 | mirror: true 8 | crop_size: 224 9 | } 10 | image_data_param { 11 | source: "/home/simon/Datasets/CUB_200_2011/train_images.txt" 12 | root_folder: "/home/simon/Datasets/CUB_200_2011/images/" 13 | batch_size: 16 14 | shuffle: true 15 | smaller_side_size: 256 16 | smaller_side_size: 512 17 | } 18 | include: { phase: TRAIN } 19 | } 20 | layer { 21 | name: "data" 22 | type: "ImageData" 23 | top: "data" 24 | top: "label" 25 | transform_param { 26 | mirror: false 27 | crop_size: 224 28 | } 29 | image_data_param { 30 | source: "/home/simon/Datasets/CUB_200_2011/test_images.txt" 31 | root_folder: "/home/simon/Datasets/CUB_200_2011/images/" 32 | batch_size: 2 33 | shuffle: true 34 | smaller_side_size: 256 35 | } 36 | include: { phase: TEST } 37 | } 38 | layer { 39 | name: "data_bn" 40 | type: "BatchNorm" 41 | bottom: "data" 42 | top: "data_bn" 43 | param { 44 | lr_mult: 0.0 45 | } 46 | param { 47 | lr_mult: 0.0 48 | } 49 | param { 50 | lr_mult: 0.0 51 | } 52 | } 53 | layer { 54 | name: "data_scale" 55 | type: "Scale" 56 | bottom: "data_bn" 57 | 
top: "data_bn" 58 | param { 59 | lr_mult: 1.0 60 | decay_mult: 1.0 61 | } 62 | param { 63 | lr_mult: 2.0 64 | decay_mult: 1.0 65 | } 66 | scale_param { 67 | bias_term: true 68 | } 69 | } 70 | layer { 71 | name: "conv1" 72 | type: "Convolution" 73 | bottom: "data_bn" 74 | top: "conv1" 75 | param { 76 | lr_mult: 1.0 77 | decay_mult: 1.0 78 | } 79 | param { 80 | lr_mult: 2.0 81 | decay_mult: 1.0 82 | } 83 | convolution_param { 84 | num_output: 64 85 | pad: 3 86 | kernel_size: 7 87 | stride: 2 88 | weight_filler { 89 | type: "msra" 90 | variance_norm: FAN_OUT 91 | } 92 | bias_filler { 93 | type: "constant" 94 | value: 0.0 95 | } 96 | } 97 | } 98 | layer { 99 | name: "conv1_bn" 100 | type: "BatchNorm" 101 | bottom: "conv1" 102 | top: "conv1" 103 | param { 104 | lr_mult: 0.0 105 | } 106 | param { 107 | lr_mult: 0.0 108 | } 109 | param { 110 | lr_mult: 0.0 111 | } 112 | } 113 | layer { 114 | name: "conv1_scale" 115 | type: "Scale" 116 | bottom: "conv1" 117 | top: "conv1" 118 | param { 119 | lr_mult: 1.0 120 | decay_mult: 1.0 121 | } 122 | param { 123 | lr_mult: 2.0 124 | decay_mult: 1.0 125 | } 126 | scale_param { 127 | bias_term: true 128 | } 129 | } 130 | layer { 131 | name: "conv1_relu" 132 | type: "ReLU" 133 | bottom: "conv1" 134 | top: "conv1" 135 | } 136 | layer { 137 | name: "conv1_pool" 138 | type: "Pooling" 139 | bottom: "conv1" 140 | top: "conv1_pool" 141 | pooling_param { 142 | kernel_size: 3 143 | stride: 2 144 | } 145 | } 146 | layer { 147 | name: "layer_64_1_conv1" 148 | type: "Convolution" 149 | bottom: "conv1_pool" 150 | top: "layer_64_1_conv1" 151 | param { 152 | lr_mult: 1.0 153 | decay_mult: 1.0 154 | } 155 | convolution_param { 156 | num_output: 64 157 | bias_term: false 158 | pad: 0 159 | kernel_size: 1 160 | stride: 1 161 | weight_filler { 162 | type: "msra" 163 | } 164 | bias_filler { 165 | type: "constant" 166 | value: 0.0 167 | } 168 | } 169 | } 170 | layer { 171 | name: "layer_64_1_bn2" 172 | type: "BatchNorm" 173 | bottom: "layer_64_1_conv1" 174 
| top: "layer_64_1_conv1" 175 | param { 176 | lr_mult: 0.0 177 | } 178 | param { 179 | lr_mult: 0.0 180 | } 181 | param { 182 | lr_mult: 0.0 183 | } 184 | } 185 | layer { 186 | name: "layer_64_1_scale2" 187 | type: "Scale" 188 | bottom: "layer_64_1_conv1" 189 | top: "layer_64_1_conv1" 190 | param { 191 | lr_mult: 1.0 192 | decay_mult: 1.0 193 | } 194 | param { 195 | lr_mult: 2.0 196 | decay_mult: 1.0 197 | } 198 | scale_param { 199 | bias_term: true 200 | } 201 | } 202 | layer { 203 | name: "layer_64_1_relu2" 204 | type: "ReLU" 205 | bottom: "layer_64_1_conv1" 206 | top: "layer_64_1_conv1" 207 | } 208 | layer { 209 | name: "layer_64_1_conv2" 210 | type: "Convolution" 211 | bottom: "layer_64_1_conv1" 212 | top: "layer_64_1_conv2" 213 | param { 214 | lr_mult: 1.0 215 | decay_mult: 1.0 216 | } 217 | convolution_param { 218 | num_output: 64 219 | bias_term: false 220 | pad: 1 221 | kernel_size: 3 222 | stride: 1 223 | weight_filler { 224 | type: "msra" 225 | } 226 | bias_filler { 227 | type: "constant" 228 | value: 0.0 229 | } 230 | } 231 | } 232 | layer { 233 | name: "layer_64_1_bn3" 234 | type: "BatchNorm" 235 | bottom: "layer_64_1_conv2" 236 | top: "layer_64_1_conv2" 237 | param { 238 | lr_mult: 0.0 239 | } 240 | param { 241 | lr_mult: 0.0 242 | } 243 | param { 244 | lr_mult: 0.0 245 | } 246 | } 247 | layer { 248 | name: "layer_64_1_scale3" 249 | type: "Scale" 250 | bottom: "layer_64_1_conv2" 251 | top: "layer_64_1_conv2" 252 | param { 253 | lr_mult: 1.0 254 | decay_mult: 1.0 255 | } 256 | param { 257 | lr_mult: 2.0 258 | decay_mult: 1.0 259 | } 260 | scale_param { 261 | bias_term: true 262 | } 263 | } 264 | layer { 265 | name: "layer_64_1_relu3" 266 | type: "ReLU" 267 | bottom: "layer_64_1_conv2" 268 | top: "layer_64_1_conv2" 269 | } 270 | layer { 271 | name: "layer_64_1_conv3" 272 | type: "Convolution" 273 | bottom: "layer_64_1_conv2" 274 | top: "layer_64_1_conv3" 275 | param { 276 | lr_mult: 1.0 277 | decay_mult: 1.0 278 | } 279 | convolution_param { 280 | 
num_output: 256 281 | bias_term: false 282 | pad: 0 283 | kernel_size: 1 284 | stride: 1 285 | weight_filler { 286 | type: "msra" 287 | } 288 | bias_filler { 289 | type: "constant" 290 | value: 0.0 291 | } 292 | } 293 | } 294 | layer { 295 | name: "layer_64_1_conv_expand" 296 | type: "Convolution" 297 | bottom: "layer_64_1_conv1" 298 | top: "layer_64_1_conv_expand" 299 | param { 300 | lr_mult: 1.0 301 | decay_mult: 1.0 302 | } 303 | convolution_param { 304 | num_output: 256 305 | bias_term: false 306 | pad: 0 307 | kernel_size: 1 308 | stride: 1 309 | weight_filler { 310 | type: "msra" 311 | } 312 | bias_filler { 313 | type: "constant" 314 | value: 0.0 315 | } 316 | } 317 | } 318 | layer { 319 | name: "layer_64_1_sum" 320 | type: "Eltwise" 321 | bottom: "layer_64_1_conv3" 322 | bottom: "layer_64_1_conv_expand" 323 | top: "layer_64_1_sum" 324 | } 325 | layer { 326 | name: "layer_64_2_bn1" 327 | type: "BatchNorm" 328 | bottom: "layer_64_1_sum" 329 | top: "layer_64_2_bn1" 330 | param { 331 | lr_mult: 0.0 332 | } 333 | param { 334 | lr_mult: 0.0 335 | } 336 | param { 337 | lr_mult: 0.0 338 | } 339 | } 340 | layer { 341 | name: "layer_64_2_scale1" 342 | type: "Scale" 343 | bottom: "layer_64_2_bn1" 344 | top: "layer_64_2_bn1" 345 | param { 346 | lr_mult: 1.0 347 | decay_mult: 1.0 348 | } 349 | param { 350 | lr_mult: 2.0 351 | decay_mult: 1.0 352 | } 353 | scale_param { 354 | bias_term: true 355 | } 356 | } 357 | layer { 358 | name: "layer_64_2_relu1" 359 | type: "ReLU" 360 | bottom: "layer_64_2_bn1" 361 | top: "layer_64_2_bn1" 362 | } 363 | layer { 364 | name: "layer_64_2_conv1" 365 | type: "Convolution" 366 | bottom: "layer_64_2_bn1" 367 | top: "layer_64_2_conv1" 368 | param { 369 | lr_mult: 1.0 370 | decay_mult: 1.0 371 | } 372 | convolution_param { 373 | num_output: 64 374 | bias_term: false 375 | pad: 0 376 | kernel_size: 1 377 | stride: 1 378 | weight_filler { 379 | type: "msra" 380 | } 381 | bias_filler { 382 | type: "constant" 383 | value: 0.0 384 | } 385 | } 386 
| } 387 | layer { 388 | name: "layer_64_2_bn2" 389 | type: "BatchNorm" 390 | bottom: "layer_64_2_conv1" 391 | top: "layer_64_2_conv1" 392 | param { 393 | lr_mult: 0.0 394 | } 395 | param { 396 | lr_mult: 0.0 397 | } 398 | param { 399 | lr_mult: 0.0 400 | } 401 | } 402 | layer { 403 | name: "layer_64_2_scale2" 404 | type: "Scale" 405 | bottom: "layer_64_2_conv1" 406 | top: "layer_64_2_conv1" 407 | param { 408 | lr_mult: 1.0 409 | decay_mult: 1.0 410 | } 411 | param { 412 | lr_mult: 2.0 413 | decay_mult: 1.0 414 | } 415 | scale_param { 416 | bias_term: true 417 | } 418 | } 419 | layer { 420 | name: "layer_64_2_relu2" 421 | type: "ReLU" 422 | bottom: "layer_64_2_conv1" 423 | top: "layer_64_2_conv1" 424 | } 425 | layer { 426 | name: "layer_64_2_conv2" 427 | type: "Convolution" 428 | bottom: "layer_64_2_conv1" 429 | top: "layer_64_2_conv2" 430 | param { 431 | lr_mult: 1.0 432 | decay_mult: 1.0 433 | } 434 | convolution_param { 435 | num_output: 64 436 | bias_term: false 437 | pad: 1 438 | kernel_size: 3 439 | stride: 1 440 | weight_filler { 441 | type: "msra" 442 | } 443 | bias_filler { 444 | type: "constant" 445 | value: 0.0 446 | } 447 | } 448 | } 449 | layer { 450 | name: "layer_64_2_bn3" 451 | type: "BatchNorm" 452 | bottom: "layer_64_2_conv2" 453 | top: "layer_64_2_conv2" 454 | param { 455 | lr_mult: 0.0 456 | } 457 | param { 458 | lr_mult: 0.0 459 | } 460 | param { 461 | lr_mult: 0.0 462 | } 463 | } 464 | layer { 465 | name: "layer_64_2_scale3" 466 | type: "Scale" 467 | bottom: "layer_64_2_conv2" 468 | top: "layer_64_2_conv2" 469 | param { 470 | lr_mult: 1.0 471 | decay_mult: 1.0 472 | } 473 | param { 474 | lr_mult: 2.0 475 | decay_mult: 1.0 476 | } 477 | scale_param { 478 | bias_term: true 479 | } 480 | } 481 | layer { 482 | name: "layer_64_2_relu3" 483 | type: "ReLU" 484 | bottom: "layer_64_2_conv2" 485 | top: "layer_64_2_conv2" 486 | } 487 | layer { 488 | name: "layer_64_2_conv3" 489 | type: "Convolution" 490 | bottom: "layer_64_2_conv2" 491 | top: 
"layer_64_2_conv3" 492 | param { 493 | lr_mult: 1.0 494 | decay_mult: 1.0 495 | } 496 | convolution_param { 497 | num_output: 256 498 | bias_term: false 499 | pad: 0 500 | kernel_size: 1 501 | stride: 1 502 | weight_filler { 503 | type: "msra" 504 | } 505 | bias_filler { 506 | type: "constant" 507 | value: 0.0 508 | } 509 | } 510 | } 511 | layer { 512 | name: "layer_64_2_sum" 513 | type: "Eltwise" 514 | bottom: "layer_64_2_conv3" 515 | bottom: "layer_64_1_sum" 516 | top: "layer_64_2_sum" 517 | } 518 | layer { 519 | name: "layer_64_3_bn1" 520 | type: "BatchNorm" 521 | bottom: "layer_64_2_sum" 522 | top: "layer_64_3_bn1" 523 | param { 524 | lr_mult: 0.0 525 | } 526 | param { 527 | lr_mult: 0.0 528 | } 529 | param { 530 | lr_mult: 0.0 531 | } 532 | } 533 | layer { 534 | name: "layer_64_3_scale1" 535 | type: "Scale" 536 | bottom: "layer_64_3_bn1" 537 | top: "layer_64_3_bn1" 538 | param { 539 | lr_mult: 1.0 540 | decay_mult: 1.0 541 | } 542 | param { 543 | lr_mult: 2.0 544 | decay_mult: 1.0 545 | } 546 | scale_param { 547 | bias_term: true 548 | } 549 | } 550 | layer { 551 | name: "layer_64_3_relu1" 552 | type: "ReLU" 553 | bottom: "layer_64_3_bn1" 554 | top: "layer_64_3_bn1" 555 | } 556 | layer { 557 | name: "layer_64_3_conv1" 558 | type: "Convolution" 559 | bottom: "layer_64_3_bn1" 560 | top: "layer_64_3_conv1" 561 | param { 562 | lr_mult: 1.0 563 | decay_mult: 1.0 564 | } 565 | convolution_param { 566 | num_output: 64 567 | bias_term: false 568 | pad: 0 569 | kernel_size: 1 570 | stride: 1 571 | weight_filler { 572 | type: "msra" 573 | } 574 | bias_filler { 575 | type: "constant" 576 | value: 0.0 577 | } 578 | } 579 | } 580 | layer { 581 | name: "layer_64_3_bn2" 582 | type: "BatchNorm" 583 | bottom: "layer_64_3_conv1" 584 | top: "layer_64_3_conv1" 585 | param { 586 | lr_mult: 0.0 587 | } 588 | param { 589 | lr_mult: 0.0 590 | } 591 | param { 592 | lr_mult: 0.0 593 | } 594 | } 595 | layer { 596 | name: "layer_64_3_scale2" 597 | type: "Scale" 598 | bottom: 
"layer_64_3_conv1" 599 | top: "layer_64_3_conv1" 600 | param { 601 | lr_mult: 1.0 602 | decay_mult: 1.0 603 | } 604 | param { 605 | lr_mult: 2.0 606 | decay_mult: 1.0 607 | } 608 | scale_param { 609 | bias_term: true 610 | } 611 | } 612 | layer { 613 | name: "layer_64_3_relu2" 614 | type: "ReLU" 615 | bottom: "layer_64_3_conv1" 616 | top: "layer_64_3_conv1" 617 | } 618 | layer { 619 | name: "layer_64_3_conv2" 620 | type: "Convolution" 621 | bottom: "layer_64_3_conv1" 622 | top: "layer_64_3_conv2" 623 | param { 624 | lr_mult: 1.0 625 | decay_mult: 1.0 626 | } 627 | convolution_param { 628 | num_output: 64 629 | bias_term: false 630 | pad: 1 631 | kernel_size: 3 632 | stride: 1 633 | weight_filler { 634 | type: "msra" 635 | } 636 | bias_filler { 637 | type: "constant" 638 | value: 0.0 639 | } 640 | } 641 | } 642 | layer { 643 | name: "layer_64_3_bn3" 644 | type: "BatchNorm" 645 | bottom: "layer_64_3_conv2" 646 | top: "layer_64_3_conv2" 647 | param { 648 | lr_mult: 0.0 649 | } 650 | param { 651 | lr_mult: 0.0 652 | } 653 | param { 654 | lr_mult: 0.0 655 | } 656 | } 657 | layer { 658 | name: "layer_64_3_scale3" 659 | type: "Scale" 660 | bottom: "layer_64_3_conv2" 661 | top: "layer_64_3_conv2" 662 | param { 663 | lr_mult: 1.0 664 | decay_mult: 1.0 665 | } 666 | param { 667 | lr_mult: 2.0 668 | decay_mult: 1.0 669 | } 670 | scale_param { 671 | bias_term: true 672 | } 673 | } 674 | layer { 675 | name: "layer_64_3_relu3" 676 | type: "ReLU" 677 | bottom: "layer_64_3_conv2" 678 | top: "layer_64_3_conv2" 679 | } 680 | layer { 681 | name: "layer_64_3_conv3" 682 | type: "Convolution" 683 | bottom: "layer_64_3_conv2" 684 | top: "layer_64_3_conv3" 685 | param { 686 | lr_mult: 1.0 687 | decay_mult: 1.0 688 | } 689 | convolution_param { 690 | num_output: 256 691 | bias_term: false 692 | pad: 0 693 | kernel_size: 1 694 | stride: 1 695 | weight_filler { 696 | type: "msra" 697 | } 698 | bias_filler { 699 | type: "constant" 700 | value: 0.0 701 | } 702 | } 703 | } 704 | layer { 705 | 
name: "layer_64_3_sum" 706 | type: "Eltwise" 707 | bottom: "layer_64_3_conv3" 708 | bottom: "layer_64_2_sum" 709 | top: "layer_64_3_sum" 710 | } 711 | layer { 712 | name: "layer_128_1_bn1" 713 | type: "BatchNorm" 714 | bottom: "layer_64_3_sum" 715 | top: "layer_128_1_bn1" 716 | param { 717 | lr_mult: 0.0 718 | } 719 | param { 720 | lr_mult: 0.0 721 | } 722 | param { 723 | lr_mult: 0.0 724 | } 725 | } 726 | layer { 727 | name: "layer_128_1_scale1" 728 | type: "Scale" 729 | bottom: "layer_128_1_bn1" 730 | top: "layer_128_1_bn1" 731 | param { 732 | lr_mult: 1.0 733 | decay_mult: 1.0 734 | } 735 | param { 736 | lr_mult: 2.0 737 | decay_mult: 1.0 738 | } 739 | scale_param { 740 | bias_term: true 741 | } 742 | } 743 | layer { 744 | name: "layer_128_1_relu1" 745 | type: "ReLU" 746 | bottom: "layer_128_1_bn1" 747 | top: "layer_128_1_bn1" 748 | } 749 | layer { 750 | name: "layer_128_1_conv1" 751 | type: "Convolution" 752 | bottom: "layer_128_1_bn1" 753 | top: "layer_128_1_conv1" 754 | param { 755 | lr_mult: 1.0 756 | decay_mult: 1.0 757 | } 758 | convolution_param { 759 | num_output: 128 760 | bias_term: false 761 | pad: 0 762 | kernel_size: 1 763 | stride: 1 764 | weight_filler { 765 | type: "msra" 766 | } 767 | bias_filler { 768 | type: "constant" 769 | value: 0.0 770 | } 771 | } 772 | } 773 | layer { 774 | name: "layer_128_1_bn2" 775 | type: "BatchNorm" 776 | bottom: "layer_128_1_conv1" 777 | top: "layer_128_1_conv1" 778 | param { 779 | lr_mult: 0.0 780 | } 781 | param { 782 | lr_mult: 0.0 783 | } 784 | param { 785 | lr_mult: 0.0 786 | } 787 | } 788 | layer { 789 | name: "layer_128_1_scale2" 790 | type: "Scale" 791 | bottom: "layer_128_1_conv1" 792 | top: "layer_128_1_conv1" 793 | param { 794 | lr_mult: 1.0 795 | decay_mult: 1.0 796 | } 797 | param { 798 | lr_mult: 2.0 799 | decay_mult: 1.0 800 | } 801 | scale_param { 802 | bias_term: true 803 | } 804 | } 805 | layer { 806 | name: "layer_128_1_relu2" 807 | type: "ReLU" 808 | bottom: "layer_128_1_conv1" 809 | top: 
"layer_128_1_conv1" 810 | } 811 | layer { 812 | name: "layer_128_1_conv2" 813 | type: "Convolution" 814 | bottom: "layer_128_1_conv1" 815 | top: "layer_128_1_conv2" 816 | param { 817 | lr_mult: 1.0 818 | decay_mult: 1.0 819 | } 820 | convolution_param { 821 | num_output: 128 822 | bias_term: false 823 | pad: 1 824 | kernel_size: 3 825 | stride: 2 826 | weight_filler { 827 | type: "msra" 828 | } 829 | bias_filler { 830 | type: "constant" 831 | value: 0.0 832 | } 833 | } 834 | } 835 | layer { 836 | name: "layer_128_1_bn3" 837 | type: "BatchNorm" 838 | bottom: "layer_128_1_conv2" 839 | top: "layer_128_1_conv2" 840 | param { 841 | lr_mult: 0.0 842 | } 843 | param { 844 | lr_mult: 0.0 845 | } 846 | param { 847 | lr_mult: 0.0 848 | } 849 | } 850 | layer { 851 | name: "layer_128_1_scale3" 852 | type: "Scale" 853 | bottom: "layer_128_1_conv2" 854 | top: "layer_128_1_conv2" 855 | param { 856 | lr_mult: 1.0 857 | decay_mult: 1.0 858 | } 859 | param { 860 | lr_mult: 2.0 861 | decay_mult: 1.0 862 | } 863 | scale_param { 864 | bias_term: true 865 | } 866 | } 867 | layer { 868 | name: "layer_128_1_relu3" 869 | type: "ReLU" 870 | bottom: "layer_128_1_conv2" 871 | top: "layer_128_1_conv2" 872 | } 873 | layer { 874 | name: "layer_128_1_conv3" 875 | type: "Convolution" 876 | bottom: "layer_128_1_conv2" 877 | top: "layer_128_1_conv3" 878 | param { 879 | lr_mult: 1.0 880 | decay_mult: 1.0 881 | } 882 | convolution_param { 883 | num_output: 512 884 | bias_term: false 885 | pad: 0 886 | kernel_size: 1 887 | stride: 1 888 | weight_filler { 889 | type: "msra" 890 | } 891 | bias_filler { 892 | type: "constant" 893 | value: 0.0 894 | } 895 | } 896 | } 897 | layer { 898 | name: "layer_128_1_conv_expand" 899 | type: "Convolution" 900 | bottom: "layer_128_1_bn1" 901 | top: "layer_128_1_conv_expand" 902 | param { 903 | lr_mult: 1.0 904 | decay_mult: 1.0 905 | } 906 | convolution_param { 907 | num_output: 512 908 | bias_term: false 909 | pad: 0 910 | kernel_size: 1 911 | stride: 2 912 | 
weight_filler { 913 | type: "msra" 914 | } 915 | bias_filler { 916 | type: "constant" 917 | value: 0.0 918 | } 919 | } 920 | } 921 | layer { 922 | name: "layer_128_1_sum" 923 | type: "Eltwise" 924 | bottom: "layer_128_1_conv3" 925 | bottom: "layer_128_1_conv_expand" 926 | top: "layer_128_1_sum" 927 | } 928 | layer { 929 | name: "layer_128_2_bn1" 930 | type: "BatchNorm" 931 | bottom: "layer_128_1_sum" 932 | top: "layer_128_2_bn1" 933 | param { 934 | lr_mult: 0.0 935 | } 936 | param { 937 | lr_mult: 0.0 938 | } 939 | param { 940 | lr_mult: 0.0 941 | } 942 | } 943 | layer { 944 | name: "layer_128_2_scale1" 945 | type: "Scale" 946 | bottom: "layer_128_2_bn1" 947 | top: "layer_128_2_bn1" 948 | param { 949 | lr_mult: 1.0 950 | decay_mult: 1.0 951 | } 952 | param { 953 | lr_mult: 2.0 954 | decay_mult: 1.0 955 | } 956 | scale_param { 957 | bias_term: true 958 | } 959 | } 960 | layer { 961 | name: "layer_128_2_relu1" 962 | type: "ReLU" 963 | bottom: "layer_128_2_bn1" 964 | top: "layer_128_2_bn1" 965 | } 966 | layer { 967 | name: "layer_128_2_conv1" 968 | type: "Convolution" 969 | bottom: "layer_128_2_bn1" 970 | top: "layer_128_2_conv1" 971 | param { 972 | lr_mult: 1.0 973 | decay_mult: 1.0 974 | } 975 | convolution_param { 976 | num_output: 128 977 | bias_term: false 978 | pad: 0 979 | kernel_size: 1 980 | stride: 1 981 | weight_filler { 982 | type: "msra" 983 | } 984 | bias_filler { 985 | type: "constant" 986 | value: 0.0 987 | } 988 | } 989 | } 990 | layer { 991 | name: "layer_128_2_bn2" 992 | type: "BatchNorm" 993 | bottom: "layer_128_2_conv1" 994 | top: "layer_128_2_conv1" 995 | param { 996 | lr_mult: 0.0 997 | } 998 | param { 999 | lr_mult: 0.0 1000 | } 1001 | param { 1002 | lr_mult: 0.0 1003 | } 1004 | } 1005 | layer { 1006 | name: "layer_128_2_scale2" 1007 | type: "Scale" 1008 | bottom: "layer_128_2_conv1" 1009 | top: "layer_128_2_conv1" 1010 | param { 1011 | lr_mult: 1.0 1012 | decay_mult: 1.0 1013 | } 1014 | param { 1015 | lr_mult: 2.0 1016 | decay_mult: 1.0 1017 | 
} 1018 | scale_param { 1019 | bias_term: true 1020 | } 1021 | } 1022 | layer { 1023 | name: "layer_128_2_relu2" 1024 | type: "ReLU" 1025 | bottom: "layer_128_2_conv1" 1026 | top: "layer_128_2_conv1" 1027 | } 1028 | layer { 1029 | name: "layer_128_2_conv2" 1030 | type: "Convolution" 1031 | bottom: "layer_128_2_conv1" 1032 | top: "layer_128_2_conv2" 1033 | param { 1034 | lr_mult: 1.0 1035 | decay_mult: 1.0 1036 | } 1037 | convolution_param { 1038 | num_output: 128 1039 | bias_term: false 1040 | pad: 1 1041 | kernel_size: 3 1042 | stride: 1 1043 | weight_filler { 1044 | type: "msra" 1045 | } 1046 | bias_filler { 1047 | type: "constant" 1048 | value: 0.0 1049 | } 1050 | } 1051 | } 1052 | layer { 1053 | name: "layer_128_2_bn3" 1054 | type: "BatchNorm" 1055 | bottom: "layer_128_2_conv2" 1056 | top: "layer_128_2_conv2" 1057 | param { 1058 | lr_mult: 0.0 1059 | } 1060 | param { 1061 | lr_mult: 0.0 1062 | } 1063 | param { 1064 | lr_mult: 0.0 1065 | } 1066 | } 1067 | layer { 1068 | name: "layer_128_2_scale3" 1069 | type: "Scale" 1070 | bottom: "layer_128_2_conv2" 1071 | top: "layer_128_2_conv2" 1072 | param { 1073 | lr_mult: 1.0 1074 | decay_mult: 1.0 1075 | } 1076 | param { 1077 | lr_mult: 2.0 1078 | decay_mult: 1.0 1079 | } 1080 | scale_param { 1081 | bias_term: true 1082 | } 1083 | } 1084 | layer { 1085 | name: "layer_128_2_relu3" 1086 | type: "ReLU" 1087 | bottom: "layer_128_2_conv2" 1088 | top: "layer_128_2_conv2" 1089 | } 1090 | layer { 1091 | name: "layer_128_2_conv3" 1092 | type: "Convolution" 1093 | bottom: "layer_128_2_conv2" 1094 | top: "layer_128_2_conv3" 1095 | param { 1096 | lr_mult: 1.0 1097 | decay_mult: 1.0 1098 | } 1099 | convolution_param { 1100 | num_output: 512 1101 | bias_term: false 1102 | pad: 0 1103 | kernel_size: 1 1104 | stride: 1 1105 | weight_filler { 1106 | type: "msra" 1107 | } 1108 | bias_filler { 1109 | type: "constant" 1110 | value: 0.0 1111 | } 1112 | } 1113 | } 1114 | layer { 1115 | name: "layer_128_2_sum" 1116 | type: "Eltwise" 1117 | 
bottom: "layer_128_2_conv3" 1118 | bottom: "layer_128_1_sum" 1119 | top: "layer_128_2_sum" 1120 | } 1121 | layer { 1122 | name: "layer_128_3_bn1" 1123 | type: "BatchNorm" 1124 | bottom: "layer_128_2_sum" 1125 | top: "layer_128_3_bn1" 1126 | param { 1127 | lr_mult: 0.0 1128 | } 1129 | param { 1130 | lr_mult: 0.0 1131 | } 1132 | param { 1133 | lr_mult: 0.0 1134 | } 1135 | } 1136 | layer { 1137 | name: "layer_128_3_scale1" 1138 | type: "Scale" 1139 | bottom: "layer_128_3_bn1" 1140 | top: "layer_128_3_bn1" 1141 | param { 1142 | lr_mult: 1.0 1143 | decay_mult: 1.0 1144 | } 1145 | param { 1146 | lr_mult: 2.0 1147 | decay_mult: 1.0 1148 | } 1149 | scale_param { 1150 | bias_term: true 1151 | } 1152 | } 1153 | layer { 1154 | name: "layer_128_3_relu1" 1155 | type: "ReLU" 1156 | bottom: "layer_128_3_bn1" 1157 | top: "layer_128_3_bn1" 1158 | } 1159 | layer { 1160 | name: "layer_128_3_conv1" 1161 | type: "Convolution" 1162 | bottom: "layer_128_3_bn1" 1163 | top: "layer_128_3_conv1" 1164 | param { 1165 | lr_mult: 1.0 1166 | decay_mult: 1.0 1167 | } 1168 | convolution_param { 1169 | num_output: 128 1170 | bias_term: false 1171 | pad: 0 1172 | kernel_size: 1 1173 | stride: 1 1174 | weight_filler { 1175 | type: "msra" 1176 | } 1177 | bias_filler { 1178 | type: "constant" 1179 | value: 0.0 1180 | } 1181 | } 1182 | } 1183 | layer { 1184 | name: "layer_128_3_bn2" 1185 | type: "BatchNorm" 1186 | bottom: "layer_128_3_conv1" 1187 | top: "layer_128_3_conv1" 1188 | param { 1189 | lr_mult: 0.0 1190 | } 1191 | param { 1192 | lr_mult: 0.0 1193 | } 1194 | param { 1195 | lr_mult: 0.0 1196 | } 1197 | } 1198 | layer { 1199 | name: "layer_128_3_scale2" 1200 | type: "Scale" 1201 | bottom: "layer_128_3_conv1" 1202 | top: "layer_128_3_conv1" 1203 | param { 1204 | lr_mult: 1.0 1205 | decay_mult: 1.0 1206 | } 1207 | param { 1208 | lr_mult: 2.0 1209 | decay_mult: 1.0 1210 | } 1211 | scale_param { 1212 | bias_term: true 1213 | } 1214 | } 1215 | layer { 1216 | name: "layer_128_3_relu2" 1217 | type: "ReLU" 
1218 | bottom: "layer_128_3_conv1" 1219 | top: "layer_128_3_conv1" 1220 | } 1221 | layer { 1222 | name: "layer_128_3_conv2" 1223 | type: "Convolution" 1224 | bottom: "layer_128_3_conv1" 1225 | top: "layer_128_3_conv2" 1226 | param { 1227 | lr_mult: 1.0 1228 | decay_mult: 1.0 1229 | } 1230 | convolution_param { 1231 | num_output: 128 1232 | bias_term: false 1233 | pad: 1 1234 | kernel_size: 3 1235 | stride: 1 1236 | weight_filler { 1237 | type: "msra" 1238 | } 1239 | bias_filler { 1240 | type: "constant" 1241 | value: 0.0 1242 | } 1243 | } 1244 | } 1245 | layer { 1246 | name: "layer_128_3_bn3" 1247 | type: "BatchNorm" 1248 | bottom: "layer_128_3_conv2" 1249 | top: "layer_128_3_conv2" 1250 | param { 1251 | lr_mult: 0.0 1252 | } 1253 | param { 1254 | lr_mult: 0.0 1255 | } 1256 | param { 1257 | lr_mult: 0.0 1258 | } 1259 | } 1260 | layer { 1261 | name: "layer_128_3_scale3" 1262 | type: "Scale" 1263 | bottom: "layer_128_3_conv2" 1264 | top: "layer_128_3_conv2" 1265 | param { 1266 | lr_mult: 1.0 1267 | decay_mult: 1.0 1268 | } 1269 | param { 1270 | lr_mult: 2.0 1271 | decay_mult: 1.0 1272 | } 1273 | scale_param { 1274 | bias_term: true 1275 | } 1276 | } 1277 | layer { 1278 | name: "layer_128_3_relu3" 1279 | type: "ReLU" 1280 | bottom: "layer_128_3_conv2" 1281 | top: "layer_128_3_conv2" 1282 | } 1283 | layer { 1284 | name: "layer_128_3_conv3" 1285 | type: "Convolution" 1286 | bottom: "layer_128_3_conv2" 1287 | top: "layer_128_3_conv3" 1288 | param { 1289 | lr_mult: 1.0 1290 | decay_mult: 1.0 1291 | } 1292 | convolution_param { 1293 | num_output: 512 1294 | bias_term: false 1295 | pad: 0 1296 | kernel_size: 1 1297 | stride: 1 1298 | weight_filler { 1299 | type: "msra" 1300 | } 1301 | bias_filler { 1302 | type: "constant" 1303 | value: 0.0 1304 | } 1305 | } 1306 | } 1307 | layer { 1308 | name: "layer_128_3_sum" 1309 | type: "Eltwise" 1310 | bottom: "layer_128_3_conv3" 1311 | bottom: "layer_128_2_sum" 1312 | top: "layer_128_3_sum" 1313 | } 1314 | layer { 1315 | name: 
"layer_128_4_bn1" 1316 | type: "BatchNorm" 1317 | bottom: "layer_128_3_sum" 1318 | top: "layer_128_4_bn1" 1319 | param { 1320 | lr_mult: 0.0 1321 | } 1322 | param { 1323 | lr_mult: 0.0 1324 | } 1325 | param { 1326 | lr_mult: 0.0 1327 | } 1328 | } 1329 | layer { 1330 | name: "layer_128_4_scale1" 1331 | type: "Scale" 1332 | bottom: "layer_128_4_bn1" 1333 | top: "layer_128_4_bn1" 1334 | param { 1335 | lr_mult: 1.0 1336 | decay_mult: 1.0 1337 | } 1338 | param { 1339 | lr_mult: 2.0 1340 | decay_mult: 1.0 1341 | } 1342 | scale_param { 1343 | bias_term: true 1344 | } 1345 | } 1346 | layer { 1347 | name: "layer_128_4_relu1" 1348 | type: "ReLU" 1349 | bottom: "layer_128_4_bn1" 1350 | top: "layer_128_4_bn1" 1351 | } 1352 | layer { 1353 | name: "layer_128_4_conv1" 1354 | type: "Convolution" 1355 | bottom: "layer_128_4_bn1" 1356 | top: "layer_128_4_conv1" 1357 | param { 1358 | lr_mult: 1.0 1359 | decay_mult: 1.0 1360 | } 1361 | convolution_param { 1362 | num_output: 128 1363 | bias_term: false 1364 | pad: 0 1365 | kernel_size: 1 1366 | stride: 1 1367 | weight_filler { 1368 | type: "msra" 1369 | } 1370 | bias_filler { 1371 | type: "constant" 1372 | value: 0.0 1373 | } 1374 | } 1375 | } 1376 | layer { 1377 | name: "layer_128_4_bn2" 1378 | type: "BatchNorm" 1379 | bottom: "layer_128_4_conv1" 1380 | top: "layer_128_4_conv1" 1381 | param { 1382 | lr_mult: 0.0 1383 | } 1384 | param { 1385 | lr_mult: 0.0 1386 | } 1387 | param { 1388 | lr_mult: 0.0 1389 | } 1390 | } 1391 | layer { 1392 | name: "layer_128_4_scale2" 1393 | type: "Scale" 1394 | bottom: "layer_128_4_conv1" 1395 | top: "layer_128_4_conv1" 1396 | param { 1397 | lr_mult: 1.0 1398 | decay_mult: 1.0 1399 | } 1400 | param { 1401 | lr_mult: 2.0 1402 | decay_mult: 1.0 1403 | } 1404 | scale_param { 1405 | bias_term: true 1406 | } 1407 | } 1408 | layer { 1409 | name: "layer_128_4_relu2" 1410 | type: "ReLU" 1411 | bottom: "layer_128_4_conv1" 1412 | top: "layer_128_4_conv1" 1413 | } 1414 | layer { 1415 | name: "layer_128_4_conv2" 
1416 | type: "Convolution" 1417 | bottom: "layer_128_4_conv1" 1418 | top: "layer_128_4_conv2" 1419 | param { 1420 | lr_mult: 1.0 1421 | decay_mult: 1.0 1422 | } 1423 | convolution_param { 1424 | num_output: 128 1425 | bias_term: false 1426 | pad: 1 1427 | kernel_size: 3 1428 | stride: 1 1429 | weight_filler { 1430 | type: "msra" 1431 | } 1432 | bias_filler { 1433 | type: "constant" 1434 | value: 0.0 1435 | } 1436 | } 1437 | } 1438 | layer { 1439 | name: "layer_128_4_bn3" 1440 | type: "BatchNorm" 1441 | bottom: "layer_128_4_conv2" 1442 | top: "layer_128_4_conv2" 1443 | param { 1444 | lr_mult: 0.0 1445 | } 1446 | param { 1447 | lr_mult: 0.0 1448 | } 1449 | param { 1450 | lr_mult: 0.0 1451 | } 1452 | } 1453 | layer { 1454 | name: "layer_128_4_scale3" 1455 | type: "Scale" 1456 | bottom: "layer_128_4_conv2" 1457 | top: "layer_128_4_conv2" 1458 | param { 1459 | lr_mult: 1.0 1460 | decay_mult: 1.0 1461 | } 1462 | param { 1463 | lr_mult: 2.0 1464 | decay_mult: 1.0 1465 | } 1466 | scale_param { 1467 | bias_term: true 1468 | } 1469 | } 1470 | layer { 1471 | name: "layer_128_4_relu3" 1472 | type: "ReLU" 1473 | bottom: "layer_128_4_conv2" 1474 | top: "layer_128_4_conv2" 1475 | } 1476 | layer { 1477 | name: "layer_128_4_conv3" 1478 | type: "Convolution" 1479 | bottom: "layer_128_4_conv2" 1480 | top: "layer_128_4_conv3" 1481 | param { 1482 | lr_mult: 1.0 1483 | decay_mult: 1.0 1484 | } 1485 | convolution_param { 1486 | num_output: 512 1487 | bias_term: false 1488 | pad: 0 1489 | kernel_size: 1 1490 | stride: 1 1491 | weight_filler { 1492 | type: "msra" 1493 | } 1494 | bias_filler { 1495 | type: "constant" 1496 | value: 0.0 1497 | } 1498 | } 1499 | } 1500 | layer { 1501 | name: "layer_128_4_sum" 1502 | type: "Eltwise" 1503 | bottom: "layer_128_4_conv3" 1504 | bottom: "layer_128_3_sum" 1505 | top: "layer_128_4_sum" 1506 | } 1507 | layer { 1508 | name: "layer_256_1_bn1" 1509 | type: "BatchNorm" 1510 | bottom: "layer_128_4_sum" 1511 | top: "layer_256_1_bn1" 1512 | param { 1513 | 
lr_mult: 0.0 1514 | } 1515 | param { 1516 | lr_mult: 0.0 1517 | } 1518 | param { 1519 | lr_mult: 0.0 1520 | } 1521 | } 1522 | layer { 1523 | name: "layer_256_1_scale1" 1524 | type: "Scale" 1525 | bottom: "layer_256_1_bn1" 1526 | top: "layer_256_1_bn1" 1527 | param { 1528 | lr_mult: 1.0 1529 | decay_mult: 1.0 1530 | } 1531 | param { 1532 | lr_mult: 2.0 1533 | decay_mult: 1.0 1534 | } 1535 | scale_param { 1536 | bias_term: true 1537 | } 1538 | } 1539 | layer { 1540 | name: "layer_256_1_relu1" 1541 | type: "ReLU" 1542 | bottom: "layer_256_1_bn1" 1543 | top: "layer_256_1_bn1" 1544 | } 1545 | layer { 1546 | name: "layer_256_1_conv1" 1547 | type: "Convolution" 1548 | bottom: "layer_256_1_bn1" 1549 | top: "layer_256_1_conv1" 1550 | param { 1551 | lr_mult: 1.0 1552 | decay_mult: 1.0 1553 | } 1554 | convolution_param { 1555 | num_output: 256 1556 | bias_term: false 1557 | pad: 0 1558 | kernel_size: 1 1559 | stride: 1 1560 | weight_filler { 1561 | type: "msra" 1562 | } 1563 | bias_filler { 1564 | type: "constant" 1565 | value: 0.0 1566 | } 1567 | } 1568 | } 1569 | layer { 1570 | name: "layer_256_1_bn2" 1571 | type: "BatchNorm" 1572 | bottom: "layer_256_1_conv1" 1573 | top: "layer_256_1_conv1" 1574 | param { 1575 | lr_mult: 0.0 1576 | } 1577 | param { 1578 | lr_mult: 0.0 1579 | } 1580 | param { 1581 | lr_mult: 0.0 1582 | } 1583 | } 1584 | layer { 1585 | name: "layer_256_1_scale2" 1586 | type: "Scale" 1587 | bottom: "layer_256_1_conv1" 1588 | top: "layer_256_1_conv1" 1589 | param { 1590 | lr_mult: 1.0 1591 | decay_mult: 1.0 1592 | } 1593 | param { 1594 | lr_mult: 2.0 1595 | decay_mult: 1.0 1596 | } 1597 | scale_param { 1598 | bias_term: true 1599 | } 1600 | } 1601 | layer { 1602 | name: "layer_256_1_relu2" 1603 | type: "ReLU" 1604 | bottom: "layer_256_1_conv1" 1605 | top: "layer_256_1_conv1" 1606 | } 1607 | layer { 1608 | name: "layer_256_1_conv2" 1609 | type: "Convolution" 1610 | bottom: "layer_256_1_conv1" 1611 | top: "layer_256_1_conv2" 1612 | param { 1613 | lr_mult: 1.0 
1614 | decay_mult: 1.0 1615 | } 1616 | convolution_param { 1617 | num_output: 256 1618 | bias_term: false 1619 | pad: 1 1620 | kernel_size: 3 1621 | stride: 2 1622 | weight_filler { 1623 | type: "msra" 1624 | } 1625 | bias_filler { 1626 | type: "constant" 1627 | value: 0.0 1628 | } 1629 | } 1630 | } 1631 | layer { 1632 | name: "layer_256_1_bn3" 1633 | type: "BatchNorm" 1634 | bottom: "layer_256_1_conv2" 1635 | top: "layer_256_1_conv2" 1636 | param { 1637 | lr_mult: 0.0 1638 | } 1639 | param { 1640 | lr_mult: 0.0 1641 | } 1642 | param { 1643 | lr_mult: 0.0 1644 | } 1645 | } 1646 | layer { 1647 | name: "layer_256_1_scale3" 1648 | type: "Scale" 1649 | bottom: "layer_256_1_conv2" 1650 | top: "layer_256_1_conv2" 1651 | param { 1652 | lr_mult: 1.0 1653 | decay_mult: 1.0 1654 | } 1655 | param { 1656 | lr_mult: 2.0 1657 | decay_mult: 1.0 1658 | } 1659 | scale_param { 1660 | bias_term: true 1661 | } 1662 | } 1663 | layer { 1664 | name: "layer_256_1_relu3" 1665 | type: "ReLU" 1666 | bottom: "layer_256_1_conv2" 1667 | top: "layer_256_1_conv2" 1668 | } 1669 | layer { 1670 | name: "layer_256_1_conv3" 1671 | type: "Convolution" 1672 | bottom: "layer_256_1_conv2" 1673 | top: "layer_256_1_conv3" 1674 | param { 1675 | lr_mult: 1.0 1676 | decay_mult: 1.0 1677 | } 1678 | convolution_param { 1679 | num_output: 1024 1680 | bias_term: false 1681 | pad: 0 1682 | kernel_size: 1 1683 | stride: 1 1684 | weight_filler { 1685 | type: "msra" 1686 | } 1687 | bias_filler { 1688 | type: "constant" 1689 | value: 0.0 1690 | } 1691 | } 1692 | } 1693 | layer { 1694 | name: "layer_256_1_conv_expand" 1695 | type: "Convolution" 1696 | bottom: "layer_256_1_bn1" 1697 | top: "layer_256_1_conv_expand" 1698 | param { 1699 | lr_mult: 1.0 1700 | decay_mult: 1.0 1701 | } 1702 | convolution_param { 1703 | num_output: 1024 1704 | bias_term: false 1705 | pad: 0 1706 | kernel_size: 1 1707 | stride: 2 1708 | weight_filler { 1709 | type: "msra" 1710 | } 1711 | bias_filler { 1712 | type: "constant" 1713 | value: 0.0 
1714 | } 1715 | } 1716 | } 1717 | layer { 1718 | name: "layer_256_1_sum" 1719 | type: "Eltwise" 1720 | bottom: "layer_256_1_conv3" 1721 | bottom: "layer_256_1_conv_expand" 1722 | top: "layer_256_1_sum" 1723 | } 1724 | layer { 1725 | name: "layer_256_2_bn1" 1726 | type: "BatchNorm" 1727 | bottom: "layer_256_1_sum" 1728 | top: "layer_256_2_bn1" 1729 | param { 1730 | lr_mult: 0.0 1731 | } 1732 | param { 1733 | lr_mult: 0.0 1734 | } 1735 | param { 1736 | lr_mult: 0.0 1737 | } 1738 | } 1739 | layer { 1740 | name: "layer_256_2_scale1" 1741 | type: "Scale" 1742 | bottom: "layer_256_2_bn1" 1743 | top: "layer_256_2_bn1" 1744 | param { 1745 | lr_mult: 1.0 1746 | decay_mult: 1.0 1747 | } 1748 | param { 1749 | lr_mult: 2.0 1750 | decay_mult: 1.0 1751 | } 1752 | scale_param { 1753 | bias_term: true 1754 | } 1755 | } 1756 | layer { 1757 | name: "layer_256_2_relu1" 1758 | type: "ReLU" 1759 | bottom: "layer_256_2_bn1" 1760 | top: "layer_256_2_bn1" 1761 | } 1762 | layer { 1763 | name: "layer_256_2_conv1" 1764 | type: "Convolution" 1765 | bottom: "layer_256_2_bn1" 1766 | top: "layer_256_2_conv1" 1767 | param { 1768 | lr_mult: 1.0 1769 | decay_mult: 1.0 1770 | } 1771 | convolution_param { 1772 | num_output: 256 1773 | bias_term: false 1774 | pad: 0 1775 | kernel_size: 1 1776 | stride: 1 1777 | weight_filler { 1778 | type: "msra" 1779 | } 1780 | bias_filler { 1781 | type: "constant" 1782 | value: 0.0 1783 | } 1784 | } 1785 | } 1786 | layer { 1787 | name: "layer_256_2_bn2" 1788 | type: "BatchNorm" 1789 | bottom: "layer_256_2_conv1" 1790 | top: "layer_256_2_conv1" 1791 | param { 1792 | lr_mult: 0.0 1793 | } 1794 | param { 1795 | lr_mult: 0.0 1796 | } 1797 | param { 1798 | lr_mult: 0.0 1799 | } 1800 | } 1801 | layer { 1802 | name: "layer_256_2_scale2" 1803 | type: "Scale" 1804 | bottom: "layer_256_2_conv1" 1805 | top: "layer_256_2_conv1" 1806 | param { 1807 | lr_mult: 1.0 1808 | decay_mult: 1.0 1809 | } 1810 | param { 1811 | lr_mult: 2.0 1812 | decay_mult: 1.0 1813 | } 1814 | scale_param 
{ 1815 | bias_term: true 1816 | } 1817 | } 1818 | layer { 1819 | name: "layer_256_2_relu2" 1820 | type: "ReLU" 1821 | bottom: "layer_256_2_conv1" 1822 | top: "layer_256_2_conv1" 1823 | } 1824 | layer { 1825 | name: "layer_256_2_conv2" 1826 | type: "Convolution" 1827 | bottom: "layer_256_2_conv1" 1828 | top: "layer_256_2_conv2" 1829 | param { 1830 | lr_mult: 1.0 1831 | decay_mult: 1.0 1832 | } 1833 | convolution_param { 1834 | num_output: 256 1835 | bias_term: false 1836 | pad: 1 1837 | kernel_size: 3 1838 | stride: 1 1839 | weight_filler { 1840 | type: "msra" 1841 | } 1842 | bias_filler { 1843 | type: "constant" 1844 | value: 0.0 1845 | } 1846 | } 1847 | } 1848 | layer { 1849 | name: "layer_256_2_bn3" 1850 | type: "BatchNorm" 1851 | bottom: "layer_256_2_conv2" 1852 | top: "layer_256_2_conv2" 1853 | param { 1854 | lr_mult: 0.0 1855 | } 1856 | param { 1857 | lr_mult: 0.0 1858 | } 1859 | param { 1860 | lr_mult: 0.0 1861 | } 1862 | } 1863 | layer { 1864 | name: "layer_256_2_scale3" 1865 | type: "Scale" 1866 | bottom: "layer_256_2_conv2" 1867 | top: "layer_256_2_conv2" 1868 | param { 1869 | lr_mult: 1.0 1870 | decay_mult: 1.0 1871 | } 1872 | param { 1873 | lr_mult: 2.0 1874 | decay_mult: 1.0 1875 | } 1876 | scale_param { 1877 | bias_term: true 1878 | } 1879 | } 1880 | layer { 1881 | name: "layer_256_2_relu3" 1882 | type: "ReLU" 1883 | bottom: "layer_256_2_conv2" 1884 | top: "layer_256_2_conv2" 1885 | } 1886 | layer { 1887 | name: "layer_256_2_conv3" 1888 | type: "Convolution" 1889 | bottom: "layer_256_2_conv2" 1890 | top: "layer_256_2_conv3" 1891 | param { 1892 | lr_mult: 1.0 1893 | decay_mult: 1.0 1894 | } 1895 | convolution_param { 1896 | num_output: 1024 1897 | bias_term: false 1898 | pad: 0 1899 | kernel_size: 1 1900 | stride: 1 1901 | weight_filler { 1902 | type: "msra" 1903 | } 1904 | bias_filler { 1905 | type: "constant" 1906 | value: 0.0 1907 | } 1908 | } 1909 | } 1910 | layer { 1911 | name: "layer_256_2_sum" 1912 | type: "Eltwise" 1913 | bottom: 
"layer_256_2_conv3" 1914 | bottom: "layer_256_1_sum" 1915 | top: "layer_256_2_sum" 1916 | } 1917 | layer { 1918 | name: "layer_256_3_bn1" 1919 | type: "BatchNorm" 1920 | bottom: "layer_256_2_sum" 1921 | top: "layer_256_3_bn1" 1922 | param { 1923 | lr_mult: 0.0 1924 | } 1925 | param { 1926 | lr_mult: 0.0 1927 | } 1928 | param { 1929 | lr_mult: 0.0 1930 | } 1931 | } 1932 | layer { 1933 | name: "layer_256_3_scale1" 1934 | type: "Scale" 1935 | bottom: "layer_256_3_bn1" 1936 | top: "layer_256_3_bn1" 1937 | param { 1938 | lr_mult: 1.0 1939 | decay_mult: 1.0 1940 | } 1941 | param { 1942 | lr_mult: 2.0 1943 | decay_mult: 1.0 1944 | } 1945 | scale_param { 1946 | bias_term: true 1947 | } 1948 | } 1949 | layer { 1950 | name: "layer_256_3_relu1" 1951 | type: "ReLU" 1952 | bottom: "layer_256_3_bn1" 1953 | top: "layer_256_3_bn1" 1954 | } 1955 | layer { 1956 | name: "layer_256_3_conv1" 1957 | type: "Convolution" 1958 | bottom: "layer_256_3_bn1" 1959 | top: "layer_256_3_conv1" 1960 | param { 1961 | lr_mult: 1.0 1962 | decay_mult: 1.0 1963 | } 1964 | convolution_param { 1965 | num_output: 256 1966 | bias_term: false 1967 | pad: 0 1968 | kernel_size: 1 1969 | stride: 1 1970 | weight_filler { 1971 | type: "msra" 1972 | } 1973 | bias_filler { 1974 | type: "constant" 1975 | value: 0.0 1976 | } 1977 | } 1978 | } 1979 | layer { 1980 | name: "layer_256_3_bn2" 1981 | type: "BatchNorm" 1982 | bottom: "layer_256_3_conv1" 1983 | top: "layer_256_3_conv1" 1984 | param { 1985 | lr_mult: 0.0 1986 | } 1987 | param { 1988 | lr_mult: 0.0 1989 | } 1990 | param { 1991 | lr_mult: 0.0 1992 | } 1993 | } 1994 | layer { 1995 | name: "layer_256_3_scale2" 1996 | type: "Scale" 1997 | bottom: "layer_256_3_conv1" 1998 | top: "layer_256_3_conv1" 1999 | param { 2000 | lr_mult: 1.0 2001 | decay_mult: 1.0 2002 | } 2003 | param { 2004 | lr_mult: 2.0 2005 | decay_mult: 1.0 2006 | } 2007 | scale_param { 2008 | bias_term: true 2009 | } 2010 | } 2011 | layer { 2012 | name: "layer_256_3_relu2" 2013 | type: "ReLU" 2014 | 
bottom: "layer_256_3_conv1" 2015 | top: "layer_256_3_conv1" 2016 | } 2017 | layer { 2018 | name: "layer_256_3_conv2" 2019 | type: "Convolution" 2020 | bottom: "layer_256_3_conv1" 2021 | top: "layer_256_3_conv2" 2022 | param { 2023 | lr_mult: 1.0 2024 | decay_mult: 1.0 2025 | } 2026 | convolution_param { 2027 | num_output: 256 2028 | bias_term: false 2029 | pad: 1 2030 | kernel_size: 3 2031 | stride: 1 2032 | weight_filler { 2033 | type: "msra" 2034 | } 2035 | bias_filler { 2036 | type: "constant" 2037 | value: 0.0 2038 | } 2039 | } 2040 | } 2041 | layer { 2042 | name: "layer_256_3_bn3" 2043 | type: "BatchNorm" 2044 | bottom: "layer_256_3_conv2" 2045 | top: "layer_256_3_conv2" 2046 | param { 2047 | lr_mult: 0.0 2048 | } 2049 | param { 2050 | lr_mult: 0.0 2051 | } 2052 | param { 2053 | lr_mult: 0.0 2054 | } 2055 | } 2056 | layer { 2057 | name: "layer_256_3_scale3" 2058 | type: "Scale" 2059 | bottom: "layer_256_3_conv2" 2060 | top: "layer_256_3_conv2" 2061 | param { 2062 | lr_mult: 1.0 2063 | decay_mult: 1.0 2064 | } 2065 | param { 2066 | lr_mult: 2.0 2067 | decay_mult: 1.0 2068 | } 2069 | scale_param { 2070 | bias_term: true 2071 | } 2072 | } 2073 | layer { 2074 | name: "layer_256_3_relu3" 2075 | type: "ReLU" 2076 | bottom: "layer_256_3_conv2" 2077 | top: "layer_256_3_conv2" 2078 | } 2079 | layer { 2080 | name: "layer_256_3_conv3" 2081 | type: "Convolution" 2082 | bottom: "layer_256_3_conv2" 2083 | top: "layer_256_3_conv3" 2084 | param { 2085 | lr_mult: 1.0 2086 | decay_mult: 1.0 2087 | } 2088 | convolution_param { 2089 | num_output: 1024 2090 | bias_term: false 2091 | pad: 0 2092 | kernel_size: 1 2093 | stride: 1 2094 | weight_filler { 2095 | type: "msra" 2096 | } 2097 | bias_filler { 2098 | type: "constant" 2099 | value: 0.0 2100 | } 2101 | } 2102 | } 2103 | layer { 2104 | name: "layer_256_3_sum" 2105 | type: "Eltwise" 2106 | bottom: "layer_256_3_conv3" 2107 | bottom: "layer_256_2_sum" 2108 | top: "layer_256_3_sum" 2109 | } 2110 | layer { 2111 | name: 
"layer_256_4_bn1" 2112 | type: "BatchNorm" 2113 | bottom: "layer_256_3_sum" 2114 | top: "layer_256_4_bn1" 2115 | param { 2116 | lr_mult: 0.0 2117 | } 2118 | param { 2119 | lr_mult: 0.0 2120 | } 2121 | param { 2122 | lr_mult: 0.0 2123 | } 2124 | } 2125 | layer { 2126 | name: "layer_256_4_scale1" 2127 | type: "Scale" 2128 | bottom: "layer_256_4_bn1" 2129 | top: "layer_256_4_bn1" 2130 | param { 2131 | lr_mult: 1.0 2132 | decay_mult: 1.0 2133 | } 2134 | param { 2135 | lr_mult: 2.0 2136 | decay_mult: 1.0 2137 | } 2138 | scale_param { 2139 | bias_term: true 2140 | } 2141 | } 2142 | layer { 2143 | name: "layer_256_4_relu1" 2144 | type: "ReLU" 2145 | bottom: "layer_256_4_bn1" 2146 | top: "layer_256_4_bn1" 2147 | } 2148 | layer { 2149 | name: "layer_256_4_conv1" 2150 | type: "Convolution" 2151 | bottom: "layer_256_4_bn1" 2152 | top: "layer_256_4_conv1" 2153 | param { 2154 | lr_mult: 1.0 2155 | decay_mult: 1.0 2156 | } 2157 | convolution_param { 2158 | num_output: 256 2159 | bias_term: false 2160 | pad: 0 2161 | kernel_size: 1 2162 | stride: 1 2163 | weight_filler { 2164 | type: "msra" 2165 | } 2166 | bias_filler { 2167 | type: "constant" 2168 | value: 0.0 2169 | } 2170 | } 2171 | } 2172 | layer { 2173 | name: "layer_256_4_bn2" 2174 | type: "BatchNorm" 2175 | bottom: "layer_256_4_conv1" 2176 | top: "layer_256_4_conv1" 2177 | param { 2178 | lr_mult: 0.0 2179 | } 2180 | param { 2181 | lr_mult: 0.0 2182 | } 2183 | param { 2184 | lr_mult: 0.0 2185 | } 2186 | } 2187 | layer { 2188 | name: "layer_256_4_scale2" 2189 | type: "Scale" 2190 | bottom: "layer_256_4_conv1" 2191 | top: "layer_256_4_conv1" 2192 | param { 2193 | lr_mult: 1.0 2194 | decay_mult: 1.0 2195 | } 2196 | param { 2197 | lr_mult: 2.0 2198 | decay_mult: 1.0 2199 | } 2200 | scale_param { 2201 | bias_term: true 2202 | } 2203 | } 2204 | layer { 2205 | name: "layer_256_4_relu2" 2206 | type: "ReLU" 2207 | bottom: "layer_256_4_conv1" 2208 | top: "layer_256_4_conv1" 2209 | } 2210 | layer { 2211 | name: "layer_256_4_conv2" 
2212 | type: "Convolution" 2213 | bottom: "layer_256_4_conv1" 2214 | top: "layer_256_4_conv2" 2215 | param { 2216 | lr_mult: 1.0 2217 | decay_mult: 1.0 2218 | } 2219 | convolution_param { 2220 | num_output: 256 2221 | bias_term: false 2222 | pad: 1 2223 | kernel_size: 3 2224 | stride: 1 2225 | weight_filler { 2226 | type: "msra" 2227 | } 2228 | bias_filler { 2229 | type: "constant" 2230 | value: 0.0 2231 | } 2232 | } 2233 | } 2234 | layer { 2235 | name: "layer_256_4_bn3" 2236 | type: "BatchNorm" 2237 | bottom: "layer_256_4_conv2" 2238 | top: "layer_256_4_conv2" 2239 | param { 2240 | lr_mult: 0.0 2241 | } 2242 | param { 2243 | lr_mult: 0.0 2244 | } 2245 | param { 2246 | lr_mult: 0.0 2247 | } 2248 | } 2249 | layer { 2250 | name: "layer_256_4_scale3" 2251 | type: "Scale" 2252 | bottom: "layer_256_4_conv2" 2253 | top: "layer_256_4_conv2" 2254 | param { 2255 | lr_mult: 1.0 2256 | decay_mult: 1.0 2257 | } 2258 | param { 2259 | lr_mult: 2.0 2260 | decay_mult: 1.0 2261 | } 2262 | scale_param { 2263 | bias_term: true 2264 | } 2265 | } 2266 | layer { 2267 | name: "layer_256_4_relu3" 2268 | type: "ReLU" 2269 | bottom: "layer_256_4_conv2" 2270 | top: "layer_256_4_conv2" 2271 | } 2272 | layer { 2273 | name: "layer_256_4_conv3" 2274 | type: "Convolution" 2275 | bottom: "layer_256_4_conv2" 2276 | top: "layer_256_4_conv3" 2277 | param { 2278 | lr_mult: 1.0 2279 | decay_mult: 1.0 2280 | } 2281 | convolution_param { 2282 | num_output: 1024 2283 | bias_term: false 2284 | pad: 0 2285 | kernel_size: 1 2286 | stride: 1 2287 | weight_filler { 2288 | type: "msra" 2289 | } 2290 | bias_filler { 2291 | type: "constant" 2292 | value: 0.0 2293 | } 2294 | } 2295 | } 2296 | layer { 2297 | name: "layer_256_4_sum" 2298 | type: "Eltwise" 2299 | bottom: "layer_256_4_conv3" 2300 | bottom: "layer_256_3_sum" 2301 | top: "layer_256_4_sum" 2302 | } 2303 | layer { 2304 | name: "layer_256_5_bn1" 2305 | type: "BatchNorm" 2306 | bottom: "layer_256_4_sum" 2307 | top: "layer_256_5_bn1" 2308 | param { 2309 | 
lr_mult: 0.0 2310 | } 2311 | param { 2312 | lr_mult: 0.0 2313 | } 2314 | param { 2315 | lr_mult: 0.0 2316 | } 2317 | } 2318 | layer { 2319 | name: "layer_256_5_scale1" 2320 | type: "Scale" 2321 | bottom: "layer_256_5_bn1" 2322 | top: "layer_256_5_bn1" 2323 | param { 2324 | lr_mult: 1.0 2325 | decay_mult: 1.0 2326 | } 2327 | param { 2328 | lr_mult: 2.0 2329 | decay_mult: 1.0 2330 | } 2331 | scale_param { 2332 | bias_term: true 2333 | } 2334 | } 2335 | layer { 2336 | name: "layer_256_5_relu1" 2337 | type: "ReLU" 2338 | bottom: "layer_256_5_bn1" 2339 | top: "layer_256_5_bn1" 2340 | } 2341 | layer { 2342 | name: "layer_256_5_conv1" 2343 | type: "Convolution" 2344 | bottom: "layer_256_5_bn1" 2345 | top: "layer_256_5_conv1" 2346 | param { 2347 | lr_mult: 1.0 2348 | decay_mult: 1.0 2349 | } 2350 | convolution_param { 2351 | num_output: 256 2352 | bias_term: false 2353 | pad: 0 2354 | kernel_size: 1 2355 | stride: 1 2356 | weight_filler { 2357 | type: "msra" 2358 | } 2359 | bias_filler { 2360 | type: "constant" 2361 | value: 0.0 2362 | } 2363 | } 2364 | } 2365 | layer { 2366 | name: "layer_256_5_bn2" 2367 | type: "BatchNorm" 2368 | bottom: "layer_256_5_conv1" 2369 | top: "layer_256_5_conv1" 2370 | param { 2371 | lr_mult: 0.0 2372 | } 2373 | param { 2374 | lr_mult: 0.0 2375 | } 2376 | param { 2377 | lr_mult: 0.0 2378 | } 2379 | } 2380 | layer { 2381 | name: "layer_256_5_scale2" 2382 | type: "Scale" 2383 | bottom: "layer_256_5_conv1" 2384 | top: "layer_256_5_conv1" 2385 | param { 2386 | lr_mult: 1.0 2387 | decay_mult: 1.0 2388 | } 2389 | param { 2390 | lr_mult: 2.0 2391 | decay_mult: 1.0 2392 | } 2393 | scale_param { 2394 | bias_term: true 2395 | } 2396 | } 2397 | layer { 2398 | name: "layer_256_5_relu2" 2399 | type: "ReLU" 2400 | bottom: "layer_256_5_conv1" 2401 | top: "layer_256_5_conv1" 2402 | } 2403 | layer { 2404 | name: "layer_256_5_conv2" 2405 | type: "Convolution" 2406 | bottom: "layer_256_5_conv1" 2407 | top: "layer_256_5_conv2" 2408 | param { 2409 | lr_mult: 1.0 
2410 | decay_mult: 1.0 2411 | } 2412 | convolution_param { 2413 | num_output: 256 2414 | bias_term: false 2415 | pad: 1 2416 | kernel_size: 3 2417 | stride: 1 2418 | weight_filler { 2419 | type: "msra" 2420 | } 2421 | bias_filler { 2422 | type: "constant" 2423 | value: 0.0 2424 | } 2425 | } 2426 | } 2427 | layer { 2428 | name: "layer_256_5_bn3" 2429 | type: "BatchNorm" 2430 | bottom: "layer_256_5_conv2" 2431 | top: "layer_256_5_conv2" 2432 | param { 2433 | lr_mult: 0.0 2434 | } 2435 | param { 2436 | lr_mult: 0.0 2437 | } 2438 | param { 2439 | lr_mult: 0.0 2440 | } 2441 | } 2442 | layer { 2443 | name: "layer_256_5_scale3" 2444 | type: "Scale" 2445 | bottom: "layer_256_5_conv2" 2446 | top: "layer_256_5_conv2" 2447 | param { 2448 | lr_mult: 1.0 2449 | decay_mult: 1.0 2450 | } 2451 | param { 2452 | lr_mult: 2.0 2453 | decay_mult: 1.0 2454 | } 2455 | scale_param { 2456 | bias_term: true 2457 | } 2458 | } 2459 | layer { 2460 | name: "layer_256_5_relu3" 2461 | type: "ReLU" 2462 | bottom: "layer_256_5_conv2" 2463 | top: "layer_256_5_conv2" 2464 | } 2465 | layer { 2466 | name: "layer_256_5_conv3" 2467 | type: "Convolution" 2468 | bottom: "layer_256_5_conv2" 2469 | top: "layer_256_5_conv3" 2470 | param { 2471 | lr_mult: 1.0 2472 | decay_mult: 1.0 2473 | } 2474 | convolution_param { 2475 | num_output: 1024 2476 | bias_term: false 2477 | pad: 0 2478 | kernel_size: 1 2479 | stride: 1 2480 | weight_filler { 2481 | type: "msra" 2482 | } 2483 | bias_filler { 2484 | type: "constant" 2485 | value: 0.0 2486 | } 2487 | } 2488 | } 2489 | layer { 2490 | name: "layer_256_5_sum" 2491 | type: "Eltwise" 2492 | bottom: "layer_256_5_conv3" 2493 | bottom: "layer_256_4_sum" 2494 | top: "layer_256_5_sum" 2495 | } 2496 | layer { 2497 | name: "layer_256_6_bn1" 2498 | type: "BatchNorm" 2499 | bottom: "layer_256_5_sum" 2500 | top: "layer_256_6_bn1" 2501 | param { 2502 | lr_mult: 0.0 2503 | } 2504 | param { 2505 | lr_mult: 0.0 2506 | } 2507 | param { 2508 | lr_mult: 0.0 2509 | } 2510 | } 2511 | layer 
{ 2512 | name: "layer_256_6_scale1" 2513 | type: "Scale" 2514 | bottom: "layer_256_6_bn1" 2515 | top: "layer_256_6_bn1" 2516 | param { 2517 | lr_mult: 1.0 2518 | decay_mult: 1.0 2519 | } 2520 | param { 2521 | lr_mult: 2.0 2522 | decay_mult: 1.0 2523 | } 2524 | scale_param { 2525 | bias_term: true 2526 | } 2527 | } 2528 | layer { 2529 | name: "layer_256_6_relu1" 2530 | type: "ReLU" 2531 | bottom: "layer_256_6_bn1" 2532 | top: "layer_256_6_bn1" 2533 | } 2534 | layer { 2535 | name: "layer_256_6_conv1" 2536 | type: "Convolution" 2537 | bottom: "layer_256_6_bn1" 2538 | top: "layer_256_6_conv1" 2539 | param { 2540 | lr_mult: 1.0 2541 | decay_mult: 1.0 2542 | } 2543 | convolution_param { 2544 | num_output: 256 2545 | bias_term: false 2546 | pad: 0 2547 | kernel_size: 1 2548 | stride: 1 2549 | weight_filler { 2550 | type: "msra" 2551 | } 2552 | bias_filler { 2553 | type: "constant" 2554 | value: 0.0 2555 | } 2556 | } 2557 | } 2558 | layer { 2559 | name: "layer_256_6_bn2" 2560 | type: "BatchNorm" 2561 | bottom: "layer_256_6_conv1" 2562 | top: "layer_256_6_conv1" 2563 | param { 2564 | lr_mult: 0.0 2565 | } 2566 | param { 2567 | lr_mult: 0.0 2568 | } 2569 | param { 2570 | lr_mult: 0.0 2571 | } 2572 | } 2573 | layer { 2574 | name: "layer_256_6_scale2" 2575 | type: "Scale" 2576 | bottom: "layer_256_6_conv1" 2577 | top: "layer_256_6_conv1" 2578 | param { 2579 | lr_mult: 1.0 2580 | decay_mult: 1.0 2581 | } 2582 | param { 2583 | lr_mult: 2.0 2584 | decay_mult: 1.0 2585 | } 2586 | scale_param { 2587 | bias_term: true 2588 | } 2589 | } 2590 | layer { 2591 | name: "layer_256_6_relu2" 2592 | type: "ReLU" 2593 | bottom: "layer_256_6_conv1" 2594 | top: "layer_256_6_conv1" 2595 | } 2596 | layer { 2597 | name: "layer_256_6_conv2" 2598 | type: "Convolution" 2599 | bottom: "layer_256_6_conv1" 2600 | top: "layer_256_6_conv2" 2601 | param { 2602 | lr_mult: 1.0 2603 | decay_mult: 1.0 2604 | } 2605 | convolution_param { 2606 | num_output: 256 2607 | bias_term: false 2608 | pad: 1 2609 | 
kernel_size: 3 2610 | stride: 1 2611 | weight_filler { 2612 | type: "msra" 2613 | } 2614 | bias_filler { 2615 | type: "constant" 2616 | value: 0.0 2617 | } 2618 | } 2619 | } 2620 | layer { 2621 | name: "layer_256_6_bn3" 2622 | type: "BatchNorm" 2623 | bottom: "layer_256_6_conv2" 2624 | top: "layer_256_6_conv2" 2625 | param { 2626 | lr_mult: 0.0 2627 | } 2628 | param { 2629 | lr_mult: 0.0 2630 | } 2631 | param { 2632 | lr_mult: 0.0 2633 | } 2634 | } 2635 | layer { 2636 | name: "layer_256_6_scale3" 2637 | type: "Scale" 2638 | bottom: "layer_256_6_conv2" 2639 | top: "layer_256_6_conv2" 2640 | param { 2641 | lr_mult: 1.0 2642 | decay_mult: 1.0 2643 | } 2644 | param { 2645 | lr_mult: 2.0 2646 | decay_mult: 1.0 2647 | } 2648 | scale_param { 2649 | bias_term: true 2650 | } 2651 | } 2652 | layer { 2653 | name: "layer_256_6_relu3" 2654 | type: "ReLU" 2655 | bottom: "layer_256_6_conv2" 2656 | top: "layer_256_6_conv2" 2657 | } 2658 | layer { 2659 | name: "layer_256_6_conv3" 2660 | type: "Convolution" 2661 | bottom: "layer_256_6_conv2" 2662 | top: "layer_256_6_conv3" 2663 | param { 2664 | lr_mult: 1.0 2665 | decay_mult: 1.0 2666 | } 2667 | convolution_param { 2668 | num_output: 1024 2669 | bias_term: false 2670 | pad: 0 2671 | kernel_size: 1 2672 | stride: 1 2673 | weight_filler { 2674 | type: "msra" 2675 | } 2676 | bias_filler { 2677 | type: "constant" 2678 | value: 0.0 2679 | } 2680 | } 2681 | } 2682 | layer { 2683 | name: "layer_256_6_sum" 2684 | type: "Eltwise" 2685 | bottom: "layer_256_6_conv3" 2686 | bottom: "layer_256_5_sum" 2687 | top: "layer_256_6_sum" 2688 | } 2689 | layer { 2690 | name: "layer_512_1_bn1" 2691 | type: "BatchNorm" 2692 | bottom: "layer_256_6_sum" 2693 | top: "layer_512_1_bn1" 2694 | param { 2695 | lr_mult: 0.0 2696 | } 2697 | param { 2698 | lr_mult: 0.0 2699 | } 2700 | param { 2701 | lr_mult: 0.0 2702 | } 2703 | } 2704 | layer { 2705 | name: "layer_512_1_scale1" 2706 | type: "Scale" 2707 | bottom: "layer_512_1_bn1" 2708 | top: "layer_512_1_bn1" 2709 | 
param { 2710 | lr_mult: 1.0 2711 | decay_mult: 1.0 2712 | } 2713 | param { 2714 | lr_mult: 2.0 2715 | decay_mult: 1.0 2716 | } 2717 | scale_param { 2718 | bias_term: true 2719 | } 2720 | } 2721 | layer { 2722 | name: "layer_512_1_relu1" 2723 | type: "ReLU" 2724 | bottom: "layer_512_1_bn1" 2725 | top: "layer_512_1_bn1" 2726 | } 2727 | layer { 2728 | name: "layer_512_1_conv1" 2729 | type: "Convolution" 2730 | bottom: "layer_512_1_bn1" 2731 | top: "layer_512_1_conv1" 2732 | param { 2733 | lr_mult: 1.0 2734 | decay_mult: 1.0 2735 | } 2736 | convolution_param { 2737 | num_output: 512 2738 | bias_term: false 2739 | pad: 0 2740 | kernel_size: 1 2741 | stride: 1 2742 | weight_filler { 2743 | type: "msra" 2744 | } 2745 | bias_filler { 2746 | type: "constant" 2747 | value: 0.0 2748 | } 2749 | } 2750 | } 2751 | layer { 2752 | name: "layer_512_1_bn2" 2753 | type: "BatchNorm" 2754 | bottom: "layer_512_1_conv1" 2755 | top: "layer_512_1_conv1" 2756 | param { 2757 | lr_mult: 0.0 2758 | } 2759 | param { 2760 | lr_mult: 0.0 2761 | } 2762 | param { 2763 | lr_mult: 0.0 2764 | } 2765 | } 2766 | layer { 2767 | name: "layer_512_1_scale2" 2768 | type: "Scale" 2769 | bottom: "layer_512_1_conv1" 2770 | top: "layer_512_1_conv1" 2771 | param { 2772 | lr_mult: 1.0 2773 | decay_mult: 1.0 2774 | } 2775 | param { 2776 | lr_mult: 2.0 2777 | decay_mult: 1.0 2778 | } 2779 | scale_param { 2780 | bias_term: true 2781 | } 2782 | } 2783 | layer { 2784 | name: "layer_512_1_relu2" 2785 | type: "ReLU" 2786 | bottom: "layer_512_1_conv1" 2787 | top: "layer_512_1_conv1" 2788 | } 2789 | layer { 2790 | name: "layer_512_1_conv2" 2791 | type: "Convolution" 2792 | bottom: "layer_512_1_conv1" 2793 | top: "layer_512_1_conv2" 2794 | param { 2795 | lr_mult: 1.0 2796 | decay_mult: 1.0 2797 | } 2798 | convolution_param { 2799 | num_output: 512 2800 | bias_term: false 2801 | pad: 1 2802 | kernel_size: 3 2803 | stride: 2 2804 | weight_filler { 2805 | type: "msra" 2806 | } 2807 | bias_filler { 2808 | type: "constant" 2809 
| value: 0.0 2810 | } 2811 | } 2812 | } 2813 | layer { 2814 | name: "layer_512_1_bn3" 2815 | type: "BatchNorm" 2816 | bottom: "layer_512_1_conv2" 2817 | top: "layer_512_1_conv2" 2818 | param { 2819 | lr_mult: 0.0 2820 | } 2821 | param { 2822 | lr_mult: 0.0 2823 | } 2824 | param { 2825 | lr_mult: 0.0 2826 | } 2827 | } 2828 | layer { 2829 | name: "layer_512_1_scale3" 2830 | type: "Scale" 2831 | bottom: "layer_512_1_conv2" 2832 | top: "layer_512_1_conv2" 2833 | param { 2834 | lr_mult: 1.0 2835 | decay_mult: 1.0 2836 | } 2837 | param { 2838 | lr_mult: 2.0 2839 | decay_mult: 1.0 2840 | } 2841 | scale_param { 2842 | bias_term: true 2843 | } 2844 | } 2845 | layer { 2846 | name: "layer_512_1_relu3" 2847 | type: "ReLU" 2848 | bottom: "layer_512_1_conv2" 2849 | top: "layer_512_1_conv2" 2850 | } 2851 | layer { 2852 | name: "layer_512_1_conv3" 2853 | type: "Convolution" 2854 | bottom: "layer_512_1_conv2" 2855 | top: "layer_512_1_conv3" 2856 | param { 2857 | lr_mult: 1.0 2858 | decay_mult: 1.0 2859 | } 2860 | convolution_param { 2861 | num_output: 2048 2862 | bias_term: false 2863 | pad: 0 2864 | kernel_size: 1 2865 | stride: 1 2866 | weight_filler { 2867 | type: "msra" 2868 | } 2869 | bias_filler { 2870 | type: "constant" 2871 | value: 0.0 2872 | } 2873 | } 2874 | } 2875 | layer { 2876 | name: "layer_512_1_conv_expand" 2877 | type: "Convolution" 2878 | bottom: "layer_512_1_bn1" 2879 | top: "layer_512_1_conv_expand" 2880 | param { 2881 | lr_mult: 1.0 2882 | decay_mult: 1.0 2883 | } 2884 | convolution_param { 2885 | num_output: 2048 2886 | bias_term: false 2887 | pad: 0 2888 | kernel_size: 1 2889 | stride: 2 2890 | weight_filler { 2891 | type: "msra" 2892 | } 2893 | bias_filler { 2894 | type: "constant" 2895 | value: 0.0 2896 | } 2897 | } 2898 | } 2899 | layer { 2900 | name: "layer_512_1_sum" 2901 | type: "Eltwise" 2902 | bottom: "layer_512_1_conv3" 2903 | bottom: "layer_512_1_conv_expand" 2904 | top: "layer_512_1_sum" 2905 | } 2906 | layer { 2907 | name: "layer_512_2_bn1" 2908 
| type: "BatchNorm" 2909 | bottom: "layer_512_1_sum" 2910 | top: "layer_512_2_bn1" 2911 | param { 2912 | lr_mult: 0.0 2913 | } 2914 | param { 2915 | lr_mult: 0.0 2916 | } 2917 | param { 2918 | lr_mult: 0.0 2919 | } 2920 | } 2921 | layer { 2922 | name: "layer_512_2_scale1" 2923 | type: "Scale" 2924 | bottom: "layer_512_2_bn1" 2925 | top: "layer_512_2_bn1" 2926 | param { 2927 | lr_mult: 1.0 2928 | decay_mult: 1.0 2929 | } 2930 | param { 2931 | lr_mult: 2.0 2932 | decay_mult: 1.0 2933 | } 2934 | scale_param { 2935 | bias_term: true 2936 | } 2937 | } 2938 | layer { 2939 | name: "layer_512_2_relu1" 2940 | type: "ReLU" 2941 | bottom: "layer_512_2_bn1" 2942 | top: "layer_512_2_bn1" 2943 | } 2944 | layer { 2945 | name: "layer_512_2_conv1" 2946 | type: "Convolution" 2947 | bottom: "layer_512_2_bn1" 2948 | top: "layer_512_2_conv1" 2949 | param { 2950 | lr_mult: 1.0 2951 | decay_mult: 1.0 2952 | } 2953 | convolution_param { 2954 | num_output: 512 2955 | bias_term: false 2956 | pad: 0 2957 | kernel_size: 1 2958 | stride: 1 2959 | weight_filler { 2960 | type: "msra" 2961 | } 2962 | bias_filler { 2963 | type: "constant" 2964 | value: 0.0 2965 | } 2966 | } 2967 | } 2968 | layer { 2969 | name: "layer_512_2_bn2" 2970 | type: "BatchNorm" 2971 | bottom: "layer_512_2_conv1" 2972 | top: "layer_512_2_conv1" 2973 | param { 2974 | lr_mult: 0.0 2975 | } 2976 | param { 2977 | lr_mult: 0.0 2978 | } 2979 | param { 2980 | lr_mult: 0.0 2981 | } 2982 | } 2983 | layer { 2984 | name: "layer_512_2_scale2" 2985 | type: "Scale" 2986 | bottom: "layer_512_2_conv1" 2987 | top: "layer_512_2_conv1" 2988 | param { 2989 | lr_mult: 1.0 2990 | decay_mult: 1.0 2991 | } 2992 | param { 2993 | lr_mult: 2.0 2994 | decay_mult: 1.0 2995 | } 2996 | scale_param { 2997 | bias_term: true 2998 | } 2999 | } 3000 | layer { 3001 | name: "layer_512_2_relu2" 3002 | type: "ReLU" 3003 | bottom: "layer_512_2_conv1" 3004 | top: "layer_512_2_conv1" 3005 | } 3006 | layer { 3007 | name: "layer_512_2_conv2" 3008 | type: "Convolution" 
3009 | bottom: "layer_512_2_conv1" 3010 | top: "layer_512_2_conv2" 3011 | param { 3012 | lr_mult: 1.0 3013 | decay_mult: 1.0 3014 | } 3015 | convolution_param { 3016 | num_output: 512 3017 | bias_term: false 3018 | pad: 1 3019 | kernel_size: 3 3020 | stride: 1 3021 | weight_filler { 3022 | type: "msra" 3023 | } 3024 | bias_filler { 3025 | type: "constant" 3026 | value: 0.0 3027 | } 3028 | } 3029 | } 3030 | layer { 3031 | name: "layer_512_2_bn3" 3032 | type: "BatchNorm" 3033 | bottom: "layer_512_2_conv2" 3034 | top: "layer_512_2_conv2" 3035 | param { 3036 | lr_mult: 0.0 3037 | } 3038 | param { 3039 | lr_mult: 0.0 3040 | } 3041 | param { 3042 | lr_mult: 0.0 3043 | } 3044 | } 3045 | layer { 3046 | name: "layer_512_2_scale3" 3047 | type: "Scale" 3048 | bottom: "layer_512_2_conv2" 3049 | top: "layer_512_2_conv2" 3050 | param { 3051 | lr_mult: 1.0 3052 | decay_mult: 1.0 3053 | } 3054 | param { 3055 | lr_mult: 2.0 3056 | decay_mult: 1.0 3057 | } 3058 | scale_param { 3059 | bias_term: true 3060 | } 3061 | } 3062 | layer { 3063 | name: "layer_512_2_relu3" 3064 | type: "ReLU" 3065 | bottom: "layer_512_2_conv2" 3066 | top: "layer_512_2_conv2" 3067 | } 3068 | layer { 3069 | name: "layer_512_2_conv3" 3070 | type: "Convolution" 3071 | bottom: "layer_512_2_conv2" 3072 | top: "layer_512_2_conv3" 3073 | param { 3074 | lr_mult: 1.0 3075 | decay_mult: 1.0 3076 | } 3077 | convolution_param { 3078 | num_output: 2048 3079 | bias_term: false 3080 | pad: 0 3081 | kernel_size: 1 3082 | stride: 1 3083 | weight_filler { 3084 | type: "msra" 3085 | } 3086 | bias_filler { 3087 | type: "constant" 3088 | value: 0.0 3089 | } 3090 | } 3091 | } 3092 | layer { 3093 | name: "layer_512_2_sum" 3094 | type: "Eltwise" 3095 | bottom: "layer_512_2_conv3" 3096 | bottom: "layer_512_1_sum" 3097 | top: "layer_512_2_sum" 3098 | } 3099 | layer { 3100 | name: "layer_512_3_bn1" 3101 | type: "BatchNorm" 3102 | bottom: "layer_512_2_sum" 3103 | top: "layer_512_3_bn1" 3104 | param { 3105 | lr_mult: 0.0 3106 | } 3107 | 
param { 3108 | lr_mult: 0.0 3109 | } 3110 | param { 3111 | lr_mult: 0.0 3112 | } 3113 | } 3114 | layer { 3115 | name: "layer_512_3_scale1" 3116 | type: "Scale" 3117 | bottom: "layer_512_3_bn1" 3118 | top: "layer_512_3_bn1" 3119 | param { 3120 | lr_mult: 1.0 3121 | decay_mult: 1.0 3122 | } 3123 | param { 3124 | lr_mult: 2.0 3125 | decay_mult: 1.0 3126 | } 3127 | scale_param { 3128 | bias_term: true 3129 | } 3130 | } 3131 | layer { 3132 | name: "layer_512_3_relu1" 3133 | type: "ReLU" 3134 | bottom: "layer_512_3_bn1" 3135 | top: "layer_512_3_bn1" 3136 | } 3137 | layer { 3138 | name: "layer_512_3_conv1" 3139 | type: "Convolution" 3140 | bottom: "layer_512_3_bn1" 3141 | top: "layer_512_3_conv1" 3142 | param { 3143 | lr_mult: 1.0 3144 | decay_mult: 1.0 3145 | } 3146 | convolution_param { 3147 | num_output: 512 3148 | bias_term: false 3149 | pad: 0 3150 | kernel_size: 1 3151 | stride: 1 3152 | weight_filler { 3153 | type: "msra" 3154 | } 3155 | bias_filler { 3156 | type: "constant" 3157 | value: 0.0 3158 | } 3159 | } 3160 | } 3161 | layer { 3162 | name: "layer_512_3_bn2" 3163 | type: "BatchNorm" 3164 | bottom: "layer_512_3_conv1" 3165 | top: "layer_512_3_conv1" 3166 | param { 3167 | lr_mult: 0.0 3168 | } 3169 | param { 3170 | lr_mult: 0.0 3171 | } 3172 | param { 3173 | lr_mult: 0.0 3174 | } 3175 | } 3176 | layer { 3177 | name: "layer_512_3_scale2" 3178 | type: "Scale" 3179 | bottom: "layer_512_3_conv1" 3180 | top: "layer_512_3_conv1" 3181 | param { 3182 | lr_mult: 1.0 3183 | decay_mult: 1.0 3184 | } 3185 | param { 3186 | lr_mult: 2.0 3187 | decay_mult: 1.0 3188 | } 3189 | scale_param { 3190 | bias_term: true 3191 | } 3192 | } 3193 | layer { 3194 | name: "layer_512_3_relu2" 3195 | type: "ReLU" 3196 | bottom: "layer_512_3_conv1" 3197 | top: "layer_512_3_conv1" 3198 | } 3199 | layer { 3200 | name: "layer_512_3_conv2" 3201 | type: "Convolution" 3202 | bottom: "layer_512_3_conv1" 3203 | top: "layer_512_3_conv2" 3204 | param { 3205 | lr_mult: 1.0 3206 | decay_mult: 1.0 3207 | } 
3208 | convolution_param { 3209 | num_output: 512 3210 | bias_term: false 3211 | pad: 1 3212 | kernel_size: 3 3213 | stride: 1 3214 | weight_filler { 3215 | type: "msra" 3216 | } 3217 | bias_filler { 3218 | type: "constant" 3219 | value: 0.0 3220 | } 3221 | } 3222 | } 3223 | layer { 3224 | name: "layer_512_3_bn3" 3225 | type: "BatchNorm" 3226 | bottom: "layer_512_3_conv2" 3227 | top: "layer_512_3_conv2" 3228 | param { 3229 | lr_mult: 0.0 3230 | } 3231 | param { 3232 | lr_mult: 0.0 3233 | } 3234 | param { 3235 | lr_mult: 0.0 3236 | } 3237 | } 3238 | layer { 3239 | name: "layer_512_3_scale3" 3240 | type: "Scale" 3241 | bottom: "layer_512_3_conv2" 3242 | top: "layer_512_3_conv2" 3243 | param { 3244 | lr_mult: 1.0 3245 | decay_mult: 1.0 3246 | } 3247 | param { 3248 | lr_mult: 2.0 3249 | decay_mult: 1.0 3250 | } 3251 | scale_param { 3252 | bias_term: true 3253 | } 3254 | } 3255 | layer { 3256 | name: "layer_512_3_relu3" 3257 | type: "ReLU" 3258 | bottom: "layer_512_3_conv2" 3259 | top: "layer_512_3_conv2" 3260 | } 3261 | layer { 3262 | name: "layer_512_3_conv3" 3263 | type: "Convolution" 3264 | bottom: "layer_512_3_conv2" 3265 | top: "layer_512_3_conv3" 3266 | param { 3267 | lr_mult: 1.0 3268 | decay_mult: 1.0 3269 | } 3270 | convolution_param { 3271 | num_output: 2048 3272 | bias_term: false 3273 | pad: 0 3274 | kernel_size: 1 3275 | stride: 1 3276 | weight_filler { 3277 | type: "msra" 3278 | } 3279 | bias_filler { 3280 | type: "constant" 3281 | value: 0.0 3282 | } 3283 | } 3284 | } 3285 | layer { 3286 | name: "layer_512_3_sum" 3287 | type: "Eltwise" 3288 | bottom: "layer_512_3_conv3" 3289 | bottom: "layer_512_2_sum" 3290 | top: "layer_512_3_sum" 3291 | } 3292 | layer { 3293 | name: "last_bn" 3294 | type: "BatchNorm" 3295 | bottom: "layer_512_3_sum" 3296 | top: "layer_512_3_sum" 3297 | param { 3298 | lr_mult: 0.0 3299 | } 3300 | param { 3301 | lr_mult: 0.0 3302 | } 3303 | param { 3304 | lr_mult: 0.0 3305 | } 3306 | } 3307 | layer { 3308 | name: "last_scale" 3309 | 
type: "Scale" 3310 | bottom: "layer_512_3_sum" 3311 | top: "layer_512_3_sum" 3312 | param { 3313 | lr_mult: 1.0 3314 | decay_mult: 1.0 3315 | } 3316 | param { 3317 | lr_mult: 2.0 3318 | decay_mult: 1.0 3319 | } 3320 | scale_param { 3321 | bias_term: true 3322 | } 3323 | } 3324 | layer { 3325 | name: "last_relu" 3326 | type: "ReLU" 3327 | bottom: "layer_512_3_sum" 3328 | top: "layer_512_3_sum" 3329 | } 3330 | layer { 3331 | name: "global_pool" 3332 | type: "Pooling" 3333 | bottom: "layer_512_3_sum" 3334 | top: "global_pool" 3335 | pooling_param { 3336 | pool: AVE 3337 | global_pooling: true 3338 | } 3339 | } 3340 | layer { 3341 | name: "score_ft" 3342 | type: "InnerProduct" 3343 | bottom: "global_pool" 3344 | top: "score" 3345 | param { 3346 | lr_mult: 1.0 3347 | decay_mult: 1.0 3348 | } 3349 | param { 3350 | lr_mult: 2.0 3351 | decay_mult: 1.0 3352 | } 3353 | inner_product_param { 3354 | num_output: 1000 3355 | } 3356 | } 3357 | layer { 3358 | name: "loss" 3359 | type: "SoftmaxWithLoss" 3360 | bottom: "score" 3361 | bottom: "label" 3362 | top: "loss" 3363 | } 3364 | layer { 3365 | name: "accuracy" 3366 | type: "Accuracy" 3367 | bottom: "score" 3368 | bottom: "label" 3369 | top: "accuracy" 3370 | include { 3371 | phase: TEST 3372 | } 3373 | } 3374 | -------------------------------------------------------------------------------- /pretrained_models/vgg16/download_vgg16_model_link.txt: -------------------------------------------------------------------------------- 1 | http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel 2 | 3 | License: see http://www.robots.ox.ac.uk/~vgg/research/very_deep/ 4 | -------------------------------------------------------------------------------- /pretrained_models/vgg16/train_val.prototxt: -------------------------------------------------------------------------------- 1 | name: "CaffeNet" 2 | layer { 3 | name: "data" 4 | type: "ImageData" 5 | top: "data" 6 | top: "label" 7 | transform_param { 8 | 
mirror: true 9 | crop_size: 224 10 | mean_value: 104 11 | mean_value: 117 12 | mean_value: 123 13 | } 14 | image_data_param { 15 | source: "/home/simon/Datasets/CUB_200_2011/train_images.txt" 16 | root_folder: "/home/simon/Datasets/CUB_200_2011/images/" 17 | batch_size: 16 18 | shuffle: true 19 | smaller_side_size: 560 20 | smaller_side_size: 640 21 | } 22 | include: { phase: TRAIN } 23 | } 24 | layer { 25 | name: "data" 26 | type: "ImageData" 27 | top: "data" 28 | top: "label" 29 | transform_param { 30 | mirror: false 31 | crop_size: 224 32 | mean_value: 104 33 | mean_value: 117 34 | mean_value: 123 35 | } 36 | image_data_param { 37 | source: "/home/simon/Datasets/CUB_200_2011/test_images.txt" 38 | root_folder: "/home/simon/Datasets/CUB_200_2011/images/" 39 | batch_size: 2 40 | shuffle: true 41 | smaller_side_size: 560 42 | } 43 | include: { phase: TEST } 44 | } 45 | layer { 46 | name: "conv1_1" 47 | type: "Convolution" 48 | bottom: "data" 49 | top: "conv1_1" 50 | param { 51 | lr_mult: 1 52 | decay_mult: 1 53 | } 54 | param { 55 | lr_mult: 2 56 | decay_mult: 0 57 | } 58 | convolution_param { 59 | num_output: 64 60 | pad: 1 61 | kernel_size: 3 62 | weight_filler { 63 | type: "gaussian" 64 | std: 0.01 65 | } 66 | bias_filler { 67 | type: "constant" 68 | value: 0 69 | } 70 | } 71 | } 72 | layer { 73 | name: "relu1_1" 74 | type: "ReLU" 75 | bottom: "conv1_1" 76 | top: "conv1_1" 77 | } 78 | layer { 79 | name: "conv1_2" 80 | type: "Convolution" 81 | bottom: "conv1_1" 82 | top: "conv1_2" 83 | param { 84 | lr_mult: 1 85 | decay_mult: 1 86 | } 87 | param { 88 | lr_mult: 2 89 | decay_mult: 0 90 | } 91 | convolution_param { 92 | num_output: 64 93 | pad: 1 94 | kernel_size: 3 95 | weight_filler { 96 | type: "gaussian" 97 | std: 0.01 98 | } 99 | bias_filler { 100 | type: "constant" 101 | value: 0 102 | } 103 | } 104 | } 105 | layer { 106 | name: "relu1_2" 107 | type: "ReLU" 108 | bottom: "conv1_2" 109 | top: "conv1_2" 110 | } 111 | layer { 112 | name: "pool1" 113 | type: 
"Pooling" 114 | bottom: "conv1_2" 115 | top: "pool1" 116 | pooling_param { 117 | pool: MAX 118 | kernel_size: 2 119 | stride: 2 120 | } 121 | } 122 | layer { 123 | name: "conv2_1" 124 | type: "Convolution" 125 | bottom: "pool1" 126 | top: "conv2_1" 127 | param { 128 | lr_mult: 1 129 | decay_mult: 1 130 | } 131 | param { 132 | lr_mult: 2 133 | decay_mult: 0 134 | } 135 | convolution_param { 136 | num_output: 128 137 | pad: 1 138 | kernel_size: 3 139 | weight_filler { 140 | type: "gaussian" 141 | std: 0.01 142 | } 143 | bias_filler { 144 | type: "constant" 145 | value: 0 146 | } 147 | } 148 | } 149 | layer { 150 | name: "relu2_1" 151 | type: "ReLU" 152 | bottom: "conv2_1" 153 | top: "conv2_1" 154 | } 155 | layer { 156 | name: "conv2_2" 157 | type: "Convolution" 158 | bottom: "conv2_1" 159 | top: "conv2_2" 160 | param { 161 | lr_mult: 1 162 | decay_mult: 1 163 | } 164 | param { 165 | lr_mult: 2 166 | decay_mult: 0 167 | } 168 | convolution_param { 169 | num_output: 128 170 | pad: 1 171 | kernel_size: 3 172 | weight_filler { 173 | type: "gaussian" 174 | std: 0.01 175 | } 176 | bias_filler { 177 | type: "constant" 178 | value: 0 179 | } 180 | } 181 | } 182 | layer { 183 | name: "relu2_2" 184 | type: "ReLU" 185 | bottom: "conv2_2" 186 | top: "conv2_2" 187 | } 188 | layer { 189 | name: "pool2" 190 | type: "Pooling" 191 | bottom: "conv2_2" 192 | top: "pool2" 193 | pooling_param { 194 | pool: MAX 195 | kernel_size: 2 196 | stride: 2 197 | } 198 | } 199 | layer { 200 | name: "conv3_1" 201 | type: "Convolution" 202 | bottom: "pool2" 203 | top: "conv3_1" 204 | param { 205 | lr_mult: 1 206 | decay_mult: 1 207 | } 208 | param { 209 | lr_mult: 2 210 | decay_mult: 0 211 | } 212 | convolution_param { 213 | num_output: 256 214 | pad: 1 215 | kernel_size: 3 216 | weight_filler { 217 | type: "gaussian" 218 | std: 0.01 219 | } 220 | bias_filler { 221 | type: "constant" 222 | value: 0 223 | } 224 | } 225 | } 226 | layer { 227 | name: "relu3_1" 228 | type: "ReLU" 229 | bottom: "conv3_1" 
230 | top: "conv3_1" 231 | } 232 | layer { 233 | name: "conv3_2" 234 | type: "Convolution" 235 | bottom: "conv3_1" 236 | top: "conv3_2" 237 | param { 238 | lr_mult: 1 239 | decay_mult: 1 240 | } 241 | param { 242 | lr_mult: 2 243 | decay_mult: 0 244 | } 245 | convolution_param { 246 | num_output: 256 247 | pad: 1 248 | kernel_size: 3 249 | weight_filler { 250 | type: "gaussian" 251 | std: 0.01 252 | } 253 | bias_filler { 254 | type: "constant" 255 | value: 0 256 | } 257 | } 258 | } 259 | layer { 260 | name: "relu3_2" 261 | type: "ReLU" 262 | bottom: "conv3_2" 263 | top: "conv3_2" 264 | } 265 | layer { 266 | name: "conv3_3" 267 | type: "Convolution" 268 | bottom: "conv3_2" 269 | top: "conv3_3" 270 | param { 271 | lr_mult: 1 272 | decay_mult: 1 273 | } 274 | param { 275 | lr_mult: 2 276 | decay_mult: 0 277 | } 278 | convolution_param { 279 | num_output: 256 280 | pad: 1 281 | kernel_size: 3 282 | weight_filler { 283 | type: "gaussian" 284 | std: 0.01 285 | } 286 | bias_filler { 287 | type: "constant" 288 | value: 0 289 | } 290 | } 291 | } 292 | layer { 293 | name: "relu3_3" 294 | type: "ReLU" 295 | bottom: "conv3_3" 296 | top: "conv3_3" 297 | } 298 | layer { 299 | name: "pool3" 300 | type: "Pooling" 301 | bottom: "conv3_3" 302 | top: "pool3" 303 | pooling_param { 304 | pool: MAX 305 | kernel_size: 2 306 | stride: 2 307 | } 308 | } 309 | layer { 310 | name: "conv4_1" 311 | type: "Convolution" 312 | bottom: "pool3" 313 | top: "conv4_1" 314 | param { 315 | lr_mult: 1 316 | decay_mult: 1 317 | } 318 | param { 319 | lr_mult: 2 320 | decay_mult: 0 321 | } 322 | convolution_param { 323 | num_output: 512 324 | pad: 1 325 | kernel_size: 3 326 | weight_filler { 327 | type: "gaussian" 328 | std: 0.01 329 | } 330 | bias_filler { 331 | type: "constant" 332 | value: 0 333 | } 334 | } 335 | } 336 | layer { 337 | name: "relu4_1" 338 | type: "ReLU" 339 | bottom: "conv4_1" 340 | top: "conv4_1" 341 | } 342 | layer { 343 | name: "conv4_2" 344 | type: "Convolution" 345 | bottom: 
"conv4_1" 346 | top: "conv4_2" 347 | param { 348 | lr_mult: 1 349 | decay_mult: 1 350 | } 351 | param { 352 | lr_mult: 2 353 | decay_mult: 0 354 | } 355 | convolution_param { 356 | num_output: 512 357 | pad: 1 358 | kernel_size: 3 359 | weight_filler { 360 | type: "gaussian" 361 | std: 0.01 362 | } 363 | bias_filler { 364 | type: "constant" 365 | value: 0 366 | } 367 | } 368 | } 369 | layer { 370 | name: "relu4_2" 371 | type: "ReLU" 372 | bottom: "conv4_2" 373 | top: "conv4_2" 374 | } 375 | layer { 376 | name: "conv4_3" 377 | type: "Convolution" 378 | bottom: "conv4_2" 379 | top: "conv4_3" 380 | param { 381 | lr_mult: 1 382 | decay_mult: 1 383 | } 384 | param { 385 | lr_mult: 2 386 | decay_mult: 0 387 | } 388 | convolution_param { 389 | num_output: 512 390 | pad: 1 391 | kernel_size: 3 392 | weight_filler { 393 | type: "gaussian" 394 | std: 0.01 395 | } 396 | bias_filler { 397 | type: "constant" 398 | value: 0 399 | } 400 | } 401 | } 402 | layer { 403 | name: "relu4_3" 404 | type: "ReLU" 405 | bottom: "conv4_3" 406 | top: "conv4_3" 407 | } 408 | layer { 409 | name: "pool4" 410 | type: "Pooling" 411 | bottom: "conv4_3" 412 | top: "pool4" 413 | pooling_param { 414 | pool: MAX 415 | kernel_size: 2 416 | stride: 2 417 | } 418 | } 419 | layer { 420 | name: "conv5_1" 421 | type: "Convolution" 422 | bottom: "pool4" 423 | top: "conv5_1" 424 | param { 425 | lr_mult: 1 426 | decay_mult: 1 427 | } 428 | param { 429 | lr_mult: 2 430 | decay_mult: 0 431 | } 432 | convolution_param { 433 | num_output: 512 434 | pad: 1 435 | kernel_size: 3 436 | weight_filler { 437 | type: "gaussian" 438 | std: 0.01 439 | } 440 | bias_filler { 441 | type: "constant" 442 | value: 0 443 | } 444 | } 445 | } 446 | layer { 447 | name: "relu5_1" 448 | type: "ReLU" 449 | bottom: "conv5_1" 450 | top: "conv5_1" 451 | } 452 | layer { 453 | name: "conv5_2" 454 | type: "Convolution" 455 | bottom: "conv5_1" 456 | top: "conv5_2" 457 | param { 458 | lr_mult: 1 459 | decay_mult: 1 460 | } 461 | param { 462 | 
lr_mult: 2 463 | decay_mult: 0 464 | } 465 | convolution_param { 466 | num_output: 512 467 | pad: 1 468 | kernel_size: 3 469 | weight_filler { 470 | type: "gaussian" 471 | std: 0.01 472 | } 473 | bias_filler { 474 | type: "constant" 475 | value: 0 476 | } 477 | } 478 | } 479 | layer { 480 | name: "relu5_2" 481 | type: "ReLU" 482 | bottom: "conv5_2" 483 | top: "conv5_2" 484 | } 485 | layer { 486 | name: "conv5_3" 487 | type: "Convolution" 488 | bottom: "conv5_2" 489 | top: "conv5_3" 490 | param { 491 | lr_mult: 1 492 | decay_mult: 1 493 | } 494 | param { 495 | lr_mult: 2 496 | decay_mult: 0 497 | } 498 | convolution_param { 499 | num_output: 512 500 | pad: 1 501 | kernel_size: 3 502 | weight_filler { 503 | type: "gaussian" 504 | std: 0.01 505 | } 506 | bias_filler { 507 | type: "constant" 508 | value: 0 509 | } 510 | } 511 | } 512 | layer { 513 | name: "relu5_3" 514 | type: "ReLU" 515 | bottom: "conv5_3" 516 | top: "conv5_3" 517 | } 518 | layer { 519 | name: "pool5" 520 | type: "Pooling" 521 | bottom: "conv5_3" 522 | top: "pool5" 523 | pooling_param { 524 | pool: MAX 525 | kernel_size: 2 526 | stride: 2 527 | } 528 | } 529 | layer { 530 | name: "fc6" 531 | type: "InnerProduct" 532 | bottom: "pool5" 533 | top: "fc6" 534 | param { 535 | lr_mult: 1 536 | decay_mult: 1 537 | } 538 | param { 539 | lr_mult: 2 540 | decay_mult: 0 541 | } 542 | inner_product_param { 543 | num_output: 4096 544 | weight_filler { 545 | type: "gaussian" 546 | std: 0.005 547 | } 548 | bias_filler { 549 | type: "constant" 550 | value: 1 551 | } 552 | } 553 | } 554 | layer { 555 | name: "relu6" 556 | type: "ReLU" 557 | bottom: "fc6" 558 | top: "fc6" 559 | } 560 | layer { 561 | name: "drop6" 562 | type: "Dropout" 563 | bottom: "fc6" 564 | top: "fc6" 565 | dropout_param { 566 | dropout_ratio: 0.5 567 | } 568 | } 569 | layer { 570 | name: "fc7" 571 | type: "InnerProduct" 572 | bottom: "fc6" 573 | top: "fc7" 574 | param { 575 | lr_mult: 1 576 | decay_mult: 1 577 | } 578 | param { 579 | lr_mult: 2 580 
| decay_mult: 0 581 | } 582 | inner_product_param { 583 | num_output: 4096 584 | weight_filler { 585 | type: "gaussian" 586 | std: 0.005 587 | } 588 | bias_filler { 589 | type: "constant" 590 | value: 1 591 | } 592 | } 593 | } 594 | layer { 595 | name: "relu7" 596 | type: "ReLU" 597 | bottom: "fc7" 598 | top: "fc7" 599 | } 600 | layer { 601 | name: "drop7" 602 | type: "Dropout" 603 | bottom: "fc7" 604 | top: "fc7" 605 | dropout_param { 606 | dropout_ratio: 0.5 607 | } 608 | } 609 | layer { 610 | name: "fc8_ft" 611 | type: "InnerProduct" 612 | bottom: "fc7" 613 | top: "fc8" 614 | param { 615 | lr_mult: 1 616 | decay_mult: 1 617 | } 618 | param { 619 | lr_mult: 2 620 | decay_mult: 0 621 | } 622 | inner_product_param { 623 | num_output: 5089 624 | weight_filler { 625 | type: "gaussian" 626 | std: 0.01 627 | } 628 | bias_filler { 629 | type: "constant" 630 | value: 0 631 | } 632 | } 633 | } 634 | layer { 635 | name: "loss" 636 | type: "SoftmaxWithLoss" 637 | bottom: "fc8" 638 | bottom: "label" 639 | top: "loss" 640 | } 641 | layer { 642 | name: "accuracy" 643 | type: "Accuracy" 644 | bottom: "fc8" 645 | bottom: "label" 646 | top: "accuracy" 647 | include { 648 | phase: TEST 649 | } 650 | } 651 | --------------------------------------------------------------------------------