├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── code
    ├── Makefile
    ├── include
    │   └── caffe
    │   │   ├── common_layers.hpp
    │   │   └── loss_layers.hpp
    └── src
    │   └── caffe
    │       ├── layers
    │           ├── super_category_fm_layer.cpp
    │           ├── super_category_fm_layer.cu
    │           ├── super_category_fm_post_layer.cpp
    │           └── super_category_layer.cpp
    │       └── proto
    │           └── caffe.proto
├── example
    ├── solver.prototxt
    ├── super_category.prototxt
    └── train_val.prototxt
├── miscellaneous
    └── headline.png
└── run.sh


/.gitignore:
--------------------------------------------------------------------------------
1 | cifar100
2 | example/*.log
3 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "caffe"]
2 | 	path = caffe
3 | 	url = https://github.com/BVLC/caffe.git
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 GOO WONJOON
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Taxonomy-Regularized Semantic Deep Convolutional Neural Networks
 2 | 
 3 | + Wonjoon Goo(SNU, now at UT Austin), Juyong Kim(SNU), Gunhee Kim(SNU), and Sung Ju Hwang(UNIST)
 4 | 
 5 | ![alt tag](https://raw.githubusercontent.com/hiwonjoon/eccv16-taxonomy/master/miscellaneous/headline.png)
 6 | 
 7 | This project hosts the code for our **ECCV 2016** paper. [[pdf](http://vision.snu.ac.kr/wordpress/wp-content/uploads/2016/08/eccv16_taxonomy.pdf)]
 8 | 
 9 | We propose a novel convolutional neural network architecture
10 | that abstracts and differentiates the categories based on a given class
11 | hierarchy. We exploit grouped and discriminative information provided
12 | by the taxonomy, by focusing on the general and specific components
13 | that comprise each category, through the min- and difference-pooling
14 | operations. Without using any additional parameters or substantial increase
15 | in time complexity, our model is able to learn the features that are
16 | discriminative for classifying often confusing sub-classes belonging to the
17 | same superclass, and thus improve the overall classification performance.
18 | 
19 | 
20 | ##Reference
21 | 
22 | If you use this code as part of any published research, please refer the following paper.
23 | 
24 | ```
25 | @inproceedings{taxonomy:2016:ECCV,
26 |     author    = {Wonjoon Goo, Juyong Kim, Gunhee Kim and Sung Ju Hwang},
27 |     title     = "{Taxonomy-Regularized Semantic Deep Convolutional Neural Networks}"
28 |     booktitle = {ECCV},
29 |     year      = 2016
30 | }
31 | ```
32 | 
33 | ## Running Code
34 | 
35 | We implemented a new type of regularize layer as described in the paper based on BVLC caffe deep learning library.
36 | It would be better to go through tutorials of [Caffe](https://github.com/BVLC/caffe]) deep learning library before running our code. Most of error you might encounter will be the problem on running caffe, not ours.
37 | 
38 | ### Get our code
39 | ```
40 | git clone --recursive https://github.com/hiwonjoon/eccv16-taxonomy.git taxonomy
41 | ```
42 | 
43 | If you look at directory, there will be three important sub-directories; caffe, code, example.
44 | 
45 | 'caffe' directory is the code of BVLC caffe on specific branch "dd6e8e6" commit, which was our working branch. The directory added in the form of git submodule, so if you want to use latest caffe version, then try it :)
46 | 
47 | 'code' directory contains our implementations(super category label layer, etc.)
48 | 
49 | 'example' directory contains the sample prototxt files that is required for training and validation.
50 | 
51 | 
52 | ### Apply & Build caffe
53 | 
54 | First, copy our implementation into original caffe code.
55 | ```
56 | $copy -r ./code/* ./caffe/
57 | ```
58 | Then, build caffe as same as original caffe; Config by modifying Makefile.config and `make all`.
59 | 
60 | ### Run examples
61 | 
62 | Before you start and acquire the same experiment result of ours, you need preprocessed Cifar 100 dataset as described on the paper. We assumed that the dataset is located on the top directory(cloned directory), named 'cifar100'. Please change lmdb file locations which are specified on train_val.prototxt
63 | 
64 | You also need taxonomy tree in the form of prototxt. The sample taxonomy tree for Cifar 100 dataset is given on the directory.
65 | 
66 | If you execute run.sh script, then you can start training, or you can directly start it from a shell. The script provided is only for less typing :) And, with provided trained model and small modification of scripts and prototxt, you can reproduce our experiment results.
67 | 
68 | ```
69 | ./run.sh
70 | ```
71 | 
72 | ### Try it yourself!
73 | 
74 | You can use our code for your own datasets or taxonmy trees with minor modification of example prototxt files.
75 | If you find any problems, please contact me. Enjoy :)
76 | 
77 | 
78 | ## Acknowledgement
79 | 
80 | This work was supported by Samsung Research Funding Center of Samsung Electronics under Project Number SRFC-IT1502-03.
81 | 
82 | 
83 | ## Authors
84 | 
85 | [Wonjoon Goo](http://vision.snu.ac.kr/wonjoongoo/)<sup>1</sup>, [Juyong Kim](http://juyongkim.com/)<sup>1</sup>, [Gunhee Kim](http://www.cs.cmu.edu/~gunhee/)<sup>1</sup>, and [Sung Ju Hwang](http://www.sungjuhwang.com/)<sup>2</sup>
86 | 
87 | <sup>1</sup>[Vision and Learning Lab](http://vision.snu.ac.kr/) @ Computer Science and Engineering, Seoul National University, Seoul, Korea
88 | 
89 | <sup>2</sup>[MLVR Lab](http://ml.unist.ac.kr/) @ School of Electrical and Computer Engineering, UNIST, Ulsan, South Korea
90 | 
91 | 
92 | ## License
93 |     MIT license
94 | 


--------------------------------------------------------------------------------
/code/Makefile:
--------------------------------------------------------------------------------
  1 | PROJECT := caffe
  2 | 
  3 | CONFIG_FILE := Makefile.config
  4 | # Explicitly check for the config file, otherwise make -k will proceed anyway.
  5 | ifeq ($(wildcard $(CONFIG_FILE)),)
  6 | $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example.)
  7 | endif
  8 | include $(CONFIG_FILE)
  9 | 
 10 | BUILD_DIR_LINK := $(BUILD_DIR)
 11 | ifeq ($(RELEASE_BUILD_DIR),)
 12 | 	RELEASE_BUILD_DIR := .$(BUILD_DIR)_release
 13 | endif
 14 | ifeq ($(DEBUG_BUILD_DIR),)
 15 | 	DEBUG_BUILD_DIR := .$(BUILD_DIR)_debug
 16 | endif
 17 | 
 18 | DEBUG ?= 0
 19 | ifeq ($(DEBUG), 1)
 20 | 	BUILD_DIR := $(DEBUG_BUILD_DIR)
 21 | 	OTHER_BUILD_DIR := $(RELEASE_BUILD_DIR)
 22 | else
 23 | 	BUILD_DIR := $(RELEASE_BUILD_DIR)
 24 | 	OTHER_BUILD_DIR := $(DEBUG_BUILD_DIR)
 25 | endif
 26 | 
 27 | # All of the directories containing code.
 28 | SRC_DIRS := $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \
 29 | 	\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print)
 30 | 
 31 | # The target shared library name
 32 | LIB_BUILD_DIR := $(BUILD_DIR)/lib
 33 | STATIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).a
 34 | DYNAMIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).so
 35 | 
 36 | ##############################
 37 | # Get all source files
 38 | ##############################
 39 | # CXX_SRCS are the source files excluding the test ones.
 40 | CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp")
 41 | # CU_SRCS are the cuda source files
 42 | CU_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cu" -name "*.cu")
 43 | # TEST_SRCS are the test source files
 44 | TEST_MAIN_SRC := src/$(PROJECT)/test/test_caffe_main.cpp
 45 | TEST_SRCS := $(shell find src/$(PROJECT) -name "test_*.cpp")
 46 | TEST_SRCS := $(filter-out $(TEST_MAIN_SRC), $(TEST_SRCS))
 47 | TEST_CU_SRCS := $(shell find src/$(PROJECT) -name "test_*.cu")
 48 | GTEST_SRC := src/gtest/gtest-all.cpp
 49 | # TOOL_SRCS are the source files for the tool binaries
 50 | TOOL_SRCS := $(shell find tools -name "*.cpp")
 51 | # EXAMPLE_SRCS are the source files for the example binaries
 52 | EXAMPLE_SRCS := $(shell find examples -name "*.cpp")
 53 | # BUILD_INCLUDE_DIR contains any generated header files we want to include.
 54 | BUILD_INCLUDE_DIR := $(BUILD_DIR)/src
 55 | # PROTO_SRCS are the protocol buffer definitions
 56 | PROTO_SRC_DIR := src/$(PROJECT)/proto
 57 | PROTO_SRCS := $(wildcard $(PROTO_SRC_DIR)/*.proto)
 58 | # PROTO_BUILD_DIR will contain the .cc and obj files generated from
 59 | # PROTO_SRCS; PROTO_BUILD_INCLUDE_DIR will contain the .h header files
 60 | PROTO_BUILD_DIR := $(BUILD_DIR)/$(PROTO_SRC_DIR)
 61 | PROTO_BUILD_INCLUDE_DIR := $(BUILD_INCLUDE_DIR)/$(PROJECT)/proto
 62 | # NONGEN_CXX_SRCS includes all source/header files except those generated
 63 | # automatically (e.g., by proto).
 64 | NONGEN_CXX_SRCS := $(shell find \
 65 | 	src/$(PROJECT) \
 66 | 	include/$(PROJECT) \
 67 | 	python/$(PROJECT) \
 68 | 	matlab/+$(PROJECT)/private \
 69 | 	examples \
 70 | 	tools \
 71 | 	-name "*.cpp" -or -name "*.hpp" -or -name "*.cu" -or -name "*.cuh")
 72 | LINT_SCRIPT := scripts/cpp_lint.py
 73 | LINT_OUTPUT_DIR := $(BUILD_DIR)/.lint
 74 | LINT_EXT := lint.txt
 75 | LINT_OUTPUTS := $(addsuffix .$(LINT_EXT), $(addprefix $(LINT_OUTPUT_DIR)/, $(NONGEN_CXX_SRCS)))
 76 | EMPTY_LINT_REPORT := $(BUILD_DIR)/.$(LINT_EXT)
 77 | NONEMPTY_LINT_REPORT := $(BUILD_DIR)/$(LINT_EXT)
 78 | # PY$(PROJECT)_SRC is the python wrapper for $(PROJECT)
 79 | PY$(PROJECT)_SRC := python/$(PROJECT)/_$(PROJECT).cpp
 80 | PY$(PROJECT)_SO := python/$(PROJECT)/_$(PROJECT).so
 81 | PY$(PROJECT)_HXX := include/$(PROJECT)/python_layer.hpp
 82 | # MAT$(PROJECT)_SRC is the mex entrance point of matlab package for $(PROJECT)
 83 | MAT$(PROJECT)_SRC := matlab/+$(PROJECT)/private/$(PROJECT)_.cpp
 84 | ifneq ($(MATLAB_DIR),)
 85 | 	MAT_SO_EXT := $(shell $(MATLAB_DIR)/bin/mexext)
 86 | endif
 87 | MAT$(PROJECT)_SO := matlab/+$(PROJECT)/private/$(PROJECT)_.$(MAT_SO_EXT)
 88 | 
 89 | ##############################
 90 | # Derive generated files
 91 | ##############################
 92 | # The generated files for protocol buffers
 93 | PROTO_GEN_HEADER_SRCS := $(addprefix $(PROTO_BUILD_DIR)/, \
 94 | 		$(notdir ${PROTO_SRCS:.proto=.pb.h}))
 95 | PROTO_GEN_HEADER := $(addprefix $(PROTO_BUILD_INCLUDE_DIR)/, \
 96 | 		$(notdir ${PROTO_SRCS:.proto=.pb.h}))
 97 | PROTO_GEN_CC := $(addprefix $(BUILD_DIR)/, ${PROTO_SRCS:.proto=.pb.cc})
 98 | PY_PROTO_BUILD_DIR := python/$(PROJECT)/proto
 99 | PY_PROTO_INIT := python/$(PROJECT)/proto/__init__.py
100 | PROTO_GEN_PY := $(foreach file,${PROTO_SRCS:.proto=_pb2.py}, \
101 | 		$(PY_PROTO_BUILD_DIR)/$(notdir $(file)))
102 | # The objects corresponding to the source files
103 | # These objects will be linked into the final shared library, so we
104 | # exclude the tool, example, and test objects.
105 | CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o})
106 | CU_OBJS := $(addprefix $(BUILD_DIR)/cuda/, ${CU_SRCS:.cu=.o})
107 | PROTO_OBJS := ${PROTO_GEN_CC:.cc=.o}
108 | OBJS := $(PROTO_OBJS) $(CXX_OBJS) $(CU_OBJS)
109 | # tool, example, and test objects
110 | TOOL_OBJS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o})
111 | TOOL_BUILD_DIR := $(BUILD_DIR)/tools
112 | TEST_CXX_BUILD_DIR := $(BUILD_DIR)/src/$(PROJECT)/test
113 | TEST_CU_BUILD_DIR := $(BUILD_DIR)/cuda/src/$(PROJECT)/test
114 | TEST_CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o})
115 | TEST_CU_OBJS := $(addprefix $(BUILD_DIR)/cuda/, ${TEST_CU_SRCS:.cu=.o})
116 | TEST_OBJS := $(TEST_CXX_OBJS) $(TEST_CU_OBJS)
117 | GTEST_OBJ := $(addprefix $(BUILD_DIR)/, ${GTEST_SRC:.cpp=.o})
118 | EXAMPLE_OBJS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o})
119 | # Output files for automatic dependency generation
120 | DEPS := ${CXX_OBJS:.o=.d} ${CU_OBJS:.o=.d} ${TEST_CXX_OBJS:.o=.d} \
121 | 	${TEST_CU_OBJS:.o=.d} $(BUILD_DIR)/${MAT$(PROJECT)_SO:.$(MAT_SO_EXT)=.d}
122 | # tool, example, and test bins
123 | TOOL_BINS := ${TOOL_OBJS:.o=.bin}
124 | EXAMPLE_BINS := ${EXAMPLE_OBJS:.o=.bin}
125 | # symlinks to tool bins without the ".bin" extension
126 | TOOL_BIN_LINKS := ${TOOL_BINS:.bin=}
127 | # Put the test binaries in build/test for convenience.
128 | TEST_BIN_DIR := $(BUILD_DIR)/test
129 | TEST_CU_BINS := $(addsuffix .testbin,$(addprefix $(TEST_BIN_DIR)/, \
130 | 		$(foreach obj,$(TEST_CU_OBJS),$(basename $(notdir $(obj))))))
131 | TEST_CXX_BINS := $(addsuffix .testbin,$(addprefix $(TEST_BIN_DIR)/, \
132 | 		$(foreach obj,$(TEST_CXX_OBJS),$(basename $(notdir $(obj))))))
133 | TEST_BINS := $(TEST_CXX_BINS) $(TEST_CU_BINS)
134 | # TEST_ALL_BIN is the test binary that links caffe dynamically.
135 | TEST_ALL_BIN := $(TEST_BIN_DIR)/test_all.testbin
136 | 
137 | ##############################
138 | # Derive compiler warning dump locations
139 | ##############################
140 | WARNS_EXT := warnings.txt
141 | CXX_WARNS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o.$(WARNS_EXT)})
142 | CU_WARNS := $(addprefix $(BUILD_DIR)/cuda/, ${CU_SRCS:.cu=.o.$(WARNS_EXT)})
143 | TOOL_WARNS := $(addprefix $(BUILD_DIR)/, ${TOOL_SRCS:.cpp=.o.$(WARNS_EXT)})
144 | EXAMPLE_WARNS := $(addprefix $(BUILD_DIR)/, ${EXAMPLE_SRCS:.cpp=.o.$(WARNS_EXT)})
145 | TEST_WARNS := $(addprefix $(BUILD_DIR)/, ${TEST_SRCS:.cpp=.o.$(WARNS_EXT)})
146 | TEST_CU_WARNS := $(addprefix $(BUILD_DIR)/cuda/, ${TEST_CU_SRCS:.cu=.o.$(WARNS_EXT)})
147 | ALL_CXX_WARNS := $(CXX_WARNS) $(TOOL_WARNS) $(EXAMPLE_WARNS) $(TEST_WARNS)
148 | ALL_CU_WARNS := $(CU_WARNS) $(TEST_CU_WARNS)
149 | ALL_WARNS := $(ALL_CXX_WARNS) $(ALL_CU_WARNS)
150 | 
151 | EMPTY_WARN_REPORT := $(BUILD_DIR)/.$(WARNS_EXT)
152 | NONEMPTY_WARN_REPORT := $(BUILD_DIR)/$(WARNS_EXT)
153 | 
154 | ##############################
155 | # Derive include and lib directories
156 | ##############################
157 | CUDA_INCLUDE_DIR := $(CUDA_DIR)/include
158 | 
159 | CUDA_LIB_DIR :=
160 | # add <cuda>/lib64 only if it exists
161 | ifneq ("$(wildcard $(CUDA_DIR)/lib64)","")
162 | 	CUDA_LIB_DIR += $(CUDA_DIR)/lib64
163 | endif
164 | CUDA_LIB_DIR += $(CUDA_DIR)/lib
165 | 
166 | INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include
167 | ifneq ($(CPU_ONLY), 1)
168 | 	INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
169 | 	LIBRARY_DIRS += $(CUDA_LIB_DIR)
170 | 	LIBRARIES := cudart cublas curand
171 | endif
172 | LIBRARIES += glog gflags protobuf leveldb snappy \
173 | 	lmdb boost_system hdf5_hl hdf5 m \
174 | 	opencv_core opencv_highgui opencv_imgproc
175 | PYTHON_LIBRARIES := boost_python python2.7
176 | WARNINGS := -Wall -Wno-sign-compare
177 | 
178 | ##############################
179 | # Set build directories
180 | ##############################
181 | 
182 | DISTRIBUTE_DIR ?= distribute
183 | DISTRIBUTE_SUBDIRS := $(DISTRIBUTE_DIR)/bin $(DISTRIBUTE_DIR)/lib
184 | DIST_ALIASES := dist
185 | ifneq ($(strip $(DISTRIBUTE_DIR)),distribute)
186 | 		DIST_ALIASES += distribute
187 | endif
188 | 
189 | ALL_BUILD_DIRS := $(sort $(BUILD_DIR) $(addprefix $(BUILD_DIR)/, $(SRC_DIRS)) \
190 | 	$(addprefix $(BUILD_DIR)/cuda/, $(SRC_DIRS)) \
191 | 	$(LIB_BUILD_DIR) $(TEST_BIN_DIR) $(PY_PROTO_BUILD_DIR) $(LINT_OUTPUT_DIR) \
192 | 	$(DISTRIBUTE_SUBDIRS) $(PROTO_BUILD_INCLUDE_DIR))
193 | 
194 | ##############################
195 | # Set directory for Doxygen-generated documentation
196 | ##############################
197 | DOXYGEN_CONFIG_FILE ?= ./.Doxyfile
198 | # should be the same as OUTPUT_DIRECTORY in the .Doxyfile
199 | DOXYGEN_OUTPUT_DIR ?= ./doxygen
200 | DOXYGEN_COMMAND ?= doxygen
201 | # All the files that might have Doxygen documentation.
202 | DOXYGEN_SOURCES := $(shell find \
203 | 	src/$(PROJECT) \
204 | 	include/$(PROJECT) \
205 | 	python/ \
206 | 	matlab/ \
207 | 	examples \
208 | 	tools \
209 | 	-name "*.cpp" -or -name "*.hpp" -or -name "*.cu" -or -name "*.cuh" -or \
210 |         -name "*.py" -or -name "*.m")
211 | DOXYGEN_SOURCES += $(DOXYGEN_CONFIG_FILE)
212 | 
213 | 
214 | ##############################
215 | # Configure build
216 | ##############################
217 | 
218 | # Determine platform
219 | UNAME := $(shell uname -s)
220 | ifeq ($(UNAME), Linux)
221 | 	LINUX := 1
222 | else ifeq ($(UNAME), Darwin)
223 | 	OSX := 1
224 | endif
225 | 
226 | # Linux
227 | ifeq ($(LINUX), 1)
228 | 	CXX ?= /usr/bin/g++
229 | 	GCCVERSION := $(shell $(CXX) -dumpversion | cut -f1,2 -d.)
230 | 	# older versions of gcc are too dumb to build boost with -Wuninitalized
231 | 	ifeq ($(shell echo $(GCCVERSION) \< 4.6 | bc), 1)
232 | 		WARNINGS += -Wno-uninitialized
233 | 	endif
234 | 	# boost::thread is reasonably called boost_thread (compare OS X)
235 | 	# We will also explicitly add stdc++ to the link target.
236 | 	LIBRARIES += boost_thread stdc++
237 | endif
238 | 
239 | # OS X:
240 | # clang++ instead of g++
241 | # libstdc++ for NVCC compatibility on OS X >= 10.9 with CUDA < 7.0
242 | ifeq ($(OSX), 1)
243 | 	CXX := /usr/bin/clang++
244 | 	ifneq ($(CPU_ONLY), 1)
245 | 		CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release \d' | grep -o '\d')
246 | 		ifeq ($(shell echo $(CUDA_VERSION) \< 7.0 | bc), 1)
247 | 			CXXFLAGS += -stdlib=libstdc++
248 | 			LINKFLAGS += -stdlib=libstdc++
249 | 		endif
250 | 		# clang throws this warning for cuda headers
251 | 		WARNINGS += -Wno-unneeded-internal-declaration
252 | 	endif
253 | 	# gtest needs to use its own tuple to not conflict with clang
254 | 	COMMON_FLAGS += -DGTEST_USE_OWN_TR1_TUPLE=1
255 | 	# boost::thread is called boost_thread-mt to mark multithreading on OS X
256 | 	LIBRARIES += boost_thread-mt
257 | 	# we need to explicitly ask for the rpath to be obeyed
258 | 	DYNAMIC_FLAGS := -install_name @rpath/libcaffe.so
259 | 	ORIGIN := @loader_path
260 | else
261 | 	ORIGIN := \$$ORIGIN
262 | endif
263 | 
264 | # Custom compiler
265 | ifdef CUSTOM_CXX
266 | 	CXX := $(CUSTOM_CXX)
267 | endif
268 | 
269 | # Static linking
270 | ifneq (,$(findstring clang++,$(CXX)))
271 | 	STATIC_LINK_COMMAND := -Wl,-force_load $(STATIC_NAME)
272 | else ifneq (,$(findstring g++,$(CXX)))
273 | 	STATIC_LINK_COMMAND := -Wl,--whole-archive $(STATIC_NAME) -Wl,--no-whole-archive
274 | else
275 |   # The following line must not be indented with a tab, since we are not inside a target
276 |   $(error Cannot static link with the $(CXX) compiler)
277 | endif
278 | 
279 | # Debugging
280 | ifeq ($(DEBUG), 1)
281 | 	COMMON_FLAGS += -DDEBUG -g -O0
282 | 	NVCCFLAGS += -G
283 | else
284 | 	COMMON_FLAGS += -DNDEBUG -O2
285 | endif
286 | 
287 | # cuDNN acceleration configuration.
288 | ifeq ($(USE_CUDNN), 1)
289 | 	LIBRARIES += cudnn
290 | 	COMMON_FLAGS += -DUSE_CUDNN
291 | endif
292 | 
293 | # CPU-only configuration
294 | ifeq ($(CPU_ONLY), 1)
295 | 	OBJS := $(PROTO_OBJS) $(CXX_OBJS)
296 | 	TEST_OBJS := $(TEST_CXX_OBJS)
297 | 	TEST_BINS := $(TEST_CXX_BINS)
298 | 	ALL_WARNS := $(ALL_CXX_WARNS)
299 | 	TEST_FILTER := --gtest_filter="-*GPU*"
300 | 	COMMON_FLAGS += -DCPU_ONLY
301 | endif
302 | 
303 | # Python layer support
304 | ifeq ($(WITH_PYTHON_LAYER), 1)
305 | 	COMMON_FLAGS += -DWITH_PYTHON_LAYER
306 | 	LIBRARIES += $(PYTHON_LIBRARIES)
307 | endif
308 | 
309 | # BLAS configuration (default = ATLAS)
310 | BLAS ?= atlas
311 | ifeq ($(BLAS), mkl)
312 | 	# MKL
313 | 	LIBRARIES += mkl_rt
314 | 	COMMON_FLAGS += -DUSE_MKL
315 | 	MKL_DIR ?= /opt/intel/mkl
316 | 	BLAS_INCLUDE ?= $(MKL_DIR)/include
317 | 	BLAS_LIB ?= $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64
318 | else ifeq ($(BLAS), open)
319 | 	# OpenBLAS
320 | 	LIBRARIES += openblas
321 | else
322 | 	# ATLAS
323 | 	ifeq ($(LINUX), 1)
324 | 		ifeq ($(BLAS), atlas)
325 | 			# Linux simply has cblas and atlas
326 | 			LIBRARIES += cblas atlas
327 | 		endif
328 | 	else ifeq ($(OSX), 1)
329 | 		# OS X packages atlas as the vecLib framework
330 | 		LIBRARIES += cblas
331 | 		# 10.10 has accelerate while 10.9 has veclib
332 | 		XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep -o 'version: 6')
333 | 		ifneq (,$(findstring version: 6,$(XCODE_CLT_VER)))
334 | 			BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
335 | 			LDFLAGS += -framework Accelerate
336 | 		else
337 | 			BLAS_INCLUDE ?= /System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/
338 | 			LDFLAGS += -framework vecLib
339 | 		endif
340 | 	endif
341 | endif
342 | INCLUDE_DIRS += $(BLAS_INCLUDE)
343 | LIBRARY_DIRS += $(BLAS_LIB)
344 | 
345 | LIBRARY_DIRS += $(LIB_BUILD_DIR)
346 | 
347 | # Automatic dependency generation (nvcc is handled separately)
348 | CXXFLAGS += -MMD -MP
349 | 
350 | # Complete build flags.
351 | COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
352 | CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS) -std=c++11
353 | NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
354 | # mex may invoke an older gcc that is too liberal with -Wuninitalized
355 | MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized
356 | LINKFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
357 | 
358 | USE_PKG_CONFIG ?= 0
359 | ifeq ($(USE_PKG_CONFIG), 1)
360 | 	PKG_CONFIG := $(shell pkg-config opencv --libs)
361 | else
362 | 	PKG_CONFIG :=
363 | endif
364 | LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(PKG_CONFIG) \
365 | 		$(foreach library,$(LIBRARIES),-l$(library))
366 | PYTHON_LDFLAGS := $(LDFLAGS) $(foreach library,$(PYTHON_LIBRARIES),-l$(library))
367 | 
368 | # 'superclean' target recursively* deletes all files ending with an extension
369 | # in $(SUPERCLEAN_EXTS) below.  This may be useful if you've built older
370 | # versions of Caffe that do not place all generated files in a location known
371 | # to the 'clean' target.
372 | #
373 | # 'supercleanlist' will list the files to be deleted by make superclean.
374 | #
375 | # * Recursive with the exception that symbolic links are never followed, per the
376 | # default behavior of 'find'.
377 | SUPERCLEAN_EXTS := .so .a .o .bin .testbin .pb.cc .pb.h _pb2.py .cuo
378 | 
379 | # Set the sub-targets of the 'everything' target.
380 | EVERYTHING_TARGETS := all py$(PROJECT) test warn lint
381 | # Only build matcaffe as part of "everything" if MATLAB_DIR is specified.
382 | ifneq ($(MATLAB_DIR),)
383 | 	EVERYTHING_TARGETS += mat$(PROJECT)
384 | endif
385 | 
386 | ##############################
387 | # Define build targets
388 | ##############################
389 | .PHONY: all test clean docs linecount lint lintclean tools examples $(DIST_ALIASES) \
390 | 	py mat py$(PROJECT) mat$(PROJECT) proto runtest \
391 | 	superclean supercleanlist supercleanfiles warn everything
392 | 
393 | all: $(STATIC_NAME) $(DYNAMIC_NAME) tools examples
394 | 
395 | everything: $(EVERYTHING_TARGETS)
396 | 
397 | linecount:
398 | 	cloc --read-lang-def=$(PROJECT).cloc \
399 | 		src/$(PROJECT) include/$(PROJECT) tools examples \
400 | 		python matlab
401 | 
402 | lint: $(EMPTY_LINT_REPORT)
403 | 
404 | lintclean:
405 | 	@ $(RM) -r $(LINT_OUTPUT_DIR) $(EMPTY_LINT_REPORT) $(NONEMPTY_LINT_REPORT)
406 | 
407 | docs: $(DOXYGEN_OUTPUT_DIR)
408 | 	@ cd ./docs ; ln -sfn ../$(DOXYGEN_OUTPUT_DIR)/html doxygen
409 | 
410 | $(DOXYGEN_OUTPUT_DIR): $(DOXYGEN_CONFIG_FILE) $(DOXYGEN_SOURCES)
411 | 	$(DOXYGEN_COMMAND) $(DOXYGEN_CONFIG_FILE)
412 | 
413 | $(EMPTY_LINT_REPORT): $(LINT_OUTPUTS) | $(BUILD_DIR)
414 | 	@ cat $(LINT_OUTPUTS) > $@
415 | 	@ if [ -s "$@" ]; then \
416 | 		cat $@; \
417 | 		mv $@ $(NONEMPTY_LINT_REPORT); \
418 | 		echo "Found one or more lint errors."; \
419 | 		exit 1; \
420 | 	  fi; \
421 | 	  $(RM) $(NONEMPTY_LINT_REPORT); \
422 | 	  echo "No lint errors!";
423 | 
424 | $(LINT_OUTPUTS): $(LINT_OUTPUT_DIR)/%.lint.txt : % $(LINT_SCRIPT) | $(LINT_OUTPUT_DIR)
425 | 	@ mkdir -p $(dir $@)
426 | 	@ python $(LINT_SCRIPT) $< 2>&1 \
427 | 		| grep -v "^Done processing " \
428 | 		| grep -v "^Total errors found: 0" \
429 | 		> $@ \
430 | 		|| true
431 | 
432 | test: $(TEST_ALL_BIN) $(TEST_ALL_DYNLINK_BIN) $(TEST_BINS)
433 | 
434 | tools: $(TOOL_BINS) $(TOOL_BIN_LINKS)
435 | 
436 | examples: $(EXAMPLE_BINS)
437 | 
438 | py$(PROJECT): py
439 | 
440 | py: $(PY$(PROJECT)_SO) $(PROTO_GEN_PY)
441 | 
442 | $(PY$(PROJECT)_SO): $(PY$(PROJECT)_SRC) $(PY$(PROJECT)_HXX) | $(DYNAMIC_NAME)
443 | 	@ echo CXX/LD -o $@ $<
444 | 	$(Q)$(CXX) -shared -o $@ $(PY$(PROJECT)_SRC) \
445 | 		-o $@ $(LINKFLAGS) -l$(PROJECT) $(PYTHON_LDFLAGS) \
446 | 		-Wl,-rpath,$(ORIGIN)/../../build/lib
447 | 
448 | mat$(PROJECT): mat
449 | 
450 | mat: $(MAT$(PROJECT)_SO)
451 | 
452 | $(MAT$(PROJECT)_SO): $(MAT$(PROJECT)_SRC) $(STATIC_NAME)
453 | 	@ if [ -z "$(MATLAB_DIR)" ]; then \
454 | 		echo "MATLAB_DIR must be specified in $(CONFIG_FILE)" \
455 | 			"to build mat$(PROJECT)."; \
456 | 		exit 1; \
457 | 	fi
458 | 	@ echo MEX $<
459 | 	$(Q)$(MATLAB_DIR)/bin/mex $(MAT$(PROJECT)_SRC) \
460 | 			CXX="$(CXX)" \
461 | 			CXXFLAGS="\$$CXXFLAGS $(MATLAB_CXXFLAGS)" \
462 | 			CXXLIBS="\$$CXXLIBS $(STATIC_LINK_COMMAND) $(LDFLAGS)" -output $@
463 | 	@ if [ -f "$(PROJECT)_.d" ]; then \
464 | 		mv -f $(PROJECT)_.d $(BUILD_DIR)/${MAT$(PROJECT)_SO:.$(MAT_SO_EXT)=.d}; \
465 | 	fi
466 | 
467 | runtest: $(TEST_ALL_BIN)
468 | 	$(TOOL_BUILD_DIR)/caffe
469 | 	$(TEST_ALL_BIN) $(TEST_GPUID) --gtest_shuffle $(TEST_FILTER)
470 | 
471 | pytest: py
472 | 	cd python; python -m unittest discover -s caffe/test
473 | 	
474 | mattest: mat
475 | 	cd matlab; $(MATLAB_DIR)/bin/matlab -nodisplay -r 'caffe.run_tests(), exit()'
476 | 
477 | warn: $(EMPTY_WARN_REPORT)
478 | 
479 | $(EMPTY_WARN_REPORT): $(ALL_WARNS) | $(BUILD_DIR)
480 | 	@ cat $(ALL_WARNS) > $@
481 | 	@ if [ -s "$@" ]; then \
482 | 		cat $@; \
483 | 		mv $@ $(NONEMPTY_WARN_REPORT); \
484 | 		echo "Compiler produced one or more warnings."; \
485 | 		exit 1; \
486 | 	  fi; \
487 | 	  $(RM) $(NONEMPTY_WARN_REPORT); \
488 | 	  echo "No compiler warnings!";
489 | 
490 | $(ALL_WARNS): %.o.$(WARNS_EXT) : %.o
491 | 
492 | $(BUILD_DIR_LINK): $(BUILD_DIR)/.linked
493 | 
494 | # Create a target ".linked" in this BUILD_DIR to tell Make that the "build" link
495 | # is currently correct, then delete the one in the OTHER_BUILD_DIR in case it
496 | # exists and $(DEBUG) is toggled later.
497 | $(BUILD_DIR)/.linked:
498 | 	@ mkdir -p $(BUILD_DIR)
499 | 	@ $(RM) $(OTHER_BUILD_DIR)/.linked
500 | 	@ $(RM) -r $(BUILD_DIR_LINK)
501 | 	@ ln -s $(BUILD_DIR) $(BUILD_DIR_LINK)
502 | 	@ touch $@
503 | 
504 | $(ALL_BUILD_DIRS): | $(BUILD_DIR_LINK)
505 | 	@ mkdir -p $@
506 | 
507 | $(DYNAMIC_NAME): $(OBJS) | $(LIB_BUILD_DIR)
508 | 	@ echo LD -o $@
509 | 	$(Q)$(CXX) -shared -o $@ $(OBJS) $(LINKFLAGS) $(LDFLAGS) $(DYNAMIC_FLAGS)
510 | 
511 | $(STATIC_NAME): $(OBJS) | $(LIB_BUILD_DIR)
512 | 	@ echo AR -o $@
513 | 	$(Q)ar rcs $@ $(OBJS)
514 | 
515 | $(BUILD_DIR)/%.o: %.cpp | $(ALL_BUILD_DIRS)
516 | 	@ echo CXX $<
517 | 	$(Q)$(CXX) $< $(CXXFLAGS) -c -o $@ 2> $@.$(WARNS_EXT) \
518 | 		|| (cat $@.$(WARNS_EXT); exit 1)
519 | 	@ cat $@.$(WARNS_EXT)
520 | 
521 | $(PROTO_BUILD_DIR)/%.pb.o: $(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_GEN_HEADER) \
522 | 		| $(PROTO_BUILD_DIR)
523 | 	@ echo CXX $<
524 | 	$(Q)$(CXX) $< $(CXXFLAGS) -c -o $@ 2> $@.$(WARNS_EXT) \
525 | 		|| (cat $@.$(WARNS_EXT); exit 1)
526 | 	@ cat $@.$(WARNS_EXT)
527 | 
528 | $(BUILD_DIR)/cuda/%.o: %.cu | $(ALL_BUILD_DIRS)
529 | 	@ echo NVCC $<
530 | 	$(Q)$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} \
531 | 		-odir $(@D)
532 | 	$(Q)$(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 2> $@.$(WARNS_EXT) \
533 | 		|| (cat $@.$(WARNS_EXT); exit 1)
534 | 	@ cat $@.$(WARNS_EXT)
535 | 
536 | $(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \
537 | 		| $(DYNAMIC_NAME) $(TEST_BIN_DIR)
538 | 	@ echo CXX/LD -o $@ $<
539 | 	$(Q)$(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \
540 | 		-o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib
541 | 
542 | $(TEST_CU_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CU_BUILD_DIR)/%.o \
543 | 	$(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR)
544 | 	@ echo LD $<
545 | 	$(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \
546 | 		-o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib
547 | 
548 | $(TEST_CXX_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CXX_BUILD_DIR)/%.o \
549 | 	$(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR)
550 | 	@ echo LD $<
551 | 	$(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \
552 | 		-o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib
553 | 
554 | # Target for extension-less symlinks to tool binaries with extension '*.bin'.
555 | $(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR)
556 | 	@ $(RM) $@
557 | 	@ ln -s $(abspath $<) $@
558 | 
559 | $(TOOL_BINS): %.bin : %.o | $(DYNAMIC_NAME)
560 | 	@ echo CXX/LD -o $@
561 | 	$(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \
562 | 		-Wl,-rpath,$(ORIGIN)/../lib
563 | 
564 | $(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME)
565 | 	@ echo CXX/LD -o $@
566 | 	$(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \
567 | 		-Wl,-rpath,$(ORIGIN)/../../lib
568 | 
569 | proto: $(PROTO_GEN_CC) $(PROTO_GEN_HEADER)
570 | 
571 | $(PROTO_BUILD_DIR)/%.pb.cc $(PROTO_BUILD_DIR)/%.pb.h : \
572 | 		$(PROTO_SRC_DIR)/%.proto | $(PROTO_BUILD_DIR)
573 | 	@ echo PROTOC $<
574 | 	$(Q)protoc --proto_path=$(PROTO_SRC_DIR) --cpp_out=$(PROTO_BUILD_DIR) $<
575 | 
576 | $(PY_PROTO_BUILD_DIR)/%_pb2.py : $(PROTO_SRC_DIR)/%.proto \
577 | 		$(PY_PROTO_INIT) | $(PY_PROTO_BUILD_DIR)
578 | 	@ echo PROTOC \(python\) $<
579 | 	$(Q)protoc --proto_path=$(PROTO_SRC_DIR) --python_out=$(PY_PROTO_BUILD_DIR) $<
580 | 
581 | $(PY_PROTO_INIT): | $(PY_PROTO_BUILD_DIR)
582 | 	touch $(PY_PROTO_INIT)
583 | 
584 | clean:
585 | 	@- $(RM) -rf $(ALL_BUILD_DIRS)
586 | 	@- $(RM) -rf $(OTHER_BUILD_DIR)
587 | 	@- $(RM) -rf $(BUILD_DIR_LINK)
588 | 	@- $(RM) -rf $(DISTRIBUTE_DIR)
589 | 	@- $(RM) $(PY$(PROJECT)_SO)
590 | 	@- $(RM) $(MAT$(PROJECT)_SO)
591 | 
592 | supercleanfiles:
593 | 	$(eval SUPERCLEAN_FILES := $(strip \
594 | 			$(foreach ext,$(SUPERCLEAN_EXTS), $(shell find . -name '*$(ext)' \
595 | 			-not -path './data/*'))))
596 | 
597 | supercleanlist: supercleanfiles
598 | 	@ \
599 | 	if [ -z "$(SUPERCLEAN_FILES)" ]; then \
600 | 		echo "No generated files found."; \
601 | 	else \
602 | 		echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
603 | 	fi
604 | 
605 | superclean: clean supercleanfiles
606 | 	@ \
607 | 	if [ -z "$(SUPERCLEAN_FILES)" ]; then \
608 | 		echo "No generated files found."; \
609 | 	else \
610 | 		echo "Deleting the following generated files:"; \
611 | 		echo $(SUPERCLEAN_FILES) | tr ' ' '\n'; \
612 | 		$(RM) $(SUPERCLEAN_FILES); \
613 | 	fi
614 | 
615 | $(DIST_ALIASES): $(DISTRIBUTE_DIR)
616 | 
617 | $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS)
618 | 	# add include
619 | 	cp -r include $(DISTRIBUTE_DIR)/
620 | 	mkdir -p $(DISTRIBUTE_DIR)/include/caffe/proto
621 | 	cp $(PROTO_GEN_HEADER_SRCS) $(DISTRIBUTE_DIR)/include/caffe/proto
622 | 	# add tool and example binaries
623 | 	cp $(TOOL_BINS) $(DISTRIBUTE_DIR)/bin
624 | 	cp $(EXAMPLE_BINS) $(DISTRIBUTE_DIR)/bin
625 | 	# add libraries
626 | 	cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib
627 | 	cp $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib
628 | 	# add python - it's not the standard way, indeed...
629 | 	cp -r python $(DISTRIBUTE_DIR)/python
630 | 
631 | -include $(DEPS)
632 | 


--------------------------------------------------------------------------------
/code/include/caffe/common_layers.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef CAFFE_COMMON_LAYERS_HPP_
  2 | #define CAFFE_COMMON_LAYERS_HPP_
  3 | 
  4 | #include <string>
  5 | #include <utility>
  6 | #include <vector>
  7 | #include <queue>
  8 | 
  9 | #include "caffe/blob.hpp"
 10 | #include "caffe/common.hpp"
 11 | #include "caffe/data_layers.hpp"
 12 | #include "caffe/layer.hpp"
 13 | #include "caffe/loss_layers.hpp"
 14 | #include "caffe/neuron_layers.hpp"
 15 | #include "caffe/proto/caffe.pb.h"
 16 | 
 17 | namespace caffe {
 18 | 
 19 | /**
 20 |  * @brief Compute the index of the @f$ K @f$ max values for each datum across
 21 |  *        all dimensions @f$ (C \times H \times W) @f$.
 22 |  *
 23 |  * Intended for use after a classification layer to produce a prediction.
 24 |  * If parameter out_max_val is set to true, output is a vector of pairs
 25 |  * (max_ind, max_val) for each image.
 26 |  *
 27 |  * NOTE: does not implement Backwards operation.
 28 |  */
 29 | template <typename Dtype>
 30 | class ArgMaxLayer : public Layer<Dtype> {
 31 |  public:
 32 |   /**
 33 |    * @param param provides ArgMaxParameter argmax_param,
 34 |    *     with ArgMaxLayer options:
 35 |    *   - top_k (\b optional uint, default 1).
 36 |    *     the number @f$ K @f$ of maximal items to output.
 37 |    *   - out_max_val (\b optional bool, default false).
 38 |    *     if set, output a vector of pairs (max_ind, max_val) for each image.
 39 |    */
 40 |   explicit ArgMaxLayer(const LayerParameter& param)
 41 |       : Layer<Dtype>(param) {}
 42 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 43 |       const vector<Blob<Dtype>*>& top);
 44 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
 45 |       const vector<Blob<Dtype>*>& top);
 46 | 
 47 |   virtual inline const char* type() const { return "ArgMax"; }
 48 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
 49 |   virtual inline int ExactNumTopBlobs() const { return 1; }
 50 | 
 51 |  protected:
 52 |   /**
 53 |    * @param bottom input Blob vector (length 1)
 54 |    *   -# @f$ (N \times C \times H \times W) @f$
 55 |    *      the inputs @f$ x @f$
 56 |    * @param top output Blob vector (length 1)
 57 |    *   -# @f$ (N \times 1 \times K \times 1) @f$ or, if out_max_val
 58 |    *      @f$ (N \times 2 \times K \times 1) @f$
 59 |    *      the computed outputs @f$
 60 |    *       y_n = \arg\max\limits_i x_{ni}
 61 |    *      @f$ (for @f$ K = 1 @f$).
 62 |    */
 63 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 64 |       const vector<Blob<Dtype>*>& top);
 65 |   /// @brief Not implemented (non-differentiable function)
 66 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
 67 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
 68 |     NOT_IMPLEMENTED;
 69 |   }
 70 |   bool out_max_val_;
 71 |   size_t top_k_;
 72 | };
 73 | 
 74 | /**
 75 |  * @brief Takes at least two Blob%s and concatenates them along either the num
 76 |  *        or channel dimension, outputting the result.
 77 |  */
 78 | template <typename Dtype>
 79 | class ConcatLayer : public Layer<Dtype> {
 80 |  public:
 81 |   explicit ConcatLayer(const LayerParameter& param)
 82 |       : Layer<Dtype>(param) {}
 83 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 84 |       const vector<Blob<Dtype>*>& top);
 85 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
 86 |       const vector<Blob<Dtype>*>& top);
 87 | 
 88 |   virtual inline const char* type() const { return "Concat"; }
 89 |   virtual inline int MinBottomBlobs() const { return 2; }
 90 |   virtual inline int ExactNumTopBlobs() const { return 1; }
 91 | 
 92 |  protected:
 93 |   /**
 94 |    * @param bottom input Blob vector (length 2+)
 95 |    *   -# @f$ (N \times C \times H \times W) @f$
 96 |    *      the inputs @f$ x_1 @f$
 97 |    *   -# @f$ (N \times C \times H \times W) @f$
 98 |    *      the inputs @f$ x_2 @f$
 99 |    *   -# ...
100 |    *   - K @f$ (N \times C \times H \times W) @f$
101 |    *      the inputs @f$ x_K @f$
102 |    * @param top output Blob vector (length 1)
103 |    *   -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or
104 |    *      @f$ (N \times KC \times H \times W) @f$ if axis == 1:
105 |    *      the concatenated output @f$
106 |    *        y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}]
107 |    *      @f$
108 |    */
109 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
110 |       const vector<Blob<Dtype>*>& top);
111 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
112 |       const vector<Blob<Dtype>*>& top);
113 | 
114 |   /**
115 |    * @brief Computes the error gradient w.r.t. the concatenate inputs.
116 |    *
117 |    * @param top output Blob vector (length 1), providing the error gradient with
118 |    *        respect to the outputs
119 |    *   -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or
120 |    *      @f$ (N \times KC \times H \times W) @f$ if axis == 1:
121 |    *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
122 |    *      with respect to concatenated outputs @f$ y @f$
123 |    * @param propagate_down see Layer::Backward.
124 |    * @param bottom input Blob vector (length K), into which the top gradient
125 |    *        @f$ \frac{\partial E}{\partial y} @f$ is deconcatenated back to the
126 |    *        inputs @f$
127 |    *        \left[ \begin{array}{cccc}
128 |    *          \frac{\partial E}{\partial x_1} &
129 |    *          \frac{\partial E}{\partial x_2} &
130 |    *          ... &
131 |    *          \frac{\partial E}{\partial x_K}
132 |    *        \end{array} \right] =
133 |    *        \frac{\partial E}{\partial y}
134 |    *        @f$
135 |    */
136 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
137 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
138 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
139 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
140 | 
141 |   int count_;
142 |   int num_concats_;
143 |   int concat_input_size_;
144 |   int concat_axis_;
145 | };
146 | 
147 | /**
148 |  * @brief Compute elementwise operations, such as product and sum,
149 |  *        along multiple input Blobs.
150 |  *
151 |  * TODO(dox): thorough documentation for Forward, Backward, and proto params.
152 |  */
153 | template <typename Dtype>
154 | class EltwiseLayer : public Layer<Dtype> {
155 |  public:
156 |   explicit EltwiseLayer(const LayerParameter& param)
157 |       : Layer<Dtype>(param) {}
158 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
159 |       const vector<Blob<Dtype>*>& top);
160 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
161 |       const vector<Blob<Dtype>*>& top);
162 | 
163 |   virtual inline const char* type() const { return "Eltwise"; }
164 |   virtual inline int MinBottomBlobs() const { return 2; }
165 |   virtual inline int ExactNumTopBlobs() const { return 1; }
166 | 
167 |  protected:
168 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
169 |       const vector<Blob<Dtype>*>& top);
170 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
171 |       const vector<Blob<Dtype>*>& top);
172 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
173 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
174 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
175 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
176 | 
177 |   EltwiseParameter_EltwiseOp op_;
178 |   vector<Dtype> coeffs_;
179 |   Blob<int> max_idx_;
180 | 
181 |   bool stable_prod_grad_;
182 | };
183 | 
184 | /**
185 |  * @brief Takes two+ Blobs, interprets last Blob as a selector and
186 |  *  filter remaining Blobs accordingly with selector data (0 means that
187 |  * the corresponding item has to be filtered, non-zero means that corresponding
188 |  * item needs to stay).
189 |  */
190 | template <typename Dtype>
191 | class FilterLayer : public Layer<Dtype> {
192 |  public:
193 |   explicit FilterLayer(const LayerParameter& param)
194 |       : Layer<Dtype>(param) {}
195 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
196 |       const vector<Blob<Dtype>*>& top);
197 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
198 |       const vector<Blob<Dtype>*>& top);
199 | 
200 |   virtual inline const char* type() const { return "Filter"; }
201 |   virtual inline int MinBottomBlobs() const { return 2; }
202 |   virtual inline int MinTopBlobs() const { return 1; }
203 | 
204 |  protected:
205 |   /**
206 |    * @param bottom input Blob vector (length 2+)
207 |    *   -# @f$ (N \times C \times H \times W) @f$
208 |    *      the inputs to be filtered @f$ x_1 @f$
209 |    *   -# ...
210 |    *   -# @f$ (N \times C \times H \times W) @f$
211 |    *      the inputs to be filtered @f$ x_K @f$
212 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
213 |    *      the selector blob
214 |    * @param top output Blob vector (length 1+)
215 |    *   -# @f$ (S \times C \times H \times W) @f$ ()
216 |    *        the filtered output @f$ x_1 @f$
217 |    *        where S is the number of items
218 |    *        that haven't been filtered
219 |    *      @f$ (S \times C \times H \times W) @f$
220 |    *        the filtered output @f$ x_K @f$
221 |    *        where S is the number of items
222 |    *        that haven't been filtered
223 |    */
224 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
225 |       const vector<Blob<Dtype>*>& top);
226 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
227 |     const vector<Blob<Dtype>*>& top);
228 | 
229 |   /**
230 |    * @brief Computes the error gradient w.r.t. the forwarded inputs.
231 |    *
232 |    * @param top output Blob vector (length 1+), providing the error gradient with
233 |    *        respect to the outputs
234 |    * @param propagate_down see Layer::Backward.
235 |    * @param bottom input Blob vector (length 2+), into which the top error
236 |    *        gradient is copied
237 |    */
238 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
239 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
240 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
241 |     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
242 | 
243 |   bool first_reshape_;
244 |   vector<int> indices_to_forward_;
245 | };
246 | 
247 | /**
248 |  * @brief Reshapes the input Blob into flat vectors.
249 |  *
250 |  * Note: because this layer does not change the input values -- merely the
251 |  * dimensions -- it can simply copy the input. The copy happens "virtually"
252 |  * (thus taking effectively 0 real time) by setting, in Forward, the data
253 |  * pointer of the top Blob to that of the bottom Blob (see Blob::ShareData),
254 |  * and in Backward, the diff pointer of the bottom Blob to that of the top Blob
255 |  * (see Blob::ShareDiff).
256 |  */
257 | template <typename Dtype>
258 | class FlattenLayer : public Layer<Dtype> {
259 |  public:
260 |   explicit FlattenLayer(const LayerParameter& param)
261 |       : Layer<Dtype>(param) {}
262 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
263 |       const vector<Blob<Dtype>*>& top);
264 | 
265 |   virtual inline const char* type() const { return "Flatten"; }
266 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
267 |   virtual inline int ExactNumTopBlobs() const { return 1; }
268 | 
269 |  protected:
270 |   /**
271 |    * @param bottom input Blob vector (length 2+)
272 |    *   -# @f$ (N \times C \times H \times W) @f$
273 |    *      the inputs
274 |    * @param top output Blob vector (length 1)
275 |    *   -# @f$ (N \times CHW \times 1 \times 1) @f$
276 |    *      the outputs -- i.e., the (virtually) copied, flattened inputs
277 |    */
278 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
279 |       const vector<Blob<Dtype>*>& top);
280 | 
281 |   /**
282 |    * @brief Computes the error gradient w.r.t. the concatenate inputs.
283 |    *
284 |    * @param top output Blob vector (length 1), providing the error gradient with
285 |    *        respect to the outputs
286 |    * @param propagate_down see Layer::Backward.
287 |    * @param bottom input Blob vector (length K), into which the top error
288 |    *        gradient is (virtually) copied
289 |    */
290 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
291 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
292 | };
293 | 
294 | /**
295 |  * @brief Also known as a "fully-connected" layer, computes an inner product
296 |  *        with a set of learned weights, and (optionally) adds biases.
297 |  *
298 |  * TODO(dox): thorough documentation for Forward, Backward, and proto params.
299 |  */
300 | template <typename Dtype>
301 | class InnerProductLayer : public Layer<Dtype> {
302 |  public:
303 |   explicit InnerProductLayer(const LayerParameter& param)
304 |       : Layer<Dtype>(param) {}
305 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
306 |       const vector<Blob<Dtype>*>& top);
307 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
308 |       const vector<Blob<Dtype>*>& top);
309 | 
310 |   virtual inline const char* type() const { return "InnerProduct"; }
311 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
312 |   virtual inline int ExactNumTopBlobs() const { return 1; }
313 | 
314 |  protected:
315 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
316 |       const vector<Blob<Dtype>*>& top);
317 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
318 |       const vector<Blob<Dtype>*>& top);
319 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
320 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
321 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
322 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
323 | 
324 |   int M_;
325 |   int K_;
326 |   int N_;
327 |   bool bias_term_;
328 |   Blob<Dtype> bias_multiplier_;
329 | };
330 | 
331 | //Tree Class for SuperCategory Layer
332 | //Class Location..?
333 | class Tree {
334 | public :
335 |   Tree() : label(-1) {}
336 |   ~Tree() {}
337 | 
338 |   int GetLabel() const { return label; }
339 |   int GetIndex() const { return index; }
340 |   Tree * InsertChild(shared_ptr<Tree> child) {
341 | 	  children.push_back(child);
342 | 	  child->parent = this;
343 | 	  return this;
344 |   }
345 |   void SetLabel(int label_) { this->label = label_; }
346 |   const Tree * GetParent() const { return parent; }
347 |   const std::vector<shared_ptr<Tree> > * GetChildren() const {
348 | 	  return &children;
349 |   }
350 | 
351 |   int Depth() const;
352 |   void MakeBalance(int remain);
353 | 
354 |   //Tree helper
355 |   static void GiveIndex(Tree * root, std::vector<Tree *>& serialized_tree);
356 |   static void GetNodeNumPerLevelAndGiveLabel(std::vector<int>& node_num, std::vector<int>& base_index,Tree * root, std::vector<Tree *>& serialized_tree, std::vector<int>& label_to_index);
357 |   static void MakeTree(Tree * node, const SuperCategoryParameter::TreeScheme * node_param);
358 | 
359 | private :
360 |   int label;
361 |   int index;
362 | 
363 |   Tree * parent;
364 |   std::vector<shared_ptr<Tree> > children;
365 | };
366 | 
367 | template <typename Dtype>
368 | class SuperCategoryFMLayer : public Layer<Dtype> {
369 |  public:
370 |   explicit SuperCategoryFMLayer(const LayerParameter& param)
371 |       : Layer<Dtype>(param) {}
372 |   virtual void LayerSetUp(
373 |       const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
374 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
375 |       const vector<Blob<Dtype>*>& top);
376 | 
377 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
378 |   virtual inline const char* type() const { return "SuperCategoryFM"; }
379 | 
380 |  protected:
381 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
382 |       const vector<Blob<Dtype>*>& top);
383 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
384 |       const vector<Blob<Dtype>*>& top)
385 |   {
386 | 	  return Forward_cpu(bottom,top);
387 |   }
388 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
389 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
390 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
391 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
392 |   {
393 | 	  return Backward_cpu(top,propagate_down,bottom);
394 |   }
395 | 
396 |   int M_; //Batch Size
397 |   int N_; //Filter Size(# of category)
398 |   int H_; //height of feature map size
399 |   int W_; //width of feature map size
400 | 
401 |   Tree root_;
402 |   int depth_;
403 |   std::vector<int> node_num_per_level_;
404 |   std::vector<int> base_index_per_level_;
405 |   std::vector<int> label_to_index_;
406 |   std::vector<Tree *> serialized_tree_;
407 | 
408 |   EltwiseParameter_EltwiseOp op_;
409 |   std::vector<shared_ptr<Blob<int> > > mark_;
410 | };
411 | 
412 | template <typename Dtype>
413 | class SuperCategoryFMPostLayer : public Layer<Dtype> {
414 |  public:
415 |   explicit SuperCategoryFMPostLayer(const LayerParameter& param)
416 |       : Layer<Dtype>(param) {}
417 |   virtual void LayerSetUp(
418 |       const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
419 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
420 |       const vector<Blob<Dtype>*>& top);
421 | 
422 |   virtual inline int MinBottomBlobs() const { return 2; }
423 |   virtual inline int MinTopBlobs() const { return 2; }
424 |   virtual inline const char* type() const { return "SuperCategoryFMPost"; }
425 | 
426 |  protected:
427 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
428 |       const vector<Blob<Dtype>*>& top);
429 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
430 |       const vector<Blob<Dtype>*>& top)
431 |   {
432 | 	  return Forward_cpu(bottom,top);
433 |   }
434 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
435 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
436 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
437 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
438 |   {
439 | 	  return Backward_cpu(top,propagate_down,bottom);
440 |   }
441 | 
442 |   int M_; //Batch Size
443 |   int N_; //Filter Size(# of category)
444 |   int H_; //height of feature map size
445 |   int W_; //width of feature map size
446 | 
447 |   Tree root_;
448 |   int depth_;
449 |   std::vector<int> node_num_per_level_;
450 |   std::vector<int> base_index_per_level_;
451 |   std::vector<int> label_to_index_;
452 |   std::vector<Tree *> serialized_tree_;
453 | 
454 |   EltwiseParameter_EltwiseOp op_;
455 |   //std::vector<shared_ptr<Blob<int> > > mark_;
456 | };
457 | template <typename Dtype>
458 | class SuperCategoryLayer : public Layer<Dtype> {
459 |  public:
460 |   explicit SuperCategoryLayer(const LayerParameter& param)
461 |       : Layer<Dtype>(param) {}
462 |   virtual void LayerSetUp(
463 |       const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
464 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
465 |       const vector<Blob<Dtype>*>& top);
466 | 
467 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
468 |   virtual inline const char* type() const { return "SuperCategory"; }
469 | 
470 |  protected:
471 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
472 |       const vector<Blob<Dtype>*>& top);
473 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
474 |       const vector<Blob<Dtype>*>& top)
475 |   {
476 | 	  return Forward_cpu(bottom,top);
477 |   }
478 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
479 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
480 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
481 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom)
482 |   {
483 | 	  return Backward_cpu(top,propagate_down,bottom);
484 |   }
485 | 
486 | 
487 |   int N_; //Batch Size
488 |   Blob<Dtype> temp_; //temporary blob for intermediate result
489 |   std::vector<shared_ptr<Blob<int> > > mark_;
490 | 
491 |   Tree root_;
492 |   int depth_;
493 |   std::vector<int> node_num_per_level_;
494 |   std::vector<int> base_index_per_level_;
495 |   std::vector<int> label_to_index_;
496 |   std::vector<Tree *> serialized_tree_;
497 | };
498 | template <typename Dtype>
499 | class SuperCategoryLabelLayer : public Layer<Dtype> {
500 |  public:
501 |   explicit SuperCategoryLabelLayer(const LayerParameter& param)
502 |       : Layer<Dtype>(param) {}
503 |   virtual void LayerSetUp(
504 |       const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
505 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
506 |       const vector<Blob<Dtype>*>& top);
507 | 
508 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
509 |   virtual inline const char* type() const { return "SuperCategoryLabel"; }
510 | 
511 |  protected:
512 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
513 |       const vector<Blob<Dtype>*>& top);
514 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
515 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom){}
516 | 
517 |   int N_;	//Batch Size
518 | 
519 |   Tree root_;
520 |   int depth_;
521 |   std::vector<int> node_num_per_level_;
522 |   std::vector<int> base_index_per_level_;
523 |   std::vector<int> label_to_index_;
524 |   std::vector<Tree *> serialized_tree_;
525 | };
526 | 
527 | /**
528 |  * @brief Normalizes the input to have 0-mean and/or unit (1) variance.
529 |  *
530 |  * TODO(dox): thorough documentation for Forward, Backward, and proto params.
531 |  */
532 | template <typename Dtype>
533 | class MVNLayer : public Layer<Dtype> {
534 |  public:
535 |   explicit MVNLayer(const LayerParameter& param)
536 |       : Layer<Dtype>(param) {}
537 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
538 |       const vector<Blob<Dtype>*>& top);
539 | 
540 |   virtual inline const char* type() const { return "MVN"; }
541 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
542 |   virtual inline int ExactNumTopBlobs() const { return 1; }
543 | 
544 |  protected:
545 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
546 |       const vector<Blob<Dtype>*>& top);
547 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
548 |       const vector<Blob<Dtype>*>& top);
549 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
550 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
551 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
552 |      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
553 | 
554 |   Blob<Dtype> mean_, variance_, temp_;
555 | 
556 |   /// sum_multiplier is used to carry out sum using BLAS
557 |   Blob<Dtype> sum_multiplier_;
558 |   Dtype eps_;
559 | };
560 | 
561 | /*
562 |  * @brief Reshapes the input Blob into an arbitrary-sized output Blob.
563 |  *
564 |  * Note: similarly to FlattenLayer, this layer does not change the input values
565 |  * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff).
566 |  */
567 | template <typename Dtype>
568 | class ReshapeLayer : public Layer<Dtype> {
569 |  public:
570 |   explicit ReshapeLayer(const LayerParameter& param)
571 |       : Layer<Dtype>(param) {}
572 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
573 |       const vector<Blob<Dtype>*>& top);
574 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
575 |       const vector<Blob<Dtype>*>& top);
576 | 
577 |   virtual inline const char* type() const { return "Reshape"; }
578 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
579 |   virtual inline int ExactNumTopBlobs() const { return 1; }
580 | 
581 |  protected:
582 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
583 |       const vector<Blob<Dtype>*>& top) {}
584 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
585 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
586 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
587 |       const vector<Blob<Dtype>*>& top) {}
588 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
589 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
590 | 
591 |   /// @brief vector of axes indices whose dimensions we'll copy from the bottom
592 |   vector<int> copy_axes_;
593 |   /// @brief the index of the axis whose dimension we infer, or -1 if none
594 |   int inferred_axis_;
595 |   /// @brief the product of the "constant" output dimensions
596 |   int constant_count_;
597 | };
598 | 
599 | /**
600 |  * @brief Compute "reductions" -- operations that return a scalar output Blob
601 |  *        for an input Blob of arbitrary size, such as the sum, absolute sum,
602 |  *        and sum of squares.
603 |  *
604 |  * TODO(dox): thorough documentation for Forward, Backward, and proto params.
605 |  */
606 | template <typename Dtype>
607 | class ReductionLayer : public Layer<Dtype> {
608 |  public:
609 |   explicit ReductionLayer(const LayerParameter& param)
610 |       : Layer<Dtype>(param) {}
611 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
612 |       const vector<Blob<Dtype>*>& top);
613 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
614 |       const vector<Blob<Dtype>*>& top);
615 | 
616 |   virtual inline const char* type() const { return "Reduction"; }
617 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
618 |   virtual inline int ExactNumTopBlobs() const { return 1; }
619 | 
620 |  protected:
621 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
622 |       const vector<Blob<Dtype>*>& top);
623 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
624 |       const vector<Blob<Dtype>*>& top);
625 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
626 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
627 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
628 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
629 | 
630 |   /// @brief the reduction operation performed by the layer
631 |   ReductionParameter_ReductionOp op_;
632 |   /// @brief a scalar coefficient applied to all outputs
633 |   Dtype coeff_;
634 |   /// @brief the index of the first input axis to reduce
635 |   int axis_;
636 |   /// @brief the number of reductions performed
637 |   int num_;
638 |   /// @brief the input size of each reduction
639 |   int dim_;
640 |   /// @brief a helper Blob used for summation (op_ == SUM)
641 |   Blob<Dtype> sum_multiplier_;
642 | };
643 | 
644 | /**
645 |  * @brief Ignores bottom blobs while producing no top blobs. (This is useful
646 |  *        to suppress outputs during testing.)
647 |  */
648 | template <typename Dtype>
649 | class SilenceLayer : public Layer<Dtype> {
650 |  public:
651 |   explicit SilenceLayer(const LayerParameter& param)
652 |       : Layer<Dtype>(param) {}
653 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
654 |       const vector<Blob<Dtype>*>& top) {}
655 | 
656 |   virtual inline const char* type() const { return "Silence"; }
657 |   virtual inline int MinBottomBlobs() const { return 1; }
658 |   virtual inline int ExactNumTopBlobs() const { return 0; }
659 | 
660 |  protected:
661 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
662 |       const vector<Blob<Dtype>*>& top) {}
663 |   // We can't define Forward_gpu here, since STUB_GPU will provide
664 |   // its own definition for CPU_ONLY mode.
665 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
666 |       const vector<Blob<Dtype>*>& top);
667 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
668 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
669 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
670 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
671 | };
672 | 
673 | /**
674 |  * @brief Computes the softmax function.
675 |  *
676 |  * TODO(dox): thorough documentation for Forward, Backward, and proto params.
677 |  */
678 | template <typename Dtype>
679 | class SoftmaxLayer : public Layer<Dtype> {
680 |  public:
681 |   explicit SoftmaxLayer(const LayerParameter& param)
682 |       : Layer<Dtype>(param) {}
683 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
684 |       const vector<Blob<Dtype>*>& top);
685 | 
686 |   virtual inline const char* type() const { return "Softmax"; }
687 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
688 |   virtual inline int ExactNumTopBlobs() const { return 1; }
689 | 
690 |  protected:
691 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
692 |       const vector<Blob<Dtype>*>& top);
693 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
694 |       const vector<Blob<Dtype>*>& top);
695 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
696 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
697 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
698 |      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
699 | 
700 |   int outer_num_;
701 |   int inner_num_;
702 |   int softmax_axis_;
703 |   /// sum_multiplier is used to carry out sum using BLAS
704 |   Blob<Dtype> sum_multiplier_;
705 |   /// scale is an intermediate Blob to hold temporary results.
706 |   Blob<Dtype> scale_;
707 | };
708 | 
709 | #ifdef USE_CUDNN
710 | /**
711 |  * @brief cuDNN implementation of SoftmaxLayer.
712 |  *        Fallback to SoftmaxLayer for CPU mode.
713 |  */
714 | template <typename Dtype>
715 | class CuDNNSoftmaxLayer : public SoftmaxLayer<Dtype> {
716 |  public:
717 |   explicit CuDNNSoftmaxLayer(const LayerParameter& param)
718 |       : SoftmaxLayer<Dtype>(param), handles_setup_(false) {}
719 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
720 |       const vector<Blob<Dtype>*>& top);
721 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
722 |       const vector<Blob<Dtype>*>& top);
723 |   virtual ~CuDNNSoftmaxLayer();
724 | 
725 |  protected:
726 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
727 |       const vector<Blob<Dtype>*>& top);
728 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
729 |      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
730 | 
731 |   bool handles_setup_;
732 |   cudnnHandle_t             handle_;
733 |   cudnnTensorDescriptor_t bottom_desc_;
734 |   cudnnTensorDescriptor_t top_desc_;
735 | };
736 | #endif
737 | 
738 | /**
739 |  * @brief Creates a "split" path in the network by copying the bottom Blob
740 |  *        into multiple top Blob%s to be used by multiple consuming layers.
741 |  *
742 |  * TODO(dox): thorough documentation for Forward, Backward, and proto params.
743 |  */
744 | template <typename Dtype>
745 | class SplitLayer : public Layer<Dtype> {
746 |  public:
747 |   explicit SplitLayer(const LayerParameter& param)
748 |       : Layer<Dtype>(param) {}
749 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
750 |       const vector<Blob<Dtype>*>& top);
751 | 
752 |   virtual inline const char* type() const { return "Split"; }
753 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
754 |   virtual inline int MinTopBlobs() const { return 1; }
755 | 
756 |  protected:
757 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
758 |       const vector<Blob<Dtype>*>& top);
759 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
760 |       const vector<Blob<Dtype>*>& top);
761 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
762 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
763 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
764 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
765 | 
766 |   int count_;
767 | };
768 | 
769 | /**
770 |  * @brief Takes a Blob and slices it along either the num or channel dimension,
771 |  *        outputting multiple sliced Blob results.
772 |  *
773 |  * TODO(dox): thorough documentation for Forward, Backward, and proto params.
774 |  */
775 | template <typename Dtype>
776 | class SliceLayer : public Layer<Dtype> {
777 |  public:
778 |   explicit SliceLayer(const LayerParameter& param)
779 |       : Layer<Dtype>(param) {}
780 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
781 |       const vector<Blob<Dtype>*>& top);
782 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
783 |       const vector<Blob<Dtype>*>& top);
784 | 
785 |   virtual inline const char* type() const { return "Slice"; }
786 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
787 |   virtual inline int MinTopBlobs() const { return 2; }
788 | 
789 |  protected:
790 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
791 |       const vector<Blob<Dtype>*>& top);
792 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
793 |       const vector<Blob<Dtype>*>& top);
794 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
795 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
796 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
797 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
798 | 
799 |   int count_;
800 |   int num_slices_;
801 |   int slice_size_;
802 |   int slice_axis_;
803 |   vector<int> slice_point_;
804 | };
805 | 
806 | }  // namespace caffe
807 | 
808 | #endif  // CAFFE_COMMON_LAYERS_HPP_
809 | 


--------------------------------------------------------------------------------
/code/include/caffe/loss_layers.hpp:
--------------------------------------------------------------------------------
  1 | #ifndef CAFFE_LOSS_LAYERS_HPP_
  2 | #define CAFFE_LOSS_LAYERS_HPP_
  3 | 
  4 | #include <string>
  5 | #include <utility>
  6 | #include <vector>
  7 | 
  8 | #include "caffe/blob.hpp"
  9 | #include "caffe/common.hpp"
 10 | #include "caffe/layer.hpp"
 11 | #include "caffe/neuron_layers.hpp"
 12 | #include "caffe/proto/caffe.pb.h"
 13 | 
 14 | namespace caffe {
 15 | 
 16 | const float kLOG_THRESHOLD = 1e-20;
 17 | 
 18 | /**
 19 |  * @brief Computes the classification accuracy for a one-of-many
 20 |  *        classification task.
 21 |  */
 22 | template <typename Dtype>
 23 | class AccuracyLayer : public Layer<Dtype> {
 24 |  public:
 25 |   /**
 26 |    * @param param provides AccuracyParameter accuracy_param,
 27 |    *     with AccuracyLayer options:
 28 |    *   - top_k (\b optional, default 1).
 29 |    *     Sets the maximum rank @f$ k @f$ at which a prediction is considered
 30 |    *     correct.  For example, if @f$ k = 5 @f$, a prediction is counted
 31 |    *     correct if the correct label is among the top 5 predicted labels.
 32 |    */
 33 |   explicit AccuracyLayer(const LayerParameter& param)
 34 |       : Layer<Dtype>(param) {}
 35 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 36 |       const vector<Blob<Dtype>*>& top);
 37 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
 38 |       const vector<Blob<Dtype>*>& top);
 39 | 
 40 |   virtual inline const char* type() const { return "Accuracy"; }
 41 |   virtual inline int ExactNumBottomBlobs() const { return 2; }
 42 |   virtual inline int ExactNumTopBlobs() const { return 1; }
 43 | 
 44 |  protected:
 45 |   /**
 46 |    * @param bottom input Blob vector (length 2)
 47 |    *   -# @f$ (N \times C \times H \times W) @f$
 48 |    *      the predictions @f$ x @f$, a Blob with values in
 49 |    *      @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
 50 |    *      the @f$ K = CHW @f$ classes. Each @f$ x_n @f$ is mapped to a predicted
 51 |    *      label @f$ \hat{l}_n @f$ given by its maximal index:
 52 |    *      @f$ \hat{l}_n = \arg\max\limits_k x_{nk} @f$
 53 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
 54 |    *      the labels @f$ l @f$, an integer-valued Blob with values
 55 |    *      @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
 56 |    *      indicating the correct class label among the @f$ K @f$ classes
 57 |    * @param top output Blob vector (length 1)
 58 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
 59 |    *      the computed accuracy: @f$
 60 |    *        \frac{1}{N} \sum\limits_{n=1}^N \delta\{ \hat{l}_n = l_n \}
 61 |    *      @f$, where @f$
 62 |    *      \delta\{\mathrm{condition}\} = \left\{
 63 |    *         \begin{array}{lr}
 64 |    *            1 & \mbox{if condition} \\
 65 |    *            0 & \mbox{otherwise}
 66 |    *         \end{array} \right.
 67 |    *      @f$
 68 |    */
 69 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 70 |       const vector<Blob<Dtype>*>& top);
 71 | 
 72 | 
 73 |   /// @brief Not implemented -- AccuracyLayer cannot be used as a loss.
 74 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
 75 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
 76 |     for (int i = 0; i < propagate_down.size(); ++i) {
 77 |       if (propagate_down[i]) { NOT_IMPLEMENTED; }
 78 |     }
 79 |   }
 80 | 
 81 |   int label_axis_, outer_num_, inner_num_;
 82 | 
 83 |   int top_k_;
 84 | 
 85 |   /// Whether to ignore instances with a certain label.
 86 |   bool has_ignore_label_;
 87 |   /// The label indicating that an instance should be ignored.
 88 |   int ignore_label_;
 89 |   /// Wheter print out top-k result. Used for making test result.
 90 |   bool print_;
 91 | };
 92 | 
 93 | /**
 94 |  * @brief Computes the classification accuracy and confusion matrix for a one-of-many
 95 |  *        classification task.
 96 |  */
 97 | template <typename Dtype>
 98 | class AccuracyWithConfusionLayer : public Layer<Dtype> {
 99 |  public:
100 |   /**
101 |    * @param param provides AccuracyParameter accuracy_param,
102 |    *     with AccuracyLayer options:
103 |    *   - top_k (\b optional, default 1).
104 |    *     Sets the maximum rank @f$ k @f$ at which a prediction is considered
105 |    *     correct.  For example, if @f$ k = 5 @f$, a prediction is counted
106 |    *     correct if the correct label is among the top 5 predicted labels.
107 |    */
108 |   explicit AccuracyWithConfusionLayer(const LayerParameter& param)
109 |       : Layer<Dtype>(param) {}
110 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
111 |       const vector<Blob<Dtype>*>& top);
112 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
113 |       const vector<Blob<Dtype>*>& top);
114 | 
115 |   virtual inline const char* type() const { return "AccuracyWithConfusion"; }
116 |   virtual inline int ExactNumBottomBlobs() const { return 2; }
117 |   virtual inline int ExactNumTopBlobs() const { return 2; }
118 | 
119 |  protected:
120 |   /**
121 |    * @param bottom input Blob vector (length 2)
122 |    *   -# @f$ (N \times C \times H \times W) @f$
123 |    *      the predictions @f$ x @f$, a Blob with values in
124 |    *      @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
125 |    *      the @f$ K = CHW @f$ classes. Each @f$ x_n @f$ is mapped to a predicted
126 |    *      label @f$ \hat{l}_n @f$ given by its maximal index:
127 |    *      @f$ \hat{l}_n = \arg\max\limits_k x_{nk} @f$
128 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
129 |    *      the labels @f$ l @f$, an integer-valued Blob with values
130 |    *      @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
131 |    *      indicating the correct class label among the @f$ K @f$ classes
132 |    * @param top output Blob vector (length 2)
133 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
134 |    *      the computed accuracy: @f$
135 |    *        \frac{1}{N} \sum\limits_{n=1}^N \delta\{ \hat{l}_n = l_n \}
136 |    *      @f$, where @f$
137 |    *      \delta\{\mathrm{condition}\} = \left\{
138 |    *         \begin{array}{lr}
139 |    *            1 & \mbox{if condition} \\
140 |    *            0 & \mbox{otherwise}
141 |    *         \end{array} \right.
142 |    *      @f$
143 |    *   -# @f$ ({N} \times {N} \times 1 \times 1) @f$
144 |    *      the computed confusion matrix: @f$
145 |    *      @f$
146 |    */
147 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
148 |       const vector<Blob<Dtype>*>& top);
149 | 
150 | 
151 |   /// @brief Not implemented -- AccuracyLayer cannot be used as a loss.
152 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
153 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
154 |     for (int i = 0; i < propagate_down.size(); ++i) {
155 |       if (propagate_down[i]) { NOT_IMPLEMENTED; }
156 |     }
157 |   }
158 | 
159 |   int label_axis_, outer_num_, inner_num_;
160 | 
161 |   int top_k_;
162 | 
163 |   /// Whether to ignore instances with a certain label.
164 |   bool has_ignore_label_;
165 |   /// The label indicating that an instance should be ignored.
166 |   int ignore_label_;
167 | };
168 | 
169 | template <typename Dtype>
170 | class LabelAccuracyLayer : public Layer<Dtype> {
171 |  public:
172 |   explicit LabelAccuracyLayer(const LayerParameter& param)
173 |       : Layer<Dtype>(param) {}
174 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
175 |       const vector<Blob<Dtype>*>& top);
176 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
177 |       const vector<Blob<Dtype>*>& top);
178 | 
179 |   virtual inline const char* type() const { return "LabelAccuracy"; }
180 |   virtual inline int ExactNumBottomBlobs() const { return 2; }
181 |   virtual inline int ExactNumTopBlobs() const { return 1; }
182 | 
183 |  protected:
184 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
185 |       const vector<Blob<Dtype>*>& top);
186 | 
187 | 
188 |   /// @brief Not implemented -- TopLabelLayer cannot be used as a loss.
189 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
190 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
191 |     for (int i = 0; i < propagate_down.size(); ++i) {
192 |       if (propagate_down[i]) { NOT_IMPLEMENTED; }
193 |     }
194 |   }
195 | 
196 |   int label_axis_, outer_num_, inner_num_;
197 | 
198 |   int top_k_;
199 | 
200 |   /// Whether to ignore instances with a certain label.
201 |   bool has_ignore_label_;
202 |   /// The label indicating that an instance should be ignored.
203 |   int ignore_label_;
204 | };
205 | template <typename Dtype>
206 | class LabelAccuracyWithConfusionLayer : public Layer<Dtype> {
207 |  public:
208 |   explicit LabelAccuracyWithConfusionLayer(const LayerParameter& param)
209 |       : Layer<Dtype>(param) {}
210 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
211 |       const vector<Blob<Dtype>*>& top);
212 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
213 |       const vector<Blob<Dtype>*>& top);
214 | 
215 |   virtual inline const char* type() const { return "LabelAccuracyWithConfusion"; }
216 |   virtual inline int ExactNumBottomBlobs() const { return 2; }
217 |   virtual inline int ExactNumTopBlobs() const { return 2; }
218 | 
219 |  protected:
220 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
221 |       const vector<Blob<Dtype>*>& top);
222 | 
223 | 
224 |   /// @brief Not implemented -- TopLabelLayer cannot be used as a loss.
225 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
226 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
227 |     for (int i = 0; i < propagate_down.size(); ++i) {
228 |       if (propagate_down[i]) { NOT_IMPLEMENTED; }
229 |     }
230 |   }
231 | 
232 |   int num_label_;
233 |   /// Whether to ignore instances with a certain label.
234 |   bool has_ignore_label_;
235 |   /// The label indicating that an instance should be ignored.
236 |   int ignore_label_;
237 | };
238 | /**
239 |  * @brief Computes the classification top-k result
240 |  *        classification task.
241 |  */
242 | template <typename Dtype>
243 | class TopLabelLayer : public Layer<Dtype> {
244 |  public:
245 |   explicit TopLabelLayer(const LayerParameter& param)
246 |       : Layer<Dtype>(param) {}
247 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
248 |       const vector<Blob<Dtype>*>& top);
249 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
250 |       const vector<Blob<Dtype>*>& top);
251 | 
252 |   virtual inline const char* type() const { return "TopLabel"; }
253 |   virtual inline int ExactNumBottomBlobs() const { return 1; }
254 |   virtual inline int ExactNumTopBlobs() const { return 1; }
255 | 
256 |  protected:
257 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
258 |       const vector<Blob<Dtype>*>& top);
259 | 
260 | 
261 |   /// @brief Not implemented -- TopLabelLayer cannot be used as a loss.
262 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
263 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
264 |     for (int i = 0; i < propagate_down.size(); ++i) {
265 |       if (propagate_down[i]) { NOT_IMPLEMENTED; }
266 |     }
267 |   }
268 | 
269 |   int label_axis_, outer_num_, inner_num_;
270 | 
271 |   int top_k_;
272 | 
273 |   /// Whether to ignore instances with a certain label.
274 |   bool has_ignore_label_;
275 |   /// The label indicating that an instance should be ignored.
276 |   int ignore_label_;
277 | };
278 | 
279 | /**
280 |  * @brief An interface for Layer%s that take two Blob%s as input -- usually
281 |  *        (1) predictions and (2) ground-truth labels -- and output a
282 |  *        singleton Blob representing the loss.
283 |  *
284 |  * LossLayers are typically only capable of backpropagating to their first input
285 |  * -- the predictions.
286 |  */
287 | template <typename Dtype>
288 | class LossLayer : public Layer<Dtype> {
289 |  public:
290 |   explicit LossLayer(const LayerParameter& param)
291 |      : Layer<Dtype>(param) {}
292 |   virtual void LayerSetUp(
293 |       const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
294 |   virtual void Reshape(
295 |       const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
296 | 
297 |   virtual inline int ExactNumBottomBlobs() const { return 2; }
298 | 
299 |   /**
300 |    * @brief For convenience and backwards compatibility, instruct the Net to
301 |    *        automatically allocate a single top Blob for LossLayers, into which
302 |    *        they output their singleton loss, (even if the user didn't specify
303 |    *        one in the prototxt, etc.).
304 |    */
305 |   virtual inline bool AutoTopBlobs() const { return true; }
306 |   virtual inline int ExactNumTopBlobs() const { return 1; }
307 |   /**
308 |    * We usually cannot backpropagate to the labels; ignore force_backward for
309 |    * these inputs.
310 |    */
311 |   virtual inline bool AllowForceBackward(const int bottom_index) const {
312 |     return bottom_index != 1;
313 |   }
314 | };
315 | 
316 | /**
317 |  * @brief Computes the contrastive loss @f$
318 |  *          E = \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d +
319 |  *              \left(1-y\right) \max \left(margin-d, 0\right)
320 |  *          @f$ where @f$
321 |  *          d = \left| \left| a_n - b_n \right| \right|_2^2 @f$. This can be
322 |  *          used to train siamese networks.
323 |  *
324 |  * @param bottom input Blob vector (length 3)
325 |  *   -# @f$ (N \times C \times 1 \times 1) @f$
326 |  *      the features @f$ a \in [-\infty, +\infty]@f$
327 |  *   -# @f$ (N \times C \times 1 \times 1) @f$
328 |  *      the features @f$ b \in [-\infty, +\infty]@f$
329 |  *   -# @f$ (N \times 1 \times 1 \times 1) @f$
330 |  *      the binary similarity @f$ s \in [0, 1]@f$
331 |  * @param top output Blob vector (length 1)
332 |  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
333 |  *      the computed contrastive loss: @f$ E =
334 |  *          \frac{1}{2N} \sum\limits_{n=1}^N \left(y\right) d +
335 |  *          \left(1-y\right) \max \left(margin-d, 0\right)
336 |  *          @f$ where @f$
337 |  *          d = \left| \left| a_n - b_n \right| \right|_2^2 @f$.
338 |  * This can be used to train siamese networks.
339 |  */
340 | template <typename Dtype>
341 | class ContrastiveLossLayer : public LossLayer<Dtype> {
342 |  public:
343 |   explicit ContrastiveLossLayer(const LayerParameter& param)
344 |       : LossLayer<Dtype>(param), diff_() {}
345 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
346 |       const vector<Blob<Dtype>*>& top);
347 | 
348 |   virtual inline int ExactNumBottomBlobs() const { return 3; }
349 |   virtual inline const char* type() const { return "ContrastiveLoss"; }
350 |   /**
351 |    * Unlike most loss layers, in the ContrastiveLossLayer we can backpropagate
352 |    * to the first two inputs.
353 |    */
354 |   virtual inline bool AllowForceBackward(const int bottom_index) const {
355 |     return bottom_index != 2;
356 |   }
357 | 
358 |  protected:
359 |   /// @copydoc ContrastiveLossLayer
360 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
361 |       const vector<Blob<Dtype>*>& top);
362 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
363 |       const vector<Blob<Dtype>*>& top);
364 | 
365 |   /**
366 |    * @brief Computes the Contrastive error gradient w.r.t. the inputs.
367 |    *
368 |    * Computes the gradients with respect to the two input vectors (bottom[0] and
369 |    * bottom[1]), but not the similarity label (bottom[2]).
370 |    *
371 |    * @param top output Blob vector (length 1), providing the error gradient with
372 |    *      respect to the outputs
373 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
374 |    *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
375 |    *      as @f$ \lambda @f$ is the coefficient of this layer's output
376 |    *      @f$\ell_i@f$ in the overall Net loss
377 |    *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
378 |    *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
379 |    *      (*Assuming that this top Blob is not used as a bottom (input) by any
380 |    *      other layer of the Net.)
381 |    * @param propagate_down see Layer::Backward.
382 |    * @param bottom input Blob vector (length 2)
383 |    *   -# @f$ (N \times C \times 1 \times 1) @f$
384 |    *      the features @f$a@f$; Backward fills their diff with
385 |    *      gradients if propagate_down[0]
386 |    *   -# @f$ (N \times C \times 1 \times 1) @f$
387 |    *      the features @f$b@f$; Backward fills their diff with gradients if
388 |    *      propagate_down[1]
389 |    */
390 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
391 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
392 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
393 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
394 | 
395 |   Blob<Dtype> diff_;  // cached for backward pass
396 |   Blob<Dtype> dist_sq_;  // cached for backward pass
397 |   Blob<Dtype> diff_sq_;  // tmp storage for gpu forward pass
398 |   Blob<Dtype> summer_vec_;  // tmp storage for gpu forward pass
399 | };
400 | 
401 | /**
402 |  * @brief Computes the Euclidean (L2) loss @f$
403 |  *          E = \frac{1}{2N} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n
404 |  *        \right| \right|_2^2 @f$ for real-valued regression tasks.
405 |  *
406 |  * @param bottom input Blob vector (length 2)
407 |  *   -# @f$ (N \times C \times H \times W) @f$
408 |  *      the predictions @f$ \hat{y} \in [-\infty, +\infty]@f$
409 |  *   -# @f$ (N \times C \times H \times W) @f$
410 |  *      the targets @f$ y \in [-\infty, +\infty]@f$
411 |  * @param top output Blob vector (length 1)
412 |  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
413 |  *      the computed Euclidean loss: @f$ E =
414 |  *          \frac{1}{2n} \sum\limits_{n=1}^N \left| \left| \hat{y}_n - y_n
415 |  *        \right| \right|_2^2 @f$
416 |  *
417 |  * This can be used for least-squares regression tasks.  An InnerProductLayer
418 |  * input to a EuclideanLossLayer exactly formulates a linear least squares
419 |  * regression problem. With non-zero weight decay the problem becomes one of
420 |  * ridge regression -- see src/caffe/test/test_sgd_solver.cpp for a concrete
421 |  * example wherein we check that the gradients computed for a Net with exactly
422 |  * this structure match hand-computed gradient formulas for ridge regression.
423 |  *
424 |  * (Note: Caffe, and SGD in general, is certainly \b not the best way to solve
425 |  * linear least squares problems! We use it only as an instructive example.)
426 |  */
427 | template <typename Dtype>
428 | class EuclideanLossLayer : public LossLayer<Dtype> {
429 |  public:
430 |   explicit EuclideanLossLayer(const LayerParameter& param)
431 |       : LossLayer<Dtype>(param), diff_() {}
432 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
433 |       const vector<Blob<Dtype>*>& top);
434 | 
435 |   virtual inline const char* type() const { return "EuclideanLoss"; }
436 |   /**
437 |    * Unlike most loss layers, in the EuclideanLossLayer we can backpropagate
438 |    * to both inputs -- override to return true and always allow force_backward.
439 |    */
440 |   virtual inline bool AllowForceBackward(const int bottom_index) const {
441 |     return true;
442 |   }
443 | 
444 |  protected:
445 |   /// @copydoc EuclideanLossLayer
446 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
447 |       const vector<Blob<Dtype>*>& top);
448 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
449 |       const vector<Blob<Dtype>*>& top);
450 | 
451 |   /**
452 |    * @brief Computes the Euclidean error gradient w.r.t. the inputs.
453 |    *
454 |    * Unlike other children of LossLayer, EuclideanLossLayer \b can compute
455 |    * gradients with respect to the label inputs bottom[1] (but still only will
456 |    * if propagate_down[1] is set, due to being produced by learnable parameters
457 |    * or if force_backward is set). In fact, this layer is "commutative" -- the
458 |    * result is the same regardless of the order of the two bottoms.
459 |    *
460 |    * @param top output Blob vector (length 1), providing the error gradient with
461 |    *      respect to the outputs
462 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
463 |    *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
464 |    *      as @f$ \lambda @f$ is the coefficient of this layer's output
465 |    *      @f$\ell_i@f$ in the overall Net loss
466 |    *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
467 |    *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
468 |    *      (*Assuming that this top Blob is not used as a bottom (input) by any
469 |    *      other layer of the Net.)
470 |    * @param propagate_down see Layer::Backward.
471 |    * @param bottom input Blob vector (length 2)
472 |    *   -# @f$ (N \times C \times H \times W) @f$
473 |    *      the predictions @f$\hat{y}@f$; Backward fills their diff with
474 |    *      gradients @f$
475 |    *        \frac{\partial E}{\partial \hat{y}} =
476 |    *            \frac{1}{n} \sum\limits_{n=1}^N (\hat{y}_n - y_n)
477 |    *      @f$ if propagate_down[0]
478 |    *   -# @f$ (N \times C \times H \times W) @f$
479 |    *      the targets @f$y@f$; Backward fills their diff with gradients
480 |    *      @f$ \frac{\partial E}{\partial y} =
481 |    *          \frac{1}{n} \sum\limits_{n=1}^N (y_n - \hat{y}_n)
482 |    *      @f$ if propagate_down[1]
483 |    */
484 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
485 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
486 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
487 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
488 | 
489 |   Blob<Dtype> diff_;
490 | };
491 | 
492 | /**
493 |  * @brief Computes the hinge loss for a one-of-many classification task.
494 |  *
495 |  * @param bottom input Blob vector (length 2)
496 |  *   -# @f$ (N \times C \times H \times W) @f$
497 |  *      the predictions @f$ t @f$, a Blob with values in
498 |  *      @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
499 |  *      the @f$ K = CHW @f$ classes. In an SVM, @f$ t @f$ is the result of
500 |  *      taking the inner product @f$ X^T W @f$ of the D-dimensional features
501 |  *      @f$ X \in \mathcal{R}^{D \times N} @f$ and the learned hyperplane
502 |  *      parameters @f$ W \in \mathcal{R}^{D \times K} @f$, so a Net with just
503 |  *      an InnerProductLayer (with num_output = D) providing predictions to a
504 |  *      HingeLossLayer and no other learnable parameters or losses is
505 |  *      equivalent to an SVM.
506 |  *   -# @f$ (N \times 1 \times 1 \times 1) @f$
507 |  *      the labels @f$ l @f$, an integer-valued Blob with values
508 |  *      @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
509 |  *      indicating the correct class label among the @f$ K @f$ classes
510 |  * @param top output Blob vector (length 1)
511 |  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
512 |  *      the computed hinge loss: @f$ E =
513 |  *        \frac{1}{N} \sum\limits_{n=1}^N \sum\limits_{k=1}^K
514 |  *        [\max(0, 1 - \delta\{l_n = k\} t_{nk})] ^ p
515 |  *      @f$, for the @f$ L^p @f$ norm
516 |  *      (defaults to @f$ p = 1 @f$, the L1 norm; L2 norm, as in L2-SVM,
517 |  *      is also available), and @f$
518 |  *      \delta\{\mathrm{condition}\} = \left\{
519 |  *         \begin{array}{lr}
520 |  *            1 & \mbox{if condition} \\
521 |  *           -1 & \mbox{otherwise}
522 |  *         \end{array} \right.
523 |  *      @f$
524 |  *
525 |  * In an SVM, @f$ t \in \mathcal{R}^{N \times K} @f$ is the result of taking
526 |  * the inner product @f$ X^T W @f$ of the features
527 |  * @f$ X \in \mathcal{R}^{D \times N} @f$
528 |  * and the learned hyperplane parameters
529 |  * @f$ W \in \mathcal{R}^{D \times K} @f$. So, a Net with just an
530 |  * InnerProductLayer (with num_output = @f$k@f$) providing predictions to a
531 |  * HingeLossLayer is equivalent to an SVM (assuming it has no other learned
532 |  * outside the InnerProductLayer and no other losses outside the
533 |  * HingeLossLayer).
534 |  */
535 | template <typename Dtype>
536 | class HingeLossLayer : public LossLayer<Dtype> {
537 |  public:
538 |   explicit HingeLossLayer(const LayerParameter& param)
539 |       : LossLayer<Dtype>(param) {}
540 | 
541 |   virtual inline const char* type() const { return "HingeLoss"; }
542 | 
543 |  protected:
544 |   /// @copydoc HingeLossLayer
545 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
546 |       const vector<Blob<Dtype>*>& top);
547 | 
548 |   /**
549 |    * @brief Computes the hinge loss error gradient w.r.t. the predictions.
550 |    *
551 |    * Gradients cannot be computed with respect to the label inputs (bottom[1]),
552 |    * so this method ignores bottom[1] and requires !propagate_down[1], crashing
553 |    * if propagate_down[1] is set.
554 |    *
555 |    * @param top output Blob vector (length 1), providing the error gradient with
556 |    *      respect to the outputs
557 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
558 |    *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
559 |    *      as @f$ \lambda @f$ is the coefficient of this layer's output
560 |    *      @f$\ell_i@f$ in the overall Net loss
561 |    *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
562 |    *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
563 |    *      (*Assuming that this top Blob is not used as a bottom (input) by any
564 |    *      other layer of the Net.)
565 |    * @param propagate_down see Layer::Backward.
566 |    *      propagate_down[1] must be false as we can't compute gradients with
567 |    *      respect to the labels.
568 |    * @param bottom input Blob vector (length 2)
569 |    *   -# @f$ (N \times C \times H \times W) @f$
570 |    *      the predictions @f$t@f$; Backward computes diff
571 |    *      @f$ \frac{\partial E}{\partial t} @f$
572 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
573 |    *      the labels -- ignored as we can't compute their error gradients
574 |    */
575 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
576 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
577 | };
578 | 
579 | /**
580 |  * @brief A generalization of MultinomialLogisticLossLayer that takes an
581 |  *        "information gain" (infogain) matrix specifying the "value" of all label
582 |  *        pairs.
583 |  *
584 |  * Equivalent to the MultinomialLogisticLossLayer if the infogain matrix is the
585 |  * identity.
586 |  *
587 |  * @param bottom input Blob vector (length 2-3)
588 |  *   -# @f$ (N \times C \times H \times W) @f$
589 |  *      the predictions @f$ \hat{p} @f$, a Blob with values in
590 |  *      @f$ [0, 1] @f$ indicating the predicted probability of each of the
591 |  *      @f$ K = CHW @f$ classes.  Each prediction vector @f$ \hat{p}_n @f$
592 |  *      should sum to 1 as in a probability distribution: @f$
593 |  *      \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$.
594 |  *   -# @f$ (N \times 1 \times 1 \times 1) @f$
595 |  *      the labels @f$ l @f$, an integer-valued Blob with values
596 |  *      @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
597 |  *      indicating the correct class label among the @f$ K @f$ classes
598 |  *   -# @f$ (1 \times 1 \times K \times K) @f$
599 |  *      (\b optional) the infogain matrix @f$ H @f$.  This must be provided as
600 |  *      the third bottom blob input if not provided as the infogain_mat in the
601 |  *      InfogainLossParameter. If @f$ H = I @f$, this layer is equivalent to the
602 |  *      MultinomialLogisticLossLayer.
603 |  * @param top output Blob vector (length 1)
604 |  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
605 |  *      the computed infogain multinomial logistic loss: @f$ E =
606 |  *        \frac{-1}{N} \sum\limits_{n=1}^N H_{l_n} \log(\hat{p}_n) =
607 |  *        \frac{-1}{N} \sum\limits_{n=1}^N \sum\limits_{k=1}^{K} H_{l_n,k}
608 |  *        \log(\hat{p}_{n,k})
609 |  *      @f$, where @f$ H_{l_n} @f$ denotes row @f$l_n@f$ of @f$H@f$.
610 |  */
611 | template <typename Dtype>
612 | class InfogainLossLayer : public LossLayer<Dtype> {
613 |  public:
614 |   explicit InfogainLossLayer(const LayerParameter& param)
615 |       : LossLayer<Dtype>(param), infogain_() {}
616 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
617 |       const vector<Blob<Dtype>*>& top);
618 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
619 |       const vector<Blob<Dtype>*>& top);
620 | 
621 |   // InfogainLossLayer takes 2-3 bottom Blobs; if there are 3 the third should
622 |   // be the infogain matrix.  (Otherwise the infogain matrix is loaded from a
623 |   // file specified by LayerParameter.)
624 |   virtual inline int ExactNumBottomBlobs() const { return -1; }
625 |   virtual inline int MinBottomBlobs() const { return 2; }
626 |   virtual inline int MaxBottomBlobs() const { return 3; }
627 | 
628 |   virtual inline const char* type() const { return "InfogainLoss"; }
629 | 
630 |  protected:
631 |   /// @copydoc InfogainLossLayer
632 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
633 |       const vector<Blob<Dtype>*>& top);
634 | 
635 |   /**
636 |    * @brief Computes the infogain loss error gradient w.r.t. the predictions.
637 |    *
638 |    * Gradients cannot be computed with respect to the label inputs (bottom[1]),
639 |    * so this method ignores bottom[1] and requires !propagate_down[1], crashing
640 |    * if propagate_down[1] is set. (The same applies to the infogain matrix, if
641 |    * provided as bottom[2] rather than in the layer_param.)
642 |    *
643 |    * @param top output Blob vector (length 1), providing the error gradient
644 |    *      with respect to the outputs
645 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
646 |    *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
647 |    *      as @f$ \lambda @f$ is the coefficient of this layer's output
648 |    *      @f$\ell_i@f$ in the overall Net loss
649 |    *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
650 |    *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
651 |    *      (*Assuming that this top Blob is not used as a bottom (input) by any
652 |    *      other layer of the Net.)
653 |    * @param propagate_down see Layer::Backward.
654 |    *      propagate_down[1] must be false as we can't compute gradients with
655 |    *      respect to the labels (similarly for propagate_down[2] and the
656 |    *      infogain matrix, if provided as bottom[2])
657 |    * @param bottom input Blob vector (length 2-3)
658 |    *   -# @f$ (N \times C \times H \times W) @f$
659 |    *      the predictions @f$ \hat{p} @f$; Backward computes diff
660 |    *      @f$ \frac{\partial E}{\partial \hat{p}} @f$
661 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
662 |    *      the labels -- ignored as we can't compute their error gradients
663 |    *   -# @f$ (1 \times 1 \times K \times K) @f$
664 |    *      (\b optional) the information gain matrix -- ignored as its error
665 |    *      gradient computation is not implemented.
666 |    */
667 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
668 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
669 | 
670 |   Blob<Dtype> infogain_;
671 | };
672 | 
673 | /**
674 |  * @brief Computes the multinomial logistic loss for a one-of-many
675 |  *        classification task, directly taking a predicted probability
676 |  *        distribution as input.
677 |  *
678 |  * When predictions are not already a probability distribution, you should
679 |  * instead use the SoftmaxWithLossLayer, which maps predictions to a
680 |  * distribution using the SoftmaxLayer, before computing the multinomial
681 |  * logistic loss. The SoftmaxWithLossLayer should be preferred over separate
682 |  * SoftmaxLayer + MultinomialLogisticLossLayer
683 |  * as its gradient computation is more numerically stable.
684 |  *
685 |  * @param bottom input Blob vector (length 2)
686 |  *   -# @f$ (N \times C \times H \times W) @f$
687 |  *      the predictions @f$ \hat{p} @f$, a Blob with values in
688 |  *      @f$ [0, 1] @f$ indicating the predicted probability of each of the
689 |  *      @f$ K = CHW @f$ classes.  Each prediction vector @f$ \hat{p}_n @f$
690 |  *      should sum to 1 as in a probability distribution: @f$
691 |  *      \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$.
692 |  *   -# @f$ (N \times 1 \times 1 \times 1) @f$
693 |  *      the labels @f$ l @f$, an integer-valued Blob with values
694 |  *      @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
695 |  *      indicating the correct class label among the @f$ K @f$ classes
696 |  * @param top output Blob vector (length 1)
697 |  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
698 |  *      the computed multinomial logistic loss: @f$ E =
699 |  *        \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n})
700 |  *      @f$
701 |  */
702 | template <typename Dtype>
703 | class MultinomialLogisticLossLayer : public LossLayer<Dtype> {
704 |  public:
705 |   explicit MultinomialLogisticLossLayer(const LayerParameter& param)
706 |       : LossLayer<Dtype>(param) {}
707 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
708 |       const vector<Blob<Dtype>*>& top);
709 | 
710 |   virtual inline const char* type() const { return "MultinomialLogisticLoss"; }
711 | 
712 |  protected:
713 |   /// @copydoc MultinomialLogisticLossLayer
714 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
715 |       const vector<Blob<Dtype>*>& top);
716 | 
717 |   /**
718 |    * @brief Computes the multinomial logistic loss error gradient w.r.t. the
719 |    *        predictions.
720 |    *
721 |    * Gradients cannot be computed with respect to the label inputs (bottom[1]),
722 |    * so this method ignores bottom[1] and requires !propagate_down[1], crashing
723 |    * if propagate_down[1] is set.
724 |    *
725 |    * @param top output Blob vector (length 1), providing the error gradient with
726 |    *      respect to the outputs
727 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
728 |    *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
729 |    *      as @f$ \lambda @f$ is the coefficient of this layer's output
730 |    *      @f$\ell_i@f$ in the overall Net loss
731 |    *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
732 |    *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
733 |    *      (*Assuming that this top Blob is not used as a bottom (input) by any
734 |    *      other layer of the Net.)
735 |    * @param propagate_down see Layer::Backward.
736 |    *      propagate_down[1] must be false as we can't compute gradients with
737 |    *      respect to the labels.
738 |    * @param bottom input Blob vector (length 2)
739 |    *   -# @f$ (N \times C \times H \times W) @f$
740 |    *      the predictions @f$ \hat{p} @f$; Backward computes diff
741 |    *      @f$ \frac{\partial E}{\partial \hat{p}} @f$
742 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
743 |    *      the labels -- ignored as we can't compute their error gradients
744 |    */
745 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
746 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
747 | };
748 | 
749 | /**
750 |  * @brief Computes the cross-entropy (logistic) loss @f$
751 |  *          E = \frac{-1}{n} \sum\limits_{n=1}^N \left[
752 |  *                  p_n \log \hat{p}_n +
753 |  *                  (1 - p_n) \log(1 - \hat{p}_n)
754 |  *              \right]
755 |  *        @f$, often used for predicting targets interpreted as probabilities.
756 |  *
757 |  * This layer is implemented rather than separate
758 |  * SigmoidLayer + CrossEntropyLayer
759 |  * as its gradient computation is more numerically stable.
760 |  * At test time, this layer can be replaced simply by a SigmoidLayer.
761 |  *
762 |  * @param bottom input Blob vector (length 2)
763 |  *   -# @f$ (N \times C \times H \times W) @f$
764 |  *      the scores @f$ x \in [-\infty, +\infty]@f$,
765 |  *      which this layer maps to probability predictions
766 |  *      @f$ \hat{p}_n = \sigma(x_n) \in [0, 1] @f$
767 |  *      using the sigmoid function @f$ \sigma(.) @f$ (see SigmoidLayer).
768 |  *   -# @f$ (N \times C \times H \times W) @f$
769 |  *      the targets @f$ y \in [0, 1] @f$
770 |  * @param top output Blob vector (length 1)
771 |  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
772 |  *      the computed cross-entropy loss: @f$
773 |  *          E = \frac{-1}{n} \sum\limits_{n=1}^N \left[
774 |  *                  p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n)
775 |  *              \right]
776 |  *      @f$
777 |  */
778 | template <typename Dtype>
779 | class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
780 |  public:
781 |   explicit SigmoidCrossEntropyLossLayer(const LayerParameter& param)
782 |       : LossLayer<Dtype>(param),
783 |           sigmoid_layer_(new SigmoidLayer<Dtype>(param)),
784 |           sigmoid_output_(new Blob<Dtype>()) {}
785 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
786 |       const vector<Blob<Dtype>*>& top);
787 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
788 |       const vector<Blob<Dtype>*>& top);
789 | 
790 |   virtual inline const char* type() const { return "SigmoidCrossEntropyLoss"; }
791 | 
792 |  protected:
793 |   /// @copydoc SigmoidCrossEntropyLossLayer
794 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
795 |       const vector<Blob<Dtype>*>& top);
796 | 
797 |   /**
798 |    * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the
799 |    *        predictions.
800 |    *
801 |    * Gradients cannot be computed with respect to the target inputs (bottom[1]),
802 |    * so this method ignores bottom[1] and requires !propagate_down[1], crashing
803 |    * if propagate_down[1] is set.
804 |    *
805 |    * @param top output Blob vector (length 1), providing the error gradient with
806 |    *      respect to the outputs
807 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
808 |    *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
809 |    *      as @f$ \lambda @f$ is the coefficient of this layer's output
810 |    *      @f$\ell_i@f$ in the overall Net loss
811 |    *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
812 |    *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
813 |    *      (*Assuming that this top Blob is not used as a bottom (input) by any
814 |    *      other layer of the Net.)
815 |    * @param propagate_down see Layer::Backward.
816 |    *      propagate_down[1] must be false as gradient computation with respect
817 |    *      to the targets is not implemented.
818 |    * @param bottom input Blob vector (length 2)
819 |    *   -# @f$ (N \times C \times H \times W) @f$
820 |    *      the predictions @f$x@f$; Backward computes diff
821 |    *      @f$ \frac{\partial E}{\partial x} =
822 |    *          \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n)
823 |    *      @f$
824 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
825 |    *      the labels -- ignored as we can't compute their error gradients
826 |    */
827 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
828 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
829 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
830 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
831 | 
832 |   /// The internal SigmoidLayer used to map predictions to probabilities.
833 |   shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;
834 |   /// sigmoid_output stores the output of the SigmoidLayer.
835 |   shared_ptr<Blob<Dtype> > sigmoid_output_;
836 |   /// bottom vector holder to call the underlying SigmoidLayer::Forward
837 |   vector<Blob<Dtype>*> sigmoid_bottom_vec_;
838 |   /// top vector holder to call the underlying SigmoidLayer::Forward
839 |   vector<Blob<Dtype>*> sigmoid_top_vec_;
840 | };
841 | 
842 | // Forward declare SoftmaxLayer for use in SoftmaxWithLossLayer.
843 | template <typename Dtype> class SoftmaxLayer;
844 | 
845 | /**
846 |  * @brief Computes the multinomial logistic loss for a one-of-many
847 |  *        classification task, passing real-valued predictions through a
848 |  *        softmax to get a probability distribution over classes.
849 |  *
850 |  * This layer should be preferred over separate
851 |  * SoftmaxLayer + MultinomialLogisticLossLayer
852 |  * as its gradient computation is more numerically stable.
853 |  * At test time, this layer can be replaced simply by a SoftmaxLayer.
854 |  *
855 |  * @param bottom input Blob vector (length 2)
856 |  *   -# @f$ (N \times C \times H \times W) @f$
857 |  *      the predictions @f$ x @f$, a Blob with values in
858 |  *      @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
859 |  *      the @f$ K = CHW @f$ classes. This layer maps these scores to a
860 |  *      probability distribution over classes using the softmax function
861 |  *      @f$ \hat{p}_{nk} = \exp(x_{nk}) /
862 |  *      \left[\sum_{k'} \exp(x_{nk'})\right] @f$ (see SoftmaxLayer).
863 |  *   -# @f$ (N \times 1 \times 1 \times 1) @f$
864 |  *      the labels @f$ l @f$, an integer-valued Blob with values
865 |  *      @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
866 |  *      indicating the correct class label among the @f$ K @f$ classes
867 |  * @param top output Blob vector (length 1)
868 |  *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
869 |  *      the computed cross-entropy classification loss: @f$ E =
870 |  *        \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n})
871 |  *      @f$, for softmax output class probabilites @f$ \hat{p} @f$
872 |  */
873 | template <typename Dtype>
874 | class SoftmaxWithLossLayer : public LossLayer<Dtype> {
875 |  public:
876 |    /**
877 |     * @param param provides LossParameter loss_param, with options:
878 |     *  - ignore_label (optional)
879 |     *    Specify a label value that should be ignored when computing the loss.
880 |     *  - normalize (optional, default true)
881 |     *    If true, the loss is normalized by the number of (nonignored) labels
882 |     *    present; otherwise the loss is simply summed over spatial locations.
883 |     */
884 |   explicit SoftmaxWithLossLayer(const LayerParameter& param)
885 |       : LossLayer<Dtype>(param) {}
886 |   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
887 |       const vector<Blob<Dtype>*>& top);
888 |   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
889 |       const vector<Blob<Dtype>*>& top);
890 | 
891 |   virtual inline const char* type() const { return "SoftmaxWithLoss"; }
892 |   virtual inline int ExactNumTopBlobs() const { return -1; }
893 |   virtual inline int MinTopBlobs() const { return 1; }
894 |   virtual inline int MaxTopBlobs() const { return 2; }
895 | 
896 |  protected:
897 |   /// @copydoc SoftmaxWithLossLayer
898 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
899 |       const vector<Blob<Dtype>*>& top);
900 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
901 |       const vector<Blob<Dtype>*>& top);
902 |   /**
903 |    * @brief Computes the softmax loss error gradient w.r.t. the predictions.
904 |    *
905 |    * Gradients cannot be computed with respect to the label inputs (bottom[1]),
906 |    * so this method ignores bottom[1] and requires !propagate_down[1], crashing
907 |    * if propagate_down[1] is set.
908 |    *
909 |    * @param top output Blob vector (length 1), providing the error gradient with
910 |    *      respect to the outputs
911 |    *   -# @f$ (1 \times 1 \times 1 \times 1) @f$
912 |    *      This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$,
913 |    *      as @f$ \lambda @f$ is the coefficient of this layer's output
914 |    *      @f$\ell_i@f$ in the overall Net loss
915 |    *      @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence
916 |    *      @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$.
917 |    *      (*Assuming that this top Blob is not used as a bottom (input) by any
918 |    *      other layer of the Net.)
919 |    * @param propagate_down see Layer::Backward.
920 |    *      propagate_down[1] must be false as we can't compute gradients with
921 |    *      respect to the labels.
922 |    * @param bottom input Blob vector (length 2)
923 |    *   -# @f$ (N \times C \times H \times W) @f$
924 |    *      the predictions @f$ x @f$; Backward computes diff
925 |    *      @f$ \frac{\partial E}{\partial x} @f$
926 |    *   -# @f$ (N \times 1 \times 1 \times 1) @f$
927 |    *      the labels -- ignored as we can't compute their error gradients
928 |    */
929 |   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
930 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
931 |   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
932 |       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
933 | 
934 | 
935 |   /// The internal SoftmaxLayer used to map predictions to a distribution.
936 |   shared_ptr<Layer<Dtype> > softmax_layer_;
937 |   /// prob stores the output probability predictions from the SoftmaxLayer.
938 |   Blob<Dtype> prob_;
939 |   /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward
940 |   vector<Blob<Dtype>*> softmax_bottom_vec_;
941 |   /// top vector holder used in call to the underlying SoftmaxLayer::Forward
942 |   vector<Blob<Dtype>*> softmax_top_vec_;
943 |   /// Whether to ignore instances with a certain label.
944 |   bool has_ignore_label_;
945 |   /// The label indicating that an instance should be ignored.
946 |   int ignore_label_;
947 |   /// Whether to normalize the loss by the total number of values present
948 |   /// (otherwise just by the batch size).
949 |   bool normalize_;
950 | 
951 |   int softmax_axis_, outer_num_, inner_num_;
952 | };
953 | 
954 | }  // namespace caffe
955 | 
956 | #endif  // CAFFE_LOSS_LAYERS_HPP_
957 | 


--------------------------------------------------------------------------------
/code/src/caffe/layers/super_category_fm_layer.cpp:
--------------------------------------------------------------------------------
  1 | #include <vector>
  2 | #include <limits>
  3 | #include <queue>
  4 | 
  5 | #include "caffe/blob.hpp"
  6 | #include "caffe/common.hpp"
  7 | #include "caffe/filler.hpp"
  8 | #include "caffe/layer.hpp"
  9 | #include "caffe/util/math_functions.hpp"
 10 | #include "caffe/vision_layers.hpp"
 11 | #include "caffe/util/io.hpp"
 12 | 
 13 | namespace caffe {
 14 | //Layer Implementation
 15 | template <typename Dtype>
 16 | void SuperCategoryFMLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 17 |       const vector<Blob<Dtype>*>& top) {
 18 | 	op_ = this->layer_param_.eltwise_param().operation();
 19 | 
 20 | 	SuperCategoryParameter * super_param = this->layer_param_.mutable_super_category_param();
 21 | 	if( super_param->file_name().empty() == false ) {
 22 | 		ReadProtoFromTextFileOrDie(super_param->file_name().c_str(), super_param->mutable_root());
 23 | 	}
 24 | 
 25 | 	Tree::MakeTree(&root_, &super_param->root());
 26 | 	depth_ = root_.Depth() - 1;
 27 | 	root_.MakeBalance(depth_);
 28 | 	Tree::GiveIndex(&root_, serialized_tree_);
 29 | 	Tree::GetNodeNumPerLevelAndGiveLabel(node_num_per_level_, base_index_per_level_, &this->root_,serialized_tree_,label_to_index_);
 30 | 
 31 | 	M_ = bottom[0]->shape(0);
 32 | 	N_ = bottom[0]->shape(1);
 33 | 	H_ = bottom[0]->shape(2);
 34 | 	W_ = bottom[0]->shape(3);
 35 | }
 36 | 
 37 | template <typename Dtype>
 38 | void SuperCategoryFMLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
 39 |       const vector<Blob<Dtype>*>& top) {
 40 | 	CHECK_EQ(top.size(), depth_);
 41 | 
 42 | 	if( op_ == EltwiseParameter_EltwiseOp_MIN ||
 43 |       op_ == EltwiseParameter_EltwiseOp_MAX ) {
 44 | 		mark_.resize(depth_);
 45 | 		for( int i = 0; i < depth_; ++i) {
 46 | 			std::vector<int> shape;
 47 | 			shape.push_back(M_);
 48 | 			shape.push_back(node_num_per_level_[i]);
 49 | 			shape.push_back(H_);
 50 | 			shape.push_back(W_);
 51 | 			mark_[i].reset(new Blob<int>(shape));
 52 | 		}
 53 | 	}
 54 | 
 55 | 
 56 | 	for( int i = 0; i < depth_; ++i) {
 57 | 		std::vector<int> shape;
 58 | 		shape.push_back(M_);
 59 | 		shape.push_back(node_num_per_level_[i]);
 60 | 		shape.push_back(H_);
 61 | 		shape.push_back(W_);
 62 | 		top[i]->Reshape(shape); // Top for output data
 63 | 	}
 64 | 
 65 | 	CHECK_EQ(top[depth_-1]->count(), bottom[0]->count());
 66 | }
 67 | 
 68 | template <typename Dtype>
 69 | void SuperCategoryFMLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 70 |     const vector<Blob<Dtype>*>& top) {
 71 | 	caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(), top[depth_-1]->mutable_cpu_data());
 72 | 
 73 | 	switch (op_) {
 74 | 	case EltwiseParameter_EltwiseOp_AVG :
 75 | 		for(int i = 0; i < depth_-1; ++i)
 76 | 			caffe_set(top[i]->count(), (Dtype)0., top[i]->mutable_cpu_data());
 77 | 
 78 | 		for(int m = 0; m < M_; ++m) {
 79 | 			for( int i = depth_-2; i >= 0; --i ) {
 80 | 				Blob<Dtype> * tops = top[i];
 81 | 				Blob<Dtype> * bottoms = top[i+1];
 82 | 
 83 | 				int base_idx = base_index_per_level_[i];
 84 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
 85 | 					Tree * node = serialized_tree_[base_idx + j];
 86 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
 87 | 
 88 | 					Dtype * top_data = &tops->mutable_cpu_data()[tops->offset(m,node->GetLabel())];
 89 | 
 90 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
 91 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
 92 | 						const Dtype * bottom_data = &bottoms->cpu_data()[offset];
 93 | 						caffe_axpy(H_*W_,(Dtype)(1.),bottom_data,top_data);
 94 | 					}
 95 | 
 96 | 					caffe_scal(H_*W_,(Dtype)(1./children->size()),top_data);
 97 | 				}
 98 | 			}
 99 | 		}
100 | 	break;
101 | 	case EltwiseParameter_EltwiseOp_MIN :
102 | 		for(int m = 0; m < M_; ++m) {
103 | 			for( int i = depth_-2; i >= 0; --i ) {
104 | 				Blob<Dtype> * tops = top[i];
105 | 				Blob<int> * marks = mark_[i].get();
106 | 				Blob<Dtype> * bottoms = top[i+1];
107 | 
108 | 				int base_idx = base_index_per_level_[i];
109 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
110 | 					Tree * node = serialized_tree_[base_idx + j];
111 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
112 | 
113 | 					Dtype * top_data = &tops->mutable_cpu_data()[tops->offset(m,node->GetLabel())];
114 | 					int * mark_data = &marks->mutable_cpu_data()[marks->offset(m,node->GetLabel())];
115 | 					caffe_set(H_*W_,std::numeric_limits<Dtype>::max(), top_data);
116 | 					caffe_set(H_*W_,-1,mark_data);
117 | 
118 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
119 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
120 | 						const Dtype * bottom_data = &bottoms->cpu_data()[offset];
121 | 						for(int h = 0; h < H_; ++h) {
122 | 							for(int w = 0; w < W_; ++w) {
123 | 								int idx = h*W_+w;
124 | 								if( bottom_data[idx] < top_data[idx] ) {
125 | 									top_data[idx] = bottom_data[idx];
126 | 									mark_data[idx] = (*it)->GetLabel();
127 | 								}
128 | 							}
129 | 						}
130 | 					}
131 | 				}
132 | 			}
133 | 		}
134 | 	break;
135 | 	case EltwiseParameter_EltwiseOp_MAX :
136 | 		for(int m = 0; m < M_; ++m) {
137 | 			for( int i = depth_-2; i >= 0; --i ) {
138 | 				Blob<Dtype> * tops = top[i];
139 | 				Blob<int> * marks = mark_[i].get();
140 | 				Blob<Dtype> * bottoms = top[i+1];
141 | 
142 | 				int base_idx = base_index_per_level_[i];
143 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
144 | 					Tree * node = serialized_tree_[base_idx + j];
145 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
146 | 
147 | 					Dtype * top_data = &tops->mutable_cpu_data()[tops->offset(m,node->GetLabel())];
148 | 					int * mark_data = &marks->mutable_cpu_data()[marks->offset(m,node->GetLabel())];
149 | 					caffe_set(H_*W_,std::numeric_limits<Dtype>::lowest(), top_data);
150 | 					caffe_set(H_*W_,-1,mark_data);
151 | 
152 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
153 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
154 | 						const Dtype * bottom_data = &bottoms->cpu_data()[offset];
155 | 						for(int h = 0; h < H_; ++h) {
156 | 							for(int w = 0; w < W_; ++w) {
157 | 								int idx = h*W_+w;
158 | 								if( bottom_data[idx] > top_data[idx] ) {
159 | 									top_data[idx] = bottom_data[idx];
160 | 									mark_data[idx] = (*it)->GetLabel();
161 | 								}
162 | 							}
163 | 						}
164 | 					}
165 | 				}
166 | 			}
167 | 		}
168 | 	break;
169 | 	default:
170 |         LOG(FATAL) << "Unknown elementwise operation.";
171 | 	}
172 | 
173 | }
174 | 
175 | template <typename Dtype>
176 | void SuperCategoryFMLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
177 |     const vector<bool>& propagate_down,
178 |     const vector<Blob<Dtype>*>& bottom) {
179 | 	if( propagate_down[0] == false )
180 | 		return;
181 | 
182 | 	switch (op_) {
183 | 	case EltwiseParameter_EltwiseOp_AVG :
184 | 		for(int m = 0; m < M_; ++m) {
185 | 			for( int i = 0; i < depth_-1; ++i ) {
186 | 				Blob<Dtype> * tops = top[i];
187 | 				Blob<Dtype> * bottoms = top[i+1];
188 | 
189 | 				int base_idx = base_index_per_level_[i];
190 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
191 | 					Tree * node = serialized_tree_[base_idx + j];
192 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
193 | 					const Dtype * top_diff = &tops->cpu_diff()[tops->offset(m,node->GetLabel())];
194 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
195 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
196 | 						Dtype * bottom_diff = &bottoms->mutable_cpu_diff()[offset];
197 | 
198 | 						caffe_axpy(H_*W_,(Dtype)(1./children->size()),top_diff,bottom_diff);	
199 | 					}
200 | 
201 | 				}
202 | 			}
203 | 		}
204 | 		caffe_copy(bottom[0]->count(), top[depth_-1]->cpu_diff(), bottom[0]->mutable_cpu_diff());
205 | 		break;
206 | 	case EltwiseParameter_EltwiseOp_MIN :
207 | 		for(int m = 0; m < M_; ++m) {
208 | 			for( int i = 0; i < depth_-1; ++i ) {
209 | 				Blob<Dtype> * tops = top[i];
210 | 				Blob<int> * marks = mark_[i].get();
211 | 				Blob<Dtype> * bottoms = top[i+1];
212 | 
213 | 				int base_idx = base_index_per_level_[i];
214 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
215 | 					Tree * node = serialized_tree_[base_idx + j];
216 | 					const Dtype * top_diff = &tops->cpu_diff()[tops->offset(m,node->GetLabel())];
217 | 					const int * mark_data = &marks->cpu_data()[marks->offset(m,node->GetLabel())];
218 | 					for(int h = 0; h < H_; ++h) {
219 | 						for(int w = 0; w < W_; ++w) {
220 | 							int idx = h*W_ + w;
221 | 							int label = mark_data[idx];
222 | 							int offset = bottoms->offset(m,label);
223 | 							bottoms->mutable_cpu_diff()[offset+idx] += top_diff[idx];
224 | 						}
225 | 					}
226 | 				}
227 | 			}
228 | 		}
229 | 		caffe_copy(bottom[0]->count(), top[depth_-1]->cpu_diff(), bottom[0]->mutable_cpu_diff());
230 | 		break;
231 | 	case EltwiseParameter_EltwiseOp_MAX :
232 | 		for(int m = 0; m < M_; ++m) {
233 | 			for( int i = 0; i < depth_-1; ++i ) {
234 | 				Blob<Dtype> * tops = top[i];
235 | 				Blob<int> * marks = mark_[i].get();
236 | 				Blob<Dtype> * bottoms = top[i+1];
237 | 
238 | 				int base_idx = base_index_per_level_[i];
239 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
240 | 					Tree * node = serialized_tree_[base_idx + j];
241 | 					const Dtype * top_diff = &tops->cpu_diff()[tops->offset(m,node->GetLabel())];
242 | 					const int * mark_data = &marks->cpu_data()[marks->offset(m,node->GetLabel())];
243 | 					for(int h = 0; h < H_; ++h) {
244 | 						for(int w = 0; w < W_; ++w) {
245 | 							int idx = h*W_ + w;
246 | 							int label = mark_data[idx];
247 | 							int offset = bottoms->offset(m,label);
248 | 							bottoms->mutable_cpu_diff()[offset+idx] += top_diff[idx];
249 | 						}
250 | 					}
251 | 				}
252 | 			}
253 | 		}
254 | 		caffe_copy(bottom[0]->count(), top[depth_-1]->cpu_diff(), bottom[0]->mutable_cpu_diff());
255 | 		break;
256 | 	default:
257 |         LOG(FATAL) << "Unknown elementwise operation.";
258 | 	}
259 | }
260 | 
261 | #ifdef CPU_ONLY
262 | STUB_GPU(SuperCategoryFMLayer);
263 | #endif
264 | 
265 | INSTANTIATE_CLASS(SuperCategoryFMLayer);
266 | 
267 | REGISTER_LAYER_CLASS(SuperCategoryFM);
268 | }  // namespace caffe
269 | 
270 | 


--------------------------------------------------------------------------------
/code/src/caffe/layers/super_category_fm_layer.cu:
--------------------------------------------------------------------------------
 1 | /*
 2 | #include <cfloat>
 3 | #include <vector>
 4 | 
 5 | #include "caffe/layer.hpp"
 6 | #include "caffe/util/math_functions.hpp"
 7 | #include "caffe/vision_layers.hpp"
 8 | 
 9 | namespace caffe {
10 | 
11 | template <typename Dtype>
12 | void SuperCategoryFMLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
13 |     const vector<Blob<Dtype>*>& top) {
14 | 	caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(), top[depth_-1]->mutable_gpu_data());
15 | 	for(int i = 0; i < depth_-1; ++i)
16 | 		caffe_gpu_set(top[i]->count(), (Dtype)0., top[i]->mutable_gpu_data());
17 | 
18 | 	for(int m = 0; m < M_; ++m) {
19 | 		for( int i = depth_-2; i >= 0; --i ) {
20 | 			Blob<Dtype> * tops = top[i];
21 | 			Blob<Dtype> * bottoms = top[i+1];
22 | 
23 | 			int base_idx = base_index_per_level_[i];
24 | 			for(int j = 0; j < node_num_per_level_[i]; ++j) {
25 | 				Tree * node = serialized_tree_[base_idx + j];
26 | 				const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
27 | 
28 | 				Dtype * top_data = &tops->mutable_gpu_data()[tops->offset(m,node->GetLabel())];
29 | 
30 | 				for(std::vector<shared_ptr<Tree> >::const_iterator it = children->begin(); it != children->end(); ++it) {
31 | 					int offset = bottoms->offset(m,(*it)->GetLabel());
32 | 					const Dtype * bottom_data = &bottoms->gpu_data()[offset];
33 | 					caffe_gpu_axpy(H_*W_,(Dtype)(1.),bottom_data,top_data);
34 | 				}
35 | 
36 | 				caffe_gpu_scal(H_*W_,(Dtype)(1./children->size()),top_data);
37 | 			}
38 | 		}
39 | 	}
40 | }
41 | 
42 | template <typename Dtype>
43 | void SuperCategoryFMLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
44 |     const vector<bool>& propagate_down,
45 |     const vector<Blob<Dtype>*>& bottom) {
46 | 	if( propagate_down[0] == false )
47 | 		return;
48 | 
49 | 	for(int m = 0; m < M_; ++m) {
50 | 		for( int i = 0; i < depth_-1; ++i ) {
51 | 			Blob<Dtype> * tops = top[i];
52 | 			Blob<Dtype> * bottoms = top[i+1];
53 | 
54 | 			int base_idx = base_index_per_level_[i];
55 | 			for(int j = 0; j < node_num_per_level_[i]; ++j) {
56 | 				Tree * node = serialized_tree_[base_idx + j];
57 | 				const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
58 | 				const Dtype * top_diff = &tops->gpu_diff()[tops->offset(m,node->GetLabel())];
59 | 				for(std::vector<shared_ptr<Tree> >::const_iterator it = children->begin(); it != children->end(); ++it) {
60 | 					int offset = bottoms->offset(m,(*it)->GetLabel());
61 | 					Dtype * bottom_diff = &bottoms->mutable_gpu_diff()[offset];
62 | 
63 | 					caffe_gpu_axpy(H_*W_,(Dtype)(1./children->size()),top_diff,bottom_diff);	
64 | 				}
65 | 
66 | 			}
67 | 		}
68 | 	}
69 | 	caffe_copy(bottom[0]->count(), top[depth_-1]->gpu_diff(), bottom[0]->mutable_gpu_diff());
70 | }
71 | 
72 | INSTANTIATE_LAYER_GPU_FUNCS(SuperCategoryFMLayer);
73 | 
74 | }  // namespace caffe
75 | */
76 | 


--------------------------------------------------------------------------------
/code/src/caffe/layers/super_category_fm_post_layer.cpp:
--------------------------------------------------------------------------------
  1 | #include <vector>
  2 | #include <limits>
  3 | #include <queue>
  4 | 
  5 | #include "caffe/blob.hpp"
  6 | #include "caffe/common.hpp"
  7 | #include "caffe/filler.hpp"
  8 | #include "caffe/layer.hpp"
  9 | #include "caffe/util/math_functions.hpp"
 10 | #include "caffe/vision_layers.hpp"
 11 | #include "caffe/util/io.hpp"
 12 | 
 13 | namespace caffe {
 14 | //Layer Implementation
 15 | template <typename Dtype>
 16 | void SuperCategoryFMPostLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
 17 |       const vector<Blob<Dtype>*>& top) {
 18 | 	op_ = this->layer_param_.eltwise_param().operation();
 19 | 
 20 | 	SuperCategoryParameter * super_param = this->layer_param_.mutable_super_category_param();
 21 | 	if( super_param->file_name().empty() == false ) {
 22 | 		ReadProtoFromTextFileOrDie(super_param->file_name().c_str(), super_param->mutable_root());
 23 | 	}
 24 | 
 25 | 	Tree::MakeTree(&root_, &super_param->root());
 26 | 	depth_ = root_.Depth() - 1;
 27 | 	root_.MakeBalance(depth_);
 28 | 	Tree::GiveIndex(&root_, serialized_tree_);
 29 | 	Tree::GetNodeNumPerLevelAndGiveLabel(node_num_per_level_, base_index_per_level_, &this->root_,serialized_tree_,label_to_index_);
 30 | 
 31 | 	CHECK_EQ(depth_,bottom.size());
 32 | 
 33 | 	M_ = bottom[depth_-1]->shape(0);
 34 | 	H_ = bottom[depth_-1]->shape(2);
 35 | 	W_ = bottom[depth_-1]->shape(3);
 36 | 	for(int i = 0; i < depth_; ++i) {
 37 | 		CHECK_EQ(bottom[i]->shape(0),M_);
 38 | 		CHECK_EQ(bottom[i]->shape(1),node_num_per_level_[i]);
 39 | 		CHECK_EQ(bottom[i]->shape(2),H_);
 40 | 		CHECK_EQ(bottom[i]->shape(3),W_);
 41 | 	}
 42 | }
 43 | 
 44 | template <typename Dtype>
 45 | void SuperCategoryFMPostLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
 46 |       const vector<Blob<Dtype>*>& top) {
 47 | 	CHECK_EQ(depth_,top.size());
 48 | 	for(int i = 0; i < depth_; ++i) {
 49 | 		top[i]->ReshapeLike(*bottom[i]); // Top for output data
 50 | 	}
 51 | }
 52 | 
 53 | template <typename Dtype>
 54 | void SuperCategoryFMPostLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 55 |     const vector<Blob<Dtype>*>& top) {
 56 | 	for(int i = 0; i < depth_; ++i)
 57 | 		caffe_copy(top[i]->count(), bottom[i]->cpu_data(), top[i]->mutable_cpu_data());
 58 | 
 59 | 	switch (op_) {
 60 | 	case EltwiseParameter_EltwiseOp_SUM :
 61 | 		for(int m = 0; m < M_; ++m) {
 62 | 			for(int i = 0; i < depth_-1; ++i) {
 63 | 				Blob<Dtype> * tops = top[i];
 64 | 				Blob<Dtype> * bottoms = top[i+1];
 65 | 					
 66 | 				int base_idx = base_index_per_level_[i];
 67 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
 68 | 					Tree * node = serialized_tree_[base_idx + j];
 69 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
 70 | 					const Dtype * top_data = &tops->cpu_data()[tops->offset(m,node->GetLabel())];
 71 | 
 72 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
 73 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
 74 | 						Dtype * bottom_data = &bottoms->mutable_cpu_data()[offset];
 75 | 						caffe_axpy(H_*W_,(Dtype)1.,top_data,bottom_data);
 76 | 					}
 77 | 				}
 78 | 			}
 79 | 		}
 80 | 		break;
 81 | 	case EltwiseParameter_EltwiseOp_MINUS :
 82 | 		for(int m = 0; m < M_; ++m) {
 83 | 			for(int i = 0; i < depth_-1; ++i) {
 84 | 				Blob<Dtype> * tops = bottom[i];
 85 | 				Blob<Dtype> * bottoms = top[i+1];
 86 | 					
 87 | 				int base_idx = base_index_per_level_[i];
 88 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
 89 | 					Tree * node = serialized_tree_[base_idx + j];
 90 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
 91 | 					const Dtype * top_data = &tops->cpu_data()[tops->offset(m,node->GetLabel())];
 92 | 
 93 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
 94 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
 95 | 						Dtype * bottom_data = &bottoms->mutable_cpu_data()[offset];
 96 | 						caffe_axpy(H_*W_,(Dtype)-1.,top_data,bottom_data);
 97 | 					}
 98 | 				}
 99 | 			}
100 | 		}
101 | 		break;
102 | 	case EltwiseParameter_EltwiseOp_MINUS_REVERSE :
103 | 		for(int m = 0; m < M_; ++m) {
104 | 			for(int i = 0; i < depth_-1; ++i) {
105 | 				Blob<Dtype> * tops = bottom[i];
106 | 				Blob<Dtype> * bottoms = top[i+1];
107 | 					
108 | 				int base_idx = base_index_per_level_[i];
109 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
110 | 					Tree * node = serialized_tree_[base_idx + j];
111 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
112 | 					const Dtype * top_data = &tops->cpu_data()[tops->offset(m,node->GetLabel())];
113 | 
114 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
115 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
116 | 						Dtype * bottom_data = &bottoms->mutable_cpu_data()[offset];
117 | 						caffe_cpu_axpby(H_*W_,(Dtype)1.,top_data,(Dtype)-1.,bottom_data);
118 | 					}
119 | 				}
120 | 			}
121 | 		}
122 | 		break;
123 | 	default:
124 |         LOG(FATAL) << "Unknown elementwise operation.";
125 | 		break;
126 | 	}
127 | }
128 | 
129 | template <typename Dtype>
130 | void SuperCategoryFMPostLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
131 |     const vector<bool>& propagate_down,
132 |     const vector<Blob<Dtype>*>& bottom) {
133 | 	//if( propagate_down[0] == false )
134 | 	//	return;
135 | 
136 | 	for(int i = 0; i < depth_; ++i) {
137 | 		if( propagate_down[i] )
138 | 			caffe_copy(bottom[i]->count(), top[i]->cpu_diff(), bottom[i]->mutable_cpu_diff());
139 | 	}
140 | 
141 | 	switch (op_) {
142 | 	case EltwiseParameter_EltwiseOp_SUM :
143 | 		for(int m = 0; m < M_; ++m) {
144 | 			for( int i = depth_-1; i > 0; --i ) {
145 | 				if( propagate_down[i] != true )
146 | 					continue;
147 | 
148 | 				Blob<Dtype> * tops = bottom[i-1];
149 | 				Blob<Dtype> * bottoms = bottom[i];
150 | 
151 | 				int base_idx = base_index_per_level_[i-1];
152 | 				for(int j = 0; j < node_num_per_level_[i-1]; ++j) {
153 | 					Tree * node = serialized_tree_[base_idx + j];
154 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
155 | 					Dtype * top_diff = &tops->mutable_cpu_diff()[tops->offset(m,node->GetLabel())];
156 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
157 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
158 | 						const Dtype * bottom_diff = &bottoms->cpu_diff()[offset];
159 | 
160 | 						caffe_axpy(H_*W_,(Dtype)(1.),bottom_diff,top_diff);
161 | 					}
162 | 				}
163 | 			}
164 | 		}
165 | 		break;
166 | 	case EltwiseParameter_EltwiseOp_MINUS :
167 | 		for(int m = 0; m < M_; ++m) {
168 | 			for( int i = depth_-1; i > 0; --i ) {
169 | 				if( propagate_down[i] != true )
170 | 					continue;
171 | 
172 | 				Blob<Dtype> * tops = bottom[i-1];
173 | 				Blob<Dtype> * bottoms = top[i];
174 | 
175 | 				int base_idx = base_index_per_level_[i-1];
176 | 				for(int j = 0; j < node_num_per_level_[i-1]; ++j) {
177 | 					Tree * node = serialized_tree_[base_idx + j];
178 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
179 | 					Dtype * top_diff = &tops->mutable_cpu_diff()[tops->offset(m,node->GetLabel())];
180 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
181 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
182 | 						const Dtype * bottom_diff = &bottoms->cpu_diff()[offset];
183 | 
184 | 						caffe_axpy(H_*W_,(Dtype)(-1.),bottom_diff,top_diff);
185 | 					}
186 | 				}
187 | 			}
188 | 		}
189 | 		break;
190 | 	case EltwiseParameter_EltwiseOp_MINUS_REVERSE :
191 |     for(int i = 1; i < depth_; ++i ) {
192 | 			caffe_scal(bottom[i]->count(), (Dtype)(-1.), bottom[i]->mutable_cpu_diff());
193 |     }
194 | 		for(int m = 0; m < M_; ++m) {
195 | 			for( int i = depth_-1; i > 0; --i ) {
196 | 				if( propagate_down[i] != true )
197 | 					continue;
198 | 
199 | 				Blob<Dtype> * tops = bottom[i-1];
200 | 				Blob<Dtype> * bottoms = top[i];
201 | 
202 | 				int base_idx = base_index_per_level_[i-1];
203 | 				for(int j = 0; j < node_num_per_level_[i-1]; ++j) {
204 | 					Tree * node = serialized_tree_[base_idx + j];
205 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
206 | 					Dtype * top_diff = &tops->mutable_cpu_diff()[tops->offset(m,node->GetLabel())];
207 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
208 | 						int offset = bottoms->offset(m,(*it)->GetLabel());
209 | 						const Dtype * bottom_diff = &bottoms->cpu_diff()[offset];
210 | 
211 | 						caffe_axpy(H_*W_,(Dtype)(1.),bottom_diff,top_diff);
212 | 					}
213 | 				}
214 | 			}
215 | 		}
216 | /*		for(int m = 0; m < M_; ++m) {
217 | 			for( int i = depth_-1; i > 0; --i ) {
218 | 				if( propagate_down[i] != true )
219 | 					continue;
220 | 
221 | 				Blob<Dtype> * tops = top[i];
222 | 				Blob<Dtype> * bottoms = bottom[i-1];
223 | 
224 | 				int base_idx = base_index_per_level_[i-1];
225 | 				for(int j = 0; j < node_num_per_level_[i-1]; ++j) {
226 | 					Tree * node = serialized_tree_[base_idx + j];
227 | 					const std::vector<shared_ptr<Tree> >* children = node->GetChildren();
228 | 					Dtype * bottom_diff = &bottoms->mutable_cpu_diff()[bottoms->offset(m,node->GetLabel())];
229 | 
230 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
231 | 						int offset = tops->offset(m,(*it)->GetLabel());
232 | 						const Dtype * top_diff = &tops->cpu_diff()[offset];
233 | 
234 | 						caffe_axpy(H_*W_,(Dtype)(-1.),top_diff,bottom_diff);
235 | 					}
236 | 				}
237 | 			}
238 | 		}
239 |     */
240 | 		break;
241 | 	default:
242 |         LOG(FATAL) << "Unknown elementwise operation.";
243 | 		break;
244 | 	}
245 | }
246 | 
247 | #ifdef CPU_ONLY
248 | STUB_GPU(SuperCategoryFMPostLayer);
249 | #endif
250 | 
251 | INSTANTIATE_CLASS(SuperCategoryFMPostLayer);
252 | 
253 | REGISTER_LAYER_CLASS(SuperCategoryFMPost);
254 | }  // namespace caffe
255 | 
256 | 
257 | 


--------------------------------------------------------------------------------
/code/src/caffe/layers/super_category_layer.cpp:
--------------------------------------------------------------------------------
  1 | #include <vector>
  2 | #include <limits>
  3 | #include <queue>
  4 | 
  5 | #include "caffe/blob.hpp"
  6 | #include "caffe/common.hpp"
  7 | #include "caffe/filler.hpp"
  8 | #include "caffe/layer.hpp"
  9 | #include "caffe/util/math_functions.hpp"
 10 | #include "caffe/vision_layers.hpp"
 11 | #include "caffe/util/io.hpp"
 12 | 
 13 | namespace caffe {
 14 | //Tree Implementation
 15 | int Tree::Depth() const {
 16 | 	int max_depth = 0;
 17 | 	for(int i = 0; i < this->children.size(); i++) {
 18 | 	  int depth = this->children[i]->Depth();
 19 | 	  if( max_depth < depth ) max_depth = depth;
 20 | 	}
 21 | 	return max_depth + 1;
 22 | }
 23 | void Tree::MakeBalance(int remain) {
 24 | 	if( remain == 0 ) return;
 25 | 	if( children.size() == 0 ) {
 26 | 	  Tree * root = this;
 27 | 	  int label = root->label;
 28 | 	  for(int i = 0; i < remain; ++i ) {
 29 | 		  root->InsertChild(shared_ptr<Tree>(new Tree()));
 30 | 		  root->SetLabel(-1);
 31 | 		  root = root->children[0].get();
 32 | 	  }
 33 | 	  root->SetLabel(label);
 34 | 	}
 35 | 	else {
 36 | 	  for(int i = 0; i < children.size(); ++i)
 37 | 		  children[i]->MakeBalance(remain-1);
 38 | 	}
 39 | }
 40 | //Tree helper
 41 | void Tree::GiveIndex(Tree * root, std::vector<Tree *>& serialized_tree) {
 42 | 	int cnt = 0;
 43 | 	std::queue<Tree *> queue;
 44 | 	queue.push(root);
 45 | 	while( queue.size() != 0 ) {
 46 | 	  Tree * node = queue.front();
 47 | 	  node->index = cnt++;
 48 | 
 49 | 	  serialized_tree.push_back(node);
 50 | 	  for(int i = 0; i < node->children.size(); ++i)
 51 | 		  queue.push(node->children[i].get());
 52 | 	  queue.pop();
 53 | 	}
 54 | }
 55 | void Tree::GetNodeNumPerLevelAndGiveLabel(std::vector<int>& node_num, std::vector<int>& base_index,Tree * root, std::vector<Tree *>& serialized_tree, std::vector<int>& label_to_index) { 
 56 | 	Tree * right_root = root;
 57 | 	int depth = root->Depth();
 58 | 	node_num.resize(depth-1);
 59 | 	base_index.resize(depth-1);
 60 | 	for(int i = 0; i < depth-1; ++i)
 61 | 	{
 62 | 	  node_num[i] = right_root->children[right_root->children.size()-1]->GetIndex() - root->children[0]->GetIndex() + 1;
 63 | 	  base_index[i] = root->children[0]->index;
 64 | 	  root = root->children[0].get();
 65 | 	  right_root = right_root->children[right_root->children.size()-1].get();
 66 | 
 67 | 	  if( i < depth-2 ){ //label for last layer is already made
 68 | 		  for(int j = base_index[i]; j < base_index[i]+node_num[i]; ++j)
 69 | 			  serialized_tree[j]->label = j - base_index[i];
 70 | 	  }
 71 | 	  else {
 72 | 		  label_to_index.resize(node_num[i]);
 73 | 		  for(int index = 0; index < node_num[i]; ++index) {
 74 | 			  int label = serialized_tree[index+base_index[i]]->GetLabel();
 75 | 			  CHECK_LT(label,node_num[i]);
 76 | 			  label_to_index[label] = index;
 77 | 		  }
 78 | 	  }
 79 | 
 80 | 	}
 81 | }
 82 | void Tree::MakeTree(Tree * node, const SuperCategoryParameter::TreeScheme * node_param){
 83 | 	if( node_param->children_size() == 0 ){
 84 | 		CHECK_NE(node_param->label(),-1);
 85 | 		node->SetLabel(node_param->label());
 86 | 	}
 87 | 	else {
 88 | 		CHECK_EQ(node->label,-1);
 89 | 		for(int i = 0; i < node_param->children_size(); ++i) {
 90 | 			shared_ptr<Tree> child(new Tree());
 91 | 			node->InsertChild(child);
 92 | 			MakeTree(child.get(), &node_param->children(i));
 93 | 		}
 94 | 	}
 95 | }
 96 | 
 97 | //Layer Implementation
 98 | template <typename Dtype>
 99 | void SuperCategoryLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
100 |       const vector<Blob<Dtype>*>& top) {
101 | 
102 | 	SuperCategoryParameter * super_param = this->layer_param_.mutable_super_category_param();
103 | 	if( super_param->file_name().empty() == false ) {
104 | 		ReadProtoFromTextFileOrDie(super_param->file_name().c_str(), super_param->mutable_root());
105 | 	}
106 | 
107 | 	Tree::MakeTree(&root_, &super_param->root());
108 | 	depth_ = root_.Depth();
109 | 	root_.MakeBalance(depth_-1);
110 | 	Tree::GiveIndex(&root_, serialized_tree_);
111 | 	Tree::GetNodeNumPerLevelAndGiveLabel(node_num_per_level_, base_index_per_level_, &this->root_,serialized_tree_,label_to_index_);
112 | 
113 | 	N_ = bottom[0]->count(0,1);
114 | 	CHECK_EQ(*node_num_per_level_.rbegin(), bottom[0]->count(1));
115 | 
116 | 	this->temp_.Reshape(N_,1,1,1);
117 | }
118 | 
119 | template <typename Dtype>
120 | void SuperCategoryLabelLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
121 |       const vector<Blob<Dtype>*>& top) {
122 | 	SuperCategoryParameter * super_param = this->layer_param_.mutable_super_category_param();
123 | 	if( super_param->file_name().empty() == false ) {
124 | 		ReadProtoFromTextFileOrDie(super_param->file_name().c_str(), super_param->mutable_root());
125 | 	}
126 | 
127 | 	N_ = bottom[0]->count(0,1);
128 | 
129 | 	Tree::MakeTree(&root_, &super_param->root());
130 | 	depth_ = root_.Depth();
131 | 	root_.MakeBalance(depth_-1);
132 | 	Tree::GiveIndex(&root_, serialized_tree_);
133 | 	Tree::GetNodeNumPerLevelAndGiveLabel(node_num_per_level_, base_index_per_level_, &this->root_,serialized_tree_,label_to_index_);
134 | }
135 | 
136 | template <typename Dtype>
137 | void SuperCategoryLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
138 |       const vector<Blob<Dtype>*>& top) {
139 | 
140 | 	CHECK_EQ(top.size(), depth_-1);
141 | 
142 | 	mark_.resize(top.size());
143 | 	for( int i = 0; i < depth_-1; ++i) {
144 | 		std::vector<int> shape;
145 | 		shape.push_back(N_);
146 | 		shape.push_back(node_num_per_level_[i]);
147 | 		top[i]->Reshape(shape); // Top for output data
148 | 		mark_[i].reset(new Blob<int> (shape));// Marking for Maxpoolling backprop
149 | 	}
150 | }
151 | 
152 | template <typename Dtype>
153 | void SuperCategoryLabelLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
154 |       const vector<Blob<Dtype>*>& top) {
155 | 
156 | 	CHECK_EQ(top.size(), depth_-1);
157 | 
158 | 	int i = 0;
159 | 	for( i = 0; i < depth_-1; ++i) {
160 | 		std::vector<int> shape;
161 | 		shape.push_back(N_);
162 | 		top[i]->Reshape(shape); // Top for label
163 | 	}
164 | 	CHECK_EQ(bottom[0]->count(), N_);
165 | }
166 | 
167 | template <typename Dtype>
168 | void SuperCategoryLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
169 |     const vector<Blob<Dtype>*>& top) {
170 | 
171 | 	//For Data
172 | 	for(int n = 0; n < N_; ++n) {
173 | 		for(int i = depth_-2; i >= 0; --i)
174 | 		{
175 | 			int node_cnt;
176 | 			if( i == depth_-2)
177 | 				node_cnt = node_num_per_level_[i];
178 | 			else
179 | 				node_cnt = node_num_per_level_[i+1];
180 | 
181 | 			Blob<Dtype> * bottoms;
182 | 			if( i == depth_-2 )
183 | 				bottoms = bottom[0];
184 | 			else
185 | 				bottoms  = top[i+1];
186 | 
187 | 			Dtype * top_data = &top[i]->mutable_cpu_data()[node_num_per_level_[i]*n];
188 | 			int * mark_data = &mark_[i]->mutable_cpu_data()[node_num_per_level_[i]*n];
189 | 			const Dtype * bottom_data = &bottoms->cpu_data()[node_cnt*n]; //is equal.
190 | 
191 | 			int base_idx = base_index_per_level_[i];
192 | 			for(int j = 0; j < node_num_per_level_[i]; ++j ) {
193 | 				Tree * node = serialized_tree_[base_idx + j];
194 | 				const std::vector<shared_ptr<Tree> > * children = node->GetChildren();
195 | 				if( children->size() == 0 )
196 | 				{
197 | 					CHECK_EQ(i, depth_-2);
198 | 					//caffe_mul<Dtype>(N_,&blob_data[N_*j], &bottom_data[N_*j], &top_data[N_*j]);
199 | 					top_data[j] = bottom_data[j];
200 | 				}
201 | 				else{
202 | 					int node_label = node->GetLabel();
203 | 					top_data[node_label] = -1 * std::numeric_limits<Dtype>::max();
204 | 					for(auto it = children->cbegin(); it != children->cend(); ++it) {
205 | 						int label = (*it)->GetLabel();
206 | 						//caffe_mul<Dtype>(N_,&blob_data[idx*N_],&bottom_data[idx*N_],temp_.mutable_cpu_data());
207 | 						//caffe_add<Dtype>(N_,temp_.cpu_data(),&top_data[j*N_],&top_data[j*N_]);
208 | 						if( top_data[node_label] < bottom_data[label] )
209 | 						{
210 | 							top_data[node_label] = bottom_data[label];
211 | 							mark_data[node_label] = label;
212 | 						}
213 | 					}
214 | 				}
215 | 			}
216 | 		}
217 | 	}
218 | }
219 | 
220 | template <typename Dtype>
221 | void SuperCategoryLabelLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
222 |     const vector<Blob<Dtype>*>& top) {
223 | 	//For Label
224 | 	for(int n = 0; n < N_; ++n) {
225 | 		int idx = label_to_index_[static_cast<int>(bottom[0]->cpu_data()[n])] + *(base_index_per_level_.rbegin());
226 | 		const Tree * node = serialized_tree_[idx];
227 | 		for(int i = depth_-2; i >= 0; --i) {
228 | 			top[i]->mutable_cpu_data()[n] = node->GetLabel();
229 | 			node = node->GetParent();
230 | 		}
231 | 		CHECK_EQ(top[depth_-2]->cpu_data()[n],bottom[0]->cpu_data()[n]);
232 | 	}
233 | }
234 | 
235 | template <typename Dtype>
236 | void SuperCategoryLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
237 |     const vector<bool>& propagate_down,
238 |     const vector<Blob<Dtype>*>& bottom) {
239 | 
240 | 	caffe_set(bottom[0]->count(), Dtype(0), bottom[0]->mutable_cpu_diff());
241 | 
242 | 	if( propagate_down[0] ) {
243 | 
244 | 		for(int n = 0; n < N_; ++n) {
245 | 			for(int i = 0; i < depth_-1; ++i) {
246 | 
247 | 				int node_cnt;
248 | 				if( i == depth_-2)
249 | 					node_cnt = node_num_per_level_[i];
250 | 				else
251 | 					node_cnt = node_num_per_level_[i+1];
252 | 
253 | 				const Dtype * top_diff = &top[i]->cpu_diff()[n*node_num_per_level_[i]];
254 | 				const int * mark_data = &mark_[i]->cpu_data()[n*node_num_per_level_[i]];
255 | 				Dtype * bottom_diff;
256 | 				if( i + 1 == depth_-1 ){
257 | 					bottom_diff = &bottom[0]->mutable_cpu_diff()[n*node_cnt];
258 | 				}
259 | 				else {
260 | 					bottom_diff = &top[i+1]->mutable_cpu_diff()[n*node_cnt];
261 | 				}
262 | 
263 | 				int base_idx = base_index_per_level_[i];
264 | 				for(int j = 0; j < node_num_per_level_[i]; ++j) {
265 | 					Tree * node = serialized_tree_[base_idx + j];
266 | 					const std::vector<shared_ptr<Tree> > * children = node->GetChildren();
267 | 					if( children->size() == 0 ) { //this layer is connected with bottom layer
268 | 						//caffe_mul<Dtype>(N_,&top_diff[j*N_],&bottom_data[j*N_],&blob_diff[j*N_]);
269 | 						//caffe_mul<Dtype>(N_,&top_diff[j*N_],&blob_data[j*N_],&bottom_diff[j*N_]);
270 | 						CHECK_EQ(i, depth_-2);
271 | 						bottom_diff[j] = top_diff[j];
272 | 					}
273 | 					else {
274 | 						int node_label = node->GetLabel();
275 | 						int label = mark_data[node_label];
276 | 						bottom_diff[label] += top_diff[node_label];
277 | 					}
278 | 				}
279 | 			}
280 | 		}
281 | 	}
282 | }
283 | 
284 | #ifdef CPU_ONLY
285 | STUB_GPU(SuperCategoryLayer);
286 | STUB_GPU(SuperCategoryLabelLayer);
287 | #endif
288 | 
289 | INSTANTIATE_CLASS(SuperCategoryLayer);
290 | REGISTER_LAYER_CLASS(SuperCategory);
291 | 
292 | INSTANTIATE_CLASS(SuperCategoryLabelLayer);
293 | REGISTER_LAYER_CLASS(SuperCategoryLabel);
294 | }  // namespace caffe
295 | 


--------------------------------------------------------------------------------
/code/src/caffe/proto/caffe.proto:
--------------------------------------------------------------------------------
   1 | syntax = "proto2";
   2 | 
   3 | package caffe;
   4 | 
   5 | // Specifies the shape (dimensions) of a Blob.
   6 | message BlobShape {
   7 |   repeated int64 dim = 1 [packed = true];
   8 | }
   9 | 
  10 | message BlobProto {
  11 |   optional BlobShape shape = 7;
  12 |   repeated float data = 5 [packed = true];
  13 |   repeated float diff = 6 [packed = true];
  14 | 
  15 |   // 4D dimensions -- deprecated.  Use "shape" instead.
  16 |   optional int32 num = 1 [default = 0];
  17 |   optional int32 channels = 2 [default = 0];
  18 |   optional int32 height = 3 [default = 0];
  19 |   optional int32 width = 4 [default = 0];
  20 | }
  21 | 
  22 | // The BlobProtoVector is simply a way to pass multiple blobproto instances
  23 | // around.
  24 | message BlobProtoVector {
  25 |   repeated BlobProto blobs = 1;
  26 | }
  27 | 
  28 | message Datum {
  29 |   optional int32 channels = 1;
  30 |   optional int32 height = 2;
  31 |   optional int32 width = 3;
  32 |   // the actual image data, in bytes
  33 |   optional bytes data = 4;
  34 |   optional int32 label = 5;
  35 |   // Optionally, the datum could also hold float data.
  36 |   repeated float float_data = 6;
  37 |   // If true data contains an encoded image that need to be decoded
  38 |   optional bool encoded = 7 [default = false];
  39 | }
  40 | 
  41 | message FillerParameter {
  42 |   // The filler type.
  43 |   optional string type = 1 [default = 'constant'];
  44 |   optional float value = 2 [default = 0]; // the value in constant filler
  45 |   optional float min = 3 [default = 0]; // the min value in uniform filler
  46 |   optional float max = 4 [default = 1]; // the max value in uniform filler
  47 |   optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  48 |   optional float std = 6 [default = 1]; // the std value in Gaussian filler
  49 |   // The expected number of non-zero output weights for a given input in
  50 |   // Gaussian filler -- the default -1 means don't perform sparsification.
  51 |   optional int32 sparse = 7 [default = -1];
  52 |   // Normalize the filler variance by fan_in, fan_out, or their average.
  53 |   // Applies to 'xavier' and 'msra' fillers.
  54 |   enum VarianceNorm {
  55 |     FAN_IN = 0;
  56 |     FAN_OUT = 1;
  57 |     AVERAGE = 2;
  58 |   }
  59 |   optional VarianceNorm variance_norm = 8 [default = FAN_IN];
  60 | }
  61 | 
  62 | message NetParameter {
  63 |   optional string name = 1; // consider giving the network a name
  64 |   // The input blobs to the network.
  65 |   repeated string input = 3;
  66 |   // The shape of the input blobs.
  67 |   repeated BlobShape input_shape = 8;
  68 | 
  69 |   // 4D input dimensions -- deprecated.  Use "shape" instead.
  70 |   // If specified, for each input blob there should be four
  71 |   // values specifying the num, channels, height and width of the input blob.
  72 |   // Thus, there should be a total of (4 * #input) numbers.
  73 |   repeated int32 input_dim = 4;
  74 | 
  75 |   // Whether the network will force every layer to carry out backward operation.
  76 |   // If set False, then whether to carry out backward is determined
  77 |   // automatically according to the net structure and learning rates.
  78 |   optional bool force_backward = 5 [default = false];
  79 |   // The current "state" of the network, including the phase, level, and stage.
  80 |   // Some layers may be included/excluded depending on this state and the states
  81 |   // specified in the layers' include and exclude fields.
  82 |   optional NetState state = 6;
  83 | 
  84 |   // Print debugging information about results while running Net::Forward,
  85 |   // Net::Backward, and Net::Update.
  86 |   optional bool debug_info = 7 [default = false];
  87 | 
  88 |   // The layers that make up the net.  Each of their configurations, including
  89 |   // connectivity and behavior, is specified as a LayerParameter.
  90 |   repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.
  91 | 
  92 |   // DEPRECATED: use 'layer' instead.
  93 |   repeated V1LayerParameter layers = 2;
  94 | }
  95 | 
  96 | // NOTE
  97 | // Update the next available ID when you add a new SolverParameter field.
  98 | //
  99 | // SolverParameter next available ID: 37 (last added: iter_size)
 100 | message SolverParameter {
 101 |   //////////////////////////////////////////////////////////////////////////////
 102 |   // Specifying the train and test networks
 103 |   //
 104 |   // Exactly one train net must be specified using one of the following fields:
 105 |   //     train_net_param, train_net, net_param, net
 106 |   // One or more test nets may be specified using any of the following fields:
 107 |   //     test_net_param, test_net, net_param, net
 108 |   // If more than one test net field is specified (e.g., both net and
 109 |   // test_net are specified), they will be evaluated in the field order given
 110 |   // above: (1) test_net_param, (2) test_net, (3) net_param/net.
 111 |   // A test_iter must be specified for each test_net.
 112 |   // A test_level and/or a test_stage may also be specified for each test_net.
 113 |   //////////////////////////////////////////////////////////////////////////////
 114 | 
 115 |   // Proto filename for the train net, possibly combined with one or more
 116 |   // test nets.
 117 |   optional string net = 24;
 118 |   // Inline train net param, possibly combined with one or more test nets.
 119 |   optional NetParameter net_param = 25;
 120 | 
 121 |   optional string train_net = 1; // Proto filename for the train net.
 122 |   repeated string test_net = 2; // Proto filenames for the test nets.
 123 |   optional NetParameter train_net_param = 21; // Inline train net params.
 124 |   repeated NetParameter test_net_param = 22; // Inline test net params.
 125 | 
 126 |   // The states for the train/test nets. Must be unspecified or
 127 |   // specified once per net.
 128 |   //
 129 |   // By default, all states will have solver = true;
 130 |   // train_state will have phase = TRAIN,
 131 |   // and all test_state's will have phase = TEST.
 132 |   // Other defaults are set according to the NetState defaults.
 133 |   optional NetState train_state = 26;
 134 |   repeated NetState test_state = 27;
 135 | 
 136 |   // The number of iterations for each test net.
 137 |   repeated int32 test_iter = 3;
 138 | 
 139 |   // The number of iterations between two testing phases.
 140 |   optional int32 test_interval = 4 [default = 0];
 141 |   optional bool test_compute_loss = 19 [default = false];
 142 |   // If true, run an initial test pass before the first iteration,
 143 |   // ensuring memory availability and printing the starting value of the loss.
 144 |   optional bool test_initialization = 32 [default = true];
 145 |   optional float base_lr = 5; // The base learning rate
 146 |   // the number of iterations between displaying info. If display = 0, no info
 147 |   // will be displayed.
 148 |   optional int32 display = 6;
 149 |   // Display the loss averaged over the last average_loss iterations
 150 |   optional int32 average_loss = 33 [default = 1];
 151 |   optional int32 max_iter = 7; // the maximum number of iterations
 152 |   // accumulate gradients over `iter_size` x `batch_size` instances
 153 |   optional int32 iter_size = 36 [default = 1];
 154 |   optional string lr_policy = 8; // The learning rate decay policy.
 155 |   optional float gamma = 9; // The parameter to compute the learning rate.
 156 |   optional float power = 10; // The parameter to compute the learning rate.
 157 |   optional float momentum = 11; // The momentum value.
 158 |   optional float weight_decay = 12; // The weight decay.
 159 |   // regularization types supported: L1 and L2
 160 |   // controlled by weight_decay
 161 |   optional string regularization_type = 29 [default = "L2"];
 162 |   // the stepsize for learning rate policy "step"
 163 |   optional int32 stepsize = 13;
 164 |   // the stepsize for learning rate policy "multistep"
 165 |   repeated int32 stepvalue = 34;
 166 | 
 167 |   // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
 168 |   // whenever their actual L2 norm is larger.
 169 |   optional float clip_gradients = 35 [default = -1];
 170 | 
 171 |   optional int32 snapshot = 14 [default = 0]; // The snapshot interval
 172 |   optional string snapshot_prefix = 15; // The prefix for the snapshot.
 173 |   // whether to snapshot diff in the results or not. Snapshotting diff will help
 174 |   // debugging but the final protocol buffer size will be much larger.
 175 |   optional bool snapshot_diff = 16 [default = false];
 176 |   // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
 177 |   enum SolverMode {
 178 |     CPU = 0;
 179 |     GPU = 1;
 180 |   }
 181 |   optional SolverMode solver_mode = 17 [default = GPU];
 182 |   // the device_id will that be used in GPU mode. Use device_id = 0 in default.
 183 |   optional int32 device_id = 18 [default = 0];
 184 |   // If non-negative, the seed with which the Solver will initialize the Caffe
 185 |   // random number generator -- useful for reproducible results. Otherwise,
 186 |   // (and by default) initialize using a seed derived from the system clock.
 187 |   optional int64 random_seed = 20 [default = -1];
 188 | 
 189 |   // Solver type
 190 |   enum SolverType {
 191 |     SGD = 0;
 192 |     NESTEROV = 1;
 193 |     ADAGRAD = 2;
 194 |   }
 195 |   optional SolverType solver_type = 30 [default = SGD];
 196 |   // numerical stability for AdaGrad
 197 |   optional float delta = 31 [default = 1e-8];
 198 | 
 199 |   // If true, print information about the state of the net that may help with
 200 |   // debugging learning problems.
 201 |   optional bool debug_info = 23 [default = false];
 202 | 
 203 |   // If false, don't save a snapshot after training finishes.
 204 |   optional bool snapshot_after_train = 28 [default = true];
 205 | }
 206 | 
 207 | // A message that stores the solver snapshots
 208 | message SolverState {
 209 |   optional int32 iter = 1; // The current iteration
 210 |   optional string learned_net = 2; // The file that stores the learned net.
 211 |   repeated BlobProto history = 3; // The history for sgd solvers
 212 |   optional int32 current_step = 4 [default = 0]; // The current step for learning rate
 213 | }
 214 | 
 215 | enum Phase {
 216 |    TRAIN = 0;
 217 |    TEST = 1;
 218 | }
 219 | 
 220 | message NetState {
 221 |   optional Phase phase = 1 [default = TEST];
 222 |   optional int32 level = 2 [default = 0];
 223 |   repeated string stage = 3;
 224 | }
 225 | 
 226 | message NetStateRule {
 227 |   // Set phase to require the NetState have a particular phase (TRAIN or TEST)
 228 |   // to meet this rule.
 229 |   optional Phase phase = 1;
 230 | 
 231 |   // Set the minimum and/or maximum levels in which the layer should be used.
 232 |   // Leave undefined to meet the rule regardless of level.
 233 |   optional int32 min_level = 2;
 234 |   optional int32 max_level = 3;
 235 | 
 236 |   // Customizable sets of stages to include or exclude.
 237 |   // The net must have ALL of the specified stages and NONE of the specified
 238 |   // "not_stage"s to meet the rule.
 239 |   // (Use multiple NetStateRules to specify conjunctions of stages.)
 240 |   repeated string stage = 4;
 241 |   repeated string not_stage = 5;
 242 | }
 243 | 
 244 | // Specifies training parameters (multipliers on global learning constants,
 245 | // and the name and other settings used for weight sharing).
 246 | message ParamSpec {
 247 |   // The names of the parameter blobs -- useful for sharing parameters among
 248 |   // layers, but never required otherwise.  To share a parameter between two
 249 |   // layers, give it a (non-empty) name.
 250 |   optional string name = 1;
 251 | 
 252 |   // Whether to require shared weights to have the same shape, or just the same
 253 |   // count -- defaults to STRICT if unspecified.
 254 |   optional DimCheckMode share_mode = 2;
 255 |   enum DimCheckMode {
 256 |     // STRICT (default) requires that num, channels, height, width each match.
 257 |     STRICT = 0;
 258 |     // PERMISSIVE requires only the count (num*channels*height*width) to match.
 259 |     PERMISSIVE = 1;
 260 |   }
 261 | 
 262 |   // The multiplier on the global learning rate for this parameter.
 263 |   optional float lr_mult = 3 [default = 1.0];
 264 | 
 265 |   // The multiplier on the global weight decay for this parameter.
 266 |   optional float decay_mult = 4 [default = 1.0];
 267 | }
 268 | 
 269 | // NOTE
 270 | // Update the next available ID when you add a new LayerParameter field.
 271 | //
 272 | // LayerParameter next available layer-specific ID: 140 (last added: reduction_param)
 273 | message LayerParameter {
 274 |   optional string name = 1; // the layer name
 275 |   optional string type = 2; // the layer type
 276 |   repeated string bottom = 3; // the name of each bottom blob
 277 |   repeated string top = 4; // the name of each top blob
 278 | 
 279 |   // The train / test phase for computation.
 280 |   optional Phase phase = 10;
 281 | 
 282 |   // The amount of weight to assign each top blob in the objective.
 283 |   // Each layer assigns a default value, usually of either 0 or 1,
 284 |   // to each top blob.
 285 |   repeated float loss_weight = 5;
 286 | 
 287 |   // Specifies training parameters (multipliers on global learning constants,
 288 |   // and the name and other settings used for weight sharing).
 289 |   repeated ParamSpec param = 6;
 290 | 
 291 |   // The blobs containing the numeric parameters of the layer.
 292 |   repeated BlobProto blobs = 7;
 293 |   
 294 |   // Specifies on which bottoms the backpropagation should be skipped.
 295 |   // The size must be either 0 or equal to the number of bottoms.
 296 |   repeated bool propagate_down = 11;
 297 | 
 298 |   // Rules controlling whether and when a layer is included in the network,
 299 |   // based on the current NetState.  You may specify a non-zero number of rules
 300 |   // to include OR exclude, but not both.  If no include or exclude rules are
 301 |   // specified, the layer is always included.  If the current NetState meets
 302 |   // ANY (i.e., one or more) of the specified rules, the layer is
 303 |   // included/excluded.
 304 |   repeated NetStateRule include = 8;
 305 |   repeated NetStateRule exclude = 9;
 306 | 
 307 |   // Parameters for data pre-processing.
 308 |   optional TransformationParameter transform_param = 100;
 309 | 
 310 |   // Parameters shared by loss layers.
 311 |   optional LossParameter loss_param = 101;
 312 | 
 313 |   // Layer type-specific parameters.
 314 |   //
 315 |   // Note: certain layers may have more than one computational engine
 316 |   // for their implementation. These layers include an Engine type and
 317 |   // engine parameter for selecting the implementation.
 318 |   // The default for the engine is set by the ENGINE switch at compile-time.
 319 |   optional AccuracyParameter accuracy_param = 102;
 320 |   optional ArgMaxParameter argmax_param = 103;
 321 |   optional ConcatParameter concat_param = 104;
 322 |   optional ContrastiveLossParameter contrastive_loss_param = 105;
 323 |   optional ConvolutionParameter convolution_param = 106;
 324 |   optional DataParameter data_param = 107;
 325 |   optional DropoutParameter dropout_param = 108;
 326 |   optional DummyDataParameter dummy_data_param = 109;
 327 |   optional EltwiseParameter eltwise_param = 110;
 328 |   optional ExpParameter exp_param = 111;
 329 |   optional FlattenParameter flatten_param = 135;
 330 |   optional HDF5DataParameter hdf5_data_param = 112;
 331 |   optional HDF5OutputParameter hdf5_output_param = 113;
 332 |   optional HingeLossParameter hinge_loss_param = 114;
 333 |   optional ImageDataParameter image_data_param = 115;
 334 |   optional InfogainLossParameter infogain_loss_param = 116;
 335 |   optional InnerProductParameter inner_product_param = 117;
 336 |   optional LogParameter log_param = 134;
 337 |   optional LRNParameter lrn_param = 118;
 338 |   optional MemoryDataParameter memory_data_param = 119;
 339 |   optional MVNParameter mvn_param = 120;
 340 |   optional PoolingParameter pooling_param = 121;
 341 |   optional PowerParameter power_param = 122;
 342 |   optional PReLUParameter prelu_param = 131;
 343 |   optional PythonParameter python_param = 130;
 344 |   optional ReductionParameter reduction_param = 136;
 345 |   optional ReLUParameter relu_param = 123;
 346 |   optional ReshapeParameter reshape_param = 133;
 347 |   optional SigmoidParameter sigmoid_param = 124;
 348 |   optional SoftmaxParameter softmax_param = 125;
 349 |   optional SPPParameter spp_param = 132;
 350 |   optional SliceParameter slice_param = 126;
 351 |   optional TanHParameter tanh_param = 127;
 352 |   optional ThresholdParameter threshold_param = 128;
 353 |   optional WindowDataParameter window_data_param = 129;
 354 |   optional SuperCategoryParameter super_category_param= 138;
 355 | }
 356 | 
 357 | // Message that stores parameters used to apply transformation
 358 | // to the data layer's data
 359 | message TransformationParameter {
 360 |   // For data pre-processing, we can do simple scaling and subtracting the
 361 |   // data mean, if provided. Note that the mean subtraction is always carried
 362 |   // out before scaling.
 363 |   optional float scale = 1 [default = 1];
 364 |   // Specify if we want to randomly mirror data.
 365 |   optional bool mirror = 2 [default = false];
 366 |   // Specify if we would like to randomly crop an image.
 367 |   optional uint32 crop_size = 3 [default = 0];
 368 |   // mean_file and mean_value cannot be specified at the same time
 369 |   optional string mean_file = 4;
 370 |   // if specified can be repeated once (would substract it from all the channels)
 371 |   // or can be repeated the same number of times as channels
 372 |   // (would subtract them from the corresponding channel)
 373 |   repeated float mean_value = 5;
 374 |   // Force the decoded image to have 3 color channels.
 375 |   optional bool force_color = 6 [default = false];
 376 |   // Force the decoded image to have 1 color channels.
 377 |   optional bool force_gray = 7 [default = false];
 378 | }
 379 | 
 380 | // Message that stores parameters shared by loss layers
 381 | message LossParameter {
 382 |   // If specified, ignore instances with the given label.
 383 |   optional int32 ignore_label = 1;
 384 |   // If true, normalize each batch across all instances (including spatial
 385 |   // dimesions, but not ignored instances); else, divide by batch size only.
 386 |   optional bool normalize = 2 [default = true];
 387 | }
 388 | 
 389 | // Messages that store parameters used by individual layer types follow, in
 390 | // alphabetical order.
 391 | 
 392 | message AccuracyParameter {
 393 |   // When computing accuracy, count as correct by comparing the true label to
 394 |   // the top k scoring classes.  By default, only compare to the top scoring
 395 |   // class (i.e. argmax).
 396 |   optional uint32 top_k = 1 [default = 1];
 397 | 
 398 |   // The "label" axis of the prediction blob, whose argmax corresponds to the
 399 |   // predicted label -- may be negative to index from the end (e.g., -1 for the
 400 |   // last axis).  For example, if axis == 1 and the predictions are
 401 |   // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
 402 |   // labels with integer values in {0, 1, ..., C-1}.
 403 |   optional int32 axis = 2 [default = 1];
 404 | 
 405 |   // If specified, ignore instances with the given label.
 406 |   optional int32 ignore_label = 3;
 407 | 
 408 |   optional int32 num_label = 4;
 409 | 
 410 |   optional bool print = 5 [default = false];
 411 | }
 412 | 
 413 | message ArgMaxParameter {
 414 |   // If true produce pairs (argmax, maxval)
 415 |   optional bool out_max_val = 1 [default = false];
 416 |   optional uint32 top_k = 2 [default = 1];
 417 | }
 418 | 
 419 | message ConcatParameter {
 420 |   // The axis along which to concatenate -- may be negative to index from the
 421 |   // end (e.g., -1 for the last axis).  Other axes must have the
 422 |   // same dimension for all the bottom blobs.
 423 |   // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
 424 |   optional int32 axis = 2 [default = 1];
 425 | 
 426 |   // DEPRECATED: alias for "axis" -- does not support negative indexing.
 427 |   optional uint32 concat_dim = 1 [default = 1];
 428 | }
 429 | 
 430 | message ContrastiveLossParameter {
 431 |   // margin for dissimilar pair
 432 |   optional float margin = 1 [default = 1.0];
 433 |   // The first implementation of this cost did not exactly match the cost of
 434 |   // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
 435 |   // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
 436 |   // Hadsell paper. New models should probably use this version.
 437 |   // legacy_version = true uses (margin - d^2). This is kept to support /
 438 |   // reproduce existing models and results
 439 |   optional bool legacy_version = 2 [default = false]; 
 440 | }
 441 | 
 442 | message ConvolutionParameter {
 443 |   optional uint32 num_output = 1; // The number of outputs for the layer
 444 |   optional bool bias_term = 2 [default = true]; // whether to have bias terms
 445 |   // Pad, kernel size, and stride are all given as a single value for equal
 446 |   // dimensions in height and width or as Y, X pairs.
 447 |   optional uint32 pad = 3 [default = 0]; // The padding size (equal in Y, X)
 448 |   optional uint32 pad_h = 9 [default = 0]; // The padding height
 449 |   optional uint32 pad_w = 10 [default = 0]; // The padding width
 450 |   optional uint32 kernel_size = 4; // The kernel size (square)
 451 |   optional uint32 kernel_h = 11; // The kernel height
 452 |   optional uint32 kernel_w = 12; // The kernel width
 453 |   optional uint32 group = 5 [default = 1]; // The group size for group conv
 454 |   optional uint32 stride = 6 [default = 1]; // The stride (equal in Y, X)
 455 |   optional uint32 stride_h = 13; // The stride height
 456 |   optional uint32 stride_w = 14; // The stride width
 457 |   optional FillerParameter weight_filler = 7; // The filler for the weight
 458 |   optional FillerParameter bias_filler = 8; // The filler for the bias
 459 |   enum Engine {
 460 |     DEFAULT = 0;
 461 |     CAFFE = 1;
 462 |     CUDNN = 2;
 463 |   }
 464 |   optional Engine engine = 15 [default = DEFAULT];
 465 | }
 466 | 
 467 | message DataParameter {
 468 |   enum DB {
 469 |     LEVELDB = 0;
 470 |     LMDB = 1;
 471 |   }
 472 |   // Specify the data source.
 473 |   optional string source = 1;
 474 |   // Specify the batch size.
 475 |   optional uint32 batch_size = 4;
 476 |   // The rand_skip variable is for the data layer to skip a few data points
 477 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
 478 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
 479 |   // be larger than the number of keys in the database.
 480 |   optional uint32 rand_skip = 7 [default = 0];
 481 |   optional DB backend = 8 [default = LEVELDB];
 482 |   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
 483 |   // simple scaling and subtracting the data mean, if provided. Note that the
 484 |   // mean subtraction is always carried out before scaling.
 485 |   optional float scale = 2 [default = 1];
 486 |   optional string mean_file = 3;
 487 |   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
 488 |   // crop an image.
 489 |   optional uint32 crop_size = 5 [default = 0];
 490 |   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
 491 |   // data.
 492 |   optional bool mirror = 6 [default = false];
 493 |   // Force the encoded image to have 3 color channels
 494 |   optional bool force_encoded_color = 9 [default = false];
 495 | }
 496 | 
 497 | message DropoutParameter {
 498 |   optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
 499 | }
 500 | 
 501 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random
 502 | // (or constant) data generated by "Fillers" (see "message FillerParameter").
 503 | message DummyDataParameter {
 504 |   // This layer produces N >= 1 top blobs.  DummyDataParameter must specify 1 or N
 505 |   // shape fields, and 0, 1 or N data_fillers.
 506 |   //
 507 |   // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
 508 |   // If 1 data_filler is specified, it is applied to all top blobs.  If N are
 509 |   // specified, the ith is applied to the ith top blob.
 510 |   repeated FillerParameter data_filler = 1;
 511 |   repeated BlobShape shape = 6;
 512 | 
 513 |   // 4D dimensions -- deprecated.  Use "shape" instead.
 514 |   repeated uint32 num = 2;
 515 |   repeated uint32 channels = 3;
 516 |   repeated uint32 height = 4;
 517 |   repeated uint32 width = 5;
 518 | }
 519 | 
 520 | message EltwiseParameter {
 521 |   enum EltwiseOp {
 522 |     PROD = 0;
 523 |     SUM = 1;
 524 |     MAX = 2;
 525 |     MIN = 3;
 526 |     AVG = 4;
 527 |     MINUS = 5;
 528 |     MINUS_REVERSE = 6;
 529 |   }
 530 |   optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
 531 |   repeated float coeff = 2; // blob-wise coefficient for SUM operation
 532 | 
 533 |   // Whether to use an asymptotically slower (for >2 inputs) but stabler method
 534 |   // of computing the gradient for the PROD operation. (No effect for SUM op.)
 535 |   optional bool stable_prod_grad = 3 [default = true];
 536 | }
 537 | 
 538 | message ExpParameter {
 539 |   // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
 540 |   // Or if base is set to the default (-1), base is set to e,
 541 |   // so y = exp(shift + scale * x).
 542 |   optional float base = 1 [default = -1.0];
 543 |   optional float scale = 2 [default = 1.0];
 544 |   optional float shift = 3 [default = 0.0];
 545 | }
 546 | 
 547 | /// Message that stores parameters used by FlattenLayer
 548 | message FlattenParameter {
 549 |   // The first axis to flatten: all preceding axes are retained in the output.
 550 |   // May be negative to index from the end (e.g., -1 for the last axis).
 551 |   optional int32 axis = 1 [default = 1];
 552 | 
 553 |   // The last axis to flatten: all following axes are retained in the output.
 554 |   // May be negative to index from the end (e.g., the default -1 for the last
 555 |   // axis).
 556 |   optional int32 end_axis = 2 [default = -1];
 557 | }
 558 | 
 559 | // Message that stores parameters used by HDF5DataLayer
 560 | message HDF5DataParameter {
 561 |   // Specify the data source.
 562 |   optional string source = 1;
 563 |   // Specify the batch size.
 564 |   optional uint32 batch_size = 2;
 565 | 
 566 |   // Specify whether to shuffle the data.
 567 |   // If shuffle == true, the ordering of the HDF5 files is shuffled,
 568 |   // and the ordering of data within any given HDF5 file is shuffled,
 569 |   // but data between different files are not interleaved; all of a file's
 570 |   // data are output (in a random order) before moving onto another file.
 571 |   optional bool shuffle = 3 [default = false];
 572 | }
 573 | 
 574 | message HDF5OutputParameter {
 575 |   optional string file_name = 1;
 576 | }
 577 | 
 578 | message HingeLossParameter {
 579 |   enum Norm {
 580 |     L1 = 1;
 581 |     L2 = 2;
 582 |   }
 583 |   // Specify the Norm to use L1 or L2
 584 |   optional Norm norm = 1 [default = L1];
 585 | }
 586 | 
 587 | message ImageDataParameter {
 588 |   // Specify the data source.
 589 |   optional string source = 1;
 590 |   // Specify the batch size.
 591 |   optional uint32 batch_size = 4;
 592 |   // The rand_skip variable is for the data layer to skip a few data points
 593 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
 594 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
 595 |   // be larger than the number of keys in the database.
 596 |   optional uint32 rand_skip = 7 [default = 0];
 597 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
 598 |   optional bool shuffle = 8 [default = false];
 599 |   // It will also resize images if new_height or new_width are not zero.
 600 |   optional uint32 new_height = 9 [default = 0];
 601 |   optional uint32 new_width = 10 [default = 0];
 602 |   // Specify if the images are color or gray
 603 |   optional bool is_color = 11 [default = true];
 604 |   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
 605 |   // simple scaling and subtracting the data mean, if provided. Note that the
 606 |   // mean subtraction is always carried out before scaling.
 607 |   optional float scale = 2 [default = 1];
 608 |   optional string mean_file = 3;
 609 |   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
 610 |   // crop an image.
 611 |   optional uint32 crop_size = 5 [default = 0];
 612 |   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
 613 |   // data.
 614 |   optional bool mirror = 6 [default = false];
 615 |   optional string root_folder = 12 [default = ""];
 616 | }
 617 | 
 618 | message InfogainLossParameter {
 619 |   // Specify the infogain matrix source.
 620 |   optional string source = 1;
 621 | }
 622 | 
 623 | message InnerProductParameter {
 624 |   optional uint32 num_output = 1; // The number of outputs for the layer
 625 |   optional bool bias_term = 2 [default = true]; // whether to have bias terms
 626 |   optional FillerParameter weight_filler = 3; // The filler for the weight
 627 |   optional FillerParameter bias_filler = 4; // The filler for the bias
 628 | 
 629 |   // The first axis to be lumped into a single inner product computation;
 630 |   // all preceding axes are retained in the output.
 631 |   // May be negative to index from the end (e.g., -1 for the last axis).
 632 |   optional int32 axis = 5 [default = 1];
 633 | }
 634 | 
 635 | // Message that stores parameters used by LogLayer
 636 | message LogParameter {
 637 |   // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
 638 |   // Or if base is set to the default (-1), base is set to e,
 639 |   // so y = ln(shift + scale * x) = log_e(shift + scale * x)
 640 |   optional float base = 1 [default = -1.0];
 641 |   optional float scale = 2 [default = 1.0];
 642 |   optional float shift = 3 [default = 0.0];
 643 | }
 644 | 
 645 | // Message that stores parameters used by LRNLayer
 646 | message LRNParameter {
 647 |   optional uint32 local_size = 1 [default = 5];
 648 |   optional float alpha = 2 [default = 1.];
 649 |   optional float beta = 3 [default = 0.75];
 650 |   enum NormRegion {
 651 |     ACROSS_CHANNELS = 0;
 652 |     WITHIN_CHANNEL = 1;
 653 |   }
 654 |   optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
 655 |   optional float k = 5 [default = 1.];
 656 | }
 657 | 
 658 | message MemoryDataParameter {
 659 |   optional uint32 batch_size = 1;
 660 |   optional uint32 channels = 2;
 661 |   optional uint32 height = 3;
 662 |   optional uint32 width = 4;
 663 | }
 664 | 
 665 | message MVNParameter {
 666 |   // This parameter can be set to false to normalize mean only
 667 |   optional bool normalize_variance = 1 [default = true];
 668 | 
 669 |   // This parameter can be set to true to perform DNN-like MVN
 670 |   optional bool across_channels = 2 [default = false];
 671 | 
 672 |   // Epsilon for not dividing by zero while normalizing variance
 673 |   optional float eps = 3 [default = 1e-9];
 674 | }
 675 | 
 676 | message PoolingParameter {
 677 |   enum PoolMethod {
 678 |     MAX = 0;
 679 |     AVE = 1;
 680 |     STOCHASTIC = 2;
 681 |   }
 682 |   optional PoolMethod pool = 1 [default = MAX]; // The pooling method
 683 |   // Pad, kernel size, and stride are all given as a single value for equal
 684 |   // dimensions in height and width or as Y, X pairs.
 685 |   optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
 686 |   optional uint32 pad_h = 9 [default = 0]; // The padding height
 687 |   optional uint32 pad_w = 10 [default = 0]; // The padding width
 688 |   optional uint32 kernel_size = 2; // The kernel size (square)
 689 |   optional uint32 kernel_h = 5; // The kernel height
 690 |   optional uint32 kernel_w = 6; // The kernel width
 691 |   optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
 692 |   optional uint32 stride_h = 7; // The stride height
 693 |   optional uint32 stride_w = 8; // The stride width
 694 |   enum Engine {
 695 |     DEFAULT = 0;
 696 |     CAFFE = 1;
 697 |     CUDNN = 2;
 698 |   }
 699 |   optional Engine engine = 11 [default = DEFAULT];
 700 |   // If global_pooling then it will pool over the size of the bottom by doing
 701 |   // kernel_h = bottom->height and kernel_w = bottom->width
 702 |   optional bool global_pooling = 12 [default = false];
 703 | }
 704 | 
 705 | message PowerParameter {
 706 |   // PowerLayer computes outputs y = (shift + scale * x) ^ power.
 707 |   optional float power = 1 [default = 1.0];
 708 |   optional float scale = 2 [default = 1.0];
 709 |   optional float shift = 3 [default = 0.0];
 710 | }
 711 | 
 712 | message PythonParameter {
 713 |   optional string module = 1;
 714 |   optional string layer = 2;
 715 | }
 716 | 
 717 | // Message that stores parameters used by ReductionLayer
 718 | message ReductionParameter {
 719 |   enum ReductionOp {
 720 |     SUM = 1;
 721 |     ASUM = 2;
 722 |     SUMSQ = 3;
 723 |     MEAN = 4;
 724 |   }
 725 | 
 726 |   optional ReductionOp operation = 1 [default = SUM]; // reduction operation
 727 | 
 728 |   // The first axis to reduce to a scalar -- may be negative to index from the
 729 |   // end (e.g., -1 for the last axis).
 730 |   // (Currently, only reduction along ALL "tail" axes is supported; reduction
 731 |   // of axis M through N, where N < num_axes - 1, is unsupported.)
 732 |   // Suppose we have an n-axis bottom Blob with shape:
 733 |   //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
 734 |   // If axis == m, the output Blob will have shape
 735 |   //     (d0, d1, d2, ..., d(m-1)),
 736 |   // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
 737 |   // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
 738 |   // If axis == 0 (the default), the output Blob always has the empty shape
 739 |   // (count 1), performing reduction across the entire input --
 740 |   // often useful for creating new loss functions.
 741 |   optional int32 axis = 2 [default = 0];
 742 | 
 743 |   optional float coeff = 3 [default = 1.0]; // coefficient for output
 744 | }
 745 | 
 746 | // Message that stores parameters used by ReLULayer
 747 | message ReLUParameter {
 748 |   // Allow non-zero slope for negative inputs to speed up optimization
 749 |   // Described in:
 750 |   // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
 751 |   // improve neural network acoustic models. In ICML Workshop on Deep Learning
 752 |   // for Audio, Speech, and Language Processing.
 753 |   optional float negative_slope = 1 [default = 0];
 754 |   enum Engine {
 755 |     DEFAULT = 0;
 756 |     CAFFE = 1;
 757 |     CUDNN = 2;
 758 |   }
 759 |   optional Engine engine = 2 [default = DEFAULT];
 760 | }
 761 | 
 762 | message ReshapeParameter {
 763 |   // Specify the output dimensions. If some of the dimensions are set to 0,
 764 |   // the corresponding dimension from the bottom layer is used (unchanged).
 765 |   // Exactly one dimension may be set to -1, in which case its value is
 766 |   // inferred from the count of the bottom blob and the remaining dimensions.
 767 |   // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
 768 |   //
 769 |   //   layer {
 770 |   //     type: "Reshape" bottom: "input" top: "output"
 771 |   //     reshape_param { ... }
 772 |   //   }
 773 |   //
 774 |   // If "input" is 2D with shape 2 x 8, then the following reshape_param
 775 |   // specifications are all equivalent, producing a 3D blob "output" with shape
 776 |   // 2 x 2 x 4:
 777 |   //
 778 |   //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
 779 |   //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
 780 |   //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
 781 |   //   reshape_param { shape { dim: -1  dim: 0  dim:  2 } }
 782 |   //
 783 |   optional BlobShape shape = 1;
 784 | 
 785 |   // axis and num_axes control the portion of the bottom blob's shape that are
 786 |   // replaced by (included in) the reshape. By default (axis == 0 and
 787 |   // num_axes == -1), the entire bottom blob shape is included in the reshape,
 788 |   // and hence the shape field must specify the entire output shape.
 789 |   //
 790 |   // axis may be non-zero to retain some portion of the beginning of the input
 791 |   // shape (and may be negative to index from the end; e.g., -1 to begin the
 792 |   // reshape after the last axis, including nothing in the reshape,
 793 |   // -2 to include only the last axis, etc.).
 794 |   //
 795 |   // For example, suppose "input" is a 2D blob with shape 2 x 8.
 796 |   // Then the following ReshapeLayer specifications are all equivalent,
 797 |   // producing a blob "output" with shape 2 x 2 x 4:
 798 |   //
 799 |   //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
 800 |   //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
 801 |   //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
 802 |   //
 803 |   // num_axes specifies the extent of the reshape.
 804 |   // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
 805 |   // input axes in the range [axis, axis+num_axes].
 806 |   // num_axes may also be -1, the default, to include all remaining axes
 807 |   // (starting from axis).
 808 |   //
 809 |   // For example, suppose "input" is a 2D blob with shape 2 x 8.
 810 |   // Then the following ReshapeLayer specifications are equivalent,
 811 |   // producing a blob "output" with shape 1 x 2 x 8.
 812 |   //
 813 |   //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
 814 |   //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
 815 |   //   reshape_param { shape { dim:  1  }  num_axes: 0 }
 816 |   //
 817 |   // On the other hand, these would produce output blob shape 2 x 1 x 8:
 818 |   //
 819 |   //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
 820 |   //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
 821 |   //
 822 |   optional int32 axis = 2 [default = 0];
 823 |   optional int32 num_axes = 3 [default = -1];
 824 | }
 825 | 
 826 | message SigmoidParameter {
 827 |   enum Engine {
 828 |     DEFAULT = 0;
 829 |     CAFFE = 1;
 830 |     CUDNN = 2;
 831 |   }
 832 |   optional Engine engine = 1 [default = DEFAULT];
 833 | }
 834 | 
 835 | message SliceParameter {
 836 |   // The axis along which to slice -- may be negative to index from the end
 837 |   // (e.g., -1 for the last axis).
 838 |   // By default, SliceLayer concatenates blobs along the "channels" axis (1).
 839 |   optional int32 axis = 3 [default = 1];
 840 |   repeated uint32 slice_point = 2;
 841 | 
 842 |   // DEPRECATED: alias for "axis" -- does not support negative indexing.
 843 |   optional uint32 slice_dim = 1 [default = 1];
 844 | }
 845 | 
 846 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
 847 | message SoftmaxParameter {
 848 |   enum Engine {
 849 |     DEFAULT = 0;
 850 |     CAFFE = 1;
 851 |     CUDNN = 2;
 852 |   }
 853 |   optional Engine engine = 1 [default = DEFAULT];
 854 | 
 855 |   // The axis along which to perform the softmax -- may be negative to index
 856 |   // from the end (e.g., -1 for the last axis).
 857 |   // Any other axes will be evaluated as independent softmaxes.
 858 |   optional int32 axis = 2 [default = 1];
 859 | }
 860 | 
 861 | message TanHParameter {
 862 |   enum Engine {
 863 |     DEFAULT = 0;
 864 |     CAFFE = 1;
 865 |     CUDNN = 2;
 866 |   }
 867 |   optional Engine engine = 1 [default = DEFAULT];
 868 | }
 869 | 
 870 | message ThresholdParameter {
 871 |   optional float threshold = 1 [default = 0]; // Strictly positive values
 872 | }
 873 | 
 874 | message WindowDataParameter {
 875 |   // Specify the data source.
 876 |   optional string source = 1;
 877 |   // For data pre-processing, we can do simple scaling and subtracting the
 878 |   // data mean, if provided. Note that the mean subtraction is always carried
 879 |   // out before scaling.
 880 |   optional float scale = 2 [default = 1];
 881 |   optional string mean_file = 3;
 882 |   // Specify the batch size.
 883 |   optional uint32 batch_size = 4;
 884 |   // Specify if we would like to randomly crop an image.
 885 |   optional uint32 crop_size = 5 [default = 0];
 886 |   // Specify if we want to randomly mirror data.
 887 |   optional bool mirror = 6 [default = false];
 888 |   // Foreground (object) overlap threshold
 889 |   optional float fg_threshold = 7 [default = 0.5];
 890 |   // Background (non-object) overlap threshold
 891 |   optional float bg_threshold = 8 [default = 0.5];
 892 |   // Fraction of batch that should be foreground objects
 893 |   optional float fg_fraction = 9 [default = 0.25];
 894 |   // Amount of contextual padding to add around a window
 895 |   // (used only by the window_data_layer)
 896 |   optional uint32 context_pad = 10 [default = 0];
 897 |   // Mode for cropping out a detection window
 898 |   // warp: cropped window is warped to a fixed size and aspect ratio
 899 |   // square: the tightest square around the window is cropped
 900 |   optional string crop_mode = 11 [default = "warp"];
 901 |   // cache_images: will load all images in memory for faster access
 902 |   optional bool cache_images = 12 [default = false];
 903 |   // append root_folder to locate images
 904 |   optional string root_folder = 13 [default = ""];
 905 | }
 906 | 
 907 | message SPPParameter {
 908 |   enum PoolMethod {
 909 |     MAX = 0;
 910 |     AVE = 1;
 911 |     STOCHASTIC = 2;
 912 |   }
 913 |   optional uint32 pyramid_height = 1;
 914 |   optional PoolMethod pool = 2 [default = MAX]; // The pooling method
 915 |   enum Engine {
 916 |     DEFAULT = 0;
 917 |     CAFFE = 1;
 918 |     CUDNN = 2;
 919 |   }
 920 |   optional Engine engine = 6 [default = DEFAULT];
 921 | }
 922 | 
 923 | message SuperCategoryParameter {
 924 |   message TreeScheme {
 925 | 	  optional int32 label = 1 [default = -1];
 926 | 	  repeated TreeScheme children = 3;
 927 |   }
 928 |   optional string file_name = 4; 
 929 |   optional TreeScheme root = 1;
 930 |   optional FillerParameter weight_filler = 2; // The filler for the weight
 931 |   enum Engine {
 932 |     DEFAULT = 0;
 933 |     CAFFE = 1;
 934 |     CUDNN = 2;
 935 |   }
 936 |   optional Engine engine = 3 [default = DEFAULT];
 937 | }
 938 | 
 939 | // DEPRECATED: use LayerParameter.
 940 | message V1LayerParameter {
 941 |   repeated string bottom = 2;
 942 |   repeated string top = 3;
 943 |   optional string name = 4;
 944 |   repeated NetStateRule include = 32;
 945 |   repeated NetStateRule exclude = 33;
 946 |   enum LayerType {
 947 |     NONE = 0;
 948 |     ABSVAL = 35;
 949 |     ACCURACY = 1;
 950 |     ARGMAX = 30;
 951 |     BNLL = 2;
 952 |     CONCAT = 3;
 953 |     CONTRASTIVE_LOSS = 37;
 954 |     CONVOLUTION = 4;
 955 |     DATA = 5;
 956 |     DECONVOLUTION = 39;
 957 |     DROPOUT = 6;
 958 |     DUMMY_DATA = 32;
 959 |     EUCLIDEAN_LOSS = 7;
 960 |     ELTWISE = 25;
 961 |     EXP = 38;
 962 |     FLATTEN = 8;
 963 |     HDF5_DATA = 9;
 964 |     HDF5_OUTPUT = 10;
 965 |     HINGE_LOSS = 28;
 966 |     IM2COL = 11;
 967 |     IMAGE_DATA = 12;
 968 |     INFOGAIN_LOSS = 13;
 969 |     INNER_PRODUCT = 14;
 970 |     LRN = 15;
 971 |     MEMORY_DATA = 29;
 972 |     MULTINOMIAL_LOGISTIC_LOSS = 16;
 973 |     MVN = 34;
 974 |     POOLING = 17;
 975 |     POWER = 26;
 976 |     RELU = 18;
 977 |     SIGMOID = 19;
 978 |     SIGMOID_CROSS_ENTROPY_LOSS = 27;
 979 |     SILENCE = 36;
 980 |     SOFTMAX = 20;
 981 |     SOFTMAX_LOSS = 21;
 982 |     SPLIT = 22;
 983 |     SLICE = 33;
 984 |     TANH = 23;
 985 |     WINDOW_DATA = 24;
 986 |     THRESHOLD = 31;
 987 |   }
 988 |   optional LayerType type = 5;
 989 |   repeated BlobProto blobs = 6;
 990 |   repeated string param = 1001;
 991 |   repeated DimCheckMode blob_share_mode = 1002;
 992 |   enum DimCheckMode {
 993 |     STRICT = 0;
 994 |     PERMISSIVE = 1;
 995 |   }
 996 |   repeated float blobs_lr = 7;
 997 |   repeated float weight_decay = 8;
 998 |   repeated float loss_weight = 35;
 999 |   optional AccuracyParameter accuracy_param = 27;
1000 |   optional ArgMaxParameter argmax_param = 23;
1001 |   optional ConcatParameter concat_param = 9;
1002 |   optional ContrastiveLossParameter contrastive_loss_param = 40;
1003 |   optional ConvolutionParameter convolution_param = 10;
1004 |   optional DataParameter data_param = 11;
1005 |   optional DropoutParameter dropout_param = 12;
1006 |   optional DummyDataParameter dummy_data_param = 26;
1007 |   optional EltwiseParameter eltwise_param = 24;
1008 |   optional ExpParameter exp_param = 41;
1009 |   optional HDF5DataParameter hdf5_data_param = 13;
1010 |   optional HDF5OutputParameter hdf5_output_param = 14;
1011 |   optional HingeLossParameter hinge_loss_param = 29;
1012 |   optional ImageDataParameter image_data_param = 15;
1013 |   optional InfogainLossParameter infogain_loss_param = 16;
1014 |   optional InnerProductParameter inner_product_param = 17;
1015 |   optional LRNParameter lrn_param = 18;
1016 |   optional MemoryDataParameter memory_data_param = 22;
1017 |   optional MVNParameter mvn_param = 34;
1018 |   optional PoolingParameter pooling_param = 19;
1019 |   optional PowerParameter power_param = 21;
1020 |   optional ReLUParameter relu_param = 30;
1021 |   optional SigmoidParameter sigmoid_param = 38;
1022 |   optional SoftmaxParameter softmax_param = 39;
1023 |   optional SliceParameter slice_param = 31;
1024 |   optional TanHParameter tanh_param = 37;
1025 |   optional ThresholdParameter threshold_param = 25;
1026 |   optional WindowDataParameter window_data_param = 20;
1027 |   optional TransformationParameter transform_param = 36;
1028 |   optional LossParameter loss_param = 42;
1029 |   optional V0LayerParameter layer = 1;
1030 | }
1031 | 
1032 | // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
1033 | // in Caffe.  We keep this message type around for legacy support.
1034 | message V0LayerParameter {
1035 |   optional string name = 1; // the layer name
1036 |   optional string type = 2; // the string to specify the layer type
1037 | 
1038 |   // Parameters to specify layers with inner products.
1039 |   optional uint32 num_output = 3; // The number of outputs for the layer
1040 |   optional bool biasterm = 4 [default = true]; // whether to have bias terms
1041 |   optional FillerParameter weight_filler = 5; // The filler for the weight
1042 |   optional FillerParameter bias_filler = 6; // The filler for the bias
1043 | 
1044 |   optional uint32 pad = 7 [default = 0]; // The padding size
1045 |   optional uint32 kernelsize = 8; // The kernel size
1046 |   optional uint32 group = 9 [default = 1]; // The group size for group conv
1047 |   optional uint32 stride = 10 [default = 1]; // The stride
1048 |   enum PoolMethod {
1049 |     MAX = 0;
1050 |     AVE = 1;
1051 |     STOCHASTIC = 2;
1052 |   }
1053 |   optional PoolMethod pool = 11 [default = MAX]; // The pooling method
1054 |   optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
1055 | 
1056 |   optional uint32 local_size = 13 [default = 5]; // for local response norm
1057 |   optional float alpha = 14 [default = 1.]; // for local response norm
1058 |   optional float beta = 15 [default = 0.75]; // for local response norm
1059 |   optional float k = 22 [default = 1.];
1060 | 
1061 |   // For data layers, specify the data source
1062 |   optional string source = 16;
1063 |   // For data pre-processing, we can do simple scaling and subtracting the
1064 |   // data mean, if provided. Note that the mean subtraction is always carried
1065 |   // out before scaling.
1066 |   optional float scale = 17 [default = 1];
1067 |   optional string meanfile = 18;
1068 |   // For data layers, specify the batch size.
1069 |   optional uint32 batchsize = 19;
1070 |   // For data layers, specify if we would like to randomly crop an image.
1071 |   optional uint32 cropsize = 20 [default = 0];
1072 |   // For data layers, specify if we want to randomly mirror data.
1073 |   optional bool mirror = 21 [default = false];
1074 | 
1075 |   // The blobs containing the numeric parameters of the layer
1076 |   repeated BlobProto blobs = 50;
1077 |   // The ratio that is multiplied on the global learning rate. If you want to
1078 |   // set the learning ratio for one blob, you need to set it for all blobs.
1079 |   repeated float blobs_lr = 51;
1080 |   // The weight decay that is multiplied on the global weight decay.
1081 |   repeated float weight_decay = 52;
1082 | 
1083 |   // The rand_skip variable is for the data layer to skip a few data points
1084 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
1085 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
1086 |   // be larger than the number of keys in the database.
1087 |   optional uint32 rand_skip = 53 [default = 0];
1088 | 
1089 |   // Fields related to detection (det_*)
1090 |   // foreground (object) overlap threshold
1091 |   optional float det_fg_threshold = 54 [default = 0.5];
1092 |   // background (non-object) overlap threshold
1093 |   optional float det_bg_threshold = 55 [default = 0.5];
1094 |   // Fraction of batch that should be foreground objects
1095 |   optional float det_fg_fraction = 56 [default = 0.25];
1096 | 
1097 |   // optional bool OBSOLETE_can_clobber = 57 [default = true];
1098 | 
1099 |   // Amount of contextual padding to add around a window
1100 |   // (used only by the window_data_layer)
1101 |   optional uint32 det_context_pad = 58 [default = 0];
1102 | 
1103 |   // Mode for cropping out a detection window
1104 |   // warp: cropped window is warped to a fixed size and aspect ratio
1105 |   // square: the tightest square around the window is cropped
1106 |   optional string det_crop_mode = 59 [default = "warp"];
1107 | 
1108 |   // For ReshapeLayer, one needs to specify the new dimensions.
1109 |   optional int32 new_num = 60 [default = 0];
1110 |   optional int32 new_channels = 61 [default = 0];
1111 |   optional int32 new_height = 62 [default = 0];
1112 |   optional int32 new_width = 63 [default = 0];
1113 | 
1114 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
1115 |   // It will also resize images if new_height or new_width are not zero.
1116 |   optional bool shuffle_images = 64 [default = false];
1117 | 
1118 |   // For ConcatLayer, one needs to specify the dimension for concatenation, and
1119 |   // the other dimensions must be the same for all the bottom blobs.
1120 |   // By default it will concatenate blobs along the channels dimension.
1121 |   optional uint32 concat_dim = 65 [default = 1];
1122 | 
1123 |   optional HDF5OutputParameter hdf5_output_param = 1001;
1124 | }
1125 | 
1126 | message PReLUParameter {
1127 |   // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
1128 |   // Surpassing Human-Level Performance on ImageNet Classification, 2015.
1129 | 
1130 |   // Initial value of a_i. Default is a_i=0.25 for all i.
1131 |   optional FillerParameter filler = 1;
1132 |   // Whether or not slope paramters are shared across channels.
1133 |   optional bool channel_shared = 2 [default = false];
1134 | }
1135 | 


--------------------------------------------------------------------------------
/example/solver.prototxt:
--------------------------------------------------------------------------------
 1 | net: "train_val.prototxt"
 2 | test_iter: 100
 3 | test_interval: 500
 4 | base_lr: 0.1
 5 | solver_type:SGD
 6 | momentum: 0.9
 7 | weight_decay: 0.0001
 8 | lr_policy: "step"
 9 | gamma: 0.1
10 | stepsize: 20000
11 | display: 100
12 | max_iter: 60000
13 | snapshot: 20000
14 | snapshot_prefix: "cifar100_nin"
15 | solver_mode: CPU
16 | 


--------------------------------------------------------------------------------
/example/super_category.prototxt:
--------------------------------------------------------------------------------
  1 | children: {
  2 | 	children: {
  3 | 		label: 30 # dolphin
  4 | 	}
  5 | 	children: {
  6 | 		label: 95 # whale
  7 | 	}
  8 | 	children: {
  9 | 		label: 73 # shark
 10 | 	}
 11 | }
 12 | children: {
 13 | 	children: {
 14 | 		label: 1 # aquarium_fish
 15 | 	}
 16 | 	children: {
 17 | 		label: 91 # trout
 18 | 	}
 19 | 	children: {
 20 | 		label: 32 # flatfish
 21 | 	}
 22 | }
 23 | children: {
 24 | 	children: {
 25 | 		label: 54 # orchid
 26 | 	}
 27 | 	children: {
 28 | 		label: 62 # poppy
 29 | 	}
 30 | 	children: {
 31 | 		label: 70 # rose
 32 | 	}
 33 | 	children: {
 34 | 		label: 82 # sunflower
 35 | 	}
 36 | 	children: {
 37 | 		label: 92 # tulip
 38 | 	}
 39 | 	children: {
 40 | 		label: 14 # butterfly
 41 | 	}
 42 | }
 43 | children: {
 44 | 	children: {
 45 | 		label: 9 # bottle
 46 | 	}
 47 | 	children: {
 48 | 		label: 10 # bowl
 49 | 	}
 50 | 	children: {
 51 | 		label: 16 # can
 52 | 	}
 53 | 	children: {
 54 | 		label: 28 # cup
 55 | 	}
 56 | 	children: {
 57 | 		label: 61 # plate
 58 | 	}
 59 | }
 60 | children: {
 61 | 	children: {
 62 | 		label: 0 # apple
 63 | 	}
 64 | 	children: {
 65 | 		label: 51 # mushroom
 66 | 	}
 67 | 	children: {
 68 | 		label: 53 # orange
 69 | 	}
 70 | 	children: {
 71 | 		label: 57 # pear
 72 | 	}
 73 | 	children: {
 74 | 		label: 83 # sweet_pepper
 75 | 	}
 76 | }
 77 | children: {
 78 | 	children: {
 79 | 		label: 39 # keyboard
 80 | 	}
 81 | 	children: {
 82 | 		label: 86 # telephone
 83 | 	}
 84 | }
 85 | children: {
 86 | 	children: {
 87 | 		label: 5 # bed
 88 | 	}
 89 | 	children: {
 90 | 		label: 20 # chair
 91 | 	}
 92 | 	children: {
 93 | 		label: 25 # couch
 94 | 	}
 95 | 	children: {
 96 | 		label: 84 # table
 97 | 	}
 98 | 	children: {
 99 | 		label: 94 # wardrobe
100 | 	}
101 | 	children: {
102 | 		label: 87 # television
103 | 	}
104 | }
105 | children: {
106 | 	children: {
107 | 		label: 6 # bee
108 | 	}
109 | 	children: {
110 | 		label: 7 # beetle
111 | 	}
112 | 	children: {
113 | 		label: 24 # cockroach
114 | 	}
115 | 	children: {
116 | 		label: 45 # lobster
117 | 	}
118 | }
119 | children: {
120 | 	children: {
121 | 		label: 3 # bear
122 | 	}
123 | 	children: {
124 | 		label: 42 # leopard
125 | 	}
126 | 	children: {
127 | 		label: 43 # lion
128 | 	}
129 | 	children: {
130 | 		label: 88 # tiger
131 | 	}
132 | 	children: {
133 | 		label: 97 # wolf
134 | 	}
135 | }
136 | children: {
137 | 	children: {
138 | 		label: 17 # castle
139 | 	}
140 | 	children: {
141 | 		label: 37 # house
142 | 	}
143 | 	children: {
144 | 		label: 76 # skyscraper
145 | 	}
146 | 	children: {
147 | 		label: 90 # train
148 | 	}
149 | }
150 | children: {
151 | 	children: {
152 | 		label: 23 # cloud
153 | 	}
154 | 	children: {
155 | 		label: 33 # forest
156 | 	}
157 | 	children: {
158 | 		label: 49 # mountain
159 | 	}
160 | 	children: {
161 | 		label: 60 # plain
162 | 	}
163 | 	children: {
164 | 		label: 71 # sea
165 | 	}
166 | }
167 | children: {
168 | 	children: {
169 | 		label: 15 # camel
170 | 	}
171 | 	children: {
172 | 		label: 19 # cattle
173 | 	}
174 | 	children: {
175 | 		label: 21 # chimpanzee
176 | 	}
177 | 	children: {
178 | 		label: 31 # elephant
179 | 	}
180 | 	children: {
181 | 		label: 38 # kangaroo
182 | 	}
183 | 	children: {
184 | 		label: 29 # dinosaur
185 | 	}
186 | }
187 | children: {
188 | 	children: {
189 | 		label: 34 # fox
190 | 	}
191 | 	children: {
192 | 		label: 63 # porcupine
193 | 	}
194 | 	children: {
195 | 		label: 64 # possum
196 | 	}
197 | 	children: {
198 | 		label: 66 # raccoon
199 | 	}
200 | 	children: {
201 | 		label: 75 # skunk
202 | 	}
203 | }
204 | children: {
205 | 	children: {
206 | 		label: 77 # snail
207 | 	}
208 | 	children: {
209 | 		label: 99 # worm
210 | 	}
211 | 	children: {
212 | 		label: 78 # snake
213 | 	}
214 | 	children: {
215 | 		label: 18 # caterpillar
216 | 	}
217 | 	children: {
218 | 		label: 67 # ray
219 | 	}
220 | }
221 | children: {
222 | 	children: {
223 | 		label: 2 # baby
224 | 	}
225 | 	children: {
226 | 		label: 11 # boy
227 | 	}
228 | 	children: {
229 | 		label: 35 # girl
230 | 	}
231 | 	children: {
232 | 		label: 46 # man
233 | 	}
234 | 	children: {
235 | 		label: 98 # woman
236 | 	}
237 | }
238 | children: {
239 | 	children: {
240 | 		label: 27 # crocodile
241 | 	}
242 | 	children: {
243 | 		label: 44 # lizard
244 | 	}
245 | 	children: {
246 | 		label: 93 # turtle
247 | 	}
248 | }
249 | children: {
250 | 	children: {
251 | 		label: 36 # hamster
252 | 	}
253 | 	children: {
254 | 		label: 50 # mouse
255 | 	}
256 | 	children: {
257 | 		label: 65 # rabbit
258 | 	}
259 | 	children: {
260 | 		label: 74 # shrew
261 | 	}
262 | 	children: {
263 | 		label: 80 # squirrel
264 | 	}
265 | 	children: {
266 | 		label: 4 # beaver
267 | 	}
268 | 	children: {
269 | 		label: 55 # otter
270 | 	}
271 | }
272 | children: {
273 | 	children: {
274 | 		label: 47 # maple_tree
275 | 	}
276 | 	children: {
277 | 		label: 52 # oak_tree
278 | 	}
279 | 	children: {
280 | 		label: 56 # palm_tree
281 | 	}
282 | 	children: {
283 | 		label: 59 # pine_tree
284 | 	}
285 | 	children: {
286 | 		label: 96 # willow_tree
287 | 	}
288 | }
289 | children: {
290 | 	children: {
291 | 		label: 81 # streetcar
292 | 	}
293 | 	children: {
294 | 		label: 13 # bus
295 | 	}
296 | 	children: {
297 | 		label: 48 # motorcycle
298 | 	}
299 | 	children: {
300 | 		label: 68 # road
301 | 	}
302 | }
303 | children: {
304 | 	children: {
305 | 		label: 41 # lawn_mower
306 | 	}
307 | 	children: {
308 | 		label: 58 # pickup_truck
309 | 	}
310 | 	children: {
311 | 		label: 85 # tank
312 | 	}
313 | 	children: {
314 | 		label: 89 # tractor
315 | 	}
316 | }
317 | children: {
318 | 	children: {
319 | 		label: 8 # bicycle
320 | 	}
321 | 	children: {
322 | 		label: 22 # clock
323 | 	}
324 | }
325 | children: {
326 | 	children: {
327 | 		label: 26 # crab
328 | 	}
329 | 	children: {
330 | 		label: 79 # spider
331 | 	}
332 | }
333 | children: {
334 | 	children: {
335 | 		label: 12 # bridge
336 | 	}
337 | 	children: {
338 | 		label: 72 # seal
339 | 	}
340 | }
341 | children: {
342 | 	children: {
343 | 		label: 69 # rocket
344 | 	}
345 | }
346 | children: {
347 | 	children: {
348 | 		label: 40 # lamp
349 | 	}
350 | }
351 | 


--------------------------------------------------------------------------------
/example/train_val.prototxt:
--------------------------------------------------------------------------------
  1 | name: "CIFAR100_nin_sc"
  2 | layer {
  3 |   name: "cifar"
  4 |   type: "Data" 
  5 |   top: "data"
  6 |   top: "label"
  7 |   transform_param {
  8 |     mirror: true
  9 |     crop_size: 26
 10 |   }
 11 |   data_param {
 12 | 	source: "../cifar100/cifar_train"
 13 | 	backend: LMDB
 14 |     batch_size: 128
 15 |   }
 16 |   include: { phase: TRAIN }
 17 | }
 18 | layer {
 19 |   name: "cifar"
 20 |   type: "Data"
 21 |   top: "data"
 22 |   top: "label"
 23 |   transform_param {
 24 |     mirror: false
 25 |     crop_size: 26
 26 |   }
 27 |   data_param {
 28 | 	source: "../cifar100/cifar_test"
 29 | 	backend: LMDB
 30 |     batch_size: 100
 31 |   }
 32 |   include: { phase: TEST }
 33 | }
 34 | layer {
 35 |   name: "conv1"
 36 |   type: "Convolution" 
 37 |   bottom: "data"
 38 |   top: "conv1"
 39 |   param {
 40 | 	  lr_mult: 1
 41 | 	  decay_mult: 1
 42 |   }
 43 |   param {
 44 | 	  lr_mult: 2
 45 | 	  decay_mult: 0
 46 |   }
 47 |   convolution_param {
 48 |     num_output: 192
 49 |     pad: 5
 50 |     kernel_size: 5
 51 |     weight_filler {
 52 |       type: "gaussian"
 53 |       std: 0.05
 54 |     }
 55 |     bias_filler {
 56 |       type: "constant"
 57 |     }
 58 |   }
 59 | }
 60 | layer {
 61 |   name: "relu1"
 62 |   type: "ReLU"
 63 |   bottom: "conv1"
 64 |   top: "conv1"
 65 | }
 66 | layer {
 67 |   name: "cccp1"
 68 |   type: "Convolution" 
 69 |   bottom: "conv1"
 70 |   top: "cccp1"
 71 |   param {
 72 | 	  lr_mult: 1
 73 | 	  decay_mult: 1
 74 |   }
 75 |   param {
 76 | 	  lr_mult: 2
 77 | 	  decay_mult: 0
 78 |   }
 79 |   convolution_param {
 80 |     num_output: 160
 81 |     group: 1
 82 |     kernel_size: 1
 83 |     weight_filler {
 84 |       type: "gaussian"
 85 |       std: 0.05
 86 |     }
 87 |     bias_filler {
 88 |       type: "constant"
 89 |       value: 0
 90 |     }
 91 |   }
 92 | }
 93 | layer {
 94 |   name: "relu_cccp1"
 95 |   type: "ReLU"
 96 |   bottom: "cccp1"
 97 |   top: "cccp1"
 98 | }
 99 | layer {
100 |   name: "cccp2"
101 |   type: "Convolution" 
102 |   bottom: "cccp1"
103 |   top: "cccp2"
104 |   param {
105 | 	  lr_mult: 1
106 | 	  decay_mult: 1
107 |   }
108 |   param {
109 | 	  lr_mult: 2
110 | 	  decay_mult: 0
111 |   }
112 |   convolution_param {
113 |     num_output: 96
114 |     group: 1
115 |     kernel_size: 1
116 |     weight_filler {
117 |       type: "gaussian"
118 |       std: 0.05
119 |     }
120 |     bias_filler {
121 |       type: "constant"
122 |       value: 0
123 |     }
124 |   }
125 | }
126 | layer {
127 |   name: "relu_cccp2"
128 |   type: "ReLU"
129 |   bottom: "cccp2"
130 |   top: "cccp2"
131 | }
132 | layer {
133 |   name: "pool1"
134 |   type: "Pooling"
135 |   bottom: "cccp2"
136 |   top: "pool1"
137 |   pooling_param {
138 |     pool: MAX
139 |     kernel_size: 3
140 |     stride: 2
141 |   }
142 | }
143 | layer {
144 |   name: "drop3"
145 |   type: "Dropout"
146 |   bottom: "pool1"
147 |   top: "pool1"
148 |   dropout_param {
149 |     dropout_ratio: 0.5
150 |   }
151 | }
152 | layer {
153 |   name: "conv2"
154 |   type: "Convolution" 
155 |   bottom: "pool1"
156 |   top: "conv2"
157 |   param {
158 | 	  lr_mult: 1
159 | 	  decay_mult: 1
160 |   }
161 |   param {
162 | 	  lr_mult: 2
163 | 	  decay_mult: 0
164 |   }
165 |   convolution_param {
166 |     num_output: 192
167 |     pad: 2
168 |     kernel_size: 5
169 |     weight_filler {
170 |       type: "gaussian"
171 |       std: 0.05
172 |     }
173 |     bias_filler {
174 |       type: "constant"
175 |     }
176 |   }
177 | }
178 | layer {
179 |   name: "relu2"
180 |   type: "ReLU"
181 |   bottom: "conv2"
182 |   top: "conv2"
183 | }
184 | layer {
185 |   name: "cccp3"
186 |   type: "Convolution"
187 |   bottom: "conv2"
188 |   top: "cccp3"
189 |   param {
190 | 	  lr_mult: 1
191 | 	  decay_mult: 1
192 |   }
193 |   param {
194 | 	  lr_mult: 2
195 | 	  decay_mult: 0
196 |   }
197 |   convolution_param {
198 |     num_output: 192
199 |     group: 1
200 |     kernel_size: 1
201 |     weight_filler {
202 |       type: "gaussian"
203 |       std: 0.05
204 |     }
205 |     bias_filler {
206 |       type: "constant"
207 |       value: 0
208 |     }
209 |   }
210 | }
211 | layer {
212 |   name: "relu_cccp3"
213 |   type: "ReLU" 
214 |   bottom: "cccp3"
215 |   top: "cccp3"
216 | }
217 | layer {
218 |   name: "cccp4"
219 |   type: "Convolution"
220 |   bottom: "cccp3"
221 |   top: "cccp4"
222 |   param {
223 | 	  lr_mult: 1
224 | 	  decay_mult: 1
225 |   }
226 |   param {
227 | 	  lr_mult: 2
228 | 	  decay_mult: 0
229 |   }
230 |   convolution_param {
231 |     num_output: 192
232 |     group: 1
233 |     kernel_size: 1
234 |     weight_filler {
235 |       type: "gaussian"
236 |       std: 0.05
237 |     }
238 |     bias_filler {
239 |       type: "constant"
240 |       value: 0
241 |     }
242 |   }
243 | }
244 | layer {
245 |   name: "relu_cccp4"
246 |   type: "ReLU"
247 |   bottom: "cccp4"
248 |   top: "cccp4"
249 | }
250 | layer {
251 |   name: "pool2"
252 |   type: "Pooling" 
253 |   bottom: "cccp4"
254 |   top: "pool2"
255 |   pooling_param {
256 |     pool: AVE
257 |     kernel_size: 3
258 |     stride: 2
259 |   }
260 | }
261 | layer {
262 |   name: "drop6"
263 |   type: "Dropout"
264 |   bottom: "pool2"
265 |   top: "pool2"
266 |   dropout_param {
267 |     dropout_ratio: 0.5
268 |   }
269 | }
270 | layer {
271 |   name: "conv3"
272 |   type: "Convolution" 
273 |   bottom: "pool2"
274 |   top: "conv3"
275 |   param {
276 | 	  lr_mult: 1
277 | 	  decay_mult: 1
278 |   }
279 |   param {
280 | 	  lr_mult: 2
281 | 	  decay_mult: 0
282 |   }
283 |   convolution_param {
284 |     num_output: 192
285 |     pad: 1
286 |     kernel_size: 3
287 |     weight_filler {
288 |       type: "gaussian"
289 |       std: 0.05
290 |     }
291 |     bias_filler {
292 |       type: "constant"
293 |     }
294 |   }
295 | }
296 | layer {
297 |   name: "relu3"
298 |   type: "ReLU"
299 |   bottom: "conv3"
300 |   top: "conv3"
301 | }
302 | layer {
303 |   name: "cccp5"
304 |   type: "Convolution" 
305 |   bottom: "conv3"
306 |   top: "cccp5"
307 |   param {
308 | 	  lr_mult: 1
309 | 	  decay_mult: 1
310 |   }
311 |   param {
312 | 	  lr_mult: 2
313 | 	  decay_mult: 0
314 |   }
315 |   convolution_param {
316 |     num_output: 192
317 |     group: 1
318 |     kernel_size: 1
319 |     weight_filler {
320 |       type: "gaussian"
321 |       std: 0.05
322 |     }
323 |     bias_filler {
324 |       type: "constant"
325 |       value: 0
326 |     }
327 |   }
328 | }
329 | layer {
330 |   name: "relu_cccp5"
331 |   type: "ReLU"
332 |   bottom: "cccp5"
333 |   top: "cccp5"
334 | }
335 | layer {
336 |   name: "cccp6"
337 |   type: "Convolution"
338 |   bottom: "cccp5"
339 |   top: "cccp6"
340 |   param {
341 | 	  lr_mult: 0.1
342 | 	  decay_mult: 1
343 |   }
344 |   param {
345 | 	  lr_mult: 0.1
346 | 	  decay_mult: 0
347 |   }
348 |   convolution_param {
349 |     num_output: 100
350 |     group: 1
351 |     kernel_size: 1
352 |     weight_filler {
353 |       type: "gaussian"
354 |       std: 0.05
355 |     }
356 |     bias_filler {
357 |       type: "constant"
358 |       value: 0
359 |     }
360 |   }
361 | }
362 | layer {
363 |   name: "relu_cccp6"
364 |   type: "ReLU"
365 |   bottom: "cccp6"
366 |   top: "cccp6"
367 | }
368 | layer {
369 | 	name: "super_category"
370 | 	type: "SuperCategoryFM"
371 | 	bottom: "cccp6"
372 | 	top: "layer1"
373 | 	top: "layer2"
374 | 	eltwise_param {
375 | 		operation : MIN
376 | 	}
377 |   super_category_param{
378 | 	  file_name : "super_category.prototxt"
379 |   }
380 | }
381 | layer {
382 |   name: "super_category_label"
383 |   type: "SuperCategoryLabel"
384 |   bottom: "label"
385 |   top: "label_1"
386 |   top: "label_2"
387 |   super_category_param{
388 | 	  file_name : "super_category.prototxt"
389 |   }
390 | } 
391 | layer {
392 | 	name: "SumOver"
393 | 	type: "SuperCategoryFMPost"
394 | 	eltwise_param {
395 | 		operation : MINUS
396 | 	}
397 | 	bottom: "layer1"
398 | 	bottom: "layer2"
399 | 	top: "layer11"
400 | 	top: "layer22"
401 | 	super_category_param {
402 | 		file_name : "super_category.prototxt"
403 | 	}
404 | }
405 | layer {
406 |   name: "pool3"
407 |   type: "Pooling" 
408 |   bottom: "layer1"
409 |   top: "pool3"
410 |   pooling_param {
411 |     pool: AVE
412 |     kernel_size: 8
413 |     stride: 1
414 |   }
415 | }
416 | layer {
417 |   name: "pool4"
418 |   type: "Pooling" 
419 |   bottom: "layer2"
420 |   top: "pool4"
421 |   pooling_param {
422 |     pool: AVE
423 |     kernel_size: 8
424 |     stride: 1
425 |   }
426 | }
427 | layer {
428 |   name: "pool5_diff"
429 |   type: "Pooling" 
430 |   bottom: "layer22"
431 |   top: "pool5_diff"
432 |   pooling_param {
433 |     pool: AVE
434 |     kernel_size: 8
435 |     stride: 1
436 |   }
437 | }
438 | layer {
439 |   name: "loss1"
440 |   type: "SoftmaxWithLoss"
441 |   bottom: "pool4"
442 |   bottom: "label_2"
443 |   top: "loss1"
444 |   loss_weight: 0.4
445 | }
446 | layer {
447 |   name: "loss2_supercategory"
448 |   type: "SoftmaxWithLoss"
449 |   bottom: "pool3"
450 |   bottom: "label_1"
451 |   top: "loss2_supercategory"
452 |   loss_weight: 0.5
453 | }
454 | layer {
455 |   name: "loss3_diffloss"
456 |   type: "SoftmaxWithLoss"
457 |   bottom: "pool5_diff"
458 |   bottom: "label_2"
459 |   top: "loss3_diffloss"
460 |   loss_weight: 0.5
461 | }
462 | layer {
463 |   name: "accuracy"
464 |   type: "Accuracy"
465 |   bottom: "pool4"
466 |   bottom: "label"
467 |   top: "accuracy"
468 |   accuracy_param: {
469 | 	  top_k : 1
470 |   }
471 | }
472 | layer {
473 |   name: "accuracy_supercategory"
474 |   type: "Accuracy"
475 |   bottom: "pool3"
476 |   bottom: "label_1"
477 |   top: "accuracy_supercategory"
478 | }
479 | layer {
480 | 	name: "silence"
481 | 	type: "Silence"
482 | 	bottom: "layer11"
483 | }
484 | 


--------------------------------------------------------------------------------
/miscellaneous/headline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiwonjoon/eccv16-taxonomy/4c065e2ab08e140c6ca2eca5d0df0ad8fec666f7/miscellaneous/headline.png


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | cd ./example
 3 | 
 4 | count=($(ls -1 ./ | grep .solverstate | wc -l))
 5 | filename=$(date +"%F_%H_%M")
 6 | echo $filename
 7 | 
 8 | gpu_num=0
 9 | gpu_count=($(nvidia-smi -L | wc -l))
10 | if [ $gpu_count -gt 1 ]
11 | then
12 | 	read -p "You have more than one graphic card. Do you want to see the current process list?(y/n)" answer
13 | 	case ${answer:0:1} in
14 | 		y|Y )
15 | 			nvidia-smi
16 | 		;;
17 | 	esac
18 | 	while :
19 | 	do
20 | 		read -p "Enter GPU number : " answer
21 | 		gpu_num=${answer}
22 | 		if [ "$gpu_num" -ge 0 -a "$gpu_num" -lt "$gpu_count" ]
23 | 		then
24 | 			break
25 | 		fi
26 | 	done
27 | fi
28 | 
29 | echo Using GPU '#'$gpu_num.
30 | 
31 | if [ $count -ge "1" ]
32 | then
33 | 	list=($(ls -1 ./*.solverstate | tr '\n' '\0' | xargs -0 -n 1 basename | sort -V -r))
34 | 	read -p "You have a solverstate. Do you want to continue learning process from the last(y/n)? " answer
35 | 	case ${answer:0:1} in
36 | 		y|Y )
37 | 			../caffe/build/tools/caffe train -solver ./solver.prototxt -gpu $gpu_num -snapshot ./$list &> $filename.log &
38 | 		;;
39 | 		* )
40 | 			../caffe/build/tools/caffe train -solver ./solver.prototxt -gpu $gpu_num &> $filename.log &
41 | 		;;
42 | 	esac
43 | else
44 | 	../caffe/build/tools/caffe train -solver ./solver.prototxt -gpu $gpu_num &> $filename.log &
45 | fi
46 | 
47 | cd ..
48 | 
49 | tail -F ./example/$filename.log
50 | 
51 | #script for future use
52 | 
53 | #!/bin/bash
54 | #list=$(ls -1 ./regularized_fix/*.solverstate | tr '\n' '\0' | xargs -0 -n 1 basename)
55 | #for file in $list
56 | #do
57 | #	echo $file
58 | #done
59 | #files=./regularized_fix/"*.solverstate"
60 | #regex='([0-9]+)\.solverstate'
61 | #for f in $files
62 | #do
63 | #	[[ $f =~ $regex ]]
64 | #	echo ${BASH_REMATCH[1]}
65 | #done
66 | 
67 | #list=$(ls -1 ./regularized_fix/*.solverstate | tr '\n' '\0' | xargs -0 -n 1 basename | sort -V)
68 | 


--------------------------------------------------------------------------------