├── src
    └── caffe
    │   ├── CMakeLists.txt
    │   ├── layers
    │       ├── neuron_layer.cpp
    │       ├── relu_layer.cpp
    │       ├── sigmoid_layer.cpp
    │       ├── bnll_layer.cpp
    │       ├── flatten_layer.cpp
    │       ├── im2col_layer.cpp
    │       ├── softmax_loss_layer.cpp
    │       ├── tanh_layer.cpp
    │       ├── dropout_layer.cpp
    │       ├── split_layer.cpp
    │       ├── raw_image_layer.cpp
    │       ├── padding_layer.cpp
    │       ├── softmax_layer.cpp
    │       ├── concat_layer.cpp
    │       ├── inner_product_layer.cpp
    │       ├── verification_loss.cpp
    │       ├── dropout_group_layer.cpp
    │       ├── lrn_layer.cpp
    │       ├── pooling_layer.cpp
    │       ├── loss_layer.cpp
    │       └── conv_layer.cpp
    │   ├── common.cpp
    │   ├── syncedmem.cpp
    │   ├── layer_factory.cpp
    │   ├── util
    │       ├── io.cpp
    │       ├── insert_splits.cpp
    │       ├── im2col.cpp
    │       └── math_functions.cpp
    │   ├── blob.cpp
    │   ├── proto
    │       └── caffe.proto
    │   └── net.cpp
├── CMakeLists.txt
├── include
    └── caffe
    │   ├── caffe.hpp
    │   ├── util
    │       ├── insert_splits.hpp
    │       ├── io.hpp
    │       ├── im2col.hpp
    │       ├── math_functions.hpp
    │       └── mkl_alternate.hpp
    │   ├── glog-compact.hpp
    │   ├── syncedmem.hpp
    │   ├── blob.hpp
    │   ├── common.hpp
    │   ├── layer.hpp
    │   ├── net.hpp
    │   └── filler.hpp
├── LICENSE
├── Readme.md
├── Makefile
├── .ycm_extra_conf.py
├── feat_net_raw.cpp
└── align_test.cpp


/src/caffe/CMakeLists.txt:
--------------------------------------------------------------------------------
# Collect every C++ source file found below this directory; both the .cpp and
# the .cc extension are picked up.
file(GLOB_RECURSE CPP_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
file(GLOB_RECURSE CC_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/*.cc")

# Static library built from all collected sources.
add_library(caffe-compact STATIC
  ${CPP_SOURCES}
  ${CC_SOURCES}
)

# Install the library archive into <prefix>/lib.
install(TARGETS caffe-compact DESTINATION lib)
7 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(CAFFE_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
 2 | include_directories(${CAFFE_INCLUDE_DIRS})
 3 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
 4 | 
 5 | add_definitions(-DUSE_EIGEN)
 6 | 
 7 | if(NOT MSVC)
 8 | 	SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++0x -Wno-sign-compare")
 9 | 	SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++0x -Wno-sign-compare" )
10 | endif()
11 | 
12 | add_subdirectory(src/caffe)
13 | 


--------------------------------------------------------------------------------
/include/caffe/caffe.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright Yangqing Jia 2013
 2 | // caffe.hpp is the header file that you need to include in your code. It wraps
 3 | // all the internal caffe header files into one for simpler inclusion.
 4 | 
 5 | #ifndef CAFFE_CAFFE_HPP_
 6 | #define CAFFE_CAFFE_HPP_
 7 | 
 8 | #include "caffe/common.hpp"
 9 | #include "caffe/blob.hpp"
10 | #include "caffe/filler.hpp"
11 | #include "caffe/layer.hpp"
12 | #include "caffe/net.hpp"
13 | #include "caffe/util/io.hpp"
14 | #include "caffe/vision_layers.hpp"
15 | 
16 | #include "caffe/proto/caffe.pb.h"
17 | 
18 | #endif  // CAFFE_CAFFE_HPP_
19 | 


--------------------------------------------------------------------------------
/src/caffe/layers/neuron_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include "caffe/layer.hpp"
 6 | #include "caffe/vision_layers.hpp"
 7 | 
 8 | namespace caffe {
 9 | 
10 | template <typename Dtype>
11 | void NeuronLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
12 |       vector<Blob<Dtype>*>* top) {
13 |   CHECK_EQ(bottom.size(), 1) << "Neuron Layer takes a single blob as input.";
14 |   CHECK_EQ(top->size(), 1) << "Neuron Layer takes a single blob as output.";
15 |   // NeuronLayer allows in-place computations. If the computation is not
16 |   // in-place, we will need to initialize the top blob.
17 |   if ((*top)[0] != bottom[0]) {
18 |     (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
19 |         bottom[0]->height(), bottom[0]->width());
20 |   }
21 | }
22 | 
23 | INSTANTIATE_CLASS(NeuronLayer);
24 | 
25 | }  // namespace caffe
26 | 


--------------------------------------------------------------------------------
/include/caffe/util/insert_splits.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jeff Donahue
 2 | 
 3 | #ifndef _CAFFE_UTIL_INSERT_SPLITS_HPP_
 4 | #define _CAFFE_UTIL_INSERT_SPLITS_HPP_
 5 | 
 6 | #include <string>
 7 | 
 8 | #include "caffe/proto/caffe.pb.h"
 9 | 
10 | using std::pair;
11 | using std::string;
12 | 
13 | namespace caffe {
14 | 
15 | // Copy NetParameters with SplitLayers added to replace any shared bottom
16 | // blobs with unique bottom blobs provided by the SplitLayer.
17 | void insert_splits(const NetParameter& param, NetParameter* param_split);
18 | 
19 | void configure_split_layer(const string& layer_name, const string& blob_name,
20 |     const int blob_idx, const int split_count,
21 |     LayerConnection* split_layer_connection);
22 | 
23 | string get_split_layer_name(const string& layer_name, const string& blob_name,
24 |     const int blob_idx);
25 | 
26 | string get_split_blob_name(const string& layer_name, const string& blob_name,
27 |     const int blob_idx, const int split_idx);
28 | 
29 | }  // namespace caffe
30 | 
31 | #endif  // CAFFE_UTIL_INSERT_SPLITS_HPP_
32 | 


--------------------------------------------------------------------------------
/src/caffe/common.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <cstdio>
 4 | #include <ctime>
 5 | #ifndef _MSC_VER
 6 | #include <unistd.h>
 7 | #endif
 8 | 
 9 | #include "caffe/common.hpp"
10 | 
11 | namespace caffe {
12 | 
13 | shared_ptr<Caffe> Caffe::singleton_;
14 | nullstream __nullstream;
15 | bool LogMessage::enable = false;
16 | 
// Derives a seed in the range [0, 104729) from the current time and the
// process id (a fixed constant stands in for the pid when built with MSVC).
//
// Both factors are reduced modulo 104729 and made non-negative before they
// are multiplied. This yields the same value as the exact mathematical result
// of abs((s * 181 * (pid - 83) * 359) % 104729), but the intermediate product
// can no longer overflow int64_t (signed overflow is undefined behaviour), and
// no call to abs() is needed, which could otherwise resolve to the int
// overload and truncate.
int64_t cluster_seedgen(void) {
  const int64_t kModulus = 104729;
  int64_t pid;
#ifdef _MSC_VER
  pid = 0x32423;
#else
  pid = getpid();
#endif
  const int64_t s = time(NULL);
  int64_t time_term = (s * 181) % kModulus;
  int64_t pid_term = ((pid - 83) * 359) % kModulus;
  // operator% truncates toward zero, so a negative factor leaves a negative
  // remainder; flip it to obtain |factor| mod kModulus.
  if (time_term < 0) {
    time_term = -time_term;
  }
  if (pid_term < 0) {
    pid_term = -pid_term;
  }
  return (time_term * pid_term) % kModulus;
}
28 | 
29 | 
30 | Caffe::Caffe()
31 |     : mode_(Caffe::CPU), phase_(Caffe::TRAIN){
32 | }
33 | 
34 | Caffe::~Caffe() {
35 | }
36 | 
37 | void Caffe::set_random_seed(const unsigned int seed) {
38 |   // Curand seed
39 |   // Yangqing's note: simply setting the generator seed does not seem to
40 |   // work on the tesla K20s, so I wrote the ugly reset thing below.
41 | }
42 | 
43 | void Caffe::SetDevice(const int device_id) {
44 | 	LOG(INFO) << "Caffe-compact only support CPU";
45 | }
46 | 
47 | void Caffe::DeviceQuery() {
48 | 	LOG(INFO) << "Caffe-compact only support CPU";
49 |     return;
50 | }
51 | 
52 | }  // namespace caffe
53 | 


--------------------------------------------------------------------------------
/src/caffe/layers/relu_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <algorithm>
 4 | #include <vector>
 5 | 
 6 | #include "caffe/layer.hpp"
 7 | #include "caffe/vision_layers.hpp"
 8 | 
 9 | using std::max;
10 | 
11 | namespace caffe {
12 | 
13 | template <typename Dtype>
14 | void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
15 |     vector<Blob<Dtype>*>* top) {
16 |   const Dtype* bottom_data = bottom[0]->cpu_data();
17 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
18 |   const int count = bottom[0]->count();
19 |   for (int i = 0; i < count; ++i) {
20 |     top_data[i] = max(bottom_data[i], Dtype(0));
21 |   }
22 | }
23 | 
24 | template <typename Dtype>
25 | Dtype ReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
26 |     const bool propagate_down,
27 |     vector<Blob<Dtype>*>* bottom) {
28 |   if (propagate_down) {
29 |     const Dtype* bottom_data = (*bottom)[0]->cpu_data();
30 |     const Dtype* top_diff = top[0]->cpu_diff();
31 |     Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
32 |     const int count = (*bottom)[0]->count();
33 |     for (int i = 0; i < count; ++i) {
34 |       bottom_diff[i] = top_diff[i] * (bottom_data[i] > 0);
35 |     }
36 |   }
37 |   return Dtype(0);
38 | }
39 | 
40 | 
41 | INSTANTIATE_CLASS(ReLULayer);
42 | 
43 | 
44 | }  // namespace caffe
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, The Regents of the University of California (Regents)
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met: 
 6 | 
 7 | 1. Redistributions of source code must retain the above copyright notice, this
 8 |    list of conditions and the following disclaimer. 
 9 | 2. Redistributions in binary form must reproduce the above copyright notice,
10 |    this list of conditions and the following disclaimer in the documentation
11 |    and/or other materials provided with the distribution. 
12 | 
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 | 


--------------------------------------------------------------------------------
/src/caffe/layers/sigmoid_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Tobias Domhan
 2 | 
 3 | #include <algorithm>
 4 | #include <cmath>
 5 | #include <vector>
 6 | 
 7 | #include "caffe/layer.hpp"
 8 | #include "caffe/vision_layers.hpp"
 9 | 
10 | namespace caffe {
11 | 
// Logistic function 1 / (1 + e^(-x)); the result is converted back to Dtype.
template <typename Dtype>
inline Dtype sigmoid(Dtype x) {
  return static_cast<Dtype>(1. / (1. + exp(-x)));
}
16 | 
17 | template <typename Dtype>
18 | void SigmoidLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
19 |     vector<Blob<Dtype>*>* top) {
20 |   const Dtype* bottom_data = bottom[0]->cpu_data();
21 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
22 |   const int count = bottom[0]->count();
23 |   for (int i = 0; i < count; ++i) {
24 |     top_data[i] = sigmoid(bottom_data[i]);
25 |   }
26 | }
27 | 
28 | template <typename Dtype>
29 | Dtype SigmoidLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
30 |     const bool propagate_down,
31 |     vector<Blob<Dtype>*>* bottom) {
32 |   if (propagate_down) {
33 |     const Dtype* bottom_data = (*bottom)[0]->cpu_data();
34 |     const Dtype* top_diff = top[0]->cpu_diff();
35 |     Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
36 |     const int count = (*bottom)[0]->count();
37 |     for (int i = 0; i < count; ++i) {
38 |       Dtype sigmoid_x = sigmoid(bottom_data[i]);
39 |       bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x);
40 |     }
41 |   }
42 |   return Dtype(0);
43 | }
44 | 
45 | INSTANTIATE_CLASS(SigmoidLayer);
46 | 
47 | 
48 | }  // namespace caffe
49 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | Caffe-compact
 2 | ==================
 3 | Caffe-compact aims to provide a self-contained CNN model testing library.
 4 | 
 5 | This project removes most unnecessary dependencies for CNN net testing and
 6 | feature extraction. Note that the CUDA dependency is completely removed in
 7 | caffe-compact.
 8 | 
 9 | Current dependency:
10 | * c++11 compiler (for shared_ptr)
11 | * google protobuf
12 | 
13 | Optional dependency:
14 | * cblas (e.g. libatlas3gf-base)
15 | * Eigen3
16 | 
17 | You can select a matrix backend by setting the USE_EIGEN variable in the
18 | Makefile.
19 | 
20 | These dependencies can be satisfied on most platforms, including Windows and
21 | mobile, which makes Caffe-compact much easier to deploy.
22 | 
23 | This work also avoids potential license problems with the
24 | third-party libraries when you release your caffe CNN model.
25 | 
26 | Difference
27 | ==================
28 | The original project can be found at: https://github.com/BVLC/caffe
29 | Caffe-compact only supports a subset of the functionality of caffe:
30 | 
31 | * CNN forward pass only 
32 | * CPU only
33 | * Raw image input only
34 | 
35 | Performance
36 | ==================
37 | MKL has performance problems when dealing with small matrices (e.g. testing your
38 | model on only one input image), especially when multithreading is enabled. Atlas or
39 | other open source BLAS implementations may perform better.
40 | 
41 | TODO: benchmark
42 | 
43 | Future Work
44 | ==================
45 | * integrate protobuf
46 | 
47 | 
48 | Yuheng Chen, 2014
49 | chyh1990@gmail.com
50 | 


--------------------------------------------------------------------------------
/src/caffe/layers/bnll_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <algorithm>
 4 | #include <vector>
 5 | 
 6 | #include "caffe/layer.hpp"
 7 | #include "caffe/vision_layers.hpp"
 8 | 
 9 | using std::min;
10 | 
11 | namespace caffe {
12 | 
13 | const float kBNLL_THRESHOLD = 50.;
14 | 
15 | template <typename Dtype>
16 | void BNLLLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
17 |     vector<Blob<Dtype>*>* top) {
18 |   const Dtype* bottom_data = bottom[0]->cpu_data();
19 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
20 |   const int count = bottom[0]->count();
21 |   for (int i = 0; i < count; ++i) {
22 |     top_data[i] = bottom_data[i] > 0 ?
23 |         bottom_data[i] + log(1. + exp(-bottom_data[i])) :
24 |         log(1. + exp(bottom_data[i]));
25 |   }
26 | }
27 | 
28 | template <typename Dtype>
29 | Dtype BNLLLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
30 |     const bool propagate_down,
31 |     vector<Blob<Dtype>*>* bottom) {
32 |   if (propagate_down) {
33 |     const Dtype* bottom_data = (*bottom)[0]->cpu_data();
34 |     const Dtype* top_diff = top[0]->cpu_diff();
35 |     Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
36 |     const int count = (*bottom)[0]->count();
37 |     Dtype expval;
38 |     for (int i = 0; i < count; ++i) {
39 |       expval = exp(min(bottom_data[i], Dtype(kBNLL_THRESHOLD)));
40 |       bottom_diff[i] = top_diff[i] * expval / (expval + 1.);
41 |     }
42 |   }
43 |   return Dtype(0);
44 | }
45 | 
46 | 
47 | INSTANTIATE_CLASS(BNLLLayer);
48 | 
49 | 
50 | }  // namespace caffe
51 | 


--------------------------------------------------------------------------------
/include/caffe/glog-compact.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef GLOG_COMPACT_HPP
 2 | #define GLOG_COMPACT_HPP
 3 | 
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <ctime>
 7 | #include <cstdlib>
 8 | 
 9 | namespace caffe{
// An output stream constructed with a null stream buffer, used as a sink for
// log output that should be discarded.
struct nullstream : std::ostream {
  nullstream() : std::ostream(0) {}
};

// Insertion into a nullstream ignores the value and hands back the stream.
template <typename T>
nullstream& operator<<(nullstream& sink, T const& /*ignored*/) {
  return sink;
}

// The single shared sink object; it is defined in a source file.
extern nullstream __nullstream;
17 | 
18 | class LogMessage{
19 | 	std::string level;
20 | 	std::ostream &ofs;
21 | 	static bool enable;
22 | 	public:
23 | 		LogMessage(const std::string &l)
24 | 			:level(l), ofs(enable ? std::cerr : __nullstream){
25 | 			stream() << "[" << level << "]\t";
26 | 		}
27 | 		LogMessage(std::ostream &o)
28 | 			:level("ERROR"), ofs(o){
29 | 			stream() << "[" << level << "]\t";
30 | 		}
31 | 		inline std::ostream &stream(){ 
32 | 			return ofs;
33 | 		}
34 | 		~LogMessage() { 
35 | 			stream() << std::endl; 
36 | 		}
37 | 
38 | 		static void Enable(bool _enable){
39 | 			enable = _enable;
40 | 		}
41 | };
42 | }
43 | 
// LOG/DLOG create a temporary caffe::LogMessage tagged with the stringized
// severity and yield its stream, so the message text can be appended with <<.
#define LOG(type) caffe::LogMessage(#type).stream()
#define DLOG(type) caffe::LogMessage(#type).stream()
// Logs at INFO level when (level) <= FLAGS_v. Written in if/else form so that
// an `else` following a VLOG statement cannot bind to the macro's own `if`.
// NOTE(review): FLAGS_v is not declared in this header; users of VLOG must
// provide it.
#define VLOG(level) if (!((level) <= FLAGS_v)) {} else LOG(INFO)

// Logs the stringized condition at ERROR level when it is false.
// NOTE(review): unlike glog's CHECK, this does not abort, and the output is
// dropped unless LogMessage::Enable(true) was called; execution always
// continues after a failed check.
#define CHECK(x) if (x) {} else LOG(ERROR) << #x
#define DCHECK(x) CHECK(x)

// Comparison forms of CHECK.
#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_LT(x, y) CHECK((x) < (y))
#define CHECK_GT(x, y) CHECK((x) > (y))
#define CHECK_LE(x, y) CHECK((x) <= (y))
#define CHECK_GE(x, y) CHECK((x) >= (y))
#define CHECK_NE(x, y) CHECK((x) != (y))
57 | 
58 | #endif
59 | 


--------------------------------------------------------------------------------
/src/caffe/layers/flatten_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include "caffe/layer.hpp"
 6 | #include "caffe/vision_layers.hpp"
 7 | #include "caffe/util/math_functions.hpp"
 8 | 
 9 | namespace caffe {
10 | 
11 | template <typename Dtype>
12 | void FlattenLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
13 |       vector<Blob<Dtype>*>* top) {
14 |   CHECK_EQ(bottom.size(), 1) << "Flatten Layer takes a single blob as input.";
15 |   CHECK_EQ(top->size(), 1) << "Flatten Layer takes a single blob as output.";
16 |   int channels_out = bottom[0]->channels() * bottom[0]->height()
17 |       * bottom[0]->width();
18 |   (*top)[0]->Reshape(bottom[0]->num(), channels_out, 1, 1);
19 |   count_ = bottom[0]->num() * channels_out;
20 |   CHECK_EQ(count_, bottom[0]->count());
21 |   CHECK_EQ(count_, (*top)[0]->count());
22 | }
23 | 
24 | template <typename Dtype>
25 | void FlattenLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
26 |       vector<Blob<Dtype>*>* top) {
27 |   const Dtype* bottom_data = bottom[0]->cpu_data();
28 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
29 |   caffe_copy(count_, bottom_data, top_data);
30 | }
31 | 
32 | template <typename Dtype>
33 | Dtype FlattenLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
34 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
35 |   const Dtype* top_diff = top[0]->cpu_diff();
36 |   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
37 |   caffe_copy(count_, top_diff, bottom_diff);
38 |   return Dtype(0.);
39 | }
40 | 
41 | INSTANTIATE_CLASS(FlattenLayer);
42 | 
43 | }  // namespace caffe
44 | 


--------------------------------------------------------------------------------
/include/caffe/util/io.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright Yangqing Jia 2013
 2 | 
 3 | #ifndef CAFFE_UTIL_IO_H_
 4 | #define CAFFE_UTIL_IO_H_
 5 | 
 6 | #include <string>
 7 | 
 8 | #include "google/protobuf/message.h"
 9 | #include "caffe/proto/caffe.pb.h"
10 | 
11 | #include "caffe/blob.hpp"
12 | 
13 | using std::string;
14 | using ::google::protobuf::Message;
15 | 
16 | namespace caffe {
17 | 
18 | void ReadProtoFromTextFile(const char* filename,
19 |     Message* proto);
20 | inline void ReadProtoFromTextFile(const string& filename,
21 |     Message* proto) {
22 |   ReadProtoFromTextFile(filename.c_str(), proto);
23 | }
24 | 
25 | void WriteProtoToTextFile(const Message& proto, const char* filename);
26 | inline void WriteProtoToTextFile(const Message& proto, const string& filename) {
27 |   WriteProtoToTextFile(proto, filename.c_str());
28 | }
29 | 
30 | void ReadProtoFromBinaryFile(const char* filename,
31 |     Message* proto);
32 | inline void ReadProtoFromBinaryFile(const string& filename,
33 |     Message* proto) {
34 |   ReadProtoFromBinaryFile(filename.c_str(), proto);
35 | }
36 | 
37 | void WriteProtoToBinaryFile(const Message& proto, const char* filename);
38 | inline void WriteProtoToBinaryFile(
39 |     const Message& proto, const string& filename) {
40 |   WriteProtoToBinaryFile(proto, filename.c_str());
41 | }
42 | 
43 | bool ReadImageToDatum(const string& filename, const int label,
44 |     const int height, const int width, Datum* datum);
45 | 
46 | inline bool ReadImageToDatum(const string& filename, const int label,
47 |     Datum* datum) {
48 |   return ReadImageToDatum(filename, label, 0, 0, datum);
49 | }
50 | 
51 | #if 0
52 | template <typename Dtype>
53 | void hdf5_load_nd_dataset_helper(
54 |   hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
55 |   Blob<Dtype>* blob);
56 | 
57 | template <typename Dtype>
58 | void hdf5_load_nd_dataset(
59 |   hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
60 |   Blob<Dtype>* blob);
61 | #endif
62 | 
63 | }  // namespace caffe
64 | 
65 | #endif   // CAFFE_UTIL_IO_H_
66 | 


--------------------------------------------------------------------------------
/src/caffe/syncedmem.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <cstring>
 4 | 
 5 | #include "caffe/common.hpp"
 6 | #include "caffe/syncedmem.hpp"
 7 | 
 8 | namespace caffe {
 9 | 
10 | SyncedMemory::~SyncedMemory() {
11 |   if (cpu_ptr_) {
12 |     CaffeFreeHost(cpu_ptr_);
13 |   }
14 | 
15 |   if (gpu_ptr_) {
16 |   }
17 | }
18 | 
19 | inline void SyncedMemory::to_cpu() {
20 |   switch (head_) {
21 |   case UNINITIALIZED:
22 |     CaffeMallocHost(&cpu_ptr_, size_);
23 |     CHECK(cpu_ptr_ != 0) << "size " << size_;
24 |     memset(cpu_ptr_, 0, size_);
25 |     head_ = HEAD_AT_CPU;
26 |     break;
27 | #if 0
28 |   case HEAD_AT_GPU:
29 |     if (cpu_ptr_ == NULL) {
30 |       CaffeMallocHost(&cpu_ptr_, size_);
31 |     }
32 |     CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDeviceToHost));
33 |     head_ = SYNCED;
34 |     break;
35 | #endif
36 |   case HEAD_AT_CPU:
37 |   case SYNCED:
38 |     break;
39 |   }
40 | }
41 | 
42 | #if 0
43 | inline void SyncedMemory::to_gpu() {
44 |   switch (head_) {
45 |   case UNINITIALIZED:
46 |     CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
47 |     CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_));
48 |     head_ = HEAD_AT_GPU;
49 |     break;
50 |   case HEAD_AT_CPU:
51 |     if (gpu_ptr_ == NULL) {
52 |       CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
53 |     }
54 |     CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyHostToDevice));
55 |     head_ = SYNCED;
56 |     break;
57 |   case HEAD_AT_GPU:
58 |   case SYNCED:
59 |     break;
60 |   }
61 | }
62 | #endif
63 | 
64 | const void* SyncedMemory::cpu_data() {
65 |   to_cpu();
66 |   return (const void*)cpu_ptr_;
67 | }
68 | 
69 | #if 0
70 | const void* SyncedMemory::gpu_data() {
71 |   to_gpu();
72 |   return (const void*)gpu_ptr_;
73 | }
74 | #endif
75 | 
76 | void* SyncedMemory::mutable_cpu_data() {
77 |   to_cpu();
78 |   head_ = HEAD_AT_CPU;
79 |   return cpu_ptr_;
80 | }
81 | 
82 | #if 0
83 | void* SyncedMemory::mutable_gpu_data() {
84 |   to_gpu();
85 |   head_ = HEAD_AT_GPU;
86 |   return gpu_ptr_;
87 | }
88 | #endif
89 | 
90 | 
91 | }  // namespace caffe
92 | 
93 | 


--------------------------------------------------------------------------------
/src/caffe/layers/im2col_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include "caffe/layer.hpp"
 6 | #include "caffe/util/im2col.hpp"
 7 | #include "caffe/vision_layers.hpp"
 8 | #include "caffe/common.hpp"
 9 | 
10 | namespace caffe {
11 | 
12 | template <typename Dtype>
13 | void Im2colLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
14 |       vector<Blob<Dtype>*>* top) {
15 |   CHECK_EQ(bottom.size(), 1) << "Im2col Layer takes a single blob as input.";
16 |   CHECK_EQ(top->size(), 1) << "Im2col Layer takes a single blob as output.";
17 |   KSIZE_ = this->layer_param_.kernelsize();
18 |   STRIDE_ = this->layer_param_.stride();
19 |   PAD_ = this->layer_param_.pad();
20 |   CHANNELS_ = bottom[0]->channels();
21 |   HEIGHT_ = bottom[0]->height();
22 |   WIDTH_ = bottom[0]->width();
23 |   (*top)[0]->Reshape(bottom[0]->num(), CHANNELS_ * KSIZE_ * KSIZE_,
24 |       (HEIGHT_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1,
25 |       (WIDTH_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1);
26 | }
27 | 
28 | template <typename Dtype>
29 | void Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
30 |       vector<Blob<Dtype>*>* top) {
31 |   const Dtype* bottom_data = bottom[0]->cpu_data();
32 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
33 |   for (int n = 0; n < bottom[0]->num(); ++n) {
34 |     im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
35 |         WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n));
36 |   }
37 | }
38 | 
39 | template <typename Dtype>
40 | Dtype Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
41 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
42 |   const Dtype* top_diff = top[0]->cpu_diff();
43 |   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
44 |   for (int n = 0; n < top[0]->num(); ++n) {
45 |     col2im_cpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_,
46 |         WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
47 |   }
48 |   return Dtype(0.);
49 | }
50 | 
51 | INSTANTIATE_CLASS(Im2colLayer);
52 | 
53 | }  // namespace caffe
54 | 


--------------------------------------------------------------------------------
/include/caffe/syncedmem.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #ifndef CAFFE_SYNCEDMEM_HPP_
 4 | #define CAFFE_SYNCEDMEM_HPP_
 5 | 
 6 | #include <cstdlib>
 7 | 
 8 | #include "caffe/common.hpp"
 9 | 
10 | namespace caffe {
11 | 
12 | // Theoretically, CaffeMallocHost and CaffeFreeHost should simply call the
13 | // cudaMallocHost and cudaFree functions in order to create pinned memory.
14 | // However, those codes rely on the existence of a cuda GPU (I don't know
15 | // why that is a must since allocating memory should not be accessing the
16 | // GPU resource, but it just creates an error as of Cuda 5.0) and will cause
17 | // problem when running on a machine without GPU. Thus, we simply define
18 | // these two functions for safety and possible future change if the problem
19 | // of calling cuda functions disappears in a future version.
20 | //
21 | // In practice, although we are creating unpinned memory here, as long as we
22 | // are constantly accessing them the memory pages almost always stays in
23 | // the physical memory (assuming we have large enough memory installed), and
24 | // does not seem to create a memory bottleneck here.
25 | 
// Allocates `size` bytes with malloc and stores the resulting pointer in
// *ptr (null when malloc fails).
inline void CaffeMallocHost(void** ptr, size_t size) {
  void* block = malloc(size);
  *ptr = block;
}
29 | 
// Hands `ptr` back to free(); counterpart of CaffeMallocHost.
inline void CaffeFreeHost(void* ptr) { free(ptr); }
33 | 
34 | 
35 | class SyncedMemory {
36 |  public:
37 |   SyncedMemory()
38 |       : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED) {}
39 |   explicit SyncedMemory(size_t size)
40 |       : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED) {}
41 |   ~SyncedMemory();
42 |   const void* cpu_data();
43 |   //const void* gpu_data();
44 |   void* mutable_cpu_data();
45 |   //void* mutable_gpu_data();
46 |   enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, SYNCED };
47 |   SyncedHead head() { return head_; }
48 |   size_t size() { return size_; }
49 |  private:
50 |   void to_cpu();
51 |   //void to_gpu();
52 |   void* cpu_ptr_;
53 |   void* gpu_ptr_;
54 |   size_t size_;
55 |   SyncedHead head_;
56 | 
57 |   DISABLE_COPY_AND_ASSIGN(SyncedMemory);
58 | };  // class SyncedMemory
59 | 
60 | }  // namespace caffe
61 | 
62 | #endif  // CAFFE_SYNCEDMEM_HPP_
63 | 


--------------------------------------------------------------------------------
/src/caffe/layers/softmax_loss_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <algorithm>
 4 | #include <cfloat>
 5 | #include <vector>
 6 | 
 7 | #include "caffe/layer.hpp"
 8 | #include "caffe/vision_layers.hpp"
 9 | #include "caffe/util/math_functions.hpp"
10 | 
11 | using std::max;
12 | 
13 | namespace caffe {
14 | 
15 | template <typename Dtype>
16 | void SoftmaxWithLossLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
17 |       vector<Blob<Dtype>*>* top) {
18 |   CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input.";
19 |   CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output.";
20 |   softmax_bottom_vec_.clear();
21 |   softmax_bottom_vec_.push_back(bottom[0]);
22 |   softmax_top_vec_.push_back(&prob_);
23 |   softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_);
24 | }
25 | 
26 | template <typename Dtype>
27 | void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
28 |     const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
29 |   // The forward pass computes the softmax prob values.
30 |   softmax_bottom_vec_[0] = bottom[0];
31 |   softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_);
32 | }
33 | 
34 | template <typename Dtype>
35 | Dtype SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
36 |     const bool propagate_down,
37 |     vector<Blob<Dtype>*>* bottom) {
38 |   // First, compute the diff
39 |   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
40 |   const Dtype* prob_data = prob_.cpu_data();
41 |   memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count());
42 |   const Dtype* label = (*bottom)[1]->cpu_data();
43 |   int num = prob_.num();
44 |   int dim = prob_.count() / num;
45 |   Dtype loss = 0;
46 |   for (int i = 0; i < num; ++i) {
47 | 	  CHECK_LT(label[i], dim);
48 |     bottom_diff[i * dim + static_cast<int>(label[i])] -= 1;
49 |     loss += -log(max(prob_data[i * dim + static_cast<int>(label[i])],
50 |                      Dtype(FLT_MIN)));
51 |   }
52 |   // Scale down gradient
53 |   caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff);
54 |   return loss / num;
55 | }
56 | 
57 | 
58 | INSTANTIATE_CLASS(SoftmaxWithLossLayer);
59 | 
60 | 
61 | }  // namespace caffe
62 | 


--------------------------------------------------------------------------------
/src/caffe/layers/tanh_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Aravindh Mahendran
 2 | // TanH neuron activation function layer.
 3 | // Adapted from ReLU layer code written by Yangqing Jia
 4 | 
 5 | #include <algorithm>
 6 | #include <vector>
 7 | 
 8 | #include "caffe/layer.hpp"
 9 | #include "caffe/vision_layers.hpp"
10 | 
11 | #ifdef __SSE2__
12 | #include "caffe/fmath.hpp"
13 | #define EXP(x) fmath::exp(x)
14 | #else
#define cast_uint32_t static_cast<uint32_t>

// Approximation of 2^p: the bit pattern of the resulting float is computed
// arithmetically and then reinterpreted through a union. Inputs below -126
// are clamped to -126.
static inline float fastpow2(float p) {
  const float offset = (p < 0) ? 1.0f : 0.0f;
  const float clipped = (p < -126) ? -126.0f : p;
  const int whole = clipped;  // truncates toward zero
  const float frac = clipped - whole + offset;
  union {
    uint32_t i;
    float f;
  } bits = { cast_uint32_t ( (1 << 23) * (clipped + 121.2740575f + 27.7280233f / (4.84252568f - frac) - 1.49012907f * frac) ) };
  return bits.f;
}

// Approximation of e^p, computed as 2^(p * log2(e)).
static inline float fastexp(float p) {
  const float log2_e = 1.442695040f;
  return fastpow2(log2_e * p);
}
33 | #define EXP(x) fastexp(x)
34 | #endif
35 | 
36 | namespace caffe {
37 | 
38 | template <typename Dtype>
39 | 	void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
40 | 			vector<Blob<Dtype>*>* top) {
41 | 		const Dtype* bottom_data = bottom[0]->cpu_data();
42 | 		Dtype* top_data = (*top)[0]->mutable_cpu_data();
43 | 		Dtype exp2x;
44 | 		const int count = bottom[0]->count();
45 | 		for (int i = 0; i < count; ++i) {
46 | 			exp2x = EXP(2*bottom_data[i]);
47 | 			top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1));
48 | 		}
49 | 	}
50 | 
51 | template <typename Dtype>
52 | 	Dtype TanHLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
53 | 			const bool propagate_down,
54 | 			vector<Blob<Dtype>*>* bottom) {
55 | 		if (propagate_down) {
56 | 			const Dtype* bottom_data = (*bottom)[0]->cpu_data();
57 | 			const Dtype* top_diff = top[0]->cpu_diff();
58 | 			Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
59 | 			const int count = (*bottom)[0]->count();
60 | 			Dtype exp2x;
61 | 			Dtype tanhx;
62 | 			for (int i = 0; i < count; ++i) {
63 | 				exp2x = exp(2*bottom_data[i]);
64 | 				tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1));
65 | 				bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx);
66 | 			}
67 | 		}
68 | 		return Dtype(0);
69 | 	}
70 | 
71 | INSTANTIATE_CLASS(TanHLayer);
72 | 
73 | }  // namespace caffe
74 | 


--------------------------------------------------------------------------------
/include/caffe/blob.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #ifndef CAFFE_BLOB_HPP_
 4 | #define CAFFE_BLOB_HPP_
 5 | 
 6 | #include "caffe/common.hpp"
 7 | #include "caffe/syncedmem.hpp"
 8 | #include "caffe/proto/caffe.pb.h"
 9 | 
10 | namespace caffe {
11 | 
12 | template <typename Dtype>
13 | class Blob {
14 |  public:
15 |   Blob()
16 |        : num_(0), channels_(0), height_(0), width_(0), count_(0), data_(),
17 |        diff_() {}
18 |   explicit Blob(const int num, const int channels, const int height,
19 |     const int width);
20 |   virtual ~Blob() {}
21 |   void Reshape(const int num, const int height,
22 |       const int width, const int channels);
23 |   inline int num() const { return num_; }
24 |   inline int channels() const { return channels_; }
25 |   inline int height() const { return height_; }
26 |   inline int width() const { return width_; }
27 |   inline int count() const {return count_; }
28 |   inline int offset(const int n, const int c = 0, const int h = 0,
29 |       const int w = 0) const {
30 |     return ((n * channels_ + c) * height_ + h) * width_ + w;
31 |   }
32 |   // Copy from source. If copy_diff is false, we copy the data; if copy_diff
33 |   // is true, we copy the diff.
34 |   void CopyFrom(const Blob<Dtype>& source, bool copy_diff = false,
35 |       bool reshape = false);
36 | 
37 |   inline Dtype data_at(const int n, const int c, const int h,
38 |       const int w) const {
39 |     return *(cpu_data() + offset(n, c, h, w));
40 |   }
41 | 
42 |   inline Dtype diff_at(const int n, const int c, const int h,
43 |       const int w) const {
44 |     return *(cpu_diff() + offset(n, c, h, w));
45 |   }
46 | 
47 |   const Dtype* cpu_data() const;
48 |   //const Dtype* gpu_data() const;
49 |   const Dtype* cpu_diff() const;
50 |   //const Dtype* gpu_diff() const;
51 |   Dtype* mutable_cpu_data();
52 |   //Dtype* mutable_gpu_data();
53 |   Dtype* mutable_cpu_diff();
54 |   //Dtype* mutable_gpu_diff();
55 |   void Update();
56 |   void FromProto(const BlobProto& proto);
57 |   void ToProto(BlobProto* proto, bool write_diff = false) const;
58 | 
59 |  protected:
60 |   int num_;
61 |   int channels_;
62 |   int height_;
63 |   int width_;
64 |   int count_;
65 |   shared_ptr<SyncedMemory> data_;
66 |   shared_ptr<SyncedMemory> diff_;
67 | 
68 |   DISABLE_COPY_AND_ASSIGN(Blob);
69 | };  // class Blob
70 | 
71 | }  // namespace caffe
72 | 
73 | #endif  // CAFFE_BLOB_HPP_
74 | 


--------------------------------------------------------------------------------
/src/caffe/layers/dropout_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <vector>
 4 | #include <climits>
 5 | 
 6 | #include "caffe/common.hpp"
 7 | #include "caffe/layer.hpp"
 8 | #include "caffe/syncedmem.hpp"
 9 | #include "caffe/vision_layers.hpp"
10 | 
11 | namespace caffe {
12 | 
13 | template <typename Dtype>
14 | void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
15 |       vector<Blob<Dtype>*>* top) {
16 |   NeuronLayer<Dtype>::SetUp(bottom, top);
17 |   // Set up the cache for random number generation
18 |   rand_vec_.reset(new SyncedMemory(bottom[0]->count() * sizeof(int)));
19 |   threshold_ = this->layer_param_.dropout_ratio();
20 |   DCHECK(threshold_ > 0.);
21 |   DCHECK(threshold_ < 1.);
22 |   scale_ = 1. / (1. - threshold_);
23 |   uint_thres_ = (unsigned int)(UINT_MAX * threshold_);
24 | }
25 | 
26 | template <typename Dtype>
27 | void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
28 |     vector<Blob<Dtype>*>* top) {
29 |   const Dtype* bottom_data = bottom[0]->cpu_data();
30 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
31 |   int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
32 |   const int count = bottom[0]->count();
33 |   if (Caffe::phase() == Caffe::TRAIN) {
34 |     // Create random numbers
35 | #if 0 
36 |     viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(),
37 |         count, mask, 1. - threshold_);
38 |     for (int i = 0; i < count; ++i) {
39 |       top_data[i] = bottom_data[i] * mask[i] * scale_;
40 |     }
41 | #else
42 |     NOT_IMPLEMENTED;
43 | #endif
44 |   } else {
45 |     memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype));
46 |   }
47 | }
48 | 
49 | template <typename Dtype>
50 | Dtype DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
51 |     const bool propagate_down,
52 |     vector<Blob<Dtype>*>* bottom) {
53 |   CHECK(Caffe::phase() == Caffe::TRAIN);
54 |   if (propagate_down) {
55 |     const Dtype* top_diff = top[0]->cpu_diff();
56 |     Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
57 |     const int* mask = reinterpret_cast<const int*>(rand_vec_->cpu_data());
58 |     const int count = (*bottom)[0]->count();
59 |     for (int i = 0; i < count; ++i) {
60 |       bottom_diff[i] = top_diff[i] * mask[i] * scale_;
61 |     }
62 |   }
63 |   return Dtype(0);
64 | }
65 | 
66 | 
67 | INSTANTIATE_CLASS(DropoutLayer);
68 | 
69 | 
70 | }  // namespace caffe
71 | 


--------------------------------------------------------------------------------
/src/caffe/layers/split_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Jeff Donahue
 2 | 
 3 | #include <vector>
 4 | 
 5 | #include "caffe/layer.hpp"
 6 | #include "caffe/vision_layers.hpp"
 7 | #include "caffe/util/math_functions.hpp"
 8 | 
 9 | namespace caffe {
10 | 
11 | template <typename Dtype>
12 | void SplitLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
13 |       vector<Blob<Dtype>*>* top) {
14 |   CHECK_EQ(bottom.size(), 1) << "Split Layer takes a single blob as input.";
15 |   CHECK_GE(top->size(), 1) << "Split Layer takes at least one blob as output.";
16 |   count_ = bottom[0]->count();
17 |   for (int i = 0; i < top->size(); ++i) {
18 |     // Allow the 0th top blob to be 'in-place', but no others.
19 |     if (i == 0 && (*top)[i] == bottom[0]) {
20 |       continue;
21 |     } else {
22 |       CHECK_NE((*top)[i], bottom[0]) << "Only 0th top blob may be in place.";
23 |     }
24 |     (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
25 |                        bottom[0]->height(), bottom[0]->width());
26 |     CHECK_EQ(count_, (*top)[i]->count());
27 |   }
28 | }
29 | 
30 | template <typename Dtype>
31 | void SplitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
32 |       vector<Blob<Dtype>*>* top) {
33 |   const Dtype* bottom_data = bottom[0]->cpu_data();
34 |   for (int i = 0; i < top->size(); ++i) {
35 |     if (i == 0 && (*top)[i] == bottom[0]) {
36 |       continue;
37 |     }
38 |     Dtype* top_data = (*top)[i]->mutable_cpu_data();
39 |     caffe_copy(count_, bottom_data, top_data);
40 |   }
41 | }
42 | 
43 | template <typename Dtype>
44 | Dtype SplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
45 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
46 |   if (propagate_down) {
47 |     const Dtype* top_diff = top[0]->cpu_diff();
48 |     Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
49 |     // Initialize by copying first top blob diff to our diff, unless we're
50 |     // doing in-place computation for the first blob, in which case the diff is
51 |     // already initialized.
52 |     if (top[0] != (*bottom)[0]) {
53 |       caffe_copy(count_, top_diff, bottom_diff);
54 |     }
55 |     // Add remaining top blob diffs.
56 |     for (int i = 1; i < top.size(); ++i) {
57 |       top_diff = top[i]->cpu_diff();
58 |       caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff);
59 |     }
60 |   }
61 |   return Dtype(0.);
62 | }
63 | 
64 | 
65 | INSTANTIATE_CLASS(SplitLayer);
66 | 
67 | }  // namespace caffe
68 | 


--------------------------------------------------------------------------------
/include/caffe/util/im2col.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #ifndef _CAFFE_UTIL_IM2COL_HPP_
 4 | #define _CAFFE_UTIL_IM2COL_HPP_
 5 | 
 6 | namespace caffe {
 7 | 
// NOTE(review): only declarations are visible in this header; the comments
// below describe the signatures and hedge anything about behaviour.

// CPU image -> column-buffer transform, parameterised by square kernel size,
// padding and stride (presumably the usual convolution unrolling -- confirm
// against util/im2col.cpp).
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
    const int height, const int width, const int ksize, const int pad,
    const int stride, Dtype* data_col);

// CPU column-buffer -> image transform; note the kernel-size parameter is
// named psize here but ksize in im2col_cpu.
template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
    const int height, const int width, const int psize, const int pad,
    const int stride, Dtype* data_im);

// GPU counterpart of im2col_cpu (declaration only in this header).
template <typename Dtype>
void im2col_gpu(const Dtype* data_im, const int channels,
    const int height, const int width, const int ksize, const int pad,
    const int stride, Dtype* data_col);

// GPU counterpart of col2im_cpu (declaration only in this header).
template <typename Dtype>
void col2im_gpu(const Dtype* data_col, const int channels,
    const int height, const int width, const int psize, const int pad,
    const int stride, Dtype* data_im);

// Tiled im2col variant on the GPU: takes separate vertical/horizontal strides
// and the column-buffer height/width instead of the image size.
template <typename Dtype>
void im2col_tile_gpu(const Dtype* data_im, const int channels,
		const int stride_h, const int stride_w,
    const int ksize, Dtype* data_col, 
    const int height_col, const int width_col);

// Strided copy from src_data to dst_data on the GPU.
template <typename Dtype>
void copy_stride_gpu(const Dtype* src_data, 
		const int channels,
		const int height, const int width, Dtype *dst_data, 
		const int stride_h, const int stride_w);

// Strided copy from src_data to dst_data on the CPU.
template <typename Dtype>
void copy_stride_cpu(const Dtype* src_data, 
		const int channels,
		const int height, const int width, Dtype *dst_data, 
		const int stride_h, const int stride_w);


// Reverse-direction strided copy on the GPU: here src_data is the mutable
// buffer and dst_data is read-only (the const-ness is swapped relative to
// copy_stride_gpu).
template <typename Dtype>
void copy_stride_gather_gpu(Dtype* src_data, 
		const int channels,
		const int height, const int width, const Dtype *dst_data, 
		const int stride_h, const int stride_w);

// Tiled col2im variant on the GPU; parameters mirror im2col_tile_gpu.
template <typename Dtype>
void col2im_tile_gpu(const Dtype* data_col, const int channels,
    const int height_col, const int width_col,
    const int ksize,
    const int stride_h, const int stride_w,
    Dtype* data_im);

// CPU counterpart of im2col_tile_gpu.
template <typename Dtype>
void im2col_tile_cpu(const Dtype* data_im, const int channels,
		const int stride_h, const int stride_w,
    const int ksize, Dtype* data_col, 
    const int height_col, const int width_col);
65 | 
66 | }  // namespace caffe
67 | 
68 | #endif  // CAFFE_UTIL_IM2COL_HPP_
69 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | CROSS_COMPILE?=
 2 | CXX=$(CROSS_COMPILE)g++
 3 | AR=$(CROSS_COMPILE)ar
 4 | PROJECT := caffe
 5 | STATIC_NAME := lib$(PROJECT).a
 6 | USE_EIGEN?=y
 7 | 
 8 | CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp")
 9 | HXX_SRCS := $(shell find include/$(PROJECT) ! -name "*.hpp")
10 | PROTO_SRCS := $(wildcard src/$(PROJECT)/proto/*.proto)
11 | 
12 | PROTO_GEN_HEADER := ${PROTO_SRCS:.proto=.pb.h}
13 | PROTO_GEN_CC := ${PROTO_SRCS:.proto=.pb.cc}
14 | 
15 | BUILD_DIR := build
16 | CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o})
17 | PROTO_OBJS := $(addprefix $(BUILD_DIR)/, ${PROTO_GEN_CC:.cc=.o})
18 | OBJS := $(PROTO_OBJS) $(CXX_OBJS)
19 | 
20 | INCLUDE_DIRS += ./src ./include ./protobuf-2.4.1/build/include
21 | CXXFLAGS+=-std=gnu++0x
22 | CXXFLAGS+=$(EXTRA_CXXFLAGS)
23 | CXXFLAGS+=-fvisibility=hidden #hide symbols for static lib
24 | LDFLAGS+=-L./protobuf-2.4.1/build/lib
25 | LIBRARIES:=protobuf 
26 | 
27 | ifeq ($(USE_EIGEN), y)
28 | 	CXXFLAGS += -DUSE_EIGEN
29 | 	CXXFLAGS += -I./eigen3
30 | else
31 | 	LIBRARIES += cblas
32 | endif
33 | 
34 | 
35 | COMMON_FLAGS := -O2 $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
36 | CXXFLAGS +=  -fPIC $(COMMON_FLAGS)
37 | LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) \
38 | 		$(foreach library,$(LIBRARIES),-l$(library))
39 | 
40 | all: init $(STATIC_NAME) 
41 | 
42 | init:
43 | 	@ mkdir -p $(foreach obj,$(OBJS),$(dir $(obj)))
44 | 
45 | $(OBJS): $(PROTO_GEN_CC) $(HXX_SRCS)
46 | 
47 | $(BUILD_DIR)/src/$(PROJECT)/%.o: src/$(PROJECT)/%.cpp
48 | 	$(CXX) $< $(CXXFLAGS) -c -o $@
49 | 
50 | $(BUILD_DIR)/src/$(PROJECT)/layers/%.o: src/$(PROJECT)/layers/%.cpp
51 | 	$(CXX) $< $(CXXFLAGS) -c -o $@
52 | 
53 | $(BUILD_DIR)/src/$(PROJECT)/proto/%.o: src/$(PROJECT)/proto/%.cc
54 | 	$(CXX) $< $(CXXFLAGS) -c -o $@
55 | 
56 | $(PROTO_GEN_CC): $(PROTO_SRCS)
57 | 	protoc --proto_path=src --cpp_out=src $(PROTO_SRCS)
58 | 	mkdir -p include/$(PROJECT)/proto
59 | 	cp $(PROTO_GEN_HEADER) include/$(PROJECT)/proto/
60 | 	@echo
61 | 
62 | $(STATIC_NAME): init $(PROTO_OBJS) $(OBJS)
63 | 	$(AR) rcs $(STATIC_NAME) $(PROTO_OBJS) $(OBJS)
64 | 	@echo
65 | 
66 | feat_net_raw: feat_net_raw.cpp $(STATIC_NAME)
67 | 	$(CXX) $< $(CXXFLAGS) -o $@ -L. -lcaffe $(LDFLAGS) -lpthread
68 | 
69 | align_test: align_test.cpp $(STATIC_NAME)
70 | 	$(CXX) $< $(CXXFLAGS) -o $@ -L. -lcaffe $(LDFLAGS) $(shell pkg-config --libs opencv)
71 | 
72 | clean:
73 | 	@- $(RM) $(NAME) $(STATIC_NAME)
74 | 	@- $(RM) $(PROTO_GEN_HEADER) $(PROTO_GEN_CC) $(PROTO_GEN_PY)
75 | 	@- $(RM) include/$(PROJECT)/proto/$(PROJECT).pb.h
76 | 	@- $(RM) -rf $(BUILD_DIR)
77 | 	@- rm -f feat_net_raw
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/src/caffe/layers/raw_image_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <stdint.h>
 4 | 
 5 | #include <string>
 6 | #include <vector>
 7 | #include <iostream>  // NOLINT(readability/streams)
 8 | #include <fstream>  // NOLINT(readability/streams)
 9 | 
10 | #include "caffe/layer.hpp"
11 | #include "caffe/util/io.hpp"
12 | #include "caffe/vision_layers.hpp"
13 | 
14 | using std::string;
15 | using std::pair;
16 | 
17 | namespace caffe {
18 | 
19 | template <typename Dtype>
20 | RawImageLayer<Dtype>::~RawImageLayer<Dtype>() {
21 |   // Finally, join the thread
22 | }
23 | 
24 | template <typename Dtype>
25 | void RawImageLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
26 |       vector<Blob<Dtype>*>* top) {
27 |   CHECK_EQ(bottom.size(), 0) << "Input Layer takes no input blobs.";
28 |   CHECK_EQ(top->size(), 2) << "Input Layer takes two blobs as output.";
29 |   // datum size
30 |   datum_height_ = this->layer_param_.new_height();
31 |   datum_width_ = this->layer_param_.new_width();
32 |   datum_channels_ = this->layer_param_.new_channels();
33 |   datum_size_ = datum_channels_ * datum_height_ * datum_width_;
34 |   // Read the file with filenames and labels
35 |   (*top)[0]->Reshape(
36 |         this->layer_param_.batchsize(), datum_channels_, 
37 | 	datum_height_, datum_width_);
38 |   LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
39 |       << (*top)[0]->channels() << "," << (*top)[0]->height() << ","
40 |       << (*top)[0]->width();
41 |   // label
42 |   (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1);
43 |  }
44 | 
45 | template <typename Dtype>
46 | void RawImageLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
47 |       vector<Blob<Dtype>*>* top) {
48 |   (*top)[0]->cpu_data();
49 |   (*top)[1]->cpu_data();
50 |  }
51 | 
52 | #if 0
53 | template <typename Dtype>
54 | void RawImageLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
55 |       vector<Blob<Dtype>*>* top) {
56 |   (*top)[0]->gpu_data();
57 |   (*top)[1]->gpu_data();
58 | }
59 | #endif
60 | 
61 | // The backward operations are dummy - they do not carry any computation.
62 | template <typename Dtype>
63 | Dtype RawImageLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
64 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
65 |   return Dtype(0.);
66 | }
67 | 
68 | #if 0
69 | template <typename Dtype>
70 | Dtype RawImageLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
71 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
72 |   return Dtype(0.);
73 | }
74 | #endif
75 | 
76 | INSTANTIATE_CLASS(RawImageLayer);
77 | 
78 | }  // namespace caffe
79 | 


--------------------------------------------------------------------------------
/include/caffe/common.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #ifndef CAFFE_COMMON_HPP_
 4 | #define CAFFE_COMMON_HPP_
 5 | 
 6 | //#include <boost/shared_ptr.hpp>
 7 | #include <memory>
 8 | #include <cmath>
 9 | #include <cstdint>
10 | #include "glog-compact.hpp"
11 | 
// Disable the copy and assignment operator for a class.
// Declares (without defining) the copy constructor and copy assignment
// operator in a private section. Because the expansion starts with
// `private:`, any member declared after this macro in the class is private
// too. The expansion has no trailing semicolon; the caller supplies it.
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:\
  classname(const classname&);\
  classname& operator=(const classname&)

// Instantiate a class with float and double specifications.
// Emits explicit template instantiations classname<float> and
// classname<double>; the caller supplies the final semicolon.
#define INSTANTIATE_CLASS(classname) \
  template class classname<float>; \
  template class classname<double>

// A simple macro to mark codes that are not implemented, so that when the code
// is executed we will see a fatal log.
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"
26 | 
27 | 
28 | namespace caffe {
29 | 
// Upstream Caffe used boost::shared_ptr because CUDA did not work well with
// C++11 features; this header uses the C++11 std::shared_ptr instead.
32 | //using boost::shared_ptr;
33 | using std::shared_ptr;
34 | 
35 | 
// A singleton class to hold common caffe stuff, such as the handler that
// caffe is going to use for cublas, curand, etc.
// In this header it carries two pieces of global state: the mode (CPU/GPU)
// and the phase (TRAIN/TEST).
class Caffe {
 public:
  ~Caffe();
  // Returns the single instance, creating it on first use. The check-then-
  // create sequence uses no locking.
  inline static Caffe& Get() {
    if (!singleton_.get()) {
      singleton_.reset(new Caffe());
    }
    return *singleton_;
  }
  enum Brew { CPU, GPU };
  enum Phase { TRAIN, TEST };

  // Returns the mode: running on CPU or GPU.
  inline static Brew mode() { return Get().mode_; }
  // Returns the phase: TRAIN or TEST.
  inline static Phase phase() { return Get().phase_; }
  // The setters for the variables
  // Sets the mode. It is recommended that you don't change the mode halfway
  // into the program since that may cause allocation of pinned memory being
  // freed in a non-pinned way, which may cause problems - I haven't verified
  // it personally but better to note it here in the header file.
  inline static void set_mode(Brew mode) { Get().mode_ = mode; }
  // Sets the phase.
  inline static void set_phase(Phase phase) { Get().phase_ = phase; }
  // Sets the random seed of both MKL and curand
  // NOTE(review): the definition is not in this header; confirm what this
  // actually seeds in this build.
  static void set_random_seed(const unsigned int seed);
  // Device selection / query entry points; defined outside this header.
  static void SetDevice(const int device_id);
  static void DeviceQuery();
 protected:
  Brew mode_;
  Phase phase_;
  // Storage for the lazily created singleton returned by Get().
  static shared_ptr<Caffe> singleton_;

 private:
  // The private constructor to avoid duplicate instantiation.
  Caffe();

  DISABLE_COPY_AND_ASSIGN(Caffe);
};
77 | 
78 | 
79 | }  // namespace caffe
80 | 
81 | #endif  // CAFFE_COMMON_HPP_
82 | 


--------------------------------------------------------------------------------
/src/caffe/layers/padding_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #include <iostream>  // NOLINT(readability/streams)
 4 | #include <vector>
 5 | 
 6 | #include "caffe/layer.hpp"
 7 | #include "caffe/vision_layers.hpp"
 8 | 
 9 | namespace caffe {
10 | 
11 | template <typename Dtype>
12 | void PaddingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
13 |       vector<Blob<Dtype>*>* top) {
14 |   // DEPRECATION
15 |   LOG(WARNING) << "Padding layers are deprecated in favor of padding-aware "
16 |                   "convolutions and WILL BE REMOVED. Please update your model "
17 |                   "prototxt to replace padding layers with pad fields. "
18 |                   "See https://github.com/BVLC/caffe/pull/128.";
19 |   PAD_ = this->layer_param_.pad();
20 |   CHECK_EQ(bottom.size(), 1) << "Padding Layer takes a single blob as input.";
21 |   CHECK_EQ(top->size(), 1) << "Padding Layer takes a single blob as output.";
22 |   NUM_ = bottom[0]->num();
23 |   CHANNEL_ = bottom[0]->channels();
24 |   HEIGHT_IN_ = bottom[0]->height();
25 |   WIDTH_IN_ = bottom[0]->width();
26 |   HEIGHT_OUT_ = HEIGHT_IN_ + PAD_ * 2;
27 |   WIDTH_OUT_ = WIDTH_IN_ + PAD_ * 2;
28 |   (*top)[0]->Reshape(NUM_, CHANNEL_, HEIGHT_OUT_, WIDTH_OUT_);
29 | }
30 | 
31 | template <typename Dtype>
32 | void PaddingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
33 |       vector<Blob<Dtype>*>* top) {
34 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
35 |   const Dtype* bottom_data = bottom[0]->cpu_data();
36 |   memset(top_data, 0, sizeof(Dtype) * (*top)[0]->count());
37 |   // In short, top[n, c, h, w] = bottom[n, c, h-pad, w-pad] if in range
38 |   for (int n = 0; n < NUM_; ++n) {
39 |     for (int c = 0; c < CHANNEL_; ++c) {
40 |       for (int h = 0; h < HEIGHT_IN_; ++h) {
41 |         // copy the width part
42 |         memcpy(
43 |             top_data + ((n * CHANNEL_ + c) * HEIGHT_OUT_ + h + PAD_)
44 |                 * WIDTH_OUT_ + PAD_,
45 |             bottom_data + ((n * CHANNEL_ + c) * HEIGHT_IN_ + h) * WIDTH_IN_,
46 |             sizeof(Dtype) * WIDTH_IN_);
47 |       }
48 |     }
49 |   }
50 | }
51 | 
52 | template <typename Dtype>
53 | Dtype PaddingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
54 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
55 |   const Dtype* top_diff = top[0]->cpu_diff();
56 |   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
57 |   for (int n = 0; n < NUM_; ++n) {
58 |     for (int c = 0; c < CHANNEL_; ++c) {
59 |       for (int h = 0; h < HEIGHT_IN_; ++h) {
60 |         // copy the width part
61 |         memcpy(
62 |             bottom_diff + ((n * CHANNEL_ + c) * HEIGHT_IN_ + h) * WIDTH_IN_,
63 |             top_diff + ((n * CHANNEL_ + c) * HEIGHT_OUT_ + h + PAD_)
64 |                 * WIDTH_OUT_ + PAD_,
65 |             sizeof(Dtype) * WIDTH_IN_);
66 |       }
67 |     }
68 |   }
69 |   return Dtype(0.);
70 | }
71 | 
72 | INSTANTIATE_CLASS(PaddingLayer);
73 | 
74 | }  // namespace caffe
75 | 


--------------------------------------------------------------------------------
/.ycm_extra_conf.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import ycm_core
 3 | from clang_helpers import PrepareClangFlags
 4 |  
 5 | # Set this to the absolute path to the folder (NOT the file!) containing the
 6 | # compile_commands.json file to use that instead of 'flags'. See here for
 7 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
 8 | # Most projects will NOT need to set this to anything; you can just change the
 9 | # 'flags' list of compilation flags. Notice that YCM itself uses that approach.
10 | compilation_database_folder = ''
11 |  
12 | # These are the compilation flags that will be used in case there's no
13 | # compilation database set.
14 | flags = [
15 |     '-Wall',
16 |     '-std=c++11',
17 |     '-stdlib=libc++',
18 |     '-x',
19 |     'c++',
20 |     '-I',
21 |     '.',
22 |     '-I',
23 |     './include',
24 |     '-I',
25 |     '../include',
26 |     '-I',
27 |     './eigen3',
28 |     '-isystem',
29 |     '/usr/include/c++/4.6',
30 |     '-isystem',
31 |     '/usr/lib/openmpi/include/',
32 |     '-isystem',
33 |     '/usr/include/c++/4.6/x86_64-linux-gnu/'
34 |     ]
35 | 
36 | if compilation_database_folder:
37 |     database = ycm_core.CompilationDatabase(compilation_database_folder)
38 | else:
39 |     database = None
40 |  
41 |  
def DirectoryOfThisScript():
    """Return the absolute path of the directory containing this file."""
    script_path = os.path.abspath(__file__)
    return os.path.dirname(script_path)
44 |  
45 |  
def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
    """Return flags with relative include/sysroot paths made absolute.

    Paths are resolved against working_directory. Both the two-token form
    ('-I', 'path') and the fused form ('-Ipath') are handled for the options
    -isystem, -I, -iquote and --sysroot=. Flags that end up empty are
    dropped. If working_directory is empty the input is returned unchanged.
    """
    if not working_directory:
        return flags

    path_flags = ['-isystem', '-I', '-iquote', '--sysroot=']
    absolute_flags = []
    pending_path = False  # True when the previous flag announced a path.
    for flag in flags:
        rewritten = flag

        if pending_path:
            pending_path = False
            if not flag.startswith('/'):
                rewritten = os.path.join(working_directory, flag)

        if flag in path_flags:
            # Stand-alone path option: its argument is the next flag.
            pending_path = True
        else:
            # Fused form: the path follows the option in the same token.
            prefix = next(
                (p for p in path_flags if flag.startswith(p)), None)
            if prefix is not None:
                rewritten = prefix + os.path.join(
                    working_directory, flag[len(prefix):])

        if rewritten:
            absolute_flags.append(rewritten)
    return absolute_flags
73 |  
74 |  
def FlagsForFile(filename):
    """Return the YCM flag dictionary for filename.

    With a compilation database, the flags recorded for the file are made
    absolute relative to the recorded working directory and passed through
    PrepareClangFlags. Otherwise the module-level 'flags' list is made
    absolute relative to the current working directory.
    """
    if not database:
        # Resolved against the current directory rather than the directory
        # of this script (DirectoryOfThisScript()).
        final_flags = MakeRelativePathsInFlagsAbsolute(flags, os.getcwd())
    else:
        # compiler_flags_ is a list-like StringVec object, not a Python list.
        info = database.GetCompilationInfoForFile(filename)
        absolute = MakeRelativePathsInFlagsAbsolute(
            info.compiler_flags_,
            info.compiler_working_dir_)
        final_flags = PrepareClangFlags(absolute, filename)

    result = {'flags': final_flags}
    result['do_cache'] = True
    return result
93 | 


--------------------------------------------------------------------------------
/src/caffe/layers/softmax_layer.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | //
 3 | #include <algorithm>
 4 | #include <vector>
 5 | 
 6 | #include "caffe/layer.hpp"
 7 | #include "caffe/vision_layers.hpp"
 8 | #include "caffe/util/math_functions.hpp"
 9 | 
10 | using std::max;
11 | 
12 | namespace caffe {
13 | 
14 | template <typename Dtype>
15 | void SoftmaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
16 |       vector<Blob<Dtype>*>* top) {
17 |   CHECK_EQ(bottom.size(), 1) << "Softmax Layer takes a single blob as input.";
18 |   CHECK_EQ(top->size(), 1) << "Softmax Layer takes a single blob as output.";
19 |   (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
20 |       bottom[0]->height(), bottom[0]->width());
21 |   sum_multiplier_.Reshape(1, bottom[0]->channels(),
22 |       bottom[0]->height(), bottom[0]->width());
23 |   Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data();
24 |   for (int i = 0; i < sum_multiplier_.count(); ++i) {
25 |     multiplier_data[i] = 1.;
26 |   }
27 |   scale_.Reshape(bottom[0]->num(), 1, 1, 1);
28 | }
29 | 
30 | template <typename Dtype>
31 | void SoftmaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
32 |     vector<Blob<Dtype>*>* top) {
33 |   const Dtype* bottom_data = bottom[0]->cpu_data();
34 |   Dtype* top_data = (*top)[0]->mutable_cpu_data();
35 |   Dtype* scale_data = scale_.mutable_cpu_data();
36 |   int num = bottom[0]->num();
37 |   int dim = bottom[0]->count() / bottom[0]->num();
38 |   memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count());
39 |   // we need to subtract the max to avoid numerical issues, compute the exp,
40 |   // and then normalize.
41 |   for (int i = 0; i < num; ++i) {
42 |     scale_data[i] = bottom_data[i*dim];
43 |     for (int j = 0; j < dim; ++j) {
44 |       scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]);
45 |     }
46 |   }
47 |   // subtraction
48 |   caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
49 |     scale_data, sum_multiplier_.cpu_data(), 1., top_data);
50 |   // Perform exponentiation
51 |   caffe_exp<Dtype>(num * dim, top_data, top_data);
52 |   // sum after exp
53 |   caffe_cpu_gemv<Dtype>(CblasNoTrans, num, dim, 1., top_data,
54 |       sum_multiplier_.cpu_data(), 0., scale_data);
55 |   // Do division
56 |   for (int i = 0; i < num; ++i) {
57 |     caffe_scal<Dtype>(dim, Dtype(1.) / scale_data[i], top_data + i * dim);
58 |   }
59 | }
60 | 
61 | template <typename Dtype>
62 | Dtype SoftmaxLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
63 |     const bool propagate_down,
64 |     vector<Blob<Dtype>*>* bottom) {
65 |   const Dtype* top_diff = top[0]->cpu_diff();
66 |   const Dtype* top_data = top[0]->cpu_data();
67 |   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
68 |   Dtype* scale_data = scale_.mutable_cpu_data();
69 |   int num = top[0]->num();
70 |   int dim = top[0]->count() / top[0]->num();
71 |   memcpy(bottom_diff, top_diff, sizeof(Dtype) * top[0]->count());
72 |   // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff
73 |   for (int i = 0; i < num; ++i) {
74 |     scale_data[i] = caffe_cpu_dot<Dtype>(dim, top_diff + i * dim,
75 |         top_data + i * dim);
76 |   }
77 |   // subtraction
78 |   caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, dim, 1, -1.,
79 |       scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff);
80 |   // elementwise multiplication
81 |   caffe_mul<Dtype>(top[0]->count(), bottom_diff, top_data, bottom_diff);
82 |   return Dtype(0);
83 | }
84 | 
85 | 
86 | INSTANTIATE_CLASS(SoftmaxLayer);
87 | 
88 | 
89 | }  // namespace caffe
90 | 


--------------------------------------------------------------------------------
/src/caffe/layer_factory.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 Yangqing Jia
 2 | 
 3 | #ifndef CAFFE_LAYER_FACTORY_HPP_
 4 | #define CAFFE_LAYER_FACTORY_HPP_
 5 | 
 6 | #include <string>
 7 | 
 8 | #include "caffe/layer.hpp"
 9 | #include "caffe/vision_layers.hpp"
10 | #include "caffe/proto/caffe.pb.h"
11 | 
12 | 
13 | namespace caffe {
14 | 
15 | 
16 | // Builds the layer whose type string is given in the LayerParameter.
17 | // The lookup is a flat sequence of string comparisons, each returning as soon
18 | // as it matches; a proper factory/registry would be the cleaner long-term
19 | // design. Entries inside "#if 0" are compiled out.
20 | template <typename Dtype>
21 | Layer<Dtype>* GetLayer(const LayerParameter& param) {
22 |   const std::string& name = param.type();
23 |   if (name == "accuracy")
24 |     return new AccuracyLayer<Dtype>(param);
25 |   if (name == "verif_accuracy")
26 |     return new VerificationAccuracyLayer<Dtype>(param);
27 |   if (name == "bnll")
28 |     return new BNLLLayer<Dtype>(param);
29 |   if (name == "concat")
30 |     return new ConcatLayer<Dtype>(param);
31 |   if (name == "conv")
32 |     return new ConvolutionLayer<Dtype>(param);
33 | #if 0
34 |   if (name == "data")
35 |     return new DataLayer<Dtype>(param);
36 |   if (name == "shuffle_data")
37 |     return new ShuffleDataLayer<Dtype>(param);
38 | #endif
39 |   if (name == "dropout")
40 |     return new DropoutLayer<Dtype>(param);
41 |   if (name == "dropout_group")
42 |     return new DropoutGroupLayer<Dtype>(param);
43 |   if (name == "euclidean_loss")
44 |     return new EuclideanLossLayer<Dtype>(param);
45 |   if (name == "flatten")
46 |     return new FlattenLayer<Dtype>(param);
47 | #if 0
48 |   if (name == "hdf5_data")
49 |     return new HDF5DataLayer<Dtype>(param);
50 |   if (name == "images")
51 |     return new ImagesLayer<Dtype>(param);
52 | #endif
53 |   if (name == "raw_image")
54 |     return new RawImageLayer<Dtype>(param);
55 |   if (name == "im2col")
56 |     return new Im2colLayer<Dtype>(param);
57 |   if (name == "infogain_loss")
58 |     return new InfogainLossLayer<Dtype>(param);
59 |   if (name == "innerproduct")
60 |     return new InnerProductLayer<Dtype>(param);
61 |   if (name == "lrn")
62 |     return new LRNLayer<Dtype>(param);
63 |   if (name == "multinomial_logistic_loss")
64 |     return new MultinomialLogisticLossLayer<Dtype>(param);
65 |   if (name == "padding")
66 |     return new PaddingLayer<Dtype>(param);
67 |   if (name == "pool")
68 |     return new PoolingLayer<Dtype>(param);
69 |   if (name == "relu")
70 |     return new ReLULayer<Dtype>(param);
71 |   if (name == "sigmoid")
72 |     return new SigmoidLayer<Dtype>(param);
73 |   if (name == "softmax")
74 |     return new SoftmaxLayer<Dtype>(param);
75 |   if (name == "softmax_loss")
76 |     return new SoftmaxWithLossLayer<Dtype>(param);
77 |   if (name == "split")
78 |     return new SplitLayer<Dtype>(param);
79 |   if (name == "tanh")
80 |     return new TanHLayer<Dtype>(param);
81 | #if 0
82 |   if (name == "window_data")
83 |     return new WindowDataLayer<Dtype>(param);
84 | #endif
85 |   // No entry matched the requested type.
86 |   LOG(FATAL) << "Unknown layer name: " << name;
87 |   // Kept only so that every path returns a value (quiets older compilers).
88 |   return static_cast<Layer<Dtype>*>(NULL);
89 | }
90 | 
91 | template Layer<float>* GetLayer(const LayerParameter& param);
92 | template Layer<double>* GetLayer(const LayerParameter& param);
93 | 
94 | }  // namespace caffe
95 | 
96 | #endif  // CAFFE_LAYER_FACTORY_HPP_
97 | 


--------------------------------------------------------------------------------
/include/caffe/util/math_functions.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_
  4 | #define CAFFE_UTIL_MATH_FUNCTIONS_H_
  5 | 
  6 | #include "mkl_alternate.hpp"
  7 | namespace caffe {
  8 | // CBLAS_TRANSPOSE comes from mkl_alternate.hpp (MKL, cblas.h or the Eigen-build enum).
  9 | // Everything below is declared only; the definitions are not part of this header.
 10 | // Decaf gemm provides a simpler interface to the gemm functions, with the
 11 | // limitation that the data has to be contiguous in memory.
 12 | template <typename Dtype>
 13 | void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA,
 14 |     const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
 15 |     const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
 16 |     Dtype* C);
 17 | // Matrix-vector wrapper; presumably BLAS gemv semantics - TODO confirm.
 18 | template <typename Dtype>
 19 | void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
 20 |     const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
 21 |     Dtype* y);
 22 | // GPU counterpart of caffe_cpu_gemv (same parameter list).
 23 | template <typename Dtype>
 24 | void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N,
 25 |     const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta,
 26 |     Dtype* y);
 27 | // Presumably Y = alpha * X + Y (BLAS axpy) - TODO confirm against the definition.
 28 | template <typename Dtype>
 29 | void caffe_axpy(const int N, const Dtype alpha, const Dtype* X,
 30 |     Dtype* Y);
 31 | // GPU counterpart of caffe_axpy (same parameter list).
 32 | template <typename Dtype>
 33 | void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X,
 34 |     Dtype* Y);
 35 | // Presumably Y = alpha * X + beta * Y (cf. cblas_saxpby in mkl_alternate.hpp).
 36 | template <typename Dtype>
 37 | void caffe_axpby(const int N, const Dtype alpha, const Dtype* X,
 38 |     const Dtype beta, Dtype* Y);
 39 | // GPU counterpart of caffe_axpby (same parameter list).
 40 | template <typename Dtype>
 41 | void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X,
 42 |     const Dtype beta, Dtype* Y);
 43 | // Presumably copies N elements from X to Y - TODO confirm.
 44 | template <typename Dtype>
 45 | void caffe_copy(const int N, const Dtype *X, Dtype *Y);
 46 | // GPU counterpart of caffe_copy (same parameter list).
 47 | template <typename Dtype>
 48 | void caffe_gpu_copy(const int N, const Dtype *X, Dtype *Y);
 49 | // Presumably scales X in place by alpha (BLAS scal) - TODO confirm.
 50 | template <typename Dtype>
 51 | void caffe_scal(const int N, const Dtype alpha, Dtype *X);
 52 | // GPU counterpart of caffe_scal (same parameter list).
 53 | template <typename Dtype>
 54 | void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X);
 55 | // Elementwise helpers; presumably y[i] = a[i]^2 here (cf. vSqr in mkl_alternate.hpp).
 56 | template <typename Dtype>
 57 | void caffe_sqr(const int N, const Dtype* a, Dtype* y);
 58 | // Presumably y[i] = a[i] + b[i].
 59 | template <typename Dtype>
 60 | void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
 61 | // Presumably y[i] = a[i] - b[i].
 62 | template <typename Dtype>
 63 | void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
 64 | // GPU counterpart of caffe_sub (same parameter list).
 65 | template <typename Dtype>
 66 | void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
 67 | // Presumably y[i] = a[i] * b[i].
 68 | template <typename Dtype>
 69 | void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
 70 | // GPU counterpart of caffe_mul (same parameter list).
 71 | template <typename Dtype>
 72 | void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
 73 | // Presumably y[i] = a[i] / b[i].
 74 | template <typename Dtype>
 75 | void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y);
 76 | // Presumably y[i] = pow(a[i], b) (cf. vPowx in mkl_alternate.hpp).
 77 | template <typename Dtype>
 78 | void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);
 79 | // Presumably fills r with n uniform random values between a and b - TODO confirm.
 80 | template <typename Dtype>
 81 | void caffe_vRngUniform(const int n, Dtype* r, const Dtype a, const Dtype b);
 82 | // Presumably fills r with n Gaussian random values (mean a, std sigma) - TODO confirm.
 83 | template <typename Dtype>
 84 | void caffe_vRngGaussian(const int n, Dtype* r, const Dtype a,
 85 |     const Dtype sigma);
 86 | // Presumably y[i] = exp(a[i]) (cf. vExp in mkl_alternate.hpp).
 87 | template <typename Dtype>
 88 | void caffe_exp(const int n, const Dtype* a, Dtype* y);
 89 | // Returns a value computed from the two length-n vectors; presumably their dot product.
 90 | template <typename Dtype>
 91 | Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y);
 92 | // GPU counterpart of caffe_cpu_dot; the result is written to *out instead of returned.
 93 | template <typename Dtype>
 94 | void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out);
 95 | 
 96 | }  // namespace caffe
 97 | 
 98 | 
 99 | #endif  // CAFFE_UTIL_MATH_FUNCTIONS_H_
100 | 


--------------------------------------------------------------------------------
/src/caffe/layers/concat_layer.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Sergio Guadarrama
  2 | 
  3 | #include <vector>
  4 | 
  5 | #include "caffe/layer.hpp"
  6 | #include "caffe/vision_layers.hpp"
  7 | #include "caffe/util/math_functions.hpp"
  8 | 
  9 | namespace caffe {
 10 | // Validates the inputs and sizes the top blob as their concatenation along concat_dim_.
 11 | template <typename Dtype>
 12 | void ConcatLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 13 |       vector<Blob<Dtype>*>* top) {
 14 |   CHECK_GT(bottom.size(), 1) <<
 15 |     "Concat Layer takes at least two blobs as input.";
 16 |   CHECK_EQ(top->size(), 1) <<
 17 |     "Concat Layer takes a single blob as output.";
 18 |   concat_dim_ = this->layer_param_.concat_dim();
 19 |   CHECK_GE(concat_dim_, 0) << "concat_dim should be >= 0";
 20 |   CHECK_LE(concat_dim_, 1) <<
 21 |     "For now concat_dim <=1, it can only concat num and channels";
 22 |   // Initialize with the first blob; only the concat axis grows in the loop.
 23 |   COUNT_ = bottom[0]->count();
 24 |   NUM_ = bottom[0]->num();
 25 |   CHANNELS_ = bottom[0]->channels();
 26 |   HEIGHT_ = bottom[0]->height();
 27 |   WIDTH_ = bottom[0]->width();
 28 |   for (int i = 1; i < bottom.size(); ++i) {
 29 |     COUNT_ += bottom[i]->count();
 30 |     // Grow the concat axis; every other axis must match bottom[0].
 31 |     if (concat_dim_ == 0) { NUM_ += bottom[i]->num(); }
 32 |     else { CHECK_EQ(bottom[i]->num(), bottom[0]->num()); }
 33 |     if (concat_dim_ == 1) { CHANNELS_ += bottom[i]->channels(); }
 34 |     else { CHECK_EQ(bottom[i]->channels(), bottom[0]->channels()); }
 35 |     if (concat_dim_ == 2) { HEIGHT_ += bottom[i]->height(); }
 36 |     else { CHECK_EQ(bottom[i]->height(), bottom[0]->height()); }
 37 |     if (concat_dim_ == 3) { WIDTH_ += bottom[i]->width(); }
 38 |     else { CHECK_EQ(bottom[i]->width(), bottom[0]->width()); }
 39 |   }
 40 |   (*top)[0]->Reshape(NUM_, CHANNELS_, HEIGHT_, WIDTH_);
 41 |   CHECK_EQ(COUNT_, (*top)[0]->count());  // misses e.g. swapped H/W on its own
 42 | }
 43 | // Copies every bottom blob into its slice of the single top blob.
 44 | template <typename Dtype>
 45 | void ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 46 |       vector<Blob<Dtype>*>* top) {
 47 |   Blob<Dtype>* out = (*top)[0];
 48 |   Dtype* out_data = out->mutable_cpu_data();
 49 |   if (concat_dim_ != 0 && concat_dim_ != 1) {
 50 |     LOG(FATAL) << "concat_dim along dim" << concat_dim_ <<
 51 |       " not implemented yet";
 52 |   } else if (concat_dim_ == 0) {  // along num: one whole-blob copy per input
 53 |     int first_num = 0;
 54 |     for (int i = 0; i < bottom.size(); ++i) {
 55 |       caffe_copy(bottom[i]->count(), bottom[i]->cpu_data(),
 56 |           out_data + out->offset(first_num));
 57 |       first_num += bottom[i]->num();
 58 |     }
 59 |   } else {  // along channels: copy sample by sample
 60 |     int first_channel = 0;
 61 |     for (int i = 0; i < bottom.size(); ++i) {
 62 |       Blob<Dtype>* in = bottom[i];
 63 |       const Dtype* in_data = in->cpu_data();
 64 |       const int per_sample = in->channels() * in->height() * in->width();
 65 |       for (int n = 0; n < NUM_; ++n) {
 66 |         caffe_copy(per_sample, in_data + in->offset(n),
 67 |             out_data + out->offset(n, first_channel));
 68 |       }
 69 |       first_channel += in->channels();
 70 |     }
 71 |   }
 72 | }
 73 | // Copies each slice of the top diff back into the diff of its bottom blob.
 74 | template <typename Dtype>
 75 | Dtype ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
 76 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
 77 |   Blob<Dtype>* out = top[0];
 78 |   const Dtype* out_diff = out->cpu_diff();
 79 |   if (concat_dim_ != 0 && concat_dim_ != 1) {
 80 |     LOG(FATAL) << "concat_dim along dim" << concat_dim_ <<
 81 |       " not implemented yet";
 82 |   } else if (concat_dim_ == 0) {  // along num: one whole-blob copy per input
 83 |     int first_num = 0;
 84 |     for (int i = 0; i < bottom->size(); ++i) {
 85 |       Blob<Dtype>* in = (*bottom)[i];
 86 |       caffe_copy(in->count(), out_diff + out->offset(first_num),
 87 |           in->mutable_cpu_diff());
 88 |       first_num += in->num();
 89 |     }
 90 |   } else {  // along channels: copy sample by sample
 91 |     int first_channel = 0;
 92 |     for (int i = 0; i < bottom->size(); ++i) {
 93 |       Blob<Dtype>* in = (*bottom)[i];
 94 |       Dtype* in_diff = in->mutable_cpu_diff();
 95 |       const int per_sample = in->channels() * in->height() * in->width();
 96 |       for (int n = 0; n < NUM_; ++n) {
 97 |         caffe_copy(per_sample, out_diff + out->offset(n, first_channel),
 98 |             in_diff + in->offset(n));
 99 |       }
100 |       first_channel += in->channels();
101 |     }
102 |   }
103 |   return Dtype(0.);
104 | }
105 | 
106 | INSTANTIATE_CLASS(ConcatLayer);
107 | 
108 | }  // namespace caffe
109 | 


--------------------------------------------------------------------------------
/src/caffe/layers/inner_product_layer.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | 
  4 | #include <vector>
  5 | 
  6 | #include "caffe/blob.hpp"
  7 | #include "caffe/common.hpp"
  8 | #include "caffe/filler.hpp"
  9 | #include "caffe/layer.hpp"
 10 | #include "caffe/vision_layers.hpp"
 11 | #include "caffe/util/math_functions.hpp"
 12 | 
 13 | namespace caffe {
 14 | // Reads the layer parameters, shapes the top blob and, unless blobs were
 15 | // already provided, creates and fills the weight and bias blobs.
 16 | template <typename Dtype>
 17 | void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 18 |       vector<Blob<Dtype>*>* top) {
 19 |   // Exactly one input and one output blob are supported.
 20 |   CHECK_EQ(bottom.size(), 1) << "IP Layer takes a single blob as input.";
 21 |   CHECK_EQ(top->size(), 1) << "IP Layer takes a single blob as output.";
 22 |   const int num_output = this->layer_param_.num_output();
 23 |   biasterm_ = this->layer_param_.biasterm();
 24 |   // GEMM dimensions: M_ samples, K_ values per sample, N_ outputs.
 25 |   M_ = bottom[0]->num();
 26 |   K_ = bottom[0]->count() / bottom[0]->num();
 27 |   N_ = num_output;
 28 |   (*top)[0]->Reshape(bottom[0]->num(), num_output, 1, 1);
 29 |   // blobs_ is already populated when the LayerParameter carried blobs;
 30 |   // in that case the existing parameters are kept as they are.
 31 |   if (!this->blobs_.empty()) {
 32 |     LOG(INFO) << "Skipping parameter initialization";
 33 |   } else {
 34 |     // One blob for the weights, one more for the bias when it is enabled.
 35 |     this->blobs_.resize(biasterm_ ? 2 : 1);
 36 |     // Weights: N_ x K_, initialized by the configured weight filler.
 37 |     this->blobs_[0].reset(new Blob<Dtype>(1, 1, N_, K_));
 38 |     shared_ptr<Filler<Dtype> > weight_init(
 39 |         GetFiller<Dtype>(this->layer_param_.weight_filler()));
 40 |     weight_init->Fill(this->blobs_[0].get());
 41 |     if (biasterm_) {
 42 |       // Bias: one value per output, initialized by the bias filler.
 43 |       this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, N_));
 44 |       shared_ptr<Filler<Dtype> > bias_init(
 45 |           GetFiller<Dtype>(this->layer_param_.bias_filler()));
 46 |       bias_init->Fill(this->blobs_[1].get());
 47 |     }
 48 |   }  // parameter initialization
 49 |   // The bias multiplier is a vector of M_ ones; Forward_cpu multiplies it
 50 |   // with the bias so that the bias is added to every sample.
 51 |   if (biasterm_) {
 52 |     bias_multiplier_.reset(new SyncedMemory(M_ * sizeof(Dtype)));
 53 |     Dtype* ones =
 54 |         reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data());
 55 |     for (int i = 0; i < M_; ++i) {
 56 |       ones[i] = 1.;
 57 |     }
 58 |   }
 59 | }
 60 | // top = bottom * weights^T, plus the bias broadcast over all rows if enabled.
 61 | template <typename Dtype>
 62 | void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 63 |     vector<Blob<Dtype>*>* top) {
 64 |   const Dtype one(1.), zero(0.);
 65 |   Dtype* output = (*top)[0]->mutable_cpu_data();
 66 |   caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_, one,
 67 |       bottom[0]->cpu_data(), this->blobs_[0]->cpu_data(), zero, output);
 68 |   if (!biasterm_) return;
 69 |   // output += ones (M_ x 1) * bias (1 x N_)
 70 |   const Dtype* ones =
 71 |       reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data());
 72 |   caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, one, ones,
 73 |       this->blobs_[1]->cpu_data(), one, output);
 74 | }
 75 | // Computes the weight, bias and (if requested) bottom gradients from top diff.
 76 | template <typename Dtype>
 77 | Dtype InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
 78 |     const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
 79 |   const Dtype one(1.), zero(0.);
 80 |   const Dtype* out_diff = top[0]->cpu_diff();
 81 |   Blob<Dtype>* input = (*bottom)[0];
 82 |   // Weight gradient (N_ x K_) = out_diff^T * bottom data
 83 |   caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, one, out_diff,
 84 |       input->cpu_data(), zero, this->blobs_[0]->mutable_cpu_diff());
 85 |   if (biasterm_) {
 86 |     // Bias gradient = out_diff^T * ones
 87 |     const Dtype* ones =
 88 |         reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data());
 89 |     caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, one, out_diff, ones, zero,
 90 |         this->blobs_[1]->mutable_cpu_diff());
 91 |   }
 92 |   if (propagate_down) {
 93 |     // Bottom gradient (M_ x K_) = out_diff * weights
 94 |     caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, one,
 95 |         out_diff, this->blobs_[0]->cpu_data(), zero, input->mutable_cpu_diff());
 96 |   }
 97 |   return Dtype(0);
 98 | }
 99 | 
100 | INSTANTIATE_CLASS(InnerProductLayer);
101 | 
102 | }  // namespace caffe
103 | 


--------------------------------------------------------------------------------
/feat_net_raw.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | //
  3 | // This is a simple script that allows one to quickly test a network whose
  4 | // structure is specified by text format protocol buffers, and whose parameter
  5 | // are loaded from a pre-trained network.
  6 | // Usage:
  7 | //    test_net net_proto pretrained_net_proto iterations output_dir inputbin [CPU/GPU]
  8 | 
  9 | #include <cstring>
 10 | #include <cstdlib>
 11 | #include <cstdio>
 12 | #include <vector>
 13 | #include <string>
 14 | 
 15 | #include "caffe/caffe.hpp"
 16 | 
 17 | using namespace caffe;  // NOLINT(build/namespaces)
 18 | // Writes the blob's CPU data to `fn` as raw Dtype values; any I/O failure trips a CHECK.
 19 | template <typename Dtype>
 20 | static void save_blob(const string& fn, Blob<Dtype> *b){
 21 | 	LOG(INFO) << "Saving " << fn;
 22 | 	FILE *f = fopen(fn.c_str(), "wb");
 23 | 	CHECK(f != NULL);
 24 | 	const bool wrote_all = fwrite(b->cpu_data(), sizeof(Dtype), b->count(), f) == size_t(b->count());
 25 | 	const bool closed = fclose(f) == 0;  // flushes buffered data, so it can fail as well
 26 | 	CHECK(wrote_all && closed);  // fail loudly instead of leaving a truncated file
 27 | }
 28 | // Reads raw samples (doubles, one ic*ih*iw block per image) from `inputbin`,
 29 | // runs the net forward and, after every iteration, writes the top blob of
 30 | // layer "relu5" to <output_dir>/feat_<iteration, 5 digits>.
 31 | // argv[1] net_proto             text-format network definition
 32 | // argv[2] pretrained_net_proto  binary trained parameters
 33 | // argv[3] iterations            number of forward passes
 34 | // argv[4] output_dir            directory the feature files are written to
 35 | // argv[5] inputbin              raw input samples
 36 | // argv[6] "GPU" (optional)      anything else selects the CPU
 37 | // Returns 0 on success and 1 when too few arguments are given.
 38 | int main(int argc, char** argv) {
 39 |   // argv[5] is read unconditionally below, so six entries are required.
 40 |   if (argc < 6) {
 41 |     LOG(ERROR) << "test_net net_proto pretrained_net_proto iterations output_dir inputbin"
 42 |         << " [CPU/GPU]";
 43 |     return 1;
 44 |   }
 45 | 
 46 |   Caffe::set_phase(Caffe::TEST);
 47 | 
 48 |   if (argc == 7 && strcmp(argv[6], "GPU") == 0) {
 49 |     LOG(ERROR) << "Using GPU";
 50 |     Caffe::set_mode(Caffe::GPU);
 51 |   } else {
 52 |     LOG(ERROR) << "Using CPU";
 53 |     Caffe::set_mode(Caffe::CPU);
 54 |   }
 55 | 
 56 |   // Build the net from its definition, then load the trained parameters.
 57 |   NetParameter test_net_param;
 58 |   ReadProtoFromTextFile(argv[1], &test_net_param);
 59 |   Net<float> caffe_test_net(test_net_param);
 60 |   NetParameter trained_net_param;
 61 |   ReadProtoFromBinaryFile(argv[2], &trained_net_param);
 62 |   caffe_test_net.CopyTrainedLayersFrom(trained_net_param);
 63 | 
 64 |   const int total_iter = atoi(argv[3]);
 65 |   LOG(ERROR) << "Running " << total_iter << " Iterations.";
 66 | 
 67 |   vector<Blob<float>*> dummy_blob_input_vec;
 68 | 
 69 |   // Find the feature layer and the input layer by name (first match wins).
 70 |   int feature_layer_idx = -1;
 71 |   int data_layer_idx = -1;
 72 |   for (int i = 0; i < caffe_test_net.layer_names().size(); i++) {
 73 |     if (caffe_test_net.layer_names()[i] == "relu5") {
 74 |       feature_layer_idx = i;
 75 |       break;
 76 |     }
 77 |   }
 78 |   for (int i = 0; i < caffe_test_net.layer_names().size(); i++) {
 79 |     if (caffe_test_net.layer_names()[i] == "data") {
 80 |       data_layer_idx = i;
 81 |       break;
 82 |     }
 83 |   }
 84 |   CHECK_NE(feature_layer_idx, -1);
 85 |   CHECK_NE(data_layer_idx, -1);
 86 |   LOG(INFO) << "Data layer: " << data_layer_idx;
 87 |   LOG(INFO) << "Feature layer: " << feature_layer_idx;
 88 | 
 89 |   Blob<float>* output = caffe_test_net.top_vecs()[feature_layer_idx][0];
 90 |   Blob<float>* data_blob = caffe_test_net.top_vecs()[data_layer_idx][0];
 91 |   RawImageLayer<float> *data_layer = dynamic_cast<RawImageLayer<float>* >(
 92 |       caffe_test_net.layers()[data_layer_idx].get());
 93 |   CHECK(data_layer != 0);
 94 | 
 95 |   LOG(INFO) << "OUTPUT BLOB dim: " << output->num() << ' '
 96 |       << output->channels() << ' '
 97 |       << output->width() << ' '
 98 |       << output->height();
 99 |   FILE *finput = fopen(argv[5], "rb");
100 |   CHECK(finput != NULL);
101 |   const int ih = data_blob->height(), iw = data_blob->width(), ic = data_blob->channels();
102 |   const size_t len = static_cast<size_t>(ih) * iw * ic;
103 |   CHECK(len > 0);
104 |   // One sample, kept on the heap: a stack VLA is not standard C++ and its size is unchecked.
105 |   vector<double> buf(len);
106 |   char output_path[1024];
107 |   for (int i = 0; i < total_iter; ++i) {
108 |     float *d = data_blob->mutable_cpu_data();
109 |     for (int j = 0; j < data_blob->num(); j++) {
110 |       const size_t nread = fread(&buf[0], sizeof(double), len, finput);
111 |       CHECK_EQ(nread, len);
112 |       for (size_t k = 0; k < len; k++) {
113 |         d[k] = static_cast<float>(buf[k]);
114 |       }
115 |       d += len;
116 |     }
117 |     caffe_test_net.Forward(dummy_blob_input_vec);
118 |     snprintf(output_path, sizeof(output_path), "%s/feat_%05d", argv[4], i);
119 |     save_blob(output_path, output);
120 |   }
121 |   fclose(finput);
122 |   return 0;
123 | }
123 | 


--------------------------------------------------------------------------------
/src/caffe/layers/verification_loss.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include <algorithm>
  4 | #include <cfloat>
  5 | #include <vector>
  6 | 
  7 | #include "caffe/layer.hpp"
  8 | #include "caffe/vision_layers.hpp"
  9 | #include "caffe/util/math_functions.hpp"
 10 | 
 11 | using std::max;
 12 | 
 13 | namespace caffe {
 14 | // Picks the margin that best separates the recorded same / different pairs.
 15 | // Candidates 0.0, 0.1, ..., 399.9 are scored by
 16 | //   f = (#same pairs with distance > th) / (2 * #same pairs)
 17 | //     + (#different pairs with distance <= th) / (2 * #different pairs)
 18 | // and the first candidate with the lowest f wins.
 19 | // update: when true, the winner is also installed through SetThreshold().
 20 | // Returns the winning margin. If nothing was recorded, or only one kind of
 21 | // pair was recorded, the current M_ is returned and nothing is updated.
 22 | template <typename Dtype>
 23 | Dtype VerificationLossLayer<Dtype>::CalcThreshold(bool update) {
 24 | 	const int n = same_.size();
 25 | 	CHECK_EQ(n, distance_.size());
 26 | 	if(!n)
 27 | 		return M_;
 28 | 
 29 | 	// Number of same (is_) and different (id_) pairs; they normalise the rates.
 30 | 	int is_ = 0, id_ = 0;
 31 | 	for(int i = 0; i < n; i++)
 32 | 	{
 33 | 		if(same_[i])
 34 | 			is_++;
 35 | 		else
 36 | 			id_++;
 37 | 	}
 38 | 	// With one kind missing its rate is 0/0 = NaN, no candidate can then pass
 39 | 	// the `f < stat[0]` test and -1 would be returned (and installed).
 40 | 	if(!is_ || !id_)
 41 | 		return M_;
 42 | 
 43 | 	Dtype stat[3];    // best f so far and its two summands
 44 | 	stat[0] = 1.0;
 45 | 	stat[1] = 0.5;
 46 | 	stat[2] = 0.5;
 47 | 	Dtype th = -1.0;  // remains -1 if no candidate scores below 1.0
 48 | 
 49 | 	for(int i = 0; i < 4000; i++)
 50 | 	{
 51 | 		const Dtype th_c = i * 0.1;
 52 | 		int is = 0, id = 0;  // misclassified same / different pairs at th_c
 53 | 		for(int j = 0; j < n; j++)
 54 | 		{
 55 | 			if(same_[j])
 56 | 			{
 57 | 				if(distance_[j] > th_c)
 58 | 					is++;
 59 | 			}
 60 | 			else if(distance_[j] <= th_c)
 61 | 			{
 62 | 				id++;
 63 | 			}
 64 | 		}
 65 | 		const Dtype s = (Dtype)is / (2 * is_);
 66 | 		const Dtype d = (Dtype)id / (2 * id_);
 67 | 		const Dtype f = s + d;
 68 | 		if(f < stat[0])
 69 | 		{
 70 | 			stat[0] = f;
 71 | 			stat[1] = s;
 72 | 			stat[2] = d;
 73 | 			th = th_c;
 74 | 		}
 75 | 	}
 76 | 	LOG(INFO) << "margin: " << th << " ("
 77 | 		<< stat[0] << ", " << stat[1]
 78 | 		<< ", " << stat[2] << ")";
 79 | 	if(update)
 80 | 		SetThreshold(th);
 81 | 	return th;
 82 | }
 83 | // Checks the blob counts, sizes the two scratch blobs and loads the dual_* parameters.
 84 | template <typename Dtype>
 85 | void VerificationLossLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 86 |       vector<Blob<Dtype>*>* top) {
 87 |   CHECK_EQ(bottom.size(), 4) << "VerificationLoss Layer takes four blobs as input.";
 88 |   CHECK_EQ(top->size(), 0) << "VerificationLoss Layer takes no blob as output.";
 89 |   // One scratch row of `channels` values per sample, for each side of the pair.
 90 |   const int rows = bottom[0]->num(), cols = bottom[0]->channels();
 91 |   diffy1_.Reshape(rows, cols, 1, 1);
 92 |   diffy2_.Reshape(rows, cols, 1, 1);
 93 |   M_ = this->layer_param_.dual_threshold();
 94 |   LAMDA_ = this->layer_param_.dual_lamda();
 95 |   ResetDistanceStat();
 96 |   LOG(INFO) << "Initial: threshold " << M_ << ", " << "lamda: " << LAMDA_;
 97 | }
 98 | // No-op: the body is empty, nothing is computed in this layer's forward pass.
 99 | template <typename Dtype>
100 | void VerificationLossLayer<Dtype>::Forward_cpu(
101 |     const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
102 | }
103 | // Computes the verification-loss gradient for pairs of features and adds it,
104 | // scaled by LAMDA_ / num, to the diffs of bottom[0] and bottom[2].
105 | //
106 | // bottom[0], bottom[2]: features of the two pair members (one row per sample)
107 | // bottom[1], bottom[3]: their labels
108 | // Same label      -> the gradient is the plain difference (a - b, resp. b - a).
109 | // Different label -> zero when the distance exceeds M_, otherwise the
110 | //                    difference scaled by -(M_ - dist) / dist.
111 | // `top` and `propagate_down` are not used. Always returns 0.
112 | template <typename Dtype>
113 | Dtype VerificationLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
114 |     const bool propagate_down,
115 |     vector<Blob<Dtype>*>* bottom) {
116 |   Blob<Dtype>* first = (*bottom)[0];
117 |   Blob<Dtype>* second = (*bottom)[2];
118 |   const Dtype* feat_a = first->cpu_data();
119 |   const Dtype* feat_b = second->cpu_data();
120 |   const Dtype* label_a = (*bottom)[1]->cpu_data();
121 |   const Dtype* label_b = (*bottom)[3]->cpu_data();
122 |   // Scratch buffers that hold the per-pair gradients.
123 |   Dtype* grad_a = diffy1_.mutable_cpu_data();
124 |   Dtype* grad_b = diffy2_.mutable_cpu_data();
125 | 
126 |   const int num = first->num();
127 |   const int count = first->count();
128 |   // Rows are addressed with the channel count as stride.
129 |   const int feat_len = first->channels();
130 | 
131 |   // Start with the plain differences: grad_a = a - b, grad_b = b - a.
132 |   caffe_sub(count, feat_a, feat_b, grad_a);
133 |   caffe_sub(count, feat_b, feat_a, grad_b);
134 | 
135 |   for (int i = 0; i < num; ++i) {
136 |     if (static_cast<int>(label_a[i]) == static_cast<int>(label_b[i])) {
137 |       continue;  // same label: keep the plain difference
138 |     }
139 |     Dtype* row_a = grad_a + i * feat_len;
140 |     Dtype* row_b = grad_b + i * feat_len;
141 |     // Euclidean distance between the two features of this pair.
142 |     const Dtype dist = sqrt(caffe_cpu_dot(feat_len, row_a, row_a));
143 |     if (dist > M_) {
144 |       // Already further apart than the margin: no gradient for this pair.
145 |       memset(row_a, 0, sizeof(Dtype) * feat_len);
146 |       memset(row_b, 0, sizeof(Dtype) * feat_len);
147 |     } else {
148 |       // Scale by -(M_ - dist) / dist; FLT_MIN guards against dist == 0.
149 |       const Dtype scale = (M_ - dist) / (dist + Dtype(FLT_MIN));
150 |       caffe_scal(feat_len, -scale, row_a);
151 |       caffe_scal(feat_len, -scale, row_b);
152 |     }
153 |   }
154 | 
155 |   // Add the gradients, scaled down by the batch size, to the bottom diffs.
156 |   caffe_axpy(count, LAMDA_/num, grad_a, first->mutable_cpu_diff());
157 |   caffe_axpy(count, LAMDA_/num, grad_b, second->mutable_cpu_diff());
158 |   return Dtype(0.);
159 | }
160 | 
161 | 
162 | INSTANTIATE_CLASS(VerificationLossLayer);
163 | 
164 | 
165 | }  // namespace caffe
166 | 


--------------------------------------------------------------------------------
/include/caffe/util/mkl_alternate.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 BVLC and contributors.
  2 | 
  3 | #ifndef CAFFE_UTIL_MKL_ALTERNATE_H_
  4 | #define CAFFE_UTIL_MKL_ALTERNATE_H_
  5 | 
  6 | #include "../common.hpp"
  7 | #ifdef USE_MKL
  8 | 
  9 | #include <mkl.h>
 10 | 
 11 | #else  // If use MKL, simply include the MKL header
 12 | 
 13 | #ifndef USE_EIGEN
 14 | extern "C" {
 15 | #include <cblas.h>
 16 | }
 17 | #else
 18 | #include <Eigen/Dense>
 19 | enum  	CBLAS_ORDER { CblasRowMajor = 101, CblasColMajor = 102 };
 20 | enum  	CBLAS_TRANSPOSE { CblasNoTrans = 111, CblasTrans = 112, CblasConjTrans = 113 };
 21 | // Wrap a raw float (S) / double (D) array of length N as an Eigen vector called `name`.
 22 | #define MAP_SVECTOR(name, ptr, N) Eigen::Map<Eigen::VectorXf> name(ptr, N)
 23 | #define MAP_CONST_SVECTOR(name, ptr, N) Eigen::Map<const Eigen::VectorXf> name(ptr, N)
 24 | #define MAP_DVECTOR(name, ptr, N) Eigen::Map<Eigen::VectorXd> name(ptr, N)
 25 | #define MAP_CONST_DVECTOR(name, ptr, N) Eigen::Map<const Eigen::VectorXd> name(ptr, N)
 26 | typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> MatXf;
 27 | typedef Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> MatXd;
 28 | // Wrap a raw array as a row-major M x N Eigen matrix (MatXf / MatXd) called `name`.
 29 | #define MAP_SMATRIX(name, ptr, M, N) Eigen::Map<MatXf> name(ptr, M, N)
 30 | #define MAP_CONST_SMATRIX(name, ptr, M, N) Eigen::Map<const MatXf> name(ptr, M, N)
 31 | #define MAP_DMATRIX(name, ptr, M, N) Eigen::Map<MatXd> name(ptr, M, N)
 32 | #define MAP_CONST_DMATRIX(name, ptr, M, N) Eigen::Map<const MatXd> name(ptr, M, N)
 33 | 
 34 | 
 35 | #endif
 36 | 
 37 | #include <math.h>
 38 | 
 39 | // Functions that caffe uses but are not present if MKL is not linked.
 40 | 
 41 | // Defines the template v<name> plus the wrappers vs<name> (float) and vd<name> (double).
 42 | // `operation` runs for every i in [0, n), e.g. y[i] = sqrt(a[i]); n > 0 and non-null a, y are CHECKed.
 43 | #define DEFINE_VSL_UNARY_FUNC(name, operation) \
 44 |   template<typename Dtype> \
 45 |   void v##name(const int n, const Dtype* a, Dtype* y) { \
 46 |     CHECK_GT(n, 0); CHECK(a); CHECK(y); \
 47 |     for (int i = 0; i < n; ++i) { operation; } \
 48 |   } \
 49 |   inline void vs##name( \
 50 |     const int n, const float* a, float* y) { \
 51 |     v##name<float>(n, a, y); \
 52 |   } \
 53 |   inline void vd##name( \
 54 |       const int n, const double* a, double* y) { \
 55 |     v##name<double>(n, a, y); \
 56 |   }
 57 | // Elementwise square (vSqr, vsSqr, vdSqr) and exponential (vExp, vsExp, vdExp).
 58 | DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]);
 59 | DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i]));
 60 | 
 61 | // Defines v<name>, vs<name> (float) and vd<name> (double) for an elementwise operation
 62 | // with one scalar parameter b, e.g. y[i] = pow(a[i], b). vd<name> takes b as double so it is not truncated.
 63 | #define DEFINE_VSL_UNARY_FUNC_WITH_PARAM(name, operation) \
 64 |   template<typename Dtype> \
 65 |   void v##name(const int n, const Dtype* a, const Dtype b, Dtype* y) { \
 66 |     CHECK_GT(n, 0); CHECK(a); CHECK(y); \
 67 |     for (int i = 0; i < n; ++i) { operation; } \
 68 |   } \
 69 |   inline void vs##name( \
 70 |     const int n, const float* a, const float b, float* y) { \
 71 |     v##name<float>(n, a, b, y); \
 72 |   } \
 73 |   inline void vd##name( \
 74 |       const int n, const double* a, const double b, double* y) { \
 75 |     v##name<double>(n, a, b, y); \
 76 |   }
 77 | // Elementwise power with a shared exponent: vPowx, vsPowx, vdPowx.
 78 | DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b));
 79 | 
 80 | // Defines v<name>, vs<name> (float) and vd<name> (double) for an elementwise operation on two
 81 | // input arrays, e.g. y[i] = a[i] + b[i]; n > 0 and non-null a, b, y are CHECKed.
 82 | #define DEFINE_VSL_BINARY_FUNC(name, operation) \
 83 |   template<typename Dtype> \
 84 |   void v##name(const int n, const Dtype* a, const Dtype* b, Dtype* y) { \
 85 |     CHECK_GT(n, 0); CHECK(a); CHECK(b); CHECK(y); \
 86 |     for (int i = 0; i < n; ++i) { operation; } \
 87 |   } \
 88 |   inline void vs##name( \
 89 |     const int n, const float* a, const float* b, float* y) { \
 90 |     v##name<float>(n, a, b, y); \
 91 |   } \
 92 |   inline void vd##name( \
 93 |       const int n, const double* a, const double* b, double* y) { \
 94 |     v##name<double>(n, a, b, y); \
 95 |   }
 96 | // Elementwise sum, difference, product and quotient.
 97 | DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]);
 98 | DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]);
 99 | DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]);
100 | DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]);
101 | 
102 | #ifndef USE_EIGEN
103 | // MKL also ships axpby (Y = alpha * X + beta * Y), which standard BLAS lacks.
104 | // It is mimicked here in two steps: scale Y by beta, then axpy alpha * X onto
105 | // it (inefficient, of course).
106 | inline void cblas_saxpby(const int N, const float alpha, const float* X,
107 |                          const int incX, const float beta, float* Y,
108 |                          const int incY) {
109 |   cblas_sscal(N, beta, Y, incY);
110 |   cblas_saxpy(N, alpha, X, incX, Y, incY);
111 | }
112 | inline void cblas_daxpby(const int N, const double alpha, const double* X,
113 |                          const int incX, const double beta, double* Y,
114 |                          const int incY) {
115 |   cblas_dscal(N, beta, Y, incY);
116 |   cblas_daxpy(N, alpha, X, incX, Y, incY);
117 | }
118 | #endif
119 | 
120 | #endif  // USE_MKL
121 | #endif  // CAFFE_UTIL_MKL_ALTERNATE_H_
122 | 


--------------------------------------------------------------------------------
/include/caffe/layer.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #ifndef CAFFE_LAYER_H_
  4 | #define CAFFE_LAYER_H_
  5 | 
  6 | #include <vector>
  7 | #include "caffe/blob.hpp"
  8 | #include "caffe/common.hpp"
  9 | #include "caffe/proto/caffe.pb.h"
 10 | 
 11 | using std::vector;
 12 | 
 13 | namespace caffe {
 14 | 
 15 | template <typename Dtype>
 16 | class Layer {
 17 |  public:
 18 |   // You should not implement your own constructor. Any set up code should go
 19 |   // to SetUp(), where the dimensions of the bottom blobs are provided to the
 20 |   // layer.
 21 |   explicit Layer(const LayerParameter& param)
 22 |     : layer_param_(param) {
 23 |       // The only thing we do is to copy blobs if there are any: each blob
 24 |       // stored in the parameter is deserialized into a freshly allocated Blob.
 25 |       const int num_blobs = layer_param_.blobs_size();
 26 |       blobs_.resize(num_blobs);
 27 |       for (int i = 0; i < num_blobs; ++i) {
 28 |         blobs_[i].reset(new Blob<Dtype>());
 29 |         blobs_[i]->FromProto(layer_param_.blobs(i));
 30 |       }
 31 |     }
 32 |   virtual ~Layer() {}
 33 |   // SetUp: your function should implement this.
 34 |   virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
 35 |       vector<Blob<Dtype>*>* top) = 0;
 36 | 
 37 |   // Forward and backward wrappers. You should implement the cpu and
 38 |   // gpu specific implementations instead, and should not change these
 39 |   // functions.
 40 |   inline void Forward(const vector<Blob<Dtype>*>& bottom,
 41 |       vector<Blob<Dtype>*>* top);
 42 |   inline Dtype Backward(const vector<Blob<Dtype>*>& top,
 43 |       const bool propagate_down,
 44 |       vector<Blob<Dtype>*>* bottom);
 45 | 
 46 |   // Returns the vector of blobs (mutable: callers may modify the parameters).
 47 |   vector<shared_ptr<Blob<Dtype> > >& blobs() {
 48 |     return blobs_;
 49 |   }
 50 | 
 51 |   // Returns the layer parameter (read-only, so it is callable on const layers)
 52 |   const LayerParameter& layer_param() const { return layer_param_; }
 53 |   // Writes the layer parameter to a protocol buffer
 54 |   virtual void ToProto(LayerParameter* param, bool write_diff = false);
 55 | 
 56 |  protected:
 57 |   // The protobuf that stores the layer parameters
 58 |   LayerParameter layer_param_;
 59 |   // The vector that stores the parameters as a set of blobs.
 60 |   vector<shared_ptr<Blob<Dtype> > > blobs_;
 61 | 
 62 |   // Forward functions
 63 |   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 64 |       vector<Blob<Dtype>*>* top) = 0;
 65 |   // If no gpu code is provided, we will simply use cpu code.
 66 | #if 0
 67 |   virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 68 |       vector<Blob<Dtype>*>* top) {
 69 |     // LOG(WARNING) << "Using CPU code as backup.";
 70 |     Forward_cpu(bottom, top);
 71 |   }
 72 | #endif
 73 | 
 74 |   // Backward functions: the backward function will compute the gradients for
 75 |   // any parameters and also for the bottom blobs if propagate_down is true.
 76 |   // It will return the loss produced from this layer.
 77 |   virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
 78 |       const bool propagate_down,
 79 |       vector<Blob<Dtype>*>* bottom) = 0;
 80 | #if 0
 81 |   virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
 82 |       const bool propagate_down,
 83 |       vector<Blob<Dtype>*>* bottom) {
 84 |     // LOG(WARNING) << "Using CPU code as backup.";
 85 |     return Backward_cpu(top, propagate_down, bottom);
 86 |   }
 87 | #endif
 88 | 
 89 |   DISABLE_COPY_AND_ASSIGN(Layer);
 90 | };  // class Layer
 91 | 
 92 | // Forward and backward wrappers. You should implement the cpu and
 93 | // gpu specific implementations instead, and should not change these
 94 | // functions.
 95 | template <typename Dtype>
 96 | inline void Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
 97 |     vector<Blob<Dtype>*>* top) {
 98 |   // Dispatch on the globally selected mode; only the CPU path is compiled in.
 99 |   if (Caffe::mode() == Caffe::CPU) {
100 |     Forward_cpu(bottom, top);
101 |     return;
102 |   }
103 | #if 0
104 |   if (Caffe::mode() == Caffe::GPU) {
105 |     Forward_gpu(bottom, top);
106 |     return;
107 |   }
108 | #endif
109 |   LOG(FATAL) << "Unknown caffe mode.";
110 | }
111 | 
112 | template <typename Dtype>
113 | inline Dtype Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
114 |     const bool propagate_down,
115 |     vector<Blob<Dtype>*>* bottom) {
116 |   // Only the CPU implementation is compiled in; any other mode is fatal.
117 |   if (Caffe::mode() == Caffe::CPU) {
118 |     return Backward_cpu(top, propagate_down, bottom);
119 |   }
120 | #if 0
121 |   if (Caffe::mode() == Caffe::GPU) {
122 |     return Backward_gpu(top, propagate_down, bottom);
123 |   }
124 | #endif
125 |   LOG(FATAL) << "Unknown caffe mode.";
126 |   return 0;
127 | }
128 | 
129 | template <typename Dtype>
130 | void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) {
131 |   // CopyFrom() resets *param itself, so no separate Clear() is required.
132 |   param->CopyFrom(layer_param_);
133 |   param->clear_blobs();  // Blobs are re-serialized from their live values.
134 |   for (int i = 0; i < static_cast<int>(blobs_.size()); ++i) {
135 |     blobs_[i]->ToProto(param->add_blobs(), write_diff);
136 |   }
137 | }
138 | 
139 | // Layer factory: maps a LayerParameter to a Layer (defined outside this file).
140 | template <typename Dtype>
141 | Layer<Dtype>* GetLayer(const LayerParameter& param);
142 | 
143 | }  // namespace caffe
144 | 
145 | #endif  // CAFFE_LAYER_H_
146 | 


--------------------------------------------------------------------------------
/include/caffe/net.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #ifndef CAFFE_NET_HPP_
  4 | #define CAFFE_NET_HPP_
  5 | 
  6 | #include <map>
  7 | #include <string>
  8 | #include <vector>
  9 | 
 10 | #include "caffe/blob.hpp"
 11 | #include "caffe/common.hpp"
 12 | #include "caffe/layer.hpp"
 13 | #include "caffe/proto/caffe.pb.h"
 14 | 
 15 | using std::map;
 16 | using std::vector;
 17 | using std::string;
 18 | 
 19 | namespace caffe {
 20 | 
 21 | 
 22 | template <typename Dtype>
 23 | class Net {
 24 |  public:
 25 |   explicit Net(const NetParameter& param);
 26 |   explicit Net(const string& param_file);
 27 |   virtual ~Net() {}
 28 | 
 29 |   // Initialize a network with the network parameter.
 30 |   void Init(const NetParameter& param);
 31 | 
 32 |   // Run forward with the input blobs already fed separately. You can get the
 33 |   // input blobs using input_blobs().
 34 |   const vector<Blob<Dtype>*>& ForwardPrefilled();
 35 |   // Run forward using a set of bottom blobs, and return the result.
 36 |   const vector<Blob<Dtype>*>& Forward(const vector<Blob<Dtype>* > & bottom);
 37 |   // Run forward using a serialized BlobProtoVector and return the result
 38 |   // as a serialized BlobProtoVector
 39 |   string Forward(const string& input_blob_protos);
 40 | 
 41 |   // The network backward should take no input and output, since it solely
 42 |   // computes the gradient w.r.t the parameters, and the data has already
 43 |   // been provided during the forward pass.
 44 |   Dtype Backward();
 45 |   Dtype BackwardBetween(int layer_top, int layer_bottom);
 46 | 
 47 |   Dtype ForwardBackward(const vector<Blob<Dtype>* > & bottom) {
 48 |     Forward(bottom);
 49 |     return Backward();
 50 |   }
 51 | 
 52 |   // Updates the network weights based on the diff values computed.
 53 |   void Update();
 54 | 
 55 |   // For an already initialized net: CopyLayersFrom() copies from another Net,
 56 |   // CopyTrainedLayersFrom() from a NetParameter instance or from a file name.
 57 |   void CopyLayersFrom(const Net<Dtype>& rhs, bool copy_diff);
 58 |   void CopyTrainedLayersFrom(const NetParameter& param);
 59 |   void CopyTrainedLayersFrom(const string trained_filename);
 60 |   // Writes the net to a proto.
 61 |   void ToProto(NetParameter* param, bool write_diff = false);
 62 | 
 63 |   // returns the network name.
 64 |   const string& name() const { return name_; }
 65 |   // returns the layer names
 66 |   const vector<string>& layer_names() const { return layer_names_; }
 67 |   // returns the blob names
 68 |   const vector<string>& blob_names() const { return blob_names_; }
 69 |   // returns the blobs
 70 |   const vector<shared_ptr<Blob<Dtype> > >& blobs() const { return blobs_; }
 71 |   // returns the layers (read-only view, then the mutable one)
 72 |   const vector<shared_ptr<Layer<Dtype> > >& layers() const { return layers_; }
 73 |   vector<shared_ptr<Layer<Dtype> > >& mutable_layers() { return layers_; }
 74 |   // returns the bottom and top vecs for each layer - usually you won't need
 75 |   // this unless you do per-layer checks such as gradients.
 76 |   vector<vector<Blob<Dtype>*> >& bottom_vecs() { return bottom_vecs_; }
 77 |   vector<vector<Blob<Dtype>*> >& top_vecs() { return top_vecs_; }
 78 |   // returns the parameters
 79 |   vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
 80 |   // returns the parameter learning rate and weight decay multipliers
 81 |   vector<float>& params_lr() { return params_lr_; }
 82 |   vector<float>& params_weight_decay() { return params_weight_decay_; }
 83 |   // Input and output blob numbers
 84 |   int num_inputs() const { return static_cast<int>(net_input_blobs_.size()); }
 85 |   int num_outputs() const { return static_cast<int>(net_output_blobs_.size()); }
 86 |   vector<Blob<Dtype>*>& input_blobs() { return net_input_blobs_; }
 87 |   vector<Blob<Dtype>*>& output_blobs() { return net_output_blobs_; }
 88 | 
 89 |  protected:
 90 |   // Function to get misc parameters, e.g. the learning rate multiplier and
 91 |   // weight decay.
 92 |   void GetLearningRateAndWeightDecay();
 93 | 
 94 |   // Individual layers in the net
 95 |   vector<shared_ptr<Layer<Dtype> > > layers_;
 96 |   vector<string> layer_names_;
 97 |   vector<bool> layer_need_backward_;
 98 |   // blobs stores the blobs that store intermediate results between the
 99 |   // layers.
100 |   vector<shared_ptr<Blob<Dtype> > > blobs_;
101 |   vector<string> blob_names_;
102 |   vector<bool> blob_need_backward_;
103 |   // bottom_vecs stores the vectors containing the input for each layer.
104 |   // They don't actually host the blobs (blobs_ does), so we simply store
105 |   // pointers.
106 |   vector<vector<Blob<Dtype>*> > bottom_vecs_;
107 |   vector<vector<int> > bottom_id_vecs_;
108 |   // top_vecs stores the vectors containing the output for each layer
109 |   vector<vector<Blob<Dtype>*> > top_vecs_;
110 |   vector<vector<int> > top_id_vecs_;
111 |   // blob indices for the input and the output of the net
112 |   vector<int> net_input_blob_indices_;
113 |   vector<Blob<Dtype>*> net_input_blobs_;
114 |   vector<Blob<Dtype>*> net_output_blobs_;
115 |   string name_;
116 |   // The parameters in the network.
117 |   vector<shared_ptr<Blob<Dtype> > > params_;
118 |   // the learning rate multipliers
119 |   vector<float> params_lr_;
120 |   // the weight decay multipliers
121 |   vector<float> params_weight_decay_;
122 |   DISABLE_COPY_AND_ASSIGN(Net);
123 | };
124 | 
125 | 
126 | }  // namespace caffe
127 | 
128 | #endif  // CAFFE_NET_HPP_
129 | 


--------------------------------------------------------------------------------
/include/caffe/filler.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | // Fillers are random number generators that fills a blob using the specified
  4 | // algorithm. The expectation is that they are only going to be used during
  5 | // initialization time and will not involve any GPUs.
  6 | 
  7 | #ifndef CAFFE_FILLER_HPP
  8 | #define CAFFE_FILLER_HPP
  9 | 
 10 | #include <string>
 11 | #include <cmath>
 12 | 
 13 | #include "caffe/common.hpp"
 14 | #include "caffe/blob.hpp"
 15 | #include "caffe/syncedmem.hpp"
 16 | #include "caffe/util/math_functions.hpp"
 17 | #include "caffe/proto/caffe.pb.h"
 18 | 
 19 | namespace caffe {
 20 | 
 21 | template <typename Dtype>
 22 | class Filler {
 23 |  public:
 24 |   explicit Filler(const FillerParameter& param) : filler_param_(param) {}
 25 |   virtual ~Filler() {}
 26 |   virtual void Fill(Blob<Dtype>* blob) = 0;  // Implemented by each filler.
 27 |  protected:
 28 |   FillerParameter filler_param_;  // Copy of the parameter given at creation.
 29 | };  // class Filler
 30 | 
 31 | 
 32 | template <typename Dtype>
 33 | class ConstantFiller : public Filler<Dtype> {
 34 |  public:
 35 |   explicit ConstantFiller(const FillerParameter& param)
 36 |       : Filler<Dtype>(param) {}
 37 |   virtual void Fill(Blob<Dtype>* blob) {
 38 |     Dtype* cursor = blob->mutable_cpu_data();
 39 |     const int count = blob->count();
 40 |     CHECK(count);
 41 |     const Dtype value = this->filler_param_.value();
 42 |     for (Dtype* const end = cursor + count; cursor != end; ++cursor) {
 43 |       *cursor = value;  // Every element receives the configured constant.
 44 |     }
 45 |   }
 46 | };
 47 | 
 48 | template <typename Dtype>
 49 | class UniformFiller : public Filler<Dtype> {
 50 |  public:
 51 |   explicit UniformFiller(const FillerParameter& param)
 52 |       : Filler<Dtype>(param) {}
 53 |   virtual void Fill(Blob<Dtype>* blob) {
 54 |     CHECK(blob->count());  // The only step that is still compiled in.
 55 | #if 0  // Disabled: the uniform sampling between min() and max() is compiled out.
 56 |     caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
 57 |         Dtype(this->filler_param_.min()),
 58 |         Dtype(this->filler_param_.max()));
 59 | #endif
 60 |   }
 61 | };
 62 | 
 63 | template <typename Dtype>
 64 | class GaussianFiller : public Filler<Dtype> {
 65 |  public:
 66 |   explicit GaussianFiller(const FillerParameter& param)
 67 |       : Filler<Dtype>(param) {}
 68 |   virtual void Fill(Blob<Dtype>* blob) {
 69 |     Dtype* data = blob->mutable_cpu_data();  // Unused while sampling is off.
 70 |     CHECK(blob->count());
 71 | #if 0  // Disabled: the Gaussian sampling with mean() and std() is compiled out.
 72 |     caffe_vRngGaussian<Dtype>(blob->count(), blob->mutable_cpu_data(),
 73 |         Dtype(this->filler_param_.mean()),
 74 |         Dtype(this->filler_param_.std()));
 75 | #endif
 76 |   }
 77 | };
 78 | 
 79 | template <typename Dtype>
 80 | class PositiveUnitballFiller : public Filler<Dtype> {
 81 |  public:
 82 |   explicit PositiveUnitballFiller(const FillerParameter& param)
 83 |       : Filler<Dtype>(param) {}
 84 |   virtual void Fill(Blob<Dtype>* blob) {
 85 | #if 0  // Disabled: with this region compiled out, Fill() does nothing at all.
 86 |     Dtype* data = blob->mutable_cpu_data();
 87 |     DCHECK(blob->count());
 88 |     caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(), 0, 1);
 89 |     // We expect the filler to not be called very frequently, so we will
 90 |     // just use a simple implementation
 91 |     int dim = blob->count() / blob->num();
 92 |     CHECK(dim);
 93 |     for (int i = 0; i < blob->num(); ++i) {
 94 |       Dtype sum = 0;
 95 |       for (int j = 0; j < dim; ++j) {
 96 |         sum += data[i * dim + j];
 97 |       }
 98 |       for (int j = 0; j < dim; ++j) {
 99 |         data[i * dim + j] /= sum;  // Each of the num() rows then sums to 1.
100 |       }
101 |     }
102 | #endif
103 |   }
104 | };
105 | 
106 | // A filler based on the paper [Glorot and Bengio 2010]: Understanding
107 | // the difficulty of training deep feedforward neural networks, but does not
108 | // use the fan_out value.
109 | //
110 | // It fills the incoming matrix by randomly sampling uniform data from
111 | // [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
112 | // of input nodes. You should make sure the input blob has shape (num, a, b, c)
113 | // where a * b * c = fan_in.
114 | template <typename Dtype>
115 | class XavierFiller : public Filler<Dtype> {
116 |  public:
117 |   explicit XavierFiller(const FillerParameter& param)
118 |       : Filler<Dtype>(param) {}
119 |   virtual void Fill(Blob<Dtype>* blob) {
120 |     CHECK(blob->count());
121 |     int fan_in = blob->count() / blob->num();
122 |     Dtype scale = sqrt(Dtype(3) / fan_in);  // Only used by the disabled call.
123 | #if 0  // Disabled: the sampling from [-scale, scale] is compiled out.
124 |     caffe_vRngUniform<Dtype>(blob->count(), blob->mutable_cpu_data(),
125 |         -scale, scale);
126 | #endif
127 |   }
128 | };
129 | 
130 | 
131 | // A function to get a specific filler from the specification given in
132 | // FillerParameter. Ideally this would be replaced by a factory pattern,
133 | // but we will leave it this way for now.
134 | template <typename Dtype>
135 | Filler<Dtype>* GetFiller(const FillerParameter& param) {
136 |   // Each recognized type string maps to one concrete filler class; the
137 |   // instance is allocated with new and returned as a raw pointer.
138 |   const std::string& name = param.type();
139 |   if (name == "constant") return new ConstantFiller<Dtype>(param);
140 |   if (name == "gaussian") return new GaussianFiller<Dtype>(param);
141 |   if (name == "positive_unitball") {
142 |     return new PositiveUnitballFiller<Dtype>(param);
143 |   }
144 |   if (name == "uniform") return new UniformFiller<Dtype>(param);
145 |   if (name == "xavier") return new XavierFiller<Dtype>(param);
146 |   // No match: report the offending name through a failing CHECK.
147 |   CHECK(false) << "Unknown filler name: " << param.type();
148 |   // Fallback result for the case where the CHECK above returns control.
149 |   // NOTE(review): whether CHECK aborts here depends on the logging header.
150 |   return (Filler<Dtype>*)(NULL);
151 | }
152 | 
153 | }  // namespace caffe
154 | 
155 | #endif  // CAFFE_FILLER_HPP
156 | 


--------------------------------------------------------------------------------
/src/caffe/util/io.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include <stdint.h>
  4 | #include <fcntl.h>
  5 | #include <google/protobuf/text_format.h>
  6 | #include <google/protobuf/io/zero_copy_stream_impl.h>
  7 | #include <google/protobuf/io/coded_stream.h>
  8 | 
  9 | #include <algorithm>
 10 | #include <string>
 11 | #include <vector>
 12 | #include <fstream>  // NOLINT(readability/streams)
 13 | 
 14 | #ifdef _MSC_VER
 15 | #include <io.h>  /* for open/close */
 16 | #else
 17 | #include <unistd.h>
 18 | #endif
 19 | 
 20 | #include "caffe/common.hpp"
 21 | #include "caffe/util/io.hpp"
 22 | #include "caffe/proto/caffe.pb.h"
 23 | 
 24 | using std::fstream;
 25 | using std::ios;
 26 | using std::max;
 27 | using std::string;
 28 | using google::protobuf::io::FileInputStream;
 29 | using google::protobuf::io::FileOutputStream;
 30 | using google::protobuf::io::ZeroCopyInputStream;
 31 | using google::protobuf::io::CodedInputStream;
 32 | using google::protobuf::io::ZeroCopyOutputStream;
 33 | using google::protobuf::io::CodedOutputStream;
 34 | 
 35 | namespace caffe {
 36 | 
 37 | void ReadProtoFromTextFile(const char* filename,
 38 |     ::google::protobuf::Message* proto) {  // Fills *proto from a text file.
 39 |   int fd = open(filename, O_RDONLY);
 40 |   CHECK_NE(fd, -1) << "File not found: " << filename;
 41 |   FileInputStream* input = new FileInputStream(fd);
 42 |   CHECK(google::protobuf::TextFormat::Parse(input, proto));  // Text -> proto.
 43 |   delete input;  // Release the stream wrapper before closing the descriptor.
 44 |   close(fd);
 45 | }
 46 | 
 47 | void WriteProtoToTextFile(const Message& proto, const char* filename) {
 48 |   const int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0644);
 49 |   CHECK_NE(fd, -1) << "Could not open file for writing: " << filename;
 50 |   FileOutputStream output(fd);
 51 |   CHECK(google::protobuf::TextFormat::Print(proto, &output));
 52 |   CHECK(output.Close()) << "Could not flush and close: " << filename;
 53 | }
 54 | 
 55 | void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
 56 | #ifdef _MSC_VER
 57 |   int fd = open(filename, O_RDONLY|O_BINARY);
 58 | #else
 59 |   int fd = open(filename, O_RDONLY);
 60 | #endif
 61 |   CHECK_NE(fd, -1) << "File not found: " << filename;
 62 |   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
 63 |   CodedInputStream* coded_input = new CodedInputStream(raw_input);
 64 |   coded_input->SetTotalBytesLimit(536870912, 268435456);
 65 |   // ^ 536870912 = 2^29 bytes (512 MiB) and 268435456 = 2^28 bytes (256 MiB).
 66 |   CHECK(proto->ParseFromCodedStream(coded_input));
 67 |   // Tear down in reverse order of construction, then close the descriptor.
 68 |   delete coded_input;
 69 |   delete raw_input;
 70 |   close(fd);
 71 | }
 72 | 
 73 | void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
 74 |   fstream output(filename, ios::out | ios::trunc | ios::binary);
 75 |   CHECK(proto.SerializeToOstream(&output));  // Existing content is truncated.
 76 | }
 77 | 
 78 | #if 0
 79 | bool ReadImageToDatum(const string& filename, const int label,
 80 |     const int height, const int width, Datum* datum) {  // false on failure
 81 |   cv::Mat cv_img_origin = cv::imread(filename, CV_LOAD_IMAGE_COLOR);
 82 |   if (!cv_img_origin.data) {  // Checked first: never resize an empty image.
 83 |     LOG(ERROR) << "Could not open or find file " << filename;
 84 |     return false;
 85 |   }
 86 |   cv::Mat cv_img;
 87 |   if (height > 0 && width > 0) {
 88 |     cv::resize(cv_img_origin, cv_img, cv::Size(width, height));  // (w, h)
 89 |   } else {
 90 |     cv_img = cv_img_origin;
 91 |   }
 92 |   datum->set_channels(3);
 93 |   datum->set_height(cv_img.rows);
 94 |   datum->set_width(cv_img.cols);
 95 |   datum->set_label(label);
 96 |   datum->clear_data();
 97 |   datum->clear_float_data();
 98 |   string* datum_string = datum->mutable_data();
 99 |   for (int c = 0; c < 3; ++c) {  // Bytes are stored channel by channel.
100 |     for (int h = 0; h < cv_img.rows; ++h) {
101 |       for (int w = 0; w < cv_img.cols; ++w) {
102 |         datum_string->push_back(
103 |             static_cast<char>(cv_img.at<cv::Vec3b>(h, w)[c]));
104 |       }
105 |     }
106 |   }
107 |   return true;
108 | }
109 | 
110 | // Verifies format of data stored in HDF5 file and reshapes blob accordingly.
111 | template <typename Dtype>
112 | void hdf5_load_nd_dataset_helper(
113 |     hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,
114 |     Blob<Dtype>* blob) {
115 |   // Verify that the number of dimensions is in the accepted range.
116 |   int ndims;
117 |   herr_t status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims);
118 |   CHECK_GE(status, 0) << "Failed to get dataset ndims for " << dataset_name_;
119 |   CHECK_GE(ndims, min_dim);
120 |   CHECK_LE(ndims, max_dim);
121 | 
122 |   // Verify that the data format is what we expect: float or double.
123 |   std::vector<hsize_t> dims(ndims);
124 |   H5T_class_t class_;
125 |   status = H5LTget_dataset_info(
126 |       file_id, dataset_name_, dims.data(), &class_, NULL);
127 |   CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_;
128 |   CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data";
129 |   blob->Reshape(
130 |     dims[0],
131 |     (dims.size() > 1) ? dims[1] : 1,
132 |     (dims.size() > 2) ? dims[2] : 1,
133 |     (dims.size() > 3) ? dims[3] : 1);
134 | }
135 | 
136 | template <>
137 | void hdf5_load_nd_dataset<float>(hid_t file_id, const char* dataset_name_,
138 |         int min_dim, int max_dim, Blob<float>* blob) {
139 |   hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
140 |   CHECK_GE(H5LTread_dataset_float(  // A negative status means the read failed.
141 |     file_id, dataset_name_, blob->mutable_cpu_data()), 0) << dataset_name_;
142 | }
143 | 
144 | template <>
145 | void hdf5_load_nd_dataset<double>(hid_t file_id, const char* dataset_name_,
146 |         int min_dim, int max_dim, Blob<double>* blob) {
147 |   hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob);
148 |   CHECK_GE(H5LTread_dataset_double(  // A negative status means the read failed.
149 |     file_id, dataset_name_, blob->mutable_cpu_data()), 0) << dataset_name_;
150 | }
151 | #endif
152 | 
153 | }  // namespace caffe
154 | 


--------------------------------------------------------------------------------
/src/caffe/util/insert_splits.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Jeff Donahue
  2 | 
  3 | #include <map>
  4 | #include <string>
  5 | #include <sstream>
  6 | #include <utility>
  7 | 
  8 | #include "caffe/common.hpp"
  9 | #include "caffe/util/insert_splits.hpp"
 10 | 
 11 | using std::map;
 12 | using std::ostringstream;
 13 | using std::pair;
 14 | using std::make_pair;
 15 | 
 16 | namespace caffe {
 17 | 
 18 | void insert_splits(const NetParameter& param, NetParameter* param_split) {
 19 |   // Start from a copy of param (which must not alias *param_split).
 20 |   param_split->CopyFrom(param);
 21 |   param_split->clear_layers();
 22 |   map<string, pair<int, int> > blob_name_to_last_top_idx;
 23 |   map<pair<int, int>, pair<int, int> > bottom_idx_to_source_top_idx;
 24 |   map<pair<int, int>, int> top_idx_to_bottom_count;
 25 |   map<pair<int, int>, int> top_idx_to_bottom_split_idx;
 26 |   map<int, string> layer_idx_to_layer_name;
 27 |   layer_idx_to_layer_name[-1] = "input";
 28 |   // Determine the number of times each blob is used as an input (bottom) blob.
 29 |   for (int i = 0; i < param.input_size(); ++i) {
 30 |     const string& blob_name = param.input(i);
 31 |     blob_name_to_last_top_idx[blob_name] = make_pair(-1, i);
 32 |   }
 33 |   for (int i = 0; i < param.layers_size(); ++i) {
 34 |     const LayerConnection& layer_connection = param.layers(i);
 35 |     layer_idx_to_layer_name[i] = layer_connection.layer().name();
 36 |     for (int j = 0; j < layer_connection.bottom_size(); ++j) {
 37 |       const string& blob_name = layer_connection.bottom(j);
 38 |       if (blob_name_to_last_top_idx.find(blob_name) ==
 39 |           blob_name_to_last_top_idx.end()) {
 40 |         LOG(FATAL) << "Unknown blob input " << blob_name << " to layer "
 41 |             << layer_connection.layer().name() << " (bottom " << j << ")";
 42 |       }
 43 |       const pair<int, int>& top_idx = blob_name_to_last_top_idx[blob_name];
 44 |       bottom_idx_to_source_top_idx[make_pair(i, j)] = top_idx;
 45 |       ++top_idx_to_bottom_count[top_idx];
 46 |     }
 47 |     for (int j = 0; j < layer_connection.top_size(); ++j) {
 48 |       const string& blob_name = layer_connection.top(j);
 49 |       blob_name_to_last_top_idx[blob_name] = make_pair(i, j);
 50 |     }
 51 |   }
 52 |   // Create split layer for any input blobs used by other layers as bottom
 53 |   // blobs more than once.
 54 |   for (int i = 0; i < param.input_size(); ++i) {
 55 |     const int split_count = top_idx_to_bottom_count[make_pair(-1, i)];
 56 |     if (split_count > 1) {
 57 |       const string& layer_name = layer_idx_to_layer_name[-1];
 58 |       const string& blob_name = param.input(i);
 59 |       LayerConnection* split_layer_connection = param_split->add_layers();
 60 |       configure_split_layer(layer_name, blob_name, i, split_count,
 61 |           split_layer_connection);
 62 |     }
 63 |   }
 64 |   for (int i = 0; i < param.layers_size(); ++i) {
 65 |     LayerConnection* layer_connection = param_split->add_layers();
 66 |     layer_connection->CopyFrom(param.layers(i));
 67 |     // Replace any shared bottom blobs with split layer outputs.
 68 |     for (int j = 0; j < layer_connection->bottom_size(); ++j) {
 69 |       const pair<int, int>& top_idx =
 70 |           bottom_idx_to_source_top_idx[make_pair(i, j)];
 71 |       const int split_count = top_idx_to_bottom_count[top_idx];
 72 |       if (split_count > 1) {
 73 |         const string& layer_name = layer_idx_to_layer_name[top_idx.first];
 74 |         const string& blob_name = layer_connection->bottom(j);
 75 |         layer_connection->set_bottom(j, get_split_blob_name(layer_name,
 76 |             blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++));
 77 |       }
 78 |     }
 79 |     // Create split layer for any top blobs used by other layers as bottom
 80 |     // blobs more than once.
 81 |     for (int j = 0; j < layer_connection->top_size(); ++j) {
 82 |       const int split_count = top_idx_to_bottom_count[make_pair(i, j)];
 83 |       if (split_count > 1) {
 84 |         const string& layer_name = layer_idx_to_layer_name[i];
 85 |         const string& blob_name = layer_connection->top(j);
 86 |         LayerConnection* split_layer_connection = param_split->add_layers();
 87 |         configure_split_layer(layer_name, blob_name, j, split_count,
 88 |             split_layer_connection);
 89 |       }
 90 |     }
 91 |   }
 92 | }
 93 | 
 94 | void configure_split_layer(const string& layer_name, const string& blob_name,
 95 |     const int blob_idx, const int split_count,
 96 |     LayerConnection* split_layer_connection) {
 97 |   split_layer_connection->Clear();
 98 |   // A "split" layer with one bottom (blob_name) and split_count tops.
 99 |   LayerParameter* layer = split_layer_connection->mutable_layer();
100 |   layer->set_type("split");
101 |   layer->set_name(get_split_layer_name(layer_name, blob_name, blob_idx));
102 |   split_layer_connection->add_bottom(blob_name);
103 |   for (int split_idx = 0; split_idx < split_count; ++split_idx) {
104 |     split_layer_connection->add_top(
105 |         get_split_blob_name(layer_name, blob_name, blob_idx, split_idx));
106 |   }
107 | }
108 | 
109 | string get_split_layer_name(const string& layer_name, const string& blob_name,
110 |     const int blob_idx) {
111 |   // Result has the form <blob_name>_<layer_name>_<blob_idx>_split.
112 |   ostringstream name_builder;
113 |   name_builder << blob_name << '_' << layer_name << '_' << blob_idx;
114 |   return name_builder.str() + "_split";
115 | }
116 | 
117 | string get_split_blob_name(const string& layer_name, const string& blob_name,
118 |     const int blob_idx, const int split_idx) {
119 |   // Split 0 reuses the bottom blob's own name so that its computation is
120 |   // done 'in-place', saving a bit of time and memory.
121 |   if (split_idx != 0) {
122 |     ostringstream name_builder;
123 |     name_builder << blob_name << '_' << layer_name << '_' << blob_idx
124 |         << "_split_" << split_idx;
125 |     return name_builder.str();
126 |   }
127 |   return blob_name;
128 | }
129 | 
130 | }  // namespace caffe
131 | 


--------------------------------------------------------------------------------
/src/caffe/blob.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include "caffe/blob.hpp"
  4 | #include "caffe/common.hpp"
  5 | #include "caffe/syncedmem.hpp"
  6 | #include "caffe/util/math_functions.hpp"
  7 | 
  8 | namespace caffe {
  9 | 
 10 | template <typename Dtype>
 11 | void Blob<Dtype>::Reshape(const int num, const int channels, const int height,
 12 |     const int width) {
 13 |   CHECK_GE(num, 0);
 14 |   CHECK_GE(channels, 0);
 15 |   CHECK_GE(height, 0);
 16 |   CHECK_GE(width, 0);
 17 |   num_ = num;
 18 |   channels_ = channels;
 19 |   height_ = height;
 20 |   width_ = width;
 21 |   count_ = num_ * channels_ * height_ * width_;
 22 |   if (count_ == 0) {
 23 |     // Empty blob: hold no memory at all.
 24 |     data_.reset();
 25 |     diff_.reset();
 26 |     return;
 27 |   }
 28 |   data_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
 29 |   diff_.reset(new SyncedMemory(count_ * sizeof(Dtype)));
 30 | }
 30 | 
 31 | template <typename Dtype>
 32 | Blob<Dtype>::Blob(const int num, const int channels, const int height,
 33 |     const int width) {
 34 |   Reshape(num, channels, height, width);  // Sets the shape and the buffers.
 35 | }
 36 | 
 37 | template <typename Dtype>
 38 | const Dtype* Blob<Dtype>::cpu_data() const {
 39 |   CHECK(data_);  // data_ is null after a reshape to zero elements.
 40 |   return reinterpret_cast<const Dtype*>(data_->cpu_data());
 41 | }
 42 | 
 43 | #if 0
 44 | template <typename Dtype>
 45 | const Dtype* Blob<Dtype>::gpu_data() const {
 46 |   CHECK(data_);
 47 |   return (const Dtype*)data_->gpu_data();
 48 | }
 49 | #endif
 50 | 
 51 | template <typename Dtype>
 52 | const Dtype* Blob<Dtype>::cpu_diff() const {
 53 |   CHECK(diff_);  // diff_ is null after a reshape to zero elements.
 54 |   return reinterpret_cast<const Dtype*>(diff_->cpu_data());
 55 | }
 56 | 
 57 | #if 0
 58 | template <typename Dtype>
 59 | const Dtype* Blob<Dtype>::gpu_diff() const {
 60 |   CHECK(diff_);
 61 |   return (const Dtype*)diff_->gpu_data();
 62 | }
 63 | #endif
 64 | 
 65 | template <typename Dtype>
 66 | Dtype* Blob<Dtype>::mutable_cpu_data() {
 67 |   CHECK(data_);  // data_ is null when the blob was reshaped to zero elements.
 68 |   return reinterpret_cast<Dtype*>(data_->mutable_cpu_data());
 69 | }
 70 | 
 71 | #if 0
 72 | template <typename Dtype>
 73 | Dtype* Blob<Dtype>::mutable_gpu_data() {
 74 |   CHECK(data_);
 75 |   return reinterpret_cast<Dtype*>(data_->mutable_gpu_data());
 76 | }
 77 | #endif
 78 | 
 79 | template <typename Dtype>
 80 | Dtype* Blob<Dtype>::mutable_cpu_diff() {
 81 |   CHECK(diff_);  // diff_ is null when the blob was reshaped to zero elements.
 82 |   return reinterpret_cast<Dtype*>(diff_->mutable_cpu_data());
 83 | }
 84 | 
 85 | #if 0
 86 | template <typename Dtype>
 87 | Dtype* Blob<Dtype>::mutable_gpu_diff() {
 88 |   CHECK(diff_);
 89 |   return reinterpret_cast<Dtype*>(diff_->mutable_gpu_data());
 90 | }
 91 | #endif
 92 | 
 93 | template <typename Dtype>
 94 | void Blob<Dtype>::Update() {
 95 |   // The update (an axpy of the diff with factor -1 into the data) runs where
 96 |   // the data currently lives; in this build only the CPU location is handled.
 97 |   if (data_->head() == SyncedMemory::HEAD_AT_CPU) {
 98 |     const Dtype* gradient = reinterpret_cast<const Dtype*>(diff_->cpu_data());
 99 |     Dtype* values = reinterpret_cast<Dtype*>(data_->mutable_cpu_data());
100 |     caffe_axpy<Dtype>(count_, Dtype(-1), gradient, values);
101 |     return;
102 |   }
103 | #if 0
104 |   // perform computation on GPU
105 |   if (data_->head() == SyncedMemory::HEAD_AT_GPU ||
106 |       data_->head() == SyncedMemory::SYNCED) {
107 |     caffe_gpu_axpy<Dtype>(count_, Dtype(-1),
108 |         reinterpret_cast<const Dtype*>(diff_->gpu_data()),
109 |         reinterpret_cast<Dtype*>(data_->mutable_gpu_data()));
110 |     return;
111 |   }
112 | #endif
113 |   // Every other head state is reported as an error.
114 |   LOG(FATAL) << "Syncedmem not initialized.";
115 | }
116 | 
117 | template <typename Dtype>
118 | void Blob<Dtype>::CopyFrom(const Blob& source, bool copy_diff, bool reshape) {
119 |   // A shape mismatch is fixed by reshaping when allowed, otherwise it is fatal.
120 |   const bool same_shape = num_ == source.num() &&
121 |       channels_ == source.channels() && height_ == source.height() &&
122 |       width_ == source.width();
123 |   if (!same_shape && reshape) {
124 |     Reshape(source.num(), source.channels(), source.height(), source.width());
125 |   } else if (!same_shape) {
126 |     LOG(FATAL) << "Trying to copy blobs of different sizes.";
127 |   }
128 | #if 0
129 |   if (Caffe::mode() == Caffe::GPU) {
130 |     if (copy_diff) {
131 |       CUDA_CHECK(cudaMemcpy(diff_->mutable_gpu_data(), source.gpu_diff(),
132 |           sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
133 |     } else {
134 |       CUDA_CHECK(cudaMemcpy(data_->mutable_gpu_data(), source.gpu_data(),
135 |           sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice));
136 |     }
137 |     return;
138 |   }
139 | #endif
140 |   if (Caffe::mode() == Caffe::CPU) {
141 |     if (copy_diff) {
142 |       memcpy(diff_->mutable_cpu_data(), source.cpu_diff(),
143 |           sizeof(Dtype) * count_);
144 |     } else {
145 |       memcpy(data_->mutable_cpu_data(), source.cpu_data(),
146 |           sizeof(Dtype) * count_);
147 |     }
148 |   } else {
149 |     LOG(FATAL) << "Unknown caffe mode.";
150 |   }
151 | }
152 | 
153 | template <typename Dtype>
154 | void Blob<Dtype>::FromProto(const BlobProto& proto) {
155 |   // Reshapes this blob to the dimensions stored in `proto`, then copies the
156 |   // proto's data values and, when any are present, its diff values into the
157 |   // host buffers.
158 |   Reshape(proto.num(), proto.channels(), proto.height(), proto.width());
159 |   // The loops below read count_ entries from each repeated field. A proto
160 |   // that carries fewer values than that is rejected here instead of being
161 |   // indexed out of range.
162 |   CHECK(proto.data_size() >= count_);
163 |   // copy data
164 |   Dtype* data_vec = mutable_cpu_data();
165 |   for (int i = 0; i < count_; ++i) {
166 |     data_vec[i] = proto.data(i);
167 |   }
168 |   // The diff is optional: it is copied only when the proto has diff entries.
169 |   if (proto.diff_size() > 0) {
170 |     CHECK(proto.diff_size() >= count_);
171 |     Dtype* diff_vec = mutable_cpu_diff();
172 |     for (int i = 0; i < count_; ++i) {
173 |       diff_vec[i] = proto.diff(i);
174 |     }
175 |   }
176 | }
168 | 
169 | template <typename Dtype>
170 | void Blob<Dtype>::ToProto(BlobProto* proto, bool write_diff) const {
171 |   // Writes this blob's four dimensions and its count_ data values into
172 |   // *proto, after clearing the proto's previous data and diff entries.
173 |   // The diff values are appended as well only when write_diff is true.
174 |   proto->set_num(num_);
175 |   proto->set_channels(channels_);
176 |   proto->set_height(height_);
177 |   proto->set_width(width_);
178 |   proto->clear_data();
179 |   proto->clear_diff();
180 |   const Dtype* values = cpu_data();
181 |   for (int idx = 0; idx < count_; ++idx) {
182 |     proto->add_data(values[idx]);
183 |   }
184 |   if (!write_diff) {
185 |     return;
186 |   }
187 |   const Dtype* diffs = cpu_diff();
188 |   for (int idx = 0; idx < count_; ++idx) {
189 |     proto->add_diff(diffs[idx]);
190 |   }
191 | }
188 | 
189 | INSTANTIATE_CLASS(Blob);
190 | 
191 | }  // namespace caffe
192 | 
193 | 


--------------------------------------------------------------------------------
/src/caffe/layers/dropout_group_layer.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Yuheng Chen
  2 | 
  3 | #include <vector>
  4 | #include <algorithm>
  5 | 
  6 | #include "caffe/common.hpp"
  7 | #include "caffe/layer.hpp"
  8 | #include "caffe/syncedmem.hpp"
  9 | #include "caffe/vision_layers.hpp"
 10 | 
 11 | namespace caffe {
 12 | 
 13 | template <typename Dtype>
 14 | void DropoutGroupLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 15 |       vector<Blob<Dtype>*>* top) {
 16 |   // Allocates one int mask entry per (image, y, x) position (count divided
 17 |   // by channels) and one Dtype scale per image, then sets every image's mask
 18 |   // to the sequence 0 .. H*W-1 and its scale to 1 / (1 - dropout_ratio).
 19 |   NeuronLayer<Dtype>::SetUp(bottom, top);
 20 |   NUM_ = bottom[0]->num();
 21 |   HEIGHT_ = bottom[0]->height();
 22 |   WIDTH_ = bottom[0]->width();
 23 |   const int plane_size = bottom[0]->width() * bottom[0]->height();
 24 |   const int total_entries = bottom[0]->count() / bottom[0]->channels();
 25 |   // Set up the cache for the mask and the per-image scale
 26 |   rand_vec_.reset(new SyncedMemory(total_entries * sizeof(int)));
 27 |   scale_.reset(new SyncedMemory(NUM_ * sizeof(Dtype)));
 28 |   threshold_ = this->layer_param_.dropout_ratio();
 29 |   DCHECK(threshold_ > 0.);
 30 |   DCHECK(threshold_ < 1.);
 31 |   const Dtype keep_scale = 1. / (1. - threshold_);
 32 |   // plane_size * (1 - ratio), rounded to the nearest integer.
 33 |   uint_thres_ = (unsigned int)((plane_size * (1. - threshold_)) + 0.5);
 34 | 
 35 |   int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
 36 |   Dtype* scale_ptr = reinterpret_cast<Dtype*>(scale_->mutable_cpu_data());
 37 |   for (int n = 0; n < bottom[0]->num(); ++n, mask += plane_size) {
 38 |     for (int i = 0; i < plane_size; ++i) {
 39 |       mask[i] = i;
 40 |     }
 41 |     scale_ptr[n] = keep_scale;
 42 |   }
 43 | }
 40 | 
 41 | template <typename Dtype>
 42 | void DropoutGroupLayer<Dtype>::UpdateMask() {
 43 |   // Shuffles each image's slice of the mask buffer independently.
 44 |   const int total = rand_vec_->size() / sizeof(int);
 45 |   const int per_image = total / NUM_;
 46 |   int* const base = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
 47 |   for (int n = 0; n < NUM_; ++n) {
 48 |     int* const first = base + n * per_image;
 49 |     std::random_shuffle(first, first + per_image);
 50 |   }
 51 | }
 51 | 
 52 | template <typename Dtype>
 53 | void DropoutGroupLayer<Dtype>::UpscaleMaskFrom(DropoutGroupLayer *dropout) {
 54 |   // Rebuilds this layer's mask from the smaller mask held by `dropout`.
 55 |   //
 56 |   // With ksize = HEIGHT_ - dropout->HEIGHT_ + 1 (the width difference must
 57 |   // agree), every position (y, x) of `dropout` whose mask value is below
 58 |   // dropout->uint_thres_ clears the ksize x ksize window that starts at
 59 |   // (y, x) in this layer's mask. Afterwards a mask entry is 0 if some window
 60 |   // covered it and 1 otherwise, uint_thres_ is 1, and each image's scale is
 61 |   // HEIGHT_ * WIDTH_ divided by its number of zero entries.
 62 |   const int ksize = HEIGHT_ - dropout->HEIGHT_ + 1;
 63 |   CHECK_EQ(ksize, WIDTH_ - dropout->WIDTH_ + 1);
 64 |   CHECK(ksize > 0);
 65 |   CHECK_EQ(NUM_, dropout->NUM_);
 66 |   int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
 67 |   const int count = rand_vec_->size() / sizeof(int);
 68 |   const int mask_size = count / NUM_;
 69 |   const int* small_mask =
 70 |       reinterpret_cast<const int*>(dropout->rand_vec_->cpu_data());
 71 |   const int small_mask_size =
 72 |       dropout->rand_vec_->size() / sizeof(int) / NUM_;
 73 | 
 74 |   // Start with every entry of every image set to 1.
 75 |   uint_thres_ = 1;
 76 |   std::fill(mask, mask + count, 1);
 77 | 
 78 |   Dtype* scale_ptr = reinterpret_cast<Dtype*>(scale_->mutable_cpu_data());
 79 |   for (int n = 0; n < NUM_; ++n) {
 80 |     // Clear the window of every small-mask position below the threshold.
 81 |     for (int y = 0; y < dropout->HEIGHT_; ++y) {
 82 |       for (int x = 0; x < dropout->WIDTH_; ++x) {
 83 |         const int source_value = small_mask[y * dropout->WIDTH_ + x];
 84 |         if (source_value < dropout->uint_thres_) {
 85 |           for (int ty = y; ty < y + ksize; ++ty) {
 86 |             int* row = mask + ty * WIDTH_ + x;
 87 |             std::fill(row, row + ksize, 0);
 88 |           }
 89 |         }
 90 |       }
 91 |     }
 92 |     // Despite its name, `nonzeros` counts the entries that ARE zero.
 93 |     int nonzeros = 0;
 94 |     for (int i = 0; i < HEIGHT_ * WIDTH_; ++i) {
 95 |       if (mask[i] == 0) {
 96 |         ++nonzeros;
 97 |       }
 98 |     }
 99 |     CHECK(nonzeros > 0);
100 |     scale_ptr[n] = HEIGHT_ * WIDTH_ / (Dtype)nonzeros;
101 |     mask += mask_size;
102 |     small_mask += small_mask_size;
103 |   }
104 | }
101 | 
102 | template <typename Dtype>
103 | void DropoutGroupLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
104 |     vector<Blob<Dtype>*>* top) {
105 |   // Outside the TRAIN phase the bottom data is copied to the top unchanged.
106 |   // In the TRAIN phase this expands NOT_IMPLEMENTED: the masked forward pass
107 |   // (scale the first uint_thres_ mask positions of every channel, zero the
108 |   // rest) was compiled out of this build with "#if 0".
109 |   const Dtype* src = bottom[0]->cpu_data();
110 |   Dtype* dst = (*top)[0]->mutable_cpu_data();
111 |   // The original requested the mask's mutable host pointer up front without
112 |   // using it; the call is kept so the mask memory is touched as before.
113 |   rand_vec_->mutable_cpu_data();
114 |   if (Caffe::phase() != Caffe::TRAIN) {
115 |     memcpy(dst, src, bottom[0]->count() * sizeof(Dtype));
116 |   } else {
117 |     NOT_IMPLEMENTED;
118 |   }
119 | }
134 | 
135 | template <typename Dtype>
136 | Dtype DropoutGroupLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
137 |     const bool propagate_down,
138 |     vector<Blob<Dtype>*>* bottom) {
139 |   // CHECKs that the phase is TRAIN. Propagating the gradient down expands
140 |   // NOT_IMPLEMENTED (the masked backward pass was compiled out of this build
141 |   // with "#if 0"); without propagation nothing is done. Every return path
142 |   // yields Dtype(0).
143 |   CHECK(Caffe::phase() == Caffe::TRAIN);
144 |   if (!propagate_down) {
145 |     return Dtype(0);
146 |   }
147 |   NOT_IMPLEMENTED;
148 |   return Dtype(0);
149 | }
168 | 
169 | 
170 | INSTANTIATE_CLASS(DropoutGroupLayer);
171 | 
172 | 
173 | }  // namespace caffe
174 | 


--------------------------------------------------------------------------------
/src/caffe/layers/lrn_layer.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include <vector>
  4 | 
  5 | #include "caffe/layer.hpp"
  6 | #include "caffe/vision_layers.hpp"
  7 | #include "caffe/util/math_functions.hpp"
  8 | 
  9 | namespace caffe {
 10 | 
 11 | template <typename Dtype>
 12 | void LRNLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 13 |       vector<Blob<Dtype>*>* top) {
 14 |   // Checks that there is exactly one bottom and one top blob, caches the
 15 |   // input shape and the layer parameters, and gives the top blob and scale_
 16 |   // the same shape as the input.
 17 |   CHECK_EQ(bottom.size(), 1) <<
 18 |       "Local Response Normalization Layer takes a single blob as input.";
 19 |   CHECK_EQ(top->size(), 1) <<
 20 |       "Local Response Normalization Layer takes a single blob as output.";
 21 |   const Blob<Dtype>* input = bottom[0];
 22 |   num_ = input->num();
 23 |   channels_ = input->channels();
 24 |   height_ = input->height();
 25 |   width_ = input->width();
 26 |   size_ = this->layer_param_.local_size();
 27 |   alpha_ = this->layer_param_.alpha();
 28 |   beta_ = this->layer_param_.beta();
 29 |   // Number of zero channels in front of the data in the padded buffers.
 30 |   pre_pad_ = (size_ - 1) / 2;
 31 |   (*top)[0]->Reshape(num_, channels_, height_, width_);
 32 |   scale_.Reshape(num_, channels_, height_, width_);
 33 | }
 29 | 
 30 | template <typename Dtype>
 31 | void LRNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 32 |     vector<Blob<Dtype>*>* top) {
 33 |   // For every image n and channel c this computes
 34 |   //   scale(n, c) = 1 + (alpha_ / size_) * sum of squared inputs over the
 35 |   //                 size_ channels starting pre_pad_ channels before c
 36 |   // (channels outside the input count as zero) with a running window, and
 37 |   // finally writes top = bottom * scale^(-beta_).
 38 |   const Dtype* in = bottom[0]->cpu_data();
 39 |   Dtype* out = (*top)[0]->mutable_cpu_data();
 40 |   Dtype* scale = scale_.mutable_cpu_data();
 41 |   const int plane = height_ * width_;
 42 |   // start with the constant value
 43 |   for (int i = 0; i < scale_.count(); ++i) {
 44 |     scale[i] = 1.;
 45 |   }
 46 |   // Squared input of one image, zero-padded along the channel axis.
 47 |   Blob<Dtype> padded_square(1, channels_ + size_ - 1, height_, width_);
 48 |   Dtype* squares = padded_square.mutable_cpu_data();
 49 |   memset(squares, 0, sizeof(Dtype) * padded_square.count());
 50 |   const Dtype alpha_over_size = alpha_ / size_;
 51 |   for (int n = 0; n < num_; ++n) {
 52 |     // Square the image into the padded buffer, pre_pad_ channels in.
 53 |     caffe_sqr(channels_ * plane,
 54 |         in + bottom[0]->offset(n),
 55 |         squares + padded_square.offset(0, pre_pad_));
 56 |     // Channel 0: add the first size_ padded channels.
 57 |     Dtype* first_scale = scale + scale_.offset(n, 0);
 58 |     for (int c = 0; c < size_; ++c) {
 59 |       caffe_axpy<Dtype>(plane, alpha_over_size,
 60 |           squares + padded_square.offset(0, c), first_scale);
 61 |     }
 62 |     // Channel c > 0: previous scale, plus the channel entering the window
 63 |     // (head), minus the channel leaving it (tail).
 64 |     for (int c = 1; c < channels_; ++c) {
 65 |       Dtype* current = scale + scale_.offset(n, c);
 66 |       caffe_copy<Dtype>(plane, scale + scale_.offset(n, c - 1), current);
 67 |       caffe_axpy<Dtype>(plane, alpha_over_size,
 68 |           squares + padded_square.offset(0, c + size_ - 1), current);
 69 |       caffe_axpy<Dtype>(plane, -alpha_over_size,
 70 |           squares + padded_square.offset(0, c - 1), current);
 71 |     }
 72 |   }
 73 | 
 74 |   // In the end, compute output
 75 |   caffe_powx<Dtype>(scale_.count(), scale, -beta_, out);
 76 |   caffe_mul<Dtype>(scale_.count(), out, in, out);
 77 | }
 76 | 
 77 | template <typename Dtype>
 78 | Dtype LRNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
 79 |     const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
 80 |   // Writes the bottom diff as
 81 |   //   top_diff * scale^(-beta_)
 82 |   //   - (2 * alpha_ * beta_ / size_) * bottom_data * accumulated_ratio
 83 |   // where accumulated_ratio is a running sum over size_ padded channels of
 84 |   // top_diff * top_data / scale. propagate_down is not consulted here.
 85 |   // Returns Dtype(0.).
 86 |   const Dtype* top_diff = top[0]->cpu_diff();
 87 |   const Dtype* top_data = top[0]->cpu_data();
 88 |   const Dtype* bottom_data = (*bottom)[0]->cpu_data();
 89 |   const Dtype* scale_data = scale_.cpu_data();
 90 |   Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
 91 |   const int plane = height_ * width_;
 92 |   Blob<Dtype> padded_ratio(1, channels_ + size_ - 1, height_, width_);
 93 |   Blob<Dtype> accum_ratio(1, 1, height_, width_);
 94 |   Dtype* padded = padded_ratio.mutable_cpu_data();
 95 |   Dtype* accum = accum_ratio.mutable_cpu_data();
 96 |   // The diff() of accum_ratio doubles as scratch space for a second result.
 97 |   Dtype* accum_times_bottom = accum_ratio.mutable_cpu_diff();
 98 |   memset(padded, 0, sizeof(Dtype) * padded_ratio.count());
 99 |   const Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_;
100 | 
101 |   // First term: top_diff * scale^(-beta_).
102 |   caffe_powx<Dtype>(scale_.count(), scale_data, -beta_, bottom_diff);
103 |   caffe_mul<Dtype>(scale_.count(), top_diff, bottom_diff, bottom_diff);
104 | 
105 |   // go through individual data
106 |   const int inverse_pre_pad = size_ - (size_ + 1) / 2;
107 |   for (int n = 0; n < num_; ++n) {
108 |     const int block_offset = scale_.offset(n);
109 |     // first, compute diff_i * y_i / s_i into the padded buffer
110 |     Dtype* ratio = padded + padded_ratio.offset(0, inverse_pre_pad);
111 |     caffe_mul<Dtype>(channels_ * plane,
112 |         top_diff + block_offset, top_data + block_offset, ratio);
113 |     caffe_div<Dtype>(channels_ * plane,
114 |         ratio, scale_data + block_offset, ratio);
115 |     // Now, compute the accumulated ratios and the bottom diff
116 |     memset(accum, 0, sizeof(Dtype) * accum_ratio.count());
117 |     for (int c = 0; c < size_ - 1; ++c) {
118 |       caffe_axpy<Dtype>(plane, 1.,
119 |           padded + padded_ratio.offset(0, c), accum);
120 |     }
121 |     for (int c = 0; c < channels_; ++c) {
122 |       const int channel_offset = top[0]->offset(n, c);
123 |       // Bring the newest padded channel into the running sum.
124 |       caffe_axpy<Dtype>(plane, 1.,
125 |           padded + padded_ratio.offset(0, c + size_ - 1), accum);
126 |       // compute bottom diff
127 |       caffe_mul<Dtype>(plane,
128 |           bottom_data + channel_offset, accum, accum_times_bottom);
129 |       caffe_axpy<Dtype>(plane, -cache_ratio_value,
130 |           accum_times_bottom, bottom_diff + channel_offset);
131 |       // Drop the oldest padded channel from the running sum.
132 |       caffe_axpy<Dtype>(plane, -1.,
133 |           padded + padded_ratio.offset(0, c), accum);
134 |     }
135 |   }
136 |   return Dtype(0.);
137 | }
131 | 
132 | INSTANTIATE_CLASS(LRNLayer);
133 | 
134 | 
135 | }  // namespace caffe
136 | 


--------------------------------------------------------------------------------
/src/caffe/util/im2col.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include <cmath>
  4 | #include <cstdlib>
  5 | #include <cstring>
  6 | 
  7 | #include "caffe/util/im2col.hpp"
  8 | 
  9 | namespace caffe {
 10 | 
 11 | // Unrolls ksize x ksize patches of a (channels, height, width) image into
 12 | // data_col, laid out as (channels * ksize * ksize, height_col, width_col).
 13 | // Patch elements that fall into the `pad` border are written as 0.
 14 | template <typename Dtype>
 15 | void im2col_cpu(const Dtype* data_im, const int channels,
 16 |     const int height, const int width, const int ksize, const int pad,
 17 |     const int stride, Dtype* data_col) {
 18 |   const int out_h = (height + 2 * pad - ksize) / stride + 1;
 19 |   const int out_w = (width + 2 * pad - ksize) / stride + 1;
 20 |   const int rows = channels * ksize * ksize;
 21 |   // The output is written strictly sequentially, so a running pointer
 22 |   // replaces the (row * out_h + oy) * out_w + ox index computation.
 23 |   Dtype* dst = data_col;
 24 |   for (int row = 0; row < rows; ++row) {
 25 |     const int kx = row % ksize;
 26 |     const int ky = (row / ksize) % ksize;
 27 |     const Dtype* src_plane = data_im + (row / ksize / ksize) * height * width;
 28 |     for (int oy = 0; oy < out_h; ++oy) {
 29 |       const int iy = oy * stride - pad + ky;
 30 |       for (int ox = 0; ox < out_w; ++ox, ++dst) {
 31 |         const int ix = ox * stride - pad + kx;
 32 |         const bool inside = iy >= 0 && iy < height && ix >= 0 && ix < width;
 33 |         *dst = inside ? src_plane[iy * width + ix] : Dtype(0);
 34 |       }
 35 |     }
 36 |   }
 37 | }
 35 | 
 36 | // Explicit instantiation
 37 | template void im2col_cpu<float>(const float* data_im, const int channels,
 38 |     const int height, const int width, const int ksize, const int pad,
 39 |     const int stride, float* data_col);
 40 | template void im2col_cpu<double>(const double* data_im, const int channels,
 41 |     const int height, const int width, const int ksize, const int pad,
 42 |     const int stride, double* data_col);
 43 | 
 44 | // Inverse layout of im2col_cpu: zeroes data_im, then adds every column
 45 | // entry back onto the image element it was taken from. Entries that map
 46 | // into the `pad` border are skipped.
 47 | template <typename Dtype>
 48 | void col2im_cpu(const Dtype* data_col, const int channels,
 49 |     const int height, const int width, const int ksize, const int pad,
 50 |     const int stride, Dtype* data_im) {
 51 |   memset(data_im, 0, sizeof(Dtype) * height * width * channels);
 52 |   const int out_h = (height + 2 * pad - ksize) / stride + 1;
 53 |   const int out_w = (width + 2 * pad - ksize) / stride + 1;
 54 |   const int rows = channels * ksize * ksize;
 55 |   // The column buffer is read strictly sequentially.
 56 |   const Dtype* src = data_col;
 57 |   for (int row = 0; row < rows; ++row) {
 58 |     const int kx = row % ksize;
 59 |     const int ky = (row / ksize) % ksize;
 60 |     Dtype* dst_plane = data_im + (row / ksize / ksize) * height * width;
 61 |     for (int oy = 0; oy < out_h; ++oy) {
 62 |       const int iy = oy * stride - pad + ky;
 63 |       for (int ox = 0; ox < out_w; ++ox, ++src) {
 64 |         const int ix = ox * stride - pad + kx;
 65 |         if (iy >= 0 && iy < height && ix >= 0 && ix < width) {
 66 |           dst_plane[iy * width + ix] += *src;
 67 |         }
 68 |       }
 69 |     }
 70 |   }
 71 | }
 67 | 
 68 | // Runs n iterations; iteration idx is decoded into (channel, h_out, w_out)
 69 | // using height_col and width_col, and copies the ksize x ksize patch whose
 70 | // top-left corner is (h_out, w_out) in that channel of the source.
 71 | // The source is addressed with row pitch `stridew` and `strideh` rows per
 72 | // channel; the patch's ksize * ksize values go to consecutive output
 73 | // channels, each height_col * width_col elements apart.
 74 | template <typename Dtype>
 75 | static void im2col_tile_cpu_kernel(const int n, const Dtype* _data_im,
 76 |     const int strideh, const int stridew,
 77 |     const int ksize,
 78 |     const int height_col, const int width_col,
 79 |     Dtype* _data_col) {
 80 |   const int plane = height_col * width_col;
 81 |   for (int idx = 0; idx < n; ++idx) {
 82 |     const int w_out = idx % width_col;
 83 |     const int h_out = (idx / width_col) % height_col;
 84 |     const int channel_in = idx / width_col / height_col;
 85 |     const int channel_out = channel_in * ksize * ksize;
 86 |     const Dtype* src =
 87 |         _data_im + (channel_in * strideh + h_out) * stridew + w_out;
 88 |     Dtype* dst =
 89 |         _data_col + (channel_out * height_col + h_out) * width_col + w_out;
 90 |     // k enumerates the patch row-major: row k / ksize, column k % ksize.
 91 |     for (int k = 0; k < ksize * ksize; ++k) {
 92 |       dst[k * plane] = src[(k / ksize) * stridew + (k % ksize)];
 93 |     }
 94 |   }
 95 | }
 93 | 
 94 | // CPU entry point for the tiled im2col: runs the kernel loop once per
 95 | // (channel, output row, output column) triple, i.e.
 96 | // channels * height_col * width_col iterations in total.
 97 | template <typename Dtype>
 98 | void im2col_tile_cpu(const Dtype* data_im, const int channels,
 99 |     const int stride_h, const int stride_w,
100 |     const int ksize, Dtype* data_col,
101 |     const int height_col, const int width_col) {
102 |   const int positions = channels * height_col * width_col;
103 |   im2col_tile_cpu_kernel<Dtype>(positions, data_im, stride_h, stride_w,
104 |       ksize, height_col, width_col, data_col);
105 | }
106 | 
107 | // Copies a densely packed (channels, height, width) source into a
108 | // destination whose rows are stride_w elements apart and whose channels
109 | // are stride_h * stride_w elements apart. Destination elements outside the
110 | // copied height x width window are left untouched. `n` is not used.
111 | template <typename Dtype>
112 | static void copy_stride_cpu_kernel(int n, const Dtype* _src_data,
113 |     const int channels,
114 |     const int height, const int width, Dtype *_dst_data,
115 |     const int stride_h, const int stride_w) {
116 |   for (int c = 0; c < channels; ++c) {
117 |     for (int h = 0; h < height; ++h) {
118 |       const Dtype* src_row = _src_data + (c * height + h) * width;
119 |       Dtype* dst_row = _dst_data + c * stride_h * stride_w + h * stride_w;
120 |       for (int w = 0; w < width; ++w) {
121 |         dst_row[w] = src_row[w];
122 |       }
123 |     }
124 |   }
125 | }
133 | 
134 | // CPU entry point for the strided copy. The element count
135 | // channels * height * width is forwarded as the kernel's first argument.
136 | template <typename Dtype>
137 | void copy_stride_cpu(const Dtype* src_data,
138 |     const int channels,
139 |     const int height, const int width, Dtype *dst_data,
140 |     const int stride_h, const int stride_w) {
141 |   const int element_count = channels * height * width;
142 |   copy_stride_cpu_kernel<Dtype>(
143 |       element_count, src_data, channels, height, width,
144 |       dst_data, stride_h, stride_w);
145 | }
147 | 
148 | // Explicit instantiation
149 | template void col2im_cpu<float>(const float* data_col, const int channels,
150 |     const int height, const int width, const int psize, const int pad,
151 |     const int stride, float* data_im);
152 | template void col2im_cpu<double>(const double* data_col, const int channels,
153 |     const int height, const int width, const int psize, const int pad,
154 |     const int stride, double* data_im);
155 | 
156 | template void im2col_tile_cpu(const float* data_im, const int channels,
157 | 		const int stride_h, const int stride_w,
158 |     const int ksize, float* data_col, 
159 |     const int height_col, const int width_col);
160 | template void im2col_tile_cpu(const double* data_im, const int channels,
161 | 		const int stride_h, const int stride_w,
162 |     const int ksize, double* data_col, 
163 |     const int height_col, const int width_col);
164 | 
165 | template void copy_stride_cpu<float>(const float* src_data, 
166 | 		const int channels,
167 | 		const int height, const int width, float *dst_data, 
168 | 		const int stride_h, const int stride_w) ;
169 | template void copy_stride_cpu<double>(const double* src_data, 
170 | 		const int channels,
171 | 		const int height, const int width, double *dst_data, 
172 | 		const int stride_h, const int stride_w) ;
173 | 
174 | 
175 | }  // namespace caffe
176 | 


--------------------------------------------------------------------------------
/align_test.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | //
  3 | // This is a simple script that allows one to quickly test a network whose
  4 | // structure is given as a text-format protocol buffer and whose parameters
  5 | // are loaded from a pre-trained network.
  6 | // Usage: test_net net_proto pretrained_net_proto image_list
  7 | //    (each image_list record: image_path left right top bottom)
  8 | 
  9 | #include <cstring>
 10 | #include <cstdlib>
 11 | #include <cstdio>
 12 | #include <vector>
 13 | #include <string>
 14 | 
 15 | #include "caffe/caffe.hpp"
 16 | #include <opencv2/core/core.hpp>
 17 | #include <opencv2/imgproc/imgproc.hpp>
 18 | #include <opencv2/highgui/highgui.hpp>
 19 | 
 20 | #define CROP_WINSIZE 39
 21 | #define CROP_PADDING 2.5
 22 | 
 23 | using namespace caffe;  // NOLINT(build/namespaces)
 24 | using namespace cv;
 25 | 
 26 | // Returns the arithmetic mean of the CROP_WINSIZE * CROP_WINSIZE floats
 27 | // starting at p.
 28 | float getMean( float * p )
 29 | {
 30 | 	// The accumulator must start at zero; it used to be read uninitialized,
 31 | 	// which made the result indeterminate.
 32 | 	float ans = 0.0f;
 33 | 	for(int i = 0 ; i < CROP_WINSIZE * CROP_WINSIZE ; i++, p++)
 34 | 		ans += *p;
 35 | 	ans = ans/ float( CROP_WINSIZE * CROP_WINSIZE );
 36 | 	return ans;
 37 | }
 34 | 
 35 | // Returns the sample standard deviation (divisor N - 1) of the
 36 | // N = CROP_WINSIZE * CROP_WINSIZE floats starting at p, around `mean`.
 37 | float getStd( float * p , float mean)
 38 | {
 39 | 	// The accumulator must start at zero; it used to be read uninitialized,
 40 | 	// which made the result indeterminate.
 41 | 	float ans = 0.0f;
 42 | 	for(int i = 0 ; i < CROP_WINSIZE * CROP_WINSIZE ; i++, p++)
 43 | 		ans += ( (*p-mean) * (*p-mean) );
 44 | 	ans = ans/ float( CROP_WINSIZE * CROP_WINSIZE - 1);
 45 | 	ans = sqrt( ans );
 46 | 	return ans;
 47 | }
 44 | // Crops the box [left, right) x [top, bottom) out of img after growing it
 45 | // on every side by int(scale * CROP_PADDING), where
 46 | // scale = (right - left) / CROP_WINSIZE. The crop is resized to
 47 | // CROP_WINSIZE x CROP_WINSIZE, converted to float and normalized to
 48 | // (pixel - mean) / std.
 49 | // On success `score` is set to a new[]-allocated array of
 50 | // CROP_WINSIZE * CROP_WINSIZE floats. If the grown box does not fit inside
 51 | // the image, a warning is printed and `score` is left untouched.
 52 | // A CV_8UC3 img is converted to gray in place (with a warning).
 53 | void getZscore( Mat & img, int left, int right, int top, int bottom, float * & score )
 54 | {
 55 | 	const bool is_color = ( img.type() == CV_8UC3 );
 56 | 	if( is_color )
 57 | 	{
 58 | 		std::cerr << "warning! a color image input" << std::endl;
 59 | 		cv::cvtColor( img , img , CV_RGB2GRAY );
 60 | 	}
 61 | 
 62 | 	// The same margin, derived from the box width, is applied on all sides.
 63 | 	const double scale = (right - left) / double( CROP_WINSIZE );
 64 | 	const int margin = int( scale * CROP_PADDING );
 65 | 	left   -= margin;
 66 | 	right  += margin;
 67 | 	top    -= margin;
 68 | 	bottom += margin;
 69 | 
 70 | 	const bool inside = top >= 0 && left >= 0 && right < img.cols && bottom < img.rows;
 71 | 	if( !inside )
 72 | 	{
 73 | 		std::cerr << "warning! invalid bounding box " << std::endl;
 74 | 		return;
 75 | 	}
 76 | 
 77 | 	Mat patch = img( Range( top, bottom ), Range( left, right ) );
 78 | 	cv::resize( patch , patch, Size( CROP_WINSIZE, CROP_WINSIZE ) );
 79 | 	patch.convertTo( patch, CV_32F );
 80 | 
 81 | 	float * pixels = patch.ptr<float>();
 82 | 	const float mu = getMean( pixels );
 83 | 	const float sigma = getStd( pixels , mu );
 84 | 
 85 | 	const int pixel_count = CROP_WINSIZE * CROP_WINSIZE;
 86 | 	score = new float[ pixel_count ];
 87 | 	for(int i = 0 ; i < pixel_count ; i++)
 88 | 		score[i] = ( pixels[i] - mu ) / sigma;
 89 | }
 81 | 
 82 | 
 83 | // Writes the blob's host data, count() elements of Dtype, as raw bytes to
 84 | // the file fn. CHECKs that the file could be opened for writing.
 85 | template <typename Dtype>
 86 | static void save_blob(const string& fn, Blob<Dtype> *b){
 87 | 	LOG(INFO) << "Saving " << fn;
 88 | 	FILE *f = fopen(fn.c_str(), "wb");
 89 | 	CHECK(f != NULL);
 90 | 	const Dtype *payload = b->cpu_data();
 91 | 	const size_t element_count = b->count();
 92 | 	fwrite(payload, sizeof(Dtype), element_count, f);
 93 | 	fclose(f);
 94 | }
 91 | 
 92 | // Debug helper: shows the 39 x 39 float image in buf, min-max normalized to
 93 | // 0..255 and enlarged 4x, with five points drawn on top. pt holds the
 94 | // points as consecutive (x, y) pairs in patch coordinates. Blocks in
 95 | // cv::waitKey(0) until a key is pressed.
 96 | static void draw(const float *buf, const float *pt){
 97 | 	const int ph = 39, pw = 39;
 98 | 	const float scale = 4.0f;
 99 | 	const int point_count = 5;
100 | 
101 | 	cv::Mat patch = cv::Mat::zeros(ph, pw, CV_32FC1);
102 | 	memcpy(patch.data, buf, sizeof(float)*pw*ph);
103 | 
104 | 	cv::Mat dsp;
105 | 	cv::normalize(patch, dsp, 0, 255, cv::NORM_MINMAX, CV_8UC1);
106 | 	cv::resize(dsp, dsp, cv::Size(), scale, scale);
107 | 	cv::cvtColor(dsp, dsp, CV_GRAY2BGR);
108 | 
109 | 	for(int i = 0; i < point_count; i++){
110 | 		const float x = pt[2*i];
111 | 		const float y = pt[2*i + 1];
112 | 		cv::circle(dsp, cv::Point(x*scale, y*scale), 2, cv::Scalar(255,0,0), 2);
113 | 	}
114 | 	cv::imshow("A", dsp);
115 | 	cv::waitKey(0);
116 | }
111 | 
112 | 
113 | // Runs a pre-trained network on image crops and prints one layer's output.
114 | //
115 | // Arguments, as consumed below:
116 | //   argv[1]  network definition, text-format protocol buffer
117 | //   argv[2]  trained parameters, binary protocol buffer
118 | //   argv[3]  list file; each record is "image_path left right top bottom"
119 | //
120 | // For every record the image is loaded, converted to gray, cropped and
121 | // z-scored by getZscore(), copied into the top blob of the layer named
122 | // "image_input", and after a forward pass the top blob of the layer named
123 | // "ip2" is printed to stdout after the record itself. Records whose image
124 | // cannot be read (the path is printed) or whose box is rejected are skipped.
125 | int main(int argc, char** argv) {
126 | 	// argv[3] is opened below, so three arguments are required. The guard
127 | 	// used to be "argc < 3", which let fopen() receive a null path.
128 | 	if (argc < 4) {
129 | 		LOG(ERROR) << "test_net net_proto pretrained_net_proto iterations inputbin output_dir"
130 | 			<< " [CPU/GPU]";
131 | 		return 0;
132 | 	}
133 | 
134 | 	LogMessage::Enable(true);
135 | 	Caffe::set_phase(Caffe::TEST);
136 | 	Caffe::set_mode(Caffe::CPU);
137 | 
138 | 	NetParameter test_net_param;
139 | 	ReadProtoFromTextFile(argv[1], &test_net_param);
140 | 	Net<float> caffe_test_net(test_net_param);
141 | 	NetParameter trained_net_param;
142 | 	ReadProtoFromBinaryFile(argv[2], &trained_net_param);
143 | 	caffe_test_net.CopyTrainedLayersFrom(trained_net_param);
144 | 
145 | 	vector<Blob<float>*> dummy_blob_input_vec;
146 | 
147 | 	// Locate the first layer named "ip2" (output) and the first layer named
148 | 	// "image_input" (input).
149 | 	int feature_layer_idx = -1;
150 | 	int data_layer_idx = -1;
151 | 	for(int i=0;i<caffe_test_net.layer_names().size();i++){
152 | 		const string& layer_name = caffe_test_net.layer_names()[i];
153 | 		if(feature_layer_idx == -1 && layer_name == "ip2")
154 | 			feature_layer_idx = i;
155 | 		if(data_layer_idx == -1 && layer_name == "image_input")
156 | 			data_layer_idx = i;
157 | 	}
158 | 
159 | 	CHECK_NE(feature_layer_idx, -1);
160 | 	CHECK_NE(data_layer_idx, -1);
161 | 	LOG(INFO) << "Data layer: " << data_layer_idx;
162 | 	LOG(INFO) << "Feature layer: " << feature_layer_idx;
163 | 
164 | 	Blob<float>* output = caffe_test_net.top_vecs()[feature_layer_idx][0],
165 | 		*data_blob = caffe_test_net.top_vecs()[data_layer_idx][0];
166 | 	// The input layer must really be a RawImageLayer.
167 | 	RawImageLayer<float> *data_layer = dynamic_cast<RawImageLayer<float>* >(caffe_test_net.layers()[data_layer_idx].get());
168 | 	CHECK(data_layer != 0);
169 | 
170 | 	LOG(INFO) << "OUTPUT BLOB dim: " << output->num() << ' '
171 | 		<< output->channels() << ' '
172 | 		<< output->width() << ' '
173 | 		<< output->height();
174 | 
175 | 	// Each patch copied below is CROP_WINSIZE * CROP_WINSIZE floats; refuse
176 | 	// to run when the input blob cannot hold one, instead of writing past
177 | 	// its end.
178 | 	const int patch_len = CROP_WINSIZE * CROP_WINSIZE;
179 | 	CHECK(data_blob->count() >= patch_len);
180 | 
181 | 	FILE *finput = fopen(argv[3], "r");
182 | 	CHECK(finput != NULL);
183 | 	for (;;) {
184 | 		char fn[1024];
185 | 		int l,r,t,b;
186 | 		// The field width keeps an over-long path from overflowing fn.
187 | 		int nread = fscanf(finput, "%1023s%d%d%d%d", fn, &l, &r, &t, &b);
188 | 		if(nread != 5)
189 | 			break;
190 | 		cv::Mat mat = cv::imread(fn);
191 | 		if(!mat.data){
192 | 			printf("%s\n", fn);
193 | 			continue;
194 | 		}
195 | 		cv::cvtColor(mat, mat, CV_BGR2GRAY);
196 | 		float * p = 0;
197 | 		getZscore( mat, l, r, t, b, p);
198 | 		// p stays null when getZscore rejects the bounding box.
199 | 		if(!p)
200 | 			continue;
201 | 
202 | 		float *d = data_blob->mutable_cpu_data();
203 | 		// NOTE(review): d is never advanced, so for a batch size above 1
204 | 		// only the first image slot is filled -- confirm the intent.
205 | 		for(int j = 0; j < data_blob->num(); j++){
206 | 			memcpy(d, p, sizeof(float)*patch_len);
207 | 		}
208 | 		caffe_test_net.Forward(dummy_blob_input_vec);
209 | 
210 | 		printf("%s %d %d %d %d ", fn, l, r, t, b);
211 | 		const float *pt = output->cpu_data();
212 | 		// NOTE(review): pt is not advanced per row either, so every row
213 | 		// printed repeats the first output->channels() values.
214 | 		for(int i=0;i<output->num();i++){
215 | 			for(int j=0;j<output->channels();j++)
216 | 				printf("%f\t", pt[j]);
217 | 			printf("\n");
218 | 		}
219 | 		fflush(stdout);
220 | 
221 | 		delete [] p;
222 | 	}
223 | 	fclose(finput);
224 | 
225 | 	return 0;
226 | }
228 | 
229 | 


--------------------------------------------------------------------------------
/src/caffe/layers/pooling_layer.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include <algorithm>
  4 | #include <cfloat>
  5 | #include <vector>
  6 | 
  7 | #include "caffe/layer.hpp"
  8 | #include "caffe/vision_layers.hpp"
  9 | #include "caffe/util/math_functions.hpp"
 10 | 
 11 | using std::max;
 12 | using std::min;
 13 | 
 14 | namespace caffe {
 15 | 
 16 | template <typename Dtype>
 17 | void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 18 |       vector<Blob<Dtype>*>* top) {
 19 |   // Checks for exactly one bottom and one top blob, caches the kernel size,
 20 |   // stride and input shape, and reshapes the top blob to the pooled size
 21 |   //   pooled = ceil((extent - KSIZE_) / STRIDE_) + 1
 22 |   // per spatial dimension. For stochastic pooling rand_idx_ gets the same
 23 |   // shape as the top blob.
 24 |   CHECK_EQ(bottom.size(), 1) << "PoolingLayer takes a single blob as input.";
 25 |   CHECK_EQ(top->size(), 1) << "PoolingLayer takes a single blob as output.";
 26 |   KSIZE_ = this->layer_param_.kernelsize();
 27 |   STRIDE_ = this->layer_param_.stride();
 28 |   const Blob<Dtype>* input = bottom[0];
 29 |   CHANNELS_ = input->channels();
 30 |   HEIGHT_ = input->height();
 31 |   WIDTH_ = input->width();
 32 |   const float span_h = static_cast<float>(HEIGHT_ - KSIZE_);
 33 |   const float span_w = static_cast<float>(WIDTH_ - KSIZE_);
 34 |   POOLED_HEIGHT_ = static_cast<int>(ceil(span_h / STRIDE_)) + 1;
 35 |   POOLED_WIDTH_ = static_cast<int>(ceil(span_w / STRIDE_)) + 1;
 36 |   const int num = input->num();
 37 |   (*top)[0]->Reshape(num, CHANNELS_, POOLED_HEIGHT_, POOLED_WIDTH_);
 38 |   // If stochastic pooling, we will initialize the random index part.
 39 |   if (this->layer_param_.pool() == LayerParameter_PoolMethod_STOCHASTIC) {
 40 |     rand_idx_.Reshape(num, CHANNELS_, POOLED_HEIGHT_, POOLED_WIDTH_);
 41 |   }
 42 | }
 38 | 
 39 | // CPU forward pass: reduces every KSIZE_ x KSIZE_ window (stepped by STRIDE_
 40 | // and clipped to the input) of each (image, channel) plane to one value:
 41 | // the maximum for MAX, the mean over the clipped window for AVE.
 42 | // TODO(Yangqing): Is there a faster way to do pooling in the channel-first
 43 | // case?
 44 | template <typename Dtype>
 45 | void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 46 |       vector<Blob<Dtype>*>* top) {
 47 |   const Dtype* in_plane = bottom[0]->cpu_data();
 48 |   Dtype* out_plane = (*top)[0]->mutable_cpu_data();
 49 |   const int out_count = (*top)[0]->count();
 50 |   // The pooling method is dispatched once, outside the loops, which keeps
 51 |   // the inner loops branch-free at the cost of some duplicated code.
 52 |   switch (this->layer_param_.pool()) {
 53 |   case LayerParameter_PoolMethod_MAX:
 54 |     // Seed every output with the lowest value so any input can replace it.
 55 |     std::fill(out_plane, out_plane + out_count, Dtype(-FLT_MAX));
 56 |     for (int n = 0; n < bottom[0]->num(); ++n) {
 57 |       for (int c = 0; c < CHANNELS_; ++c) {
 58 |         for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) {
 59 |           const int row_begin = ph * STRIDE_;
 60 |           const int row_end = min(row_begin + KSIZE_, HEIGHT_);
 61 |           for (int pw = 0; pw < POOLED_WIDTH_; ++pw) {
 62 |             const int col_begin = pw * STRIDE_;
 63 |             const int col_end = min(col_begin + KSIZE_, WIDTH_);
 64 |             Dtype& best = out_plane[ph * POOLED_WIDTH_ + pw];
 65 |             for (int h = row_begin; h < row_end; ++h) {
 66 |               for (int w = col_begin; w < col_end; ++w) {
 67 |                 best = max(best, in_plane[h * WIDTH_ + w]);
 68 |               }
 69 |             }
 70 |           }
 71 |         }
 72 |         // Step both cursors to the next (image, channel) plane.
 73 |         in_plane += bottom[0]->offset(0, 1);
 74 |         out_plane += (*top)[0]->offset(0, 1);
 75 |       }
 76 |     }
 77 |     break;
 78 |   case LayerParameter_PoolMethod_AVE:
 79 |     // Outputs are used as running sums, so they start at zero.
 80 |     std::fill(out_plane, out_plane + out_count, Dtype(0));
 81 |     for (int n = 0; n < bottom[0]->num(); ++n) {
 82 |       for (int c = 0; c < CHANNELS_; ++c) {
 83 |         for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) {
 84 |           const int row_begin = ph * STRIDE_;
 85 |           const int row_end = min(row_begin + KSIZE_, HEIGHT_);
 86 |           for (int pw = 0; pw < POOLED_WIDTH_; ++pw) {
 87 |             const int col_begin = pw * STRIDE_;
 88 |             const int col_end = min(col_begin + KSIZE_, WIDTH_);
 89 |             Dtype& mean = out_plane[ph * POOLED_WIDTH_ + pw];
 90 |             for (int h = row_begin; h < row_end; ++h) {
 91 |               for (int w = col_begin; w < col_end; ++w) {
 92 |                 mean += in_plane[h * WIDTH_ + w];
 93 |               }
 94 |             }
 95 |             // Divide by the clipped window area, not KSIZE_ * KSIZE_.
 96 |             mean /= (row_end - row_begin) * (col_end - col_begin);
 97 |           }
 98 |         }
 99 |         // Step both cursors to the next (image, channel) plane.
100 |         in_plane += bottom[0]->offset(0, 1);
101 |         out_plane += (*top)[0]->offset(0, 1);
102 |       }
103 |     }
104 |     break;
105 |   case LayerParameter_PoolMethod_STOCHASTIC:
106 |     NOT_IMPLEMENTED;
107 |     break;
108 |   default:
109 |     LOG(FATAL) << "Unknown pooling method.";
110 |   }
111 | }
115 | 
116 | // CPU backward pass: routes top_diff back through the same clipped windows
117 | // that Forward_cpu uses. MAX passes the gradient to every input equal to the
118 | // pooled maximum; AVE spreads it evenly over the window. Always returns 0.
119 | template <typename Dtype>
120 | Dtype PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
121 |       const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
122 |   if (!propagate_down) {
123 |     return Dtype(0.);
124 |   }
125 |   const Dtype* out_grad = top[0]->cpu_diff();
126 |   const Dtype* out_val = top[0]->cpu_data();
127 |   const Dtype* in_val = (*bottom)[0]->cpu_data();
128 |   Dtype* in_grad = (*bottom)[0]->mutable_cpu_diff();
129 |   // Gradients are accumulated with +=, so the buffer must start at zero.
130 |   memset(in_grad, 0, (*bottom)[0]->count() * sizeof(Dtype));
131 |   // The pooling method is dispatched once, outside the loops.
132 |   switch (this->layer_param_.pool()) {
133 |   case LayerParameter_PoolMethod_MAX:
134 |     for (int n = 0; n < top[0]->num(); ++n) {
135 |       for (int c = 0; c < CHANNELS_; ++c) {
136 |         for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) {
137 |           const int row_begin = ph * STRIDE_;
138 |           const int row_end = min(row_begin + KSIZE_, HEIGHT_);
139 |           for (int pw = 0; pw < POOLED_WIDTH_; ++pw) {
140 |             const int col_begin = pw * STRIDE_;
141 |             const int col_end = min(col_begin + KSIZE_, WIDTH_);
142 |             const int p = ph * POOLED_WIDTH_ + pw;
143 |             for (int h = row_begin; h < row_end; ++h) {
144 |               for (int w = col_begin; w < col_end; ++w) {
145 |                 const int q = h * WIDTH_ + w;
146 |                 // The comparison yields 1 only where the input is the max.
147 |                 in_grad[q] += out_grad[p] * (in_val[q] == out_val[p]);
148 |               }
149 |             }
150 |           }
151 |         }
152 |         // Step all four cursors to the next (image, channel) plane.
153 |         in_val += (*bottom)[0]->offset(0, 1);
154 |         out_val += top[0]->offset(0, 1);
155 |         in_grad += (*bottom)[0]->offset(0, 1);
156 |         out_grad += top[0]->offset(0, 1);
157 |       }
158 |     }
159 |     break;
160 |   case LayerParameter_PoolMethod_AVE:
161 |     for (int n = 0; n < top[0]->num(); ++n) {
162 |       for (int c = 0; c < CHANNELS_; ++c) {
163 |         for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) {
164 |           const int row_begin = ph * STRIDE_;
165 |           const int row_end = min(row_begin + KSIZE_, HEIGHT_);
166 |           for (int pw = 0; pw < POOLED_WIDTH_; ++pw) {
167 |             const int col_begin = pw * STRIDE_;
168 |             const int col_end = min(col_begin + KSIZE_, WIDTH_);
169 |             const int p = ph * POOLED_WIDTH_ + pw;
170 |             // Same divisor as the forward pass: the clipped window area.
171 |             const int window = (row_end - row_begin) * (col_end - col_begin);
172 |             for (int h = row_begin; h < row_end; ++h) {
173 |               for (int w = col_begin; w < col_end; ++w) {
174 |                 in_grad[h * WIDTH_ + w] += out_grad[p] / window;
175 |               }
176 |             }
177 |           }
178 |         }
179 |         // Step all four cursors to the next (image, channel) plane.
180 |         in_val += (*bottom)[0]->offset(0, 1);
181 |         out_val += top[0]->offset(0, 1);
182 |         in_grad += (*bottom)[0]->offset(0, 1);
183 |         out_grad += top[0]->offset(0, 1);
184 |       }
185 |     }
186 |     break;
187 |   case LayerParameter_PoolMethod_STOCHASTIC:
188 |     NOT_IMPLEMENTED;
189 |     break;
190 |   default:
191 |     LOG(FATAL) << "Unknown pooling method.";
192 |   }
193 |   return Dtype(0.);
194 | }
193 | 
194 | 
195 | INSTANTIATE_CLASS(PoolingLayer);
196 | 
197 | 
198 | }  // namespace caffe
199 | 


--------------------------------------------------------------------------------
/src/caffe/layers/loss_layer.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include <algorithm>
  4 | #include <cmath>
  5 | #include <cfloat>
  6 | #include <vector>
  7 | 
  8 | #include "caffe/layer.hpp"
  9 | #include "caffe/vision_layers.hpp"
 10 | #include "caffe/util/math_functions.hpp"
 11 | #include "caffe/util/io.hpp"
 12 | 
 13 | using std::max;
 14 | 
 15 | namespace caffe {
 16 | 
 17 | const float kLOG_THRESHOLD = 1e-20;  // floor applied to probabilities in this file before log() or division
 18 | 
 19 | template <typename Dtype>  // SetUp below only validates blob counts and shapes.
 20 | void MultinomialLogisticLossLayer<Dtype>::SetUp(
 21 |     const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
 22 |   CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input.";  // bottom[0]: data, bottom[1]: labels
 23 |   CHECK_EQ(top->size(), 0) << "Loss Layer takes no output.";
 24 |   CHECK_EQ(bottom[0]->num(), bottom[1]->num())
 25 |       << "The data and label should have the same number.";
 26 |   CHECK_EQ(bottom[1]->channels(), 1);  // the label blob must hold exactly
 27 |   CHECK_EQ(bottom[1]->height(), 1);    // one scalar per item: shape
 28 |   CHECK_EQ(bottom[1]->width(), 1);     // (num, 1, 1, 1)
 29 | }
 30 | 
 31 | 
 32 | // Returns the batch-averaged negative log of the probability stored at each
 33 | // item's label index, and writes the matching gradient into bottom[0]'s diff
 34 | // (zero everywhere except the label entry). Probabilities are floored at
 35 | // kLOG_THRESHOLD before log() and division.
 36 | template <typename Dtype>
 37 | Dtype MultinomialLogisticLossLayer<Dtype>::Backward_cpu(
 38 |     const vector<Blob<Dtype>*>& top, const bool propagate_down,
 39 |     vector<Blob<Dtype>*>* bottom) {
 40 |   const Dtype* probs = (*bottom)[0]->cpu_data();
 41 |   const Dtype* labels = (*bottom)[1]->cpu_data();
 42 |   Dtype* grads = (*bottom)[0]->mutable_cpu_diff();
 43 |   const int num = (*bottom)[0]->num();
 44 |   const int total = (*bottom)[0]->count();
 45 |   const int dim = total / num;
 46 |   memset(grads, 0, sizeof(Dtype) * total);
 47 |   Dtype loss = 0;
 48 |   for (int i = 0; i < num; ++i) {
 49 |     // Flat index of the labelled class inside item i's row.
 50 |     const int target = i * dim + static_cast<int>(labels[i]);
 51 |     const Dtype prob = max(probs[target], Dtype(kLOG_THRESHOLD));
 52 |     loss -= log(prob);
 53 |     grads[target] = - 1. / prob / num;
 54 |   }
 55 |   return loss / num;
 56 | }
 51 | 
 52 | // TODO: implement the GPU version for multinomial loss
 53 | 
 54 | 
 55 | template <typename Dtype>  // SetUp below validates the inputs and loads the infogain matrix.
 56 | void InfogainLossLayer<Dtype>::SetUp(
 57 |     const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
 58 |   CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input.";  // bottom[0]: data, bottom[1]: labels
 59 |   CHECK_EQ(top->size(), 0) << "Loss Layer takes no output.";
 60 |   CHECK_EQ(bottom[0]->num(), bottom[1]->num())
 61 |       << "The data and label should have the same number.";
 62 |   CHECK_EQ(bottom[1]->channels(), 1);  // the label blob must hold exactly
 63 |   CHECK_EQ(bottom[1]->height(), 1);    // one scalar per item: shape
 64 |   CHECK_EQ(bottom[1]->width(), 1);     // (num, 1, 1, 1)
 65 |   BlobProto blob_proto;
 66 |   ReadProtoFromBinaryFile(this->layer_param_.source(), &blob_proto);  // the layer's `source` names the matrix file
 67 |   infogain_.FromProto(blob_proto);
 68 |   CHECK_EQ(infogain_.num(), 1);       // the loaded blob must be a single
 69 |   CHECK_EQ(infogain_.channels(), 1);  // square matrix; Backward_cpu also
 70 |   CHECK_EQ(infogain_.height(), infogain_.width());  // checks height == per-item dim
 71 | }
 72 | 
 73 | 
 74 | // Returns the batch-averaged infogain-weighted negative log-likelihood and
 75 | // writes its gradient into bottom[0]'s diff. For item i with label l, class j
 76 | // contributes -infogain[l][j] * log(p[i][j]); probabilities are floored at
 77 | // kLOG_THRESHOLD first.
 78 | template <typename Dtype>
 79 | Dtype InfogainLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
 80 |     const bool propagate_down,
 81 |     vector<Blob<Dtype>*>* bottom) {
 82 |   const Dtype* probs = (*bottom)[0]->cpu_data();
 83 |   const Dtype* labels = (*bottom)[1]->cpu_data();
 84 |   const Dtype* gains = infogain_.cpu_data();
 85 |   Dtype* grads = (*bottom)[0]->mutable_cpu_diff();
 86 |   const int num = (*bottom)[0]->num();
 87 |   const int dim = (*bottom)[0]->count() / (*bottom)[0]->num();
 88 |   // The matrix needs one row and one column per class.
 89 |   CHECK_EQ(infogain_.height(), dim);
 90 |   Dtype loss = 0;
 91 |   for (int i = 0; i < num; ++i) {
 92 |     // Row of the infogain matrix selected by this item's label.
 93 |     const Dtype* gain_row = gains + static_cast<int>(labels[i]) * dim;
 94 |     const Dtype* prob_row = probs + i * dim;
 95 |     Dtype* grad_row = grads + i * dim;
 96 |     for (int j = 0; j < dim; ++j) {
 97 |       const Dtype prob = max(prob_row[j], Dtype(kLOG_THRESHOLD));
 98 |       loss -= gain_row[j] * log(prob);
 99 |       grad_row[j] = - gain_row[j] / prob / num;
100 |     }
101 |   }
102 |   return loss / num;
103 | }
 96 | 
 97 | 
 98 | // Checks that the layer has two identically shaped inputs and no output,
 99 | // then sizes difference_ to hold their element-wise difference.
100 | template <typename Dtype>
101 | void EuclideanLossLayer<Dtype>::SetUp(
102 |   const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
103 |   CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input.";
104 |   // Message fixed (was "takes no as output") to match the other loss layers.
105 |   CHECK_EQ(top->size(), 0) << "Loss Layer takes no output.";
106 |   CHECK_EQ(bottom[0]->num(), bottom[1]->num())
107 |       << "The data and label should have the same number.";
108 |   CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
109 |   CHECK_EQ(bottom[0]->height(), bottom[1]->height());
110 |   CHECK_EQ(bottom[0]->width(), bottom[1]->width());
111 |   difference_.Reshape(bottom[0]->num(), bottom[0]->channels(),
112 |       bottom[0]->height(), bottom[0]->width());
113 | }
111 | 
112 | // Returns sum((bottom[0] - bottom[1])^2) / num / 2 and writes the gradient
113 | // (bottom[0] - bottom[1]) / num into bottom[0]'s diff. The element-wise
114 | // difference is kept in difference_.
115 | template <typename Dtype>
116 | Dtype EuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
117 |     const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
118 |   Blob<Dtype>* prediction = (*bottom)[0];
119 |   const int count = prediction->count();
120 |   const int num = prediction->num();
121 |   caffe_sub(count, prediction->cpu_data(), (*bottom)[1]->cpu_data(),
122 |       difference_.mutable_cpu_data());
123 |   const Dtype squared_norm = caffe_cpu_dot(
124 |       count, difference_.cpu_data(), difference_.cpu_data());
125 |   const Dtype loss = squared_norm / num / Dtype(2);
126 |   // The gradient overwrites the diff (beta = 0) with difference_ / num.
127 |   caffe_axpby(count, Dtype(1) / num, difference_.cpu_data(), Dtype(0),
128 |       prediction->mutable_cpu_diff());
129 |   return loss;
130 | }
126 | 
127 | template <typename Dtype>  // SetUp below validates the inputs and shapes the 2-value output.
128 | void AccuracyLayer<Dtype>::SetUp(
129 |   const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
130 |   CHECK_EQ(bottom.size(), 2) << "Accuracy Layer takes two blobs as input.";  // bottom[0]: data, bottom[1]: labels
131 |   CHECK_EQ(top->size(), 1) << "Accuracy Layer takes 1 output.";
132 |   CHECK_EQ(bottom[0]->num(), bottom[1]->num())
133 |       << "The data and label should have the same number.";
134 |   CHECK_EQ(bottom[1]->channels(), 1);  // the label blob must hold exactly
135 |   CHECK_EQ(bottom[1]->height(), 1);    // one scalar per item: shape
136 |   CHECK_EQ(bottom[1]->width(), 1);     // (num, 1, 1, 1)
137 |   (*top)[0]->Reshape(1, 2, 1, 1);  // Forward_cpu fills [0] = accuracy, [1] = mean negative log-probability
138 | }
139 | 
140 | // Writes two batch statistics into the top blob: [0] the fraction of items
141 | // whose highest-scoring class equals the label (the first maximum wins on
142 | // ties), and [1] the mean negative log of the score at the label index,
143 | // floored at kLOG_THRESHOLD.
144 | template <typename Dtype>
145 | void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
146 |     vector<Blob<Dtype>*>* top) {
147 |   Dtype correct = 0;
148 |   Dtype neg_logprob = 0;
149 |   const Dtype* scores = bottom[0]->cpu_data();
150 |   const Dtype* labels = bottom[1]->cpu_data();
151 |   const int num = bottom[0]->num();
152 |   const int dim = bottom[0]->count() / bottom[0]->num();
153 |   for (int i = 0; i < num; ++i) {
154 |     const Dtype* row = scores + i * dim;
155 |     const int truth = static_cast<int>(labels[i]);
156 |     // Argmax over the row.
157 |     int best_id = 0;
158 |     Dtype best_score = -FLT_MAX;
159 |     for (int j = 0; j < dim; ++j) {
160 |       if (row[j] > best_score) {
161 |         best_score = row[j];
162 |         best_id = j;
163 |       }
164 |     }
165 |     if (best_id == truth) {
166 |       ++correct;
167 |     }
168 |     const Dtype prob = max(row[truth], Dtype(kLOG_THRESHOLD));
169 |     neg_logprob -= log(prob);
170 |   }
171 |   Dtype* result = (*top)[0]->mutable_cpu_data();
172 |   result[0] = correct / num;
173 |   result[1] = neg_logprob / num;
174 | }
170 | 
171 | // Validates the four inputs (bottom[0]/bottom[1]: first features and labels,
172 | // bottom[2]/bottom[3]: second features and labels), shapes the 2-value
173 | // output, sizes the difference buffer diffy_ and reads the distance
174 | // threshold M_ from the layer's dual_threshold parameter.
175 | template <typename Dtype>
176 | void VerificationAccuracyLayer<Dtype>::SetUp(
177 |   const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
178 |   CHECK_EQ(bottom.size(), 4) << "VerificationAccuracyLayer takes four blobs as input.";
179 |   CHECK_EQ(top->size(), 1) << "VerificationAccuracy Layer takes 1 output.";
180 |   CHECK_EQ(bottom[0]->num(), bottom[1]->num())
181 |       << "The data and label should have the same number.";
182 |   // Forward_cpu subtracts bottom[2] from bottom[0] element by element and
183 |   // reads one label per item from bottom[3], so the second pair must match
184 |   // the first; otherwise those reads run out of bounds.
185 |   CHECK_EQ(bottom[2]->num(), bottom[3]->num())
186 |       << "The data and label should have the same number.";
187 |   CHECK_EQ(bottom[0]->num(), bottom[2]->num())
188 |       << "Both feature blobs should have the same number.";
189 |   CHECK_EQ(bottom[0]->count(), bottom[2]->count())
190 |       << "Both feature blobs should have the same size.";
191 |   // bottom[1]'s channel count is not checked (that check is disabled).
192 |   CHECK_EQ(bottom[1]->height(), 1);
193 |   CHECK_EQ(bottom[1]->width(), 1);
194 |   (*top)[0]->Reshape(1, 2, 1, 1);
195 |   // Forward_cpu writes bottom[0]->count() values into diffy_, so it needs
196 |   // the full shape; sizing it (num, channels, 1, 1) overflowed whenever the
197 |   // features had height or width greater than one.
198 |   diffy_.Reshape(bottom[0]->num(), bottom[0]->channels(),
199 |       bottom[0]->height(), bottom[0]->width());
200 |   M_ = this->layer_param_.dual_threshold();
201 |   LOG(INFO) << "Initial: " << M_;
202 | }
186 | 
187 | // Scores pair verification: item i counts as correct when the squared
188 | // distance between its two feature rows is at most M_^2 and the labels
189 | // match, or exceeds M_^2 and the labels differ. Output [0] is the fraction
190 | // of correct items; output [1] is logprob / num, and logprob is never
191 | // accumulated here.
192 | template <typename Dtype>
193 | void VerificationAccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
194 |     vector<Blob<Dtype>*>* top) {
195 |   Dtype correct = 0;
196 |   Dtype logprob = 0;
197 |   const Dtype* features_a = bottom[0]->cpu_data();
198 |   const Dtype* labels_a = bottom[1]->cpu_data();
199 |   const Dtype* features_b = bottom[2]->cpu_data();
200 |   const Dtype* labels_b = bottom[3]->cpu_data();
201 |   const int num = bottom[0]->num();
202 |   const int dim = bottom[0]->count() / bottom[0]->num();
203 |   const int count = bottom[0]->count();
204 |   // diffy = features_a - features_b for the whole batch.
205 |   Dtype* diffy = diffy_.mutable_cpu_data();
206 |   caffe_sub(count, features_a, features_b, diffy);
207 |   const Dtype M2 = M_*M_;
208 |   for (int i = 0; i < num; ++i) {
209 |     const bool same_label =
210 |         static_cast<int>(labels_a[i]) == static_cast<int>(labels_b[i]);
211 |     const Dtype* row = diffy + i*dim;
212 |     const Dtype norm2 = caffe_cpu_dot(dim, row, row);
213 |     if ((same_label && norm2 <= M2) || (!same_label && norm2 > M2)) {
214 |       ++correct;
215 |     }
216 |   }
217 |   Dtype* result = (*top)[0]->mutable_cpu_data();
218 |   result[0] = correct / num;
219 |   result[1] = logprob / num;
220 | }
218 | 
219 | 
220 | INSTANTIATE_CLASS(MultinomialLogisticLossLayer);
221 | INSTANTIATE_CLASS(InfogainLossLayer);
222 | INSTANTIATE_CLASS(EuclideanLossLayer);
223 | INSTANTIATE_CLASS(AccuracyLayer);
224 | INSTANTIATE_CLASS(VerificationAccuracyLayer);
225 | 
226 | }  // namespace caffe
227 | 


--------------------------------------------------------------------------------
/src/caffe/layers/conv_layer.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include <vector>
  4 | 
  5 | #include "caffe/layer.hpp"
  6 | #include "caffe/vision_layers.hpp"
  7 | #include "caffe/util/im2col.hpp"
  8 | #include "caffe/filler.hpp"
  9 | #include "caffe/util/math_functions.hpp"
 10 | 
 11 | namespace caffe {
 12 | 
 13 | // Reads the convolution parameters from layer_param_, validates them, shapes
 14 | // the top blob and the per-tile work buffers, and creates the weight/bias
 15 | // blobs unless they already exist.
 16 | //
 17 | // The output plane is split into NTILE_HEIGHT_ x NTILE_WIDTH_ tiles and each
 18 | // tile gets its own weight blob. blobs_ holds the ntiles weight blobs first,
 19 | // followed by ntiles bias blobs when biasterm_ is set.
 20 | template <typename Dtype>
 21 | void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
 22 |       vector<Blob<Dtype>*>* top) {
 23 |   CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input.";
 24 |   CHECK_EQ(top->size(), 1) << "Conv Layer takes a single blob as output.";
 25 |   KSIZE_ = this->layer_param_.kernelsize();
 26 |   STRIDE_ = this->layer_param_.stride();
 27 |   GROUP_ = this->layer_param_.group();
 28 |   PAD_ = this->layer_param_.pad();
 29 |   NUM_ = bottom[0]->num();
 30 |   CHANNELS_ = bottom[0]->channels();
 31 |   HEIGHT_ = bottom[0]->height();
 32 |   WIDTH_ = bottom[0]->width();
 33 |   NTILE_WIDTH_ = this->layer_param_.ntile_width();
 34 |   NTILE_HEIGHT_ = this->layer_param_.ntile_height();
 35 |   NUM_OUTPUT_ = this->layer_param_.num_output();
 36 |   CHECK_GT(NUM_OUTPUT_, 0);
 37 |   // These four values are integer divisors or moduli below; a zero from the
 38 |   // configuration would otherwise be a bare division by zero.
 39 |   CHECK_GT(STRIDE_, 0) << "Conv Layer requires a positive stride.";
 40 |   CHECK_GT(GROUP_, 0) << "Conv Layer requires a positive group.";
 41 |   CHECK_GT(NTILE_WIDTH_, 0) << "Conv Layer requires a positive ntile_width.";
 42 |   CHECK_GT(NTILE_HEIGHT_, 0) << "Conv Layer requires a positive ntile_height.";
 43 |   CHECK_EQ(CHANNELS_ % GROUP_, 0);
 44 |   int height_out = (HEIGHT_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1;
 45 |   int width_out = (WIDTH_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1;
 46 |   // The output plane must split evenly into tiles.
 47 |   CHECK(height_out % NTILE_HEIGHT_ == 0);
 48 |   CHECK(width_out % NTILE_WIDTH_ == 0);
 49 |   TILE_WIDTH_ = width_out / NTILE_WIDTH_;
 50 |   TILE_HEIGHT_ = height_out / NTILE_HEIGHT_;
 51 |   // The im2col result buffer would only hold one image (one tile of it when
 52 |   // tiling) at a time to avoid overly large memory usage.
 53 |   col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, TILE_HEIGHT_, TILE_WIDTH_);
 54 |   out_buffer_.Reshape(1, NUM_OUTPUT_, TILE_HEIGHT_, TILE_WIDTH_);
 55 |   // Set the parameters
 56 |   CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0)
 57 |       << "Number of output should be multiples of group.";
 58 |   biasterm_ = this->layer_param_.biasterm();
 59 |   // Figure out the dimensions for individual gemms.
 60 |   M_ = NUM_OUTPUT_ / GROUP_;
 61 |   K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_;
 62 |   N_ = TILE_WIDTH_ * TILE_HEIGHT_;
 63 |   (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out);
 64 |   int ntiles = NTILE_WIDTH_ * NTILE_HEIGHT_;
 65 |   // Check if we need to set up the weights
 66 |   if (this->blobs_.size() > 0) {
 67 |     LOG(INFO) << "Skipping parameter initialization";
 68 |   } else {
 69 |     if (biasterm_) {
 70 |       this->blobs_.resize(2*ntiles);
 71 |     } else {
 72 |       this->blobs_.resize(1*ntiles);
 73 |     }
 74 |     for (int i = 0; i < ntiles; i++) {
 75 |       // Initialize and fill the weights of tile i.
 76 |       this->blobs_[i].reset(
 77 |           new Blob<Dtype>(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_));
 78 |       shared_ptr<Filler<Dtype> > weight_filler(
 79 |           GetFiller<Dtype>(this->layer_param_.weight_filler()));
 80 |       weight_filler->Fill(this->blobs_[i].get());
 81 |       // If necessary, initialize and fill the bias term of tile i.
 82 |       if (biasterm_) {
 83 |         this->blobs_[ntiles+i].reset(new Blob<Dtype>(1, 1, 1, NUM_OUTPUT_));
 84 |         shared_ptr<Filler<Dtype> > bias_filler(
 85 |             GetFiller<Dtype>(this->layer_param_.bias_filler()));
 86 |         bias_filler->Fill(this->blobs_[ntiles+i].get());
 87 |       }
 88 |     }
 89 |   }
 90 |   // Set up the bias multiplier: a vector of N_ ones, used to add the bias to
 91 |   // every output position with a single gemm.
 92 |   if (biasterm_) {
 93 |     bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
 94 |     Dtype* bias_multiplier_data =
 95 |         reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data());
 96 |     for (int i = 0; i < N_; ++i) {
 97 |         bias_multiplier_data[i] = 1.;
 98 |     }
 99 |   }
100 | }
 89 | 
 90 | 
 91 | // CPU forward pass. With a single tile, each image is unrolled by im2col_cpu
 92 | // and multiplied by the shared weights, one gemm per group. With several
 93 | // tiles (stride 1, no padding, one group only), each output tile is computed
 94 | // with its own weight/bias blobs into out_buffer_ and then copied into the
 95 | // top blob by copy_stride_cpu.
 96 | template <typename Dtype>
 97 | void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 98 |       vector<Blob<Dtype>*>* top) {
 99 |   const Dtype* input = bottom[0]->cpu_data();
100 |   Dtype* output = (*top)[0]->mutable_cpu_data();
101 |   Dtype* columns = col_buffer_.mutable_cpu_data();
102 |   const int ntiles = NTILE_WIDTH_ * NTILE_HEIGHT_;
103 |   if (ntiles <= 1) {
104 |     const Dtype* filters = this->blobs_[0]->cpu_data();
105 |     // Per-group strides into the weights, column buffer and output.
106 |     const int filter_step = M_ * K_;
107 |     const int column_step = K_ * N_;
108 |     const int output_step = M_ * N_;
109 |     for (int n = 0; n < NUM_; ++n) {
110 |       Dtype* image_out = output + (*top)[0]->offset(n);
111 |       // First, im2col
112 |       im2col_cpu(input + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
113 |           WIDTH_, KSIZE_, PAD_, STRIDE_, columns);
114 |       // Second, innerproduct with groups
115 |       for (int g = 0; g < GROUP_; ++g) {
116 |         caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
117 |             (Dtype)1., filters + filter_step * g, columns + column_step * g,
118 |             (Dtype)0., image_out + output_step * g);
119 |       }
120 |       // Third, add bias: bias (NUM_OUTPUT_ x 1) times a row of N_ ones.
121 |       if (biasterm_) {
122 |         caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
123 |             N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
124 |             reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
125 |             (Dtype)1., image_out);
126 |       }
127 |     }
128 |     return;
129 |   }
130 |   // Tiled path: only the plain configuration is supported.
131 |   CHECK_EQ(STRIDE_, 1);
132 |   CHECK_EQ(PAD_, 0);
133 |   CHECK_EQ(GROUP_, 1);
134 |   CHECK_EQ(col_buffer_.height(), TILE_HEIGHT_);
135 |   Dtype* tile_out = out_buffer_.mutable_cpu_data();
136 |   // Full output plane size for stride 1 and no padding.
137 |   const int height_out = HEIGHT_ - KSIZE_ + 1;
138 |   const int width_out = WIDTH_ - KSIZE_ + 1;
139 |   for (int n = 0; n < NUM_; ++n) {
140 |     for (int ny = 0; ny < NTILE_HEIGHT_; ny++) {
141 |       for (int nx = 0; nx < NTILE_WIDTH_; nx++) {
142 |         const int tile = ny * NTILE_WIDTH_ + nx;
143 |         // Top-left corner of this tile; with stride 1 and no padding it is
144 |         // the same coordinate in the input and in the output plane.
145 |         const int y0 = TILE_HEIGHT_ * ny;
146 |         const int x0 = TILE_WIDTH_ * nx;
147 |         const Dtype* filters = this->blobs_[tile]->cpu_data();
148 |         const Dtype* tile_in = input + bottom[0]->offset(n, 0, y0, x0);
149 |         im2col_tile_cpu(tile_in, CHANNELS_, HEIGHT_,
150 |             WIDTH_, KSIZE_, columns,
151 |             TILE_HEIGHT_, TILE_WIDTH_);
152 |         caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
153 |             (Dtype)1., filters, columns, (Dtype)0., tile_out);
154 |         if (biasterm_) {
155 |           // Bias blobs are stored after the ntiles weight blobs.
156 |           const Dtype* tile_bias = this->blobs_[tile + ntiles]->cpu_data();
157 |           caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
158 |               N_, 1, (Dtype)1., tile_bias,
159 |               reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
160 |               (Dtype)1., tile_out);
161 |         }
162 |         // Copy the finished tile back into its place in the top blob.
163 |         copy_stride_cpu(tile_out, NUM_OUTPUT_, TILE_HEIGHT_, TILE_WIDTH_,
164 |             output + (*top)[0]->offset(n, 0, y0, x0),
165 |             height_out, width_out);
166 |       }
167 |     }
168 |   }
169 | }
163 | 
164 | // CPU backward pass for the single-tile case: accumulates the bias and
165 | // weight gradients over the batch and, when propagate_down is set, writes
166 | // the gradient with respect to the input via col2im_cpu. The tiled case is
167 | // NOT_IMPLEMENTED. Always returns 0.
168 | template <typename Dtype>
169 | Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
170 |     const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
171 |   const Dtype* out_grad = top[0]->cpu_diff();
172 |   const Dtype* filters = this->blobs_[0]->cpu_data();
173 |   Dtype* filter_grad = this->blobs_[0]->mutable_cpu_diff();
174 |   const Dtype* input = (*bottom)[0]->cpu_data();
175 |   Dtype* in_grad = (*bottom)[0]->mutable_cpu_diff();
176 |   Dtype* columns = col_buffer_.mutable_cpu_data();
177 |   Dtype* column_grad = col_buffer_.mutable_cpu_diff();
178 |   const int ntiles = NTILE_WIDTH_ * NTILE_HEIGHT_;
179 |   if (ntiles > 1) {
180 |     NOT_IMPLEMENTED;
181 |     return Dtype(0.);
182 |   }
183 |   // Bias gradient if necessary: for each image, top_diff times N_ ones.
184 |   if (biasterm_) {
185 |     Dtype* bias_grad = this->blobs_[1]->mutable_cpu_diff();
186 |     memset(bias_grad, 0, sizeof(Dtype) * this->blobs_[1]->count());
187 |     for (int n = 0; n < NUM_; ++n) {
188 |       caffe_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
189 |           1., out_grad + top[0]->offset(n),
190 |           reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
191 |           bias_grad);
192 |     }
193 |   }
194 |   // Per-group strides into the weights, column buffer and output.
195 |   const int filter_step = M_ * K_;
196 |   const int column_step = K_ * N_;
197 |   const int output_step = M_ * N_;
198 |   memset(filter_grad, 0, sizeof(Dtype) * this->blobs_[0]->count());
199 |   for (int n = 0; n < NUM_; ++n) {
200 |     const Dtype* image_grad = out_grad + top[0]->offset(n);
201 |     // The forward pass does not keep the column data, so recompute it.
202 |     im2col_cpu(input + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
203 |         WIDTH_, KSIZE_, PAD_, STRIDE_, columns);
204 |     // Gradient w.r.t. weight, accumulated over the batch (beta = 1).
205 |     for (int g = 0; g < GROUP_; ++g) {
206 |       caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
207 |           (Dtype)1., image_grad + output_step * g,
208 |           columns + column_step * g, (Dtype)1.,
209 |           filter_grad + filter_step * g);
210 |     }
211 |     if (!propagate_down) {
212 |       continue;
213 |     }
214 |     // Gradient w.r.t. bottom data, in column form first.
215 |     for (int g = 0; g < GROUP_; ++g) {
216 |       caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
217 |           (Dtype)1., filters + filter_step * g,
218 |           image_grad + output_step * g,
219 |           (Dtype)0., column_grad + column_step * g);
220 |     }
221 |     // col2im back to the data
222 |     col2im_cpu(column_grad, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_,
223 |         in_grad + (*bottom)[0]->offset(n));
224 |   }
225 |   return Dtype(0.);
226 | }
224 | 
225 | INSTANTIATE_CLASS(ConvolutionLayer);
226 | 
227 | }  // namespace caffe
228 | 


--------------------------------------------------------------------------------
/src/caffe/proto/caffe.proto:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | package caffe;
  4 | 
  5 | message BlobProto {  // Serialized 4-D blob: its shape followed by flat float arrays.
  6 |   optional int32 num = 1 [default = 0];
  7 |   optional int32 channels = 2 [default = 0];
  8 |   optional int32 height = 3 [default = 0];
  9 |   optional int32 width = 4 [default = 0];
 10 |   repeated float data = 5 [packed=true];  // the blob's values
 11 |   repeated float diff = 6 [packed=true];  // presumably the matching gradient buffer -- TODO confirm against Blob::FromProto
 12 | }
 13 | 
 14 | // The BlobProtoVector is simply a way to pass multiple BlobProto instances
 15 | // around in a single message.
 16 | message BlobProtoVector {
 17 |   repeated BlobProto blobs = 1;  // the wrapped blobs
 18 | }
 19 | 
 20 | message Datum {  // One data item: its shape, its payload (bytes or floats) and a label.
 21 |   optional int32 channels = 1;
 22 |   optional int32 height = 2;
 23 |   optional int32 width = 3;
 24 |   // the actual image data, in bytes
 25 |   optional bytes data = 4;
 26 |   optional int32 label = 5;
 27 |   // Optionally, the datum could also hold float data.
 28 |   repeated float float_data = 6;
 29 | }
 30 | 
 31 | message FillerParameter {  // Chooses and configures the filler a layer uses to initialize a parameter blob.
 32 |   // The filler type; the comments below name 'constant', uniform and gaussian.
 33 |   optional string type = 1 [default = 'constant'];
 34 |   optional float value = 2 [default = 0]; // the value in constant filler
 35 |   optional float min = 3 [default = 0]; // the min value in uniform filler
 36 |   optional float max = 4 [default = 1]; // the max value in uniform filler
 37 |   optional float mean = 5 [default = 0]; // the mean value in gaussian filler
 38 |   optional float std = 6 [default = 1]; // the std value in gaussian filler
 39 | }
 40 | 
 41 | message LayerParameter {  // Every setting any layer type may read; each layer uses only its own subset.
 42 |   optional string name = 1; // the layer name
 43 |   optional string type = 2; // the string to specify the layer type
 44 | 
 45 |   // Parameters to specify layers with inner products.
 46 |   optional uint32 num_output = 3; // The number of outputs for the layer
 47 |   optional bool biasterm = 4 [default = true]; // whether to have bias terms
 48 |   optional FillerParameter weight_filler = 5; // The filler for the weight
 49 |   optional FillerParameter bias_filler = 6; // The filler for the bias
 50 | 
 51 |   optional uint32 pad = 7 [default = 0]; // The padding size
 52 |   optional uint32 kernelsize = 8; // The kernel size (no default declared)
 53 |   optional uint32 group = 9 [default = 1]; // The group size for group conv
 54 |   optional uint32 stride = 10 [default = 1]; // The stride
 55 |   enum PoolMethod {
 56 |     MAX = 0;
 57 |     AVE = 1;
 58 |     STOCHASTIC = 2;
 59 |   }
 60 |   optional PoolMethod pool = 11 [default = MAX]; // The pooling method
 61 |   optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
 62 | 
 63 |   optional uint32 local_size = 13 [default = 5]; // for local response norm
 64 |   optional float alpha = 14 [default = 1.]; // for local response norm
 65 |   optional float beta = 15 [default = 0.75]; // for local response norm
 66 | 
 67 |   // For data layers, specify the data source
 68 |   optional string source = 16;
 69 |   // For data pre-processing, we can do simple scaling and subtracting the
 70 |   // data mean, if provided. Note that the mean subtraction is always carried
 71 |   // out before scaling.
 72 |   optional float scale = 17 [ default = 1 ];
 73 |   optional string meanfile = 18;
 74 |   // For data layers, specify the batch size.
 75 |   optional uint32 batchsize = 19;
 76 |   // For data layers, specify if we would like to randomly crop an image.
 77 |   optional uint32 cropsize = 20 [default = 0];
 78 |   // For data layers, specify if we want to randomly mirror data.
 79 |   optional bool mirror = 21 [default = false];
 80 | 
 81 |   // The blobs containing the numeric parameters of the layer
 82 |   repeated BlobProto blobs = 50;
 83 |   // The ratio that is multiplied on the global learning rate. If you want to
 84 |   // set the learning ratio for one blob, you need to set it for all blobs.
 85 |   repeated float blobs_lr = 51;
 86 |   // The weight decay that is multiplied on the global weight decay.
 87 |   repeated float weight_decay = 52;
 88 | 
 89 |   // The rand_skip variable is for the data layer to skip a few data points
 90 |   // to avoid all asynchronous sgd clients to start at the same point. The skip
 91 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
 92 |   // be larger than the number of keys in the leveldb.
 93 |   optional uint32 rand_skip = 53 [ default = 0 ];
 94 | 
 95 |   // Fields related to detection (det_*)
 96 |   // foreground (object) overlap threshold
 97 |   optional float det_fg_threshold = 54 [default = 0.5];
 98 |   // background (non-object) overlap threshold
 99 |   optional float det_bg_threshold = 55 [default = 0.5];
100 |   // Fraction of batch that should be foreground objects
101 |   optional float det_fg_fraction = 56 [default = 0.25];
102 | 
103 |   // optional bool OBSOLETE_can_clobber = 57 [ default = true ];
104 | 
105 |   // Amount of contextual padding to add around a window
106 |   // (used only by the window_data_layer)
107 |   optional uint32 det_context_pad = 58 [default = 0];
108 | 
109 |   // Mode for cropping out a detection window
110 |   // warp: cropped window is warped to a fixed size and aspect ratio
111 |   // square: the tightest square around the window is cropped
112 |   optional string det_crop_mode = 59 [default = "warp"];
113 | 
114 |   // For ReshapeLayer, one needs to specify the new dimensions.
115 |   optional int32 new_num = 60 [default = 0];
116 |   optional int32 new_channels = 61 [default = 0];
117 |   optional int32 new_height = 62 [default = 0];
118 |   optional int32 new_width = 63 [default = 0];
119 | 
120 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
121 |   // It will also resize images if new_height or new_width are not zero.
122 |   optional bool shuffle_images = 64 [default = false];
123 | 
124 |   // For ConcatLayer, one needs to specify the dimension for concatenation, and
125 |   // the other dimensions must be the same for all the bottom blobs.
126 |   // By default it will concatenate blobs along the channels dimension.
127 |   optional uint32 concat_dim = 65 [default = 1];
128 | 
129 |   optional string source_list = 66;  // NOTE(review): undocumented; consumer not visible here -- confirm
130 |   optional bool share_data = 67 [default = false];  // NOTE(review): undocumented; consumer not visible here -- confirm
131 |   optional uint32 data_count = 68;  // NOTE(review): undocumented; consumer not visible here -- confirm
132 | 
133 |   optional float dual_lamda = 69 [default = 16];  // NOTE(review): "lamda" [sic]; consumer not visible here -- confirm
134 |   optional float dual_threshold = 70 [default = 0];  // distance threshold; VerificationAccuracyLayer compares squared feature distance against its square
135 |   optional float bias = 71[default = 0];  // NOTE(review): undocumented; consumer not visible here -- confirm
136 | 
137 |   optional int32 ntile_width = 72 [default = 1];  // ConvolutionLayer: number of tiles across the output width (must divide it evenly)
138 |   optional int32 ntile_height = 73 [default = 1];  // ConvolutionLayer: number of tiles across the output height (must divide it evenly)
139 | }
140 | 
// Connects one layer to the blobs it reads from (bottom) and writes to (top).
message LayerConnection {
  // The layer parameter.
  optional LayerParameter layer = 1;
  // The names of the bottom blobs.
  repeated string bottom = 2;
  // The names of the top blobs.
  repeated string top = 3;
}
146 | 
// Description of a whole network: its layers plus its external inputs.
message NetParameter {
  // Consider giving the network a name.
  optional string name = 1;
  // The layers of the network, each with its bottom/top connections.
  repeated LayerConnection layers = 2;
  // The names of the input blobs to the network.
  repeated string input = 3;
  // The dimensions of the input blobs. Each input blob needs four values, in
  // the order num, channels, height, width, so this field must hold exactly
  // (4 * #input) numbers.
  repeated int32 input_dim = 4;
  // Whether the network forces every layer to carry out the backward
  // operation. If false, whether a layer runs backward is determined
  // automatically from the net structure and the learning rates.
  optional bool force_backward = 5 [default = false];
}
161 | 
// Settings that drive the solver.
message SolverParameter {
  // The proto file for the training net.
  optional string train_net = 1;
  // The proto file for the testing net.
  optional string test_net = 2;
  // The number of iterations for each testing phase.
  optional int32 test_iter = 3 [default = 0];
  // The number of iterations between two testing phases.
  optional int32 test_interval = 4 [default = 0];
  // The base learning rate.
  optional float base_lr = 5;
  // The number of iterations between displaying info. If display = 0, no info
  // will be displayed.
  optional int32 display = 6;
  // The maximum number of iterations.
  optional int32 max_iter = 7;
  // The learning rate decay policy.
  optional string lr_policy = 8;
  // The parameter to compute the learning rate.
  optional float gamma = 9;
  // The parameter to compute the learning rate.
  optional float power = 10;
  // The momentum value.
  optional float momentum = 11;
  // The weight decay.
  optional float weight_decay = 12;
  // The stepsize for learning rate policy "step".
  optional int32 stepsize = 13;
  // The snapshot interval.
  optional int32 snapshot = 14 [default = 0];
  // The prefix for the snapshot.
  optional string snapshot_prefix = 15;
  // Whether to snapshot diff in the results or not. Snapshotting diff helps
  // debugging, but the final protocol buffer will be much larger.
  optional bool snapshot_diff = 16 [default = false];
  // The mode the solver will use: 0 for CPU and 1 for GPU. GPU by default.
  optional int32 solver_mode = 17 [default = 1];
  // The device id that will be used in GPU mode. device_id = 0 by default.
  optional int32 device_id = 18 [default = 0];

  optional int32 update_dual_thr_interval = 19 [default = 10000];
  optional int32 pretrain_iterations = 20 [default = 20000];
}
193 | 
// A message that stores a solver snapshot.
message SolverState {
  // The current iteration.
  optional int32 iter = 1;
  // The file that stores the learned net.
  optional string learned_net = 2;
  // The history for sgd solvers.
  repeated BlobProto history = 3;

  optional float dual_thr = 4;
}
202 | 


--------------------------------------------------------------------------------
/src/caffe/util/math_functions.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 Yangqing Jia
  2 | 
  3 | #include "caffe/common.hpp"
  4 | #include "caffe/util/math_functions.hpp"
  5 | 
  6 | namespace caffe {
  7 | 
  8 | template<>
  9 | void caffe_cpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
 10 |     const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
 11 |     const float alpha, const float* A, const float* B, const float beta,
 12 |     float* C) {
 13 | #ifdef USE_EIGEN
 14 | 	MAP_SMATRIX(eC, C, M, N);
 15 | 	eC *= beta;
 16 | 	if(TransA == CblasNoTrans && TransB == CblasNoTrans){
 17 | 		MAP_CONST_SMATRIX(eA, A, M, K);
 18 | 		MAP_CONST_SMATRIX(eB, B, K, N);
 19 | 		eC.noalias() += alpha * (eA * eB);
 20 | 	}else if(TransA == CblasNoTrans && TransB == CblasTrans){
 21 | 		MAP_CONST_SMATRIX(eA, A, M, K);
 22 | 		MAP_CONST_SMATRIX(eB, B, N, K);
 23 | 		eC.noalias() += alpha * (eA * eB.transpose());
 24 | 	}else if(TransA == CblasTrans && TransB == CblasNoTrans){
 25 | 		MAP_CONST_SMATRIX(eA, A, K, M);
 26 | 		MAP_CONST_SMATRIX(eB, B, K, N);
 27 | 		eC.noalias() += alpha * (eA.transpose() * eB);
 28 | 	}else{
 29 | 		MAP_CONST_SMATRIX(eA, A, K, M);
 30 | 		MAP_CONST_SMATRIX(eB, B, N, K);
 31 | 		eC.noalias() += alpha * (eA.transpose() * eB.transpose());
 32 | 	}
 33 | #else
 34 |   int lda = (TransA == CblasNoTrans) ? K : M;
 35 |   int ldb = (TransB == CblasNoTrans) ? N : K;
 36 |   cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
 37 |       ldb, beta, C, N);
 38 | #endif
 39 | }
 40 | 
 41 | template<>
 42 | void caffe_cpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
 43 |     const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
 44 |     const double alpha, const double* A, const double* B, const double beta,
 45 |     double* C) {
 46 | #ifdef USE_EIGEN
 47 | 	MAP_DMATRIX(eC, C, M, N);
 48 | 	eC *= beta;
 49 | 	if(TransA == CblasNoTrans && TransB == CblasNoTrans){
 50 | 		MAP_CONST_DMATRIX(eA, A, M, K);
 51 | 		MAP_CONST_DMATRIX(eB, B, K, N);
 52 | 		eC.noalias() += alpha * (eA * eB);
 53 | 	}else if(TransA == CblasNoTrans && TransB == CblasTrans){
 54 | 		MAP_CONST_DMATRIX(eA, A, M, K);
 55 | 		MAP_CONST_DMATRIX(eB, B, N, K);
 56 | 		eC.noalias() += alpha * (eA * eB.transpose());
 57 | 	}else if(TransA == CblasTrans && TransB == CblasNoTrans){
 58 | 		MAP_CONST_DMATRIX(eA, A, K, M);
 59 | 		MAP_CONST_DMATRIX(eB, B, K, N);
 60 | 		eC.noalias() += alpha * (eA.transpose() * eB);
 61 | 	}else{
 62 | 		MAP_CONST_DMATRIX(eA, A, K, M);
 63 | 		MAP_CONST_DMATRIX(eB, B, N, K);
 64 | 		eC.noalias() += alpha * (eA.transpose() * eB.transpose());
 65 | 	}
 66 | #else
 67 |   int lda = (TransA == CblasNoTrans) ? K : M;
 68 |   int ldb = (TransB == CblasNoTrans) ? N : K;
 69 |   cblas_dgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
 70 |       ldb, beta, C, N);
 71 | #endif
 72 | }
 73 | 
 74 | #if 0
 75 | template <>
 76 | void caffe_gpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
 77 |     const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
 78 |     const float alpha, const float* A, const float* B, const float beta,
 79 |     float* C) {
 80 |   // Note that cublas follows fortran order.
 81 |   int lda = (TransA == CblasNoTrans) ? K : M;
 82 |   int ldb = (TransB == CblasNoTrans) ? N : K;
 83 |   cublasOperation_t cuTransA =
 84 |       (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
 85 |   cublasOperation_t cuTransB =
 86 |       (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
 87 |   CUBLAS_CHECK(cublasSgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
 88 |       N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
 89 | }
 90 | 
 91 | template <>
 92 | void caffe_gpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
 93 |     const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
 94 |     const double alpha, const double* A, const double* B, const double beta,
 95 |     double* C) {
 96 |   // Note that cublas follows fortran order.
 97 |   int lda = (TransA == CblasNoTrans) ? K : M;
 98 |   int ldb = (TransB == CblasNoTrans) ? N : K;
 99 |   cublasOperation_t cuTransA =
100 |       (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
101 |   cublasOperation_t cuTransB =
102 |       (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
103 |   CUBLAS_CHECK(cublasDgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
104 |       N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
105 | }
106 | #endif
107 | 
108 | template <>
109 | void caffe_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
110 |     const int N, const float alpha, const float* A, const float* x,
111 |     const float beta, float* y) {
112 | #ifdef USE_EIGEN
113 | 	MAP_CONST_SMATRIX(eA, A, M, N);
114 | 	if(TransA == CblasNoTrans){
115 | 		MAP_SVECTOR(eY, y, M);
116 | 		eY *= beta;
117 | 		MAP_CONST_SVECTOR(eX, x, N);
118 | 		eY.noalias() += alpha * (eA * eX);
119 | 	}else{
120 | 		MAP_SVECTOR(eY, y, N);
121 | 		eY *= beta;
122 | 		MAP_CONST_SVECTOR(eX, x, M);
123 | 		eY.noalias() += alpha * (eA.transpose() * eX);
124 | 	}
125 | #else
126 |   cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
127 | #endif
128 | }
129 | 
130 | template <>
131 | void caffe_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
132 |     const int N, const double alpha, const double* A, const double* x,
133 |     const double beta, double* y) {
134 | #ifdef USE_EIGEN
135 | 	MAP_CONST_DMATRIX(eA, A, M, N);
136 | 	if(TransA == CblasNoTrans){
137 | 		MAP_DVECTOR(eY, y, M);
138 | 		eY *= beta;
139 | 		MAP_CONST_DVECTOR(eX, x, N);
140 | 		eY.noalias() += alpha * (eA * eX);
141 | 	}else{
142 | 		MAP_DVECTOR(eY, y, N);
143 | 		eY *= beta;
144 | 		MAP_CONST_DVECTOR(eX, x, M);
145 | 		eY.noalias() += alpha * (eA.transpose() * eX);
146 | 	}
147 | #else
148 |  cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
149 | #endif
150 | }
151 | 
#if 0
// cuBLAS-based GEMV specializations. This whole region is compiled out.
template <>
void caffe_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
    const int N, const float alpha, const float* A, const float* x,
    const float beta, float* y) {
  // The transpose flag is inverted and M / N are swapped in the cuBLAS call.
  const cublasOperation_t op_a =
      (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
  CUBLAS_CHECK(cublasSgemv(Caffe::cublas_handle(), op_a, N, M, &alpha,
      A, N, x, 1, &beta, y, 1));
}

template <>
void caffe_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
    const int N, const double alpha, const double* A, const double* x,
    const double beta, double* y) {
  // The transpose flag is inverted and M / N are swapped in the cuBLAS call.
  const cublasOperation_t op_a =
      (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
  CUBLAS_CHECK(cublasDgemv(Caffe::cublas_handle(), op_a, N, M, &alpha,
      A, N, x, 1, &beta, y, 1));
}
#endif
173 | 
174 | template <>
175 | void caffe_axpy<float>(const int N, const float alpha, const float* X,
176 |     float* Y) { 
177 | #ifdef USE_EIGEN
178 | 	MAP_SVECTOR(eY, Y, N);
179 | 	MAP_CONST_SVECTOR(eX, X, N);
180 | 	eY = alpha * eX + eY;
181 | #else
182 | 	cblas_saxpy(N, alpha, X, 1, Y, 1); 
183 | #endif
184 | }
185 | 
186 | template <>
187 | void caffe_axpy<double>(const int N, const double alpha, const double* X,
188 |     double* Y) 
189 | {
190 | #ifdef USE_EIGEN
191 | 	MAP_DVECTOR(eY, Y, N);
192 | 	MAP_CONST_DVECTOR(eX, X, N);
193 | 	eY = alpha * eX + eY;
194 | #else
195 | 	cblas_daxpy(N, alpha, X, 1, Y, 1); 
196 | #endif
197 | }
198 | 
199 | 
#if 0
// cuBLAS-based AXPY specializations. This whole region is compiled out.
template <>
void caffe_gpu_axpy<float>(const int N, const float alpha, const float* X,
    float* Y) {
  CUBLAS_CHECK(
      cublasSaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1));
}

template <>
void caffe_gpu_axpy<double>(const int N, const double alpha, const double* X,
    double* Y) {
  CUBLAS_CHECK(
      cublasDaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1));
}
#endif
213 | 
214 | template <>
215 | void caffe_axpby<float>(const int N, const float alpha, const float* X,
216 |     const float beta, float* Y) {
217 | #ifdef USE_EIGEN
218 | 	MAP_SVECTOR(eY, Y, N);
219 | 	MAP_CONST_SVECTOR(eX, X, N);
220 | 	eY = alpha * eX + beta * eY;
221 | #else
222 |   cblas_saxpby(N, alpha, X, 1, beta, Y, 1);
223 | #endif
224 | }
225 | 
226 | template <>
227 | void caffe_axpby<double>(const int N, const double alpha, const double* X,
228 |     const double beta, double* Y) {
229 | #ifdef USE_EIGEN
230 | 	MAP_DVECTOR(eY, Y, N);
231 | 	MAP_CONST_DVECTOR(eX, X, N);
232 | 	eY = alpha * eX + beta * eY;
233 | #else
234 |   cblas_daxpby(N, alpha, X, 1, beta, Y, 1);
235 | #endif
236 | }
237 | 
238 | template <>
239 | void caffe_copy<float>(const int N, const float* X, float* Y) {
240 | #ifdef USE_EIGEN
241 | 	memcpy(Y, X, sizeof(float)*N);
242 | #else
243 |   cblas_scopy(N, X, 1, Y, 1);
244 | #endif
245 | }
246 | 
247 | template <>
248 | void caffe_copy<double>(const int N, const double* X, double* Y) {
249 | #ifdef USE_EIGEN
250 | 	memcpy(Y, X, sizeof(double)*N);
251 | #else
252 |   cblas_dcopy(N, X, 1, Y, 1);
253 | #endif
254 | }
255 | 
#if 0
// cuBLAS-based copy specializations. This whole region is compiled out.
template <>
void caffe_gpu_copy<float>(const int N, const float* X, float* Y) {
  CUBLAS_CHECK(
      cublasScopy(Caffe::cublas_handle(), N, X, 1, Y, 1));
}

template <>
void caffe_gpu_copy<double>(const int N, const double* X, double* Y) {
  CUBLAS_CHECK(
      cublasDcopy(Caffe::cublas_handle(), N, X, 1, Y, 1));
}
#endif
267 | 
268 | template <>
269 | void caffe_scal<float>(const int N, const float alpha, float *X) {
270 | #ifdef USE_EIGEN
271 | 	MAP_SVECTOR(eX, X, N);
272 | 	eX *= alpha;
273 | #else
274 |   cblas_sscal(N, alpha, X, 1);
275 | #endif
276 | }
277 | 
278 | template <>
279 | void caffe_scal<double>(const int N, const double alpha, double *X) {
280 | #ifdef USE_EIGEN
281 | 	MAP_DVECTOR(eX, X, N);
282 | 	eX *= alpha;
283 | #else
284 |   cblas_dscal(N, alpha, X, 1);
285 | #endif
286 | }
287 | 
#if 0
// cuBLAS-based scal / axpby specializations. This whole region is compiled
// out.
template <>
void caffe_gpu_scal<float>(const int N, const float alpha, float *X) {
  CUBLAS_CHECK(
      cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1));
}

template <>
void caffe_gpu_scal<double>(const int N, const double alpha, double *X) {
  CUBLAS_CHECK(
      cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1));
}

// Y = alpha * X + beta * Y, done as a scale of Y followed by an axpy.
template <>
void caffe_gpu_axpby<float>(const int N, const float alpha, const float* X,
    const float beta, float* Y) {
  caffe_gpu_scal<float>(N, beta, Y);
  caffe_gpu_axpy<float>(N, alpha, X, Y);
}

// Y = alpha * X + beta * Y, done as a scale of Y followed by an axpy.
template <>
void caffe_gpu_axpby<double>(const int N, const double alpha, const double* X,
    const double beta, double* Y) {
  caffe_gpu_scal<double>(N, beta, Y);
  caffe_gpu_axpy<double>(N, alpha, X, Y);
}
#endif
313 | 
314 | template <>
315 | void caffe_sqr<float>(const int n, const float* a, float* y) {
316 |   vsSqr(n, a, y);
317 | }
318 | 
319 | template <>
320 | void caffe_sqr<double>(const int n, const double* a, double* y) {
321 |   vdSqr(n, a, y);
322 | }
323 | 
324 | template <>
325 | void caffe_add<float>(const int n, const float* a, const float* b,
326 |     float* y) { vsAdd(n, a, b, y); }
327 | 
328 | template <>
329 | void caffe_add<double>(const int n, const double* a, const double* b,
330 |     double* y) { vdAdd(n, a, b, y); }
331 | 
332 | template <>
333 | void caffe_sub<float>(const int n, const float* a, const float* b,
334 |     float* y) { vsSub(n, a, b, y); }
335 | 
336 | template <>
337 | void caffe_sub<double>(const int n, const double* a, const double* b,
338 |     double* y) { vdSub(n, a, b, y); }
339 | 
340 | template <>
341 | void caffe_mul<float>(const int n, const float* a, const float* b,
342 |     float* y) { vsMul(n, a, b, y); }
343 | 
344 | template <>
345 | void caffe_mul<double>(const int n, const double* a, const double* b,
346 |     double* y) { vdMul(n, a, b, y); }
347 | 
348 | template <>
349 | void caffe_div<float>(const int n, const float* a, const float* b,
350 |     float* y) { vsDiv(n, a, b, y); }
351 | 
352 | template <>
353 | void caffe_div<double>(const int n, const double* a, const double* b,
354 |     double* y) { vdDiv(n, a, b, y); }
355 | 
356 | template <>
357 | void caffe_powx<float>(const int n, const float* a, const float b,
358 |     float* y) { vsPowx(n, a, b, y); }
359 | 
360 | template <>
361 | void caffe_powx<double>(const int n, const double* a, const double b,
362 |     double* y) { vdPowx(n, a, b, y); }
363 | 
#if 0
// VSL-based random number generators. This whole region is compiled out.

// Fills r[0..n) through vsRngUniform with bounds a and b.
template <>
void caffe_vRngUniform<float>(const int n, float* r,
    const float a, const float b) {
  VSL_CHECK(vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(),
      n, r, a, b));
}

// Fills r[0..n) through vdRngUniform with bounds a and b.
template <>
void caffe_vRngUniform<double>(const int n, double* r,
    const double a, const double b) {
  VSL_CHECK(vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(),
      n, r, a, b));
}

// Fills r[0..n) through vsRngGaussian with parameters a and sigma.
template <>
void caffe_vRngGaussian<float>(const int n, float* r, const float a,
    const float sigma) {
  VSL_CHECK(vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER,
      Caffe::vsl_stream(), n, r, a, sigma));
}

// Fills r[0..n) through vdRngGaussian with parameters a and sigma.
template <>
void caffe_vRngGaussian<double>(const int n, double* r, const double a,
    const double sigma) {
  VSL_CHECK(vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER,
      Caffe::vsl_stream(), n, r, a, sigma));
}
#endif
394 | 
395 | template <>
396 | void caffe_exp<float>(const int n, const float* a, float* y) {
397 |   vsExp(n, a, y);
398 | }
399 | 
400 | template <>
401 | void caffe_exp<double>(const int n, const double* a, double* y) {
402 |   vdExp(n, a, y);
403 | }
404 | 
405 | template <>
406 | float caffe_cpu_dot<float>(const int n, const float* x, const float* y) {
407 | #ifdef USE_EIGEN
408 | 	MAP_CONST_SVECTOR(eX, x, n);
409 | 	MAP_CONST_SVECTOR(eY, y, n);
410 | 	return eX.dot(eY);
411 | #else
412 |   return cblas_sdot(n, x, 1, y, 1);
413 | #endif
414 | }
415 | 
416 | template <>
417 | double caffe_cpu_dot<double>(const int n, const double* x, const double* y) {
418 | #ifdef USE_EIGEN
419 | 	MAP_CONST_DVECTOR(eX, x, n);
420 | 	MAP_CONST_DVECTOR(eY, y, n);
421 | 	return eX.dot(eY);
422 | #else
423 |   return cblas_ddot(n, x, 1, y, 1);
424 | #endif
425 | }
426 | 
#if 0
// cuBLAS-based dot product specializations. This whole region is compiled
// out. The result is written through `out`.
template <>
void caffe_gpu_dot<float>(const int n, const float* x, const float* y,
    float* out) {
  CUBLAS_CHECK(
      cublasSdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
}

template <>
void caffe_gpu_dot<double>(const int n, const double* x, const double* y,
    double * out) {
  CUBLAS_CHECK(
      cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
}
#endif
440 | 
441 | }  // namespace caffe
442 | 


--------------------------------------------------------------------------------
/src/caffe/net.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright Yangqing Jia 2013
  2 | 
  3 | #include <map>
  4 | #include <set>
  5 | #include <string>
  6 | #include <vector>
  7 | 
  8 | #include "caffe/proto/caffe.pb.h"
  9 | #include "caffe/layer.hpp"
 10 | #include "caffe/net.hpp"
 11 | #include "caffe/util/io.hpp"
 12 | #include "caffe/util/insert_splits.hpp"
 13 | 
 14 | using std::pair;
 15 | using std::map;
 16 | using std::set;
 17 | 
 18 | namespace caffe {
 19 | 
 20 | template <typename Dtype>
 21 | Net<Dtype>::Net(const NetParameter& param) {
 22 |   Init(param);
 23 | }
 24 | 
 25 | template <typename Dtype>
 26 | Net<Dtype>::Net(const string& param_file) {
 27 |   NetParameter param;
 28 |   ReadProtoFromTextFile(param_file, &param);
 29 |   Init(param);
 30 | }
 31 | 
 32 | template <typename Dtype>
 33 | void Net<Dtype>::Init(const NetParameter& in_param) {
 34 |   // Create a copy of in_param with splits added where necessary.
 35 |   NetParameter param;
 36 |   insert_splits(in_param, &param);
 37 |   // Basically, build all the layers and set up its connections.
 38 |   name_ = param.name();
 39 |   map<string, int> blob_name_to_idx;
 40 |   set<string> available_blobs;
 41 |   int num_layers = param.layers_size();
 42 |   CHECK_EQ(param.input_size() * 4, param.input_dim_size())
 43 |       << "Incorrect bottom blob dimension specifications.";
 44 |   size_t memory_used = 0;
 45 |   // set the input blobs
 46 |   for (int i = 0; i < param.input_size(); ++i) {
 47 |     const string& blob_name = param.input(i);
 48 |     shared_ptr<Blob<Dtype> > blob_pointer(
 49 |         new Blob<Dtype>(param.input_dim(i * 4),
 50 |                         param.input_dim(i * 4 + 1),
 51 |                         param.input_dim(i * 4 + 2),
 52 |                         param.input_dim(i * 4 + 3)));
 53 |     blobs_.push_back(blob_pointer);
 54 |     blob_names_.push_back(blob_name);
 55 |     blob_need_backward_.push_back(param.force_backward());
 56 |     net_input_blob_indices_.push_back(i);
 57 |     net_input_blobs_.push_back(blob_pointer.get());
 58 |     blob_name_to_idx[blob_name] = i;
 59 |     available_blobs.insert(blob_name);
 60 |     memory_used += blob_pointer->count();
 61 |   }
 62 |   DLOG(INFO) << "Memory required for Data" << memory_used*sizeof(Dtype);
 63 |   // For each layer, set up their input and output
 64 |   bottom_vecs_.resize(param.layers_size());
 65 |   top_vecs_.resize(param.layers_size());
 66 |   bottom_id_vecs_.resize(param.layers_size());
 67 |   top_id_vecs_.resize(param.layers_size());
 68 |   for (int i = 0; i < param.layers_size(); ++i) {
 69 |     bool in_place = false;
 70 |     const LayerConnection& layer_connection = param.layers(i);
 71 |     const LayerParameter& layer_param = layer_connection.layer();
 72 |     layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
 73 |     layer_names_.push_back(layer_param.name());
 74 |     LOG(INFO) << "Creating Layer " << layer_param.name();
 75 |     bool need_backward = param.force_backward();
 76 |     // Figure out this layer's input and output
 77 |     for (int j = 0; j < layer_connection.bottom_size(); ++j) {
 78 |       const string& blob_name = layer_connection.bottom(j);
 79 |       const int blob_id = blob_name_to_idx[blob_name];
 80 |       if (available_blobs.find(blob_name) == available_blobs.end()) {
 81 |         LOG(FATAL) << "Unknown blob input " << blob_name <<
 82 |             " to layer" << j;
 83 |       }
 84 |       LOG(INFO) << layer_param.name() << " <- " << blob_name;
 85 |       bottom_vecs_[i].push_back(
 86 |           blobs_[blob_id].get());
 87 |       bottom_id_vecs_[i].push_back(blob_id);
 88 |       // If a blob needs backward, this layer should provide it.
 89 |       need_backward |= blob_need_backward_[blob_id];
 90 |       available_blobs.erase(blob_name);
 91 |     }
 92 |     for (int j = 0; j < layer_connection.top_size(); ++j) {
 93 |       const string& blob_name = layer_connection.top(j);
 94 |       // Check if we are doing in-place computation
 95 |       if (layer_connection.bottom_size() > j &&
 96 |           blob_name == layer_connection.bottom(j)) {
 97 |         // In-place computation
 98 |         LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
 99 |         in_place = true;
100 |         available_blobs.insert(blob_name);
101 |         top_vecs_[i].push_back(
102 |             blobs_[blob_name_to_idx[blob_name]].get());
103 |         top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
104 |       } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
105 |         // If we are not doing in-place computation but has duplicated blobs,
106 |         // raise an error.
107 |         LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
108 |       } else {
109 |         // Normal output.
110 |         LOG(INFO) << layer_param.name() << " -> " << blob_name;
111 |         shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
112 |         blobs_.push_back(blob_pointer);
113 |         blob_names_.push_back(blob_name);
114 |         blob_need_backward_.push_back(param.force_backward());
115 |         blob_name_to_idx[blob_name] = blob_names_.size() - 1;
116 |         available_blobs.insert(blob_name);
117 |         top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
118 |         top_id_vecs_[i].push_back(blob_names_.size() - 1);
119 |       }
120 |     }
121 |     // After this layer is connected, set it up.
122 |     // LOG(INFO) << "Setting up " << layer_names_[i];
123 |     layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
124 |     for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
125 |       LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->num() << " "
126 |           << top_vecs_[i][topid]->channels() << " "
127 |           << top_vecs_[i][topid]->height() << " "
128 |           << top_vecs_[i][topid]->width() << " ("
129 |           << top_vecs_[i][topid]->count() << ")";
130 |       if (!in_place)
131 |         memory_used += top_vecs_[i][topid]->count();
132 |     }
133 |     DLOG(INFO) << "Memory  required for Data " << memory_used*sizeof(Dtype);
134 |     int blobs_lr_size = layers_[i]->layer_param().blobs_lr_size();
135 |     CHECK(blobs_lr_size == layers_[i]->blobs().size() || blobs_lr_size == 0)
136 |         << "Incorrect blobs lr size: should be either 0 or the same as "
137 |            "the number of the layer's parameter blobs, " << blobs_lr_size << "vs. " << layers_[i]->blobs().size();
138 |     if (blobs_lr_size) {
139 |       // Check if this layer needs backward operation itself
140 |       for (int j = 0; j < blobs_lr_size; ++j) {
141 |         need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0);
142 |       }
143 |     } else if (layers_[i]->blobs().size()) {
144 |       // catch: if a layer param does not specify blobs_lr, we should assume the
145 |       // learning rate to be 1. Thus we will need to perform backward.
146 |       need_backward = true;
147 |     }
148 |     // Finally, set the backward flag
149 |     layer_need_backward_.push_back(need_backward);
150 |     if (need_backward) {
151 |       LOG(INFO) << layer_names_[i] << " needs backward computation.";
152 |       for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
153 |         blob_need_backward_[top_id_vecs_[i][j]] = true;
154 |       }
155 |     } else {
156 |       LOG(INFO) << layer_names_[i] << " does not need backward computation.";
157 |     }
158 |   }
159 |   // In the end, all remaining blobs are considered output blobs.
160 |   for (set<string>::iterator it = available_blobs.begin();
161 |       it != available_blobs.end(); ++it) {
162 |     LOG(INFO) << "This network produces output " << *it;
163 |     net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
164 |   }
165 |   GetLearningRateAndWeightDecay();
166 |   LOG(INFO) << "Network initialization done.";
167 |   LOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype);
168 | }
169 | 
170 | 
171 | template <typename Dtype>
172 | void Net<Dtype>::GetLearningRateAndWeightDecay() {
173 |   LOG(INFO) << "Collecting Learning Rate and Weight Decay.";
174 |   for (int i = 0; i < layers_.size(); ++i) {
175 |     vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
176 |     for (int j = 0; j < layer_blobs.size(); ++j) {
177 |       params_.push_back(layer_blobs[j]);
178 |     }
179 |     // push the learning rate mutlipliers
180 |     if (layers_[i]->layer_param().blobs_lr_size()) {
181 |       CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
182 |       for (int j = 0; j < layer_blobs.size(); ++j) {
183 |         float local_lr = layers_[i]->layer_param().blobs_lr(j);
184 |         CHECK_GE(local_lr, 0.);
185 |         params_lr_.push_back(local_lr);
186 |       }
187 |     } else {
188 |       for (int j = 0; j < layer_blobs.size(); ++j) {
189 |         params_lr_.push_back(1.);
190 |       }
191 |     }
192 |     // push the weight decay multipliers
193 |     if (layers_[i]->layer_param().weight_decay_size()) {
194 |       CHECK_EQ(layers_[i]->layer_param().weight_decay_size(),
195 |           layer_blobs.size());
196 |       for (int j = 0; j < layer_blobs.size(); ++j) {
197 |         float local_decay = layers_[i]->layer_param().weight_decay(j);
198 |         CHECK_GE(local_decay, 0.);
199 |         params_weight_decay_.push_back(local_decay);
200 |       }
201 |     } else {
202 |       for (int j = 0; j < layer_blobs.size(); ++j) {
203 |         params_weight_decay_.push_back(1.);
204 |       }
205 |     }
206 |   }
207 | }
208 | 
209 | template <typename Dtype>
210 | const vector<Blob<Dtype>*>& Net<Dtype>::ForwardPrefilled() {
211 |   for (int i = 0; i < layers_.size(); ++i) {
212 |     // LOG(ERROR) << "Forwarding " << layer_names_[i];
213 |     layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
214 |   }
215 |   return net_output_blobs_;
216 | }
217 | 
218 | template <typename Dtype>
219 | const vector<Blob<Dtype>*>& Net<Dtype>::Forward(
220 |     const vector<Blob<Dtype>*> & bottom) {
221 |   // Copy bottom to internal bottom
222 |   for (int i = 0; i < bottom.size(); ++i) {
223 |     net_input_blobs_[i]->CopyFrom(*bottom[i]);
224 |   }
225 |   return ForwardPrefilled();
226 | }
227 | 
228 | 
229 | template <typename Dtype>
230 | string Net<Dtype>::Forward(const string& input_blob_protos) {
231 |   BlobProtoVector blob_proto_vec;
232 |   if (net_input_blobs_.size()) {
233 |     blob_proto_vec.ParseFromString(input_blob_protos);
234 |     CHECK_EQ(blob_proto_vec.blobs_size(), net_input_blobs_.size())
235 |         << "Incorrect input size.";
236 |     for (int i = 0; i < blob_proto_vec.blobs_size(); ++i) {
237 |       net_input_blobs_[i]->FromProto(blob_proto_vec.blobs(i));
238 |     }
239 |   }
240 |   ForwardPrefilled();
241 |   blob_proto_vec.Clear();
242 |   for (int i = 0; i < net_output_blobs_.size(); ++i) {
243 |     net_output_blobs_[i]->ToProto(blob_proto_vec.add_blobs());
244 |   }
245 |   string output;
246 |   blob_proto_vec.SerializeToString(&output);
247 |   return output;
248 | }
249 | 
250 | 
251 | template <typename Dtype>
252 | Dtype Net<Dtype>::Backward() {
253 |   Dtype loss = 0;
254 |   for (int i = layers_.size() - 1; i >= 0; --i) {
255 |     if (layer_need_backward_[i]) {
256 |       Dtype layer_loss = layers_[i]->Backward(
257 |           top_vecs_[i], true, &bottom_vecs_[i]);
258 |       loss += layer_loss;
259 |     }
260 |   }
261 |   return loss;
262 | }
263 | 
264 | template <typename Dtype>
265 | Dtype Net<Dtype>::BackwardBetween(int layer_top, int layer_bottom)
266 | {
267 |   Dtype loss = 0;
268 |   CHECK_GE(layer_top, layer_bottom);
269 |   CHECK_LE(layer_top, layers_.size());
270 |   CHECK_GE(layer_bottom, 0);
271 |   for (int i = layer_top; i >= layer_bottom; --i) {
272 |     if (layer_need_backward_[i]) {
273 |       Dtype layer_loss = layers_[i]->Backward(
274 |           top_vecs_[i], true, &bottom_vecs_[i]);
275 |       loss += layer_loss;
276 |     }
277 |   }
278 |   return loss;
279 | }
280 | 
281 | template <typename Dtype>
282 | void Net<Dtype>::CopyLayersFrom(const Net<Dtype>& rhs, bool copy_diff)
283 | {
284 | 	CHECK_EQ(layers_.size(), rhs.layers_.size());
285 |   	for (int i = 0; i < layers_.size(); ++i) {
286 |     		vector<shared_ptr<Blob<Dtype> > >& target_blobs =
287 |         		layers_[i]->blobs();
288 |   		const shared_ptr<Layer<Dtype> >& source_layer = rhs.layers_[i];
289 |     		CHECK_EQ(target_blobs.size(), source_layer->blobs().size());
290 |     		const vector<shared_ptr<Blob<Dtype> > >& source_blobs =
291 | 			source_layer->blobs();
292 | 		for (int j = 0; j < target_blobs.size(); ++j) {
293 | 			CHECK_EQ(target_blobs[j]->num(), source_blobs[j]->num());
294 | 			CHECK_EQ(target_blobs[j]->channels(), source_blobs[j]->channels());
295 | 			CHECK_EQ(target_blobs[j]->height(), source_blobs[j]->height());
296 | 			CHECK_EQ(target_blobs[j]->width(), source_blobs[j]->width());
297 | 			target_blobs[j]->CopyFrom(*source_blobs[j], copy_diff);
298 | 		}
299 | 	}
300 | }
301 | 
302 | template <typename Dtype>
303 | void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
304 |   int num_source_layers = param.layers_size();
305 |   for (int i = 0; i < num_source_layers; ++i) {
306 |     const LayerParameter& source_layer = param.layers(i).layer();
307 |     const string& source_layer_name = source_layer.name();
308 |     int target_layer_id = 0;
309 |     while (target_layer_id != layer_names_.size() &&
310 |         layer_names_[target_layer_id] != source_layer_name) {
311 |       ++target_layer_id;
312 |     }
313 |     if (target_layer_id == layer_names_.size()) {
314 |       DLOG(INFO) << "Ignoring source layer " << source_layer_name;
315 |       continue;
316 |     }
317 |     DLOG(INFO) << "Copying source layer " << source_layer_name;
318 |     vector<shared_ptr<Blob<Dtype> > >& target_blobs =
319 |         layers_[target_layer_id]->blobs();
320 |     CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
321 |         << "Incompatible number of blobs for layer " << source_layer_name;
322 |     for (int j = 0; j < target_blobs.size(); ++j) {
323 |       CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num());
324 |       CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels());
325 |       CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height());
326 |       CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width());
327 |       target_blobs[j]->FromProto(source_layer.blobs(j));
328 |     }
329 |   }
330 | }
331 | 
332 | template <typename Dtype>
333 | void Net<Dtype>::CopyTrainedLayersFrom(const string trained_filename) {
334 |   NetParameter param;
335 |   ReadProtoFromBinaryFile(trained_filename, &param);
336 |   CopyTrainedLayersFrom(param);
337 | }
338 | 
339 | template <typename Dtype>
340 | void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) {
341 |   param->Clear();
342 |   param->set_name(name_);
343 |   // Add bottom and top
344 |   for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
345 |     param->add_input(blob_names_[net_input_blob_indices_[i]]);
346 |   }
347 |   DLOG(INFO) << "Serializing " << layers_.size() << " layers";
348 |   for (int i = 0; i < layers_.size(); ++i) {
349 |     LayerConnection* layer_connection = param->add_layers();
350 |     for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) {
351 |       layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]);
352 |     }
353 |     for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
354 |       layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]);
355 |     }
356 |     LayerParameter* layer_parameter = layer_connection->mutable_layer();
357 |     layers_[i]->ToProto(layer_parameter, write_diff);
358 |   }
359 | }
360 | 
361 | template <typename Dtype>
362 | void Net<Dtype>::Update() {
363 |   for (int i = 0; i < params_.size(); ++i) {
364 |     params_[i]->Update();
365 |   }
366 | }
367 | 
// Instantiates the Net template through the project's INSTANTIATE_CLASS macro.
// NOTE(review): presumably this emits explicit instantiations for the
// supported Dtype types so the member definitions above are compiled into
// this translation unit -- confirm against the macro's definition.
INSTANTIATE_CLASS(Net);
369 | 
370 | }  // namespace caffe
371 | 


--------------------------------------------------------------------------------