├── src └── caffe │ ├── CMakeLists.txt │ ├── layers │ ├── neuron_layer.cpp │ ├── relu_layer.cpp │ ├── sigmoid_layer.cpp │ ├── bnll_layer.cpp │ ├── flatten_layer.cpp │ ├── im2col_layer.cpp │ ├── softmax_loss_layer.cpp │ ├── tanh_layer.cpp │ ├── dropout_layer.cpp │ ├── split_layer.cpp │ ├── raw_image_layer.cpp │ ├── padding_layer.cpp │ ├── softmax_layer.cpp │ ├── concat_layer.cpp │ ├── inner_product_layer.cpp │ ├── verification_loss.cpp │ ├── dropout_group_layer.cpp │ ├── lrn_layer.cpp │ ├── pooling_layer.cpp │ ├── loss_layer.cpp │ └── conv_layer.cpp │ ├── common.cpp │ ├── syncedmem.cpp │ ├── layer_factory.cpp │ ├── util │ ├── io.cpp │ ├── insert_splits.cpp │ ├── im2col.cpp │ └── math_functions.cpp │ ├── blob.cpp │ ├── proto │ └── caffe.proto │ └── net.cpp ├── CMakeLists.txt ├── include └── caffe │ ├── caffe.hpp │ ├── util │ ├── insert_splits.hpp │ ├── io.hpp │ ├── im2col.hpp │ ├── math_functions.hpp │ └── mkl_alternate.hpp │ ├── glog-compact.hpp │ ├── syncedmem.hpp │ ├── blob.hpp │ ├── common.hpp │ ├── layer.hpp │ ├── net.hpp │ └── filler.hpp ├── LICENSE ├── Readme.md ├── Makefile ├── .ycm_extra_conf.py ├── feat_net_raw.cpp └── align_test.cpp /src/caffe/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB_RECURSE CPP_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) 2 | file(GLOB_RECURSE CC_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cc) 3 | 4 | add_library(caffe-compact STATIC ${CPP_SOURCES} ${CC_SOURCES}) 5 | 6 | INSTALL(TARGETS caffe-compact DESTINATION lib) 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CAFFE_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) 2 | include_directories(${CAFFE_INCLUDE_DIRS}) 3 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) 4 | 5 | add_definitions(-DUSE_EIGEN) 6 | 7 | if(NOT MSVC) 8 | SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++0x -Wno-sign-compare") 9 | SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++0x -Wno-sign-compare" ) 10 | endif() 11 | 12 | add_subdirectory(src/caffe) 13 | -------------------------------------------------------------------------------- /include/caffe/caffe.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Yangqing Jia 2013 2 | // caffe.hpp is the header file that you need to include in your code. It wraps 3 | // all the internal caffe header files into one for simpler inclusion. 4 | 5 | #ifndef CAFFE_CAFFE_HPP_ 6 | #define CAFFE_CAFFE_HPP_ 7 | 8 | #include "caffe/common.hpp" 9 | #include "caffe/blob.hpp" 10 | #include "caffe/filler.hpp" 11 | #include "caffe/layer.hpp" 12 | #include "caffe/net.hpp" 13 | #include "caffe/util/io.hpp" 14 | #include "caffe/vision_layers.hpp" 15 | 16 | #include "caffe/proto/caffe.pb.h" 17 | 18 | #endif // CAFFE_CAFFE_HPP_ 19 | -------------------------------------------------------------------------------- /src/caffe/layers/neuron_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | 5 | #include "caffe/layer.hpp" 6 | #include "caffe/vision_layers.hpp" 7 | 8 | namespace caffe { 9 | 10 | template 11 | void NeuronLayer::SetUp(const vector*>& bottom, 12 | vector*>* top) { 13 | CHECK_EQ(bottom.size(), 1) << "Neuron Layer takes a single blob as input."; 14 | CHECK_EQ(top->size(), 1) << "Neuron Layer takes a single blob as output."; 15 | // NeuronLayer allows in-place computations. If the computation is not 16 | // in-place, we will need to initialize the top blob. 17 | if ((*top)[0] != bottom[0]) { 18 | (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), 19 | bottom[0]->height(), bottom[0]->width()); 20 | } 21 | } 22 | 23 | INSTANTIATE_CLASS(NeuronLayer); 24 | 25 | } // namespace caffe 26 | -------------------------------------------------------------------------------- /include/caffe/util/insert_splits.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Jeff Donahue 2 | 3 | #ifndef _CAFFE_UTIL_INSERT_SPLITS_HPP_ 4 | #define _CAFFE_UTIL_INSERT_SPLITS_HPP_ 5 | 6 | #include 7 | 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | using std::pair; 11 | using std::string; 12 | 13 | namespace caffe { 14 | 15 | // Copy NetParameters with SplitLayers added to replace any shared bottom 16 | // blobs with unique bottom blobs provided by the SplitLayer. 17 | void insert_splits(const NetParameter& param, NetParameter* param_split); 18 | 19 | void configure_split_layer(const string& layer_name, const string& blob_name, 20 | const int blob_idx, const int split_count, 21 | LayerConnection* split_layer_connection); 22 | 23 | string get_split_layer_name(const string& layer_name, const string& blob_name, 24 | const int blob_idx); 25 | 26 | string get_split_blob_name(const string& layer_name, const string& blob_name, 27 | const int blob_idx, const int split_idx); 28 | 29 | } // namespace caffe 30 | 31 | #endif // CAFFE_UTIL_INSERT_SPLITS_HPP_ 32 | -------------------------------------------------------------------------------- /src/caffe/common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | #ifndef _MSC_VER 6 | #include 7 | #endif 8 | 9 | #include "caffe/common.hpp" 10 | 11 | namespace caffe { 12 | 13 | shared_ptr Caffe::singleton_; 14 | nullstream __nullstream; 15 | bool LogMessage::enable = false; 16 | 17 | int64_t cluster_seedgen(void) { 18 | int64_t s, seed, pid; 19 | #ifdef _MSC_VER 20 | pid = 0x32423; 21 | #else 22 | pid = getpid(); 23 | #endif 24 | s = time(NULL); 25 | seed = abs(((s * 181) * ((pid - 83) * 359)) % 104729); 26 | return seed; 27 | } 28 | 29 | 30 | Caffe::Caffe() 31 | : mode_(Caffe::CPU), phase_(Caffe::TRAIN){ 32 | } 33 | 34 | Caffe::~Caffe() { 35 | } 36 | 37 | void Caffe::set_random_seed(const unsigned int seed) { 38 | // Curand seed 39 | // Yangqing's note: simply setting the generator seed does not seem to 40 | // work on the tesla K20s, so I wrote the ugly reset thing below. 41 | } 42 | 43 | void Caffe::SetDevice(const int device_id) { 44 | LOG(INFO) << "Caffe-compact only support CPU"; 45 | } 46 | 47 | void Caffe::DeviceQuery() { 48 | LOG(INFO) << "Caffe-compact only support CPU"; 49 | return; 50 | } 51 | 52 | } // namespace caffe 53 | -------------------------------------------------------------------------------- /src/caffe/layers/relu_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | 6 | #include "caffe/layer.hpp" 7 | #include "caffe/vision_layers.hpp" 8 | 9 | using std::max; 10 | 11 | namespace caffe { 12 | 13 | template 14 | void ReLULayer::Forward_cpu(const vector*>& bottom, 15 | vector*>* top) { 16 | const Dtype* bottom_data = bottom[0]->cpu_data(); 17 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 18 | const int count = bottom[0]->count(); 19 | for (int i = 0; i < count; ++i) { 20 | top_data[i] = max(bottom_data[i], Dtype(0)); 21 | } 22 | } 23 | 24 | template 25 | Dtype ReLULayer::Backward_cpu(const vector*>& top, 26 | const bool propagate_down, 27 | vector*>* bottom) { 28 | if (propagate_down) { 29 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 30 | const Dtype* top_diff = top[0]->cpu_diff(); 31 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 32 | const int count = (*bottom)[0]->count(); 33 | for (int i = 0; i < count; ++i) { 34 | bottom_diff[i] = top_diff[i] * (bottom_data[i] > 0); 35 | } 36 | } 37 | return Dtype(0); 38 | } 39 | 40 | 41 | INSTANTIATE_CLASS(ReLULayer); 42 | 43 | 44 | } // namespace caffe 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, The Regents of the University of California (Regents) 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /src/caffe/layers/sigmoid_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tobias Domhan 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "caffe/layer.hpp" 8 | #include "caffe/vision_layers.hpp" 9 | 10 | namespace caffe { 11 | 12 | template 13 | inline Dtype sigmoid(Dtype x) { 14 | return 1. / (1. + exp(-x)); 15 | } 16 | 17 | template 18 | void SigmoidLayer::Forward_cpu(const vector*>& bottom, 19 | vector*>* top) { 20 | const Dtype* bottom_data = bottom[0]->cpu_data(); 21 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 22 | const int count = bottom[0]->count(); 23 | for (int i = 0; i < count; ++i) { 24 | top_data[i] = sigmoid(bottom_data[i]); 25 | } 26 | } 27 | 28 | template 29 | Dtype SigmoidLayer::Backward_cpu(const vector*>& top, 30 | const bool propagate_down, 31 | vector*>* bottom) { 32 | if (propagate_down) { 33 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 34 | const Dtype* top_diff = top[0]->cpu_diff(); 35 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 36 | const int count = (*bottom)[0]->count(); 37 | for (int i = 0; i < count; ++i) { 38 | Dtype sigmoid_x = sigmoid(bottom_data[i]); 39 | bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x); 40 | } 41 | } 42 | return Dtype(0); 43 | } 44 | 45 | INSTANTIATE_CLASS(SigmoidLayer); 46 | 47 | 48 | } // namespace caffe 49 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | Caffe-compact 2 | ================== 3 | Caffe-compact aims to provide a self-contained CNN model testing library. 4 | 5 | This project remove most unnecessary dependency for CNN net testing and 6 | feature extraction. Note that we completely remove CUDA dependency in 7 | caffe-compact. 8 | 9 | Current dependency: 10 | * c++11 compiler (for shared_ptr) 11 | * google protobuf 12 | 13 | Optional dependency: 14 | * cblas (e.g. libatlas3gf-base) 15 | * Eigen3 16 | 17 | You can select an matrix backend by setting the USE_EIGEN environment in the 18 | Makefile. 19 | 20 | These dependencies can be satisfied on most platform including Windows and 21 | mobile. It makes Caffe-compact much easier to deploy. 22 | 23 | This work also avoids potential license problems along with the 24 | third-party libraris when release your caffe CNN model. 25 | 26 | Difference 27 | ================== 28 | The original project can be found at: https://github.com/BVLC/caffe 29 | Caffe-compact only support a subset of functionality of caffe: 30 | 31 | * CNN forward pass only 32 | * CPU only 33 | * Raw image input only 34 | 35 | Performance 36 | ================== 37 | MKL has performance problem when dealing with small matrix (e.g. testing your 38 | model on only one input image), especially multithreading is enabled. Atlas or 39 | other open source BLAS implementation may perform better. 40 | 41 | TODO: benchmark 42 | 43 | Future Work 44 | ================== 45 | * integrate protobuf 46 | 47 | 48 | Yuheng Chen, 2014 49 | chyh1990@gmail.com 50 | -------------------------------------------------------------------------------- /src/caffe/layers/bnll_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | 6 | #include "caffe/layer.hpp" 7 | #include "caffe/vision_layers.hpp" 8 | 9 | using std::min; 10 | 11 | namespace caffe { 12 | 13 | const float kBNLL_THRESHOLD = 50.; 14 | 15 | template 16 | void BNLLLayer::Forward_cpu(const vector*>& bottom, 17 | vector*>* top) { 18 | const Dtype* bottom_data = bottom[0]->cpu_data(); 19 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 20 | const int count = bottom[0]->count(); 21 | for (int i = 0; i < count; ++i) { 22 | top_data[i] = bottom_data[i] > 0 ? 23 | bottom_data[i] + log(1. + exp(-bottom_data[i])) : 24 | log(1. + exp(bottom_data[i])); 25 | } 26 | } 27 | 28 | template 29 | Dtype BNLLLayer::Backward_cpu(const vector*>& top, 30 | const bool propagate_down, 31 | vector*>* bottom) { 32 | if (propagate_down) { 33 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 34 | const Dtype* top_diff = top[0]->cpu_diff(); 35 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 36 | const int count = (*bottom)[0]->count(); 37 | Dtype expval; 38 | for (int i = 0; i < count; ++i) { 39 | expval = exp(min(bottom_data[i], Dtype(kBNLL_THRESHOLD))); 40 | bottom_diff[i] = top_diff[i] * expval / (expval + 1.); 41 | } 42 | } 43 | return Dtype(0); 44 | } 45 | 46 | 47 | INSTANTIATE_CLASS(BNLLLayer); 48 | 49 | 50 | } // namespace caffe 51 | -------------------------------------------------------------------------------- /include/caffe/glog-compact.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GLOG_COMPACT_HPP 2 | #define GLOG_COMPACT_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace caffe{ 10 | struct nullstream: std::ostream{ 11 | nullstream(): std::ostream(0){} 12 | }; 13 | 14 | template 15 | nullstream &operator<<(nullstream &o, T const & x) { return o;} 16 | extern nullstream __nullstream; 17 | 18 | class LogMessage{ 19 | std::string level; 20 | std::ostream &ofs; 21 | static bool enable; 22 | public: 23 | LogMessage(const std::string &l) 24 | :level(l), ofs(enable ? std::cerr : __nullstream){ 25 | stream() << "[" << level << "]\t"; 26 | } 27 | LogMessage(std::ostream &o) 28 | :level("ERROR"), ofs(o){ 29 | stream() << "[" << level << "]\t"; 30 | } 31 | inline std::ostream &stream(){ 32 | return ofs; 33 | } 34 | ~LogMessage() { 35 | stream() << std::endl; 36 | } 37 | 38 | static void Enable(bool _enable){ 39 | enable = _enable; 40 | } 41 | }; 42 | } 43 | 44 | #define LOG(type) caffe::LogMessage(#type).stream() 45 | #define DLOG(type) caffe::LogMessage(#type).stream() 46 | #define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) 47 | 48 | #define CHECK(x) if(x) {} else LOG(ERROR) << #x 49 | #define DCHECK(x) CHECK(x) 50 | 51 | #define CHECK_EQ(x, y) CHECK((x) == (y)) 52 | #define CHECK_LT(x, y) CHECK((x) < (y)) 53 | #define CHECK_GT(x, y) CHECK((x) > (y)) 54 | #define CHECK_LE(x, y) CHECK((x) <= (y)) 55 | #define CHECK_GE(x, y) CHECK((x) >= (y)) 56 | #define CHECK_NE(x, y) CHECK((x) != (y)) 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /src/caffe/layers/flatten_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | 5 | #include "caffe/layer.hpp" 6 | #include "caffe/vision_layers.hpp" 7 | #include "caffe/util/math_functions.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void FlattenLayer::SetUp(const vector*>& bottom, 13 | vector*>* top) { 14 | CHECK_EQ(bottom.size(), 1) << "Flatten Layer takes a single blob as input."; 15 | CHECK_EQ(top->size(), 1) << "Flatten Layer takes a single blob as output."; 16 | int channels_out = bottom[0]->channels() * bottom[0]->height() 17 | * bottom[0]->width(); 18 | (*top)[0]->Reshape(bottom[0]->num(), channels_out, 1, 1); 19 | count_ = bottom[0]->num() * channels_out; 20 | CHECK_EQ(count_, bottom[0]->count()); 21 | CHECK_EQ(count_, (*top)[0]->count()); 22 | } 23 | 24 | template 25 | void FlattenLayer::Forward_cpu(const vector*>& bottom, 26 | vector*>* top) { 27 | const Dtype* bottom_data = bottom[0]->cpu_data(); 28 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 29 | caffe_copy(count_, bottom_data, top_data); 30 | } 31 | 32 | template 33 | Dtype FlattenLayer::Backward_cpu(const vector*>& top, 34 | const bool propagate_down, vector*>* bottom) { 35 | const Dtype* top_diff = top[0]->cpu_diff(); 36 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 37 | caffe_copy(count_, top_diff, bottom_diff); 38 | return Dtype(0.); 39 | } 40 | 41 | INSTANTIATE_CLASS(FlattenLayer); 42 | 43 | } // namespace caffe 44 | -------------------------------------------------------------------------------- /include/caffe/util/io.hpp: -------------------------------------------------------------------------------- 1 | // Copyright Yangqing Jia 2013 2 | 3 | #ifndef CAFFE_UTIL_IO_H_ 4 | #define CAFFE_UTIL_IO_H_ 5 | 6 | #include 7 | 8 | #include "google/protobuf/message.h" 9 | #include "caffe/proto/caffe.pb.h" 10 | 11 | #include "caffe/blob.hpp" 12 | 13 | using std::string; 14 | using ::google::protobuf::Message; 15 | 16 | namespace caffe { 17 | 18 | void ReadProtoFromTextFile(const char* filename, 19 | Message* proto); 20 | inline void ReadProtoFromTextFile(const string& filename, 21 | Message* proto) { 22 | ReadProtoFromTextFile(filename.c_str(), proto); 23 | } 24 | 25 | void WriteProtoToTextFile(const Message& proto, const char* filename); 26 | inline void WriteProtoToTextFile(const Message& proto, const string& filename) { 27 | WriteProtoToTextFile(proto, filename.c_str()); 28 | } 29 | 30 | void ReadProtoFromBinaryFile(const char* filename, 31 | Message* proto); 32 | inline void ReadProtoFromBinaryFile(const string& filename, 33 | Message* proto) { 34 | ReadProtoFromBinaryFile(filename.c_str(), proto); 35 | } 36 | 37 | void WriteProtoToBinaryFile(const Message& proto, const char* filename); 38 | inline void WriteProtoToBinaryFile( 39 | const Message& proto, const string& filename) { 40 | WriteProtoToBinaryFile(proto, filename.c_str()); 41 | } 42 | 43 | bool ReadImageToDatum(const string& filename, const int label, 44 | const int height, const int width, Datum* datum); 45 | 46 | inline bool ReadImageToDatum(const string& filename, const int label, 47 | Datum* datum) { 48 | return ReadImageToDatum(filename, label, 0, 0, datum); 49 | } 50 | 51 | #if 0 52 | template 53 | void hdf5_load_nd_dataset_helper( 54 | hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, 55 | Blob* blob); 56 | 57 | template 58 | void hdf5_load_nd_dataset( 59 | hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, 60 | Blob* blob); 61 | #endif 62 | 63 | } // namespace caffe 64 | 65 | #endif // CAFFE_UTIL_IO_H_ 66 | -------------------------------------------------------------------------------- /src/caffe/syncedmem.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | 5 | #include "caffe/common.hpp" 6 | #include "caffe/syncedmem.hpp" 7 | 8 | namespace caffe { 9 | 10 | SyncedMemory::~SyncedMemory() { 11 | if (cpu_ptr_) { 12 | CaffeFreeHost(cpu_ptr_); 13 | } 14 | 15 | if (gpu_ptr_) { 16 | } 17 | } 18 | 19 | inline void SyncedMemory::to_cpu() { 20 | switch (head_) { 21 | case UNINITIALIZED: 22 | CaffeMallocHost(&cpu_ptr_, size_); 23 | CHECK(cpu_ptr_ != 0) << "size " << size_; 24 | memset(cpu_ptr_, 0, size_); 25 | head_ = HEAD_AT_CPU; 26 | break; 27 | #if 0 28 | case HEAD_AT_GPU: 29 | if (cpu_ptr_ == NULL) { 30 | CaffeMallocHost(&cpu_ptr_, size_); 31 | } 32 | CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDeviceToHost)); 33 | head_ = SYNCED; 34 | break; 35 | #endif 36 | case HEAD_AT_CPU: 37 | case SYNCED: 38 | break; 39 | } 40 | } 41 | 42 | #if 0 43 | inline void SyncedMemory::to_gpu() { 44 | switch (head_) { 45 | case UNINITIALIZED: 46 | CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); 47 | CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_)); 48 | head_ = HEAD_AT_GPU; 49 | break; 50 | case HEAD_AT_CPU: 51 | if (gpu_ptr_ == NULL) { 52 | CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); 53 | } 54 | CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyHostToDevice)); 55 | head_ = SYNCED; 56 | break; 57 | case HEAD_AT_GPU: 58 | case SYNCED: 59 | break; 60 | } 61 | } 62 | #endif 63 | 64 | const void* SyncedMemory::cpu_data() { 65 | to_cpu(); 66 | return (const void*)cpu_ptr_; 67 | } 68 | 69 | #if 0 70 | const void* SyncedMemory::gpu_data() { 71 | to_gpu(); 72 | return (const void*)gpu_ptr_; 73 | } 74 | #endif 75 | 76 | void* SyncedMemory::mutable_cpu_data() { 77 | to_cpu(); 78 | head_ = HEAD_AT_CPU; 79 | return cpu_ptr_; 80 | } 81 | 82 | #if 0 83 | void* SyncedMemory::mutable_gpu_data() { 84 | to_gpu(); 85 | head_ = HEAD_AT_GPU; 86 | return gpu_ptr_; 87 | } 88 | #endif 89 | 90 | 91 | } // namespace caffe 92 | 93 | -------------------------------------------------------------------------------- /src/caffe/layers/im2col_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | 5 | #include "caffe/layer.hpp" 6 | #include "caffe/util/im2col.hpp" 7 | #include "caffe/vision_layers.hpp" 8 | #include "caffe/common.hpp" 9 | 10 | namespace caffe { 11 | 12 | template 13 | void Im2colLayer::SetUp(const vector*>& bottom, 14 | vector*>* top) { 15 | CHECK_EQ(bottom.size(), 1) << "Im2col Layer takes a single blob as input."; 16 | CHECK_EQ(top->size(), 1) << "Im2col Layer takes a single blob as output."; 17 | KSIZE_ = this->layer_param_.kernelsize(); 18 | STRIDE_ = this->layer_param_.stride(); 19 | PAD_ = this->layer_param_.pad(); 20 | CHANNELS_ = bottom[0]->channels(); 21 | HEIGHT_ = bottom[0]->height(); 22 | WIDTH_ = bottom[0]->width(); 23 | (*top)[0]->Reshape(bottom[0]->num(), CHANNELS_ * KSIZE_ * KSIZE_, 24 | (HEIGHT_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1, 25 | (WIDTH_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1); 26 | } 27 | 28 | template 29 | void Im2colLayer::Forward_cpu(const vector*>& bottom, 30 | vector*>* top) { 31 | const Dtype* bottom_data = bottom[0]->cpu_data(); 32 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 33 | for (int n = 0; n < bottom[0]->num(); ++n) { 34 | im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, 35 | WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n)); 36 | } 37 | } 38 | 39 | template 40 | Dtype Im2colLayer::Backward_cpu(const vector*>& top, 41 | const bool propagate_down, vector*>* bottom) { 42 | const Dtype* top_diff = top[0]->cpu_diff(); 43 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 44 | for (int n = 0; n < top[0]->num(); ++n) { 45 | col2im_cpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, 46 | WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n)); 47 | } 48 | return Dtype(0.); 49 | } 50 | 51 | INSTANTIATE_CLASS(Im2colLayer); 52 | 53 | } // namespace caffe 54 | -------------------------------------------------------------------------------- /include/caffe/syncedmem.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef CAFFE_SYNCEDMEM_HPP_ 4 | #define CAFFE_SYNCEDMEM_HPP_ 5 | 6 | #include 7 | 8 | #include "caffe/common.hpp" 9 | 10 | namespace caffe { 11 | 12 | // Theoretically, CaffeMallocHost and CaffeFreeHost should simply call the 13 | // cudaMallocHost and cudaFree functions in order to create pinned memory. 14 | // However, those codes rely on the existence of a cuda GPU (I don't know 15 | // why that is a must since allocating memory should not be accessing the 16 | // GPU resorce, but it just creates an error as of Cuda 5.0) and will cause 17 | // problem when running on a machine without GPU. Thus, we simply define 18 | // these two functions for safety and possible future change if the problem 19 | // of calling cuda functions disappears in a future version. 20 | // 21 | // In practice, although we are creating unpinned memory here, as long as we 22 | // are constantly accessing them the memory pages almost always stays in 23 | // the physical memory (assuming we have large enough memory installed), and 24 | // does not seem to create a memory bottleneck here. 25 | 26 | inline void CaffeMallocHost(void** ptr, size_t size) { 27 | *ptr = malloc(size); 28 | } 29 | 30 | inline void CaffeFreeHost(void* ptr) { 31 | free(ptr); 32 | } 33 | 34 | 35 | class SyncedMemory { 36 | public: 37 | SyncedMemory() 38 | : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED) {} 39 | explicit SyncedMemory(size_t size) 40 | : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED) {} 41 | ~SyncedMemory(); 42 | const void* cpu_data(); 43 | //const void* gpu_data(); 44 | void* mutable_cpu_data(); 45 | //void* mutable_gpu_data(); 46 | enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, SYNCED }; 47 | SyncedHead head() { return head_; } 48 | size_t size() { return size_; } 49 | private: 50 | void to_cpu(); 51 | //void to_gpu(); 52 | void* cpu_ptr_; 53 | void* gpu_ptr_; 54 | size_t size_; 55 | SyncedHead head_; 56 | 57 | DISABLE_COPY_AND_ASSIGN(SyncedMemory); 58 | }; // class SyncedMemory 59 | 60 | } // namespace caffe 61 | 62 | #endif // CAFFE_SYNCEDMEM_HPP_ 63 | -------------------------------------------------------------------------------- /src/caffe/layers/softmax_loss_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "caffe/layer.hpp" 8 | #include "caffe/vision_layers.hpp" 9 | #include "caffe/util/math_functions.hpp" 10 | 11 | using std::max; 12 | 13 | namespace caffe { 14 | 15 | template 16 | void SoftmaxWithLossLayer::SetUp(const vector*>& bottom, 17 | vector*>* top) { 18 | CHECK_EQ(bottom.size(), 2) << "SoftmaxLoss Layer takes two blobs as input."; 19 | CHECK_EQ(top->size(), 0) << "SoftmaxLoss Layer takes no blob as output."; 20 | softmax_bottom_vec_.clear(); 21 | softmax_bottom_vec_.push_back(bottom[0]); 22 | softmax_top_vec_.push_back(&prob_); 23 | softmax_layer_->SetUp(softmax_bottom_vec_, &softmax_top_vec_); 24 | } 25 | 26 | template 27 | void SoftmaxWithLossLayer::Forward_cpu( 28 | const vector*>& bottom, vector*>* top) { 29 | // The forward pass computes the softmax prob values. 30 | softmax_bottom_vec_[0] = bottom[0]; 31 | softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_); 32 | } 33 | 34 | template 35 | Dtype SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, 36 | const bool propagate_down, 37 | vector*>* bottom) { 38 | // First, compute the diff 39 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 40 | const Dtype* prob_data = prob_.cpu_data(); 41 | memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count()); 42 | const Dtype* label = (*bottom)[1]->cpu_data(); 43 | int num = prob_.num(); 44 | int dim = prob_.count() / num; 45 | Dtype loss = 0; 46 | for (int i = 0; i < num; ++i) { 47 | CHECK_LT(label[i], dim); 48 | bottom_diff[i * dim + static_cast(label[i])] -= 1; 49 | loss += -log(max(prob_data[i * dim + static_cast(label[i])], 50 | Dtype(FLT_MIN))); 51 | } 52 | // Scale down gradient 53 | caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff); 54 | return loss / num; 55 | } 56 | 57 | 58 | INSTANTIATE_CLASS(SoftmaxWithLossLayer); 59 | 60 | 61 | } // namespace caffe 62 | -------------------------------------------------------------------------------- /src/caffe/layers/tanh_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Aravindh Mahendran 2 | // TanH neuron activation function layer. 3 | // Adapted from ReLU layer code written by Yangqing Jia 4 | 5 | #include 6 | #include 7 | 8 | #include "caffe/layer.hpp" 9 | #include "caffe/vision_layers.hpp" 10 | 11 | #ifdef __SSE2__ 12 | #include "caffe/fmath.hpp" 13 | #define EXP(x) fmath::exp(x) 14 | #else 15 | #define cast_uint32_t static_cast 16 | static inline float 17 | fastpow2 (float p) 18 | { 19 | float offset = (p < 0) ? 1.0f : 0.0f; 20 | float clipp = (p < -126) ? -126.0f : p; 21 | int w = clipp; 22 | float z = clipp - w + offset; 23 | union { uint32_t i; float f; } v = { cast_uint32_t ( (1 << 23) * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z) ) }; 24 | 25 | return v.f; 26 | } 27 | 28 | static inline float 29 | fastexp (float p) 30 | { 31 | return fastpow2 (1.442695040f * p); 32 | } 33 | #define EXP(x) fastexp(x) 34 | #endif 35 | 36 | namespace caffe { 37 | 38 | template 39 | void TanHLayer::Forward_cpu(const vector*>& bottom, 40 | vector*>* top) { 41 | const Dtype* bottom_data = bottom[0]->cpu_data(); 42 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 43 | Dtype exp2x; 44 | const int count = bottom[0]->count(); 45 | for (int i = 0; i < count; ++i) { 46 | exp2x = EXP(2*bottom_data[i]); 47 | top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1)); 48 | } 49 | } 50 | 51 | template 52 | Dtype TanHLayer::Backward_cpu(const vector*>& top, 53 | const bool propagate_down, 54 | vector*>* bottom) { 55 | if (propagate_down) { 56 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 57 | const Dtype* top_diff = top[0]->cpu_diff(); 58 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 59 | const int count = (*bottom)[0]->count(); 60 | Dtype exp2x; 61 | Dtype tanhx; 62 | for (int i = 0; i < count; ++i) { 63 | exp2x = exp(2*bottom_data[i]); 64 | tanhx = (exp2x - Dtype(1))/(exp2x + Dtype(1)); 65 | bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx); 66 | } 67 | } 68 | return Dtype(0); 69 | } 70 | 71 | INSTANTIATE_CLASS(TanHLayer); 72 | 73 | } // namespace caffe 74 | -------------------------------------------------------------------------------- /include/caffe/blob.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef CAFFE_BLOB_HPP_ 4 | #define CAFFE_BLOB_HPP_ 5 | 6 | #include "caffe/common.hpp" 7 | #include "caffe/syncedmem.hpp" 8 | #include "caffe/proto/caffe.pb.h" 9 | 10 | namespace caffe { 11 | 12 | template 13 | class Blob { 14 | public: 15 | Blob() 16 | : num_(0), channels_(0), height_(0), width_(0), count_(0), data_(), 17 | diff_() {} 18 | explicit Blob(const int num, const int channels, const int height, 19 | const int width); 20 | virtual ~Blob() {} 21 | void Reshape(const int num, const int height, 22 | const int width, const int channels); 23 | inline int num() const { return num_; } 24 | inline int channels() const { return channels_; } 25 | inline int height() const { return height_; } 26 | inline int width() const { return width_; } 27 | inline int count() const {return count_; } 28 | inline int offset(const int n, const int c = 0, const int h = 0, 29 | const int w = 0) const { 30 | return ((n * channels_ + c) * height_ + h) * width_ + w; 31 | } 32 | // Copy from source. If copy_diff is false, we copy the data; if copy_diff 33 | // is true, we copy the diff. 34 | void CopyFrom(const Blob& source, bool copy_diff = false, 35 | bool reshape = false); 36 | 37 | inline Dtype data_at(const int n, const int c, const int h, 38 | const int w) const { 39 | return *(cpu_data() + offset(n, c, h, w)); 40 | } 41 | 42 | inline Dtype diff_at(const int n, const int c, const int h, 43 | const int w) const { 44 | return *(cpu_diff() + offset(n, c, h, w)); 45 | } 46 | 47 | const Dtype* cpu_data() const; 48 | //const Dtype* gpu_data() const; 49 | const Dtype* cpu_diff() const; 50 | //const Dtype* gpu_diff() const; 51 | Dtype* mutable_cpu_data(); 52 | //Dtype* mutable_gpu_data(); 53 | Dtype* mutable_cpu_diff(); 54 | //Dtype* mutable_gpu_diff(); 55 | void Update(); 56 | void FromProto(const BlobProto& proto); 57 | void ToProto(BlobProto* proto, bool write_diff = false) const; 58 | 59 | protected: 60 | int num_; 61 | int channels_; 62 | int height_; 63 | int width_; 64 | int count_; 65 | shared_ptr data_; 66 | shared_ptr diff_; 67 | 68 | DISABLE_COPY_AND_ASSIGN(Blob); 69 | }; // class Blob 70 | 71 | } // namespace caffe 72 | 73 | #endif // CAFFE_BLOB_HPP_ 74 | -------------------------------------------------------------------------------- /src/caffe/layers/dropout_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | 6 | #include "caffe/common.hpp" 7 | #include "caffe/layer.hpp" 8 | #include "caffe/syncedmem.hpp" 9 | #include "caffe/vision_layers.hpp" 10 | 11 | namespace caffe { 12 | 13 | template 14 | void DropoutLayer::SetUp(const vector*>& bottom, 15 | vector*>* top) { 16 | NeuronLayer::SetUp(bottom, top); 17 | // Set up the cache for random number generation 18 | rand_vec_.reset(new SyncedMemory(bottom[0]->count() * sizeof(int))); 19 | threshold_ = this->layer_param_.dropout_ratio(); 20 | DCHECK(threshold_ > 0.); 21 | DCHECK(threshold_ < 1.); 22 | scale_ = 1. / (1. - threshold_); 23 | uint_thres_ = (unsigned int)(UINT_MAX * threshold_); 24 | } 25 | 26 | template 27 | void DropoutLayer::Forward_cpu(const vector*>& bottom, 28 | vector*>* top) { 29 | const Dtype* bottom_data = bottom[0]->cpu_data(); 30 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 31 | int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); 32 | const int count = bottom[0]->count(); 33 | if (Caffe::phase() == Caffe::TRAIN) { 34 | // Create random numbers 35 | #if 0 36 | viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(), 37 | count, mask, 1. - threshold_); 38 | for (int i = 0; i < count; ++i) { 39 | top_data[i] = bottom_data[i] * mask[i] * scale_; 40 | } 41 | #else 42 | NOT_IMPLEMENTED; 43 | #endif 44 | } else { 45 | memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype)); 46 | } 47 | } 48 | 49 | template 50 | Dtype DropoutLayer::Backward_cpu(const vector*>& top, 51 | const bool propagate_down, 52 | vector*>* bottom) { 53 | CHECK(Caffe::phase() == Caffe::TRAIN); 54 | if (propagate_down) { 55 | const Dtype* top_diff = top[0]->cpu_diff(); 56 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 57 | const int* mask = reinterpret_cast(rand_vec_->cpu_data()); 58 | const int count = (*bottom)[0]->count(); 59 | for (int i = 0; i < count; ++i) { 60 | bottom_diff[i] = top_diff[i] * mask[i] * scale_; 61 | } 62 | } 63 | return Dtype(0); 64 | } 65 | 66 | 67 | INSTANTIATE_CLASS(DropoutLayer); 68 | 69 | 70 | } // namespace caffe 71 | -------------------------------------------------------------------------------- /src/caffe/layers/split_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Jeff Donahue 2 | 3 | #include 4 | 5 | #include "caffe/layer.hpp" 6 | #include "caffe/vision_layers.hpp" 7 | #include "caffe/util/math_functions.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void SplitLayer::SetUp(const vector*>& bottom, 13 | vector*>* top) { 14 | CHECK_EQ(bottom.size(), 1) << "Split Layer takes a single blob as input."; 15 | CHECK_GE(top->size(), 1) << "Split Layer takes at least one blob as output."; 16 | count_ = bottom[0]->count(); 17 | for (int i = 0; i < top->size(); ++i) { 18 | // Allow the 0th top blob to be 'in-place', but no others. 19 | if (i == 0 && (*top)[i] == bottom[0]) { 20 | continue; 21 | } else { 22 | CHECK_NE((*top)[i], bottom[0]) << "Only 0th top blob may be in place."; 23 | } 24 | (*top)[i]->Reshape(bottom[0]->num(), bottom[0]->channels(), 25 | bottom[0]->height(), bottom[0]->width()); 26 | CHECK_EQ(count_, (*top)[i]->count()); 27 | } 28 | } 29 | 30 | template 31 | void SplitLayer::Forward_cpu(const vector*>& bottom, 32 | vector*>* top) { 33 | const Dtype* bottom_data = bottom[0]->cpu_data(); 34 | for (int i = 0; i < top->size(); ++i) { 35 | if (i == 0 && (*top)[i] == bottom[0]) { 36 | continue; 37 | } 38 | Dtype* top_data = (*top)[i]->mutable_cpu_data(); 39 | caffe_copy(count_, bottom_data, top_data); 40 | } 41 | } 42 | 43 | template 44 | Dtype SplitLayer::Backward_cpu(const vector*>& top, 45 | const bool propagate_down, vector*>* bottom) { 46 | if (propagate_down) { 47 | const Dtype* top_diff = top[0]->cpu_diff(); 48 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 49 | // Initialize by copying first top blob diff to our diff, unless we're 50 | // doing in-place computation for the first blob, in which case the diff is 51 | // already initialized. 52 | if (top[0] != (*bottom)[0]) { 53 | caffe_copy(count_, top_diff, bottom_diff); 54 | } 55 | // Add remaining top blob diffs. 56 | for (int i = 1; i < top.size(); ++i) { 57 | top_diff = top[i]->cpu_diff(); 58 | caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff); 59 | } 60 | } 61 | return Dtype(0.); 62 | } 63 | 64 | 65 | INSTANTIATE_CLASS(SplitLayer); 66 | 67 | } // namespace caffe 68 | -------------------------------------------------------------------------------- /include/caffe/util/im2col.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef _CAFFE_UTIL_IM2COL_HPP_ 4 | #define _CAFFE_UTIL_IM2COL_HPP_ 5 | 6 | namespace caffe { 7 | 8 | template 9 | void im2col_cpu(const Dtype* data_im, const int channels, 10 | const int height, const int width, const int ksize, const int pad, 11 | const int stride, Dtype* data_col); 12 | 13 | template 14 | void col2im_cpu(const Dtype* data_col, const int channels, 15 | const int height, const int width, const int psize, const int pad, 16 | const int stride, Dtype* data_im); 17 | 18 | template 19 | void im2col_gpu(const Dtype* data_im, const int channels, 20 | const int height, const int width, const int ksize, const int pad, 21 | const int stride, Dtype* data_col); 22 | 23 | template 24 | void col2im_gpu(const Dtype* data_col, const int channels, 25 | const int height, const int width, const int psize, const int pad, 26 | const int stride, Dtype* data_im); 27 | 28 | template 29 | void im2col_tile_gpu(const Dtype* data_im, const int channels, 30 | const int stride_h, const int stride_w, 31 | const int ksize, Dtype* data_col, 32 | const int height_col, const int width_col); 33 | 34 | template 35 | void copy_stride_gpu(const Dtype* src_data, 36 | const int channels, 37 | const int height, const int width, Dtype *dst_data, 38 | const int stride_h, const int stride_w); 39 | 40 | template 41 | void copy_stride_cpu(const Dtype* src_data, 42 | const int channels, 43 | const int height, const int width, Dtype *dst_data, 44 | const int stride_h, const int stride_w); 45 | 46 | 47 | template 48 | void copy_stride_gather_gpu(Dtype* src_data, 49 | const int channels, 50 | const int height, const int width, const Dtype *dst_data, 51 | const int stride_h, const int stride_w); 52 | 53 | template 54 | void col2im_tile_gpu(const Dtype* data_col, const int channels, 55 | const int height_col, const int width_col, 56 | const int ksize, 57 | const int stride_h, const int stride_w, 58 | Dtype* data_im); 59 | 60 | template 61 | void im2col_tile_cpu(const Dtype* data_im, const int channels, 62 | const int stride_h, const int stride_w, 63 | const int ksize, Dtype* data_col, 64 | const int height_col, const int width_col); 65 | 66 | } // namespace caffe 67 | 68 | #endif // CAFFE_UTIL_IM2COL_HPP_ 69 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CROSS_COMPILE?= 2 | CXX=$(CROSS_COMPILE)g++ 3 | AR=$(CROSS_COMPILE)ar 4 | PROJECT := caffe 5 | STATIC_NAME := lib$(PROJECT).a 6 | USE_EIGEN?=y 7 | 8 | CXX_SRCS := $(shell find src/$(PROJECT) ! -name "test_*.cpp" -name "*.cpp") 9 | HXX_SRCS := $(shell find include/$(PROJECT) ! -name "*.hpp") 10 | PROTO_SRCS := $(wildcard src/$(PROJECT)/proto/*.proto) 11 | 12 | PROTO_GEN_HEADER := ${PROTO_SRCS:.proto=.pb.h} 13 | PROTO_GEN_CC := ${PROTO_SRCS:.proto=.pb.cc} 14 | 15 | BUILD_DIR := build 16 | CXX_OBJS := $(addprefix $(BUILD_DIR)/, ${CXX_SRCS:.cpp=.o}) 17 | PROTO_OBJS := $(addprefix $(BUILD_DIR)/, ${PROTO_GEN_CC:.cc=.o}) 18 | OBJS := $(PROTO_OBJS) $(CXX_OBJS) 19 | 20 | INCLUDE_DIRS += ./src ./include ./protobuf-2.4.1/build/include 21 | CXXFLAGS+=-std=gnu++0x 22 | CXXFLAGS+=$(EXTRA_CXXFLAGS) 23 | CXXFLAGS+=-fvisibility=hidden #hide symbols for static lib 24 | LDFLAGS+=-L./protobuf-2.4.1/build/lib 25 | LIBRARIES:=protobuf 26 | 27 | ifeq ($(USE_EIGEN), y) 28 | CXXFLAGS += -DUSE_EIGEN 29 | CXXFLAGS += -I./eigen3 30 | else 31 | LIBRARIES += cblas 32 | endif 33 | 34 | 35 | COMMON_FLAGS := -O2 $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) 36 | CXXFLAGS += -fPIC $(COMMON_FLAGS) 37 | LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) \ 38 | $(foreach library,$(LIBRARIES),-l$(library)) 39 | 40 | all: init $(STATIC_NAME) 41 | 42 | init: 43 | @ mkdir -p $(foreach obj,$(OBJS),$(dir $(obj))) 44 | 45 | $(OBJS): $(PROTO_GEN_CC) $(HXX_SRCS) 46 | 47 | $(BUILD_DIR)/src/$(PROJECT)/%.o: src/$(PROJECT)/%.cpp 48 | $(CXX) $< $(CXXFLAGS) -c -o $@ 49 | 50 | $(BUILD_DIR)/src/$(PROJECT)/layers/%.o: src/$(PROJECT)/layers/%.cpp 51 | $(CXX) $< $(CXXFLAGS) -c -o $@ 52 | 53 | $(BUILD_DIR)/src/$(PROJECT)/proto/%.o: src/$(PROJECT)/proto/%.cc 54 | $(CXX) $< $(CXXFLAGS) -c -o $@ 55 | 56 | $(PROTO_GEN_CC): $(PROTO_SRCS) 57 | protoc --proto_path=src --cpp_out=src $(PROTO_SRCS) 58 | mkdir -p include/$(PROJECT)/proto 59 | cp $(PROTO_GEN_HEADER) include/$(PROJECT)/proto/ 60 | @echo 61 | 62 | $(STATIC_NAME): init $(PROTO_OBJS) $(OBJS) 63 | $(AR) rcs $(STATIC_NAME) $(PROTO_OBJS) $(OBJS) 64 | @echo 65 | 66 | feat_net_raw: feat_net_raw.cpp $(STATIC_NAME) 67 | $(CXX) $< $(CXXFLAGS) -o $@ -L. -lcaffe $(LDFLAGS) -lpthread 68 | 69 | align_test: align_test.cpp $(STATIC_NAME) 70 | $(CXX) $< $(CXXFLAGS) -o $@ -L. -lcaffe $(LDFLAGS) $(shell pkg-config --libs opencv) 71 | 72 | clean: 73 | @- $(RM) $(NAME) $(STATIC_NAME) 74 | @- $(RM) $(PROTO_GEN_HEADER) $(PROTO_GEN_CC) $(PROTO_GEN_PY) 75 | @- $(RM) include/$(PROJECT)/proto/$(PROJECT).pb.h 76 | @- $(RM) -rf $(BUILD_DIR) 77 | @- rm -f feat_net_raw 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/caffe/layers/raw_image_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include // NOLINT(readability/streams) 8 | #include // NOLINT(readability/streams) 9 | 10 | #include "caffe/layer.hpp" 11 | #include "caffe/util/io.hpp" 12 | #include "caffe/vision_layers.hpp" 13 | 14 | using std::string; 15 | using std::pair; 16 | 17 | namespace caffe { 18 | 19 | template 20 | RawImageLayer::~RawImageLayer() { 21 | // Finally, join the thread 22 | } 23 | 24 | template 25 | void RawImageLayer::SetUp(const vector*>& bottom, 26 | vector*>* top) { 27 | CHECK_EQ(bottom.size(), 0) << "Input Layer takes no input blobs."; 28 | CHECK_EQ(top->size(), 2) << "Input Layer takes two blobs as output."; 29 | // datum size 30 | datum_height_ = this->layer_param_.new_height(); 31 | datum_width_ = this->layer_param_.new_width(); 32 | datum_channels_ = this->layer_param_.new_channels(); 33 | datum_size_ = datum_channels_ * datum_height_ * datum_width_; 34 | // Read the file with filenames and labels 35 | (*top)[0]->Reshape( 36 | this->layer_param_.batchsize(), datum_channels_, 37 | datum_height_, datum_width_); 38 | LOG(INFO) << "output data size: " << (*top)[0]->num() << "," 39 | << (*top)[0]->channels() << "," << (*top)[0]->height() << "," 40 | << (*top)[0]->width(); 41 | // label 42 | (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1); 43 | } 44 | 45 | template 46 | void RawImageLayer::Forward_cpu(const vector*>& bottom, 47 | vector*>* top) { 48 | (*top)[0]->cpu_data(); 49 | (*top)[1]->cpu_data(); 50 | } 51 | 52 | #if 0 53 | template 54 | void RawImageLayer::Forward_gpu(const vector*>& bottom, 55 | vector*>* top) { 56 | (*top)[0]->gpu_data(); 57 | (*top)[1]->gpu_data(); 58 | } 59 | #endif 60 | 61 | // The backward operations are dummy - they do not carry any computation. 62 | template 63 | Dtype RawImageLayer::Backward_cpu(const vector*>& top, 64 | const bool propagate_down, vector*>* bottom) { 65 | return Dtype(0.); 66 | } 67 | 68 | #if 0 69 | template 70 | Dtype RawImageLayer::Backward_gpu(const vector*>& top, 71 | const bool propagate_down, vector*>* bottom) { 72 | return Dtype(0.); 73 | } 74 | #endif 75 | 76 | INSTANTIATE_CLASS(RawImageLayer); 77 | 78 | } // namespace caffe 79 | -------------------------------------------------------------------------------- /include/caffe/common.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef CAFFE_COMMON_HPP_ 4 | #define CAFFE_COMMON_HPP_ 5 | 6 | //#include 7 | #include 8 | #include 9 | #include 10 | #include "glog-compact.hpp" 11 | 12 | // Disable the copy and assignment operator for a class. 13 | #define DISABLE_COPY_AND_ASSIGN(classname) \ 14 | private:\ 15 | classname(const classname&);\ 16 | classname& operator=(const classname&) 17 | 18 | // Instantiate a class with float and double specifications. 19 | #define INSTANTIATE_CLASS(classname) \ 20 | template class classname; \ 21 | template class classname 22 | 23 | // A simple macro to mark codes that are not implemented, so that when the code 24 | // is executed we will see a fatal log. 25 | #define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet" 26 | 27 | 28 | namespace caffe { 29 | 30 | // We will use the boost shared_ptr instead of the new C++11 one mainly 31 | // because cuda does not work (at least now) well with C++11 features. 32 | //using boost::shared_ptr; 33 | using std::shared_ptr; 34 | 35 | 36 | // A singleton class to hold common caffe stuff, such as the handler that 37 | // caffe is going to use for cublas, curand, etc. 38 | class Caffe { 39 | public: 40 | ~Caffe(); 41 | inline static Caffe& Get() { 42 | if (!singleton_.get()) { 43 | singleton_.reset(new Caffe()); 44 | } 45 | return *singleton_; 46 | } 47 | enum Brew { CPU, GPU }; 48 | enum Phase { TRAIN, TEST }; 49 | 50 | // Returns the mode: running on CPU or GPU. 51 | inline static Brew mode() { return Get().mode_; } 52 | // Returns the phase: TRAIN or TEST. 53 | inline static Phase phase() { return Get().phase_; } 54 | // The setters for the variables 55 | // Sets the mode. It is recommended that you don't change the mode halfway 56 | // into the program since that may cause allocation of pinned memory being 57 | // freed in a non-pinned way, which may cause problems - I haven't verified 58 | // it personally but better to note it here in the header file. 59 | inline static void set_mode(Brew mode) { Get().mode_ = mode; } 60 | // Sets the phase. 61 | inline static void set_phase(Phase phase) { Get().phase_ = phase; } 62 | // Sets the random seed of both MKL and curand 63 | static void set_random_seed(const unsigned int seed); 64 | static void SetDevice(const int device_id); 65 | static void DeviceQuery(); 66 | protected: 67 | Brew mode_; 68 | Phase phase_; 69 | static shared_ptr singleton_; 70 | 71 | private: 72 | // The private constructor to avoid duplicate instantiation. 73 | Caffe(); 74 | 75 | DISABLE_COPY_AND_ASSIGN(Caffe); 76 | }; 77 | 78 | 79 | } // namespace caffe 80 | 81 | #endif // CAFFE_COMMON_HPP_ 82 | -------------------------------------------------------------------------------- /src/caffe/layers/padding_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include // NOLINT(readability/streams) 4 | #include 5 | 6 | #include "caffe/layer.hpp" 7 | #include "caffe/vision_layers.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void PaddingLayer::SetUp(const vector*>& bottom, 13 | vector*>* top) { 14 | // DEPRECATION 15 | LOG(WARNING) << "Padding layers are deprecated in favor of padding-aware " 16 | "convolutions and WILL BE REMOVED. Please update your model " 17 | "prototxt to replace padding layers with pad fields. " 18 | "See https://github.com/BVLC/caffe/pull/128."; 19 | PAD_ = this->layer_param_.pad(); 20 | CHECK_EQ(bottom.size(), 1) << "Padding Layer takes a single blob as input."; 21 | CHECK_EQ(top->size(), 1) << "Padding Layer takes a single blob as output."; 22 | NUM_ = bottom[0]->num(); 23 | CHANNEL_ = bottom[0]->channels(); 24 | HEIGHT_IN_ = bottom[0]->height(); 25 | WIDTH_IN_ = bottom[0]->width(); 26 | HEIGHT_OUT_ = HEIGHT_IN_ + PAD_ * 2; 27 | WIDTH_OUT_ = WIDTH_IN_ + PAD_ * 2; 28 | (*top)[0]->Reshape(NUM_, CHANNEL_, HEIGHT_OUT_, WIDTH_OUT_); 29 | } 30 | 31 | template 32 | void PaddingLayer::Forward_cpu(const vector*>& bottom, 33 | vector*>* top) { 34 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 35 | const Dtype* bottom_data = bottom[0]->cpu_data(); 36 | memset(top_data, 0, sizeof(Dtype) * (*top)[0]->count()); 37 | // In short, top[n, c, h, w] = bottom[n, c, h-pad, w-pad] if in range 38 | for (int n = 0; n < NUM_; ++n) { 39 | for (int c = 0; c < CHANNEL_; ++c) { 40 | for (int h = 0; h < HEIGHT_IN_; ++h) { 41 | // copy the width part 42 | memcpy( 43 | top_data + ((n * CHANNEL_ + c) * HEIGHT_OUT_ + h + PAD_) 44 | * WIDTH_OUT_ + PAD_, 45 | bottom_data + ((n * CHANNEL_ + c) * HEIGHT_IN_ + h) * WIDTH_IN_, 46 | sizeof(Dtype) * WIDTH_IN_); 47 | } 48 | } 49 | } 50 | } 51 | 52 | template 53 | Dtype PaddingLayer::Backward_cpu(const vector*>& top, 54 | const bool propagate_down, vector*>* bottom) { 55 | const Dtype* top_diff = top[0]->cpu_diff(); 56 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 57 | for (int n = 0; n < NUM_; ++n) { 58 | for (int c = 0; c < CHANNEL_; ++c) { 59 | for (int h = 0; h < HEIGHT_IN_; ++h) { 60 | // copy the width part 61 | memcpy( 62 | bottom_diff + ((n * CHANNEL_ + c) * HEIGHT_IN_ + h) * WIDTH_IN_, 63 | top_diff + ((n * CHANNEL_ + c) * HEIGHT_OUT_ + h + PAD_) 64 | * WIDTH_OUT_ + PAD_, 65 | sizeof(Dtype) * WIDTH_IN_); 66 | } 67 | } 68 | } 69 | return Dtype(0.); 70 | } 71 | 72 | INSTANTIATE_CLASS(PaddingLayer); 73 | 74 | } // namespace caffe 75 | -------------------------------------------------------------------------------- /.ycm_extra_conf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ycm_core 3 | from clang_helpers import PrepareClangFlags 4 | 5 | # Set this to the absolute path to the folder (NOT the file!) containing the 6 | # compile_commands.json file to use that instead of 'flags'. See here for 7 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html 8 | # Most projects will NOT need to set this to anything; you can just change the 9 | # 'flags' list of compilation flags. Notice that YCM itself uses that approach. 10 | compilation_database_folder = '' 11 | 12 | # These are the compilation flags that will be used in case there's no 13 | # compilation database set. 14 | flags = [ 15 | '-Wall', 16 | '-std=c++11', 17 | '-stdlib=libc++', 18 | '-x', 19 | 'c++', 20 | '-I', 21 | '.', 22 | '-I', 23 | './include', 24 | '-I', 25 | '../include', 26 | '-I', 27 | './eigen3', 28 | '-isystem', 29 | '/usr/include/c++/4.6', 30 | '-isystem', 31 | '/usr/lib/openmpi/include/', 32 | '-isystem', 33 | '/usr/include/c++/4.6/x86_64-linux-gnu/' 34 | ] 35 | 36 | if compilation_database_folder: 37 | database = ycm_core.CompilationDatabase(compilation_database_folder) 38 | else: 39 | database = None 40 | 41 | 42 | def DirectoryOfThisScript(): 43 | return os.path.dirname(os.path.abspath(__file__)) 44 | 45 | 46 | def MakeRelativePathsInFlagsAbsolute(flags, working_directory): 47 | if not working_directory: 48 | return flags 49 | new_flags = [] 50 | make_next_absolute = False 51 | path_flags = ['-isystem', '-I', '-iquote', '--sysroot='] 52 | for flag in flags: 53 | new_flag = flag 54 | 55 | if make_next_absolute: 56 | make_next_absolute = False 57 | if not flag.startswith('/'): 58 | new_flag = os.path.join(working_directory, flag) 59 | 60 | for path_flag in path_flags: 61 | if flag == path_flag: 62 | make_next_absolute = True 63 | break 64 | 65 | if flag.startswith(path_flag): 66 | path = flag[len(path_flag):] 67 | new_flag = path_flag + os.path.join(working_directory, path) 68 | break 69 | 70 | if new_flag: 71 | new_flags.append(new_flag) 72 | return new_flags 73 | 74 | 75 | def FlagsForFile(filename): 76 | if database: 77 | # Bear in mind that compilation_info.compiler_flags_ does NOT return a 78 | # python list, but a "list-like" StringVec object 79 | compilation_info = database.GetCompilationInfoForFile(filename) 80 | final_flags = PrepareClangFlags( 81 | MakeRelativePathsInFlagsAbsolute( 82 | compilation_info.compiler_flags_, 83 | compilation_info.compiler_working_dir_), 84 | filename) 85 | else: 86 | #relative_to = DirectoryOfThisScript() 87 | relative_to = os.getcwd() 88 | final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to) 89 | 90 | return { 91 | 'flags': final_flags, 92 | 'do_cache': True} 93 | -------------------------------------------------------------------------------- /src/caffe/layers/softmax_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | // 3 | #include 4 | #include 5 | 6 | #include "caffe/layer.hpp" 7 | #include "caffe/vision_layers.hpp" 8 | #include "caffe/util/math_functions.hpp" 9 | 10 | using std::max; 11 | 12 | namespace caffe { 13 | 14 | template 15 | void SoftmaxLayer::SetUp(const vector*>& bottom, 16 | vector*>* top) { 17 | CHECK_EQ(bottom.size(), 1) << "Softmax Layer takes a single blob as input."; 18 | CHECK_EQ(top->size(), 1) << "Softmax Layer takes a single blob as output."; 19 | (*top)[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), 20 | bottom[0]->height(), bottom[0]->width()); 21 | sum_multiplier_.Reshape(1, bottom[0]->channels(), 22 | bottom[0]->height(), bottom[0]->width()); 23 | Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); 24 | for (int i = 0; i < sum_multiplier_.count(); ++i) { 25 | multiplier_data[i] = 1.; 26 | } 27 | scale_.Reshape(bottom[0]->num(), 1, 1, 1); 28 | } 29 | 30 | template 31 | void SoftmaxLayer::Forward_cpu(const vector*>& bottom, 32 | vector*>* top) { 33 | const Dtype* bottom_data = bottom[0]->cpu_data(); 34 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 35 | Dtype* scale_data = scale_.mutable_cpu_data(); 36 | int num = bottom[0]->num(); 37 | int dim = bottom[0]->count() / bottom[0]->num(); 38 | memcpy(top_data, bottom_data, sizeof(Dtype) * bottom[0]->count()); 39 | // we need to subtract the max to avoid numerical issues, compute the exp, 40 | // and then normalize. 41 | for (int i = 0; i < num; ++i) { 42 | scale_data[i] = bottom_data[i*dim]; 43 | for (int j = 0; j < dim; ++j) { 44 | scale_data[i] = max(scale_data[i], bottom_data[i * dim + j]); 45 | } 46 | } 47 | // subtraction 48 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., 49 | scale_data, sum_multiplier_.cpu_data(), 1., top_data); 50 | // Perform exponentiation 51 | caffe_exp(num * dim, top_data, top_data); 52 | // sum after exp 53 | caffe_cpu_gemv(CblasNoTrans, num, dim, 1., top_data, 54 | sum_multiplier_.cpu_data(), 0., scale_data); 55 | // Do division 56 | for (int i = 0; i < num; ++i) { 57 | caffe_scal(dim, Dtype(1.) / scale_data[i], top_data + i * dim); 58 | } 59 | } 60 | 61 | template 62 | Dtype SoftmaxLayer::Backward_cpu(const vector*>& top, 63 | const bool propagate_down, 64 | vector*>* bottom) { 65 | const Dtype* top_diff = top[0]->cpu_diff(); 66 | const Dtype* top_data = top[0]->cpu_data(); 67 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 68 | Dtype* scale_data = scale_.mutable_cpu_data(); 69 | int num = top[0]->num(); 70 | int dim = top[0]->count() / top[0]->num(); 71 | memcpy(bottom_diff, top_diff, sizeof(Dtype) * top[0]->count()); 72 | // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff 73 | for (int i = 0; i < num; ++i) { 74 | scale_data[i] = caffe_cpu_dot(dim, top_diff + i * dim, 75 | top_data + i * dim); 76 | } 77 | // subtraction 78 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num, dim, 1, -1., 79 | scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff); 80 | // elementwise multiplication 81 | caffe_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); 82 | return Dtype(0); 83 | } 84 | 85 | 86 | INSTANTIATE_CLASS(SoftmaxLayer); 87 | 88 | 89 | } // namespace caffe 90 | -------------------------------------------------------------------------------- /src/caffe/layer_factory.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef CAFFE_LAYER_FACTORY_HPP_ 4 | #define CAFFE_LAYER_FACTORY_HPP_ 5 | 6 | #include 7 | 8 | #include "caffe/layer.hpp" 9 | #include "caffe/vision_layers.hpp" 10 | #include "caffe/proto/caffe.pb.h" 11 | 12 | 13 | namespace caffe { 14 | 15 | 16 | // A function to get a specific layer from the specification given in 17 | // LayerParameter. Ideally this would be replaced by a factory pattern, 18 | // but we will leave it this way for now. 19 | template 20 | Layer* GetLayer(const LayerParameter& param) { 21 | const std::string& type = param.type(); 22 | if (type == "accuracy") { 23 | return new AccuracyLayer(param); 24 | } else if (type == "verif_accuracy") { 25 | return new VerificationAccuracyLayer(param); 26 | } else if (type == "bnll") { 27 | return new BNLLLayer(param); 28 | } else if (type == "concat") { 29 | return new ConcatLayer(param); 30 | } else if (type == "conv") { 31 | return new ConvolutionLayer(param); 32 | #if 0 33 | } else if (type == "data") { 34 | return new DataLayer(param); 35 | } else if (type == "shuffle_data") { 36 | return new ShuffleDataLayer(param); 37 | #endif 38 | } else if (type == "dropout") { 39 | return new DropoutLayer(param); 40 | } else if (type == "dropout_group") { 41 | return new DropoutGroupLayer(param); 42 | } else if (type == "euclidean_loss") { 43 | return new EuclideanLossLayer(param); 44 | } else if (type == "flatten") { 45 | return new FlattenLayer(param); 46 | #if 0 47 | } else if (type == "hdf5_data") { 48 | return new HDF5DataLayer(param); 49 | } else if (type == "images") { 50 | return new ImagesLayer(param); 51 | #endif 52 | } else if (type == "raw_image") { 53 | return new RawImageLayer(param); 54 | } else if (type == "im2col") { 55 | return new Im2colLayer(param); 56 | } else if (type == "infogain_loss") { 57 | return new InfogainLossLayer(param); 58 | } else if (type == "innerproduct") { 59 | return new InnerProductLayer(param); 60 | } else if (type == "lrn") { 61 | return new LRNLayer(param); 62 | } else if (type == "multinomial_logistic_loss") { 63 | return new MultinomialLogisticLossLayer(param); 64 | } else if (type == "padding") { 65 | return new PaddingLayer(param); 66 | } else if (type == "pool") { 67 | return new PoolingLayer(param); 68 | } else if (type == "relu") { 69 | return new ReLULayer(param); 70 | } else if (type == "sigmoid") { 71 | return new SigmoidLayer(param); 72 | } else if (type == "softmax") { 73 | return new SoftmaxLayer(param); 74 | } else if (type == "softmax_loss") { 75 | return new SoftmaxWithLossLayer(param); 76 | } else if (type == "split") { 77 | return new SplitLayer(param); 78 | } else if (type == "tanh") { 79 | return new TanHLayer(param); 80 | #if 0 81 | } else if (type == "window_data") { 82 | return new WindowDataLayer(param); 83 | #endif 84 | } else { 85 | LOG(FATAL) << "Unknown layer name: " << type; 86 | } 87 | // just to suppress old compiler warnings. 88 | return (Layer*)(NULL); 89 | } 90 | 91 | template Layer* GetLayer(const LayerParameter& param); 92 | template Layer* GetLayer(const LayerParameter& param); 93 | 94 | } // namespace caffe 95 | 96 | #endif // CAFFE_LAYER_FACTORY_HPP_ 97 | -------------------------------------------------------------------------------- /include/caffe/util/math_functions.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ 4 | #define CAFFE_UTIL_MATH_FUNCTIONS_H_ 5 | 6 | #include "mkl_alternate.hpp" 7 | namespace caffe { 8 | //enum CBLAS_TRANSPOSE { CblasNoTrans = 111, CblasTrans = 112, CblasConjTrans = 113 }; 9 | 10 | // Decaf gemm provides a simpler interface to the gemm functions, with the 11 | // limitation that the data has to be contiguous in memory. 12 | template 13 | void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, 14 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 15 | const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, 16 | Dtype* C); 17 | 18 | template 19 | void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, 20 | const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, 21 | Dtype* y); 22 | 23 | template 24 | void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, 25 | const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, 26 | Dtype* y); 27 | 28 | template 29 | void caffe_axpy(const int N, const Dtype alpha, const Dtype* X, 30 | Dtype* Y); 31 | 32 | template 33 | void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X, 34 | Dtype* Y); 35 | 36 | template 37 | void caffe_axpby(const int N, const Dtype alpha, const Dtype* X, 38 | const Dtype beta, Dtype* Y); 39 | 40 | template 41 | void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X, 42 | const Dtype beta, Dtype* Y); 43 | 44 | template 45 | void caffe_copy(const int N, const Dtype *X, Dtype *Y); 46 | 47 | template 48 | void caffe_gpu_copy(const int N, const Dtype *X, Dtype *Y); 49 | 50 | template 51 | void caffe_scal(const int N, const Dtype alpha, Dtype *X); 52 | 53 | template 54 | void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X); 55 | 56 | template 57 | void caffe_sqr(const int N, const Dtype* a, Dtype* y); 58 | 59 | template 60 | void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); 61 | 62 | template 63 | void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); 64 | 65 | template 66 | void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); 67 | 68 | template 69 | void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); 70 | 71 | template 72 | void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); 73 | 74 | template 75 | void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y); 76 | 77 | template 78 | void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y); 79 | 80 | template 81 | void caffe_vRngUniform(const int n, Dtype* r, const Dtype a, const Dtype b); 82 | 83 | template 84 | void caffe_vRngGaussian(const int n, Dtype* r, const Dtype a, 85 | const Dtype sigma); 86 | 87 | template 88 | void caffe_exp(const int n, const Dtype* a, Dtype* y); 89 | 90 | template 91 | Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y); 92 | 93 | template 94 | void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); 95 | 96 | } // namespace caffe 97 | 98 | 99 | #endif // CAFFE_UTIL_MATH_FUNCTIONS_H_ 100 | -------------------------------------------------------------------------------- /src/caffe/layers/concat_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Sergio Guadarrama 2 | 3 | #include 4 | 5 | #include "caffe/layer.hpp" 6 | #include "caffe/vision_layers.hpp" 7 | #include "caffe/util/math_functions.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void ConcatLayer::SetUp(const vector*>& bottom, 13 | vector*>* top) { 14 | CHECK_GT(bottom.size(), 1) << 15 | "Concat Layer takes at least two blobs as input."; 16 | CHECK_EQ(top->size(), 1) << 17 | "Concat Layer takes a single blob as output."; 18 | concat_dim_ = this->layer_param_.concat_dim(); 19 | CHECK_GE(concat_dim_, 0) << "concat_dim should be >= 0"; 20 | CHECK_LE(concat_dim_, 1) << 21 | "For now concat_dim <=1, it can only concat num and channels"; 22 | // Intialize with the first blob 23 | COUNT_ = bottom[0]->count(); 24 | NUM_ = bottom[0]->num(); 25 | CHANNELS_ = bottom[0]->channels(); 26 | HEIGHT_ = bottom[0]->height(); 27 | WIDTH_ = bottom[0]->width(); 28 | for (int i = 1; i < bottom.size(); ++i) { 29 | COUNT_ += bottom[i]->count(); 30 | if (concat_dim_== 0) { 31 | NUM_ += bottom[i]->num(); 32 | } else if (concat_dim_ == 1) { 33 | CHANNELS_ += bottom[i]->channels(); 34 | } else if (concat_dim_ == 2) { 35 | HEIGHT_ += bottom[i]->height(); 36 | } else if (concat_dim_ == 3) { 37 | WIDTH_ += bottom[i]->width(); 38 | } 39 | } 40 | (*top)[0]->Reshape(NUM_, CHANNELS_, HEIGHT_, WIDTH_); 41 | CHECK_EQ(COUNT_, (*top)[0]->count()); 42 | } 43 | 44 | template 45 | void ConcatLayer::Forward_cpu(const vector*>& bottom, 46 | vector*>* top) { 47 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 48 | if (concat_dim_== 0) { 49 | int offset_num = 0; 50 | for (int i = 0; i < bottom.size(); ++i) { 51 | const Dtype* bottom_data = bottom[i]->cpu_data(); 52 | int num_elem = bottom[i]->count(); 53 | caffe_copy(num_elem, bottom_data, top_data+(*top)[0]->offset(offset_num)); 54 | offset_num += bottom[i]->num(); 55 | } 56 | } else if (concat_dim_ == 1) { 57 | int offset_channel = 0; 58 | for (int i = 0; i < bottom.size(); ++i) { 59 | const Dtype* bottom_data = bottom[i]->cpu_data(); 60 | int num_elem = 61 | bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); 62 | for (int n = 0; n < NUM_; ++n) { 63 | caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), 64 | top_data+(*top)[0]->offset(n, offset_channel)); 65 | } 66 | offset_channel += bottom[i]->channels(); 67 | } 68 | } else { 69 | LOG(FATAL) << "concat_dim along dim" << concat_dim_ << 70 | " not implemented yet"; 71 | } 72 | } 73 | 74 | template 75 | Dtype ConcatLayer::Backward_cpu(const vector*>& top, 76 | const bool propagate_down, vector*>* bottom) { 77 | const Dtype* top_diff = top[0]->cpu_diff(); 78 | if (concat_dim_ == 0) { 79 | int offset_num = 0; 80 | for (int i = 0; i < bottom->size(); ++i) { 81 | Blob* blob = (*bottom)[i]; 82 | Dtype* bottom_diff = blob->mutable_cpu_diff(); 83 | caffe_copy(blob->count(), 84 | top_diff+top[0]->offset(offset_num), bottom_diff); 85 | offset_num += blob->num(); 86 | } 87 | } else if (concat_dim_ == 1) { 88 | int offset_channel = 0; 89 | for (int i = 0; i < bottom->size(); ++i) { 90 | Blob* blob = (*bottom)[i]; 91 | Dtype* bottom_diff = blob->mutable_cpu_diff(); 92 | int num_elem = blob->channels()*blob->height()*blob->width(); 93 | for (int n = 0; n < NUM_; ++n) { 94 | caffe_copy(num_elem, top_diff+top[0]->offset(n, offset_channel), 95 | bottom_diff+blob->offset(n)); 96 | } 97 | offset_channel += blob->channels(); 98 | } 99 | } else { 100 | LOG(FATAL) << "concat_dim along dim" << concat_dim_ << 101 | " not implemented yet"; 102 | } 103 | return Dtype(0.); 104 | } 105 | 106 | INSTANTIATE_CLASS(ConcatLayer); 107 | 108 | } // namespace caffe 109 | -------------------------------------------------------------------------------- /src/caffe/layers/inner_product_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | 4 | #include 5 | 6 | #include "caffe/blob.hpp" 7 | #include "caffe/common.hpp" 8 | #include "caffe/filler.hpp" 9 | #include "caffe/layer.hpp" 10 | #include "caffe/vision_layers.hpp" 11 | #include "caffe/util/math_functions.hpp" 12 | 13 | namespace caffe { 14 | 15 | template 16 | void InnerProductLayer::SetUp(const vector*>& bottom, 17 | vector*>* top) { 18 | CHECK_EQ(bottom.size(), 1) << "IP Layer takes a single blob as input."; 19 | CHECK_EQ(top->size(), 1) << "IP Layer takes a single blob as output."; 20 | const int num_output = this->layer_param_.num_output(); 21 | biasterm_ = this->layer_param_.biasterm(); 22 | // Figure out the dimensions 23 | M_ = bottom[0]->num(); 24 | K_ = bottom[0]->count() / bottom[0]->num(); 25 | N_ = num_output; 26 | (*top)[0]->Reshape(bottom[0]->num(), num_output, 1, 1); 27 | // Check if we need to set up the weights 28 | if (this->blobs_.size() > 0) { 29 | LOG(INFO) << "Skipping parameter initialization"; 30 | } else { 31 | if (biasterm_) { 32 | this->blobs_.resize(2); 33 | } else { 34 | this->blobs_.resize(1); 35 | } 36 | // Intialize the weight 37 | this->blobs_[0].reset(new Blob(1, 1, N_, K_)); 38 | // fill the weights 39 | shared_ptr > weight_filler( 40 | GetFiller(this->layer_param_.weight_filler())); 41 | weight_filler->Fill(this->blobs_[0].get()); 42 | // If necessary, intiialize and fill the bias term 43 | if (biasterm_) { 44 | this->blobs_[1].reset(new Blob(1, 1, 1, N_)); 45 | shared_ptr > bias_filler( 46 | GetFiller(this->layer_param_.bias_filler())); 47 | bias_filler->Fill(this->blobs_[1].get()); 48 | } 49 | } // parameter initialization 50 | // Setting up the bias multiplier 51 | if (biasterm_) { 52 | bias_multiplier_.reset(new SyncedMemory(M_ * sizeof(Dtype))); 53 | Dtype* bias_multiplier_data = 54 | reinterpret_cast(bias_multiplier_->mutable_cpu_data()); 55 | for (int i = 0; i < M_; ++i) { 56 | bias_multiplier_data[i] = 1.; 57 | } 58 | } 59 | } 60 | 61 | template 62 | void InnerProductLayer::Forward_cpu(const vector*>& bottom, 63 | vector*>* top) { 64 | const Dtype* bottom_data = bottom[0]->cpu_data(); 65 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 66 | const Dtype* weight = this->blobs_[0]->cpu_data(); 67 | caffe_cpu_gemm(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1., 68 | bottom_data, weight, (Dtype)0., top_data); 69 | if (biasterm_) { 70 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., 71 | reinterpret_cast(bias_multiplier_->cpu_data()), 72 | this->blobs_[1]->cpu_data(), (Dtype)1., top_data); 73 | } 74 | } 75 | 76 | template 77 | Dtype InnerProductLayer::Backward_cpu(const vector*>& top, 78 | const bool propagate_down, 79 | vector*>* bottom) { 80 | const Dtype* top_diff = top[0]->cpu_diff(); 81 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 82 | // Gradient with respect to weight 83 | caffe_cpu_gemm(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., 84 | top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff()); 85 | if (biasterm_) { 86 | // Gradient with respect to bias 87 | caffe_cpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, 88 | reinterpret_cast(bias_multiplier_->cpu_data()), (Dtype)0., 89 | this->blobs_[1]->mutable_cpu_diff()); 90 | } 91 | if (propagate_down) { 92 | // Gradient with respect to bottom data 93 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1., 94 | top_diff, this->blobs_[0]->cpu_data(), (Dtype)0., 95 | (*bottom)[0]->mutable_cpu_diff()); 96 | } 97 | return Dtype(0); 98 | } 99 | 100 | INSTANTIATE_CLASS(InnerProductLayer); 101 | 102 | } // namespace caffe 103 | -------------------------------------------------------------------------------- /feat_net_raw.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | // 3 | // This is a simple script that allows one to quickly test a network whose 4 | // structure is specified by text format protocol buffers, and whose parameter 5 | // are loaded from a pre-trained network. 6 | // Usage: 7 | // test_net net_proto pretrained_net_proto iterations [CPU/GPU] 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "caffe/caffe.hpp" 16 | 17 | using namespace caffe; // NOLINT(build/namespaces) 18 | 19 | template 20 | static void save_blob(const string& fn, Blob *b){ 21 | LOG(INFO) << "Saving " << fn; 22 | FILE *f = fopen(fn.c_str(), "wb"); 23 | CHECK(f != NULL); 24 | fwrite(b->cpu_data(), sizeof(Dtype), b->count(), f); 25 | fclose(f); 26 | } 27 | 28 | int main(int argc, char** argv) { 29 | if (argc < 5) { 30 | LOG(ERROR) << "test_net net_proto pretrained_net_proto iterations inputbin output_dir" 31 | << " [CPU/GPU]"; 32 | return 0; 33 | } 34 | 35 | Caffe::set_phase(Caffe::TEST); 36 | 37 | if (argc == 7 && strcmp(argv[6], "GPU") == 0) { 38 | LOG(ERROR) << "Using GPU"; 39 | Caffe::set_mode(Caffe::GPU); 40 | } else { 41 | LOG(ERROR) << "Using CPU"; 42 | Caffe::set_mode(Caffe::CPU); 43 | } 44 | 45 | NetParameter test_net_param; 46 | ReadProtoFromTextFile(argv[1], &test_net_param); 47 | Net caffe_test_net(test_net_param); 48 | NetParameter trained_net_param; 49 | ReadProtoFromBinaryFile(argv[2], &trained_net_param); 50 | caffe_test_net.CopyTrainedLayersFrom(trained_net_param); 51 | 52 | #if 0 53 | SolverState state; 54 | std::string state_file = std::string(argv[2]) + ".solverstate"; 55 | ReadProtoFromBinaryFile(state_file, &state); 56 | #endif 57 | 58 | int total_iter = atoi(argv[3]); 59 | LOG(ERROR) << "Running " << total_iter << " Iterations."; 60 | 61 | double test_accuracy = 0; 62 | vector*> dummy_blob_input_vec; 63 | 64 | //save layer 65 | char output_dir[1024]; 66 | int feature_layer_idx = -1; 67 | int data_layer_idx = -1; 68 | for(int i=0;i* output = caffe_test_net.top_vecs()[feature_layer_idx][0], 85 | *data_blob = caffe_test_net.top_vecs()[data_layer_idx][0]; 86 | RawImageLayer *data_layer = dynamic_cast* >(caffe_test_net.layers()[data_layer_idx].get()); 87 | CHECK(data_layer != 0); 88 | 89 | LOG(INFO) << "OUTPUT BLOB dim: " << output->num() << ' ' 90 | << output->channels() << ' ' 91 | << output->width() << ' ' 92 | << output->height(); 93 | FILE *finput = fopen(argv[5], "rb"); 94 | CHECK(finput != NULL); 95 | const int ih = data_blob->height(), iw = data_blob->width(), ic = data_blob->channels(); 96 | double buf[ih*iw*ic]; 97 | for (int i = 0; i < total_iter; ++i) { 98 | float *d = data_blob->mutable_cpu_data(); 99 | size_t len = ih * iw * ic; 100 | for(int j = 0; j < data_blob->num(); j++){ 101 | size_t nread = fread(buf, sizeof(double), len, finput); 102 | CHECK_EQ(nread, len); 103 | for(int k=0;k*>& result = 109 | caffe_test_net.Forward(dummy_blob_input_vec); 110 | 111 | sprintf(output_dir, "%s/feat_%05d", argv[4], i); 112 | save_blob(output_dir, output); 113 | 114 | //test_accuracy += result[0]->cpu_data()[0]; 115 | //LOG(ERROR) << "Batch " << i << ", accuracy: " << result[0]->cpu_data()[0]; 116 | } 117 | fclose(finput); 118 | //test_accuracy /= total_iter; 119 | //LOG(ERROR) << "Test accuracy:" << test_accuracy; 120 | 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /src/caffe/layers/verification_loss.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "caffe/layer.hpp" 8 | #include "caffe/vision_layers.hpp" 9 | #include "caffe/util/math_functions.hpp" 10 | 11 | using std::max; 12 | 13 | namespace caffe { 14 | 15 | template 16 | Dtype VerificationLossLayer::CalcThreshold(bool update) { 17 | int i, j, is, id, is_ = 0, id_ = 0; 18 | Dtype th, th_c, s, d, f; 19 | int n = same_.size(); 20 | CHECK_EQ(n, distance_.size()); 21 | if(!n) 22 | return M_; 23 | for(i = 0; i < n; i++) 24 | { 25 | if(same_[i]) 26 | { 27 | is_++; 28 | } 29 | else 30 | { 31 | id_++; 32 | } 33 | } 34 | 35 | Dtype stat[3]; 36 | stat[0] = 1.0; 37 | stat[1] = 0.5; 38 | stat[2] = 0.5; 39 | th = -1.0; 40 | 41 | for(i = 0; i < 4000; i++) 42 | { 43 | th_c = i * 0.1; 44 | is = 0; 45 | id = 0; 46 | for(j = 0; j < n; j++) 47 | { 48 | if(same_[j]) 49 | { 50 | if(distance_[j] > th_c) 51 | { 52 | is++; 53 | } 54 | } 55 | else 56 | { 57 | if(distance_[j] <= th_c) 58 | { 59 | id++; 60 | } 61 | } 62 | } 63 | s = (Dtype)is / (2 * is_); 64 | d = (Dtype)id / (2 * id_); 65 | f = s + d; 66 | if(f < stat[0]) 67 | { 68 | stat[0] = f; 69 | stat[1] = s; 70 | stat[2] = d; 71 | th = th_c; 72 | } 73 | } 74 | LOG(INFO) << "margin: " << th << " (" 75 | << stat[0] << ", " << stat[1] 76 | << ", " << stat[2] << ")"; 77 | 78 | if(update) 79 | SetThreshold(th); 80 | return th; 81 | 82 | } 83 | 84 | template 85 | void VerificationLossLayer::SetUp(const vector*>& bottom, 86 | vector*>* top) { 87 | CHECK_EQ(bottom.size(), 4) << "VerificationLoss Layer takes four blobs as input."; 88 | CHECK_EQ(top->size(), 0) << "VerificationLoss Layer takes no blob as output."; 89 | 90 | diffy1_.Reshape(bottom[0]->num(), bottom[0]->channels(), 1, 1); 91 | diffy2_.Reshape(bottom[0]->num(), bottom[0]->channels(), 1, 1); 92 | M_ = this->layer_param_.dual_threshold(); 93 | LAMDA_ = this->layer_param_.dual_lamda(); 94 | 95 | ResetDistanceStat(); 96 | LOG(INFO) << "Initial: threshold " << M_ << ", " << "lamda: " << LAMDA_; 97 | } 98 | 99 | template 100 | void VerificationLossLayer::Forward_cpu( 101 | const vector*>& bottom, vector*>* top) { 102 | } 103 | 104 | template 105 | Dtype VerificationLossLayer::Backward_cpu(const vector*>& top, 106 | const bool propagate_down, 107 | vector*>* bottom) { 108 | const Dtype* feat_1 = (*bottom)[0]->cpu_data(); 109 | const Dtype* feat_2 = (*bottom)[2]->cpu_data(); 110 | const Dtype* label_1 = (*bottom)[1]->cpu_data(); 111 | const Dtype* label_2 = (*bottom)[3]->cpu_data(); 112 | 113 | //Dtype *diffy_ptr = diffy_.mutable_cpu_data(); 114 | 115 | Dtype* bottom_diff1 = diffy1_.mutable_cpu_data(); 116 | Dtype* bottom_diff2 = diffy2_.mutable_cpu_data(); 117 | 118 | int num = (*bottom)[0]->num(); 119 | int count = (*bottom)[0]->count(); 120 | //y1 - y2 121 | caffe_sub(count, feat_1, feat_2, bottom_diff1); 122 | caffe_sub(count, feat_2, feat_1, bottom_diff2); 123 | 124 | const int feat_len = (*bottom)[0]->channels(); 125 | 126 | for (int i = 0; i < (*bottom)[0]->num(); ++i) { 127 | int l1 = static_cast(label_1[i]); 128 | int l2 = static_cast(label_2[i]); 129 | int offset = i*feat_len; 130 | if(l1 == l2){ 131 | /* nothing */ 132 | }else{ 133 | Dtype norm2 = caffe_cpu_dot(feat_len, bottom_diff1+offset, bottom_diff1+offset); 134 | Dtype norm = sqrt(norm2); 135 | if(norm > M_){ 136 | memset(bottom_diff1+offset,0, sizeof(Dtype)*feat_len); 137 | memset(bottom_diff2+offset,0, sizeof(Dtype)*feat_len); 138 | }else{ 139 | norm = (M_ - norm) / (norm+Dtype(FLT_MIN)); 140 | caffe_scal(feat_len, -norm, bottom_diff1+offset); 141 | caffe_scal(feat_len, -norm, bottom_diff2+offset); 142 | } 143 | } 144 | } 145 | 146 | //Add gradien to original 147 | Dtype* _bottom_diff1 = (*bottom)[0]->mutable_cpu_diff(); 148 | Dtype* _bottom_diff2 = (*bottom)[2]->mutable_cpu_diff(); 149 | #if 0 150 | for(int i=0;i<(*bottom)[0]->count();i++){ 151 | printf("%d %f %f\n", num, _bottom_diff1[i], bottom_diff1[i] / num); 152 | } 153 | #endif 154 | 155 | // Scale down gradient 156 | caffe_axpy(count, LAMDA_/num, bottom_diff1, _bottom_diff1); 157 | caffe_axpy(count, LAMDA_/num, bottom_diff2, _bottom_diff2); 158 | return Dtype(0.); 159 | } 160 | 161 | 162 | INSTANTIATE_CLASS(VerificationLossLayer); 163 | 164 | 165 | } // namespace caffe 166 | -------------------------------------------------------------------------------- /include/caffe/util/mkl_alternate.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 BVLC and contributors. 2 | 3 | #ifndef CAFFE_UTIL_MKL_ALTERNATE_H_ 4 | #define CAFFE_UTIL_MKL_ALTERNATE_H_ 5 | 6 | #include "../common.hpp" 7 | #ifdef USE_MKL 8 | 9 | #include 10 | 11 | #else // If use MKL, simply include the MKL header 12 | 13 | #ifndef USE_EIGEN 14 | extern "C" { 15 | #include 16 | } 17 | #else 18 | #include 19 | enum CBLAS_ORDER { CblasRowMajor = 101, CblasColMajor = 102 }; 20 | enum CBLAS_TRANSPOSE { CblasNoTrans = 111, CblasTrans = 112, CblasConjTrans = 113 }; 21 | 22 | #define MAP_SVECTOR(name, ptr, N) Eigen::Map name(ptr, N) 23 | #define MAP_CONST_SVECTOR(name, ptr, N) Eigen::Map name(ptr, N) 24 | #define MAP_DVECTOR(name, ptr, N) Eigen::Map name(ptr, N) 25 | #define MAP_CONST_DVECTOR(name, ptr, N) Eigen::Map name(ptr, N) 26 | typedef Eigen::Matrix MatXf; 27 | typedef Eigen::Matrix MatXd; 28 | 29 | #define MAP_SMATRIX(name, ptr, M, N) Eigen::Map name(ptr, M, N) 30 | #define MAP_CONST_SMATRIX(name, ptr, M, N) Eigen::Map name(ptr, M, N) 31 | #define MAP_DMATRIX(name, ptr, M, N) Eigen::Map name(ptr, M, N) 32 | #define MAP_CONST_DMATRIX(name, ptr, M, N) Eigen::Map name(ptr, M, N) 33 | 34 | 35 | #endif 36 | 37 | #include 38 | 39 | // Functions that caffe uses but are not present if MKL is not linked. 40 | 41 | // A simple way to define the vsl unary functions. The operation should 42 | // be in the form e.g. y[i] = sqrt(a[i]) 43 | #define DEFINE_VSL_UNARY_FUNC(name, operation) \ 44 | template \ 45 | void v##name(const int n, const Dtype* a, Dtype* y) { \ 46 | CHECK_GT(n, 0); CHECK(a); CHECK(y); \ 47 | for (int i = 0; i < n; ++i) { operation; } \ 48 | } \ 49 | inline void vs##name( \ 50 | const int n, const float* a, float* y) { \ 51 | v##name(n, a, y); \ 52 | } \ 53 | inline void vd##name( \ 54 | const int n, const double* a, double* y) { \ 55 | v##name(n, a, y); \ 56 | } 57 | 58 | DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]); 59 | DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])); 60 | 61 | // A simple way to define the vsl unary functions with singular parameter b. 62 | // The operation should be in the form e.g. y[i] = pow(a[i], b) 63 | #define DEFINE_VSL_UNARY_FUNC_WITH_PARAM(name, operation) \ 64 | template \ 65 | void v##name(const int n, const Dtype* a, const Dtype b, Dtype* y) { \ 66 | CHECK_GT(n, 0); CHECK(a); CHECK(y); \ 67 | for (int i = 0; i < n; ++i) { operation; } \ 68 | } \ 69 | inline void vs##name( \ 70 | const int n, const float* a, const float b, float* y) { \ 71 | v##name(n, a, b, y); \ 72 | } \ 73 | inline void vd##name( \ 74 | const int n, const double* a, const float b, double* y) { \ 75 | v##name(n, a, b, y); \ 76 | } 77 | 78 | DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)); 79 | 80 | // A simple way to define the vsl binary functions. The operation should 81 | // be in the form e.g. y[i] = a[i] + b[i] 82 | #define DEFINE_VSL_BINARY_FUNC(name, operation) \ 83 | template \ 84 | void v##name(const int n, const Dtype* a, const Dtype* b, Dtype* y) { \ 85 | CHECK_GT(n, 0); CHECK(a); CHECK(b); CHECK(y); \ 86 | for (int i = 0; i < n; ++i) { operation; } \ 87 | } \ 88 | inline void vs##name( \ 89 | const int n, const float* a, const float* b, float* y) { \ 90 | v##name(n, a, b, y); \ 91 | } \ 92 | inline void vd##name( \ 93 | const int n, const double* a, const double* b, double* y) { \ 94 | v##name(n, a, b, y); \ 95 | } 96 | 97 | DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]); 98 | DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]); 99 | DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]); 100 | DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]); 101 | 102 | #ifndef USE_EIGEN 103 | // In addition, MKL comes with an additional function axpby that is not present 104 | // in standard blas. We will simply use a two-step (inefficient, of course) way 105 | // to mimic that. 106 | inline void cblas_saxpby(const int N, const float alpha, const float* X, 107 | const int incX, const float beta, float* Y, 108 | const int incY) { 109 | cblas_sscal(N, beta, Y, incY); 110 | cblas_saxpy(N, alpha, X, incX, Y, incY); 111 | } 112 | inline void cblas_daxpby(const int N, const double alpha, const double* X, 113 | const int incX, const double beta, double* Y, 114 | const int incY) { 115 | cblas_dscal(N, beta, Y, incY); 116 | cblas_daxpy(N, alpha, X, incX, Y, incY); 117 | } 118 | #endif 119 | 120 | #endif // USE_MKL 121 | #endif // CAFFE_UTIL_MKL_ALTERNATE_H_ 122 | -------------------------------------------------------------------------------- /include/caffe/layer.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef CAFFE_LAYER_H_ 4 | #define CAFFE_LAYER_H_ 5 | 6 | #include 7 | #include "caffe/blob.hpp" 8 | #include "caffe/common.hpp" 9 | #include "caffe/proto/caffe.pb.h" 10 | 11 | using std::vector; 12 | 13 | namespace caffe { 14 | 15 | template 16 | class Layer { 17 | public: 18 | // You should not implement your own constructor. Any set up code should go 19 | // to SetUp(), where the dimensions of the bottom blobs are provided to the 20 | // layer. 21 | explicit Layer(const LayerParameter& param) 22 | : layer_param_(param) { 23 | // The only thing we do is to copy blobs if there are any. 24 | if (layer_param_.blobs_size() > 0) { 25 | blobs_.resize(layer_param_.blobs_size()); 26 | for (int i = 0; i < layer_param_.blobs_size(); ++i) { 27 | blobs_[i].reset(new Blob()); 28 | blobs_[i]->FromProto(layer_param_.blobs(i)); 29 | } 30 | } 31 | } 32 | virtual ~Layer() {} 33 | // SetUp: your function should implement this. 34 | virtual void SetUp(const vector*>& bottom, 35 | vector*>* top) = 0; 36 | 37 | // Forward and backward wrappers. You should implement the cpu and 38 | // gpu specific implementations instead, and should not change these 39 | // functions. 40 | inline void Forward(const vector*>& bottom, 41 | vector*>* top); 42 | inline Dtype Backward(const vector*>& top, 43 | const bool propagate_down, 44 | vector*>* bottom); 45 | 46 | // Returns the vector of blobs. 47 | vector > >& blobs() { 48 | return blobs_; 49 | } 50 | 51 | // Returns the layer parameter 52 | const LayerParameter& layer_param() { return layer_param_; } 53 | // Writes the layer parameter to a protocol buffer 54 | virtual void ToProto(LayerParameter* param, bool write_diff = false); 55 | 56 | protected: 57 | // The protobuf that stores the layer parameters 58 | LayerParameter layer_param_; 59 | // The vector that stores the parameters as a set of blobs. 60 | vector > > blobs_; 61 | 62 | // Forward functions 63 | virtual void Forward_cpu(const vector*>& bottom, 64 | vector*>* top) = 0; 65 | // If no gpu code is provided, we will simply use cpu code. 66 | #if 0 67 | virtual void Forward_gpu(const vector*>& bottom, 68 | vector*>* top) { 69 | // LOG(WARNING) << "Using CPU code as backup."; 70 | Forward_cpu(bottom, top); 71 | } 72 | #endif 73 | 74 | // Backward functions: the backward function will compute the gradients for 75 | // any parameters and also for the bottom blobs if propagate_down is true. 76 | // It will return the loss produced from this layer. 77 | virtual Dtype Backward_cpu(const vector*>& top, 78 | const bool propagate_down, 79 | vector*>* bottom) = 0; 80 | #if 0 81 | virtual Dtype Backward_gpu(const vector*>& top, 82 | const bool propagate_down, 83 | vector*>* bottom) { 84 | // LOG(WARNING) << "Using CPU code as backup."; 85 | return Backward_cpu(top, propagate_down, bottom); 86 | } 87 | #endif 88 | 89 | DISABLE_COPY_AND_ASSIGN(Layer); 90 | }; // class Layer 91 | 92 | // Forward and backward wrappers. You should implement the cpu and 93 | // gpu specific implementations instead, and should not change these 94 | // functions. 95 | template 96 | inline void Layer::Forward(const vector*>& bottom, 97 | vector*>* top) { 98 | switch (Caffe::mode()) { 99 | case Caffe::CPU: 100 | Forward_cpu(bottom, top); 101 | break; 102 | #if 0 103 | case Caffe::GPU: 104 | Forward_gpu(bottom, top); 105 | break; 106 | #endif 107 | default: 108 | LOG(FATAL) << "Unknown caffe mode."; 109 | } 110 | } 111 | 112 | template 113 | inline Dtype Layer::Backward(const vector*>& top, 114 | const bool propagate_down, 115 | vector*>* bottom) { 116 | switch (Caffe::mode()) { 117 | case Caffe::CPU: 118 | return Backward_cpu(top, propagate_down, bottom); 119 | #if 0 120 | case Caffe::GPU: 121 | return Backward_gpu(top, propagate_down, bottom); 122 | #endif 123 | default: 124 | LOG(FATAL) << "Unknown caffe mode."; 125 | return 0; 126 | } 127 | } 128 | 129 | template 130 | void Layer::ToProto(LayerParameter* param, bool write_diff) { 131 | param->Clear(); 132 | param->CopyFrom(layer_param_); 133 | param->clear_blobs(); 134 | for (int i = 0; i < blobs_.size(); ++i) { 135 | blobs_[i]->ToProto(param->add_blobs(), write_diff); 136 | } 137 | } 138 | 139 | // The layer factory function 140 | template 141 | Layer* GetLayer(const LayerParameter& param); 142 | 143 | } // namespace caffe 144 | 145 | #endif // CAFFE_LAYER_H_ 146 | -------------------------------------------------------------------------------- /include/caffe/net.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #ifndef CAFFE_NET_HPP_ 4 | #define CAFFE_NET_HPP_ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "caffe/blob.hpp" 11 | #include "caffe/common.hpp" 12 | #include "caffe/layer.hpp" 13 | #include "caffe/proto/caffe.pb.h" 14 | 15 | using std::map; 16 | using std::vector; 17 | using std::string; 18 | 19 | namespace caffe { 20 | 21 | 22 | template 23 | class Net { 24 | public: 25 | explicit Net(const NetParameter& param); 26 | explicit Net(const string& param_file); 27 | virtual ~Net() {} 28 | 29 | // Initialize a network with the network parameter. 30 | void Init(const NetParameter& param); 31 | 32 | // Run forward with the input blobs already fed separately. You can get the 33 | // input blobs using input_blobs(). 34 | const vector*>& ForwardPrefilled(); 35 | // Run forward using a set of bottom blobs, and return the result. 36 | const vector*>& Forward(const vector* > & bottom); 37 | // Run forward using a serialized BlobProtoVector and return the result 38 | // as a serialized BlobProtoVector 39 | string Forward(const string& input_blob_protos); 40 | 41 | // The network backward should take no input and output, since it solely 42 | // computes the gradient w.r.t the parameters, and the data has already 43 | // been provided during the forward pass. 44 | Dtype Backward(); 45 | Dtype BackwardBetween(int layer_top, int layer_bottom); 46 | 47 | Dtype ForwardBackward(const vector* > & bottom) { 48 | Forward(bottom); 49 | return Backward(); 50 | } 51 | 52 | // Updates the network weights based on the diff values computed. 53 | void Update(); 54 | 55 | // For an already initialized net, CopyTrainedLayersFrom() copies the already 56 | // trained layers from another net parameter instance. 57 | void CopyLayersFrom(const Net& rhs, bool copy_diff); 58 | void CopyTrainedLayersFrom(const NetParameter& param); 59 | void CopyTrainedLayersFrom(const string trained_filename); 60 | // Writes the net to a proto. 61 | void ToProto(NetParameter* param, bool write_diff = false); 62 | 63 | // returns the network name. 64 | inline const string& name() { return name_; } 65 | // returns the layer names 66 | inline const vector& layer_names() { return layer_names_; } 67 | // returns the blob names 68 | inline const vector& blob_names() { return blob_names_; } 69 | // returns the blobs 70 | inline const vector > >& blobs() { return blobs_; } 71 | // returns the layers 72 | inline const vector > >& layers() { return layers_; } 73 | inline vector > >& mutable_layers() { return layers_; } 74 | // returns the bottom and top vecs for each layer - usually you won't need 75 | // this unless you do per-layer checks such as gradients. 76 | inline vector*> >& bottom_vecs() { return bottom_vecs_; } 77 | inline vector*> >& top_vecs() { return top_vecs_; } 78 | // returns the parameters 79 | inline vector > >& params() { return params_; } 80 | // returns the parameter learning rate multipliers 81 | inline vector& params_lr() {return params_lr_; } 82 | inline vector& params_weight_decay() { return params_weight_decay_; } 83 | // Input and output blob numbers 84 | inline int num_inputs() { return net_input_blobs_.size(); } 85 | inline int num_outputs() { return net_output_blobs_.size(); } 86 | inline vector*>& input_blobs() { return net_input_blobs_; } 87 | inline vector*>& output_blobs() { return net_output_blobs_; } 88 | 89 | protected: 90 | // Function to get misc parameters, e.g. the learning rate multiplier and 91 | // weight decay. 92 | void GetLearningRateAndWeightDecay(); 93 | 94 | // Individual layers in the net 95 | vector > > layers_; 96 | vector layer_names_; 97 | vector layer_need_backward_; 98 | // blobs stores the blobs that store intermediate results between the 99 | // layers. 100 | vector > > blobs_; 101 | vector blob_names_; 102 | vector blob_need_backward_; 103 | // bottom_vecs stores the vectors containing the input for each layer. 104 | // They don't actually host the blobs (blobs_ does), so we simply store 105 | // pointers. 106 | vector*> > bottom_vecs_; 107 | vector > bottom_id_vecs_; 108 | // top_vecs stores the vectors containing the output for each layer 109 | vector*> > top_vecs_; 110 | vector > top_id_vecs_; 111 | // blob indices for the input and the output of the net 112 | vector net_input_blob_indices_; 113 | vector*> net_input_blobs_; 114 | vector*> net_output_blobs_; 115 | string name_; 116 | // The parameters in the network. 117 | vector > > params_; 118 | // the learning rate multipliers 119 | vector params_lr_; 120 | // the weight decay multipliers 121 | vector params_weight_decay_; 122 | DISABLE_COPY_AND_ASSIGN(Net); 123 | }; 124 | 125 | 126 | } // namespace caffe 127 | 128 | #endif // CAFFE_NET_HPP_ 129 | -------------------------------------------------------------------------------- /include/caffe/filler.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | // Fillers are random number generators that fills a blob using the specified 4 | // algorithm. The expectation is that they are only going to be used during 5 | // initialization time and will not involve any GPUs. 6 | 7 | #ifndef CAFFE_FILLER_HPP 8 | #define CAFFE_FILLER_HPP 9 | 10 | #include 11 | #include 12 | 13 | #include "caffe/common.hpp" 14 | #include "caffe/blob.hpp" 15 | #include "caffe/syncedmem.hpp" 16 | #include "caffe/util/math_functions.hpp" 17 | #include "caffe/proto/caffe.pb.h" 18 | 19 | namespace caffe { 20 | 21 | template 22 | class Filler { 23 | public: 24 | explicit Filler(const FillerParameter& param) : filler_param_(param) {} 25 | virtual ~Filler() {} 26 | virtual void Fill(Blob* blob) = 0; 27 | protected: 28 | FillerParameter filler_param_; 29 | }; // class Filler 30 | 31 | 32 | template 33 | class ConstantFiller : public Filler { 34 | public: 35 | explicit ConstantFiller(const FillerParameter& param) 36 | : Filler(param) {} 37 | virtual void Fill(Blob* blob) { 38 | Dtype* data = blob->mutable_cpu_data(); 39 | const int count = blob->count(); 40 | const Dtype value = this->filler_param_.value(); 41 | CHECK(count); 42 | for (int i = 0; i < count; ++i) { 43 | data[i] = value; 44 | } 45 | } 46 | }; 47 | 48 | template 49 | class UniformFiller : public Filler { 50 | public: 51 | explicit UniformFiller(const FillerParameter& param) 52 | : Filler(param) {} 53 | virtual void Fill(Blob* blob) { 54 | CHECK(blob->count()); 55 | #if 0 56 | caffe_vRngUniform(blob->count(), blob->mutable_cpu_data(), 57 | Dtype(this->filler_param_.min()), 58 | Dtype(this->filler_param_.max())); 59 | #endif 60 | } 61 | }; 62 | 63 | template 64 | class GaussianFiller : public Filler { 65 | public: 66 | explicit GaussianFiller(const FillerParameter& param) 67 | : Filler(param) {} 68 | virtual void Fill(Blob* blob) { 69 | Dtype* data = blob->mutable_cpu_data(); 70 | CHECK(blob->count()); 71 | #if 0 72 | caffe_vRngGaussian(blob->count(), blob->mutable_cpu_data(), 73 | Dtype(this->filler_param_.mean()), 74 | Dtype(this->filler_param_.std())); 75 | #endif 76 | } 77 | }; 78 | 79 | template 80 | class PositiveUnitballFiller : public Filler { 81 | public: 82 | explicit PositiveUnitballFiller(const FillerParameter& param) 83 | : Filler(param) {} 84 | virtual void Fill(Blob* blob) { 85 | #if 0 86 | Dtype* data = blob->mutable_cpu_data(); 87 | DCHECK(blob->count()); 88 | caffe_vRngUniform(blob->count(), blob->mutable_cpu_data(), 0, 1); 89 | // We expect the filler to not be called very frequently, so we will 90 | // just use a simple implementation 91 | int dim = blob->count() / blob->num(); 92 | CHECK(dim); 93 | for (int i = 0; i < blob->num(); ++i) { 94 | Dtype sum = 0; 95 | for (int j = 0; j < dim; ++j) { 96 | sum += data[i * dim + j]; 97 | } 98 | for (int j = 0; j < dim; ++j) { 99 | data[i * dim + j] /= sum; 100 | } 101 | } 102 | #endif 103 | } 104 | }; 105 | 106 | // A filler based on the paper [Bengio and Glorot 2010]: Understanding 107 | // the difficulty of training deep feedforward neuralnetworks, but does not 108 | // use the fan_out value. 109 | // 110 | // It fills the incoming matrix by randomly sampling uniform data from 111 | // [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number 112 | // of input nodes. You should make sure the input blob has shape (num, a, b, c) 113 | // where a * b * c = fan_in. 114 | template 115 | class XavierFiller : public Filler { 116 | public: 117 | explicit XavierFiller(const FillerParameter& param) 118 | : Filler(param) {} 119 | virtual void Fill(Blob* blob) { 120 | CHECK(blob->count()); 121 | int fan_in = blob->count() / blob->num(); 122 | Dtype scale = sqrt(Dtype(3) / fan_in); 123 | #if 0 124 | caffe_vRngUniform(blob->count(), blob->mutable_cpu_data(), 125 | -scale, scale); 126 | #endif 127 | } 128 | }; 129 | 130 | 131 | // A function to get a specific filler from the specification given in 132 | // FillerParameter. Ideally this would be replaced by a factory pattern, 133 | // but we will leave it this way for now. 134 | template 135 | Filler* GetFiller(const FillerParameter& param) { 136 | const std::string& type = param.type(); 137 | if (type == "constant") { 138 | return new ConstantFiller(param); 139 | } else if (type == "gaussian") { 140 | return new GaussianFiller(param); 141 | } else if (type == "positive_unitball") { 142 | return new PositiveUnitballFiller(param); 143 | } else if (type == "uniform") { 144 | return new UniformFiller(param); 145 | } else if (type == "xavier") { 146 | return new XavierFiller(param); 147 | } else { 148 | CHECK(false) << "Unknown filler name: " << param.type(); 149 | } 150 | return (Filler*)(NULL); 151 | } 152 | 153 | } // namespace caffe 154 | 155 | #endif // CAFFE_FILLER_HPP_ 156 | -------------------------------------------------------------------------------- /src/caffe/util/io.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include // NOLINT(readability/streams) 13 | 14 | #ifdef _MSC_VER 15 | #include /* for open/close */ 16 | #else 17 | #include 18 | #endif 19 | 20 | #include "caffe/common.hpp" 21 | #include "caffe/util/io.hpp" 22 | #include "caffe/proto/caffe.pb.h" 23 | 24 | using std::fstream; 25 | using std::ios; 26 | using std::max; 27 | using std::string; 28 | using google::protobuf::io::FileInputStream; 29 | using google::protobuf::io::FileOutputStream; 30 | using google::protobuf::io::ZeroCopyInputStream; 31 | using google::protobuf::io::CodedInputStream; 32 | using google::protobuf::io::ZeroCopyOutputStream; 33 | using google::protobuf::io::CodedOutputStream; 34 | 35 | namespace caffe { 36 | 37 | void ReadProtoFromTextFile(const char* filename, 38 | ::google::protobuf::Message* proto) { 39 | int fd = open(filename, O_RDONLY); 40 | CHECK_NE(fd, -1) << "File not found: " << filename; 41 | FileInputStream* input = new FileInputStream(fd); 42 | CHECK(google::protobuf::TextFormat::Parse(input, proto)); 43 | delete input; 44 | close(fd); 45 | } 46 | 47 | void WriteProtoToTextFile(const Message& proto, const char* filename) { 48 | int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0644); 49 | FileOutputStream* output = new FileOutputStream(fd); 50 | CHECK(google::protobuf::TextFormat::Print(proto, output)); 51 | delete output; 52 | close(fd); 53 | } 54 | 55 | void ReadProtoFromBinaryFile(const char* filename, Message* proto) { 56 | #ifdef _MSC_VER 57 | int fd = open(filename, O_RDONLY|O_BINARY); 58 | #else 59 | int fd = open(filename, O_RDONLY); 60 | #endif 61 | CHECK_NE(fd, -1) << "File not found: " << filename; 62 | ZeroCopyInputStream* raw_input = new FileInputStream(fd); 63 | CodedInputStream* coded_input = new CodedInputStream(raw_input); 64 | coded_input->SetTotalBytesLimit(536870912, 268435456); 65 | 66 | CHECK(proto->ParseFromCodedStream(coded_input)); 67 | 68 | delete coded_input; 69 | delete raw_input; 70 | close(fd); 71 | } 72 | 73 | void WriteProtoToBinaryFile(const Message& proto, const char* filename) { 74 | fstream output(filename, ios::out | ios::trunc | ios::binary); 75 | CHECK(proto.SerializeToOstream(&output)); 76 | } 77 | 78 | #if 0 79 | bool ReadImageToDatum(const string& filename, const int label, 80 | const int height, const int width, Datum* datum) { 81 | cv::Mat cv_img; 82 | if (height > 0 && width > 0) { 83 | cv::Mat cv_img_origin = cv::imread(filename, CV_LOAD_IMAGE_COLOR); 84 | cv::resize(cv_img_origin, cv_img, cv::Size(height, width)); 85 | } else { 86 | cv_img = cv::imread(filename, CV_LOAD_IMAGE_COLOR); 87 | } 88 | if (!cv_img.data) { 89 | LOG(ERROR) << "Could not open or find file " << filename; 90 | return false; 91 | } 92 | datum->set_channels(3); 93 | datum->set_height(cv_img.rows); 94 | datum->set_width(cv_img.cols); 95 | datum->set_label(label); 96 | datum->clear_data(); 97 | datum->clear_float_data(); 98 | string* datum_string = datum->mutable_data(); 99 | for (int c = 0; c < 3; ++c) { 100 | for (int h = 0; h < cv_img.rows; ++h) { 101 | for (int w = 0; w < cv_img.cols; ++w) { 102 | datum_string->push_back( 103 | static_cast(cv_img.at(h, w)[c])); 104 | } 105 | } 106 | } 107 | return true; 108 | } 109 | 110 | // Verifies format of data stored in HDF5 file and reshapes blob accordingly. 111 | template 112 | void hdf5_load_nd_dataset_helper( 113 | hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, 114 | Blob* blob) { 115 | // Verify that the number of dimensions is in the accepted range. 116 | herr_t status; 117 | int ndims; 118 | status = H5LTget_dataset_ndims(file_id, dataset_name_, &ndims); 119 | CHECK_GE(ndims, min_dim); 120 | CHECK_LE(ndims, max_dim); 121 | 122 | // Verify that the data format is what we expect: float or double. 123 | std::vector dims(ndims); 124 | H5T_class_t class_; 125 | status = H5LTget_dataset_info( 126 | file_id, dataset_name_, dims.data(), &class_, NULL); 127 | CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data"; 128 | 129 | blob->Reshape( 130 | dims[0], 131 | (dims.size() > 1) ? dims[1] : 1, 132 | (dims.size() > 2) ? dims[2] : 1, 133 | (dims.size() > 3) ? dims[3] : 1); 134 | } 135 | 136 | template <> 137 | void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_, 138 | int min_dim, int max_dim, Blob* blob) { 139 | hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob); 140 | herr_t status = H5LTread_dataset_float( 141 | file_id, dataset_name_, blob->mutable_cpu_data()); 142 | } 143 | 144 | template <> 145 | void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_, 146 | int min_dim, int max_dim, Blob* blob) { 147 | hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob); 148 | herr_t status = H5LTread_dataset_double( 149 | file_id, dataset_name_, blob->mutable_cpu_data()); 150 | } 151 | #endif 152 | 153 | } // namespace caffe 154 | -------------------------------------------------------------------------------- /src/caffe/util/insert_splits.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Jeff Donahue 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "caffe/common.hpp" 9 | #include "caffe/util/insert_splits.hpp" 10 | 11 | using std::map; 12 | using std::ostringstream; 13 | using std::pair; 14 | using std::make_pair; 15 | 16 | namespace caffe { 17 | 18 | void insert_splits(const NetParameter& param, NetParameter* param_split) { 19 | // Initialize by copying from the input NetParameter. 20 | param_split->CopyFrom(param); 21 | param_split->clear_layers(); 22 | map > blob_name_to_last_top_idx; 23 | map, pair > bottom_idx_to_source_top_idx; 24 | map, int> top_idx_to_bottom_count; 25 | map, int> top_idx_to_bottom_split_idx; 26 | map layer_idx_to_layer_name; 27 | layer_idx_to_layer_name[-1] = "input"; 28 | // Determine the number of times each blob is used as an input (bottom) blob. 29 | for (int i = 0; i < param.input_size(); ++i) { 30 | const string& blob_name = param.input(i); 31 | blob_name_to_last_top_idx[blob_name] = make_pair(-1, i); 32 | } 33 | for (int i = 0; i < param.layers_size(); ++i) { 34 | const LayerConnection& layer_connection = param.layers(i); 35 | layer_idx_to_layer_name[i] = layer_connection.layer().name(); 36 | for (int j = 0; j < layer_connection.bottom_size(); ++j) { 37 | const string& blob_name = layer_connection.bottom(j); 38 | if (blob_name_to_last_top_idx.find(blob_name) == 39 | blob_name_to_last_top_idx.end()) { 40 | LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; 41 | } 42 | const pair& bottom_idx = make_pair(i, j); 43 | const pair& top_idx = blob_name_to_last_top_idx[blob_name]; 44 | bottom_idx_to_source_top_idx[bottom_idx] = top_idx; 45 | ++top_idx_to_bottom_count[top_idx]; 46 | } 47 | for (int j = 0; j < layer_connection.top_size(); ++j) { 48 | const string& blob_name = layer_connection.top(j); 49 | blob_name_to_last_top_idx[blob_name] = make_pair(i, j); 50 | } 51 | } 52 | // Create split layer for any input blobs used by other layers as bottom 53 | // blobs more than once. 54 | for (int i = 0; i < param.input_size(); ++i) { 55 | const int split_count = top_idx_to_bottom_count[make_pair(-1, i)]; 56 | if (split_count > 1) { 57 | const string& layer_name = layer_idx_to_layer_name[-1]; 58 | const string& blob_name = param.input(i); 59 | LayerConnection* split_layer_connection = param_split->add_layers(); 60 | configure_split_layer(layer_name, blob_name, i, split_count, 61 | split_layer_connection); 62 | } 63 | } 64 | for (int i = 0; i < param.layers_size(); ++i) { 65 | LayerConnection* layer_connection = param_split->add_layers(); 66 | layer_connection->CopyFrom(param.layers(i)); 67 | // Replace any shared bottom blobs with split layer outputs. 68 | for (int j = 0; j < layer_connection->bottom_size(); ++j) { 69 | const pair& top_idx = 70 | bottom_idx_to_source_top_idx[make_pair(i, j)]; 71 | const int split_count = top_idx_to_bottom_count[top_idx]; 72 | if (split_count > 1) { 73 | const string& layer_name = layer_idx_to_layer_name[top_idx.first]; 74 | const string& blob_name = layer_connection->bottom(j); 75 | layer_connection->set_bottom(j, get_split_blob_name(layer_name, 76 | blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++)); 77 | } 78 | } 79 | // Create split layer for any top blobs used by other layers as bottom 80 | // blobs more than once. 81 | for (int j = 0; j < layer_connection->top_size(); ++j) { 82 | const int split_count = top_idx_to_bottom_count[make_pair(i, j)]; 83 | if (split_count > 1) { 84 | const string& layer_name = layer_idx_to_layer_name[i]; 85 | const string& blob_name = layer_connection->top(j); 86 | LayerConnection* split_layer_connection = param_split->add_layers(); 87 | configure_split_layer(layer_name, blob_name, j, split_count, 88 | split_layer_connection); 89 | } 90 | } 91 | } 92 | } 93 | 94 | void configure_split_layer(const string& layer_name, const string& blob_name, 95 | const int blob_idx, const int split_count, 96 | LayerConnection* split_layer_connection) { 97 | split_layer_connection->Clear(); 98 | split_layer_connection->add_bottom(blob_name); 99 | LayerParameter* split_layer_param = split_layer_connection->mutable_layer(); 100 | split_layer_param->set_name( 101 | get_split_layer_name(layer_name, blob_name, blob_idx)); 102 | split_layer_param->set_type("split"); 103 | for (int k = 0; k < split_count; ++k) { 104 | split_layer_connection->add_top( 105 | get_split_blob_name(layer_name, blob_name, blob_idx, k)); 106 | } 107 | } 108 | 109 | string get_split_layer_name(const string& layer_name, const string& blob_name, 110 | const int blob_idx) { 111 | ostringstream split_layer_name; 112 | split_layer_name << blob_name << "_" << layer_name << "_" << blob_idx 113 | << "_split"; 114 | return split_layer_name.str(); 115 | } 116 | 117 | string get_split_blob_name(const string& layer_name, const string& blob_name, 118 | const int blob_idx, const int split_idx) { 119 | // 0th split top blob is given the same name as the bottom blob so that 120 | // computation is done 'in-place', saving a bit of time and memory. 121 | if (split_idx == 0) { 122 | return blob_name; 123 | } 124 | ostringstream split_blob_name; 125 | split_blob_name << blob_name << "_" << layer_name << "_" << blob_idx 126 | << "_split_" << split_idx; 127 | return split_blob_name.str(); 128 | } 129 | 130 | } // namespace caffe 131 | -------------------------------------------------------------------------------- /src/caffe/blob.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include "caffe/blob.hpp" 4 | #include "caffe/common.hpp" 5 | #include "caffe/syncedmem.hpp" 6 | #include "caffe/util/math_functions.hpp" 7 | 8 | namespace caffe { 9 | 10 | template 11 | void Blob::Reshape(const int num, const int channels, const int height, 12 | const int width) { 13 | CHECK_GE(num, 0); 14 | CHECK_GE(channels, 0); 15 | CHECK_GE(height, 0); 16 | CHECK_GE(width, 0); 17 | num_ = num; 18 | channels_ = channels; 19 | height_ = height; 20 | width_ = width; 21 | count_ = num_ * channels_ * height_ * width_; 22 | if (count_) { 23 | data_.reset(new SyncedMemory(count_ * sizeof(Dtype))); 24 | diff_.reset(new SyncedMemory(count_ * sizeof(Dtype))); 25 | } else { 26 | data_.reset(reinterpret_cast(NULL)); 27 | diff_.reset(reinterpret_cast(NULL)); 28 | } 29 | } 30 | 31 | template 32 | Blob::Blob(const int num, const int channels, const int height, 33 | const int width) { 34 | Reshape(num, channels, height, width); 35 | } 36 | 37 | template 38 | const Dtype* Blob::cpu_data() const { 39 | CHECK(data_); 40 | return (const Dtype*)data_->cpu_data(); 41 | } 42 | 43 | #if 0 44 | template 45 | const Dtype* Blob::gpu_data() const { 46 | CHECK(data_); 47 | return (const Dtype*)data_->gpu_data(); 48 | } 49 | #endif 50 | 51 | template 52 | const Dtype* Blob::cpu_diff() const { 53 | CHECK(diff_); 54 | return (const Dtype*)diff_->cpu_data(); 55 | } 56 | 57 | #if 0 58 | template 59 | const Dtype* Blob::gpu_diff() const { 60 | CHECK(diff_); 61 | return (const Dtype*)diff_->gpu_data(); 62 | } 63 | #endif 64 | 65 | template 66 | Dtype* Blob::mutable_cpu_data() { 67 | CHECK(data_); 68 | return reinterpret_cast(data_->mutable_cpu_data()); 69 | } 70 | 71 | #if 0 72 | template 73 | Dtype* Blob::mutable_gpu_data() { 74 | CHECK(data_); 75 | return reinterpret_cast(data_->mutable_gpu_data()); 76 | } 77 | #endif 78 | 79 | template 80 | Dtype* Blob::mutable_cpu_diff() { 81 | CHECK(diff_); 82 | return reinterpret_cast(diff_->mutable_cpu_data()); 83 | } 84 | 85 | #if 0 86 | template 87 | Dtype* Blob::mutable_gpu_diff() { 88 | CHECK(diff_); 89 | return reinterpret_cast(diff_->mutable_gpu_data()); 90 | } 91 | #endif 92 | 93 | template 94 | void Blob::Update() { 95 | // We will perform update based on where the data is located. 96 | switch (data_->head()) { 97 | case SyncedMemory::HEAD_AT_CPU: 98 | // perform computation on CPU 99 | caffe_axpy(count_, Dtype(-1), 100 | reinterpret_cast(diff_->cpu_data()), 101 | reinterpret_cast(data_->mutable_cpu_data())); 102 | break; 103 | #if 0 104 | case SyncedMemory::HEAD_AT_GPU: 105 | case SyncedMemory::SYNCED: 106 | // perform computation on GPU 107 | caffe_gpu_axpy(count_, Dtype(-1), 108 | reinterpret_cast(diff_->gpu_data()), 109 | reinterpret_cast(data_->mutable_gpu_data())); 110 | break; 111 | #endif 112 | default: 113 | LOG(FATAL) << "Syncedmem not initialized."; 114 | } 115 | } 116 | 117 | template 118 | void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { 119 | if (num_ != source.num() || channels_ != source.channels() || 120 | height_ != source.height() || width_ != source.width()) { 121 | if (reshape) { 122 | Reshape(source.num(), source.channels(), source.height(), source.width()); 123 | } else { 124 | LOG(FATAL) << "Trying to copy blobs of different sizes."; 125 | } 126 | } 127 | switch (Caffe::mode()) { 128 | #if 0 129 | case Caffe::GPU: 130 | if (copy_diff) { 131 | CUDA_CHECK(cudaMemcpy(diff_->mutable_gpu_data(), source.gpu_diff(), 132 | sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice)); 133 | } else { 134 | CUDA_CHECK(cudaMemcpy(data_->mutable_gpu_data(), source.gpu_data(), 135 | sizeof(Dtype) * count_, cudaMemcpyDeviceToDevice)); 136 | } 137 | break; 138 | #endif 139 | case Caffe::CPU: 140 | if (copy_diff) { 141 | memcpy(diff_->mutable_cpu_data(), source.cpu_diff(), 142 | sizeof(Dtype) * count_); 143 | } else { 144 | memcpy(data_->mutable_cpu_data(), source.cpu_data(), 145 | sizeof(Dtype) * count_); 146 | } 147 | break; 148 | default: 149 | LOG(FATAL) << "Unknown caffe mode."; 150 | } 151 | } 152 | 153 | template 154 | void Blob::FromProto(const BlobProto& proto) { 155 | Reshape(proto.num(), proto.channels(), proto.height(), proto.width()); 156 | // copy data 157 | Dtype* data_vec = mutable_cpu_data(); 158 | for (int i = 0; i < count_; ++i) { 159 | data_vec[i] = proto.data(i); 160 | } 161 | if (proto.diff_size() > 0) { 162 | Dtype* diff_vec = mutable_cpu_diff(); 163 | for (int i = 0; i < count_; ++i) { 164 | diff_vec[i] = proto.diff(i); 165 | } 166 | } 167 | } 168 | 169 | template 170 | void Blob::ToProto(BlobProto* proto, bool write_diff) const { 171 | proto->set_num(num_); 172 | proto->set_channels(channels_); 173 | proto->set_height(height_); 174 | proto->set_width(width_); 175 | proto->clear_data(); 176 | proto->clear_diff(); 177 | const Dtype* data_vec = cpu_data(); 178 | for (int i = 0; i < count_; ++i) { 179 | proto->add_data(data_vec[i]); 180 | } 181 | if (write_diff) { 182 | const Dtype* diff_vec = cpu_diff(); 183 | for (int i = 0; i < count_; ++i) { 184 | proto->add_diff(diff_vec[i]); 185 | } 186 | } 187 | } 188 | 189 | INSTANTIATE_CLASS(Blob); 190 | 191 | } // namespace caffe 192 | 193 | -------------------------------------------------------------------------------- /src/caffe/layers/dropout_group_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Yuheng Chen 2 | 3 | #include 4 | #include 5 | 6 | #include "caffe/common.hpp" 7 | #include "caffe/layer.hpp" 8 | #include "caffe/syncedmem.hpp" 9 | #include "caffe/vision_layers.hpp" 10 | 11 | namespace caffe { 12 | 13 | template 14 | void DropoutGroupLayer::SetUp(const vector*>& bottom, 15 | vector*>* top) { 16 | NeuronLayer::SetUp(bottom, top); 17 | const int mask_count = bottom[0]->count() / bottom[0]->channels(); 18 | const int mask_size = bottom[0]->width() * bottom[0]->height(); 19 | NUM_ = bottom[0]->num(); 20 | HEIGHT_ = bottom[0]->height(); 21 | WIDTH_ = bottom[0]->width(); 22 | // Set up the cache for random number generation 23 | rand_vec_.reset(new SyncedMemory(mask_count * sizeof(int))); 24 | threshold_ = this->layer_param_.dropout_ratio(); 25 | DCHECK(threshold_ > 0.); 26 | DCHECK(threshold_ < 1.); 27 | Dtype scale = 1. / (1. - threshold_); 28 | scale_.reset(new SyncedMemory(NUM_ * sizeof(Dtype))); 29 | uint_thres_ = (unsigned int)((mask_size * (1. - threshold_)) + 0.5); 30 | 31 | int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); 32 | Dtype *scale_ptr = reinterpret_cast(scale_->mutable_cpu_data()); 33 | for(int n = 0; n < bottom[0]->num(); n++){ 34 | for(int i = 0; i < mask_size; i++) 35 | mask[i] = i; 36 | mask += mask_size; 37 | scale_ptr[n] = scale; 38 | } 39 | } 40 | 41 | template 42 | void DropoutGroupLayer::UpdateMask() { 43 | const int count = rand_vec_->size() / sizeof(int); 44 | const int mask_size = count / NUM_; 45 | int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); 46 | for(int n = 0; n < NUM_; n++){ 47 | std::random_shuffle(mask, mask + mask_size); 48 | mask += mask_size; 49 | } 50 | } 51 | 52 | template 53 | void DropoutGroupLayer::UpscaleMaskFrom(DropoutGroupLayer *dropout) { 54 | const int ksize = HEIGHT_ - dropout->HEIGHT_ + 1; 55 | CHECK_EQ(ksize, WIDTH_ - dropout->WIDTH_ + 1); 56 | CHECK(ksize > 0); 57 | CHECK_EQ(NUM_, dropout->NUM_); 58 | int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); 59 | const int count = rand_vec_->size() / sizeof(int); 60 | const int mask_size = count / NUM_; 61 | const int* mask_downscale = reinterpret_cast(dropout->rand_vec_->cpu_data()); 62 | const int mask_ds_size = dropout->rand_vec_->size() / sizeof(int) / NUM_; 63 | 64 | //mask all 65 | uint_thres_ = 1; 66 | for(int n = 0; n < count; n++) 67 | mask[n] = 1; 68 | 69 | Dtype *scale_ptr = reinterpret_cast(scale_->mutable_cpu_data()); 70 | for(int n = 0; n < NUM_; n++){ 71 | int idx_ds = 0; 72 | for(int y = 0; y < dropout->HEIGHT_; y++){ 73 | for(int x = 0; x < dropout->WIDTH_; x++){ 74 | /* if kept */ 75 | if(mask_downscale[idx_ds++] < dropout->uint_thres_){ 76 | for(int ty = 0; ty < ksize; ty++){ 77 | int *ptr = mask + (y + ty) * WIDTH_; 78 | for(int tx = 0; tx < ksize; tx++) 79 | ptr[x + tx] = 0; 80 | } /* ty */ 81 | } 82 | } 83 | } 84 | int nonzeros = 0; 85 | for(int y = 0; y < HEIGHT_; y++){ 86 | int *ptr = mask + y * WIDTH_; 87 | for(int x = 0; x < WIDTH_; x++){ 88 | if(ptr[x] == 0) 89 | nonzeros ++; 90 | //fprintf(stderr, "%d ", ptr[x]); 91 | } 92 | //fprintf(stderr, "\n"); 93 | } 94 | //fprintf(stderr, "\n\n"); 95 | CHECK(nonzeros > 0); 96 | scale_ptr[n] = HEIGHT_ * WIDTH_ / (Dtype)nonzeros; 97 | mask += mask_size; 98 | mask_downscale += mask_ds_size; 99 | } 100 | } 101 | 102 | template 103 | void DropoutGroupLayer::Forward_cpu(const vector*>& bottom, 104 | vector*>* top) { 105 | const Dtype* bottom_data = bottom[0]->cpu_data(); 106 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 107 | int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); 108 | const int mask_size = bottom[0]->width() * bottom[0]->height(); 109 | if (Caffe::phase() == Caffe::TRAIN) { 110 | #if 0 111 | for(int n = 0; n < bottom[0]->num(); n++){ 112 | for(int c = 0; c < bottom[0]->channels(); c++){ 113 | int i = 0; 114 | for (; i < uint_thres_; ++i) { 115 | int idx = mask[i]; 116 | top_data[idx] = bottom_data[idx] * scale_; 117 | } 118 | for (; i < mask_size; i++) { 119 | int idx = mask[i]; 120 | top_data[idx] = 0.; 121 | } 122 | top_data += mask_size; 123 | bottom_data += mask_size; 124 | } 125 | mask += mask_size; 126 | } 127 | #else 128 | NOT_IMPLEMENTED; 129 | #endif 130 | } else { 131 | memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype)); 132 | } 133 | } 134 | 135 | template 136 | Dtype DropoutGroupLayer::Backward_cpu(const vector*>& top, 137 | const bool propagate_down, 138 | vector*>* bottom) { 139 | CHECK(Caffe::phase() == Caffe::TRAIN); 140 | if (propagate_down) { 141 | #if 0 142 | const Dtype* top_diff = top[0]->cpu_diff(); 143 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 144 | const int* mask = reinterpret_cast(rand_vec_->cpu_data()); 145 | const int mask_size = top[0]->width() * top[0]->height(); 146 | for(int n = 0; n < top[0]->num(); n++){ 147 | for(int c = 0; c < top[0]->channels(); c++){ 148 | int i = 0; 149 | for (; i < uint_thres_; ++i) { 150 | int idx = mask[i]; 151 | bottom_diff[idx] = top_diff[idx] * scale_; 152 | } 153 | for (; i < mask_size; i++) { 154 | int idx = mask[i]; 155 | bottom_diff[idx] = 0.; 156 | } 157 | top_diff += mask_size; 158 | bottom_diff += mask_size; 159 | } 160 | mask += mask_size; 161 | } 162 | #else 163 | NOT_IMPLEMENTED; 164 | #endif 165 | } 166 | return Dtype(0); 167 | } 168 | 169 | 170 | INSTANTIATE_CLASS(DropoutGroupLayer); 171 | 172 | 173 | } // namespace caffe 174 | -------------------------------------------------------------------------------- /src/caffe/layers/lrn_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | 5 | #include "caffe/layer.hpp" 6 | #include "caffe/vision_layers.hpp" 7 | #include "caffe/util/math_functions.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void LRNLayer::SetUp(const vector*>& bottom, 13 | vector*>* top) { 14 | CHECK_EQ(bottom.size(), 1) << 15 | "Local Response Normalization Layer takes a single blob as input."; 16 | CHECK_EQ(top->size(), 1) << 17 | "Local Response Normalization Layer takes a single blob as output."; 18 | num_ = bottom[0]->num(); 19 | channels_ = bottom[0]->channels(); 20 | height_ = bottom[0]->height(); 21 | width_ = bottom[0]->width(); 22 | (*top)[0]->Reshape(num_, channels_, height_, width_); 23 | scale_.Reshape(num_, channels_, height_, width_); 24 | size_ = this->layer_param_.local_size(); 25 | pre_pad_ = (size_ - 1) / 2; 26 | alpha_ = this->layer_param_.alpha(); 27 | beta_ = this->layer_param_.beta(); 28 | } 29 | 30 | template 31 | void LRNLayer::Forward_cpu(const vector*>& bottom, 32 | vector*>* top) { 33 | const Dtype* bottom_data = bottom[0]->cpu_data(); 34 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 35 | Dtype* scale_data = scale_.mutable_cpu_data(); 36 | // start with the constant value 37 | for (int i = 0; i < scale_.count(); ++i) { 38 | scale_data[i] = 1.; 39 | } 40 | Blob padded_square(1, channels_ + size_ - 1, height_, width_); 41 | Dtype* padded_square_data = padded_square.mutable_cpu_data(); 42 | memset(padded_square_data, 0, sizeof(Dtype) * padded_square.count()); 43 | Dtype alpha_over_size = alpha_ / size_; 44 | // go through the images 45 | for (int n = 0; n < num_; ++n) { 46 | // compute the padded square 47 | caffe_sqr(channels_ * height_ * width_, 48 | bottom_data + bottom[0]->offset(n), 49 | padded_square_data + padded_square.offset(0, pre_pad_)); 50 | // Create the first channel scale 51 | for (int c = 0; c < size_; ++c) { 52 | caffe_axpy(height_ * width_, alpha_over_size, 53 | padded_square_data + padded_square.offset(0, c), 54 | scale_data + scale_.offset(n, 0)); 55 | } 56 | for (int c = 1; c < channels_; ++c) { 57 | // copy previous scale 58 | caffe_copy(height_ * width_, 59 | scale_data + scale_.offset(n, c - 1), 60 | scale_data + scale_.offset(n, c)); 61 | // add head 62 | caffe_axpy(height_ * width_, alpha_over_size, 63 | padded_square_data + padded_square.offset(0, c + size_ - 1), 64 | scale_data + scale_.offset(n, c)); 65 | // subtract tail 66 | caffe_axpy(height_ * width_, -alpha_over_size, 67 | padded_square_data + padded_square.offset(0, c - 1), 68 | scale_data + scale_.offset(n, c)); 69 | } 70 | } 71 | 72 | // In the end, compute output 73 | caffe_powx(scale_.count(), scale_data, -beta_, top_data); 74 | caffe_mul(scale_.count(), top_data, bottom_data, top_data); 75 | } 76 | 77 | template 78 | Dtype LRNLayer::Backward_cpu(const vector*>& top, 79 | const bool propagate_down, vector*>* bottom) { 80 | const Dtype* top_diff = top[0]->cpu_diff(); 81 | const Dtype* top_data = top[0]->cpu_data(); 82 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 83 | const Dtype* scale_data = scale_.cpu_data(); 84 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 85 | Blob padded_ratio(1, channels_ + size_ - 1, height_, width_); 86 | Blob accum_ratio(1, 1, height_, width_); 87 | Dtype* padded_ratio_data = padded_ratio.mutable_cpu_data(); 88 | Dtype* accum_ratio_data = accum_ratio.mutable_cpu_data(); 89 | // We hack a little bit by using the diff() to store an additional result 90 | Dtype* accum_ratio_times_bottom = accum_ratio.mutable_cpu_diff(); 91 | memset(padded_ratio_data, 0, sizeof(Dtype) * padded_ratio.count()); 92 | Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_; 93 | 94 | caffe_powx(scale_.count(), scale_data, -beta_, bottom_diff); 95 | caffe_mul(scale_.count(), top_diff, bottom_diff, bottom_diff); 96 | 97 | // go through individual data 98 | int inverse_pre_pad = size_ - (size_ + 1) / 2; 99 | for (int n = 0; n < num_; ++n) { 100 | int block_offset = scale_.offset(n); 101 | // first, compute diff_i * y_i / s_i 102 | caffe_mul(channels_ * height_ * width_, 103 | top_diff + block_offset, top_data + block_offset, 104 | padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad)); 105 | caffe_div(channels_ * height_ * width_, 106 | padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad), 107 | scale_data + block_offset, 108 | padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad)); 109 | // Now, compute the accumulated ratios and the bottom diff 110 | memset(accum_ratio_data, 0, sizeof(Dtype) * accum_ratio.count()); 111 | for (int c = 0; c < size_ - 1; ++c) { 112 | caffe_axpy(height_ * width_, 1., 113 | padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data); 114 | } 115 | for (int c = 0; c < channels_; ++c) { 116 | caffe_axpy(height_ * width_, 1., 117 | padded_ratio_data + padded_ratio.offset(0, c + size_ - 1), 118 | accum_ratio_data); 119 | // compute bottom diff 120 | caffe_mul(height_ * width_, 121 | bottom_data + top[0]->offset(n, c), 122 | accum_ratio_data, accum_ratio_times_bottom); 123 | caffe_axpy(height_ * width_, -cache_ratio_value, 124 | accum_ratio_times_bottom, bottom_diff + top[0]->offset(n, c)); 125 | caffe_axpy(height_ * width_, -1., 126 | padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data); 127 | } 128 | } 129 | return Dtype(0.); 130 | } 131 | 132 | INSTANTIATE_CLASS(LRNLayer); 133 | 134 | 135 | } // namespace caffe 136 | -------------------------------------------------------------------------------- /src/caffe/util/im2col.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "caffe/util/im2col.hpp" 8 | 9 | namespace caffe { 10 | 11 | template 12 | void im2col_cpu(const Dtype* data_im, const int channels, 13 | const int height, const int width, const int ksize, const int pad, 14 | const int stride, Dtype* data_col) { 15 | int height_col = (height + 2 * pad - ksize) / stride + 1; 16 | int width_col = (width + 2 * pad - ksize) / stride + 1; 17 | int channels_col = channels * ksize * ksize; 18 | for (int c = 0; c < channels_col; ++c) { 19 | int w_offset = c % ksize; 20 | int h_offset = (c / ksize) % ksize; 21 | int c_im = c / ksize / ksize; 22 | for (int h = 0; h < height_col; ++h) { 23 | for (int w = 0; w < width_col; ++w) { 24 | int h_pad = h * stride - pad + h_offset; 25 | int w_pad = w * stride - pad + w_offset; 26 | if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) 27 | data_col[(c * height_col + h) * width_col + w] = 28 | data_im[(c_im * height + h_pad) * width + w_pad]; 29 | else 30 | data_col[(c * height_col + h) * width_col + w] = 0; 31 | } 32 | } 33 | } 34 | } 35 | 36 | // Explicit instantiation 37 | template void im2col_cpu(const float* data_im, const int channels, 38 | const int height, const int width, const int ksize, const int pad, 39 | const int stride, float* data_col); 40 | template void im2col_cpu(const double* data_im, const int channels, 41 | const int height, const int width, const int ksize, const int pad, 42 | const int stride, double* data_col); 43 | 44 | template 45 | void col2im_cpu(const Dtype* data_col, const int channels, 46 | const int height, const int width, const int ksize, const int pad, 47 | const int stride, Dtype* data_im) { 48 | memset(data_im, 0, sizeof(Dtype) * height * width * channels); 49 | int height_col = (height + 2 * pad - ksize) / stride + 1; 50 | int width_col = (width + 2 * pad - ksize) / stride + 1; 51 | int channels_col = channels * ksize * ksize; 52 | for (int c = 0; c < channels_col; ++c) { 53 | int w_offset = c % ksize; 54 | int h_offset = (c / ksize) % ksize; 55 | int c_im = c / ksize / ksize; 56 | for (int h = 0; h < height_col; ++h) { 57 | for (int w = 0; w < width_col; ++w) { 58 | int h_pad = h * stride - pad + h_offset; 59 | int w_pad = w * stride - pad + w_offset; 60 | if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) 61 | data_im[(c_im * height + h_pad) * width + w_pad] += 62 | data_col[(c * height_col + h) * width_col + w]; 63 | } 64 | } 65 | } 66 | } 67 | 68 | template 69 | static void im2col_tile_cpu_kernel(const int n, const Dtype* _data_im, 70 | const int strideh, const int stridew, 71 | const int ksize, 72 | const int height_col, const int width_col, 73 | Dtype* _data_col) { 74 | for(int _index = 0; _index < n; _index++){ 75 | int index = _index; 76 | int w_out = index % width_col; 77 | index /= width_col; 78 | int h_out = index % height_col; 79 | int channel_in = index / height_col; 80 | int channel_out = channel_in * ksize * ksize; 81 | int h_in = h_out; 82 | int w_in = w_out; 83 | Dtype * data_col = _data_col + (channel_out * height_col + h_out) * width_col + w_out; 84 | const Dtype *data_im = _data_im + (channel_in * strideh + h_in) * stridew + w_in; 85 | for (int i = 0; i < ksize; ++i) { 86 | for (int j = 0; j < ksize; ++j) { 87 | *data_col = data_im[i * stridew + j]; 88 | data_col += height_col * width_col; 89 | } 90 | } 91 | } 92 | } 93 | 94 | template 95 | void im2col_tile_cpu(const Dtype* data_im, const int channels, 96 | const int stride_h, const int stride_w, 97 | const int ksize, Dtype* data_col, 98 | const int height_col, const int width_col) { 99 | // We are going to launch channels * height_col * width_col kernels, each 100 | // kernel responsible for copying a single-channel grid. 101 | int num_kernels = channels * height_col * width_col; 102 | // NOLINT_NEXT_LINE(whitespace/operators) 103 | im2col_tile_cpu_kernel(num_kernels, data_im, stride_h, stride_w, ksize, height_col, 104 | width_col, data_col); 105 | } 106 | 107 | template 108 | static void copy_stride_cpu_kernel(int n, const Dtype* _src_data, 109 | const int channels, 110 | const int height, const int width, Dtype *_dst_data, 111 | const int stride_h, const int stride_w) { 112 | #if 0 113 | for(int index = 0; index < n; index++){ 114 | int w = index % width; 115 | int h = (index / width) % height; 116 | int c = index / (width * height); 117 | 118 | const Dtype * src_data = _src_data + (c * height + h) * width + w; 119 | Dtype * dst_data = _dst_data + (c * stride_h + h) * stride_w + w; 120 | *dst_data = *src_data; 121 | } 122 | #endif 123 | for(int c = 0; c < channels; c++){ 124 | Dtype *pd = _dst_data + c * stride_h * stride_w; 125 | for(int h = 0; h < height; h++){ 126 | for(int w = 0; w < width; w++){ 127 | pd[w] = *_src_data++; 128 | } 129 | pd += stride_w; 130 | } 131 | } 132 | } 133 | 134 | template 135 | void copy_stride_cpu(const Dtype* src_data, 136 | const int channels, 137 | const int height, const int width, Dtype *dst_data, 138 | const int stride_h, const int stride_w) { 139 | int num_kernels = channels * height * width; 140 | // To avoid involving atomic operations, we will launch one kernel per 141 | // bottom dimension, and then in the kernel add up the top dimensions. 142 | // NOLINT_NEXT_LINE(whitespace/operators) 143 | copy_stride_cpu_kernel( 144 | num_kernels, src_data, channels, height, width, 145 | dst_data, stride_h, stride_w); 146 | } 147 | 148 | // Explicit instantiation 149 | template void col2im_cpu(const float* data_col, const int channels, 150 | const int height, const int width, const int psize, const int pad, 151 | const int stride, float* data_im); 152 | template void col2im_cpu(const double* data_col, const int channels, 153 | const int height, const int width, const int psize, const int pad, 154 | const int stride, double* data_im); 155 | 156 | template void im2col_tile_cpu(const float* data_im, const int channels, 157 | const int stride_h, const int stride_w, 158 | const int ksize, float* data_col, 159 | const int height_col, const int width_col); 160 | template void im2col_tile_cpu(const double* data_im, const int channels, 161 | const int stride_h, const int stride_w, 162 | const int ksize, double* data_col, 163 | const int height_col, const int width_col); 164 | 165 | template void copy_stride_cpu(const float* src_data, 166 | const int channels, 167 | const int height, const int width, float *dst_data, 168 | const int stride_h, const int stride_w) ; 169 | template void copy_stride_cpu(const double* src_data, 170 | const int channels, 171 | const int height, const int width, double *dst_data, 172 | const int stride_h, const int stride_w) ; 173 | 174 | 175 | } // namespace caffe 176 | -------------------------------------------------------------------------------- /align_test.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | // 3 | // This is a simple script that allows one to quickly test a network whose 4 | // structure is specified by text format protocol buffers, and whose parameter 5 | // are loaded from a pre-trained network. 6 | // Usage: 7 | // test_net net_proto pretrained_net_proto iterations [CPU/GPU] 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "caffe/caffe.hpp" 16 | #include 17 | #include 18 | #include 19 | 20 | #define CROP_WINSIZE 39 21 | #define CROP_PADDING 2.5 22 | 23 | using namespace caffe; // NOLINT(build/namespaces) 24 | using namespace cv; 25 | 26 | float getMean( float * p ) 27 | { 28 | float ans; 29 | for(int i = 0 ; i < CROP_WINSIZE * CROP_WINSIZE ; i++, p++) 30 | ans += *p; 31 | ans = ans/ float( CROP_WINSIZE * CROP_WINSIZE ); 32 | return ans; 33 | } 34 | 35 | float getStd( float * p , float mean) 36 | { 37 | float ans; 38 | for(int i = 0 ; i < CROP_WINSIZE * CROP_WINSIZE ; i++, p++) 39 | ans += ( (*p-mean) * (*p-mean) ); 40 | ans = ans/ float( CROP_WINSIZE * CROP_WINSIZE - 1); 41 | ans = sqrt( ans ); 42 | return ans; 43 | } 44 | void getZscore( Mat & img, int left, int right, int top, int bottom, float * & score ) 45 | { 46 | if( img.type()==CV_8UC3 ) 47 | { 48 | std::cerr << "warning! a color image input" << std::endl; 49 | cv::cvtColor( img , img , CV_RGB2GRAY ); 50 | } 51 | 52 | double scale = (right - left) / double( CROP_WINSIZE ); 53 | 54 | left -= int( scale * CROP_PADDING ); 55 | right += int( scale * CROP_PADDING ); 56 | top -= int( scale * CROP_PADDING ); 57 | bottom+= int( scale * CROP_PADDING ); 58 | 59 | if( top<0 || left < 0 || right >= img.cols || bottom >= img.rows ) 60 | { 61 | std::cerr << "warning! invalid bounding box " << std::endl; 62 | return; 63 | } 64 | 65 | 66 | Mat patch = img( Range( top, bottom ), Range( left, right ) ); 67 | cv::resize( patch , patch, Size( CROP_WINSIZE, CROP_WINSIZE ) ); 68 | 69 | patch.convertTo( patch, CV_32F ); 70 | 71 | float mu = getMean( patch.ptr() ); 72 | float sigma = getStd( patch.ptr() , mu); 73 | 74 | score = new( float[ CROP_WINSIZE * CROP_WINSIZE ] ); 75 | 76 | float * p_patch = patch.ptr(); 77 | 78 | for(int i = 0 ; i < CROP_WINSIZE * CROP_WINSIZE ; i++) 79 | score[i] = ( p_patch[i] - mu ) / sigma; 80 | } 81 | 82 | 83 | template 84 | static void save_blob(const string& fn, Blob *b){ 85 | LOG(INFO) << "Saving " << fn; 86 | FILE *f = fopen(fn.c_str(), "wb"); 87 | CHECK(f != NULL); 88 | fwrite(b->cpu_data(), sizeof(Dtype), b->count(), f); 89 | fclose(f); 90 | } 91 | 92 | static void draw(const float *buf, const float *pt){ 93 | const int ph = 39, pw = 39; 94 | const float scale = 4.0f; 95 | cv::Mat m = cv::Mat::zeros(ph, pw, CV_32FC1); 96 | memcpy(m.data, buf, sizeof(float)*pw*ph); 97 | cv::Mat dsp; 98 | cv::normalize(m, dsp, 0, 255, cv::NORM_MINMAX, CV_8UC1); 99 | cv::resize(dsp, dsp, cv::Size(), scale, scale); 100 | cv::cvtColor(dsp, dsp, CV_GRAY2BGR); 101 | 102 | #if 1 103 | for(int i=0;i<5;i++){ 104 | const float *t = pt + 2*i; 105 | cv::circle(dsp, cv::Point(t[0]*scale, t[1]*scale), 2, cv::Scalar(255,0,0), 2); 106 | } 107 | #endif 108 | cv::imshow("A", dsp); 109 | cv::waitKey(0); 110 | } 111 | 112 | 113 | int main(int argc, char** argv) { 114 | if (argc < 3) { 115 | LOG(ERROR) << "test_net net_proto pretrained_net_proto iterations inputbin output_dir" 116 | << " [CPU/GPU]"; 117 | return 0; 118 | } 119 | 120 | LogMessage::Enable(true); 121 | Caffe::set_phase(Caffe::TEST); 122 | Caffe::set_mode(Caffe::CPU); 123 | 124 | NetParameter test_net_param; 125 | ReadProtoFromTextFile(argv[1], &test_net_param); 126 | Net caffe_test_net(test_net_param); 127 | NetParameter trained_net_param; 128 | ReadProtoFromBinaryFile(argv[2], &trained_net_param); 129 | caffe_test_net.CopyTrainedLayersFrom(trained_net_param); 130 | 131 | #if 0 132 | SolverState state; 133 | std::string state_file = std::string(argv[2]) + ".solverstate"; 134 | ReadProtoFromBinaryFile(state_file, &state); 135 | #endif 136 | 137 | vector*> dummy_blob_input_vec; 138 | 139 | //save layer 140 | int feature_layer_idx = -1; 141 | int data_layer_idx = -1; 142 | for(int i=0;i* output = caffe_test_net.top_vecs()[feature_layer_idx][0], 159 | *data_blob = caffe_test_net.top_vecs()[data_layer_idx][0]; 160 | RawImageLayer *data_layer = dynamic_cast* >(caffe_test_net.layers()[data_layer_idx].get()); 161 | CHECK(data_layer != 0); 162 | 163 | LOG(INFO) << "OUTPUT BLOB dim: " << output->num() << ' ' 164 | << output->channels() << ' ' 165 | << output->width() << ' ' 166 | << output->height(); 167 | const int ih = data_blob->height(), iw = data_blob->width(), ic = data_blob->channels(); 168 | //double buf[ih*iw*ic]; 169 | FILE *finput = fopen(argv[3], "r"); 170 | CHECK(finput != NULL); 171 | for (;;) { 172 | char fn[1024]; 173 | int l,r,t,b; 174 | int nread = fscanf(finput, "%s%d%d%d%d", fn, &l, &r, &t, &b); 175 | if(nread != 5) 176 | break; 177 | cv::Mat mat = cv::imread(fn); 178 | if(!mat.data){ 179 | printf("%s\n", fn); 180 | continue; 181 | } 182 | cv::cvtColor(mat, mat, CV_BGR2GRAY); 183 | float * p = 0; 184 | getZscore( mat, l, r, t, b, p); 185 | if(!p) 186 | continue; 187 | 188 | float *d = data_blob->mutable_cpu_data(); 189 | size_t len = ih * iw * ic; 190 | for(int j = 0; j < data_blob->num(); j++){ 191 | memcpy(d, p, sizeof(float)*CROP_WINSIZE*CROP_WINSIZE); 192 | /* 193 | size_t nread = fread(buf, sizeof(double), len, finput); 194 | CHECK_EQ(nread, len); 195 | for(int k=0;k*>& result = 202 | caffe_test_net.Forward(dummy_blob_input_vec); 203 | 204 | printf("%s %d %d %d %d ", fn, l, r, t, b); 205 | const float *pt = output->cpu_data(); 206 | for(int i=0;inum();i++){ 207 | for(int j=0;jchannels();j++) 208 | printf("%f\t", pt[j]); 209 | printf("\n"); 210 | } 211 | fflush(stdout); 212 | 213 | //draw(p, pt); 214 | delete [] p; 215 | 216 | //sprintf(output_dir, "%s/feat_%05d", argv[4], i); 217 | //save_blob(output_dir, output); 218 | 219 | //test_accuracy += result[0]->cpu_data()[0]; 220 | //LOG(ERROR) << "Batch " << i << ", accuracy: " << result[0]->cpu_data()[0]; 221 | } 222 | fclose(finput); 223 | //test_accuracy /= total_iter; 224 | //LOG(ERROR) << "Test accuracy:" << test_accuracy; 225 | 226 | return 0; 227 | } 228 | 229 | -------------------------------------------------------------------------------- /src/caffe/layers/pooling_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "caffe/layer.hpp" 8 | #include "caffe/vision_layers.hpp" 9 | #include "caffe/util/math_functions.hpp" 10 | 11 | using std::max; 12 | using std::min; 13 | 14 | namespace caffe { 15 | 16 | template 17 | void PoolingLayer::SetUp(const vector*>& bottom, 18 | vector*>* top) { 19 | CHECK_EQ(bottom.size(), 1) << "PoolingLayer takes a single blob as input."; 20 | CHECK_EQ(top->size(), 1) << "PoolingLayer takes a single blob as output."; 21 | KSIZE_ = this->layer_param_.kernelsize(); 22 | STRIDE_ = this->layer_param_.stride(); 23 | CHANNELS_ = bottom[0]->channels(); 24 | HEIGHT_ = bottom[0]->height(); 25 | WIDTH_ = bottom[0]->width(); 26 | POOLED_HEIGHT_ = static_cast( 27 | ceil(static_cast(HEIGHT_ - KSIZE_) / STRIDE_)) + 1; 28 | POOLED_WIDTH_ = static_cast( 29 | ceil(static_cast(WIDTH_ - KSIZE_) / STRIDE_)) + 1; 30 | (*top)[0]->Reshape(bottom[0]->num(), CHANNELS_, POOLED_HEIGHT_, 31 | POOLED_WIDTH_); 32 | // If stochastic pooling, we will initialize the random index part. 33 | if (this->layer_param_.pool() == LayerParameter_PoolMethod_STOCHASTIC) { 34 | rand_idx_.Reshape(bottom[0]->num(), CHANNELS_, POOLED_HEIGHT_, 35 | POOLED_WIDTH_); 36 | } 37 | } 38 | 39 | // TODO(Yangqing): Is there a faster way to do pooling in the channel-first 40 | // case? 41 | template 42 | void PoolingLayer::Forward_cpu(const vector*>& bottom, 43 | vector*>* top) { 44 | const Dtype* bottom_data = bottom[0]->cpu_data(); 45 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 46 | // Different pooling methods. We explicitly do the switch outside the for 47 | // loop to save time, although this results in more codes. 48 | int top_count = (*top)[0]->count(); 49 | switch (this->layer_param_.pool()) { 50 | case LayerParameter_PoolMethod_MAX: 51 | // Initialize 52 | for (int i = 0; i < top_count; ++i) { 53 | top_data[i] = -FLT_MAX; 54 | } 55 | // The main loop 56 | for (int n = 0; n < bottom[0]->num(); ++n) { 57 | for (int c = 0; c < CHANNELS_; ++c) { 58 | for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { 59 | for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { 60 | int hstart = ph * STRIDE_; 61 | int wstart = pw * STRIDE_; 62 | int hend = min(hstart + KSIZE_, HEIGHT_); 63 | int wend = min(wstart + KSIZE_, WIDTH_); 64 | for (int h = hstart; h < hend; ++h) { 65 | for (int w = wstart; w < wend; ++w) { 66 | top_data[ph * POOLED_WIDTH_ + pw] = 67 | max(top_data[ph * POOLED_WIDTH_ + pw], 68 | bottom_data[h * WIDTH_ + w]); 69 | } 70 | } 71 | } 72 | } 73 | // compute offset 74 | bottom_data += bottom[0]->offset(0, 1); 75 | top_data += (*top)[0]->offset(0, 1); 76 | } 77 | } 78 | break; 79 | case LayerParameter_PoolMethod_AVE: 80 | for (int i = 0; i < top_count; ++i) { 81 | top_data[i] = 0; 82 | } 83 | // The main loop 84 | for (int n = 0; n < bottom[0]->num(); ++n) { 85 | for (int c = 0; c < CHANNELS_; ++c) { 86 | for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { 87 | for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { 88 | int hstart = ph * STRIDE_; 89 | int wstart = pw * STRIDE_; 90 | int hend = min(hstart + KSIZE_, HEIGHT_); 91 | int wend = min(wstart + KSIZE_, WIDTH_); 92 | for (int h = hstart; h < hend; ++h) { 93 | for (int w = wstart; w < wend; ++w) { 94 | top_data[ph * POOLED_WIDTH_ + pw] += 95 | bottom_data[h * WIDTH_ + w]; 96 | } 97 | } 98 | top_data[ph * POOLED_WIDTH_ + pw] /= 99 | (hend - hstart) * (wend - wstart); 100 | } 101 | } 102 | // compute offset 103 | bottom_data += bottom[0]->offset(0, 1); 104 | top_data += (*top)[0]->offset(0, 1); 105 | } 106 | } 107 | break; 108 | case LayerParameter_PoolMethod_STOCHASTIC: 109 | NOT_IMPLEMENTED; 110 | break; 111 | default: 112 | LOG(FATAL) << "Unknown pooling method."; 113 | } 114 | } 115 | 116 | template 117 | Dtype PoolingLayer::Backward_cpu(const vector*>& top, 118 | const bool propagate_down, vector*>* bottom) { 119 | if (!propagate_down) { 120 | return Dtype(0.); 121 | } 122 | const Dtype* top_diff = top[0]->cpu_diff(); 123 | const Dtype* top_data = top[0]->cpu_data(); 124 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 125 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 126 | // Different pooling methods. We explicitly do the switch outside the for 127 | // loop to save time, although this results in more codes. 128 | memset(bottom_diff, 0, (*bottom)[0]->count() * sizeof(Dtype)); 129 | switch (this->layer_param_.pool()) { 130 | case LayerParameter_PoolMethod_MAX: 131 | // The main loop 132 | for (int n = 0; n < top[0]->num(); ++n) { 133 | for (int c = 0; c < CHANNELS_; ++c) { 134 | for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { 135 | for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { 136 | int hstart = ph * STRIDE_; 137 | int wstart = pw * STRIDE_; 138 | int hend = min(hstart + KSIZE_, HEIGHT_); 139 | int wend = min(wstart + KSIZE_, WIDTH_); 140 | for (int h = hstart; h < hend; ++h) { 141 | for (int w = wstart; w < wend; ++w) { 142 | bottom_diff[h * WIDTH_ + w] += 143 | top_diff[ph * POOLED_WIDTH_ + pw] * 144 | (bottom_data[h * WIDTH_ + w] == 145 | top_data[ph * POOLED_WIDTH_ + pw]); 146 | } 147 | } 148 | } 149 | } 150 | // offset 151 | bottom_data += (*bottom)[0]->offset(0, 1); 152 | top_data += top[0]->offset(0, 1); 153 | bottom_diff += (*bottom)[0]->offset(0, 1); 154 | top_diff += top[0]->offset(0, 1); 155 | } 156 | } 157 | break; 158 | case LayerParameter_PoolMethod_AVE: 159 | // The main loop 160 | for (int n = 0; n < top[0]->num(); ++n) { 161 | for (int c = 0; c < CHANNELS_; ++c) { 162 | for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { 163 | for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { 164 | int hstart = ph * STRIDE_; 165 | int wstart = pw * STRIDE_; 166 | int hend = min(hstart + KSIZE_, HEIGHT_); 167 | int wend = min(wstart + KSIZE_, WIDTH_); 168 | int poolsize = (hend - hstart) * (wend - wstart); 169 | for (int h = hstart; h < hend; ++h) { 170 | for (int w = wstart; w < wend; ++w) { 171 | bottom_diff[h * WIDTH_ + w] += 172 | top_diff[ph * POOLED_WIDTH_ + pw] / poolsize; 173 | } 174 | } 175 | } 176 | } 177 | // offset 178 | bottom_data += (*bottom)[0]->offset(0, 1); 179 | top_data += top[0]->offset(0, 1); 180 | bottom_diff += (*bottom)[0]->offset(0, 1); 181 | top_diff += top[0]->offset(0, 1); 182 | } 183 | } 184 | break; 185 | case LayerParameter_PoolMethod_STOCHASTIC: 186 | NOT_IMPLEMENTED; 187 | break; 188 | default: 189 | LOG(FATAL) << "Unknown pooling method."; 190 | } 191 | return Dtype(0.); 192 | } 193 | 194 | 195 | INSTANTIATE_CLASS(PoolingLayer); 196 | 197 | 198 | } // namespace caffe 199 | -------------------------------------------------------------------------------- /src/caffe/layers/loss_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "caffe/layer.hpp" 9 | #include "caffe/vision_layers.hpp" 10 | #include "caffe/util/math_functions.hpp" 11 | #include "caffe/util/io.hpp" 12 | 13 | using std::max; 14 | 15 | namespace caffe { 16 | 17 | const float kLOG_THRESHOLD = 1e-20; 18 | 19 | template 20 | void MultinomialLogisticLossLayer::SetUp( 21 | const vector*>& bottom, vector*>* top) { 22 | CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input."; 23 | CHECK_EQ(top->size(), 0) << "Loss Layer takes no output."; 24 | CHECK_EQ(bottom[0]->num(), bottom[1]->num()) 25 | << "The data and label should have the same number."; 26 | CHECK_EQ(bottom[1]->channels(), 1); 27 | CHECK_EQ(bottom[1]->height(), 1); 28 | CHECK_EQ(bottom[1]->width(), 1); 29 | } 30 | 31 | 32 | template 33 | Dtype MultinomialLogisticLossLayer::Backward_cpu( 34 | const vector*>& top, const bool propagate_down, 35 | vector*>* bottom) { 36 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 37 | const Dtype* bottom_label = (*bottom)[1]->cpu_data(); 38 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 39 | int num = (*bottom)[0]->num(); 40 | int dim = (*bottom)[0]->count() / (*bottom)[0]->num(); 41 | memset(bottom_diff, 0, sizeof(Dtype) * (*bottom)[0]->count()); 42 | Dtype loss = 0; 43 | for (int i = 0; i < num; ++i) { 44 | int label = static_cast(bottom_label[i]); 45 | Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD)); 46 | loss -= log(prob); 47 | bottom_diff[i * dim + label] = - 1. / prob / num; 48 | } 49 | return loss / num; 50 | } 51 | 52 | // TODO: implement the GPU version for multinomial loss 53 | 54 | 55 | template 56 | void InfogainLossLayer::SetUp( 57 | const vector*>& bottom, vector*>* top) { 58 | CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input."; 59 | CHECK_EQ(top->size(), 0) << "Loss Layer takes no output."; 60 | CHECK_EQ(bottom[0]->num(), bottom[1]->num()) 61 | << "The data and label should have the same number."; 62 | CHECK_EQ(bottom[1]->channels(), 1); 63 | CHECK_EQ(bottom[1]->height(), 1); 64 | CHECK_EQ(bottom[1]->width(), 1); 65 | BlobProto blob_proto; 66 | ReadProtoFromBinaryFile(this->layer_param_.source(), &blob_proto); 67 | infogain_.FromProto(blob_proto); 68 | CHECK_EQ(infogain_.num(), 1); 69 | CHECK_EQ(infogain_.channels(), 1); 70 | CHECK_EQ(infogain_.height(), infogain_.width()); 71 | } 72 | 73 | 74 | template 75 | Dtype InfogainLossLayer::Backward_cpu(const vector*>& top, 76 | const bool propagate_down, 77 | vector*>* bottom) { 78 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 79 | const Dtype* bottom_label = (*bottom)[1]->cpu_data(); 80 | const Dtype* infogain_mat = infogain_.cpu_data(); 81 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 82 | int num = (*bottom)[0]->num(); 83 | int dim = (*bottom)[0]->count() / (*bottom)[0]->num(); 84 | CHECK_EQ(infogain_.height(), dim); 85 | Dtype loss = 0; 86 | for (int i = 0; i < num; ++i) { 87 | int label = static_cast(bottom_label[i]); 88 | for (int j = 0; j < dim; ++j) { 89 | Dtype prob = max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD)); 90 | loss -= infogain_mat[label * dim + j] * log(prob); 91 | bottom_diff[i * dim + j] = - infogain_mat[label * dim + j] / prob / num; 92 | } 93 | } 94 | return loss / num; 95 | } 96 | 97 | 98 | template 99 | void EuclideanLossLayer::SetUp( 100 | const vector*>& bottom, vector*>* top) { 101 | CHECK_EQ(bottom.size(), 2) << "Loss Layer takes two blobs as input."; 102 | CHECK_EQ(top->size(), 0) << "Loss Layer takes no as output."; 103 | CHECK_EQ(bottom[0]->num(), bottom[1]->num()) 104 | << "The data and label should have the same number."; 105 | CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); 106 | CHECK_EQ(bottom[0]->height(), bottom[1]->height()); 107 | CHECK_EQ(bottom[0]->width(), bottom[1]->width()); 108 | difference_.Reshape(bottom[0]->num(), bottom[0]->channels(), 109 | bottom[0]->height(), bottom[0]->width()); 110 | } 111 | 112 | template 113 | Dtype EuclideanLossLayer::Backward_cpu(const vector*>& top, 114 | const bool propagate_down, vector*>* bottom) { 115 | int count = (*bottom)[0]->count(); 116 | int num = (*bottom)[0]->num(); 117 | caffe_sub(count, (*bottom)[0]->cpu_data(), (*bottom)[1]->cpu_data(), 118 | difference_.mutable_cpu_data()); 119 | Dtype loss = caffe_cpu_dot( 120 | count, difference_.cpu_data(), difference_.cpu_data()) / num / Dtype(2); 121 | // Compute the gradient 122 | caffe_axpby(count, Dtype(1) / num, difference_.cpu_data(), Dtype(0), 123 | (*bottom)[0]->mutable_cpu_diff()); 124 | return loss; 125 | } 126 | 127 | template 128 | void AccuracyLayer::SetUp( 129 | const vector*>& bottom, vector*>* top) { 130 | CHECK_EQ(bottom.size(), 2) << "Accuracy Layer takes two blobs as input."; 131 | CHECK_EQ(top->size(), 1) << "Accuracy Layer takes 1 output."; 132 | CHECK_EQ(bottom[0]->num(), bottom[1]->num()) 133 | << "The data and label should have the same number."; 134 | CHECK_EQ(bottom[1]->channels(), 1); 135 | CHECK_EQ(bottom[1]->height(), 1); 136 | CHECK_EQ(bottom[1]->width(), 1); 137 | (*top)[0]->Reshape(1, 2, 1, 1); 138 | } 139 | 140 | template 141 | void AccuracyLayer::Forward_cpu(const vector*>& bottom, 142 | vector*>* top) { 143 | Dtype accuracy = 0; 144 | Dtype logprob = 0; 145 | const Dtype* bottom_data = bottom[0]->cpu_data(); 146 | const Dtype* bottom_label = bottom[1]->cpu_data(); 147 | int num = bottom[0]->num(); 148 | int dim = bottom[0]->count() / bottom[0]->num(); 149 | for (int i = 0; i < num; ++i) { 150 | // Accuracy 151 | Dtype maxval = -FLT_MAX; 152 | int max_id = 0; 153 | for (int j = 0; j < dim; ++j) { 154 | if (bottom_data[i * dim + j] > maxval) { 155 | maxval = bottom_data[i * dim + j]; 156 | max_id = j; 157 | } 158 | } 159 | if (max_id == static_cast(bottom_label[i])) { 160 | ++accuracy; 161 | } 162 | Dtype prob = max(bottom_data[i * dim + static_cast(bottom_label[i])], 163 | Dtype(kLOG_THRESHOLD)); 164 | logprob -= log(prob); 165 | } 166 | // LOG(INFO) << "Accuracy: " << accuracy; 167 | (*top)[0]->mutable_cpu_data()[0] = accuracy / num; 168 | (*top)[0]->mutable_cpu_data()[1] = logprob / num; 169 | } 170 | 171 | template 172 | void VerificationAccuracyLayer::SetUp( 173 | const vector*>& bottom, vector*>* top) { 174 | CHECK_EQ(bottom.size(), 4) << "VerificationAccuracyLayer takes four blobs as input."; 175 | CHECK_EQ(top->size(), 1) << "VerificationAccuracy Layer takes 1 output."; 176 | CHECK_EQ(bottom[0]->num(), bottom[1]->num()) 177 | << "The data and label should have the same number."; 178 | //CHECK_EQ(bottom[1]->channels(), 1); 179 | CHECK_EQ(bottom[1]->height(), 1); 180 | CHECK_EQ(bottom[1]->width(), 1); 181 | (*top)[0]->Reshape(1, 2, 1, 1); 182 | diffy_.Reshape(bottom[0]->num(), bottom[0]->channels(), 1, 1); 183 | M_ = this->layer_param_.dual_threshold(); 184 | LOG(INFO) << "Initial: " << M_; 185 | } 186 | 187 | template 188 | void VerificationAccuracyLayer::Forward_cpu(const vector*>& bottom, 189 | vector*>* top) { 190 | Dtype accuracy = 0; 191 | Dtype logprob = 0; 192 | const Dtype* bottom_data1 = bottom[0]->cpu_data(); 193 | const Dtype* bottom_label1 = bottom[1]->cpu_data(); 194 | const Dtype* bottom_data2 = bottom[2]->cpu_data(); 195 | const Dtype* bottom_label2 = bottom[3]->cpu_data(); 196 | int num = bottom[0]->num(); 197 | int dim = bottom[0]->count() / bottom[0]->num(); 198 | 199 | int count = bottom[0]->count(); 200 | Dtype* diffy = diffy_.mutable_cpu_data(); 201 | caffe_sub(count, bottom_data1, bottom_data2, diffy); 202 | 203 | Dtype M2 = M_*M_; 204 | for (int i = 0; i < num; ++i) { 205 | int l1 = static_cast(bottom_label1[i]); 206 | int l2 = static_cast(bottom_label2[i]); 207 | int offset = i*dim; 208 | Dtype norm2 = caffe_cpu_dot(dim, diffy+offset, diffy+offset); 209 | if(l1 == l2 && norm2 <= M2) 210 | accuracy++; 211 | else if(l1 != l2 && norm2 > M2) 212 | accuracy++; 213 | } 214 | // LOG(INFO) << "Accuracy: " << accuracy; 215 | (*top)[0]->mutable_cpu_data()[0] = accuracy / num; 216 | (*top)[0]->mutable_cpu_data()[1] = logprob / num; 217 | } 218 | 219 | 220 | INSTANTIATE_CLASS(MultinomialLogisticLossLayer); 221 | INSTANTIATE_CLASS(InfogainLossLayer); 222 | INSTANTIATE_CLASS(EuclideanLossLayer); 223 | INSTANTIATE_CLASS(AccuracyLayer); 224 | INSTANTIATE_CLASS(VerificationAccuracyLayer); 225 | 226 | } // namespace caffe 227 | -------------------------------------------------------------------------------- /src/caffe/layers/conv_layer.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include 4 | 5 | #include "caffe/layer.hpp" 6 | #include "caffe/vision_layers.hpp" 7 | #include "caffe/util/im2col.hpp" 8 | #include "caffe/filler.hpp" 9 | #include "caffe/util/math_functions.hpp" 10 | 11 | namespace caffe { 12 | 13 | template 14 | void ConvolutionLayer::SetUp(const vector*>& bottom, 15 | vector*>* top) { 16 | CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input."; 17 | CHECK_EQ(top->size(), 1) << "Conv Layer takes a single blob as output."; 18 | KSIZE_ = this->layer_param_.kernelsize(); 19 | STRIDE_ = this->layer_param_.stride(); 20 | GROUP_ = this->layer_param_.group(); 21 | PAD_ = this->layer_param_.pad(); 22 | NUM_ = bottom[0]->num(); 23 | CHANNELS_ = bottom[0]->channels(); 24 | HEIGHT_ = bottom[0]->height(); 25 | WIDTH_ = bottom[0]->width(); 26 | NTILE_WIDTH_ = this->layer_param_.ntile_width(); 27 | NTILE_HEIGHT_ = this->layer_param_.ntile_height(); 28 | NUM_OUTPUT_ = this->layer_param_.num_output(); 29 | CHECK_GT(NUM_OUTPUT_, 0); 30 | CHECK_EQ(CHANNELS_ % GROUP_, 0); 31 | // The im2col result buffer would only hold one image at a time to avoid 32 | // overly large memory usage. 33 | int height_out = (HEIGHT_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1; 34 | int width_out = (WIDTH_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1; 35 | 36 | CHECK(height_out % NTILE_HEIGHT_ == 0); 37 | CHECK(width_out % NTILE_WIDTH_ == 0); 38 | TILE_WIDTH_ = width_out / NTILE_WIDTH_; 39 | TILE_HEIGHT_ = height_out / NTILE_HEIGHT_; 40 | 41 | col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, TILE_HEIGHT_, TILE_WIDTH_); 42 | out_buffer_.Reshape(1, NUM_OUTPUT_, TILE_HEIGHT_, TILE_WIDTH_); 43 | // Set the parameters 44 | CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0) 45 | << "Number of output should be multiples of group."; 46 | biasterm_ = this->layer_param_.biasterm(); 47 | // Figure out the dimensions for individual gemms. 48 | M_ = NUM_OUTPUT_ / GROUP_; 49 | K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_; 50 | N_ = TILE_WIDTH_ * TILE_HEIGHT_; 51 | (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out); 52 | int ntiles = NTILE_WIDTH_ * NTILE_HEIGHT_; 53 | // Check if we need to set up the weights 54 | if (this->blobs_.size() > 0) { 55 | LOG(INFO) << "Skipping parameter initialization"; 56 | } else { 57 | if (biasterm_) { 58 | this->blobs_.resize(2*ntiles); 59 | } else { 60 | this->blobs_.resize(1*ntiles); 61 | } 62 | // Intialize the weight 63 | for(int i = 0; i < ntiles; i++) { 64 | this->blobs_[i].reset( 65 | new Blob(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_)); 66 | // fill the weights 67 | shared_ptr > weight_filler( 68 | GetFiller(this->layer_param_.weight_filler())); 69 | weight_filler->Fill(this->blobs_[i].get()); 70 | // If necessary, intiialize and fill the bias term 71 | if (biasterm_) { 72 | this->blobs_[ntiles+i].reset(new Blob(1, 1, 1, NUM_OUTPUT_)); 73 | shared_ptr > bias_filler( 74 | GetFiller(this->layer_param_.bias_filler())); 75 | bias_filler->Fill(this->blobs_[ntiles+i].get()); 76 | } 77 | } 78 | } 79 | // Set up the bias filler 80 | if (biasterm_) { 81 | bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype))); 82 | Dtype* bias_multiplier_data = 83 | reinterpret_cast(bias_multiplier_->mutable_cpu_data()); 84 | for (int i = 0; i < N_; ++i) { 85 | bias_multiplier_data[i] = 1.; 86 | } 87 | } 88 | } 89 | 90 | 91 | template 92 | void ConvolutionLayer::Forward_cpu(const vector*>& bottom, 93 | vector*>* top) { 94 | const Dtype* bottom_data = bottom[0]->cpu_data(); 95 | Dtype* top_data = (*top)[0]->mutable_cpu_data(); 96 | Dtype* col_data = col_buffer_.mutable_cpu_data(); 97 | if(NTILE_WIDTH_ * NTILE_HEIGHT_ <= 1){ 98 | const Dtype* weight = this->blobs_[0]->cpu_data(); 99 | int weight_offset = M_ * K_; 100 | int col_offset = K_ * N_; 101 | int top_offset = M_ * N_; 102 | for (int n = 0; n < NUM_; ++n) { 103 | // First, im2col 104 | im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, 105 | WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); 106 | // Second, innerproduct with groups 107 | for (int g = 0; g < GROUP_; ++g) { 108 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, 109 | (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, 110 | (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); 111 | } 112 | // third, add bias 113 | if (biasterm_) { 114 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_, 115 | N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(), 116 | reinterpret_cast(bias_multiplier_->cpu_data()), 117 | (Dtype)1., top_data + (*top)[0]->offset(n)); 118 | } 119 | } 120 | }else{ 121 | //NOT_IMPLEMENTED; 122 | CHECK_EQ(STRIDE_, 1); 123 | CHECK_EQ(PAD_, 0); 124 | CHECK_EQ(GROUP_, 1); 125 | CHECK_EQ(col_buffer_.height(), TILE_HEIGHT_); 126 | Dtype *out_buffer = out_buffer_.mutable_cpu_data(); 127 | for (int n = 0; n < NUM_; ++n) { 128 | for(int ny = 0; ny < NTILE_HEIGHT_; ny++){ 129 | for(int nx = 0; nx < NTILE_WIDTH_; nx++){ 130 | int idx = ny * NTILE_WIDTH_ + nx; 131 | const Dtype* weight = this->blobs_[idx]->cpu_data(); 132 | const Dtype * img = bottom_data + bottom[0]->offset(n, 0, 133 | TILE_HEIGHT_ * ny, TILE_WIDTH_ * nx); 134 | im2col_tile_cpu(img, CHANNELS_, HEIGHT_, 135 | WIDTH_, KSIZE_, col_data, 136 | TILE_HEIGHT_, TILE_WIDTH_); 137 | //dump(&col_buffer_); 138 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, 139 | (Dtype)1., weight, col_data, (Dtype)0., out_buffer); 140 | if (biasterm_) { 141 | const Dtype *bias_ptr = this->blobs_[idx + NTILE_WIDTH_ * 142 | NTILE_HEIGHT_]->cpu_data(); 143 | caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_, 144 | N_, 1, (Dtype)1., bias_ptr, 145 | reinterpret_cast(bias_multiplier_->cpu_data()), 146 | (Dtype)1., out_buffer); 147 | } 148 | //dump(&out_buffer_); 149 | /* copy back */ 150 | 151 | int height_out = HEIGHT_ - KSIZE_ + 1; 152 | int width_out = WIDTH_ - KSIZE_ + 1; 153 | copy_stride_cpu(out_buffer, NUM_OUTPUT_, TILE_HEIGHT_, TILE_WIDTH_, 154 | top_data + (*top)[0]->offset(n, 0, TILE_HEIGHT_*ny, 155 | TILE_WIDTH_*nx), height_out, width_out); 156 | 157 | } 158 | } 159 | }/* n */ 160 | 161 | } 162 | } 163 | 164 | template 165 | Dtype ConvolutionLayer::Backward_cpu(const vector*>& top, 166 | const bool propagate_down, vector*>* bottom) { 167 | const Dtype* top_diff = top[0]->cpu_diff(); 168 | const Dtype* weight = this->blobs_[0]->cpu_data(); 169 | Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); 170 | const Dtype* bottom_data = (*bottom)[0]->cpu_data(); 171 | Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); 172 | Dtype* col_data = col_buffer_.mutable_cpu_data(); 173 | Dtype* col_diff = col_buffer_.mutable_cpu_diff(); 174 | // bias gradient if necessary 175 | Dtype* bias_diff = NULL; 176 | 177 | int ntiles = NTILE_WIDTH_ * NTILE_HEIGHT_; 178 | if(ntiles <= 1){ 179 | if (biasterm_) { 180 | bias_diff = this->blobs_[1]->mutable_cpu_diff(); 181 | memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count()); 182 | for (int n = 0; n < NUM_; ++n) { 183 | caffe_cpu_gemv(CblasNoTrans, NUM_OUTPUT_, N_, 184 | 1., top_diff + top[0]->offset(n), 185 | reinterpret_cast(bias_multiplier_->cpu_data()), 1., 186 | bias_diff); 187 | } 188 | } 189 | 190 | int weight_offset = M_ * K_; 191 | int col_offset = K_ * N_; 192 | int top_offset = M_ * N_; 193 | memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); 194 | for (int n = 0; n < NUM_; ++n) { 195 | // since we saved memory in the forward pass by not storing all col data, 196 | // we will need to recompute them. 197 | im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_, 198 | WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); 199 | // gradient w.r.t. weight. Note that we will accumulate diffs. 200 | for (int g = 0; g < GROUP_; ++g) { 201 | caffe_cpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, 202 | (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, 203 | col_data + col_offset * g, (Dtype)1., 204 | weight_diff + weight_offset * g); 205 | } 206 | // gradient w.r.t. bottom data, if necessary 207 | if (propagate_down) { 208 | for (int g = 0; g < GROUP_; ++g) { 209 | caffe_cpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, 210 | (Dtype)1., weight + weight_offset * g, 211 | top_diff + top[0]->offset(n) + top_offset * g, 212 | (Dtype)0., col_diff + col_offset * g); 213 | } 214 | // col2im back to the data 215 | col2im_cpu(col_diff, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, 216 | bottom_diff + (*bottom)[0]->offset(n)); 217 | } 218 | } 219 | }else{ 220 | NOT_IMPLEMENTED; 221 | } 222 | return Dtype(0.); 223 | } 224 | 225 | INSTANTIATE_CLASS(ConvolutionLayer); 226 | 227 | } // namespace caffe 228 | -------------------------------------------------------------------------------- /src/caffe/proto/caffe.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | package caffe; 4 | 5 | message BlobProto { 6 | optional int32 num = 1 [default = 0]; 7 | optional int32 channels = 2 [default = 0]; 8 | optional int32 height = 3 [default = 0]; 9 | optional int32 width = 4 [default = 0]; 10 | repeated float data = 5 [packed=true]; 11 | repeated float diff = 6 [packed=true]; 12 | } 13 | 14 | // The BlobProtoVector is simply a way to pass multiple blobproto instances 15 | // around. 16 | message BlobProtoVector { 17 | repeated BlobProto blobs = 1; 18 | } 19 | 20 | message Datum { 21 | optional int32 channels = 1; 22 | optional int32 height = 2; 23 | optional int32 width = 3; 24 | // the actual image data, in bytes 25 | optional bytes data = 4; 26 | optional int32 label = 5; 27 | // Optionally, the datum could also hold float data. 28 | repeated float float_data = 6; 29 | } 30 | 31 | message FillerParameter { 32 | // The filler type. 33 | optional string type = 1 [default = 'constant']; 34 | optional float value = 2 [default = 0]; // the value in constant filler 35 | optional float min = 3 [default = 0]; // the min value in uniform filler 36 | optional float max = 4 [default = 1]; // the max value in uniform filler 37 | optional float mean = 5 [default = 0]; // the mean value in gaussian filler 38 | optional float std = 6 [default = 1]; // the std value in gaussian filler 39 | } 40 | 41 | message LayerParameter { 42 | optional string name = 1; // the layer name 43 | optional string type = 2; // the string to specify the layer type 44 | 45 | // Parameters to specify layers with inner products. 46 | optional uint32 num_output = 3; // The number of outputs for the layer 47 | optional bool biasterm = 4 [default = true]; // whether to have bias terms 48 | optional FillerParameter weight_filler = 5; // The filler for the weight 49 | optional FillerParameter bias_filler = 6; // The filler for the bias 50 | 51 | optional uint32 pad = 7 [default = 0]; // The padding size 52 | optional uint32 kernelsize = 8; // The kernel size 53 | optional uint32 group = 9 [default = 1]; // The group size for group conv 54 | optional uint32 stride = 10 [default = 1]; // The stride 55 | enum PoolMethod { 56 | MAX = 0; 57 | AVE = 1; 58 | STOCHASTIC = 2; 59 | } 60 | optional PoolMethod pool = 11 [default = MAX]; // The pooling method 61 | optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio 62 | 63 | optional uint32 local_size = 13 [default = 5]; // for local response norm 64 | optional float alpha = 14 [default = 1.]; // for local response norm 65 | optional float beta = 15 [default = 0.75]; // for local response norm 66 | 67 | // For data layers, specify the data source 68 | optional string source = 16; 69 | // For data pre-processing, we can do simple scaling and subtracting the 70 | // data mean, if provided. Note that the mean subtraction is always carried 71 | // out before scaling. 72 | optional float scale = 17 [ default = 1 ]; 73 | optional string meanfile = 18; 74 | // For data layers, specify the batch size. 75 | optional uint32 batchsize = 19; 76 | // For data layers, specify if we would like to randomly crop an image. 77 | optional uint32 cropsize = 20 [default = 0]; 78 | // For data layers, specify if we want to randomly mirror data. 79 | optional bool mirror = 21 [default = false]; 80 | 81 | // The blobs containing the numeric parameters of the layer 82 | repeated BlobProto blobs = 50; 83 | // The ratio that is multiplied on the global learning rate. If you want to 84 | // set the learning ratio for one blob, you need to set it for all blobs. 85 | repeated float blobs_lr = 51; 86 | // The weight decay that is multiplied on the global weight decay. 87 | repeated float weight_decay = 52; 88 | 89 | // The rand_skip variable is for the data layer to skip a few data points 90 | // to avoid all asynchronous sgd clients to start at the same point. The skip 91 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 92 | // be larger than the number of keys in the leveldb. 93 | optional uint32 rand_skip = 53 [ default = 0 ]; 94 | 95 | // Fields related to detection (det_*) 96 | // foreground (object) overlap threshold 97 | optional float det_fg_threshold = 54 [default = 0.5]; 98 | // background (non-object) overlap threshold 99 | optional float det_bg_threshold = 55 [default = 0.5]; 100 | // Fraction of batch that should be foreground objects 101 | optional float det_fg_fraction = 56 [default = 0.25]; 102 | 103 | // optional bool OBSOLETE_can_clobber = 57 [ default = true ]; 104 | 105 | // Amount of contextual padding to add around a window 106 | // (used only by the window_data_layer) 107 | optional uint32 det_context_pad = 58 [default = 0]; 108 | 109 | // Mode for cropping out a detection window 110 | // warp: cropped window is warped to a fixed size and aspect ratio 111 | // square: the tightest square around the window is cropped 112 | optional string det_crop_mode = 59 [default = "warp"]; 113 | 114 | // For ReshapeLayer, one needs to specify the new dimensions. 115 | optional int32 new_num = 60 [default = 0]; 116 | optional int32 new_channels = 61 [default = 0]; 117 | optional int32 new_height = 62 [default = 0]; 118 | optional int32 new_width = 63 [default = 0]; 119 | 120 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 121 | // It will also resize images if new_height or new_width are not zero. 122 | optional bool shuffle_images = 64 [default = false]; 123 | 124 | // For ConcatLayer, one needs to specify the dimension for concatenation, and 125 | // the other dimensions must be the same for all the bottom blobs. 126 | // By default it will concatenate blobs along the channels dimension. 127 | optional uint32 concat_dim = 65 [default = 1]; 128 | 129 | optional string source_list = 66; 130 | optional bool share_data = 67 [default = false]; 131 | optional uint32 data_count = 68; 132 | 133 | optional float dual_lamda = 69 [default = 16]; 134 | optional float dual_threshold = 70 [default = 0]; 135 | optional float bias = 71[default = 0]; 136 | 137 | optional int32 ntile_width = 72 [default = 1]; 138 | optional int32 ntile_height = 73 [default = 1]; 139 | } 140 | 141 | message LayerConnection { 142 | optional LayerParameter layer = 1; // the layer parameter 143 | repeated string bottom = 2; // the name of the bottom blobs 144 | repeated string top = 3; // the name of the top blobs 145 | } 146 | 147 | message NetParameter { 148 | optional string name = 1; // consider giving the network a name 149 | repeated LayerConnection layers = 2; // a bunch of layers. 150 | // The input blobs to the network. 151 | repeated string input = 3; 152 | // The dim of the input blobs. For each input blob there should be four 153 | // values specifying the num, channels, height and width of the input blob. 154 | // Thus, there should be a total of (4 * #input) numbers. 155 | repeated int32 input_dim = 4; 156 | // Whether the network will force every layer to carry out backward operation. 157 | // If set False, then whether to carry out backward is determined 158 | // automatically according to the net structure and learning rates. 159 | optional bool force_backward = 5 [ default = false ]; 160 | } 161 | 162 | message SolverParameter { 163 | optional string train_net = 1; // The proto file for the training net. 164 | optional string test_net = 2; // The proto file for the testing net. 165 | // The number of iterations for each testing phase. 166 | optional int32 test_iter = 3 [ default = 0 ]; 167 | // The number of iterations between two testing phases. 168 | optional int32 test_interval = 4 [ default = 0 ]; 169 | optional float base_lr = 5; // The base learning rate 170 | // the number of iterations between displaying info. If display = 0, no info 171 | // will be displayed. 172 | optional int32 display = 6; 173 | optional int32 max_iter = 7; // the maximum number of iterations 174 | optional string lr_policy = 8; // The learning rate decay policy. 175 | optional float gamma = 9; // The parameter to compute the learning rate. 176 | optional float power = 10; // The parameter to compute the learning rate. 177 | optional float momentum = 11; // The momentum value. 178 | optional float weight_decay = 12; // The weight decay. 179 | optional int32 stepsize = 13; // the stepsize for learning rate policy "step" 180 | optional int32 snapshot = 14 [default = 0]; // The snapshot interval 181 | optional string snapshot_prefix = 15; // The prefix for the snapshot. 182 | // whether to snapshot diff in the results or not. Snapshotting diff will help 183 | // debugging but the final protocol buffer size will be much larger. 184 | optional bool snapshot_diff = 16 [ default = false]; 185 | // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. 186 | optional int32 solver_mode = 17 [default = 1]; 187 | // the device_id will that be used in GPU mode. Use device_id=0 in default. 188 | optional int32 device_id = 18 [default = 0]; 189 | 190 | optional int32 update_dual_thr_interval = 19 [default = 10000]; 191 | optional int32 pretrain_iterations = 20 [default = 20000]; 192 | } 193 | 194 | // A message that stores the solver snapshots 195 | message SolverState { 196 | optional int32 iter = 1; // The current iteration 197 | optional string learned_net = 2; // The file that stores the learned net. 198 | repeated BlobProto history = 3; // The history for sgd solvers 199 | 200 | optional float dual_thr = 4; 201 | } 202 | -------------------------------------------------------------------------------- /src/caffe/util/math_functions.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Yangqing Jia 2 | 3 | #include "caffe/common.hpp" 4 | #include "caffe/util/math_functions.hpp" 5 | 6 | namespace caffe { 7 | 8 | template<> 9 | void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, 10 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 11 | const float alpha, const float* A, const float* B, const float beta, 12 | float* C) { 13 | #ifdef USE_EIGEN 14 | MAP_SMATRIX(eC, C, M, N); 15 | eC *= beta; 16 | if(TransA == CblasNoTrans && TransB == CblasNoTrans){ 17 | MAP_CONST_SMATRIX(eA, A, M, K); 18 | MAP_CONST_SMATRIX(eB, B, K, N); 19 | eC.noalias() += alpha * (eA * eB); 20 | }else if(TransA == CblasNoTrans && TransB == CblasTrans){ 21 | MAP_CONST_SMATRIX(eA, A, M, K); 22 | MAP_CONST_SMATRIX(eB, B, N, K); 23 | eC.noalias() += alpha * (eA * eB.transpose()); 24 | }else if(TransA == CblasTrans && TransB == CblasNoTrans){ 25 | MAP_CONST_SMATRIX(eA, A, K, M); 26 | MAP_CONST_SMATRIX(eB, B, K, N); 27 | eC.noalias() += alpha * (eA.transpose() * eB); 28 | }else{ 29 | MAP_CONST_SMATRIX(eA, A, K, M); 30 | MAP_CONST_SMATRIX(eB, B, N, K); 31 | eC.noalias() += alpha * (eA.transpose() * eB.transpose()); 32 | } 33 | #else 34 | int lda = (TransA == CblasNoTrans) ? K : M; 35 | int ldb = (TransB == CblasNoTrans) ? N : K; 36 | cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 37 | ldb, beta, C, N); 38 | #endif 39 | } 40 | 41 | template<> 42 | void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, 43 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 44 | const double alpha, const double* A, const double* B, const double beta, 45 | double* C) { 46 | #ifdef USE_EIGEN 47 | MAP_DMATRIX(eC, C, M, N); 48 | eC *= beta; 49 | if(TransA == CblasNoTrans && TransB == CblasNoTrans){ 50 | MAP_CONST_DMATRIX(eA, A, M, K); 51 | MAP_CONST_DMATRIX(eB, B, K, N); 52 | eC.noalias() += alpha * (eA * eB); 53 | }else if(TransA == CblasNoTrans && TransB == CblasTrans){ 54 | MAP_CONST_DMATRIX(eA, A, M, K); 55 | MAP_CONST_DMATRIX(eB, B, N, K); 56 | eC.noalias() += alpha * (eA * eB.transpose()); 57 | }else if(TransA == CblasTrans && TransB == CblasNoTrans){ 58 | MAP_CONST_DMATRIX(eA, A, K, M); 59 | MAP_CONST_DMATRIX(eB, B, K, N); 60 | eC.noalias() += alpha * (eA.transpose() * eB); 61 | }else{ 62 | MAP_CONST_DMATRIX(eA, A, K, M); 63 | MAP_CONST_DMATRIX(eB, B, N, K); 64 | eC.noalias() += alpha * (eA.transpose() * eB.transpose()); 65 | } 66 | #else 67 | int lda = (TransA == CblasNoTrans) ? K : M; 68 | int ldb = (TransB == CblasNoTrans) ? N : K; 69 | cblas_dgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, 70 | ldb, beta, C, N); 71 | #endif 72 | } 73 | 74 | #if 0 75 | template <> 76 | void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA, 77 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 78 | const float alpha, const float* A, const float* B, const float beta, 79 | float* C) { 80 | // Note that cublas follows fortran order. 81 | int lda = (TransA == CblasNoTrans) ? K : M; 82 | int ldb = (TransB == CblasNoTrans) ? N : K; 83 | cublasOperation_t cuTransA = 84 | (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 85 | cublasOperation_t cuTransB = 86 | (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 87 | CUBLAS_CHECK(cublasSgemm(Caffe::cublas_handle(), cuTransB, cuTransA, 88 | N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); 89 | } 90 | 91 | template <> 92 | void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA, 93 | const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, 94 | const double alpha, const double* A, const double* B, const double beta, 95 | double* C) { 96 | // Note that cublas follows fortran order. 97 | int lda = (TransA == CblasNoTrans) ? K : M; 98 | int ldb = (TransB == CblasNoTrans) ? N : K; 99 | cublasOperation_t cuTransA = 100 | (TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 101 | cublasOperation_t cuTransB = 102 | (TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T; 103 | CUBLAS_CHECK(cublasDgemm(Caffe::cublas_handle(), cuTransB, cuTransA, 104 | N, M, K, &alpha, B, ldb, A, lda, &beta, C, N)); 105 | } 106 | #endif 107 | 108 | template <> 109 | void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, 110 | const int N, const float alpha, const float* A, const float* x, 111 | const float beta, float* y) { 112 | #ifdef USE_EIGEN 113 | MAP_CONST_SMATRIX(eA, A, M, N); 114 | if(TransA == CblasNoTrans){ 115 | MAP_SVECTOR(eY, y, M); 116 | eY *= beta; 117 | MAP_CONST_SVECTOR(eX, x, N); 118 | eY.noalias() += alpha * (eA * eX); 119 | }else{ 120 | MAP_SVECTOR(eY, y, N); 121 | eY *= beta; 122 | MAP_CONST_SVECTOR(eX, x, M); 123 | eY.noalias() += alpha * (eA.transpose() * eX); 124 | } 125 | #else 126 | cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); 127 | #endif 128 | } 129 | 130 | template <> 131 | void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, 132 | const int N, const double alpha, const double* A, const double* x, 133 | const double beta, double* y) { 134 | #ifdef USE_EIGEN 135 | MAP_CONST_DMATRIX(eA, A, M, N); 136 | if(TransA == CblasNoTrans){ 137 | MAP_DVECTOR(eY, y, M); 138 | eY *= beta; 139 | MAP_CONST_DVECTOR(eX, x, N); 140 | eY.noalias() += alpha * (eA * eX); 141 | }else{ 142 | MAP_DVECTOR(eY, y, N); 143 | eY *= beta; 144 | MAP_CONST_DVECTOR(eX, x, M); 145 | eY.noalias() += alpha * (eA.transpose() * eX); 146 | } 147 | #else 148 | cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); 149 | #endif 150 | } 151 | 152 | #if 0 153 | template <> 154 | void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, 155 | const int N, const float alpha, const float* A, const float* x, 156 | const float beta, float* y) { 157 | cublasOperation_t cuTransA = 158 | (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N; 159 | CUBLAS_CHECK(cublasSgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha, 160 | A, N, x, 1, &beta, y, 1)); 161 | } 162 | 163 | template <> 164 | void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, 165 | const int N, const double alpha, const double* A, const double* x, 166 | const double beta, double* y) { 167 | cublasOperation_t cuTransA = 168 | (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N; 169 | CUBLAS_CHECK(cublasDgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha, 170 | A, N, x, 1, &beta, y, 1)); 171 | } 172 | #endif 173 | 174 | template <> 175 | void caffe_axpy(const int N, const float alpha, const float* X, 176 | float* Y) { 177 | #ifdef USE_EIGEN 178 | MAP_SVECTOR(eY, Y, N); 179 | MAP_CONST_SVECTOR(eX, X, N); 180 | eY = alpha * eX + eY; 181 | #else 182 | cblas_saxpy(N, alpha, X, 1, Y, 1); 183 | #endif 184 | } 185 | 186 | template <> 187 | void caffe_axpy(const int N, const double alpha, const double* X, 188 | double* Y) 189 | { 190 | #ifdef USE_EIGEN 191 | MAP_DVECTOR(eY, Y, N); 192 | MAP_CONST_DVECTOR(eX, X, N); 193 | eY = alpha * eX + eY; 194 | #else 195 | cblas_daxpy(N, alpha, X, 1, Y, 1); 196 | #endif 197 | } 198 | 199 | 200 | #if 0 201 | template <> 202 | void caffe_gpu_axpy(const int N, const float alpha, const float* X, 203 | float* Y) { 204 | CUBLAS_CHECK(cublasSaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1)); 205 | } 206 | 207 | template <> 208 | void caffe_gpu_axpy(const int N, const double alpha, const double* X, 209 | double* Y) { 210 | CUBLAS_CHECK(cublasDaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1)); 211 | } 212 | #endif 213 | 214 | template <> 215 | void caffe_axpby(const int N, const float alpha, const float* X, 216 | const float beta, float* Y) { 217 | #ifdef USE_EIGEN 218 | MAP_SVECTOR(eY, Y, N); 219 | MAP_CONST_SVECTOR(eX, X, N); 220 | eY = alpha * eX + beta * eY; 221 | #else 222 | cblas_saxpby(N, alpha, X, 1, beta, Y, 1); 223 | #endif 224 | } 225 | 226 | template <> 227 | void caffe_axpby(const int N, const double alpha, const double* X, 228 | const double beta, double* Y) { 229 | #ifdef USE_EIGEN 230 | MAP_DVECTOR(eY, Y, N); 231 | MAP_CONST_DVECTOR(eX, X, N); 232 | eY = alpha * eX + beta * eY; 233 | #else 234 | cblas_daxpby(N, alpha, X, 1, beta, Y, 1); 235 | #endif 236 | } 237 | 238 | template <> 239 | void caffe_copy(const int N, const float* X, float* Y) { 240 | #ifdef USE_EIGEN 241 | memcpy(Y, X, sizeof(float)*N); 242 | #else 243 | cblas_scopy(N, X, 1, Y, 1); 244 | #endif 245 | } 246 | 247 | template <> 248 | void caffe_copy(const int N, const double* X, double* Y) { 249 | #ifdef USE_EIGEN 250 | memcpy(Y, X, sizeof(double)*N); 251 | #else 252 | cblas_dcopy(N, X, 1, Y, 1); 253 | #endif 254 | } 255 | 256 | #if 0 257 | template <> 258 | void caffe_gpu_copy(const int N, const float* X, float* Y) { 259 | CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), N, X, 1, Y, 1)); 260 | } 261 | 262 | template <> 263 | void caffe_gpu_copy(const int N, const double* X, double* Y) { 264 | CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), N, X, 1, Y, 1)); 265 | } 266 | #endif 267 | 268 | template <> 269 | void caffe_scal(const int N, const float alpha, float *X) { 270 | #ifdef USE_EIGEN 271 | MAP_SVECTOR(eX, X, N); 272 | eX *= alpha; 273 | #else 274 | cblas_sscal(N, alpha, X, 1); 275 | #endif 276 | } 277 | 278 | template <> 279 | void caffe_scal(const int N, const double alpha, double *X) { 280 | #ifdef USE_EIGEN 281 | MAP_DVECTOR(eX, X, N); 282 | eX *= alpha; 283 | #else 284 | cblas_dscal(N, alpha, X, 1); 285 | #endif 286 | } 287 | 288 | #if 0 289 | template <> 290 | void caffe_gpu_scal(const int N, const float alpha, float *X) { 291 | CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1)); 292 | } 293 | 294 | template <> 295 | void caffe_gpu_scal(const int N, const double alpha, double *X) { 296 | CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1)); 297 | } 298 | 299 | template <> 300 | void caffe_gpu_axpby(const int N, const float alpha, const float* X, 301 | const float beta, float* Y) { 302 | caffe_gpu_scal(N, beta, Y); 303 | caffe_gpu_axpy(N, alpha, X, Y); 304 | } 305 | 306 | template <> 307 | void caffe_gpu_axpby(const int N, const double alpha, const double* X, 308 | const double beta, double* Y) { 309 | caffe_gpu_scal(N, beta, Y); 310 | caffe_gpu_axpy(N, alpha, X, Y); 311 | } 312 | #endif 313 | 314 | template <> 315 | void caffe_sqr(const int n, const float* a, float* y) { 316 | vsSqr(n, a, y); 317 | } 318 | 319 | template <> 320 | void caffe_sqr(const int n, const double* a, double* y) { 321 | vdSqr(n, a, y); 322 | } 323 | 324 | template <> 325 | void caffe_add(const int n, const float* a, const float* b, 326 | float* y) { vsAdd(n, a, b, y); } 327 | 328 | template <> 329 | void caffe_add(const int n, const double* a, const double* b, 330 | double* y) { vdAdd(n, a, b, y); } 331 | 332 | template <> 333 | void caffe_sub(const int n, const float* a, const float* b, 334 | float* y) { vsSub(n, a, b, y); } 335 | 336 | template <> 337 | void caffe_sub(const int n, const double* a, const double* b, 338 | double* y) { vdSub(n, a, b, y); } 339 | 340 | template <> 341 | void caffe_mul(const int n, const float* a, const float* b, 342 | float* y) { vsMul(n, a, b, y); } 343 | 344 | template <> 345 | void caffe_mul(const int n, const double* a, const double* b, 346 | double* y) { vdMul(n, a, b, y); } 347 | 348 | template <> 349 | void caffe_div(const int n, const float* a, const float* b, 350 | float* y) { vsDiv(n, a, b, y); } 351 | 352 | template <> 353 | void caffe_div(const int n, const double* a, const double* b, 354 | double* y) { vdDiv(n, a, b, y); } 355 | 356 | template <> 357 | void caffe_powx(const int n, const float* a, const float b, 358 | float* y) { vsPowx(n, a, b, y); } 359 | 360 | template <> 361 | void caffe_powx(const int n, const double* a, const double b, 362 | double* y) { vdPowx(n, a, b, y); } 363 | 364 | #if 0 365 | template <> 366 | void caffe_vRngUniform(const int n, float* r, 367 | const float a, const float b) { 368 | VSL_CHECK(vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(), 369 | n, r, a, b)); 370 | } 371 | 372 | template <> 373 | void caffe_vRngUniform(const int n, double* r, 374 | const double a, const double b) { 375 | VSL_CHECK(vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(), 376 | n, r, a, b)); 377 | } 378 | 379 | template <> 380 | void caffe_vRngGaussian(const int n, float* r, const float a, 381 | const float sigma) { 382 | VSL_CHECK(vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, 383 | Caffe::vsl_stream(), n, r, a, sigma)); 384 | } 385 | 386 | 387 | template <> 388 | void caffe_vRngGaussian(const int n, double* r, const double a, 389 | const double sigma) { 390 | VSL_CHECK(vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, 391 | Caffe::vsl_stream(), n, r, a, sigma)); 392 | } 393 | #endif 394 | 395 | template <> 396 | void caffe_exp(const int n, const float* a, float* y) { 397 | vsExp(n, a, y); 398 | } 399 | 400 | template <> 401 | void caffe_exp(const int n, const double* a, double* y) { 402 | vdExp(n, a, y); 403 | } 404 | 405 | template <> 406 | float caffe_cpu_dot(const int n, const float* x, const float* y) { 407 | #ifdef USE_EIGEN 408 | MAP_CONST_SVECTOR(eX, x, n); 409 | MAP_CONST_SVECTOR(eY, y, n); 410 | return eX.dot(eY); 411 | #else 412 | return cblas_sdot(n, x, 1, y, 1); 413 | #endif 414 | } 415 | 416 | template <> 417 | double caffe_cpu_dot(const int n, const double* x, const double* y) { 418 | #ifdef USE_EIGEN 419 | MAP_CONST_DVECTOR(eX, x, n); 420 | MAP_CONST_DVECTOR(eY, y, n); 421 | return eX.dot(eY); 422 | #else 423 | return cblas_ddot(n, x, 1, y, 1); 424 | #endif 425 | } 426 | 427 | #if 0 428 | template <> 429 | void caffe_gpu_dot(const int n, const float* x, const float* y, 430 | float* out) { 431 | CUBLAS_CHECK(cublasSdot(Caffe::cublas_handle(), n, x, 1, y, 1, out)); 432 | } 433 | 434 | template <> 435 | void caffe_gpu_dot(const int n, const double* x, const double* y, 436 | double * out) { 437 | CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out)); 438 | } 439 | #endif 440 | 441 | } // namespace caffe 442 | -------------------------------------------------------------------------------- /src/caffe/net.cpp: -------------------------------------------------------------------------------- 1 | // Copyright Yangqing Jia 2013 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "caffe/proto/caffe.pb.h" 9 | #include "caffe/layer.hpp" 10 | #include "caffe/net.hpp" 11 | #include "caffe/util/io.hpp" 12 | #include "caffe/util/insert_splits.hpp" 13 | 14 | using std::pair; 15 | using std::map; 16 | using std::set; 17 | 18 | namespace caffe { 19 | 20 | template 21 | Net::Net(const NetParameter& param) { 22 | Init(param); 23 | } 24 | 25 | template 26 | Net::Net(const string& param_file) { 27 | NetParameter param; 28 | ReadProtoFromTextFile(param_file, ¶m); 29 | Init(param); 30 | } 31 | 32 | template 33 | void Net::Init(const NetParameter& in_param) { 34 | // Create a copy of in_param with splits added where necessary. 35 | NetParameter param; 36 | insert_splits(in_param, ¶m); 37 | // Basically, build all the layers and set up its connections. 38 | name_ = param.name(); 39 | map blob_name_to_idx; 40 | set available_blobs; 41 | int num_layers = param.layers_size(); 42 | CHECK_EQ(param.input_size() * 4, param.input_dim_size()) 43 | << "Incorrect bottom blob dimension specifications."; 44 | size_t memory_used = 0; 45 | // set the input blobs 46 | for (int i = 0; i < param.input_size(); ++i) { 47 | const string& blob_name = param.input(i); 48 | shared_ptr > blob_pointer( 49 | new Blob(param.input_dim(i * 4), 50 | param.input_dim(i * 4 + 1), 51 | param.input_dim(i * 4 + 2), 52 | param.input_dim(i * 4 + 3))); 53 | blobs_.push_back(blob_pointer); 54 | blob_names_.push_back(blob_name); 55 | blob_need_backward_.push_back(param.force_backward()); 56 | net_input_blob_indices_.push_back(i); 57 | net_input_blobs_.push_back(blob_pointer.get()); 58 | blob_name_to_idx[blob_name] = i; 59 | available_blobs.insert(blob_name); 60 | memory_used += blob_pointer->count(); 61 | } 62 | DLOG(INFO) << "Memory required for Data" << memory_used*sizeof(Dtype); 63 | // For each layer, set up their input and output 64 | bottom_vecs_.resize(param.layers_size()); 65 | top_vecs_.resize(param.layers_size()); 66 | bottom_id_vecs_.resize(param.layers_size()); 67 | top_id_vecs_.resize(param.layers_size()); 68 | for (int i = 0; i < param.layers_size(); ++i) { 69 | bool in_place = false; 70 | const LayerConnection& layer_connection = param.layers(i); 71 | const LayerParameter& layer_param = layer_connection.layer(); 72 | layers_.push_back(shared_ptr >(GetLayer(layer_param))); 73 | layer_names_.push_back(layer_param.name()); 74 | LOG(INFO) << "Creating Layer " << layer_param.name(); 75 | bool need_backward = param.force_backward(); 76 | // Figure out this layer's input and output 77 | for (int j = 0; j < layer_connection.bottom_size(); ++j) { 78 | const string& blob_name = layer_connection.bottom(j); 79 | const int blob_id = blob_name_to_idx[blob_name]; 80 | if (available_blobs.find(blob_name) == available_blobs.end()) { 81 | LOG(FATAL) << "Unknown blob input " << blob_name << 82 | " to layer" << j; 83 | } 84 | LOG(INFO) << layer_param.name() << " <- " << blob_name; 85 | bottom_vecs_[i].push_back( 86 | blobs_[blob_id].get()); 87 | bottom_id_vecs_[i].push_back(blob_id); 88 | // If a blob needs backward, this layer should provide it. 89 | need_backward |= blob_need_backward_[blob_id]; 90 | available_blobs.erase(blob_name); 91 | } 92 | for (int j = 0; j < layer_connection.top_size(); ++j) { 93 | const string& blob_name = layer_connection.top(j); 94 | // Check if we are doing in-place computation 95 | if (layer_connection.bottom_size() > j && 96 | blob_name == layer_connection.bottom(j)) { 97 | // In-place computation 98 | LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)"; 99 | in_place = true; 100 | available_blobs.insert(blob_name); 101 | top_vecs_[i].push_back( 102 | blobs_[blob_name_to_idx[blob_name]].get()); 103 | top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]); 104 | } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) { 105 | // If we are not doing in-place computation but has duplicated blobs, 106 | // raise an error. 107 | LOG(FATAL) << "Duplicate blobs produced by multiple sources."; 108 | } else { 109 | // Normal output. 110 | LOG(INFO) << layer_param.name() << " -> " << blob_name; 111 | shared_ptr > blob_pointer(new Blob()); 112 | blobs_.push_back(blob_pointer); 113 | blob_names_.push_back(blob_name); 114 | blob_need_backward_.push_back(param.force_backward()); 115 | blob_name_to_idx[blob_name] = blob_names_.size() - 1; 116 | available_blobs.insert(blob_name); 117 | top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get()); 118 | top_id_vecs_[i].push_back(blob_names_.size() - 1); 119 | } 120 | } 121 | // After this layer is connected, set it up. 122 | // LOG(INFO) << "Setting up " << layer_names_[i]; 123 | layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]); 124 | for (int topid = 0; topid < top_vecs_[i].size(); ++topid) { 125 | LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->num() << " " 126 | << top_vecs_[i][topid]->channels() << " " 127 | << top_vecs_[i][topid]->height() << " " 128 | << top_vecs_[i][topid]->width() << " (" 129 | << top_vecs_[i][topid]->count() << ")"; 130 | if (!in_place) 131 | memory_used += top_vecs_[i][topid]->count(); 132 | } 133 | DLOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype); 134 | int blobs_lr_size = layers_[i]->layer_param().blobs_lr_size(); 135 | CHECK(blobs_lr_size == layers_[i]->blobs().size() || blobs_lr_size == 0) 136 | << "Incorrect blobs lr size: should be either 0 or the same as " 137 | "the number of the layer's parameter blobs, " << blobs_lr_size << "vs. " << layers_[i]->blobs().size(); 138 | if (blobs_lr_size) { 139 | // Check if this layer needs backward operation itself 140 | for (int j = 0; j < blobs_lr_size; ++j) { 141 | need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0); 142 | } 143 | } else if (layers_[i]->blobs().size()) { 144 | // catch: if a layer param does not specify blobs_lr, we should assume the 145 | // learning rate to be 1. Thus we will need to perform backward. 146 | need_backward = true; 147 | } 148 | // Finally, set the backward flag 149 | layer_need_backward_.push_back(need_backward); 150 | if (need_backward) { 151 | LOG(INFO) << layer_names_[i] << " needs backward computation."; 152 | for (int j = 0; j < top_id_vecs_[i].size(); ++j) { 153 | blob_need_backward_[top_id_vecs_[i][j]] = true; 154 | } 155 | } else { 156 | LOG(INFO) << layer_names_[i] << " does not need backward computation."; 157 | } 158 | } 159 | // In the end, all remaining blobs are considered output blobs. 160 | for (set::iterator it = available_blobs.begin(); 161 | it != available_blobs.end(); ++it) { 162 | LOG(INFO) << "This network produces output " << *it; 163 | net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); 164 | } 165 | GetLearningRateAndWeightDecay(); 166 | LOG(INFO) << "Network initialization done."; 167 | LOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype); 168 | } 169 | 170 | 171 | template 172 | void Net::GetLearningRateAndWeightDecay() { 173 | LOG(INFO) << "Collecting Learning Rate and Weight Decay."; 174 | for (int i = 0; i < layers_.size(); ++i) { 175 | vector > >& layer_blobs = layers_[i]->blobs(); 176 | for (int j = 0; j < layer_blobs.size(); ++j) { 177 | params_.push_back(layer_blobs[j]); 178 | } 179 | // push the learning rate mutlipliers 180 | if (layers_[i]->layer_param().blobs_lr_size()) { 181 | CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size()); 182 | for (int j = 0; j < layer_blobs.size(); ++j) { 183 | float local_lr = layers_[i]->layer_param().blobs_lr(j); 184 | CHECK_GE(local_lr, 0.); 185 | params_lr_.push_back(local_lr); 186 | } 187 | } else { 188 | for (int j = 0; j < layer_blobs.size(); ++j) { 189 | params_lr_.push_back(1.); 190 | } 191 | } 192 | // push the weight decay multipliers 193 | if (layers_[i]->layer_param().weight_decay_size()) { 194 | CHECK_EQ(layers_[i]->layer_param().weight_decay_size(), 195 | layer_blobs.size()); 196 | for (int j = 0; j < layer_blobs.size(); ++j) { 197 | float local_decay = layers_[i]->layer_param().weight_decay(j); 198 | CHECK_GE(local_decay, 0.); 199 | params_weight_decay_.push_back(local_decay); 200 | } 201 | } else { 202 | for (int j = 0; j < layer_blobs.size(); ++j) { 203 | params_weight_decay_.push_back(1.); 204 | } 205 | } 206 | } 207 | } 208 | 209 | template 210 | const vector*>& Net::ForwardPrefilled() { 211 | for (int i = 0; i < layers_.size(); ++i) { 212 | // LOG(ERROR) << "Forwarding " << layer_names_[i]; 213 | layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]); 214 | } 215 | return net_output_blobs_; 216 | } 217 | 218 | template 219 | const vector*>& Net::Forward( 220 | const vector*> & bottom) { 221 | // Copy bottom to internal bottom 222 | for (int i = 0; i < bottom.size(); ++i) { 223 | net_input_blobs_[i]->CopyFrom(*bottom[i]); 224 | } 225 | return ForwardPrefilled(); 226 | } 227 | 228 | 229 | template 230 | string Net::Forward(const string& input_blob_protos) { 231 | BlobProtoVector blob_proto_vec; 232 | if (net_input_blobs_.size()) { 233 | blob_proto_vec.ParseFromString(input_blob_protos); 234 | CHECK_EQ(blob_proto_vec.blobs_size(), net_input_blobs_.size()) 235 | << "Incorrect input size."; 236 | for (int i = 0; i < blob_proto_vec.blobs_size(); ++i) { 237 | net_input_blobs_[i]->FromProto(blob_proto_vec.blobs(i)); 238 | } 239 | } 240 | ForwardPrefilled(); 241 | blob_proto_vec.Clear(); 242 | for (int i = 0; i < net_output_blobs_.size(); ++i) { 243 | net_output_blobs_[i]->ToProto(blob_proto_vec.add_blobs()); 244 | } 245 | string output; 246 | blob_proto_vec.SerializeToString(&output); 247 | return output; 248 | } 249 | 250 | 251 | template 252 | Dtype Net::Backward() { 253 | Dtype loss = 0; 254 | for (int i = layers_.size() - 1; i >= 0; --i) { 255 | if (layer_need_backward_[i]) { 256 | Dtype layer_loss = layers_[i]->Backward( 257 | top_vecs_[i], true, &bottom_vecs_[i]); 258 | loss += layer_loss; 259 | } 260 | } 261 | return loss; 262 | } 263 | 264 | template 265 | Dtype Net::BackwardBetween(int layer_top, int layer_bottom) 266 | { 267 | Dtype loss = 0; 268 | CHECK_GE(layer_top, layer_bottom); 269 | CHECK_LE(layer_top, layers_.size()); 270 | CHECK_GE(layer_bottom, 0); 271 | for (int i = layer_top; i >= layer_bottom; --i) { 272 | if (layer_need_backward_[i]) { 273 | Dtype layer_loss = layers_[i]->Backward( 274 | top_vecs_[i], true, &bottom_vecs_[i]); 275 | loss += layer_loss; 276 | } 277 | } 278 | return loss; 279 | } 280 | 281 | template 282 | void Net::CopyLayersFrom(const Net& rhs, bool copy_diff) 283 | { 284 | CHECK_EQ(layers_.size(), rhs.layers_.size()); 285 | for (int i = 0; i < layers_.size(); ++i) { 286 | vector > >& target_blobs = 287 | layers_[i]->blobs(); 288 | const shared_ptr >& source_layer = rhs.layers_[i]; 289 | CHECK_EQ(target_blobs.size(), source_layer->blobs().size()); 290 | const vector > >& source_blobs = 291 | source_layer->blobs(); 292 | for (int j = 0; j < target_blobs.size(); ++j) { 293 | CHECK_EQ(target_blobs[j]->num(), source_blobs[j]->num()); 294 | CHECK_EQ(target_blobs[j]->channels(), source_blobs[j]->channels()); 295 | CHECK_EQ(target_blobs[j]->height(), source_blobs[j]->height()); 296 | CHECK_EQ(target_blobs[j]->width(), source_blobs[j]->width()); 297 | target_blobs[j]->CopyFrom(*source_blobs[j], copy_diff); 298 | } 299 | } 300 | } 301 | 302 | template 303 | void Net::CopyTrainedLayersFrom(const NetParameter& param) { 304 | int num_source_layers = param.layers_size(); 305 | for (int i = 0; i < num_source_layers; ++i) { 306 | const LayerParameter& source_layer = param.layers(i).layer(); 307 | const string& source_layer_name = source_layer.name(); 308 | int target_layer_id = 0; 309 | while (target_layer_id != layer_names_.size() && 310 | layer_names_[target_layer_id] != source_layer_name) { 311 | ++target_layer_id; 312 | } 313 | if (target_layer_id == layer_names_.size()) { 314 | DLOG(INFO) << "Ignoring source layer " << source_layer_name; 315 | continue; 316 | } 317 | DLOG(INFO) << "Copying source layer " << source_layer_name; 318 | vector > >& target_blobs = 319 | layers_[target_layer_id]->blobs(); 320 | CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) 321 | << "Incompatible number of blobs for layer " << source_layer_name; 322 | for (int j = 0; j < target_blobs.size(); ++j) { 323 | CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num()); 324 | CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels()); 325 | CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height()); 326 | CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width()); 327 | target_blobs[j]->FromProto(source_layer.blobs(j)); 328 | } 329 | } 330 | } 331 | 332 | template 333 | void Net::CopyTrainedLayersFrom(const string trained_filename) { 334 | NetParameter param; 335 | ReadProtoFromBinaryFile(trained_filename, ¶m); 336 | CopyTrainedLayersFrom(param); 337 | } 338 | 339 | template 340 | void Net::ToProto(NetParameter* param, bool write_diff) { 341 | param->Clear(); 342 | param->set_name(name_); 343 | // Add bottom and top 344 | for (int i = 0; i < net_input_blob_indices_.size(); ++i) { 345 | param->add_input(blob_names_[net_input_blob_indices_[i]]); 346 | } 347 | DLOG(INFO) << "Serializing " << layers_.size() << " layers"; 348 | for (int i = 0; i < layers_.size(); ++i) { 349 | LayerConnection* layer_connection = param->add_layers(); 350 | for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) { 351 | layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]); 352 | } 353 | for (int j = 0; j < top_id_vecs_[i].size(); ++j) { 354 | layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]); 355 | } 356 | LayerParameter* layer_parameter = layer_connection->mutable_layer(); 357 | layers_[i]->ToProto(layer_parameter, write_diff); 358 | } 359 | } 360 | 361 | template 362 | void Net::Update() { 363 | for (int i = 0; i < params_.size(); ++i) { 364 | params_[i]->Update(); 365 | } 366 | } 367 | 368 | INSTANTIATE_CLASS(Net); 369 | 370 | } // namespace caffe 371 | --------------------------------------------------------------------------------