├── README.md └── extract_features_txt.cpp /README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | ## Usage 4 | 5 | 1. copy `extract_features_txt.cpp` to `caffe_root/tools/` 6 | 2. cd `caffe_root` 7 | 3. `make all -j` 8 | 4. create a `.sh` file containing a command of the form described below 9 | 10 | ## .sh file usage 11 | ``` 12 | extract_features_txt pretrained_net_param feature_extraction_proto_file 13 | extract_feature_blob_name1[,name2,...] 14 | save_feature_txt_name1[,name2,...] 15 | num_mini_batches 16 | [CPU/GPU] [DEVICE_ID=0] 17 | ``` 18 | 19 | ## example 20 | ``` 21 | ./build/tools/extract_features_txt.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt pool5,fc7 examples/_temp/features/pool5.txt,examples/_temp/features/fc7.txt 1000 CPU 22 | ``` 23 | 24 | ``` 25 | ./build/tools/extract_features_txt.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt pool5,fc7 examples/_temp/features/pool5.txt,examples/_temp/features/fc7.txt 1000 GPU 0 26 | ``` 27 | -------------------------------------------------------------------------------- /extract_features_txt.cpp: -------------------------------------------------------------------------------- 1 | #include // for snprintf 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "boost/algorithm/string.hpp" 9 | #include "google/protobuf/text_format.h" 10 | #include "leveldb/db.h" 11 | #include "leveldb/write_batch.h" 12 | 13 | #include "caffe/blob.hpp" 14 | #include "caffe/common.hpp" 15 | #include "caffe/net.hpp" 16 | #include "caffe/proto/caffe.pb.h" 17 | // #include "caffe/util/db.hpp" 18 | #include "caffe/util/io.hpp" 19 | #include "caffe/vision_layers.hpp" 20 | 21 | #include 22 | #include "caffe/caffe.hpp" 23 | #include 24 | using namespace std; 25 | using namespace caffe; // NOLINT(build/namespaces) 26 | 27 | using caffe::Blob; 28 | using caffe::Caffe; 29 | 
// using caffe::Datum; 30 | using caffe::Net; 31 | using boost::shared_ptr; 32 | // using std::string; 33 | namespace db = caffe::db; 34 | 35 | template 36 | int feature_extraction_pipeline(int argc, char** argv); 37 | 38 | int main(int argc, char** argv) { 39 | return feature_extraction_pipeline(argc, argv); 40 | // return feature_extraction_pipeline(argc, argv); 41 | } 42 | 43 | template 44 | int feature_extraction_pipeline(int argc, char** argv) { 45 | ::google::InitGoogleLogging(argv[0]); 46 | const int num_required_args = 6; 47 | if (argc < num_required_args) { 48 | LOG(ERROR)<< 49 | "This program takes in a trained network and an input data layer, and then" 50 | " extract features of the input data produced by the net.\n" 51 | "Usage: extract_features pretrained_net_param" 52 | " feature_extraction_proto_file extract_feature_blob_name1[,name2,...]" 53 | " save_feature_dataset_name1[,name2,...] num_mini_batches db_type" 54 | " [CPU/GPU] [DEVICE_ID=0]\n" 55 | "Note: you can extract multiple features in one pass by specifying" 56 | " multiple feature blob names and dataset names seperated by ','." 
57 | " The names cannot contain white space characters and the number of blobs" 58 | " and datasets must be equal."; 59 | return 1; 60 | } 61 | int arg_pos = num_required_args; 62 | 63 | arg_pos = num_required_args; 64 | if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) { 65 | LOG(ERROR)<< "Using GPU"; 66 | uint device_id = 0; 67 | if (argc > arg_pos + 1) { 68 | device_id = atoi(argv[arg_pos + 1]); 69 | CHECK_GE(device_id, 0); 70 | } 71 | LOG(ERROR) << "Using Device_id=" << device_id; 72 | Caffe::SetDevice(device_id); 73 | Caffe::set_mode(Caffe::GPU); 74 | } else { 75 | LOG(ERROR) << "Using CPU"; 76 | Caffe::set_mode(Caffe::CPU); 77 | } 78 | 79 | arg_pos = 0; // the name of the executable 80 | string pretrained_binary_proto(argv[++arg_pos]); 81 | 82 | // Expected prototxt contains at least one data layer such as 83 | // the layer data_layer_name and one feature blob such as the 84 | // fc7 top blob to extract features. 85 | /* 86 | layers { 87 | name: "data_layer_name" 88 | type: DATA 89 | data_param { 90 | source: "/path/to/your/images/to/extract/feature/images_leveldb" 91 | mean_file: "/path/to/your/image_mean.binaryproto" 92 | batch_size: 128 93 | crop_size: 227 94 | mirror: false 95 | } 96 | top: "data_blob_name" 97 | top: "label_blob_name" 98 | } 99 | layers { 100 | name: "drop7" 101 | type: DROPOUT 102 | dropout_param { 103 | dropout_ratio: 0.5 104 | } 105 | bottom: "fc7" 106 | top: "fc7" 107 | } 108 | */ 109 | string feature_extraction_proto(argv[++arg_pos]); 110 | shared_ptr > feature_extraction_net( 111 | new Net(feature_extraction_proto, caffe::TEST)); 112 | feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); 113 | 114 | string extract_feature_blob_names(argv[++arg_pos]); 115 | vector blob_names; 116 | boost::split(blob_names, extract_feature_blob_names, boost::is_any_of(",")); 117 | 118 | string save_feature_dataset_names(argv[++arg_pos]); 119 | vector dataset_names; 120 | boost::split(dataset_names, save_feature_dataset_names, 
121 | boost::is_any_of(",")); 122 | CHECK_EQ(blob_names.size(), dataset_names.size()) << 123 | " the number of blob names and dataset names must be equal"; 124 | size_t num_features = blob_names.size(); 125 | 126 | for (size_t i = 0; i < num_features; i++) { 127 | CHECK(feature_extraction_net->has_blob(blob_names[i])) 128 | << "Unknown feature blob name " << blob_names[i] 129 | << " in the network " << feature_extraction_proto; 130 | } 131 | 132 | vector fout; 133 | for(size_t i=0; i < num_features; ++i) { 134 | LOG(ERROR) << "open file " << dataset_names[i]; 135 | fout.push_back(new ofstream(dataset_names[i].c_str(), ios::app)); 136 | } 137 | 138 | int num_mini_batches = atoi(argv[++arg_pos]); 139 | 140 | // std::vector > feature_dbs; 141 | // std::vector > txns; 142 | // const char* db_type = argv[++arg_pos]; 143 | // for (size_t i = 0; i < num_features; ++i) { 144 | // LOG(INFO)<< "Opening dataset " << dataset_names[i]; 145 | // shared_ptr db(db::GetDB(db_type)); 146 | // db->Open(dataset_names.at(i), db::NEW); 147 | // feature_dbs.push_back(db); 148 | // shared_ptr txn(db->NewTransaction()); 149 | // txns.push_back(txn); 150 | // } 151 | 152 | LOG(ERROR)<< "Extacting Features"; 153 | 154 | // Datum datum; 155 | //const int kMaxKeyStrLength = 100; 156 | //char key_str[kMaxKeyStrLength]; 157 | vector*> input_vec; 158 | vector image_indices(num_features, 0); 159 | 160 | for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { 161 | feature_extraction_net->Forward(input_vec); 162 | for (int i = 0; i < num_features; ++i) { 163 | const shared_ptr > feature_blob = feature_extraction_net 164 | ->blob_by_name(blob_names[i]); 165 | int batch_size = feature_blob->num(); 166 | int dim_features = feature_blob->count() / batch_size; 167 | const Dtype* feature_blob_data; 168 | for (int n = 0; n < batch_size; ++n) { 169 | // datum.set_height(feature_blob->height()); 170 | // datum.set_width(feature_blob->width()); 171 | // 
datum.set_channels(feature_blob->channels()); 172 | // datum.clear_data(); 173 | // datum.clear_float_data(); 174 | feature_blob_data = feature_blob->cpu_data() + 175 | feature_blob->offset(n); 176 | for (int d = 0; d < dim_features; ++d) { 177 | *(fout[i]) << feature_blob_data[d] << " "; 178 | // datum.add_float_data(feature_blob_data[d]); 179 | } 180 | *(fout[i]) << "\r\n"; 181 | 182 | // int length = snprintf(key_str, kMaxKeyStrLength, "%010d", 183 | // image_indices[i]); 184 | // string out; 185 | // CHECK(datum.SerializeToString(&out)); 186 | // txns.at(i)->Put(std::string(key_str, length), out); 187 | // ++image_indices[i]; 188 | // if (image_indices[i] % 1000 == 0) { 189 | // txns.at(i)->Commit(); 190 | // txns.at(i).reset(feature_dbs.at(i)->NewTransaction()); 191 | // LOG(ERROR)<< "Extracted features of " << image_indices[i] << 192 | // " query images for feature blob " << blob_names[i]; 193 | // } 194 | } // for (int n = 0; n < batch_size; ++n) 195 | } // for (int i = 0; i < num_features; ++i) 196 | } // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) 197 | // write the last batch 198 | for (int i = 0; i < num_features; ++i) { 199 | fout[i]->close(); 200 | // if (image_indices[i] % 1000 != 0) { 201 | // txns.at(i)->Commit(); 202 | // } 203 | // LOG(ERROR)<< "Extracted features of " << image_indices[i] << 204 | // " query images for feature blob " << blob_names[i]; 205 | // feature_dbs.at(i)->Close(); 206 | } 207 | 208 | LOG(ERROR)<< "Successfully extracted the features!"; 209 | return 0; 210 | } 211 | 212 | --------------------------------------------------------------------------------