├── README.md ├── cpp ├── lfw_rest_API.cpp ├── lfw_rest_API.h ├── lfw_unrest_API.cpp └── lfw_unrest_API.h └── py ├── lfw_rest_API.py └── lfw_unrest_API.py /README.md: -------------------------------------------------------------------------------- 1 | LFW_API 2 | ======= 3 | >**Designer:** Junbo Zhao, Wuhan University, working in the Tsinghua National Lab of Intelligent Images and Documents Processing. 4 | **Contact:** zhaojunbo1992chasing@gmail.com +86-18672365683 5 | 6 | Introduction 7 | ----------------------------------- 8 | To make the Labeled Faces in the Wild (LFW) benchmark easier to use, I provide the code in three versions, namely C++, Python and Octave. Both the **Restricted** and **Unrestricted** configurations are implemented. Files named with "unrest" are written for the unrestricted configuration, and those named with "rest" are for the restricted configuration. 9 | 10 | C++ version 11 | -------------------------------------- 12 | ### Eigen 13 | Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms. You should install Eigen following the tutorial: http://eigen.tuxfamily.org/dox/GettingStarted.html 14 | Both the input and the output feature matrices are built with Eigen::MatrixXd or Eigen::VectorXd. After installing Eigen in your project, you can write matrix code as simply as in Matlab! 15 | 16 | ### Get started 17 | Note that you must first extract features from the images in LFW. Your features should be organized as a stack (std::vector) of "dict" objects, each of which holds a person's name, the number of his or her images, and the extracted feature matrix of that person. You can see how this class is constructed in the C++ header files. Furthermore, your feature matrix can be either row-ordered or column-ordered. Row-ordered means that each row is the feature vector of a specific image, and column-ordered means that each column is such a vector. 
You can change this using the parameter "axis", whose default setting is 0, which means row-ordered setting. 18 | 19 | ### pairs.txt and people.txt 20 | Prepare the .txt files for different configurations of LFW! You can find instructions here: 21 | http://vis-www.cs.umass.edu/lfw/README.txt 22 | 23 | python version 24 | ------------------------------------------------------------ 25 | ### Get started 26 | You can simply make the parameters like what I suggested above with C++ version, but since python provides dictionary and tuple structs, it turns much more gorgeous. The input feature matrix is a dictionary, whose keys are human names and values are feature matrix as numpy.ndarray. Output is a dictionary as well whose keys are "train" and "test", pointing to two tuples which involve feature matrix and their labels. 27 | 28 | Platform 29 | -------------------------------------------------------- 30 | My platform is Ubuntu 12.04 LTS with g++ 4.6.3, Eigen 3.1.4 and python 2.7.5. I propose the code is compatible to other platforms, and if you have some problems compiling or running it, feel free to contact me. 31 | -------------------------------------------------------------------------------- /cpp/lfw_rest_API.cpp: -------------------------------------------------------------------------------- 1 | #include "lfw_rest_API.h" 2 | 3 | bool RestLFW(string filename, int fold, vector features, 4 | MatrixXd& feature_train, MatrixXd& feature_test, 5 | VectorXd& label_train, VectorXd& label_test, int axis){ 6 | // axis: INPUT and OUTPUT features are stored column-ordred or row-ordred. 7 | // And axis = 0 meansrow orded, axis = 1 means column-ordred. 8 | // The default setting is axis = 0. 9 | int i, j, k; 10 | // QA 11 | if (axis == 0) { 12 | for (i = 0; i < features.size(); i++) { 13 | if(features[i].num != features[i].fea.rows()){ 14 | cout<< "Input feature are bad. 
Examine it carefully" < per_fea_num_train, per_fea_num_test; 53 | for (i = 0; i < Nfold; i++) { 54 | vector per_fea_num_unit; 55 | for (j = 0; j < Npos_neg; j++) { 56 | string pos_line, pos_per, str_num; 57 | int position, num1, num2; 58 | getline(fin, pos_line); 59 | pos_per.assign(pos_line.begin(), pos_line.begin() + pos_line.find_first_of(" ")); 60 | position = pos_line.find_first_not_of(alphabet); 61 | str_num.assign(str_num.begin() + position, str_num.begin() + 62 | str_num.find_first_of(" ", position)); 63 | num1 = atoi(str_num.c_str()); 64 | str_num.assign(str_num.begin() + str_num.find_last_of(" ") + 1, 65 | str_num.end()); 66 | num2 = atoi(str_num.c_str()); 67 | per_fea_num_unit.push_back(samplepair(pos_per, pos_per, num1, num2)); 68 | } 69 | for (j = 0; j < Npos_neg; j++) { 70 | string neg_line, neg_per1, neg_per2, str_num; 71 | int position1, position2, num1, num2; 72 | getline(fin, neg_line); 73 | position1 = neg_line.find_first_of(alphabet_pure, 74 | neg_line.find_first_not_of(alphabet)); 75 | position2 = neg_line.find_first_not_of(alphabet); 76 | neg_per1.assign(neg_line.begin(), neg_line.begin() + 77 | neg_line.find_first_of(" ")); 78 | neg_per2.assign(neg_line.begin() + position1, neg_line.begin() + 79 | neg_line.find_first_of(" ", position1)); 80 | str_num.assign(neg_line.begin() + position2, neg_line.begin() + 81 | neg_line.find_first_of(" ", position2)); 82 | num1 = atoi(str_num.c_str()); 83 | str_num.assign(neg_line.begin() + neg_line.find_first_not_of(alphabet), 84 | neg_line.end()); 85 | num2 = atoi(str_num.c_str()); 86 | per_fea_num_unit.push_back(samplepair(neg_per1, neg_per2, num1, num2)); 87 | } 88 | if (i==fold) { 89 | per_fea_num_test.assign(per_fea_num_unit.begin(), per_fea_num_unit.end()); 90 | } 91 | else{ 92 | per_fea_num_train.insert(per_fea_num_train.end(), 93 | per_fea_num_unit.begin(), 94 | per_fea_num_unit.end()); 95 | } 96 | } 97 | // QA 98 | assert(fin.eof()); 99 | fin.close(); 100 | 101 | vector totalname; 102 | for (i = 0; i 
< features.size(); i++) { 103 | totalname.push_back(features[i].name); 104 | } 105 | vector::iterator it1, it2; 106 | int it_num1, it_num2; 107 | if (axis == 0) { 108 | feature_train = MatrixXd(per_fea_num_train.size() * 2, 109 | features[0].fea.cols()); 110 | feature_test = MatrixXd(per_fea_num_test.size() * 2, 111 | features[0].fea.cols()); 112 | label_train = VectorXd(per_fea_num_train.size()); 113 | label_test = VectorXd(per_fea_num_test.size()); 114 | for ( i = 0; i < per_fea_num_train.size(); i++) { 115 | it1 = find(totalname.begin(), totalname.end(), per_fea_num_train[i].name1); 116 | it_num1 = it1 - totalname.begin(); 117 | it2 = find(totalname.begin(), totalname.end(), per_fea_num_train[i].name2); 118 | it_num2 = it2 - totalname.begin(); 119 | feature_train.row(2*i) = features[it_num1].fea.row(per_fea_num_train[i].num1); 120 | feature_train.row(2*i+1) = features[it_num2].fea.row(per_fea_num_train[i].num2); 121 | if(per_fea_num_train[i].name1 == per_fea_num_train[i].name2) 122 | label_train[i] = 1; 123 | else 124 | label_train[i] = 0; 125 | } 126 | for ( i = 0; i < per_fea_num_test.size(); i++) { 127 | it1 = find(totalname.begin(), totalname.end(), per_fea_num_test[i].name1); 128 | it_num1 = it1 - totalname.begin(); 129 | it2 = find(totalname.begin(), totalname.end(), per_fea_num_test[i].name2); 130 | it_num2 = it2 - totalname.begin(); 131 | feature_test.row(2*i) = features[it_num1].fea.row(per_fea_num_test[i].num1); 132 | feature_test.row(2*i+1) = features[it_num2].fea.row(per_fea_num_test[i].num2); 133 | if(per_fea_num_test[i].name1 == per_fea_num_test[i].name2) 134 | label_test[i] = 1; 135 | else 136 | label_test[i] = 0; 137 | } 138 | return true; 139 | } 140 | else { 141 | feature_train = MatrixXd(features[0].fea.rows(), 142 | per_fea_num_train.size() * 2) ; 143 | feature_test = MatrixXd(features[0].fea.rows(), 144 | per_fea_num_test.size() * 2) ; 145 | label_train = VectorXd(per_fea_num_train.size()); 146 | label_test = 
VectorXd(per_fea_num_test.size()); 147 | for ( i = 0; i < per_fea_num_train.size(); i++) { 148 | it1 = find(totalname.begin(), totalname.end(), per_fea_num_train[i].name1); 149 | it_num1 = it1 - totalname.begin(); 150 | it2 = find(totalname.begin(), totalname.end(), per_fea_num_train[i].name2); 151 | it_num2 = it2 - totalname.begin(); 152 | feature_train.col(2*i) = features[it_num1].fea.col(per_fea_num_train[i].num1); 153 | feature_train.col(2*i+1) = features[it_num2].fea.col(per_fea_num_train[i].num2); 154 | if(per_fea_num_train[i].name1 == per_fea_num_train[i].name2) 155 | label_train[i] = 1; 156 | else 157 | label_train[i] = 0; 158 | } 159 | for ( i = 0; i < per_fea_num_test.size(); i++) { 160 | it1 = find(totalname.begin(), totalname.end(), per_fea_num_test[i].name1); 161 | it_num1 = it1 - totalname.begin(); 162 | it2 = find(totalname.begin(), totalname.end(), per_fea_num_test[i].name2); 163 | it_num2 = it2 - totalname.begin(); 164 | feature_test.col(2*i) = features[it_num1].fea.col(per_fea_num_test[i].num1); 165 | feature_test.col(2*i+1) = features[it_num2].fea.col(per_fea_num_test[i].num2); 166 | if(per_fea_num_test[i].name1 == per_fea_num_test[i].name2) 167 | label_test[i] = 1; 168 | else 169 | label_test[i] = 0; 170 | } 171 | return true; 172 | } 173 | 174 | } 175 | -------------------------------------------------------------------------------- /cpp/lfw_rest_API.h: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | * * Designed by Junbo Zhao, working in Center for Intelligent Images and 3 | * * Document Processing Lab, Tsinghua University. 4 | * * Better using Labeled Faces in the Wild (LFW). 
5 | * * ***********************************************************************/ 6 | 7 | #ifndef _LFW_REST_API_H_ 8 | #define _LFW_REST_API_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace Eigen; 19 | 20 | class dict{ // per-person record: name, image count (num) and feature matrix (fea) 21 | public: 22 | string name; 23 | int num; 24 | MatrixXd fea; 25 | dict(string na, int nu){ 26 | name = na; 27 | num = nu; 28 | } 29 | dict(string na, int nu, MatrixXd fe){ 30 | name = na; 31 | num = nu; 32 | fea = fe; 33 | } 34 | }; 35 | 36 | class samplepair{ // one sample pair: two person names plus an index (num1/num2) for each 37 | public: 38 | string name1; 39 | string name2; 40 | int num1; 41 | int num2; 42 | samplepair(string na1, string na2){ 43 | name1 = na1; 44 | name2 = na2; 45 | num1 = 0; num2 = 0; // names-only construction: zero the indices so they are never read uninitialized 46 | } 47 | samplepair(string na1, string na2, 48 | int nu1, int nu2){ 49 | name1 = na1; 50 | name2 = na2; 51 | num1 = nu1; 52 | num2 = nu2; 53 | } 54 | }; 55 | 56 | const string alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_ "); // const => internal linkage, so including this header from several .cpp files does not produce duplicate symbols 57 | const string alphabet_pure("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); 58 | 59 | bool RestLFW(string filename, int fold, vector features, 60 | MatrixXd& feature_train, MatrixXd& feature_test, 61 | VectorXd& label_train, VectorXd& label_test, int axis=0); 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /cpp/lfw_unrest_API.cpp: -------------------------------------------------------------------------------- 1 | #include "lfw_unrest_API.h" 2 | 3 | bool UnRestLFW(string filename, int fold, vector features, 4 | MatrixXd& feature_train, MatrixXd& feature_test, 5 | VectorXd& label_train, VectorXd& label_test, int axis){ 6 | // axis: INPUT and OUTPUT features are stored column-ordred or row-ordred. 7 | // And axis = 0 meansrow orded, axis = 1 means column-ordred. 8 | // The default setting is axis = 0. 
9 | int i, j, k; 10 | // QA 11 | if (axis == 0) { 12 | for (i = 0; i < features.size(); i++) { 13 | if(features[i].num != features[i].fea.rows()){ 14 | cout<< "Input feature are bad. Examine it carefully" < per_fea_num_train; 47 | vector per_fea_num_test; 48 | for( i = 0; i < Nfold; i++){ 49 | vector per_fea_num_unit; 50 | string str_person_num; 51 | int person_num; 52 | vector sample_num; 53 | getline(fin, str_person_num); 54 | person_num = atoi(str_person_num.c_str()); 55 | vector person; 56 | for ( j = 0; j < person_num; j++) { 57 | string line; 58 | string per, str_num; 59 | int num; 60 | getline(fin, line); 61 | per.assign(line.begin(), line.begin() + line.find_first_of(" ")); 62 | str_num.assign(line.begin() + line.find_last_of(" ") + 1, line.end()); 63 | num = atoi(str_num.c_str()); 64 | person.push_back(per); 65 | sample_num.push_back(num); 66 | } 67 | assert(sample_num.size() == person.size()); // QA 68 | for ( k = 0; k < person.size(); k++) { 69 | per_fea_num_unit.push_back(dict(person[k], sample_num[k])); 70 | } 71 | if (i == fold) { 72 | per_fea_num_test.assign(per_fea_num_unit.begin(), per_fea_num_unit.end()); 73 | } 74 | else{ 75 | per_fea_num_train.insert(per_fea_num_train.end(), 76 | per_fea_num_unit.begin(), 77 | per_fea_num_unit.end()); 78 | } 79 | } 80 | // QA 81 | assert(fin.eof()); 82 | fin.close(); 83 | 84 | vector totalname; 85 | for (i = 0; i < features.size(); i++) { 86 | totalname.push_back(features[i].name); 87 | } 88 | vector::iterator it; 89 | int it_num; 90 | if (axis == 0) { 91 | feature_train = MatrixXd(sum_sample_num(per_fea_num_train), 92 | features[0].fea.cols()); 93 | feature_test = MatrixXd(sum_sample_num(per_fea_num_test), 94 | features[0].fea.cols()); 95 | label_train = VectorXd(sum_sample_num(per_fea_num_train)); 96 | label_test = VectorXd(sum_sample_num(per_fea_num_test)); 97 | k = 0; 98 | for ( i = 0; i < per_fea_num_train.size(); i++) { 99 | it = find(totalname.begin(), totalname.end(), per_fea_num_train[i].name); 100 | 
it_num = it - totalname.begin(); 101 | for ( j = 0; j < per_fea_num_train[i].num; j++) { 102 | feature_train.row(k) = features[it_num].fea.row(j); 103 | label_train(k) = i; 104 | k++; 105 | } 106 | } 107 | k = 0; 108 | for ( i = 0; i < per_fea_num_test.size(); i++) { 109 | it = find(totalname.begin(), totalname.end(), per_fea_num_test[i].name); 110 | it_num = it - totalname.begin(); 111 | for ( j = 0; j < per_fea_num_test[i].num; j++) { 112 | feature_test.row(k) = features[it_num].fea.row(j); 113 | label_test(k) = i; 114 | k++; 115 | } 116 | } 117 | return true; 118 | } 119 | 120 | else{ 121 | feature_train = MatrixXd(features[0].fea.rows(), 122 | sum_sample_num(per_fea_num_train)); 123 | feature_test = MatrixXd(features[0].fea.rows(), 124 | sum_sample_num(per_fea_num_test)); 125 | label_train = VectorXd(sum_sample_num(per_fea_num_train)); 126 | label_test = VectorXd(sum_sample_num(per_fea_num_test)); 127 | k = 0; 128 | for ( i = 0; i < per_fea_num_train.size(); i++) { 129 | it = find(totalname.begin(), totalname.end(), per_fea_num_train[i].name); 130 | it_num = it - totalname.begin(); 131 | for ( j = 0; j < per_fea_num_train[i].num; j++) { 132 | feature_train.col(k) = features[it_num].fea.col(j); 133 | label_train(k) = i; 134 | k++; 135 | } 136 | } 137 | k = 0; 138 | for ( i = 0; i < per_fea_num_test.size(); i++) { 139 | it = find(totalname.begin(), totalname.end(), per_fea_num_test[i].name); 140 | it_num = it - totalname.begin(); 141 | for ( j = 0; j < per_fea_num_test[i].num; j++) { 142 | feature_test.col(k) = features[it_num].fea.col(j); 143 | label_test(k) = i; 144 | k++; 145 | } 146 | } 147 | return true; 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /cpp/lfw_unrest_API.h: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | * * Designed by Junbo Zhao, working in Center for Intelligent Images and 
3 | * * Document Processing Lab, Tsinghua University. 4 | * * Better using Labeled Faces in the Wild (LFW). 5 | * * ***********************************************************************/ 6 | 7 | #ifndef _LFW_UNREST_API_H_ 8 | #define _LFW_UNREST_API_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | using namespace Eigen; 19 | 20 | class dict{ 21 | public: 22 | string name; 23 | int num; 24 | MatrixXd fea; 25 | dict(string na, int nu){ 26 | name = na; 27 | num = nu; 28 | } 29 | dict(string na, int nu, MatrixXd fe){ 30 | name = na; 31 | num = nu; 32 | fea = fe; 33 | } 34 | }; 35 | 36 | inline int sum_sample_num(vector fea){ // returns the sum of the num fields of all records; inline because it is defined in a header 37 | int i, sum=0; 38 | for(i = 0; i < fea.size(); i++) { 39 | sum += fea[i].num; 40 | } 41 | return sum; // a non-void function must return a value on every path; callers size their matrices from this result 42 | } 43 | 44 | bool UnRestLFW(string filename, int fold, vector features, 45 | MatrixXd& feature_train, MatrixXd& feature_test, 46 | VectorXd& label_train, VectorXd& label_test, int axis=0); 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /py/lfw_rest_API.py: -------------------------------------------------------------------------------- 1 | # Designed by Junbo Zhao, * * Designed by Junbo Zhao, 2 | # working in Center for Intelligent Images and Document 3 | # Processing Lab, Tsinghua University. 4 | 5 | import os 6 | import sys 7 | import numpy as np 8 | # Note that this script can be used as an API for processing 9 | # the Labeled Faces in Wild benchmark. 10 | # Regularly building cross-validation training and testing dataset. 11 | # 10 Fold 12 | 13 | 14 | def restricted_lfw_API(filename, fold, features, axis=0): 15 | # axis: INPUT and OUTPUT features are stored column-ordred or row-ordred. 16 | # And axis = 0 meansrow orded, axis = 1 means column-ordred. 17 | # The default setting is axis = 0. 
18 | # Read the whole dataset 19 | if not isinstance(features, dict): 20 | print "The featuers (para NO.3) must be a dictionry data, whose keys\ 21 | indicate the people's name and values are extracted\ 22 | feature vectors" 23 | return False 24 | if not os.path.isfile(filename): 25 | return False 26 | fid = file(filename) 27 | Nfold = int(fid.readline().split()[0]) 28 | Npos_neg = int(fid.readline().split()[1]) 29 | per_fea_num = [dict()] * Nfold 30 | per_fea_num_train = dict() 31 | per_fea_num_test = dict() 32 | for i in range(Nfold): 33 | pos_pair = [] 34 | neg_pair = [] 35 | pos_sample_num = [] 36 | neg_sample_num = [] 37 | for j in range(Npos_neg): 38 | pos_line = fid.readline() 39 | pos_line = pos_line.split() 40 | pos_pair.append(pos_line[0]) 41 | pos_sample_num.append([int(pos_line[1], int(pos_line[2]))]) 42 | for j in range(Npos_neg): 43 | neg_line = fid.readline() 44 | neg_line = neg_line.split() 45 | neg_pair.append([neg_line[0], neg_line[2]]) 46 | neg_sample_num.append([int(neg_line[1], int(neg_line[3]))]) 47 | per_fea_num[i] = dict(zip(pos_pair, pos_sample_num)).items() + \ 48 | dict(zip(neg_pair, neg_sample_num)).items() 49 | if i is fold: 50 | per_fea_num_test = per_fea_num[i] 51 | else: 52 | per_fea_num_train.update(per_fea_num[i]) 53 | 54 | # QA, to make sure file is completed. 
55 | assert(len(fid.readline()) == 0) 56 | fid.close() 57 | 58 | if axis is 0: 59 | feature_train = np.zeros((1, features.shape[1])) 60 | feature_test = np.zeros((1, features.shape[1])) 61 | label_train = [] 62 | label_test = [] 63 | for per in per_fea_num_train.keys(): 64 | if len(per) is 1: 65 | feature_train = np.hstack( 66 | feature_train, 67 | features[per][per_fea_num_train.get(per), :]) 68 | if len(per) is 2: 69 | feature_train = np.hstack( 70 | feature_train, 71 | features[per[0]][per_fea_num_train.get(per)[0], :]) 72 | feature_train = np.hstack( 73 | feature_train, 74 | features[per[1]][per_fea_num_train.get(per)[1], :]) 75 | else: 76 | sys.exit(-1) 77 | feature_train = np.delete(feature_train, 0) 78 | label_train = [1] * Npos_neg + [-1] * Npos_neg 79 | 80 | for per in per_fea_num_test.keys(): 81 | if len(per) is 1: 82 | feature_test = np.hstack( 83 | feature_test, 84 | features[per][per_fea_num_test.get(per), :]) 85 | if len(per) is 2: 86 | feature_test = np.hstack( 87 | feature_test, 88 | features[per[0]][per_fea_num_test.get(per)[0], :]) 89 | feature_test = np.hstack( 90 | feature_test, 91 | features[per[1]][per_fea_num_test.get(per)[1], :]) 92 | else: 93 | sys.exit(-1) 94 | feature_test = np.delete(feature_test, 0) 95 | label_test = [1] * Npos_neg + [-1] * Npos_neg 96 | return dict(zip(["train", "test"], [(feature_train, label_train), 97 | [feature_test, label_test]])) 98 | 99 | elif axis is 1: 100 | feature_train = np.zeros((features.shape[1], 1)) 101 | feature_test = np.zeros((features.shape[1], 1)) 102 | label_train = [] 103 | label_test = [] 104 | for per in per_fea_num_train.keys(): 105 | if len(per) is 1: 106 | feature_train = np.vstack( 107 | feature_train, 108 | features[per][:, per_fea_num_train.get(per)]) 109 | if len(per) is 2: 110 | feature_train = np.vstack( 111 | feature_train, 112 | features[per[0]][:, per_fea_num_train.get(per)[0]]) 113 | feature_train = np.hstack( 114 | feature_train, 115 | features[per[1]][:, 
per_fea_num_train.get(per)[1]]) 116 | else: 117 | sys.exit(-1) 118 | feature_train = np.delete(feature_train, 0, 1) 119 | label_train = [1] * Npos_neg + [-1] * Npos_neg 120 | 121 | for per in per_fea_num_test.keys(): 122 | if len(per) is 1: 123 | feature_test = np.vstack( 124 | feature_test, 125 | features[per][:, per_fea_num_test.get(per)]) 126 | if len(per) is 2: 127 | feature_test = np.vstack( 128 | feature_test, 129 | features[per[0]][:, per_fea_num_test.get(per)[0]]) 130 | feature_test = np.hstack( 131 | feature_test, 132 | features[per[1]][:, per_fea_num_test.get(per)[1]]) 133 | else: 134 | sys.exit(-1) 135 | feature_test = np.delete(feature_test, 0, 1) 136 | label_test = [1] * Npos_neg + [-1] * Npos_neg 137 | return dict(zip(["train", "test"], [(feature_train, label_train), 138 | [feature_test, label_test]])) 139 | else: 140 | print "The input parameter 'axis' is limited to 0 and 1" 141 | return False 142 | -------------------------------------------------------------------------------- /py/lfw_unrest_API.py: -------------------------------------------------------------------------------- 1 | # Designed by Junbo Zhao, * * Designed by Junbo Zhao, 2 | # working in Center for Intelligent Images and Document 3 | # Processing Lab, Tsinghua University. 4 | 5 | import os 6 | import numpy as np 7 | # Note that this script can be used as an API for processing 8 | # the Labeled Faces in Wild benchmark. 9 | # Regularly building cross-validation training and testing dataset. 10 | # 10 Fold 11 | 12 | 13 | def unrestricted_lfw_API(filename, fold, features, axis=0): 14 | # axis: INPUT and OUTPUT features are stored column-ordred or row-ordred. 15 | # And axis = 0 meansrow orded, axis = 1 means column-ordred. 16 | # The default setting is axis = 0. 
17 | # Read the whole dataset 18 | if not isinstance(features, dict): 19 | print "The features (para NO.3) must be a dictionry data, whose keys\ 20 | indicate the people's name and values are extracted\ 21 | feature vectors" 22 | return False 23 | if not os.path.isfile(filename): 24 | return False 25 | fid = file(filename) 26 | Nfold = int(fid.readline()) 27 | per_fea_num = [dict()] * Nfold 28 | per_fea_num_train = dict() 29 | per_fea_num_test = dict() 30 | for i in range(Nfold): 31 | person_num = int(fid.readline()) 32 | person = [] 33 | sample_num = [] 34 | for j in range(person_num): 35 | line = fid.readline() 36 | line = line.split() 37 | person.append(line[0]) 38 | sample_num.append(int(line[1])) 39 | per_fea_num[i] = dict(zip(person, sample_num)) 40 | if i is fold: 41 | per_fea_num_test = per_fea_num[i] 42 | else: 43 | per_fea_num_train.update(per_fea_num[i]) 44 | 45 | # QA, to make sure file is completed. 46 | assert(len(fid.readline()) == 0) 47 | fid.close() 48 | 49 | if axis is 0: 50 | feature_train = np.zeros((1, features.shape[1])) 51 | feature_test = np.zeros((1, features.shape[1])) 52 | label_train = [] 53 | label_test = [] 54 | for per in per_fea_num_train.keys(): 55 | feature_train = np.hstack(feature_train, features[per]) 56 | label_train = [per_fea_num_train.key().index(per)] * \ 57 | per_fea_num_train.get(per) 58 | # QA 59 | assert(per_fea_num_train.get(per) == features[per].shape[0]) 60 | feature_train = np.delete(feature_train, 0) 61 | label_train = np.array(label_train) 62 | 63 | for per in per_fea_num_test.get(per): 64 | feature_test = np.hstack(feature_test, features[per]) 65 | label_test = [per_fea_num_test.key().index(per)] * \ 66 | per_fea_num_test.get(per) 67 | # QA 68 | assert(per_fea_num_test.get(per) == features[per].shape[0]) 69 | feature_test = np.delete(feature_test, 0) 70 | label_test = np.array(label_test) 71 | return dict(zip(["train", "test"], [(feature_train, label_train), 72 | [feature_test, label_test]])) 73 | 74 | elif axis 
is 1: 75 | feature_train = np.zeros((features.shape[1], 1)) 76 | feature_test = np.zeros((features.shape[1], 1)) 77 | label_train = [] 78 | label_test = [] 79 | for per in per_fea_num_train.keys(): 80 | feature_train = np.vstack(feature_train, features[per]) 81 | label_train = [per_fea_num_train.key().index(per)] * \ 82 | per_fea_num_train.get(per) 83 | # QA 84 | assert(per_fea_num_train.get(per) == features[per].shape[1]) 85 | feature_train = np.delete(feature_train, 0, 1) 86 | label_train = np.array(label_train) 87 | 88 | for per in per_fea_num_test.get(per): 89 | feature_test = np.vstack(feature_test, features[per]) 90 | label_test = [per_fea_num_test.key().index(per)] * \ 91 | per_fea_num_test.get(per) 92 | # QA 93 | assert(per_fea_num_test.get(per) == features[per].shape[1]) 94 | feature_test = np.delete(feature_test, 0, 1) 95 | label_test = np.array(label_test) 96 | return dict(zip(["train", "test"], [(feature_train, label_train), 97 | [feature_test, label_test]])) 98 | 99 | else: 100 | print "The input parameter 'axis' is limited to 0 and 1" 101 | return False 102 | --------------------------------------------------------------------------------