├── DCG_scorer.cpp ├── DCG_scorer.h ├── DataType.h ├── Evaluator.cpp ├── Evaluator.h ├── Layer.cpp ├── Layer.h ├── MetricScorer.cpp ├── MetricScorer.h ├── Neuron.cpp ├── Neuron.h ├── PropParameter.cpp ├── PropParameter.h ├── RankNet.cpp ├── RankNet.h ├── Ranker.cpp ├── Ranker.h ├── Synapse.cpp ├── Synapse.h ├── TransferFunction.cpp ├── TransferFunction.h ├── Util.cpp ├── Util.h └── main.cpp /DCG_scorer.cpp: -------------------------------------------------------------------------------- 1 | #include "DCG_scorer.h" 2 | #include "math.h" 3 | 4 | double DCG_scorer::score(RankList* rl) 5 | { 6 | vector rel; 7 | int m = rl->getDatapointsCnt(); 8 | for (int i = 0; i < m; i++) 9 | { 10 | rel.push_back(rl->getiDataPoint(i)->getLabel()); 11 | } 12 | 13 | if (rel.size() == 0) 14 | return -1.0; 15 | 16 | return get_dcg(rel, k); 17 | } 18 | 19 | double DCG_scorer::get_dcg( vector &v, int &k) 20 | { 21 | int kk = k; 22 | if (k > (int)v.size() || k <= 0) 23 | kk = (int)v.size(); 24 | 25 | double dcg = 0.0; 26 | for (int i = 1 ; i <= kk; i++) 27 | { 28 | dcg += (pow(2.0, v.at(i-1)) - 1.0) / (log(i+1)/log(2.0)); 29 | } 30 | return dcg; 31 | } 32 | 33 | void DCG_scorer::swapChange(RankList* rl, double** &changes) 34 | { 35 | int m = rl->getDatapointsCnt(); 36 | int size = (m > k) ? k : m; 37 | for (int i = 0; i < size; i++) 38 | { 39 | int p1 = i + 1; 40 | for (int j = i + 1; j < m; j++) 41 | { 42 | int p2 = j + 1; 43 | changes[j][i] = changes[i][j] = (1.0 / (log(p1+1)/log(2)) - 1.0 / (log(p2+1)/log(2))) \ 44 | * (pow(2.0, rl->getiDataPoint(i)->getLabel()) - pow(2.0, rl->getiDataPoint(j)->getLabel())); 45 | } 46 | } 47 | } 48 | 49 | string DCG_scorer::name() 50 | { 51 | return "DCG@" + k; 52 | 53 | } 54 | -------------------------------------------------------------------------------- /DCG_scorer.h: -------------------------------------------------------------------------------- 1 | #ifndef DCG_SCORER_H 2 | #define DCG_SCORER_H 3 | #include "MetricScorer.h" 4 | 5 | 6 | class DCG_scorer : public MetricScorer 7 | { 8 | public: 9 | DCG_scorer() : MetricScorer() 10 | { 11 | 12 | }; 13 | virtual ~DCG_scorer(){} 14 | 15 | virtual double score(RankList *rl); 16 | virtual string name(); 17 | virtual void swapChange(RankList* rl, double** &changes); 18 | 19 | double get_dcg(vector &v, int &k); 20 | protected: 21 | private: 22 | }; 23 | 24 | #endif // DCG_SCORER_H 25 | -------------------------------------------------------------------------------- /DataType.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_TYPE_H 2 | #define DATA_TYPE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "Util.h" 16 | 17 | using namespace std; 18 | 19 | class DataPoint 20 | { 21 | public: 22 | DataPoint(int max_feature_cnt) 23 | { 24 | features_ = new float[max_feature_cnt + 1]; 25 | for (int i = 0; i != max_feature_cnt; i++) 26 | features_[i] = FLT_MIN; 27 | max_feature_cnt_ = max_feature_cnt; 28 | } 29 | virtual ~DataPoint() 30 | { 31 | delete features_; 32 | } 33 | 34 | 35 | float label_; 36 | int queryid_; 37 | float cached_; 38 | double model_score; 39 | float *features_; 40 | int max_feature_cnt_; 41 | 42 | public: 43 | void setLabel(float label) 44 | { 45 | label_ = label; 46 | } 47 | 48 | float getLabel() 49 | { 50 | return label_; 51 | } 52 | 53 | void setQueryid(int queryid) 54 | { 55 | queryid_ = queryid; 56 | } 57 | 58 | int getQueryid() 59 | { 60 | return queryid_; 61 | } 62 | 63 | void setCached(float cached) 64 | { 65 | cached_ = cached; 66 | } 67 | 68 | void setModelscore(float cached) 69 | { 70 | model_score = cached; 71 | } 72 | 73 | float getiFeature(int i) 74 | { 75 | if (i >= 0 || i <= max_feature_cnt_) 76 | { 77 | return features_[i]; 78 | } 79 | return FLT_MIN; 80 | } 81 | 82 | void setiFeature(int i, float fval) 83 | { 84 | features_[i] = fval; 85 | } 86 | 87 | 88 | }; 89 | 90 | 91 | 92 | class RankList 93 | { 94 | public: 95 | RankList() 96 | { 97 | dp_cnt_ = 0; 98 | rl_ = new vector (); 99 | } 100 | 101 | virtual ~RankList() 102 | { 103 | // cout << "close RankList\n"; 104 | for (vector::iterator it = rl_->begin(); 105 | it != rl_->end(); 106 | it++) 107 | delete *it; 108 | delete rl_; 109 | } 110 | 111 | void add(DataPoint* dp) 112 | { 113 | rl_->push_back(dp); 114 | dp_cnt_++; 115 | } 116 | 117 | int getDatapointsCnt() 118 | { 119 | return dp_cnt_; 120 | } 121 | 122 | DataPoint* getiDataPoint(int i) 123 | { 124 | return (*rl_)[i]; 125 | } 126 | 127 | static bool labelCmp(const DataPoint* dp1, const DataPoint* dp2) 128 | { 129 | return dp1->label_ < dp2->label_; 130 | } 131 | 132 | void getCorrectRanking(vector &dpv) 133 | { 134 | dpv.assign(rl_->begin(), rl_->end()); 135 | std::sort(dpv.begin(), dpv.end(), labelCmp); 136 | } 137 | 138 | static bool modelScoreCmp(const DataPoint* m1, const DataPoint* m2) 139 | { 140 | return m2->model_score < m1->model_score; 141 | } 142 | 143 | void sortByModelscore() 144 | { 145 | std::sort(rl_->begin(), rl_->end(), modelScoreCmp); 146 | } 147 | 148 | vector *rl_; 149 | int dp_cnt_; // datapoint个数 150 | }; 151 | 152 | class DataSet 153 | { 154 | public: 155 | 156 | DataSet() 157 | { 158 | total_query_cnt = 100000; 159 | rls = (RankList**) malloc(100000 * sizeof(RankList*)); 160 | memset(rls, 0, 100000 * sizeof(RankList*)); 161 | } 162 | 163 | DataSet(char* datafile_) 164 | { 165 | max_feature_cnt = 0; 166 | total_query_cnt = 0; 167 | total_doc_cnt = 0; 168 | datafile = datafile_; 169 | } 170 | 171 | void add(int i, RankList* rl) 172 | { 173 | rls[i] = rl; 174 | } 175 | 176 | ~DataSet() 177 | { 178 | //cout << "start close dataset\n"; 179 | for (int i = 1; i <= total_query_cnt; i++) 180 | { 181 | if (rls[i]) 182 | delete rls[i]; 183 | } 184 | delete rls; 185 | //cout << "finish close dataset\n"; 186 | } 187 | 188 | int max_feature_cnt; 189 | int total_query_cnt; 190 | int total_doc_cnt; 191 | char *datafile; 192 | RankList **rls; 193 | 194 | 195 | RankList* getithRankList(int i) 196 | { 197 | return rls[i]; 198 | } 199 | void printDataSetInfo() 200 | { 201 | cout << "max_feature_cnt: " << max_feature_cnt << endl; 202 | cout << "total_query_cnt: " << total_query_cnt << endl; 203 | cout << "total_doc_cnt: " << total_doc_cnt << endl; 204 | } 205 | 206 | void init() 207 | { 208 | getDatasetInfo(); 209 | open(); 210 | load(); 211 | cout << "finish init" << endl; 212 | printDataSetInfo(); 213 | } 214 | 215 | void open() 216 | { 217 | rls = (RankList**) malloc((total_query_cnt+1) * sizeof(RankList*)); 218 | if (rls == NULL) 219 | { 220 | fprintf(stderr, "out of memory\n"); 221 | return; 222 | } 223 | memset(rls, 0, (total_query_cnt+1) * sizeof(RankList*)); 224 | } 225 | 226 | /* void close() 227 | { 228 | cout << "close dataset\n"; 229 | for (int i = 0; i < total_query_cnt; i++) 230 | { 231 | delete rls[i]; 232 | } 233 | delete rls; 234 | } 235 | */ 236 | void getDatasetInfo() 237 | { 238 | ifstream in(datafile); 239 | if (!in) 240 | { 241 | cerr << "cannot open " << string(datafile) << endl; 242 | return; 243 | } 244 | string line; 245 | max_feature_cnt = 0; 246 | total_query_cnt = 0; 247 | total_doc_cnt = 0; 248 | float max_label = -1; 249 | float min_label = 1; 250 | int pre_qid = -1; 251 | while(getline(in, line)) 252 | { 253 | total_doc_cnt++; 254 | //0 qid:1 1:1 3:3 # 255 | vector tokens; 256 | string::size_type pos = line.find("#"); 257 | string str; 258 | if (pos != string::npos) 259 | str = line.substr(0, pos); 260 | else 261 | str = line; 262 | splitString(str, tokens, " "); 263 | float label = atof(tokens[0].c_str()); 264 | if (label > max_label) max_label = label; 265 | if (label < min_label) min_label = label; 266 | 267 | int qid = atoi(tokens[1].substr(tokens[1].find(":")+1).c_str()); 268 | if (qid != pre_qid) 269 | { 270 | total_query_cnt++; 271 | } 272 | pre_qid = qid; 273 | 274 | for (size_t i = 2; i != tokens.size(); i++) 275 | { 276 | int featureid = atoi(tokens[i].substr(0, tokens[i].find(":")+1).c_str()); 277 | if (featureid > max_feature_cnt) 278 | max_feature_cnt = featureid; 279 | } 280 | } 281 | 282 | in.close(); 283 | } 284 | 285 | void load() 286 | { 287 | ifstream in(datafile); 288 | if (!in) 289 | { 290 | cerr << "cannot open " << string(datafile) << endl; 291 | return; 292 | } 293 | string line; 294 | int pre_qid = -1; 295 | int qid = -1; 296 | RankList *rl = new RankList(); 297 | while(getline(in, line)) 298 | { 299 | //cout << "line:" << line << endl; 300 | //0 qid:1 1:1 3:3 # 301 | DataPoint *dp = new DataPoint(max_feature_cnt); 302 | 303 | vector tokens; 304 | string::size_type pos = line.find("#"); 305 | string str; 306 | if (pos != string::npos) 307 | str = line.substr(0, pos); 308 | else 309 | str = line; 310 | splitString(str, tokens, " "); 311 | float label = atof(tokens[0].c_str()); 312 | qid = atoi(tokens[1].substr(tokens[1].find(":")+1).c_str()); 313 | dp->setLabel(label); 314 | dp->setQueryid(qid); 315 | 316 | if (qid != pre_qid && pre_qid != -1) 317 | { 318 | rls[pre_qid] = rl; 319 | rl = new RankList(); 320 | } 321 | pre_qid = qid; 322 | 323 | for (size_t i = 2; i != tokens.size(); i++) 324 | { 325 | int featureid = atoi(tokens[i].substr(0, tokens[i].find(":")).c_str()); 326 | float val = atof(tokens[i].substr(tokens[i].find(":")+1).c_str()); 327 | dp->setiFeature(featureid, val); 328 | } 329 | rl->add(dp); 330 | } 331 | rls[qid] = rl; 332 | cout << "finish load data" << endl; 333 | in.close(); 334 | } 335 | }; 336 | 337 | 338 | #endif 339 | -------------------------------------------------------------------------------- /Evaluator.cpp: -------------------------------------------------------------------------------- 1 | #include "Evaluator.h" 2 | 3 | Evaluator::Evaluator() 4 | { 5 | //ctor 6 | } 7 | 8 | Evaluator::~Evaluator() 9 | { 10 | //dtor 11 | } 12 | 13 | void Evaluator::getFeatureIDFromFile(vector &featureids, map &featureNameIdMap ) 14 | { 15 | if(strlen(featurefile) == 0) 16 | return ; 17 | 18 | ifstream in(featurefile); 19 | if (!in) 20 | { 21 | cerr << "cannot open " << string(featurefile) << endl; 22 | return; 23 | } 24 | string line; 25 | while(getline(in, line)) 26 | { 27 | vector tokens; 28 | splitString(line, tokens, "\t"); 29 | int fid = atoi(tokens[0].c_str()); 30 | string featurename = "NONE"; 31 | 32 | if (tokens.size() >= 2) 33 | featurename = tokens[1]; 34 | featureNameIdMap.insert(make_pair(fid, featurename)); 35 | featureids.push_back(fid); 36 | } 37 | } 38 | 39 | void Evaluator::evaluate() 40 | { 41 | DataSet* trainSet = new DataSet(trainfile); 42 | DataSet* testSet = NULL; 43 | if (strlen(testfile) != 0) 44 | testSet = new DataSet(testfile); 45 | DataSet* validateSet= NULL; 46 | if (strlen(validatefile) != 0) 47 | validateSet = new DataSet(validatefile); 48 | 49 | trainSet->init(); 50 | testSet->init(); 51 | validateSet->init(); 52 | 53 | vector *featureids = new vector(); 54 | map featureNameIdMap; 55 | getFeatureIDFromFile(*featureids, featureNameIdMap); 56 | 57 | rank = new RankNet(); 58 | rank->setTrainMetricScorer(trainScorer); 59 | rank->setTestMetricScorer(testScorer); 60 | rank->setTrainDataSet(trainSet); 61 | rank->setTestDataSet(testSet); 62 | rank->setValidateDataSet(validateSet); 63 | rank->setFeaturs(featureids); 64 | 65 | rank->init(); 66 | rank->learn(); 67 | } 68 | -------------------------------------------------------------------------------- /Evaluator.h: -------------------------------------------------------------------------------- 1 | #ifndef EVALUATOR_H 2 | #define EVALUATOR_H 3 | 4 | #include "MetricScorer.h" 5 | #include "Ranker.h" 6 | #include "RankNet.h" 7 | 8 | class Evaluator 9 | { 10 | public: 11 | Evaluator(); 12 | virtual ~Evaluator(); 13 | 14 | char *trainfile; 15 | char *testfile; 16 | char *validatefile; 17 | char *featurefile; 18 | char *modelfile; 19 | 20 | MetricScorer *trainScorer; 21 | MetricScorer *testScorer; 22 | Ranker *rank; 23 | 24 | void init(); 25 | void getFeatureIDFromFile(vector &featureids, map &featureNameIdMap); 26 | void evaluate(); 27 | void getFeatureIDFromFile( ); 28 | 29 | protected: 30 | private: 31 | }; 32 | 33 | #endif // EVALUATOR_H 34 | -------------------------------------------------------------------------------- /Layer.cpp: -------------------------------------------------------------------------------- 1 | #include "Layer.h" 2 | -------------------------------------------------------------------------------- /Layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LAYER_H 2 | #define LAYER_H 3 | #include 4 | #include 5 | using std::vector; 6 | using std::cout; 7 | #include "Neuron.h" 8 | 9 | class Layer 10 | { 11 | public: 12 | Layer() { } 13 | Layer(int n) 14 | { 15 | size_ = n; 16 | neurons = new vector(); 17 | for (int i = 0; i < n; i++) 18 | neurons->push_back(new Neuron()); 19 | } 20 | virtual ~Layer() 21 | { 22 | if (neurons) 23 | { 24 | for (std::size_t i = 0; i < neurons->size(); i++) 25 | if(neurons->at(i)) 26 | delete neurons->at(i); 27 | } 28 | } 29 | 30 | 31 | vector *neurons; 32 | int size_; 33 | 34 | Neuron* get(int k) 35 | { 36 | return neurons->at(k); 37 | } 38 | 39 | int size() 40 | { 41 | return size_; 42 | } 43 | // 44 | void computerDeltaForOutputlayer(PropParameter *param) 45 | { 46 | for (std::size_t i = 0; i < neurons->size(); i++) 47 | { 48 | neurons->at(i)->computerDeltaOfOuputLayer(param); 49 | } 50 | } 51 | 52 | void updateDelta(PropParameter *param) 53 | { 54 | for (std::size_t i = 0; i < neurons->size(); i++) 55 | neurons->at(i)->updateDelta(param); 56 | } 57 | 58 | void updateWeight(PropParameter *param) 59 | { 60 | for (std::size_t i = 0; i < neurons->size(); i++) 61 | neurons->at(i)->updateWeight(param); 62 | } 63 | 64 | void computeOutput() 65 | { 66 | for (vector::iterator it = neurons->begin(); it != neurons->end(); it++) 67 | { 68 | (*it)->computeOutput(); 69 | } 70 | } 71 | 72 | void computeOutput(int i) 73 | { 74 | for (vector::iterator it = neurons->begin(); it != neurons->end(); it++) 75 | { 76 | (*it)->computeOutput(i); 77 | } 78 | } 79 | protected: 80 | private: 81 | }; 82 | 83 | #endif // LAYER_H 84 | -------------------------------------------------------------------------------- /MetricScorer.cpp: -------------------------------------------------------------------------------- 1 | #include "MetricScorer.h" 2 | 3 | MetricScorer::MetricScorer() : k(10) 4 | { 5 | 6 | } 7 | 8 | MetricScorer::~MetricScorer() 9 | { 10 | //dtor 11 | } 12 | 13 | void MetricScorer::setK(int _k) 14 | { 15 | k = _k; 16 | } 17 | 18 | 19 | double MetricScorer::score(RankList **rls, int qsize) 20 | { 21 | double scores = 0.0; 22 | for (int i = 0; i < qsize; i++) 23 | scores += score(rls[i]); 24 | return scores / qsize; 25 | } 26 | 27 | double MetricScorer::score(RankList *rl) 28 | { 29 | // todo 30 | 31 | return 0; 32 | } 33 | 34 | void MetricScorer::swapChange(const RankList* rl, double** &changes) 35 | { 36 | 37 | } 38 | 39 | -------------------------------------------------------------------------------- /MetricScorer.h: -------------------------------------------------------------------------------- 1 | #ifndef METRIC_SCORER_H 2 | #define METRIC_SCORER_H 3 | 4 | #include "DataType.h" 5 | #include 6 | using namespace std; 7 | 8 | class MetricScorer 9 | { 10 | public: 11 | MetricScorer(); 12 | virtual ~MetricScorer(); 13 | 14 | void setK(int k); 15 | double score(RankList **rls, int qsize); 16 | 17 | virtual double score(RankList *rl); 18 | virtual string name() = 0; 19 | virtual void swapChange(const RankList* rl, double** &changes); 20 | 21 | protected: 22 | int k; 23 | 24 | private: 25 | }; 26 | 27 | #endif // METRIC_SCORER_H 28 | -------------------------------------------------------------------------------- /Neuron.cpp: -------------------------------------------------------------------------------- 1 | #include "Neuron.h" 2 | #include "Synapse.h" 3 | #include 4 | 5 | Neuron::Neuron() 6 | { 7 | tfunc = new LogiFunction(); 8 | output = 0.0;// sigmoid(wsum) 9 | outputs = new vector() ; 10 | delta_i = 0.0; 11 | deltas_j = new vector(); 12 | 13 | inLinks = new vector(); 14 | outLinks= new vector(); 15 | } 16 | 17 | void Neuron::computeOutput() 18 | { 19 | Synapse *s; 20 | double wsum = 0.0; 21 | for (std::size_t j = 0; j != inLinks->size(); j ++) 22 | { 23 | s = inLinks->at(j); 24 | wsum += s->getSource()->getOutput() * s->getWeight(); 25 | } 26 | output = tfunc->compute(wsum); 27 | } 28 | 29 | void Neuron::computeOutput(int i) 30 | { 31 | Synapse *s; 32 | double wsum = 0.0; 33 | for (std::size_t j = 0; j != inLinks->size(); j ++) 34 | { 35 | s = inLinks->at(j); 36 | wsum += s->getSource()->getOutput(i) * s->getWeight(); 37 | } 38 | outputs->push_back(tfunc->compute(wsum)); 39 | } 40 | 41 | void Neuron::computerDeltaOfOuputLayer(PropParameter *param) 42 | { 43 | int ** pairMap = param->pairMap; 44 | int current = param->current; 45 | 46 | for (int k = 0; k < param->pairsCnt; k++) 47 | { 48 | int j = pairMap[current][k]; 49 | float weight = 1.0f; 50 | double Pij = 0; 51 | if (param->pairWeight == NULL) //RankNet 52 | { 53 | weight = 1.0f; 54 | Pij = (double) (1.0 / (1.0 + exp(outputs->at(current)-outputs->at(j)))); 55 | //this is in fact not "pij", but "targetValue-pij": 1 - 1/(1+e^{-o_ij}) 56 | } 57 | else // LambdaRank 58 | { 59 | weight = param->pairWeight[current][k]; 60 | Pij = (double)( param->targetValue[current][k] - 1.0 / (1.0 + exp(-(outputs->at(current)-outputs->at(j))))); 61 | } 62 | 63 | double lambda = weight * Pij; 64 | delta_i += lambda; 65 | deltas_j->push_back(lambda * tfunc->computeDerivative(outputs->at(j))); 66 | } 67 | 68 | delta_i *= tfunc->computeDerivative(outputs->at(current)); 69 | } 70 | 71 | void Neuron::updateDelta(PropParameter *param) 72 | { 73 | int** pairMap = param->pairMap; 74 | int current = param->current; 75 | float** pairWeight = param->pairWeight; 76 | delta_i = 0; 77 | for (int k = 0; k < param->pairsCnt; k++) 78 | { 79 | int j = pairMap[current][k]; 80 | float weight; 81 | if (pairWeight == NULL) 82 | weight = 1.0f; 83 | else 84 | weight = pairWeight[current][k]; 85 | double errorSum = 0.0; 86 | for (std::size_t l = 0; l < outLinks->size(); l++) 87 | { 88 | Synapse *s = outLinks->at(l); 89 | errorSum += s->getTarget()->deltas_j->at(k) * s->weight; 90 | if (k == 0) 91 | delta_i += s->getTarget()->delta_i * s->weight; 92 | } 93 | if (k == 0) 94 | delta_i *= weight * tfunc->computeDerivative(outputs->at(current)); 95 | deltas_j->push_back(errorSum * weight * tfunc->computeDerivative(outputs->at(j))); 96 | } 97 | } 98 | 99 | void Neuron::updateWeight(PropParameter *param) 100 | { 101 | for (std::size_t k = 0; k < inLinks->size(); k++) 102 | { 103 | Synapse *s = inLinks->at(k); 104 | double sum_j = 0.0; 105 | for (std::size_t l = 0; l < deltas_j->size(); l++) 106 | { 107 | sum_j += deltas_j->at(l) * (s->getSource()->getOutput(param->pairMap[param->current][l])); 108 | } 109 | double dw = learningRate * (delta_i * s->getSource()->getOutput(param->current) - sum_j); 110 | s->setWeightAdjust(dw); 111 | s->updateWeight(); 112 | } 113 | } 114 | 115 | Neuron::~Neuron() 116 | { 117 | //dtor 118 | } 119 | -------------------------------------------------------------------------------- /Neuron.h: -------------------------------------------------------------------------------- 1 | #ifndef NEURON_H 2 | #define NEURON_H 3 | 4 | #include "TransferFunction.h" 5 | #include "Synapse.h" 6 | #include "PropParameter.h" 7 | #include 8 | using std::vector; 9 | 10 | class Synapse; 11 | class Neuron 12 | { 13 | public: 14 | Neuron(); 15 | virtual ~Neuron(); 16 | 17 | static double momentum; 18 | static double learningRate; 19 | 20 | TransferFunction *tfunc; 21 | double output;// sigmoid(wsum) 22 | vector *outputs; 23 | double delta_i; 24 | vector* deltas_j; 25 | 26 | vector *inLinks; 27 | vector *outLinks; 28 | 29 | double getOutput() 30 | { 31 | return output; 32 | } 33 | 34 | double getOutput(int i) 35 | { 36 | return outputs->at(i); 37 | } 38 | void setOutput(double v) 39 | { 40 | output = v; 41 | } 42 | void addOutputs(double v) 43 | { 44 | outputs->push_back(v); 45 | } 46 | 47 | void computeOutput(); 48 | void computeOutput(int i); 49 | void computerDeltaOfOuputLayer(PropParameter *param); 50 | void updateDelta(PropParameter *param); 51 | void updateWeight(PropParameter *param); 52 | protected: 53 | private: 54 | }; 55 | 56 | #endif // NEURON_H 57 | -------------------------------------------------------------------------------- /PropParameter.cpp: -------------------------------------------------------------------------------- 1 | #include "PropParameter.h" 2 | 3 | -------------------------------------------------------------------------------- /PropParameter.h: -------------------------------------------------------------------------------- 1 | #ifndef PROPPARAMETER_H 2 | #define PROPPARAMETER_H 3 | 4 | 5 | class PropParameter 6 | { 7 | public: 8 | PropParameter() 9 | :current(-1), pairsCnt(0), pairMap(0),pairWeight(0) 10 | { 11 | 12 | } 13 | 14 | PropParameter(int cur, int pairscnt, int***pairMapAddr) 15 | { 16 | current = cur; 17 | pairMap = *pairMapAddr; 18 | pairsCnt = pairscnt; 19 | pairWeight=0; 20 | } 21 | 22 | PropParameter(int cur, int***pairMapAddr, float ***pairWeightAddr, float ***targetValueAddr) 23 | { 24 | current = cur; 25 | pairMap = *pairMapAddr; 26 | pairWeight = *pairWeightAddr; 27 | targetValue = *targetValueAddr; 28 | } 29 | 30 | virtual ~PropParameter() 31 | { 32 | 33 | } 34 | 35 | int current; // index of current data point in the ranked list 36 | int pairsCnt; // length of pairMap[current] 37 | int **pairMap; 38 | 39 | float **pairWeight; 40 | float **targetValue; 41 | 42 | float *labels; 43 | protected: 44 | private: 45 | }; 46 | 47 | #endif // PROPPARAMETER_H 48 | -------------------------------------------------------------------------------- /RankNet.cpp: -------------------------------------------------------------------------------- 1 | #include "RankNet.h" 2 | 3 | int RankNet::nIteration = 100; 4 | int RankNet::nHiddenLayer = 1; 5 | int RankNet::nHiddenNodePerLayer = 3; 6 | double RankNet::learningRate = 0.00005; 7 | double Neuron::learningRate = 0.00005; 8 | 9 | 10 | RankNet::RankNet() 11 | { 12 | layers = new vector(); 13 | inputLayer = new Layer(); 14 | outputLayer = new Layer(); 15 | Synapses = new vector(); 16 | 17 | lastError = FLT_MAX; 18 | totalPairs = 0; 19 | misorderedPairs = 0; 20 | error = 0.0; 21 | straightLoss = 0; 22 | } 23 | 24 | RankNet::~RankNet() 25 | { 26 | if (layers) 27 | { 28 | for (std::size_t i = 0; i < layers->size(); i++) 29 | if ((*layers)[i]) delete (*layers)[i]; 30 | 31 | delete layers; 32 | } 33 | if (Synapses) 34 | { 35 | for (std::size_t i = 0; i < Synapses->size(); i++) 36 | if ((*Synapses)[i]) delete (*Synapses)[i]; 37 | 38 | delete Synapses; 39 | } 40 | 41 | } 42 | 43 | void RankNet::setInputOuput(int nInput, int nOutput) 44 | { 45 | inputLayer = new Layer(nInput + 1); // plus bias 46 | outputLayer = new Layer(nOutput); 47 | layers->clear(); 48 | layers->push_back(inputLayer); 49 | layers->push_back(outputLayer); 50 | } 51 | 52 | void RankNet::addHiddenLayer(int size) 53 | { 54 | layers->insert(layers->end() - 1, new Layer(size)); 55 | } 56 | 57 | void RankNet::wire() 58 | { 59 | // wire the input layer to the first hidden layer 60 | for (int i = 0; i < inputLayer->size() - 1; i++) 61 | for (std::size_t j = 0; j < layers->at(1)->size(); j++) 62 | connect(0, i, 1, j); 63 | 64 | // wire one layer to the next, starting from the first hidden layer 65 | for (std::size_t i = 1; i < layers->size()-1; i++) 66 | for (int j = 0; j < layers->at(i)->size(); j++) 67 | for(int k = 0 ; k < layers->at(i+1)->size(); k++) 68 | connect(i, j, i+1, k); 69 | //wire the "bias" neuron to all others of all layers 70 | for (std::size_t i = 1; i < layers->size(); i++) 71 | for (int j = 0; j < layers->at(i)->size(); j++) 72 | connect(0, inputLayer->size() - 1, i, j); 73 | 74 | // TODO (kevin#1#): initialize weights 75 | } 76 | 77 | 78 | void RankNet::connect(int sourceLayer, int sourceNeuron, int targetLayer, int targetNeuron) 79 | { 80 | Synapses->push_back(new Synapse(layers->at(sourceLayer)->get(sourceNeuron), layers->at(targetLayer)->get(targetNeuron))); 81 | } 82 | 83 | void RankNet::addInput(DataPoint *p) 84 | { 85 | for (int k = 0; k < inputLayer->size()-1; k++) 86 | inputLayer->get(k)->addOutputs(p->getiFeature(features->at(k))); 87 | // and now the bias node with a fix "1.0" 88 | inputLayer->get(inputLayer->size()-1)->addOutputs(1.0f); 89 | } 90 | 91 | void RankNet::propagate(int i) 92 | { 93 | for (std::size_t k = 1; k < layers->size(); k++) 94 | layers->at(k)->computeOutput(i); 95 | } 96 | 97 | void RankNet::batchFeedForward(RankList* &rl, int** &pairMap, int* paircnts) 98 | { 99 | for (int i = 0; i < rl->getDatapointsCnt(); i++) 100 | { 101 | addInput(rl->getiDataPoint(i)); 102 | propagate(i); 103 | 104 | int count = 0; 105 | for (int j = 0; j < rl->getDatapointsCnt(); j++) 106 | if (rl->getiDataPoint(i)->getLabel() > rl->getiDataPoint(j)->getLabel()) 107 | count++; 108 | pairMap[i] = new int[count]; 109 | paircnts[i] = count; 110 | int k = 0; 111 | for (int j = 0; j < rl->getDatapointsCnt(); j++) 112 | if (rl->getiDataPoint(i)->getLabel() > rl->getiDataPoint(j)->getLabel()) 113 | pairMap[i][k++] = j; 114 | } 115 | } 116 | 117 | /** 118 | * @parameter pairMap[i][k]= j 119 | * @parameter paircnt[i] 120 | */ 121 | void RankNet::batchBackPropagate(int** pairMap, int pointsCnt, int *pairsCnt) 122 | { 123 | for (int i = 0; i < pointsCnt; i++) 124 | { 125 | // back propagate 126 | PropParameter *p = new PropParameter(i, pairsCnt[i], &pairMap); 127 | // starting from output layer 128 | outputLayer->computerDeltaForOutputlayer(p); 129 | // back propagete to the first hidden layer 130 | for (int j = layers->size() - 2; j >= 1; j--) 131 | layers->at(j)->updateDelta(p); 132 | 133 | // weight update 134 | outputLayer->updateWeight(p); 135 | for (int j = layers->size() - 2; j >= 1; j--) 136 | layers->at(j)->updateWeight(p); 137 | delete p; 138 | } 139 | } 140 | 141 | 142 | void RankNet::init() 143 | { 144 | //Set up the network 145 | setInputOuput(features->size(), 1); 146 | for (int i = 0; i < nHiddenLayer; i++) 147 | addHiddenLayer(nHiddenNodePerLayer); 148 | wire(); 149 | 150 | totalPairs = 0; 151 | for(int i = 0; i < trainDataSet->total_query_cnt; i++) 152 | { 153 | /* RankList *rl = trainDataSet->getithRankList(i+1); 154 | //trainDataSet->getithRankList(i+1)->getCorrectRanking(rl); 155 | for(int j = 0; j < rl->dp_cnt_ - 1; j++) 156 | for (int k = j+1; k < rl->dp_cnt_; k++) 157 | if (rl->getiDataPoint(j)->getLabel() > rl->getiDataPoint(k)->getLabel()) 158 | totalPairs++; 159 | */ 160 | vector dpv; 161 | trainDataSet->getithRankList(i+1)->getCorrectRanking(dpv); 162 | for (size_t j = 0; j != dpv.size() - 1; j++) 163 | for (int k = j+1; k != dpv.size(); k++) 164 | if (dpv[j]->getLabel() > dpv[k]->getLabel()) 165 | totalPairs++; 166 | } 167 | if (validateDataSet != NULL) 168 | { 169 | for (std::size_t i = 0; i != layers->size(); i++) 170 | { 171 | vector v; 172 | bestModelOnValidation.push_back(v); 173 | } 174 | } 175 | } 176 | 177 | 178 | void RankNet::learn() 179 | { 180 | for (int i = 0; i < nIteration; i++) 181 | { 182 | cout << "iteration: "<< i <total_query_cnt; j++) 184 | { 185 | cout << "queryid: " << j << endl; 186 | RankList * rl = trainDataSet->getithRankList(j+1); 187 | int dpcnt = rl->getDatapointsCnt(); 188 | int *paircnts = new int[dpcnt]; 189 | int **pairMap = new int*[dpcnt]; 190 | 191 | batchFeedForward(rl, pairMap, paircnts); 192 | batchBackPropagate(pairMap, dpcnt, paircnts); 193 | 194 | // 后续优化 195 | delete paircnts; 196 | for (int k = 0; k < dpcnt; k++) 197 | { 198 | delete pairMap[k]; 199 | pairMap[k] = NULL; 200 | } 201 | delete pairMap; 202 | pairMap = NULL; 203 | paircnts = NULL; 204 | // clearNeuronOutputs??? 205 | } 206 | 207 | for (int j = 0; j < trainDataSet->total_query_cnt; j++) 208 | { 209 | RankList * rl = trainDataSet->getithRankList(j+1); 210 | for (int i = 0; i < rl->getDatapointsCnt(); i++) 211 | { 212 | double cur_val = eval(rl->getiDataPoint(i)); 213 | rl->getiDataPoint(i)->setModelscore(cur_val); 214 | } 215 | // sort rl's datapoint by cached. 216 | // todo 217 | rl->sortByModelscore(); 218 | scoreOnTrainData += trainScorer->score(rl); 219 | } 220 | scoreOnTrainData /= trainDataSet->total_query_cnt; 221 | cout << "scoreOnTrainData: " << scoreOnTrainData<total_query_cnt; j++) 240 | { 241 | RankList *rl = trainDataSet->getithRankList(j+1); 242 | for (int k = 0; k < rl->getDatapointsCnt(); k++) 243 | { 244 | double o1 = eval(rl->getiDataPoint(k)); 245 | for (int l = k + 1; l < rl->getDatapointsCnt(); l++) 246 | { 247 | if (rl->getiDataPoint(k)->getLabel() > rl->getiDataPoint(l)->getLabel()) 248 | { 249 | double o2 = eval(rl->getiDataPoint(l)); 250 | error += crossEntropy(o1, o2, 1.0f); 251 | if (o1 < o2) 252 | misorderedPairs++; 253 | } 254 | } 255 | } 256 | } 257 | error = 1.0 * error/totalPairs; 258 | lastError = error; 259 | cout << "error: " << error << " misorderedPairs: " <size() - 1; k++) 265 | inputLayer->get(k)->setOutput(p->getiFeature(k)); 266 | inputLayer->get(inputLayer->size() - 1)->setOutput(1.0f); 267 | 268 | for (std::size_t k = 1; k < layers->size(); k++) 269 | layers->at(k)->computeOutput(); 270 | val = outputLayer->get(0)->getOutput(); 271 | 272 | return val; 273 | } 274 | 275 | string RankNet::model() 276 | { 277 | string m; 278 | return m; 279 | } 280 | string RankNet::toString() 281 | { 282 | string m; 283 | return m; 284 | } 285 | string RankNet::name() 286 | { 287 | string m; 288 | return m; 289 | } 290 | void RankNet::printParameters() 291 | { 292 | 293 | 294 | } 295 | -------------------------------------------------------------------------------- /RankNet.h: -------------------------------------------------------------------------------- 1 | #ifndef RANKNET_H 2 | #define RANKNET_H 3 | 4 | #include "Ranker.h" 5 | #include "Layer.h" 6 | class Neuron; 7 | 8 | class RankNet : public Ranker 9 | { 10 | public: 11 | RankNet(); 12 | 13 | virtual ~RankNet(); 14 | 15 | virtual void init(); 16 | virtual void learn(); 17 | virtual double eval(DataPoint *p); 18 | 19 | virtual void load(string fn){ 20 | 21 | } 22 | virtual string model(); 23 | virtual string toString(); 24 | virtual string name(); 25 | virtual void printParameters(); 26 | 27 | static int nIteration; 28 | static int nHiddenLayer; 29 | static int nHiddenNodePerLayer; 30 | static double learningRate; 31 | 32 | vector *layers; 33 | Layer *inputLayer; 34 | Layer *outputLayer; 35 | vector *Synapses; 36 | vector > bestModelOnValidation; 37 | 38 | int totalPairs; 39 | int misorderedPairs; 40 | double error; 41 | double lastError; 42 | int straightLoss; 43 | 44 | void setInputOuput(int nInput, int nOutput); 45 | void setInputOuput(int nInput, int nOutput, int nType); 46 | void addHiddenLayer(int size); 47 | void wire(); 48 | void connect(int sourceLayer, int sourceNeuron, int targetLayer, int targetNeuron); 49 | void addInput(DataPoint *p); 50 | void propagate(int i); 51 | void batchFeedForward(RankList* &rl, int** &pairMap, int* paircnts); 52 | void batchBackPropagate(int** pairMap, int pointsCnt, int *pairsCnt); 53 | void clearNeuronOutputs(); 54 | void computePairWeight(int**pairMap, int pointsCnt, int**pairWeight, int *paircnts, RankList rl); 55 | void saveBestModelOnValidation(); 56 | void restoreBestModelOnValidation(); 57 | double crossEntropy(double o1, double o2, double targetValue); 58 | void estimateLoss() ; 59 | void printNetworkConfig(); 60 | void printWeightVector(); 61 | 62 | 63 | 64 | protected: 65 | private: 66 | }; 67 | 68 | #endif // RANKNET_H 69 | -------------------------------------------------------------------------------- /Ranker.cpp: -------------------------------------------------------------------------------- 1 | #include "Ranker.h" 2 | 3 | Ranker::Ranker() 4 | { 5 | //ctor 6 | } 7 | 8 | Ranker::~Ranker() 9 | { 10 | //dtor 11 | } 12 | -------------------------------------------------------------------------------- /Ranker.h: -------------------------------------------------------------------------------- 1 | #ifndef RANKER_H 2 | #define RANKER_H 3 | 4 | #include "DataType.h" 5 | #include "DCG_scorer.h" 6 | 7 | class Ranker 8 | { 9 | public: 10 | Ranker(); 11 | virtual ~Ranker(); 12 | 13 | DataSet *trainDataSet; 14 | DataSet *testDataSet; 15 | DataSet *validateDataSet; 16 | 17 | vector *features; 18 | MetricScorer *trainScorer; 19 | MetricScorer *testScorer; 20 | double scoreOnTrainData; 21 | double bestScoreOnValidationData; 22 | 23 | static bool verbose; 24 | 25 | virtual void init() = 0; 26 | virtual void learn() = 0; 27 | virtual double eval(DataPoint *p) = 0; 28 | 29 | virtual string model() = 0; 30 | virtual string toString() = 0; 31 | virtual string name() = 0; 32 | virtual void printParameters() = 0; 33 | 34 | void setTrainDataSet(DataSet *ds) 35 | { 36 | trainDataSet = ds; 37 | } 38 | 39 | void setTestDataSet(DataSet *ds) 40 | { 41 | testDataSet = ds; 42 | } 43 | 44 | void setValidateDataSet(DataSet *ds) 45 | { 46 | validateDataSet = ds; 47 | } 48 | 49 | void setTrainMetricScorer(MetricScorer *scorer) 50 | { 51 | trainScorer = scorer; 52 | } 53 | 54 | void setTestMetricScorer(MetricScorer *scorer) 55 | { 56 | testScorer = scorer; 57 | } 58 | 59 | void setFeaturs(vector *f) 60 | { 61 | features = f; 62 | } 63 | 64 | double getScoreOnTrainData() 65 | { 66 | return scoreOnTrainData; 67 | } 68 | 69 | double getScoreOnValidationData() 70 | { 71 | return bestScoreOnValidationData; 72 | } 73 | 74 | vector* getFeatures() 75 | { 76 | return features; 77 | } 78 | 79 | protected: 80 | private: 81 | }; 82 | 83 | #endif // RANKER_H 84 | -------------------------------------------------------------------------------- /Synapse.cpp: -------------------------------------------------------------------------------- 1 | #include "Synapse.h" 2 | -------------------------------------------------------------------------------- /Synapse.h: -------------------------------------------------------------------------------- 1 | #ifndef SYNAPSE_H 2 | #define SYNAPSE_H 3 | 4 | #include "Neuron.h" 5 | 6 | class Neuron; 7 | class Synapse 8 | { 9 | public: 10 | Synapse() 11 | { 12 | } 13 | Synapse(Neuron *s, Neuron *t) 14 | { 15 | source = s; 16 | target = t; 17 | } 18 | virtual ~Synapse() 19 | { 20 | 21 | } 22 | 23 | double weight; 24 | double dW; // last weight adjustment 25 | Neuron *source; 26 | Neuron *target; 27 | 28 | Neuron* getSource() 29 | { 30 | return source; 31 | } 32 | Neuron* getTarget() 33 | { 34 | return target; 35 | } 36 | void setSource(Neuron* s) 37 | { 38 | source = s; 39 | } 40 | void setTarget(Neuron* t) 41 | { 42 | target = t; 43 | } 44 | double getWeight() 45 | { 46 | return weight; 47 | } 48 | void setWeight(double w) 49 | { 50 | weight = w; 51 | } 52 | double getdw() 53 | { 54 | return dW; 55 | } 56 | void setdw(double d) 57 | { 58 | dW = d; 59 | } 60 | 61 | void setWeightAdjust(double dwa) 62 | { 63 | dW =dwa; 64 | } 65 | 66 | void updateWeight() 67 | { 68 | weight += dW; 69 | } 70 | protected: 71 | private: 72 | }; 73 | 74 | #endif // SYNAPSE_H 75 | -------------------------------------------------------------------------------- /TransferFunction.cpp: -------------------------------------------------------------------------------- 1 | #include "TransferFunction.h" 2 | 3 | TransferFunction::TransferFunction() 4 | { 5 | //ctor 6 | } 7 | 8 | TransferFunction::~TransferFunction() 9 | { 10 | //dtor 11 | } 12 | -------------------------------------------------------------------------------- /TransferFunction.h: -------------------------------------------------------------------------------- 1 | #ifndef TRANSFERFUNCTION_H 2 | #define TRANSFERFUNCTION_H 3 | 4 | #include 5 | 6 | class TransferFunction 7 | { 8 | public: 9 | TransferFunction(); 10 | virtual ~TransferFunction(); 11 | 12 | virtual double compute(double x) = 0; 13 | virtual double computeDerivative(double x) = 0; 14 | protected: 15 | private: 16 | }; 17 | 18 | class LogiFunction : public TransferFunction 19 | { 20 | public: 21 | LogiFunction():TransferFunction() 22 | { 23 | } 24 | virtual ~LogiFunction() {} 25 | 26 | double compute(double x) 27 | { 28 | return (double) (1.0 / (1.0 + exp(-x))); 29 | } 30 | 31 | double computeDerivative(double x) 32 | { 33 | 34 | double output = (double) (1.0 / (1.0 + exp(-x))); 35 | return (double) (output * (1.0 - output)); 36 | } 37 | }; 38 | #endif // TRANSFERFUNCTION_H 39 | -------------------------------------------------------------------------------- /Util.cpp: -------------------------------------------------------------------------------- 1 | #include "Util.h" 2 | 3 | void splitString(const string& str, vector &tokens, const string& delimiters) 4 | { 5 | // 找到第一个不是分割符号的字符位置 6 | string::size_type start = str.find_first_not_of(delimiters); 7 | // 找到之后的第一个分割符号 8 | string::size_type end = str.find_first_of(delimiters, start); 9 | while(string::npos != start || string::npos != end) 10 | { 11 | tokens.push_back(str.substr(start, end - start)); 12 | start = str.find_first_not_of(delimiters, end); 13 | end = str.find_first_of(delimiters, start); 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /Util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H_ 2 | #define UTIL_H_ 3 | 4 | #include 5 | #include 6 | using namespace std; 7 | 8 | void splitString(const string& str, vector &tokens, const string& delimiters = " "); 9 | 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "DataType.h" 2 | #include "Util.h" 3 | #include "Ranker.h" 4 | #include "RankNet.h" 5 | #include "MetricScorer.h" 6 | #include "DCG_scorer.h" 7 | 8 | void getFeatureIDFromFile(char* fn, vector *featureids, map &featureNameIdMap) 9 | { 10 | if(strlen(fn) == 0) 11 | return ; 12 | 13 | ifstream in(fn); 14 | if (!in) 15 | { 16 | cerr << "cannot open " << string(fn) << endl; 17 | return; 18 | } 19 | string line; 20 | while(getline(in, line)) 21 | { 22 | vector tokens; 23 | splitString(line, tokens, "\t"); 24 | int fid = atoi(tokens[0].c_str()); 25 | string featurename = "NONE"; 26 | 27 | if (tokens.size() >= 2) 28 | featurename = tokens[1]; 29 | featureNameIdMap.insert(make_pair(fid, featurename)); 30 | featureids->push_back(fid); 31 | } 32 | } 33 | 34 | void evaluate(char* trainfile, char* testfile, char* validatefile, char* featurefile, MetricScorer* trainScorer, MetricScorer* testScorer) 35 | { 36 | DataSet* trainSet = new DataSet(trainfile); 37 | DataSet* testSet = NULL; 38 | if (strlen(testfile) != 0) 39 | testSet = new DataSet(testfile); 40 | DataSet* validateSet= NULL; 41 | if (strlen(validatefile) != 0) 42 | validateSet = new DataSet(validatefile); 43 | 44 | trainSet->init(); 45 | // testSet->init(); 46 | // validateSet->init(); 47 | 48 | vector *featureids = new vector(); 49 | map featureNameIdMap; 50 | getFeatureIDFromFile(featurefile, featureids, featureNameIdMap); 51 | 52 | RankNet *rank = new RankNet(); 53 | rank->setTrainMetricScorer(trainScorer); 54 | rank->setTestMetricScorer(testScorer); 55 | rank->setTrainDataSet(trainSet); 56 | rank->setTestDataSet(testSet); 57 | rank->setValidateDataSet(validateSet); 58 | rank->setFeaturs(featureids) ; 59 | 60 | rank->init(); 61 | rank->learn(); 62 | 63 | delete trainSet; 64 | delete featureids; 65 | delete rank; 66 | 67 | } 68 | 69 | int main() 70 | { 71 | char trainfile[100] = "/home/kevin/RankLibc/data/train.txt"; 72 | char testfile[100] = ""; 73 | char validatefile[100] = ""; 74 | char featurefile[100] = "/home/kevin/RankLibc/data/featureid.txt"; 75 | DCG_scorer *trainScorer = new DCG_scorer(); 76 | DCG_scorer *testScorer = new DCG_scorer(); 77 | evaluate(trainfile, testfile, validatefile, featurefile, trainScorer, testScorer); 78 | 79 | return 0; 80 | } 81 | --------------------------------------------------------------------------------