├── DBow ├── BowVector.cpp ├── BowVector.h ├── DBow.h ├── DBow.vcproj ├── Database.cpp ├── Database.h ├── DatabaseTypes.h ├── DbInfo.cpp ├── DbInfo.h ├── HVocParams.cpp ├── HVocParams.h ├── HVocabulary.cpp ├── HVocabulary.h ├── Makefile ├── QueryResults.cpp ├── QueryResults.h ├── VocInfo.cpp ├── VocInfo.h ├── VocParams.cpp ├── VocParams.h ├── Vocabulary.cpp └── Vocabulary.h ├── DBowLibAndDemo.sln ├── DUtils ├── BinaryFile.cpp ├── BinaryFile.h ├── DException.h ├── DUtils.h ├── DUtils.vcproj ├── FileFunctions.cpp ├── FileFunctions.h ├── FileModes.h ├── LineFile.cpp ├── LineFile.h ├── Makefile ├── Math.hpp ├── Random.cpp ├── Random.h ├── Timestamp.cpp ├── Timestamp.h └── dirent_win.h ├── Demo ├── Demo.cpp ├── Demo.vcproj ├── Makefile ├── image1.png ├── image2.png ├── image3.png └── image4.png ├── INSTALL.txt ├── LICENSE.txt ├── Makefile └── README.md /DBow/BowVector.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: BowVector.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: bag-of-words vector for representing images 6 | * Notes: see BowVector.h 7 | */ 8 | 9 | #include "BowVector.h" 10 | #include 11 | #include 12 | #include 13 | using namespace std; 14 | 15 | using namespace DBow; 16 | 17 | BowVector::BowVector(void) 18 | { 19 | } 20 | 21 | BowVector::~BowVector(void) 22 | { 23 | } 24 | 25 | void BowVector::Normalize(VocParams::ScoringType norm_type) 26 | { 27 | assert(norm_type == VocParams::L1_NORM || norm_type == VocParams::L2_NORM); 28 | 29 | BowVector::iterator it; 30 | 31 | double norm = 0.0; 32 | switch(norm_type){ 33 | 34 | case VocParams::L1_NORM: 35 | for(it = begin(); it != end(); it++) 36 | norm += fabs(it->value); 37 | 38 | break; 39 | 40 | case VocParams::L2_NORM: 41 | for(it = begin(); it != end(); it++) 42 | norm += it->value * it->value; 43 | norm = sqrt(norm); 44 | 45 | break; 46 | 47 | default: 48 | break; 49 | } 50 | 51 | if(norm > 0.0){ 52 | for(it = begin(); it != 
end(); it++) 53 | it->value /= norm; 54 | } 55 | } 56 | 57 | void BowVector::PutInOrder() 58 | { 59 | sort(this->begin(), this->end()); 60 | } 61 | 62 | bool BowVector::isInOrder() const 63 | { 64 | unsigned int n = size(); 65 | for(unsigned int i = 0; i < n-1; i++) 66 | if( (*this)[i].id >= (*this)[i+1].id ) return false; 67 | return true; 68 | } 69 | 70 | bool BowVectorEntry::operator< (const BowVectorEntry &w) const 71 | { 72 | return(this->id < w.id); 73 | } 74 | 75 | bool BowVectorEntry::operator== (const BowVectorEntry &w) const 76 | { 77 | return(this->id == w.id); 78 | } 79 | 80 | bool BowVectorEntry::operator== (WordId id) const 81 | { 82 | return(this->id == id); 83 | } 84 | 85 | WordValue BowVectorEntry::operator+ (const BowVectorEntry &w) const 86 | { 87 | return this->value + w.value; 88 | } 89 | 90 | namespace DBow 91 | { 92 | WordValue operator+ (WordValue w, const BowVectorEntry &v) 93 | { 94 | return v.value + w; 95 | } 96 | } 97 | 98 | -------------------------------------------------------------------------------- /DBow/BowVector.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: BowVector.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: bag-of-words vector for representing image 6 | * Defines: WordId, WordValue, BowEntryVector, BowVector 7 | * 8 | * Note: this vector is implemented with a stl vector. 9 | * The stl vector interface is public so that Vocabulary 10 | * subclasses can deal easily with BowVectors. 11 | * BowVector entries must be in strict ascending order of ids for 12 | * easy scoring between isolated BowVectors with Vocabulary::Score (this 13 | * condition is not necessary if vectors are only going to be 14 | * used in Database::Query and Database::AddEntry). 15 | * Vocabulary subclasses can make use of BowVector::PutInOrder 16 | * if they do not produce bow vectors in order. 
17 | */ 18 | 19 | #pragma once 20 | #ifndef __BOW_VECTOR__ 21 | #define __BOW_VECTOR__ 22 | 23 | #include "VocParams.h" 24 | #include 25 | #include 26 | #include 27 | using namespace std; 28 | 29 | namespace DBow { 30 | 31 | typedef unsigned int WordId; 32 | typedef double WordValue; 33 | 34 | // If you change the type of WordValue, make sure you change also the 35 | // epsilon value (this is needed by the KL method) 36 | const WordValue EPSILON = DBL_EPSILON; // FLT_EPSILON 37 | const WordValue LOG_EPS = log(EPSILON); 38 | 39 | /** Type of entries in the vector 40 | */ 41 | struct BowVectorEntry 42 | { 43 | WordId id; 44 | WordValue value; 45 | 46 | /** 47 | * Constructor 48 | */ 49 | BowVectorEntry(){} 50 | BowVectorEntry(WordId _id, WordValue _value){ 51 | id = _id; 52 | value = _value; 53 | } 54 | 55 | /** 56 | * Returns if the word id of the current word is lower than 57 | * w.id (this is used for arranging the vector entries) 58 | */ 59 | bool operator< (const BowVectorEntry &w) const; 60 | 61 | /** 62 | * Returns if the word id of the current word is the same as 63 | * w.id (this is used for arranging the vector entries) 64 | */ 65 | bool operator== (const BowVectorEntry &w) const; 66 | 67 | /** 68 | * Compares the bow vector with a given word id 69 | * @param id 70 | * @return true iif this bow vector's id and the given one are the same 71 | */ 72 | bool operator== (WordId id) const; 73 | 74 | /** 75 | * Returns the sum of the word values of two entries 76 | */ 77 | WordValue operator+ (const BowVectorEntry &w) const; 78 | 79 | /** 80 | * Returns the sum of a value and a vector value 81 | */ 82 | friend WordValue operator+ (WordValue w, const BowVectorEntry &v); 83 | }; 84 | 85 | class BowVector: 86 | public vector 87 | { 88 | public: 89 | 90 | /** Constructor 91 | */ 92 | BowVector(void); 93 | 94 | /** Destructor 95 | */ 96 | ~BowVector(void); 97 | 98 | /** 99 | * Normalizes the values in the vector 100 | * @param norm_type norm used 101 | */ 102 | void 
Normalize(VocParams::ScoringType norm_type); 103 | 104 | /** 105 | * Puts the vector entries in ascending order of word ids 106 | */ 107 | void PutInOrder(); 108 | 109 | /** 110 | * Returns if the vector is already in order. Used for debugging purposes 111 | * @return true iif in order 112 | */ 113 | bool isInOrder() const; 114 | }; 115 | 116 | } 117 | 118 | #endif 119 | 120 | -------------------------------------------------------------------------------- /DBow/DBow.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: DBow.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: include all the DBow headers 6 | */ 7 | 8 | #include "DatabaseTypes.h" 9 | #include "Database.h" 10 | #include "BowVector.h" 11 | #include "DbInfo.h" 12 | #include "Vocabulary.h" 13 | #include "HVocabulary.h" 14 | #include "HVocParams.h" 15 | #include "QueryResults.h" 16 | #include "VocInfo.h" 17 | #include "VocParams.h" 18 | 19 | -------------------------------------------------------------------------------- /DBow/DBow.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 25 | 28 | 31 | 34 | 37 | 40 | 51 | 54 | 57 | 60 | 64 | 67 | 70 | 73 | 76 | 79 | 80 | 88 | 91 | 94 | 97 | 100 | 103 | 115 | 118 | 121 | 124 | 128 | 131 | 134 | 137 | 140 | 143 | 144 | 145 | 146 | 147 | 148 | 153 | 156 | 157 | 160 | 161 | 164 | 165 | 168 | 169 | 172 | 173 | 176 | 177 | 180 | 181 | 184 | 185 | 188 | 189 | 190 | 195 | 198 | 199 | 202 | 203 | 206 | 207 | 210 | 211 | 214 | 215 | 218 | 219 | 222 | 223 | 226 | 227 | 230 | 231 | 234 | 235 | 238 | 239 | 240 | 245 | 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /DBow/Database.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: Database.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * 
Description: an image database 6 | */ 7 | 8 | #include "Database.h" 9 | #include "Vocabulary.h" 10 | #include "HVocabulary.h" 11 | #include "QueryResults.h" 12 | #include "DUtils.h" 13 | 14 | #include 15 | #include 16 | #include 17 | using namespace std; 18 | 19 | using namespace DBow; 20 | 21 | Database::Database(const Vocabulary &voc) : 22 | m_voc(NULL), m_nentries(0) 23 | { 24 | initVoc(voc.RetrieveInfo().VocType, &voc); 25 | m_index.resize(0); 26 | m_index.resize(voc.NumberOfWords()); 27 | } 28 | 29 | Database::Database(const char *filename) : 30 | m_voc(NULL), m_nentries(0) 31 | { 32 | Load(filename); 33 | } 34 | 35 | Database::~Database(void) 36 | { 37 | delete m_voc; 38 | } 39 | 40 | DbInfo Database::RetrieveInfo() const 41 | { 42 | DbInfo ret(m_voc->RetrieveInfo()); 43 | ret.EntryCount = m_nentries; 44 | return ret; 45 | } 46 | 47 | EntryId Database::_AddEntry(BowVector &v) 48 | { 49 | VocParams::ScoringType norm; 50 | if(VocParams::MustNormalize(m_voc->Scoring(), norm)){ 51 | // vectors are stored normalized if needed 52 | v.Normalize(norm); 53 | } 54 | 55 | EntryId eid = m_nentries; 56 | 57 | // update inverted file 58 | BowVector::const_iterator it; 59 | for(it = v.begin(); it != v.end(); it++){ 60 | // eids are in ascending order in the index 61 | m_index[it->id].push_back(IFEntry(eid, it->value)); 62 | } 63 | 64 | m_nentries++; 65 | 66 | return eid; 67 | } 68 | 69 | 70 | void Database::Clear() 71 | { 72 | m_index.resize(0); 73 | m_index.resize(m_voc->NumberOfWords()); 74 | m_nentries = 0; 75 | } 76 | 77 | void 78 | Database::_Query(QueryResults &ret, BowVector &v, int max_results) const 79 | { 80 | // This implementation is independent from that in Vocabulary::Score 81 | 82 | // check if the vector must be normalized 83 | VocInfo info = m_voc->RetrieveInfo(); 84 | VocParams::ScoringType norm; 85 | if(VocParams::MustNormalize(info.Parameters->Scoring, norm)){ 86 | v.Normalize(norm); 87 | } 88 | 89 | // ret is not in order until the end 90 | 
ret.resize(0); 91 | ret.reserve(100); 92 | 93 | switch(info.Parameters->Scoring){ 94 | 95 | case VocParams::L1_NORM: 96 | doQueryL1(v, ret, max_results, info.Parameters->ScaleScore); 97 | break; 98 | 99 | case VocParams::L2_NORM: 100 | doQueryL2(v, ret, max_results, info.Parameters->ScaleScore); 101 | break; 102 | 103 | case VocParams::CHI_SQUARE: 104 | doQueryChiSquare(v, ret, max_results, info.Parameters->ScaleScore); 105 | break; 106 | 107 | case VocParams::KL: 108 | doQueryKL(v, ret, max_results, info.Parameters->ScaleScore); 109 | break; 110 | 111 | case VocParams::BHATTACHARYYA: 112 | doQueryBhattacharyya(v, ret, max_results, info.Parameters->ScaleScore); 113 | break; 114 | 115 | case VocParams::DOT_PRODUCT: 116 | doQueryDotProduct(v, ret, max_results, info.Parameters->ScaleScore); 117 | break; 118 | } 119 | } 120 | 121 | void Database::doQueryL1(const BowVector &v, QueryResults &ret, 122 | const int max_results, const bool scale_score) const 123 | { 124 | BowVector::const_iterator it; 125 | IFRow::const_iterator rit; 126 | QueryResults::iterator qit; 127 | 128 | for(it = v.begin(); it != v.end(); it++){ 129 | WordId wid = it->id; 130 | WordValue qvalue = it->value; 131 | 132 | const IFRow& row = m_index[wid]; 133 | 134 | for(rit = row.begin(); rit != row.end(); rit++){ 135 | EntryId eid = rit->id; 136 | WordValue dvalue = rit->value; 137 | 138 | // scoring-dependent value 139 | double value = fabs(qvalue - dvalue) - fabs(qvalue) - fabs(dvalue); 140 | 141 | // check if this entry is already in the returning vector 142 | qit = find(ret.begin(), ret.end(), eid); 143 | 144 | if(qit == ret.end()){ 145 | // insert 146 | ret.push_back(Result(eid, value)); 147 | }else{ 148 | // update 149 | qit->Score += value; 150 | } 151 | } // for each inverted row 152 | } // for each word in features 153 | 154 | // resulting "scores" are now in [-2 best .. 
0 worst] 155 | 156 | // sort vector in ascending order 157 | // (scores are inverted now --the lower the better--) 158 | sort(ret.begin(), ret.end()); 159 | 160 | // cut vector 161 | if((int)ret.size() > max_results) ret.resize(max_results); 162 | 163 | // complete score 164 | // ||v - w||_{L1} = 2 + Sum(|v_i - w_i| - |v_i| - |w_i|) 165 | // for all i | v_i != 0 and w_i != 0 166 | // (Nister, 2006) 167 | if(scale_score){ 168 | for(qit = ret.begin(); qit != ret.end(); qit++) 169 | qit->Score = -qit->Score/2.0; 170 | }else{ 171 | for(qit = ret.begin(); qit != ret.end(); qit++) 172 | qit->Score = 2.0 + qit->Score; 173 | } 174 | } 175 | 176 | void Database::doQueryL2(const BowVector &v, QueryResults &ret, 177 | const int max_results, const bool scale_score) const 178 | { 179 | BowVector::const_iterator it; 180 | IFRow::const_iterator rit; 181 | QueryResults::iterator qit; 182 | 183 | for(it = v.begin(); it != v.end(); it++){ 184 | WordId wid = it->id; 185 | WordValue qvalue = it->value; 186 | 187 | const IFRow& row = m_index[wid]; 188 | 189 | for(rit = row.begin(); rit != row.end(); rit++){ 190 | EntryId eid = rit->id; 191 | WordValue dvalue = rit->value; 192 | 193 | // scoring-dependent value 194 | double value = qvalue * dvalue; 195 | value = -value; // trick for smart sorting 196 | 197 | // check if this entry is already in the returning vector 198 | qit = find(ret.begin(), ret.end(), eid); 199 | 200 | if(qit == ret.end()){ 201 | // insert 202 | ret.push_back(Result(eid, value)); 203 | }else{ 204 | // update 205 | qit->Score += value; 206 | } 207 | } // for each inverted row 208 | } // for each word in features 209 | 210 | // resulting "scores" are now in [ -1 best .. 
0 worst ] 211 | 212 | // sort vector in ascending order 213 | // (scores are inverted now --the lower the better--) 214 | sort(ret.begin(), ret.end()); 215 | 216 | // cut vector 217 | if((int)ret.size() > max_results) ret.resize(max_results); 218 | 219 | if(scale_score){ 220 | for(qit = ret.begin(); qit != ret.end(); qit++) 221 | qit->Score = 1.0 - sqrt(1.0 + qit->Score); 222 | }else { 223 | for(qit = ret.begin(); qit != ret.end(); qit++) 224 | qit->Score = sqrt(2 + 2 * qit->Score); 225 | } 226 | } 227 | 228 | void Database::doQueryChiSquare(const BowVector &v, QueryResults &ret, 229 | const int max_results, const bool scale_score) const 230 | { 231 | BowVector::const_iterator it; 232 | IFRow::const_iterator rit; 233 | QueryResults::iterator qit; 234 | 235 | for(it = v.begin(); it != v.end(); it++){ 236 | WordId wid = it->id; 237 | WordValue vi = it->value; 238 | 239 | const IFRow& row = m_index[wid]; 240 | 241 | for(rit = row.begin(); rit != row.end(); rit++){ 242 | EntryId eid = rit->id; 243 | WordValue wi = rit->value; 244 | 245 | // scoring-dependent value 246 | double value = (vi - wi)*(vi - wi)/(vi + wi) - vi - wi; 247 | 248 | // check if this db entry is already in the returning vector 249 | qit = find(ret.begin(), ret.end(), eid); 250 | 251 | if(qit == ret.end()){ 252 | // insert 253 | ret.push_back(Result(eid, value)); 254 | }else{ 255 | // update 256 | qit->Score += value; 257 | } 258 | } // for each inverted row 259 | } // for each word in features 260 | 261 | // resulting "scores" are now in [-2 best .. 
0 worst] 262 | 263 | // sort vector in ascending order 264 | // (scores are inverted now --the lower the better--) 265 | sort(ret.begin(), ret.end()); 266 | 267 | // cut vector 268 | if((int)ret.size() > max_results) ret.resize(max_results); 269 | 270 | // complete score 271 | // score = Sum (vi - wi)^2 / (vi + wi) == 272 | // Sum vi + Sum wi - Sum{i, wi != 0} vi - Sum{i, vi != 0} wi + 273 | // + Sum_{i, vi != 0 && wi != 0} (vi - wi)^2 / (vi + wi) 274 | // 275 | // if there are no negative items, Sum vi = Sum wi = 1, since they 276 | // are normalized 277 | // 278 | // NOTE: this implementation assumes there are no negative items in 279 | // the vectors (there should not be if tf, idf or tf-idf are used) 280 | // 281 | if(scale_score){ 282 | for(qit = ret.begin(); qit != ret.end(); qit++) 283 | qit->Score = -qit->Score/2.0; 284 | }else{ 285 | for(qit = ret.begin(); qit != ret.end(); qit++) 286 | qit->Score = 2.0 + qit->Score; 287 | } 288 | } 289 | 290 | void Database::doQueryKL(const BowVector &v, QueryResults &ret, 291 | const int max_results, const bool scale_score) const 292 | { 293 | BowVector::const_iterator it; 294 | IFRow::const_iterator rit; 295 | QueryResults::iterator qit; 296 | 297 | for(it = v.begin(); it != v.end(); it++){ 298 | WordId wid = it->id; 299 | WordValue vi = it->value; 300 | 301 | const IFRow& row = m_index[wid]; 302 | 303 | for(rit = row.begin(); rit != row.end(); rit++){ 304 | EntryId eid = rit->id; 305 | WordValue wi = rit->value; 306 | 307 | // scoring-dependent value 308 | double value = vi * log(vi/wi); 309 | 310 | // check if this db entry is already in the returning vector 311 | qit = find(ret.begin(), ret.end(), eid); 312 | 313 | if(qit == ret.end()){ 314 | // insert 315 | ret.push_back(Result(eid, value)); 316 | }else{ 317 | // update 318 | qit->Score += value; 319 | } 320 | } // for each inverted row 321 | } // for each word in features 322 | 323 | // resulting "scores" are now in [-X worst .. 0 best .. 
X worst] 324 | // but we cannot make sure which ones are better without calculating 325 | // the complete score 326 | 327 | // complete scores 328 | for(qit = ret.begin(); qit != ret.end(); ++qit){ 329 | EntryId eid = qit->Id; 330 | double value = 0.0; 331 | 332 | for(it = v.begin(); it != v.end(); it++){ 333 | const WordValue vi = it->value; 334 | const IFRow& row = m_index[it->id]; 335 | 336 | if(row.end() == find(row.begin(), row.end(), IFEntry(eid, 0) )){ 337 | value += vi * (log(vi) - LOG_EPS); 338 | } 339 | } 340 | 341 | qit->Score += value; 342 | } 343 | 344 | // real scores are now in [0 best .. X worst] 345 | 346 | // sort vector in ascending order 347 | // (scores are inverted now --the lower the better--) 348 | sort(ret.begin(), ret.end()); 349 | 350 | // cut vector 351 | if((int)ret.size() > max_results) ret.resize(max_results); 352 | 353 | // this score cannot be scaled 354 | } 355 | 356 | void Database::doQueryBhattacharyya(const BowVector &v, QueryResults &ret, 357 | const int max_results, const bool scale_score) const 358 | { 359 | BowVector::const_iterator it; 360 | IFRow::const_iterator rit; 361 | QueryResults::iterator qit; 362 | 363 | for(it = v.begin(); it != v.end(); it++){ 364 | WordId wid = it->id; 365 | WordValue vi = it->value; 366 | 367 | const IFRow& row = m_index[wid]; 368 | 369 | for(rit = row.begin(); rit != row.end(); rit++){ 370 | EntryId eid = rit->id; 371 | WordValue wi = rit->value; 372 | 373 | // scoring-dependent value 374 | double value = sqrt(vi * wi); 375 | 376 | // check if this db entry is already in the returning vector 377 | qit = find(ret.begin(), ret.end(), eid); 378 | 379 | if(qit == ret.end()){ 380 | // insert 381 | ret.push_back(Result(eid, value)); 382 | }else{ 383 | // update 384 | qit->Score += value; 385 | } 386 | } // for each inverted row 387 | } // for each word in features 388 | 389 | // resulting "scores" are now in [1 best .. 
0 worst] 390 | 391 | // sort vector in descending order 392 | sort(ret.begin(), ret.end(), Result::GreaterThan); 393 | 394 | // cut vector 395 | if((int)ret.size() > max_results) ret.resize(max_results); 396 | 397 | // this score is already scaled 398 | } 399 | 400 | void Database::doQueryDotProduct(const BowVector &v, QueryResults &ret, 401 | const int max_results, const bool scale_score) const 402 | { 403 | BowVector::const_iterator it; 404 | IFRow::const_iterator rit; 405 | QueryResults::iterator qit; 406 | 407 | for(it = v.begin(); it != v.end(); it++){ 408 | WordId wid = it->id; 409 | WordValue vi = it->value; 410 | 411 | const IFRow& row = m_index[wid]; 412 | 413 | for(rit = row.begin(); rit != row.end(); rit++){ 414 | EntryId eid = rit->id; 415 | WordValue wi = rit->value; 416 | 417 | // scoring-dependent value 418 | double value = vi * wi; 419 | 420 | // check if this db entry is already in the returning vector 421 | qit = find(ret.begin(), ret.end(), eid); 422 | 423 | if(qit == ret.end()){ 424 | // insert 425 | ret.push_back(Result(eid, value)); 426 | }else{ 427 | // update 428 | qit->Score += value; 429 | } 430 | } // for each inverted row 431 | } // for each word in features 432 | 433 | // resulting "scores" are now in [0 worst .. 
X best] 434 | 435 | // sort vector in descending order 436 | sort(ret.begin(), ret.end(), Result::GreaterThan); 437 | 438 | // cut vector 439 | if((int)ret.size() > max_results) ret.resize(max_results); 440 | 441 | // this score cannot be scaled 442 | } 443 | 444 | void Database::Save(const char *filename, bool binary) const 445 | { 446 | if(binary){ 447 | SaveBinary(filename); 448 | }else{ 449 | SaveText(filename); 450 | } 451 | } 452 | 453 | void Database::Load(const char *filename) 454 | { 455 | fstream f(filename, ios::in | ios::binary); 456 | 457 | if(!f.is_open()) throw DUtils::DException("Cannot open file"); 458 | 459 | char c; 460 | f.get(c); 461 | 462 | f.close(); 463 | 464 | // if c is >= 32, it is text 465 | if(c >= 32) 466 | LoadText(filename); 467 | else 468 | LoadBinary(filename); 469 | } 470 | 471 | void Database::SaveBinary(const char *filename) const 472 | { 473 | // Format: 474 | // [Vocabulary] (with magic word) 475 | // N W' 476 | // WordId_0 K_0 EntryId_0_0 Value_0_0 ... EntryId_0_(K_0) Value_0_(K_0) 477 | // ... 478 | // WordId_(W'-1) K_(W'-1) EntryId_(W'-1)_0 Value_(W'-1)_0 ... 
EntryId_(W'-1)_{K_(W'-1)} Value_(W'-1)_{K_(W'-1)} 479 | // 480 | // Where: 481 | // [Vocabulary]: whole vocabulary in binary format 482 | // N (int32): number of entries in the database 483 | // W' (int32): number of words with some row in the inverted file 484 | // WordId_i (int32): word id in the inverted file 485 | // K_i (int32): number of entries in the row of the WordId_i 486 | // EntryId_i_k (int32): doc (entry) id where the word WordId_i is present 487 | // Value_i_k (double64): value of word WordId_i in entry EntryId_i_k 488 | // 489 | 490 | m_voc->Save(filename, true); 491 | 492 | DUtils::BinaryFile f(filename, DUtils::FILE_MODES(DUtils::WRITE | DUtils::APPEND)); 493 | 494 | int N = m_nentries; 495 | int W = 0; 496 | 497 | InvertedFile::const_iterator it; 498 | for(it = m_index.begin(); it != m_index.end(); it++){ 499 | if(!it->empty()) W++; 500 | } 501 | 502 | f << N << W; 503 | 504 | IFRow::const_iterator rit; 505 | for(it = m_index.begin(); it != m_index.end(); it++){ 506 | if(!it->empty()){ 507 | int wordid = it - m_index.begin(); 508 | int k = (int)it->size(); 509 | 510 | f << wordid << k; 511 | 512 | for(rit = it->begin(); rit != it->end(); rit++){ 513 | f << (int)rit->id << (double)rit->value; 514 | } 515 | } 516 | } 517 | 518 | f.Close(); 519 | } 520 | 521 | 522 | void Database::SaveText(const char *filename) const 523 | { 524 | // Format: 525 | // [Vocabulary] 526 | // N W' 527 | // WordId_0 K_0 EntryId_0_0 Value_0_0 ... EntryId_0_(K_0) Value_0_(K_0) 528 | // ... 529 | // WordId_(W'-1) K_(W'-1) EntryId_(W'-1)_0 Value_(W'-1)_0 ... 
EntryId_(W'-1)_{K_(W'-1)} Value_(W'-1)_{K_(W'-1)} 530 | // 531 | // Where: 532 | // [Vocabulary]: whole vocabulary in text format 533 | // N (int32): number of entries in the database 534 | // W' (int32): number of words with some row in the inverted file 535 | // WordId_i (int32): word id in the inverted file 536 | // K_i (int32): number of entries in the row of the WordId_i 537 | // EntryId_i_k (int32): doc (entry) id where the word WordId_i is present 538 | // Value_i_k (double64): value of word WordId_i in entry EntryId_i_k 539 | // 540 | 541 | m_voc->Save(filename, false); 542 | 543 | fstream f(filename, ios::out | ios::app); 544 | if(!f.is_open()) throw DUtils::DException("Cannot open file"); 545 | 546 | int N = m_nentries; 547 | int W = 0; 548 | 549 | InvertedFile::const_iterator it; 550 | for(it = m_index.begin(); it != m_index.end(); it++){ 551 | if(!it->empty()) W++; 552 | } 553 | 554 | f << N << " " << W << endl; 555 | 556 | IFRow::const_iterator rit; 557 | for(it = m_index.begin(); it != m_index.end(); it++){ 558 | if(!it->empty()){ 559 | int wordid = it - m_index.begin(); 560 | int k = (int)it->size(); 561 | 562 | f << wordid << " " << k << " "; 563 | 564 | for(rit = it->begin(); rit != it->end(); rit++){ 565 | f << (int)rit->id << " " 566 | << (double)rit->value << " "; 567 | } 568 | f << endl; 569 | } 570 | } 571 | 572 | f.close(); 573 | } 574 | 575 | 576 | void Database::LoadBinary(const char *filename) 577 | { 578 | // read type of voc (@see Vocabulary::SaveBinaryHeader) 579 | DUtils::BinaryFile f(filename, DUtils::READ); 580 | f.DiscardNextByte(); // magic word 581 | int voctype; 582 | f >> voctype; 583 | f.Close(); 584 | 585 | initVoc((VocParams::VocType)voctype); 586 | 587 | unsigned int pos = m_voc->Load(filename); 588 | 589 | f.OpenForReading(filename); 590 | f.DiscardBytes(pos); // vocabulary read 591 | 592 | _load(f); 593 | 594 | f.Close(); 595 | } 596 | 597 | void Database::LoadText(const char *filename) 598 | { 599 | // read type of voc 
(@see Vocabulary::SaveTextHeader) 600 | fstream f(filename, ios::in); 601 | int voctype; 602 | f >> voctype; 603 | f.close(); 604 | 605 | initVoc((VocParams::VocType)voctype); 606 | 607 | unsigned int pos = m_voc->Load(filename); 608 | 609 | f.open(filename, ios::in); 610 | if(!f.is_open()) throw DUtils::DException("Cannot open file"); 611 | 612 | f.seekg(pos); // vocabulary read 613 | 614 | _load(f); 615 | 616 | f.close(); 617 | } 618 | 619 | template 620 | void Database::_load(T& f) 621 | { 622 | // Format: 623 | // N W' 624 | // WordId_0 K_0 EntryId_0_0 Value_0_0 ... EntryId_0_(K_0) Value_0_(K_0) 625 | // ... 626 | // WordId_(W'-1) K_(W'-1) EntryId_(W'-1)_0 Value_(W'-1)_0 ... EntryId_(W'-1)_{K_(W'-1)} Value_(W'-1)_{K_(W'-1)} 627 | // 628 | // Where: 629 | // [Vocabulary]: whole vocabulary in binary format 630 | // N (int32): number of entries in the database 631 | // W' (int32): number of words with some row in the inverted file 632 | // WordId_i (int32): word id in the inverted file 633 | // K_i (int32): number of entries in the row of the WordId_i 634 | // EntryId_i_k (int32): doc (entry) id where the word WordId_i is present 635 | // Value_i_k (double64): value of word WordId_i in entry EntryId_i_k 636 | // 637 | 638 | int N, W; 639 | f >> N >> W; 640 | 641 | m_index.resize(0); 642 | m_index.resize(m_voc->NumberOfWords()); 643 | m_nentries = N; 644 | 645 | for(int i = 0; i < W; i++){ 646 | int wordid, k; 647 | f >> wordid >> k; 648 | 649 | for(int j = 0; j < k; j++){ 650 | int eid; 651 | double value; 652 | 653 | f >> eid >> value; 654 | 655 | m_index[wordid].push_back(IFEntry(eid, value)); 656 | } 657 | } 658 | 659 | } 660 | 661 | void Database::initVoc(VocParams::VocType type, const Vocabulary *copy) 662 | { 663 | delete m_voc; 664 | 665 | switch(type){ 666 | case VocParams::HIERARCHICAL_VOC: 667 | 668 | if(copy) 669 | m_voc = new HVocabulary( 670 | *(static_cast(copy))); 671 | else 672 | m_voc = new HVocabulary(HVocParams(2,1)); 673 | 674 | break; 675 | } 
676 | 677 | } 678 | 679 | -------------------------------------------------------------------------------- /DBow/Database.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: Database.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: an image database 6 | */ 7 | 8 | #pragma once 9 | #ifndef __D_DATABASE__ 10 | #define __D_DATABASE__ 11 | 12 | #include "BowVector.h" 13 | #include "Vocabulary.h" 14 | #include "DbInfo.h" 15 | #include "DatabaseTypes.h" 16 | #include "QueryResults.h" 17 | #include 18 | #include 19 | using namespace std; 20 | 21 | namespace DBow { 22 | 23 | class Database 24 | { 25 | public: 26 | 27 | /** 28 | * Creates a database from the given vocabulary. 29 | * @param voc vocabulary 30 | */ 31 | Database(const Vocabulary &voc); 32 | 33 | /** 34 | * Creates a database from a file 35 | * @param filename 36 | */ 37 | Database(const char *filename); 38 | 39 | /** 40 | * Destructor 41 | */ 42 | virtual ~Database(void); 43 | 44 | /** 45 | * Retrieves infoa bout the database 46 | * @return db info 47 | */ 48 | DbInfo RetrieveInfo() const; 49 | 50 | /** 51 | * Adds an entry to the database 52 | * @param features features of the image, in the opencv format 53 | * @return id of the new entry 54 | */ 55 | EntryId AddEntry(const vector &features); 56 | 57 | /** 58 | * Adds an entry to the database 59 | * @param v bow vector to add 60 | * @return id of the new entry 61 | */ 62 | EntryId AddEntry(const BowVector &v); 63 | 64 | /** 65 | * Empties the database 66 | */ 67 | inline void Clear(); 68 | 69 | /** 70 | * Returns the number of entries in the database 71 | * @return number of entries 72 | */ 73 | inline unsigned int NumberOfEntries() const { return m_nentries; } 74 | 75 | /** 76 | * Queries the database with some features 77 | * @param ret (out) query results 78 | * @param features query features 79 | * @param max_results number of results to return 80 | */ 81 | void Query(QueryResults 
&ret, const vector &features, 82 | int max_results = 1) const; 83 | 84 | /** 85 | * Queries the database with a bow vector 86 | * @param ret (out) query results 87 | * @param v vector to query with 88 | * @param max_results number of results to return 89 | */ 90 | void Query(QueryResults &ret, const BowVector &v, 91 | int max_results = 1) const; 92 | 93 | /** 94 | * Saves the database along with the vocabulary in the given file 95 | * @param filename file 96 | * @param binary (default: true) store in binary format 97 | */ 98 | void Save(const char *filename, bool binary = true) const; 99 | 100 | /** 101 | * Loads the database from a file 102 | * @param filename 103 | */ 104 | void Load(const char *filename); 105 | 106 | /** 107 | * Saves the vocabulary in a file 108 | * @param filename file to store the vocabulary in 109 | * @param binary (default: true) store in binary format 110 | */ 111 | inline void ExportVocabulary(const char *filename, bool binary = true) const 112 | { 113 | m_voc->Save(filename, binary); 114 | } 115 | 116 | /** 117 | * Returns the vocabulary instance used by this database 118 | * @return ref to the vocabulary 119 | */ 120 | inline const Vocabulary& Voc() const { 121 | return *m_voc; 122 | } 123 | 124 | protected: 125 | 126 | /** 127 | * Saves the database in binary format 128 | * @param filename 129 | */ 130 | void SaveBinary(const char *filename) const; 131 | 132 | /** 133 | * Saves the database in text format 134 | * @param filename 135 | */ 136 | void SaveText(const char *filename) const; 137 | 138 | /** 139 | * Loads the database from a binary file 140 | * @param filename 141 | */ 142 | void LoadBinary(const char *filename); 143 | 144 | /** 145 | * Loads the database from a text file 146 | * @param filename 147 | */ 148 | void LoadText(const char *filename); 149 | 150 | /** 151 | * Does the internal work to add an entry to the database 152 | * @param v vector to add (it is modified) 153 | * @return added entry id 154 | */ 155 | EntryId 
_AddEntry(BowVector &v); 156 | 157 | /** 158 | * Does the internal work to query the database 159 | * @param ret (out) query results 160 | * @param v bow vector (it is modified) 161 | * @param max_results returns only this number of results 162 | */ 163 | void _Query(QueryResults &ret, BowVector &v, int max_results) const; 164 | 165 | protected: 166 | 167 | /** 168 | * InvertedFile types 169 | */ 170 | 171 | struct IFEntry{ 172 | EntryId id; 173 | WordValue value; 174 | 175 | IFEntry(EntryId _id, WordValue _value){ 176 | id = _id; 177 | value = _value; 178 | } 179 | 180 | /** 181 | * Returns if the current IFEntry has the same id as the given one 182 | * @param e entry to compare with 183 | * @return true iif ids are the same 184 | */ 185 | inline bool operator==(const IFEntry &e) const { return id == e.id; } 186 | }; 187 | 188 | typedef list IFRow; 189 | 190 | // InvertedFile[wordid] = [ , ... ] 191 | class InvertedFile: public vector 192 | { 193 | public: 194 | InvertedFile(){} 195 | ~InvertedFile(){} 196 | }; 197 | 198 | protected: 199 | 200 | // Vocabulary associated to this database 201 | Vocabulary *m_voc; 202 | 203 | // Inverted file 204 | InvertedFile m_index; 205 | 206 | // Number of entries in the db 207 | unsigned int m_nentries; 208 | 209 | private: 210 | 211 | /** 212 | * Creates an instance of a vocabulary object 213 | * depending on the header of the given filename 214 | * @param type type of vocabulary 215 | * @param copy (default: NULL) if given, the vocabulary 216 | * is initiated as a copy of this vocabulary 217 | */ 218 | void initVoc(VocParams::VocType type, const Vocabulary *copy = NULL); 219 | 220 | /** 221 | * Loads the database from a filename 222 | * The vocabulary has already been read 223 | * @param f file stream 224 | */ 225 | template void _load(T& f); 226 | 227 | /** 228 | * Performs several kinds of queries 229 | * @param v bow vector to query (already normalized if necessary) 230 | * @param ret allocated and empty vector to store 
the results in 231 | * @param max_results maximum number of results in ret 232 | * @param scale_score says if score must be scaled in the end (if applicable) 233 | */ 234 | void doQueryL1(const BowVector &v, QueryResults &ret, 235 | const int max_results, const bool scale_score) const; 236 | void doQueryL2(const BowVector &v, QueryResults &ret, 237 | const int max_results, const bool scale_score) const; 238 | void doQueryChiSquare(const BowVector &v, QueryResults &ret, 239 | const int max_results, const bool scale_score) const; 240 | void doQueryKL(const BowVector &v, QueryResults &ret, 241 | const int max_results, const bool scale_score) const; 242 | void doQueryBhattacharyya(const BowVector &v, QueryResults &ret, 243 | const int max_results, const bool scale_score) const; 244 | void doQueryDotProduct(const BowVector &v, QueryResults &ret, 245 | const int max_results, const bool scale_score) const; 246 | 247 | }; 248 | 249 | } 250 | 251 | // -- Inline functions 252 | 253 | inline DBow::EntryId DBow::Database::AddEntry(const DBow::BowVector &v) 254 | { 255 | DBow::BowVector w = v; 256 | return _AddEntry(w); 257 | } 258 | 259 | inline DBow::EntryId DBow::Database::AddEntry(const vector& features) 260 | { 261 | DBow::BowVector v; 262 | m_voc->Transform(features, v, false); 263 | return _AddEntry(v); 264 | } 265 | 266 | inline void 267 | DBow::Database::Query(DBow::QueryResults &ret, const vector &features, 268 | int max_results) const 269 | { 270 | DBow::BowVector v; 271 | m_voc->Transform(features, v, false); 272 | _Query(ret, v, max_results); 273 | } 274 | 275 | inline void 276 | DBow::Database::Query(DBow::QueryResults &ret, const DBow::BowVector &v, 277 | int max_results) const 278 | { 279 | DBow::BowVector w = v; 280 | _Query(ret, w, max_results); 281 | } 282 | 283 | 284 | #endif 285 | 286 | -------------------------------------------------------------------------------- /DBow/DatabaseTypes.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * File: DatabaseTypes.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: results obtained by querying a database 6 | */ 7 | 8 | #pragma once 9 | #ifndef __D_DATABASE_TYPES__ 10 | #define __D_DATABASE_TYPES__ 11 | 12 | 13 | namespace DBow { 14 | 15 | // Entry Id 16 | typedef unsigned int EntryId; 17 | 18 | } 19 | 20 | 21 | #endif 22 | 23 | -------------------------------------------------------------------------------- /DBow/DbInfo.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: DbInfo.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: class to encapsulate db info 6 | */ 7 | 8 | #include "DbInfo.h" 9 | #include 10 | #include 11 | using namespace std; 12 | 13 | using namespace DBow; 14 | 15 | DbInfo::DbInfo(void) 16 | { 17 | } 18 | 19 | DbInfo::DbInfo(const DbInfo &v) 20 | { 21 | *this = v; 22 | } 23 | 24 | DbInfo::DbInfo(const VocInfo &v) 25 | { 26 | this->VocInfo::operator =(v); 27 | } 28 | 29 | DbInfo::~DbInfo(void) 30 | { 31 | } 32 | 33 | 34 | DbInfo& DbInfo::operator=(const DBow::DbInfo &v) 35 | { 36 | this->VocInfo::operator =(v); 37 | EntryCount = v.EntryCount; 38 | 39 | return *this; 40 | } 41 | 42 | string DbInfo::toString() const 43 | { 44 | stringstream ss; 45 | ss << "Vocabulary information:" << endl 46 | << VocInfo::toString() << endl; 47 | 48 | ss << "Database information:" << endl 49 | << "Number of entries: " << EntryCount << endl 50 | << endl; 51 | 52 | return ss.str(); 53 | } 54 | 55 | -------------------------------------------------------------------------------- /DBow/DbInfo.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: DbInfo.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: class to encapsulate db info 6 | */ 7 | 8 | #pragma once 9 | #ifndef __D_DB_INFO__ 10 | #define 
__D_DB_INFO__ 11 | 12 | #include "VocInfo.h" 13 | #include 14 | using namespace std; 15 | 16 | namespace DBow { 17 | 18 | class DbInfo: 19 | public VocInfo 20 | { 21 | public: 22 | 23 | // Number of entries in the database 24 | int EntryCount; 25 | 26 | public: 27 | 28 | /** 29 | * Empty constructor 30 | */ 31 | DbInfo(void); 32 | 33 | /** 34 | * Constructor from a VocInfo object 35 | */ 36 | DbInfo(const VocInfo &v); 37 | 38 | /** 39 | * Copy constructor. Replicates data 40 | */ 41 | DbInfo(const DbInfo &v); 42 | 43 | /** 44 | * Destructor 45 | */ 46 | ~DbInfo(void); 47 | 48 | /** 49 | * Copy operator. Replicates data 50 | * @param v source 51 | */ 52 | DbInfo& operator=(const DBow::DbInfo &v); 53 | 54 | /** 55 | * Returns a string with the database information 56 | * @return information string 57 | */ 58 | string toString() const; 59 | 60 | }; 61 | 62 | } 63 | 64 | #endif 65 | 66 | -------------------------------------------------------------------------------- /DBow/HVocParams.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: HVocParams.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: parameters to create a hierarchical vocabulary 6 | */ 7 | 8 | #include "HVocParams.h" 9 | 10 | #include 11 | #include 12 | using namespace std; 13 | 14 | using namespace DBow; 15 | 16 | HVocParams::HVocParams(int k, int L, int desc_length, 17 | WeightingType weighting, ScoringType scoring, 18 | bool scale_score): 19 | VocParams(HIERARCHICAL_VOC, desc_length, weighting, scoring, scale_score) 20 | { 21 | this->k = k; 22 | this->L = L; 23 | } 24 | 25 | HVocParams::~HVocParams(void) 26 | { 27 | } 28 | 29 | string HVocParams::toString() const 30 | { 31 | stringstream ss; 32 | ss << VocParams::toString(); 33 | ss << "k: " << k << ", L: " << L << endl; 34 | return ss.str(); 35 | } 36 | 37 | -------------------------------------------------------------------------------- /DBow/HVocParams.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * File: HVocParams.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: parameters to create a hierarchical vocabulary 6 | */ 7 | 8 | #pragma once 9 | #ifndef __H_VOC_PARAMS__ 10 | #define __H_VOC_PARAMS__ 11 | 12 | #include "VocParams.h" 13 | 14 | #include 15 | using namespace std; 16 | 17 | namespace DBow { 18 | 19 | class HVocParams : 20 | public VocParams 21 | { 22 | public: 23 | int k; 24 | int L; 25 | 26 | public: 27 | /** 28 | * Constructor 29 | * @param k branching factor 30 | * @param L max depth levels 31 | * @param desc_length (default: 64): descriptor length 32 | * @param weighting (default: TF_IDF): weighting method 33 | * @param scoring (default: L1_NORM): scoring method 34 | * @param scale_score (default: true): scale scores 35 | */ 36 | HVocParams(int k, int L, int desc_length = 64, 37 | WeightingType weighting = TF_IDF, 38 | ScoringType scoring = L1_NORM, 39 | bool scale_score = true); 40 | 41 | /** 42 | * Destructor 43 | */ 44 | ~HVocParams(void); 45 | 46 | /** 47 | * Returns a string with information about the parameters 48 | * @return information string 49 | */ 50 | string toString() const; 51 | }; 52 | 53 | } 54 | 55 | #endif 56 | 57 | -------------------------------------------------------------------------------- /DBow/HVocabulary.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: HVocabulary.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: hierarchical vocabulary implementing (Nister, 2006) 6 | */ 7 | 8 | #include "HVocabulary.h" 9 | #include "HVocParams.h" 10 | 11 | #include "DUtils.h" 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | using namespace std; 20 | 21 | using namespace DBow; 22 | 23 | // Use Kmeans++ 24 | // (no other method supported currently) 25 | #define KMEANS_PLUS_PLUS 26 | 27 | 
HVocabulary::HVocabulary(const HVocParams ¶ms): 28 | Vocabulary(params), m_params(params) 29 | { 30 | assert(params.k > 1 && params.L > 0); 31 | } 32 | 33 | HVocabulary::HVocabulary(const char *filename) : 34 | Vocabulary(HVocParams(0,0)), m_params(HVocParams(0,0)) 35 | { 36 | Load(filename); 37 | } 38 | 39 | HVocabulary::HVocabulary(const HVocabulary &voc) : 40 | Vocabulary(voc), m_params(voc.m_params) 41 | { 42 | m_nodes = voc.m_nodes; 43 | 44 | m_words.clear(); 45 | m_words.resize(voc.m_words.size()); 46 | 47 | vector::const_iterator it; 48 | for(it = voc.m_words.begin(); it != voc.m_words.end(); it++){ 49 | const Node *p = *it; 50 | m_words[it - voc.m_words.begin()] = &m_nodes[p->Id]; 51 | } 52 | 53 | } 54 | 55 | HVocabulary::~HVocabulary(void) 56 | { 57 | } 58 | 59 | void HVocabulary::Create(const vector >& training_features) 60 | { 61 | // expected_nodes = Sum_{i=0..L} ( k^i ) 62 | int expected_nodes = 63 | (int)((pow((double)m_params.k, (double)m_params.L + 1) - 1)/(m_params.k - 1)); 64 | 65 | // remove previous tree, allocate memory and insert root node 66 | m_nodes.resize(0); 67 | m_nodes.reserve(expected_nodes); // prevents allocations when creating the tree 68 | m_nodes.push_back(Node(0)); // root 69 | 70 | // prepare data 71 | int nfeatures = 0; 72 | for(unsigned int i = 0; i < training_features.size(); i++){ 73 | assert(training_features[i].size() % m_params.DescriptorLength == 0); 74 | nfeatures += training_features[i].size() / m_params.DescriptorLength; 75 | } 76 | 77 | vector pfeatures; 78 | pfeatures.reserve(nfeatures); 79 | 80 | vector >::const_iterator it; 81 | vector::const_iterator jt; 82 | 83 | for(it = training_features.begin(); it != training_features.end(); it++){ 84 | for(jt = it->begin(); jt < it->end(); jt += m_params.DescriptorLength){ 85 | pfeatures.push_back( jt ); 86 | } 87 | } 88 | 89 | vector buffer; 90 | buffer.reserve( m_params.k * m_params.DescriptorLength ); 91 | 92 | // start hierarchical kmeans 93 | HKMeansStep(0, pfeatures, 
1, buffer); 94 | 95 | // create word nodes 96 | CreateWords(); 97 | 98 | // set the flag 99 | m_created = true; 100 | 101 | // set node weigths 102 | SetNodeWeights(training_features); 103 | 104 | } 105 | 106 | void HVocabulary::HKMeansStep(NodeId parentId, const vector &pfeatures, 107 | int level, vector& clusters) 108 | { 109 | if(pfeatures.empty()) return; 110 | 111 | // features associated to each cluster 112 | vector > groups; // indices from pfeatures 113 | groups.reserve(m_params.k); 114 | 115 | // number of final clusters 116 | int nclusters = 0; 117 | 118 | if((int)pfeatures.size() <= m_params.k){ 119 | 120 | // trivial case: if there is a few features, each feature is a cluster 121 | nclusters = pfeatures.size(); 122 | clusters.resize(pfeatures.size() * m_params.DescriptorLength); 123 | groups.resize(pfeatures.size()); 124 | 125 | for(unsigned int i = 0; i < pfeatures.size(); i++){ 126 | copy(pfeatures[i], pfeatures[i] + m_params.DescriptorLength, 127 | clusters.begin() + i * m_params.DescriptorLength); 128 | groups[i].push_back(i); 129 | } 130 | 131 | }else{ // choose clusters with kmeans++ 132 | 133 | bool first_time = true; 134 | bool goon = true; 135 | vector::const_iterator fit; 136 | 137 | // to check if clusters move after iterations 138 | vector last_association, current_association; 139 | 140 | while(goon){ 141 | // 1. 
Calculate clusters 142 | 143 | if(first_time){ 144 | // random sample 145 | 146 | #ifdef KMEANS_PLUS_PLUS 147 | RandomClustersPlusPlus(clusters, pfeatures); 148 | #else 149 | #error No initial clustering method 150 | #endif 151 | nclusters = clusters.size() / m_params.DescriptorLength; 152 | 153 | }else{ 154 | // calculate cluster centres 155 | 156 | vector::iterator pfirst, pend, cit; 157 | vector::const_iterator vit; 158 | 159 | for(int i = 0; i < nclusters; i++){ 160 | pfirst = clusters.begin() + i * m_params.DescriptorLength; 161 | pend = clusters.begin() + (i+1) * m_params.DescriptorLength; 162 | 163 | fill(pfirst, pend, 0.f); 164 | 165 | for(vit = groups[i].begin(); vit != groups[i].end(); vit++){ 166 | fit = pfeatures.begin() + *vit; 167 | // Possible improvement: divide this into chunks of 4 operations 168 | for(cit = pfirst; cit != pend; cit++){ 169 | *cit += *((*fit) + (cit - pfirst)); 170 | } 171 | } 172 | 173 | for(cit = pfirst; cit != pend; cit++) *cit /= groups[i].size(); 174 | } 175 | 176 | } // if(first_time) 177 | 178 | // 2. 
Associate features with clusters 179 | 180 | // calculate distances to cluster centers 181 | groups.clear(); 182 | groups.resize(nclusters, vector()); 183 | current_association.resize(pfeatures.size()); 184 | 185 | for(fit = pfeatures.begin(); fit != pfeatures.end(); fit++){ 186 | double best_sqd = DescriptorSqDistance(*fit, clusters.begin()); 187 | int icluster = 0; 188 | 189 | for(int i = 1; i < nclusters; i++){ 190 | double sqd = DescriptorSqDistance(*fit, 191 | clusters.begin() + i * m_params.DescriptorLength); 192 | 193 | if(sqd < best_sqd){ 194 | best_sqd = sqd; 195 | icluster = i; 196 | } 197 | } 198 | groups[icluster].push_back(fit - pfeatures.begin()); 199 | current_association[ fit - pfeatures.begin() ] = icluster; 200 | } 201 | 202 | // remove clusters with no features 203 | // (this is not necessary with kmeans++) 204 | #ifndef KMEANS_PLUS_PLUS 205 | for(int i = nclusters-1; i >= 0; i--){ 206 | if(groups[i].empty()){ 207 | groups.erase(groups.begin() + i); 208 | clusters.erase(clusters.begin() + i * m_params.DescriptorLength, 209 | clusters.begin() + (i+1) * m_params.DescriptorLength); 210 | } 211 | } 212 | nclusters = groups.size(); 213 | #endif 214 | 215 | // 3. 
check convergence 216 | if(first_time){ 217 | first_time = false; 218 | }else{ 219 | goon = false; 220 | for(unsigned int i = 0; i < current_association.size(); i++){ 221 | if(current_association[i] != last_association[i]){ 222 | goon = true; 223 | break; 224 | } 225 | } 226 | } 227 | 228 | if(goon){ 229 | // copy last feature-cluster association 230 | last_association = current_association; 231 | } 232 | 233 | } // while(goon) 234 | 235 | } // if trivial case 236 | 237 | // Kmeans done, create nodes 238 | 239 | // create child nodes 240 | for(int i = 0; i < nclusters; i++){ 241 | NodeId id = m_nodes.size(); 242 | m_nodes.push_back(Node(id)); 243 | m_nodes.back().Descriptor.resize(m_params.DescriptorLength); 244 | copy(clusters.begin() + i * m_params.DescriptorLength, 245 | clusters.begin() + (i+1) * m_params.DescriptorLength, 246 | m_nodes.back().Descriptor.begin()); 247 | 248 | m_nodes[parentId].Children.push_back(id); 249 | } 250 | 251 | if(level < m_params.L){ 252 | // iterate again with the resulting clusters 253 | for(int i = 0; i < nclusters; i++){ 254 | NodeId id = m_nodes[m_nodes[parentId].Children[i]].Id; 255 | 256 | vector child_features; 257 | child_features.reserve(groups[i].size()); 258 | 259 | vector::const_iterator vit; 260 | for(vit = groups[i].begin(); vit != groups[i].end(); vit++){ 261 | child_features.push_back(pfeatures[*vit]); 262 | } 263 | 264 | if(child_features.size() > 1){ 265 | // (clusters variable can be safely reused now) 266 | HKMeansStep(id, child_features, level + 1, clusters); 267 | } 268 | } 269 | } 270 | } 271 | 272 | int HVocabulary::GetNumberOfWords() const 273 | { 274 | return m_words.size(); 275 | } 276 | 277 | void HVocabulary::SaveBinary(const char *filename) const 278 | { 279 | // Format (binary): 280 | // [Header] 281 | // k L N 282 | // NodeId_1 ParentId Weight d1 ... d_D 283 | // ... 284 | // NodeId_(N-1) ParentId Weight d1 ... d_D 285 | // WordId_0 frequency NodeId 286 | // ... 
287 | // WordId_(N-1) frequency NodeId 288 | // 289 | // Where: 290 | // k (int32): branching factor 291 | // L (int32): depth levels 292 | // N (int32): number of nodes, including root 293 | // NodeId (int32): root node is not present. Not in order 294 | // ParentId (int32) 295 | // Weight (double64) 296 | // d_i (float32): descriptor entry 297 | // WordId (int32): in ascending order 298 | // frequency (float32): frequency of word 299 | // NodeId (int32): node associated to word 300 | // 301 | // (the number along with the data type represents the size in bits) 302 | 303 | DUtils::BinaryFile f(filename, DUtils::WRITE); 304 | 305 | const int N = m_nodes.size(); 306 | 307 | // header 308 | SaveBinaryHeader(f); 309 | f << m_params.k << m_params.L << N; 310 | 311 | // tree 312 | vector parents, children; 313 | vector::const_iterator pit; 314 | 315 | parents.push_back(0); // root 316 | 317 | while(!parents.empty()){ 318 | NodeId pid = parents.back(); 319 | parents.pop_back(); 320 | 321 | const Node& parent = m_nodes[pid]; 322 | children = parent.Children; 323 | 324 | for(pit = children.begin(); pit != children.end(); pit++){ 325 | const Node& child = m_nodes[*pit]; 326 | 327 | // save node data 328 | f << (int)child.Id << (int)pid << (double)child.Weight; 329 | for(int i = 0; i < m_params.DescriptorLength; i++){ 330 | f << child.Descriptor[i]; 331 | } 332 | 333 | // add to parent list 334 | if(!child.isLeaf()){ 335 | parents.push_back(*pit); 336 | } 337 | } 338 | } 339 | 340 | // vocabulary 341 | vector::const_iterator wit; 342 | for(wit = m_words.begin(); wit != m_words.end(); wit++){ 343 | WordId id = wit - m_words.begin(); 344 | f << (int)id << GetWordFrequency(id) << (int)(*wit)->Id; 345 | } 346 | 347 | f.Close(); 348 | } 349 | 350 | void HVocabulary::SaveText(const char *filename) const 351 | { 352 | // Format (text) 353 | // [Header] 354 | // k L N 355 | // NodeId_1 ParentId Weight d1 ... d_D 356 | // ... 357 | // NodeId_(N-1) ParentId Weight d1 ... 
d_D 358 | // WordId_0 frequency NodeId 359 | // ... 360 | // WordId_(N-1) frequency NodeId 361 | 362 | fstream f(filename, ios::out); 363 | if(!f.is_open()) throw DUtils::DException("Cannot open file"); 364 | 365 | // magic word is not necessary in the text file 366 | 367 | f.precision(10); 368 | 369 | const int N = m_nodes.size(); 370 | 371 | // header 372 | SaveTextHeader(f); 373 | f << m_params.k << " " << m_params.L << " " << N << endl; 374 | 375 | // tree 376 | vector parents, children; 377 | vector::const_iterator pit; 378 | 379 | parents.push_back(0); // root 380 | 381 | while(!parents.empty()){ 382 | NodeId pid = parents.back(); 383 | parents.pop_back(); 384 | 385 | const Node& parent = m_nodes[pid]; 386 | children = parent.Children; 387 | 388 | for(pit = children.begin(); pit != children.end(); pit++){ 389 | const Node& child = m_nodes[*pit]; 390 | 391 | // save node data 392 | f << child.Id << " " 393 | << pid << " " 394 | << child.Weight << " "; 395 | for(int i = 0; i < m_params.DescriptorLength; i++){ 396 | f << child.Descriptor[i] << " "; 397 | } 398 | f << endl; 399 | 400 | // add to parent list 401 | if(!child.isLeaf()){ 402 | parents.push_back(*pit); 403 | } 404 | } 405 | } 406 | 407 | // vocabulary 408 | vector::const_iterator wit; 409 | for(wit = m_words.begin(); wit != m_words.end(); wit++){ 410 | WordId id = wit - m_words.begin(); 411 | f << (int)id << " " 412 | << GetWordFrequency(id) << " " 413 | << (int)(*wit)->Id 414 | << endl; 415 | } 416 | 417 | f.close(); 418 | } 419 | 420 | unsigned int HVocabulary::LoadBinary(const char *filename) 421 | { 422 | // Format (binary): 423 | // [Header] 424 | // k L N 425 | // NodeId_1 ParentId Weight d1 ... d_D 426 | // ... 427 | // NodeId_(N-1) ParentId Weight d1 ... d_D 428 | // WordId_0 frequency NodeId 429 | // ... 
430 | // WordId_(N-1) frequency NodeId 431 | 432 | DUtils::BinaryFile f(filename, DUtils::READ); 433 | 434 | int nwords = LoadBinaryHeader(f); 435 | 436 | _load(f, nwords); 437 | 438 | unsigned int ret = f.BytesRead(); 439 | 440 | f.Close(); 441 | 442 | return ret; 443 | } 444 | 445 | unsigned int HVocabulary::LoadText(const char *filename) 446 | { 447 | // Format (text) 448 | // [Header] 449 | // k L N 450 | // NodeId_1 ParentId Weight d1 ... d_D 451 | // ... 452 | // NodeId_(N-1) ParentId Weight d1 ... d_D 453 | // WordId_0 frequency NodeId 454 | // ... 455 | // WordId_(N-1) frequency NodeId 456 | 457 | fstream f(filename, ios::in); 458 | if(!f.is_open()) throw DUtils::DException("Cannot open file"); 459 | 460 | int nwords = LoadTextHeader(f); 461 | 462 | _load(f, nwords); 463 | 464 | unsigned int ret = (unsigned int)f.tellg(); 465 | 466 | f.close(); 467 | 468 | return ret; 469 | } 470 | 471 | 472 | template 473 | void HVocabulary::_load(T &f, int nwords) 474 | { 475 | // general header has already been read, 476 | // giving value to these member variables 477 | int nfreq = m_frequent_words_stopped; 478 | int ninfreq = m_infrequent_words_stopped; 479 | 480 | // removes nodes, words and frequencies 481 | m_created = false; 482 | m_words.clear(); 483 | m_nodes.clear(); 484 | m_word_frequency.clear(); 485 | 486 | // h header 487 | int nnodes; 488 | f >> m_params.k >> m_params.L >> nnodes; 489 | 490 | // creates all the nodes at a time 491 | m_nodes.resize(nnodes); 492 | m_nodes[0].Id = 0; // root node 493 | 494 | for(int i = 1; i < nnodes; i++){ 495 | int nodeid, parentid; 496 | double weight; 497 | f >> nodeid >> parentid >> weight; 498 | 499 | m_nodes[nodeid].Id = nodeid; 500 | m_nodes[nodeid].Weight = weight; 501 | m_nodes[parentid].Children.push_back(nodeid); 502 | 503 | m_nodes[nodeid].Descriptor.resize(m_params.DescriptorLength); 504 | for(int j = 0; j < m_params.DescriptorLength; j++){ 505 | f >> m_nodes[nodeid].Descriptor[j]; 506 | } 507 | } 508 | 509 | 
m_words.resize(nwords); 510 | m_word_frequency.resize(nwords); 511 | 512 | for(int i = 0; i < nwords; i++){ 513 | int wordid, nodeid; 514 | float frequency; 515 | f >> wordid >> frequency >> nodeid; 516 | 517 | m_nodes[nodeid].WId = wordid; 518 | m_words[wordid] = &m_nodes[nodeid]; 519 | m_word_frequency[wordid] = frequency; 520 | } 521 | 522 | // all was ok 523 | m_created = true; 524 | 525 | // create an empty stop list 526 | CreateStopList(); 527 | 528 | // and stop words 529 | StopWords(nfreq, ninfreq); 530 | } 531 | 532 | 533 | void HVocabulary::RandomClustersPlusPlus(vector& clusters, 534 | const vector &pfeatures) const 535 | { 536 | // Implements kmeans++ seeding algorithm 537 | // Algorithm: 538 | // 1. Choose one center uniformly at random from among the data points. 539 | // 2. For each data point x, compute D(x), the distance between x and the nearest 540 | // center that has already been chosen. 541 | // 3. Add one new data point as a center. Each point x is chosen with probability 542 | // proportional to D(x)^2. 543 | // 4. Repeat Steps 2 and 3 until k centers have been chosen. 544 | // 5. Now that the initial centers have been chosen, proceed using standard k-means 545 | // clustering. 546 | 547 | clusters.resize(m_params.k * m_params.DescriptorLength); 548 | 549 | vector feature_used(pfeatures.size(), false); 550 | 551 | // 1. 552 | int ifeature = DUtils::Random::RandomInt(0, pfeatures.size()-1); 553 | feature_used[ifeature] = true; 554 | 555 | // create first cluster 556 | copy(pfeatures[ifeature], pfeatures[ifeature] + m_params.DescriptorLength, 557 | clusters.begin()); 558 | int used_clusters = 1; 559 | 560 | 561 | vector sqdistances; 562 | vector ifeatures; 563 | 564 | sqdistances.reserve(pfeatures.size()); 565 | ifeatures.reserve(pfeatures.size()); 566 | 567 | vector::const_iterator fit; 568 | 569 | while(used_clusters < m_params.k){ 570 | // 2. 
571 | sqdistances.resize(0); 572 | ifeatures.resize(0); 573 | 574 | for(fit = pfeatures.begin(); fit != pfeatures.end(); fit++){ 575 | ifeature = fit - pfeatures.begin(); 576 | if(!feature_used[ifeature]){ 577 | 578 | double min_sqd = DescriptorSqDistance(*fit, clusters.begin()); 579 | for(int i = 1; i < used_clusters; i++){ 580 | double sqd = DescriptorSqDistance(*fit, 581 | clusters.begin() + i * m_params.DescriptorLength); 582 | 583 | if(sqd < min_sqd){ 584 | min_sqd = sqd; 585 | } 586 | } 587 | 588 | sqdistances.push_back(min_sqd); 589 | ifeatures.push_back(ifeature); 590 | } 591 | } 592 | 593 | // 3. 594 | double sqd_sum = accumulate(sqdistances.begin(), sqdistances.end(), 0.0); 595 | 596 | if(sqd_sum > 0){ 597 | double cut_d; 598 | do{ 599 | cut_d = DUtils::Random::RandomValue(0, sqd_sum); 600 | }while(cut_d == 0.0); 601 | 602 | double d_up_now = 0; 603 | vector::iterator dit; 604 | for(dit = sqdistances.begin(); dit != sqdistances.end(); dit++){ 605 | d_up_now += *dit; 606 | if(d_up_now >= cut_d) break; 607 | } 608 | if(dit == sqdistances.end()) dit = sqdistances.begin() + sqdistances.size()-1; 609 | 610 | ifeature = ifeatures[dit - sqdistances.begin()]; 611 | 612 | assert(!feature_used[ifeature]); 613 | 614 | copy(pfeatures[ifeature], pfeatures[ifeature] + m_params.DescriptorLength, 615 | clusters.begin() + used_clusters * m_params.DescriptorLength); 616 | feature_used[ifeature] = true; 617 | used_clusters++; 618 | 619 | }else 620 | break; 621 | } 622 | 623 | if(used_clusters < m_params.k) 624 | clusters.resize(used_clusters * m_params.DescriptorLength); 625 | 626 | } 627 | 628 | 629 | double HVocabulary::DescriptorSqDistance(const pFeature &v, 630 | const pFeature &w) const 631 | { 632 | double sqd = 0.0; 633 | 634 | const int rest = m_params.DescriptorLength % 4; 635 | 636 | for(int i = 0; i < m_params.DescriptorLength - rest; i += 4){ 637 | sqd += (*(v + i) - *(w + i)) * (*(v + i) - *(w + i)); 638 | sqd += (*(v + i + 1) - *(w + i + 1)) * (*(v + i + 1) - 
*(w + i + 1)); 639 | sqd += (*(v + i + 2) - *(w + i + 2)) * (*(v + i + 2) - *(w + i + 2)); 640 | sqd += (*(v + i + 3) - *(w + i + 3)) * (*(v + i + 3) - *(w + i + 3)); 641 | } 642 | 643 | for(int i = m_params.DescriptorLength - rest; i < m_params.DescriptorLength; i++){ 644 | sqd += (*(v + i) - *(w + i)) * (*(v + i) - *(w + i)); 645 | } 646 | 647 | return sqd; 648 | } 649 | 650 | void HVocabulary::SetNodeWeights(const vector >& training_features) 651 | { 652 | vector weights; 653 | GetWordWeightsAndCreateStopList(training_features, weights); 654 | 655 | assert(weights.size() == m_words.size()); 656 | 657 | for(unsigned int i = 0; i < m_words.size(); i++){ 658 | m_words[i]->Weight = weights[i]; 659 | } 660 | } 661 | 662 | WordId HVocabulary::Transform(const vector::const_iterator &pfeature) const 663 | { 664 | if(isEmpty()) return 0; 665 | 666 | assert(!m_nodes[0].isLeaf()); 667 | 668 | // propagate the feature down the tree 669 | vector nodes; 670 | vector::const_iterator it; 671 | 672 | NodeId final_id = 0; // root 673 | 674 | do{ 675 | nodes = m_nodes[final_id].Children; 676 | final_id = nodes[0]; 677 | double best_sqd = DescriptorSqDistance(pfeature, m_nodes[final_id].Descriptor.begin()); 678 | 679 | for(it = nodes.begin() + 1; it != nodes.end(); it++){ 680 | NodeId id = *it; 681 | double sqd = DescriptorSqDistance(pfeature, m_nodes[id].Descriptor.begin()); 682 | if(sqd < best_sqd){ 683 | best_sqd = sqd; 684 | final_id = id; 685 | } 686 | } 687 | } while( !m_nodes[final_id].isLeaf() ); 688 | 689 | // turn node id into word id 690 | return m_nodes[final_id].WId; 691 | } 692 | 693 | void HVocabulary::CreateWords() 694 | { 695 | m_words.resize(0); 696 | m_words.reserve( (int)pow((double)m_params.k, (double)m_params.L) ); 697 | 698 | // the actual order of the words is not important 699 | vector::iterator it; 700 | for(it = m_nodes.begin(); it != m_nodes.end(); it++){ 701 | if(it->isLeaf()){ 702 | it->WId = m_words.size(); 703 | m_words.push_back( &(*it) ); 704 | } 
705 | } 706 | } 707 | 708 | WordValue HVocabulary::GetWordWeight(WordId id) const 709 | { 710 | if(isEmpty()) return 0; 711 | 712 | assert(id < m_words.size()); 713 | 714 | return m_words[id]->Weight; 715 | } 716 | -------------------------------------------------------------------------------- /DBow/HVocabulary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: HVocabulary.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: hierarchical vocabulary implementing (Nister, 2006) 6 | */ 7 | 8 | #pragma once 9 | #ifndef __H_VOCABULARY__ 10 | #define __H_VOCABULARY__ 11 | 12 | #include "Vocabulary.h" 13 | #include "BowVector.h" 14 | #include "HVocParams.h" 15 | 16 | #include 17 | using namespace std; 18 | 19 | namespace DBow { 20 | 21 | class HVocabulary : 22 | public Vocabulary 23 | { 24 | public: 25 | 26 | /** 27 | * Constructor 28 | * @param params vocabulary parameters 29 | */ 30 | HVocabulary(const HVocParams ¶ms); 31 | 32 | /** 33 | * Copy constructor. Allocates new data 34 | * @param voc vocabulary to copy 35 | */ 36 | HVocabulary(const HVocabulary &voc); 37 | 38 | /** 39 | * Constructor 40 | * @param filename file to load in 41 | */ 42 | HVocabulary(const char *filename); 43 | 44 | /** 45 | * Destructor 46 | */ 47 | ~HVocabulary(void); 48 | 49 | /** 50 | * Creates the vocabulary from some training data. 51 | * The current content of the vocabulary is cleared 52 | * @see Vocabulary::Create 53 | * @param training_features vector of groups of features in the OpenCV format. 54 | */ 55 | void Create(const vector >& training_features); 56 | 57 | /** 58 | * Transforms a set of features into a bag-of-words vector 59 | * @see Vocabulary::Transform 60 | */ 61 | using Vocabulary::Transform; 62 | 63 | protected: 64 | 65 | /** 66 | * Saves the vocabulary in binary format. 
67 | * @param filename file to store the vocabulary in 68 | */ 69 | void SaveBinary(const char *filename) const; 70 | 71 | /** 72 | * Saves the vocabulary in text format. 73 | * @param filename file to store the vocabulary in 74 | */ 75 | void SaveText(const char *filename) const; 76 | 77 | /** 78 | * Loads the vocabulary in binary format. 79 | * @param filename file to read the vocabulary from 80 | */ 81 | unsigned int LoadBinary(const char *filename); 82 | 83 | /** 84 | * Loads the vocabulary in text format. 85 | * @param filename file to read the vocabulary from 86 | */ 87 | unsigned int LoadText(const char *filename); 88 | 89 | /** 90 | * Returns the weight of a word 91 | * @see Vocabulary::GetWordWeight 92 | * @param id word id 93 | * @return word weight 94 | */ 95 | WordValue GetWordWeight(WordId id) const; 96 | 97 | /** 98 | * Returns the number of words in the vocabulary 99 | * (must not check m_created) 100 | * @return number of words 101 | */ 102 | int GetNumberOfWords() const; 103 | 104 | /** 105 | * Transforms a feature into its word id 106 | * @see Vocabulary::Transform 107 | * @param feature descriptor. 
Pointer to the beginning of a DescriptorLenght 108 | * size vector containing the feature descriptor 109 | * @return word id 110 | */ 111 | WordId Transform(const vector::const_iterator &pfeature) const; 112 | 113 | protected: 114 | 115 | // Voc parameters 116 | HVocParams m_params; 117 | 118 | typedef unsigned int NodeId; 119 | typedef unsigned int DocId; 120 | 121 | struct Node { 122 | NodeId Id; 123 | vector Children; 124 | WordValue Weight; 125 | vector Descriptor; 126 | 127 | WordId WId; // if this node is a leaf, it will have a word id 128 | 129 | /** 130 | * Constructor 131 | */ 132 | Node(): Id(0), Weight(0), WId(-1){} 133 | Node(NodeId _id): Id(_id), Weight(0), WId(-1){} 134 | 135 | /** 136 | * Returns if the node is a leaf node 137 | * @return true iif the node is a leaf 138 | */ 139 | inline bool isLeaf() const { return Children.empty(); } 140 | }; 141 | 142 | // Nodes in the tree, including root [0] with no descriptor 143 | vector m_nodes; 144 | 145 | // The words of the vocabulary are the tree leaves 146 | vector m_words; 147 | 148 | // Pointer to a feature (only used when Creating the vocabulary) 149 | typedef vector::const_iterator pFeature; 150 | 151 | protected: 152 | 153 | /** 154 | * Performs kmeans recursively and created the vocabulary tree. 155 | * Nodes are created without weights 156 | * @param parentId created nodes will be children of parentId 157 | * @param pfeatures data to perform the kmeans 158 | * @param level current tree level (starting in 1) 159 | * @param clusters a buffer to reuse in all the HKMeansStep calls. 160 | * It should be a vector with memory allocated for k * DescriptorLength 161 | * floats 162 | */ 163 | void HKMeansStep(NodeId parentId, const vector &pfeatures, 164 | int level, vector& clusters); 165 | 166 | /** 167 | * Initiates clusters by using the algorithm of kmeans++ 168 | * @param clusters (out) clusters created. 
Its size is multiple of 169 | * DescriptoLength 170 | * @param pfeatures features in the data space to create the clusters 171 | */ 172 | void RandomClustersPlusPlus(vector& clusters, 173 | const vector &pfeatures) const; 174 | 175 | /** 176 | * Calculates the Euclidean squared distance between two features 177 | * @param v 178 | * @param w 179 | * @return squared distance 180 | */ 181 | double DescriptorSqDistance(const pFeature &v, const pFeature &w) const; 182 | 183 | /** 184 | * Sets the node weights once the tree has been created according to 185 | * the data used 186 | * @param training_features features used to create the vocabulary 187 | */ 188 | void SetNodeWeights(const vector >& training_features); 189 | 190 | /** 191 | * Creates the words of the vocabulary once the tree is built 192 | */ 193 | void CreateWords(); 194 | 195 | private: 196 | 197 | /** 198 | * Loads data from a file stream. 199 | * The generic header must already be read 200 | * @param f file stream opened in reading mode 201 | * @param nwords number of words in the voc 202 | */ 203 | template void _load(T &f, int nwords); 204 | 205 | }; 206 | 207 | } 208 | 209 | #endif 210 | 211 | -------------------------------------------------------------------------------- /DBow/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | CFLAGS=-I../DUtils 3 | LFLAGS=-L../DUtils 4 | LIBS=-lstdc++ -lDUtils 5 | 6 | DEPS=BowVector.h DbInfo.h HVocParams.h Vocabulary.h Database.h DBow.h QueryResults.h VocInfo.h DatabaseTypes.h HVocabulary.h VocParams.h 7 | OBJS=BowVector.o DbInfo.o HVocParams.o Vocabulary.o VocParams.o Database.o HVocabulary.o QueryResults.o VocInfo.o 8 | 9 | %.o: %.cpp $(DEPS) 10 | $(CC) $(CFLAGS) -fPIC -O3 -Wall -c $< -o $@ 11 | 12 | libDBow.so: $(OBJS) 13 | $(CC) $^ $(LFLAGS) $(LIBS) -shared -o $@ 14 | 15 | clean: 16 | rm -f *.o *.so 17 | 18 | 19 | -------------------------------------------------------------------------------- 
/DBow/QueryResults.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: QueryResults.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: results obtained by querying a database 6 | */ 7 | 8 | #include "QueryResults.h" 9 | #include 10 | using namespace std; 11 | 12 | using namespace DBow; 13 | 14 | QueryResults::QueryResults(void) 15 | { 16 | } 17 | 18 | QueryResults::~QueryResults(void) 19 | { 20 | } 21 | -------------------------------------------------------------------------------- /DBow/QueryResults.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: QueryResults.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: results obtained by querying a database 6 | */ 7 | 8 | 9 | #pragma once 10 | #ifndef __D_QUERY_RESULTS__ 11 | #define __D_QUERY_RESULTS__ 12 | 13 | #include "DatabaseTypes.h" 14 | #include 15 | using namespace std; 16 | 17 | namespace DBow { 18 | 19 | struct Result 20 | { 21 | EntryId Id; 22 | double Score; 23 | 24 | /** 25 | * Constructors 26 | */ 27 | inline Result(){} 28 | inline Result(EntryId _id, double _score):Id(_id), Score(_score){} 29 | 30 | /** 31 | * Compares the scores of two results 32 | * @return true iif a.Score > b.Score 33 | */ 34 | inline static bool GreaterThan(const Result &a, const Result &b){ 35 | return a.Score > b.Score; 36 | } 37 | 38 | /** 39 | * Compares the scores of two results 40 | * @return true iif this.score < r.score 41 | */ 42 | inline bool operator<(const Result &r) const { 43 | return this->Score < r.Score; 44 | } 45 | 46 | /** 47 | * Compares the scores of two results 48 | * @return true iif this.score > r.score 49 | */ 50 | inline bool operator>(const Result &r) const 51 | { 52 | return this->Score > r.Score; 53 | } 54 | 55 | /** 56 | * Compares the entry id of the result 57 | * @return true iif this.id == id 58 | */ 59 | inline bool operator==(EntryId id) const 60 | 
{ 61 | return this->Id == id; 62 | } 63 | }; 64 | 65 | class QueryResults: 66 | public vector 67 | { 68 | public: 69 | 70 | /** 71 | * Empty constructor 72 | */ 73 | QueryResults(void); 74 | 75 | /** 76 | * Destructor 77 | */ 78 | ~QueryResults(void); 79 | 80 | }; 81 | 82 | } 83 | 84 | #endif 85 | 86 | -------------------------------------------------------------------------------- /DBow/VocInfo.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: VocInfo.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: information structure retrieved from vocabularies 6 | */ 7 | 8 | 9 | #include "VocInfo.h" 10 | #include "VocParams.h" 11 | 12 | #include 13 | #include 14 | using namespace std; 15 | 16 | using namespace DBow; 17 | 18 | VocInfo::VocInfo(void): Parameters(NULL) 19 | { 20 | } 21 | 22 | VocInfo::VocInfo(const VocInfo &v) 23 | { 24 | *this = v; 25 | } 26 | 27 | VocInfo::VocInfo(const VocParams ¶ms) 28 | { 29 | this->Parameters = new VocParams(params); 30 | this->VocType = params.Type; 31 | } 32 | 33 | VocInfo::~VocInfo(void) 34 | { 35 | delete this->Parameters; 36 | } 37 | 38 | VocInfo& VocInfo::operator=(const DBow::VocInfo &v) 39 | { 40 | this->StoppedFrequentWords = v.StoppedFrequentWords; 41 | this->StoppedInfrequentWords = v.StoppedInfrequentWords; 42 | this->VocType = v.VocType; 43 | this->WordCount = v.WordCount; 44 | this->Parameters = new VocParams(*v.Parameters); 45 | 46 | return *this; 47 | } 48 | 49 | string VocInfo::toString() const 50 | { 51 | stringstream ss; 52 | 53 | if(this->Parameters) 54 | ss << this->Parameters->toString() << endl; 55 | 56 | ss << "Number of words: " << WordCount << endl 57 | << "Frequent words stopped: " << StoppedFrequentWords << endl 58 | << "Infrequent words stopped: " << StoppedInfrequentWords << endl; 59 | 60 | return ss.str(); 61 | } 62 | 63 | -------------------------------------------------------------------------------- /DBow/VocInfo.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * File: VocInfo.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: information structure retrieved from vocabularies 6 | */ 7 | 8 | #pragma once 9 | #ifndef __VOC_INFO__ 10 | #define __VOC_INFO__ 11 | 12 | #include "VocParams.h" 13 | 14 | #include 15 | using namespace std; 16 | 17 | namespace DBow { 18 | 19 | class VocInfo 20 | { 21 | public: 22 | 23 | // Type of vocabulary (determines subclass of vocParams) 24 | VocParams::VocType VocType; 25 | 26 | // Words in the vocabulary 27 | long WordCount; 28 | 29 | // Number of stopped words because of their high frequency 30 | long StoppedFrequentWords; 31 | 32 | // Number of stopped words because of their low frequency 33 | long StoppedInfrequentWords; 34 | 35 | // Other vocabulary parameters 36 | // Should be casted to the proper VocParams subclass pointer 37 | VocParams *Parameters; 38 | 39 | public: 40 | 41 | /** 42 | * Empty constructor 43 | */ 44 | VocInfo(void); 45 | 46 | /** 47 | * Copy constructor. Replicates data 48 | */ 49 | VocInfo(const DBow::VocInfo &v); 50 | 51 | /** 52 | * Constructor which copies the given vocabulary params 53 | * @param params vocabulary parameters 54 | */ 55 | VocInfo(const DBow::VocParams ¶ms); 56 | 57 | /** 58 | * Destructor 59 | */ 60 | virtual ~VocInfo(void); 61 | 62 | /** 63 | * Copy operator. 
Replicates data 64 | * @param v source 65 | */ 66 | VocInfo& operator=(const DBow::VocInfo &v); 67 | 68 | /** 69 | * Returns a string with the vocabulary information 70 | * @return information string 71 | */ 72 | string toString() const; 73 | }; 74 | 75 | } 76 | 77 | #endif 78 | 79 | -------------------------------------------------------------------------------- /DBow/VocParams.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: VocParams.cpp 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: parameters to create a new vocabulary. 6 | * This class must be inherited 7 | */ 8 | 9 | #include "VocParams.h" 10 | 11 | #include 12 | #include 13 | using namespace std; 14 | 15 | using namespace DBow; 16 | 17 | VocParams::VocParams(VocType type, int desc_length, 18 | WeightingType weighting, ScoringType scoring, bool scale_score) 19 | { 20 | this->Type = type; 21 | this->DescriptorLength = desc_length; 22 | this->Weighting = weighting; 23 | this->Scoring = scoring; 24 | this->ScaleScore = scale_score; 25 | } 26 | 27 | VocParams::~VocParams(void) 28 | { 29 | } 30 | 31 | string VocParams::toString() const 32 | { 33 | stringstream ss; 34 | ss << "Vocabulary type: "; 35 | switch(Type){ 36 | case HIERARCHICAL_VOC: 37 | ss << "hierarchical"; 38 | break; 39 | } 40 | 41 | ss << endl << "WeightingType: "; 42 | switch(Weighting){ 43 | case TF_IDF: 44 | ss << "tf-idf"; 45 | break; 46 | case TF: 47 | ss << "tf"; 48 | break; 49 | case IDF: 50 | ss << "idf"; 51 | break; 52 | case BINARY: 53 | ss << "binary"; 54 | break; 55 | } 56 | 57 | ss << endl << "ScoringType: "; 58 | switch(Scoring){ 59 | case L1_NORM: 60 | ss << "L1 norm"; 61 | break; 62 | case L2_NORM: 63 | ss << "L2 norm"; 64 | break; 65 | case CHI_SQUARE: 66 | ss << "chi-square"; 67 | break; 68 | case KL: 69 | ss << "KL"; 70 | break; 71 | case BHATTACHARYYA: 72 | ss << "Bhattacharyya"; 73 | break; 74 | case DOT_PRODUCT: 75 | ss << "dot product"; 76 | break; 
77 | } 78 | if(ScaleScore) 79 | ss << " scaling to 0..1"; 80 | else 81 | ss << " without scaling"; 82 | 83 | ss << endl; 84 | 85 | return ss.str(); 86 | } 87 | 88 | bool VocParams::MustNormalize(ScoringType& norm) const 89 | { 90 | return MustNormalize(Scoring, norm); 91 | } 92 | 93 | bool VocParams::MustNormalize(ScoringType scoring, ScoringType& norm) 94 | { 95 | bool must_normalize; 96 | 97 | switch(scoring){ 98 | case VocParams::L2_NORM: 99 | must_normalize = true; 100 | norm = VocParams::L2_NORM; 101 | break; 102 | 103 | case VocParams::L1_NORM: 104 | case VocParams::CHI_SQUARE: 105 | case VocParams::KL: 106 | case VocParams::BHATTACHARYYA: 107 | must_normalize = true; 108 | norm = VocParams::L1_NORM; 109 | break; 110 | 111 | default: 112 | must_normalize = false; 113 | break; 114 | } 115 | 116 | return must_normalize; 117 | } 118 | -------------------------------------------------------------------------------- /DBow/VocParams.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: VocParams.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: parameters to create a new vocabulary. 6 | * This class must be inherited 7 | */ 8 | 9 | #pragma once 10 | #ifndef __VOC_PARAMS__ 11 | #define __VOC_PARAMS__ 12 | 13 | #include 14 | using namespace std; 15 | 16 | namespace DBow { 17 | 18 | class Vocabulary; 19 | class Database; 20 | 21 | class VocParams 22 | { 23 | public: 24 | 25 | enum VocType 26 | { 27 | HIERARCHICAL_VOC = 0 // HVocParams instance 28 | }; 29 | 30 | enum WeightingType 31 | { 32 | TF_IDF, 33 | TF, 34 | IDF, 35 | BINARY 36 | }; 37 | 38 | enum ScoringType 39 | { 40 | L1_NORM, 41 | L2_NORM, 42 | CHI_SQUARE, 43 | KL, 44 | BHATTACHARYYA, 45 | DOT_PRODUCT 46 | }; 47 | 48 | public: 49 | 50 | // Voc, weighting and scoring types 51 | VocType Type; 52 | WeightingType Weighting; 53 | ScoringType Scoring; 54 | 55 | // Scale scores to 0..1 ? 56 | bool ScaleScore; 57 | 58 | // Descriptor length. 
It is usually 128 for SIFT and 64 or 128 for SURF 59 | int DescriptorLength; 60 | 61 | public: 62 | 63 | /** 64 | * Returns a string with information about the parameters 65 | * @return information string 66 | */ 67 | virtual string toString() const; 68 | 69 | /** 70 | * Destructor 71 | */ 72 | virtual ~VocParams(void); 73 | 74 | /** 75 | * Says if a bow vector should be normalized according to the scoring 76 | * method chosen. 77 | * @param scoring scoring used 78 | * @param norm (out) norm to use 79 | */ 80 | static bool MustNormalize(ScoringType scoring, ScoringType& norm); 81 | 82 | /** 83 | * Says if a bow vector should be normalized according to the scoring 84 | * method chosen. 85 | * @param norm (out) norm to use 86 | */ 87 | bool MustNormalize(ScoringType& norm) const; 88 | 89 | protected: 90 | 91 | /** 92 | * Constructor available to subclasses only 93 | * @param type type of voc 94 | * @param desc_length descriptor length 95 | * @param weighting weighting method 96 | * @param scoring scoring method 97 | */ 98 | VocParams(VocType type, int desc_length, 99 | WeightingType weighting, ScoringType scoring, bool scale_score); 100 | 101 | }; 102 | 103 | } 104 | 105 | #endif 106 | 107 | -------------------------------------------------------------------------------- /DBow/Vocabulary.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: Vocabulary.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: generic vocabulary that must be inherited 6 | */ 7 | 8 | #include "Vocabulary.h" 9 | #include "VocParams.h" 10 | #include "DUtils.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | using namespace std; 22 | 23 | using namespace DBow; 24 | 25 | Vocabulary::Vocabulary(const VocParams ¶ms): 26 | m_created(false) 27 | { 28 | m_params = new VocParams(params); 29 | } 30 | 31 | Vocabulary::Vocabulary(const char 
*filename): 32 | m_params(NULL) 33 | { 34 | Load(filename); 35 | } 36 | 37 | Vocabulary::Vocabulary(const Vocabulary &voc) 38 | { 39 | m_created = voc.m_created; 40 | m_frequent_words_stopped = voc.m_frequent_words_stopped; 41 | m_infrequent_words_stopped = voc.m_infrequent_words_stopped; 42 | m_word_frequency = voc.m_word_frequency; 43 | 44 | m_word_stopped = voc.m_word_stopped; 45 | m_stop_list = voc.m_stop_list; 46 | m_words_in_order = voc.m_words_in_order; 47 | 48 | m_params = new VocParams(*voc.m_params); 49 | } 50 | 51 | Vocabulary::~Vocabulary(void) 52 | { 53 | delete m_params; 54 | } 55 | 56 | void Vocabulary::Save(const char *filename, bool binary) const 57 | { 58 | if(binary) 59 | SaveBinary(filename); 60 | else 61 | SaveText(filename); 62 | } 63 | 64 | unsigned int Vocabulary::Load(const char *filename) 65 | { 66 | fstream f(filename, ios::in | ios::binary); 67 | 68 | if(!f.is_open()) throw DUtils::DException("Cannot open file"); 69 | 70 | char c; 71 | f.get(c); 72 | 73 | f.close(); 74 | 75 | // if c is >= 32, it is text 76 | if(c >= 32) 77 | return LoadText(filename); 78 | else 79 | return LoadBinary(filename); 80 | 81 | } 82 | 83 | 84 | VocInfo Vocabulary::RetrieveInfo() const 85 | { 86 | VocInfo ret(*m_params); 87 | 88 | ret.VocType = m_params->Type; 89 | 90 | if(m_created){ 91 | ret.WordCount = this->NumberOfWords(); 92 | ret.StoppedFrequentWords = m_frequent_words_stopped; 93 | ret.StoppedInfrequentWords = m_infrequent_words_stopped; 94 | }else{ 95 | ret.WordCount = 0; 96 | ret.StoppedFrequentWords = 0; 97 | ret.StoppedInfrequentWords = 0; 98 | } 99 | 100 | return ret; 101 | } 102 | 103 | double Vocabulary::Score(const BowVector &v, const BowVector &w) const 104 | { 105 | // Note: this implementation is independent from the scoring 106 | // implementation in the class Database 107 | 108 | assert(v.isInOrder()); 109 | assert(w.isInOrder()); 110 | 111 | BowVector const *a, *b; 112 | BowVector v2 = v; 113 | BowVector w2 = w; 114 | 115 | 
VocParams::ScoringType norm; 116 | bool must_normalize = m_params->MustNormalize(norm); 117 | bool do_not_change_order = (m_params->Scoring == VocParams::KL); 118 | 119 | if(must_normalize){ 120 | // normalize vectors 121 | v2.Normalize(norm); 122 | w2.Normalize(norm); 123 | 124 | if(do_not_change_order || (v2.size() < w2.size())){ 125 | a = &v2; 126 | b = &w2; 127 | }else{ 128 | a = &w2; 129 | b = &v2; 130 | } 131 | 132 | }else{ 133 | 134 | if(do_not_change_order || (v.size() < w.size())){ 135 | a = &v; 136 | b = &w; 137 | }else{ 138 | a = &w; 139 | b = &v; 140 | } 141 | } 142 | 143 | // final score 144 | double score = 0.0; 145 | 146 | // a contains the shortest vector if the method is not KL 147 | unsigned int first_index = 0; 148 | 149 | BowVector::const_iterator ita; 150 | for(ita = a->begin(); ita != a->end(); ita++){ 151 | 152 | // binary search for ita->id 153 | int lo = first_index; 154 | int hi = b->size()-1; 155 | int mid; 156 | int pos = -1; 157 | 158 | while (lo <= hi) 159 | { 160 | mid = (lo + hi) / 2; 161 | if (ita->id == (*b)[mid].id){ 162 | pos = mid; 163 | break; 164 | }else if (ita->id < (*b)[mid].id) 165 | hi = mid - 1; 166 | else 167 | lo = mid + 1; 168 | } 169 | 170 | if(pos >= 0){ 171 | const WordValue vi = ita->value; 172 | const WordValue wi = (*b)[pos].value; 173 | 174 | // common non-zero entry found 175 | first_index = pos + 1; 176 | 177 | switch(m_params->Scoring){ 178 | case VocParams::L1_NORM: 179 | score += fabs(vi - wi) - fabs(vi) - fabs(wi); 180 | break; 181 | 182 | case VocParams::L2_NORM: 183 | score += vi * wi; 184 | break; 185 | 186 | case VocParams::CHI_SQUARE: 187 | score += (vi - wi)*(vi - wi)/(vi + wi) - vi - wi; 188 | break; 189 | 190 | case VocParams::KL: 191 | score += vi * log(vi/wi); 192 | break; 193 | 194 | case VocParams::BHATTACHARYYA: 195 | score += sqrt(vi * wi); 196 | break; 197 | 198 | case VocParams::DOT_PRODUCT: 199 | score += vi * wi; 200 | break; 201 | } 202 | }else{ 203 | const WordValue vi = ita->value; 
204 | 205 | // in the case of KL, we must make some operations too 206 | if(m_params->Scoring == VocParams::KL){ 207 | score += vi * (log(vi) - LOG_EPS); 208 | } 209 | } 210 | 211 | } 212 | 213 | switch(m_params->Scoring){ 214 | case VocParams::L1_NORM: 215 | // ||v - w||_{L1} = 2 + Sum(|v_i - w_i| - |v_i| - |w_i|) 216 | // for all i | v_i != 0 and w_i != 0 217 | // (Nister, 2006) 218 | if(m_params->ScaleScore) 219 | score = -score/2.0; 220 | else 221 | score = 2.0 + score; 222 | break; 223 | 224 | case VocParams::L2_NORM: 225 | // ||v - w||_{L2} = sqrt( 2 - 2 * Sum(v_i * w_i) 226 | // for all i | v_i != 0 and w_i != 0 ) 227 | // (Nister, 2006) 228 | if(m_params->ScaleScore) 229 | score = 1.0 - sqrt(1.0 - score); 230 | else 231 | score = sqrt(2 - 2 * score); 232 | break; 233 | 234 | case VocParams::CHI_SQUARE: 235 | // score = Sum (vi - wi)^2 / (vi + wi) == 236 | // Sum vi + Sum wi - Sum{i, wi != 0} vi - Sum{i, vi != 0} wi + 237 | // + Sum_{i, vi != 0 && wi != 0} (vi - wi)^2 / (vi + wi) 238 | // 239 | score += accumulate(a->begin(), a->end(), 0.0) 240 | + accumulate(b->begin(), b->end(), 0.0); 241 | 242 | if(m_params->ScaleScore) score = 1.0 - score/2.0; 243 | break; 244 | 245 | default: break; 246 | 247 | /* 248 | case VocParams::KL: 249 | // score = Sum (vi * log(vi/wi)) 250 | // cannot scale 251 | break; 252 | 253 | case VocParams::BHATTACHARYYA: 254 | // score = Sum sqrt(vi * wi) 255 | // it is already scaled 256 | break; 257 | 258 | case VocParams::DOT_PRODUCT: 259 | // score = Sum (vi * wi) 260 | // cannot scale 261 | break; 262 | */ 263 | } 264 | 265 | return score; 266 | } 267 | 268 | void Vocabulary::StopWords(float frequent_words, float infrequent_words) 269 | { 270 | if(!m_created) return; 271 | int nwords = m_words_in_order.size(); 272 | StopWords((int)(frequent_words * nwords), (int)(infrequent_words * nwords)); 273 | } 274 | 275 | void Vocabulary::StopWords(int frequent_words, int infrequent_words) 276 | { 277 | if(!m_created) return; 278 | 279 | int 
nwords = m_words_in_order.size(); 280 | int nfrequent = frequent_words; 281 | int ninfrequent = infrequent_words; 282 | 283 | if(nfrequent > nwords) nfrequent = nwords; 284 | if(ninfrequent > nwords) ninfrequent = nwords; 285 | 286 | // remove flags from already stopped words 287 | vector::const_iterator it; 288 | for(it = m_stop_list.begin(); it != m_stop_list.end(); it++){ 289 | m_word_stopped[*it] = false; 290 | } 291 | m_stop_list.resize(0); 292 | 293 | // get new stopped words 294 | for(int i = 0; i < ninfrequent; i++){ 295 | WordId id = m_words_in_order[i]; 296 | m_stop_list.push_back(id); 297 | m_word_stopped[id] = true; 298 | } 299 | 300 | for(int i = 0; i < nfrequent; i++){ 301 | WordId id = m_words_in_order[ nwords - i - 1 ]; 302 | m_stop_list.push_back(id); 303 | m_word_stopped[id] = true; 304 | } 305 | 306 | // update the number of stopped words 307 | m_frequent_words_stopped = nfrequent; 308 | m_infrequent_words_stopped = ninfrequent; 309 | } 310 | 311 | void Vocabulary::Transform(const vector& features, BowVector &v, bool arrange) const 312 | { 313 | // words in v must be in ascending order 314 | 315 | assert(features.size() % m_params->DescriptorLength == 0); 316 | 317 | v.resize(0); 318 | v.reserve(features.size() / m_params->DescriptorLength); 319 | 320 | vector stopped; 321 | stopped.reserve(v.capacity()); 322 | 323 | // 3 implementations have been tried: 324 | // 1) unordered vector + sort 325 | // 2) ordered vector 326 | // 3) ordered list + conversion to vector 327 | // Number 1) worked better 328 | 329 | vector::const_iterator it; 330 | 331 | int nd = 0; 332 | 333 | switch(m_params->Weighting){ 334 | 335 | case VocParams::TF: 336 | case VocParams::IDF: 337 | case VocParams::TF_IDF: 338 | // Note: GetWordWeight returns at this moment the IDF value for TF_IDF and IDF, or 339 | // 1 in the TF case. So that by multiplying by the tf- part, we get the final score. 
340 | // We must multiply by n_i_d/n_d, 341 | // where n_i_d is the number of occurrences of word i in the document, 342 | // and n_d, the total number of words in the document 343 | 344 | // implementation 1) unordered vector + sort 345 | for(it = features.begin(); it < features.end(); it += m_params->DescriptorLength) 346 | { 347 | WordId id = Transform(it); 348 | 349 | if(isWordStopped(id)){ 350 | vector::iterator fit = find(stopped.begin(), stopped.end(), id); 351 | if(fit == stopped.end()){ 352 | stopped.push_back(id); 353 | nd++; 354 | } 355 | 356 | }else{ 357 | 358 | BowVector::iterator fit = find(v.begin(), v.end(), id); 359 | if(fit == v.end()){ 360 | v.push_back(BowVectorEntry(id, GetWordWeight(id))); 361 | nd++; 362 | }else if(m_params->Weighting != VocParams::IDF){ 363 | fit->value += GetWordWeight(id); // n_i_d is implicit in this operation 364 | } 365 | } // if word is stopped 366 | } // for feature 367 | 368 | // tf or tf-idf 369 | if(nd > 0 && m_params->Weighting != VocParams::IDF){ 370 | for(BowVector::iterator fit = v.begin(); fit != v.end(); fit++) 371 | fit->value /= (double)nd; 372 | } 373 | 374 | break; 375 | 376 | // - - 377 | 378 | case VocParams::BINARY: 379 | // Weights are not used. 
Just put 1 in active words 380 | for(it = features.begin(); it < features.end(); it += m_params->DescriptorLength) 381 | { 382 | WordId id = Transform(it); 383 | 384 | if(!isWordStopped(id)){ 385 | BowVector::iterator fit = find(v.begin(), v.end(), id); 386 | if(fit == v.end()){ 387 | v.push_back(BowVectorEntry(id, 1)); 388 | } 389 | } // if word is stopped 390 | } // for feature 391 | 392 | break; 393 | } 394 | 395 | if(arrange) v.PutInOrder(); 396 | 397 | } 398 | 399 | void Vocabulary::GetWordWeightsAndCreateStopList( 400 | const vector >& training_features, 401 | vector &weights) 402 | { 403 | const int NWords = GetNumberOfWords(); 404 | const int NDocs = training_features.size(); 405 | 406 | assert(NWords > 0 && NDocs > 0); 407 | 408 | weights.clear(); 409 | weights.insert(weights.end(), NWords, 0); 410 | 411 | vector >::const_iterator mit; 412 | vector::const_iterator fit; 413 | 414 | m_word_frequency.resize(0); 415 | m_word_frequency.resize(NWords, 0); 416 | 417 | switch(m_params->Weighting){ 418 | case VocParams::IDF: 419 | case VocParams::TF_IDF: 420 | { 421 | // Note: 422 | // This is not actually a tf-idf score, but a idf score. 423 | // The complete tf-idf score is calculated in Vocabulary::Transform 424 | 425 | // calculate Ni: number of images in the voc data with 426 | // at least one descriptor vector path through node i. 
427 | // calculate word frequency too 428 | vector Ni(NWords, 0); 429 | vector counted(NWords, false); 430 | 431 | for(mit = training_features.begin(); mit != training_features.end(); mit++){ 432 | fill(counted.begin(), counted.end(), false); 433 | 434 | for(fit = mit->begin(); fit < mit->end(); fit += m_params->DescriptorLength){ 435 | WordId id = Transform(fit); 436 | 437 | m_word_frequency[id] += 1.f; 438 | if(!counted[id]){ 439 | Ni[id]++; 440 | counted[id] = true; 441 | } 442 | } 443 | } 444 | 445 | // set ln(N/Ni) 446 | for(int i = 0; i < NWords; i++){ 447 | if(Ni[i] > 0){ 448 | weights[i] = log((double)NDocs / (double)Ni[i]); 449 | }// else // This cannot occur if using kmeans++ 450 | } 451 | } 452 | 453 | break; 454 | 455 | // - - 456 | 457 | case VocParams::TF: 458 | fill(weights.begin(), weights.end(), 1); 459 | 460 | case VocParams::BINARY: 461 | // Note: 462 | // The tf score is calculated in Vocabulary::Transform 463 | // Here, we only create the frequency vector, 464 | // and fill weights with 1's. 
465 | // In the binary case, weights are not necessary, so that their value 466 | // do not matter 467 | for(mit = training_features.begin(); mit != training_features.end(); mit++){ 468 | for(fit = mit->begin(); fit < mit->end(); fit += m_params->DescriptorLength){ 469 | WordId id = Transform(fit); 470 | m_word_frequency[id] += 1.f; 471 | } 472 | } 473 | 474 | break; 475 | } 476 | 477 | // set frequencies 478 | float total = accumulate(m_word_frequency.begin(), m_word_frequency.end(), 0.f); 479 | if(total > 0){ 480 | transform(m_word_frequency.begin(), m_word_frequency.end(), 481 | m_word_frequency.begin(), bind2nd(divides(), total)); 482 | } 483 | 484 | // create stop list 485 | CreateStopList(); 486 | 487 | } 488 | 489 | float Vocabulary::GetWordFrequency(WordId id) const 490 | { 491 | if(!m_created) return 0.f; 492 | 493 | assert(0 <= id && id < m_word_frequency.size()); 494 | 495 | return m_word_frequency[id]; 496 | } 497 | 498 | void Vocabulary::CreateStopList() 499 | { 500 | // assert: m_word_frequency is set 501 | 502 | assert(m_word_frequency.size() > 0); 503 | 504 | m_stop_list.resize(0); 505 | 506 | m_word_stopped.resize(m_word_frequency.size()); 507 | fill(m_word_stopped.begin(), m_word_stopped.end(), false); 508 | 509 | m_frequent_words_stopped = 0; 510 | m_infrequent_words_stopped = 0; 511 | 512 | // arrange words by ascending order 513 | m_words_in_order.resize(0); 514 | m_words_in_order.reserve(m_word_frequency.size()); 515 | 516 | // better implementation with an ordered list + copy? 
517 | vector > pairs; // 518 | pairs.reserve(m_word_frequency.size()); 519 | 520 | vector::const_iterator it; 521 | for(it = m_word_frequency.begin(); it != m_word_frequency.end(); it++){ 522 | WordId id = it - m_word_frequency.begin(); 523 | float fr = *it; 524 | 525 | pairs.push_back(make_pair(fr, id)); 526 | } 527 | 528 | sort(pairs.begin(), pairs.end()); 529 | 530 | vector >::const_iterator pit; 531 | for(pit = pairs.begin(); pit != pairs.end(); pit++){ 532 | m_words_in_order.push_back(pit->second); 533 | } 534 | } 535 | 536 | void Vocabulary::SaveBinaryHeader(DUtils::BinaryFile &f) const 537 | { 538 | // Binary header format: 539 | // XX Vt Wt St Ss D W SfW SiW 540 | // 541 | // Where: 542 | // XX (byte): magic word (byte with value 0) to identify the binary file 543 | // Vt (int32): vocabulary type 544 | // Wt (int32): weighting type 545 | // St (int32): scoring type 546 | // Ss (int32): scale score 547 | // D (int32): descriptor length 548 | // W (int32): actual number of words 549 | // SfW (int32): frequent nodes stopped 550 | // SiW (int32): infrequent nodes stopped 551 | 552 | f << '\0' // magic word 553 | << (int)m_params->Type 554 | << (int)m_params->Weighting 555 | << (int)m_params->Scoring 556 | << (int)( m_params->ScaleScore ? 1 : 0 ) 557 | << m_params->DescriptorLength 558 | << NumberOfWords() 559 | << m_frequent_words_stopped 560 | << m_infrequent_words_stopped; 561 | } 562 | 563 | void Vocabulary::SaveTextHeader(fstream &f) const 564 | { 565 | // Text header format: 566 | // Vt Wt St Ss D W SfW SiW 567 | // 568 | // Where: 569 | // Vt: vocabulary type 570 | // Wt: weighting type 571 | // St: scoring type 572 | // Ss: scale score 573 | // D: descriptor length 574 | // W: actual number of words 575 | // SfW: frequent nodes stopped 576 | // SiW: infrequent nodes stopped 577 | 578 | f << m_params->Type << " " 579 | << m_params->Weighting << " " 580 | << m_params->Scoring << " " 581 | << ( m_params->ScaleScore ? 
1 : 0 ) << " " 582 | << m_params->DescriptorLength << " " 583 | << NumberOfWords() << " " 584 | << m_frequent_words_stopped << " " 585 | << m_infrequent_words_stopped 586 | << endl; 587 | } 588 | 589 | int Vocabulary::LoadBinaryHeader(DUtils::BinaryFile &f) 590 | { 591 | f.DiscardNextByte(); // magic word 592 | 593 | int voctype, weighting, scoring, scalescore, nwords, ndesc; 594 | 595 | f >> voctype >> weighting >> scoring >> scalescore >> ndesc >> nwords 596 | >> m_frequent_words_stopped 597 | >> m_infrequent_words_stopped; 598 | 599 | m_params->Type = (VocParams::VocType)voctype; 600 | m_params->Weighting = (VocParams::WeightingType)weighting; 601 | m_params->Scoring = (VocParams::ScoringType)scoring; 602 | m_params->ScaleScore = (scalescore != 0); 603 | m_params->DescriptorLength = ndesc; 604 | 605 | return nwords; 606 | } 607 | 608 | int Vocabulary::LoadTextHeader(fstream &f) 609 | { 610 | int voctype, weighting, scoring, scalescore, nwords, ndesc; 611 | 612 | f >> voctype >> weighting >> scoring >> scalescore >> ndesc >> nwords 613 | >> m_frequent_words_stopped 614 | >> m_infrequent_words_stopped; 615 | 616 | m_params->Type = (VocParams::VocType)voctype; 617 | m_params->Weighting = (VocParams::WeightingType)weighting; 618 | m_params->Scoring = (VocParams::ScoringType)scoring; 619 | m_params->ScaleScore = (scalescore != 0); 620 | m_params->DescriptorLength = ndesc; 621 | 622 | return nwords; 623 | } 624 | 625 | -------------------------------------------------------------------------------- /DBow/Vocabulary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: Vocabulary.h 3 | * Date: April 2010 4 | * Author: Dorian Galvez 5 | * Description: generic vocabulary that must be inherited 6 | */ 7 | 8 | #pragma once 9 | #ifndef __VOCABULARY__ 10 | #define __VOCABULARY__ 11 | 12 | #include "VocParams.h" 13 | #include "VocInfo.h" 14 | #include "BowVector.h" 15 | #include "DUtils.h" 16 | 17 | namespace DBow { 18 | 19 | 
class Database; 20 | 21 | class Vocabulary 22 | { 23 | public: 24 | 25 | /** 26 | * Creates an empty vocabulary with the given parameters 27 | * @param params vocbulary parameters 28 | */ 29 | Vocabulary(const VocParams ¶ms); 30 | 31 | /** 32 | * Copy constructor. Allocates new data 33 | * @param voc vocabulary to copy 34 | */ 35 | Vocabulary(const Vocabulary &voc); 36 | 37 | /** 38 | * Loads a stored vocabulary 39 | */ 40 | Vocabulary(const char *filename); 41 | 42 | /** Destructor 43 | */ 44 | virtual ~Vocabulary(void); 45 | 46 | /** 47 | * Saves the current vocabulary in filename. 48 | * The vocabulary can be saved in binary format (improves size and speed) 49 | * or in text format (good for interoperability). 50 | * Note that training data is not saved. 51 | * @param filename file to store the vocabulary in 52 | * @param binary (default: true): sets if binary format must be used 53 | */ 54 | void Save(const char *filename, bool binary = true) const; 55 | 56 | /** 57 | * Loads a stored vocabulary (except for its training data). 58 | * The current vocabulary is cleared. 59 | * @param filename file to load 60 | * @return number of bytes or chars read 61 | */ 62 | unsigned int Load(const char *filename); 63 | 64 | /** 65 | * Returns whether the vocabulary is empty or has already been created 66 | */ 67 | inline bool isEmpty() const { return !m_created; } 68 | 69 | /** 70 | * Creates the vocabulary from some training data. 71 | * The current content of the vocabulary is cleared 72 | * @param training_features vector of groups of features in the OpenCV format. 73 | * Each feature group represents a different source of features. 74 | * This is necessary for some weighting methods, like tf-idf 75 | */ 76 | virtual void Create(const vector >& training_features) = 0; 77 | 78 | /** 79 | * Transforms a set of image features into a bag-of-words vector 80 | * according to the current vocabulary. 
81 | * Stopped words must not be returned 82 | * @see Vocabulary::isWordStopped 83 | * @param features image features in the OpenCV format 84 | * @param v (out) bow vector 85 | * @param arrange (default: true) iif true, puts entries in v in order. 86 | * This is necessary if v is going to be used with Vocabulary::Score. 87 | * If not (is only used with Database), setting arrange to false can 88 | * slightly save some time 89 | */ 90 | void Transform(const vector& features, BowVector &v, bool arrange = true) const; 91 | 92 | /** 93 | * Returns the number of words in the vocabulary 94 | * @return number of words 95 | */ 96 | inline int NumberOfWords() const 97 | { 98 | if(m_created) return GetNumberOfWords(); 99 | else return 0; 100 | } 101 | 102 | /** 103 | * Stops frequent or infrequent words from the vocabulary. 104 | * When a word is stopped, bow vectors returned by later ::Transform calls 105 | * will not contain it. BowVectors created before stopping words may still 106 | * contain stopped words, so that scores calculated from them will not 107 | * take stopped words into account. Therefore, for correct scoring, 108 | * bow vectors must be calculated after stopping words. 109 | * Calls to ::StopWords do not stack, so that words can be considered 110 | * again by calling ::StopWords with lower frequencies. 111 | * Particularly, StopWords(0,0) disables the stop list. 112 | * 113 | * @param frequent_words how many frequent words must be stopped, 114 | * in per one units 115 | * @param infrequent_words (default 0) how many infrequent words must be stopped, 116 | * in per one units 117 | */ 118 | void StopWords(float frequent_words, float infrequent_words = 0); 119 | 120 | /** 121 | * Stops frequent or infrequent words from the vocabulary. 
122 | * @see ::StopWords(float, float) 123 | * @param frequent_words how many frequent words must be stopped 124 | * @param infrequent_words how many infrequent words must be stopped 125 | */ 126 | void StopWords(int frequent_words, int infrequent_words = 0); 127 | 128 | /** 129 | * Retrieves all the information about the vocabulary 130 | * @return information 131 | */ 132 | VocInfo RetrieveInfo() const; 133 | 134 | /** 135 | * Gets the weighting method 136 | * @return weighting method 137 | */ 138 | inline VocParams::WeightingType Weighting() const { 139 | return m_params->Weighting; 140 | } 141 | 142 | /** 143 | * Gets the scoring method 144 | * @return scoring method 145 | */ 146 | inline VocParams::ScoringType Scoring() const { 147 | return m_params->Scoring; 148 | } 149 | 150 | /** 151 | * Returns the score between two vectors according to this voc. 152 | * BowVectors must be in order of ids 153 | * @param v the first bow vector 154 | * @param w the second one 155 | * @return score 156 | */ 157 | double Score(const BowVector &v, const BowVector &w) const; 158 | 159 | protected: 160 | 161 | /** 162 | * Saves the vocabulary in binary format. Must be implemented 163 | * by subclasses. 164 | * @param filename file to store the vocabulary in 165 | */ 166 | virtual void SaveBinary(const char *filename) const = 0; 167 | 168 | /** 169 | * Saves the vocabulary in text format. Must be implemented 170 | * by subclasses. 171 | * @param filename file to store the vocabulary in 172 | */ 173 | virtual void SaveText(const char *filename) const = 0; 174 | 175 | /** 176 | * Loads the vocabulary in binary format. Must be implemented 177 | * by subclasses. 178 | * @param filename file to read the vocabulary from 179 | * @return number of bytes read 180 | */ 181 | virtual unsigned int LoadBinary(const char *filename) = 0; 182 | 183 | /** 184 | * Loads the vocabulary in text format. Must be implemented 185 | * by subclasses. 
186 | * @param filename file to read the vocabulary from 187 | * @return number of chars read 188 | */ 189 | virtual unsigned int LoadText(const char *filename) = 0; 190 | 191 | /** 192 | * Transforms a feature into its word id 193 | * @param feature descriptor. Pointer to the beginning of a DescriptorLenght 194 | * size vector containing the feature descriptor 195 | * @return word id 196 | */ 197 | virtual WordId Transform(const vector::const_iterator &pfeature) const = 0; 198 | 199 | /** 200 | * Returns the weight of a word 201 | * @param id word id 202 | * @return word weight 203 | */ 204 | virtual WordValue GetWordWeight(WordId id) const = 0; 205 | 206 | /** 207 | * Returns the number of words in the vocabulary 208 | * (must not check m_created) 209 | * @return number of words 210 | */ 211 | virtual int GetNumberOfWords() const = 0; 212 | 213 | /** 214 | * Returns the frequency of a word 215 | * @param id word id 216 | * @return word frequency 217 | */ 218 | float GetWordFrequency(WordId id) const; 219 | 220 | /** 221 | * Checks if a given word is stopped 222 | * @return true iif the word is stopped 223 | */ 224 | inline bool isWordStopped(WordId id) const { 225 | return(id < m_word_stopped.size() && m_word_stopped[id]); 226 | } 227 | 228 | /** 229 | * Saves a header with vocabulary info in binary format 230 | * @param f (in/out) file 231 | */ 232 | void SaveBinaryHeader(DUtils::BinaryFile &f) const; 233 | 234 | /** 235 | * Saves a header with vocabulary info in binary text 236 | * @param f (in/out) file 237 | */ 238 | void SaveTextHeader(fstream &f) const; 239 | 240 | /** 241 | * Loads header with vocabulary info in binary format 242 | * @param f (in/out) file 243 | * @return number of words 244 | */ 245 | int LoadBinaryHeader(DUtils::BinaryFile &f); 246 | 247 | /** 248 | * Loads a header with vocabulary info in binary format 249 | * @param f (in/out) file 250 | * @return number of words 251 | */ 252 | int LoadTextHeader(fstream &f); 253 | 254 | protected: 255 
| 256 | /** 257 | * Calculates the weights of all the words in the vocabulary. 258 | * This function must be called in ::Create after creating the vocabulary 259 | * @param training_features features used to create the vocabulary 260 | * (same format as in ::Create) 261 | * @param weigths (out) vector such that weights[WordId] = weight 262 | */ 263 | void GetWordWeightsAndCreateStopList(const vector >& training_features, 264 | vector &weights); 265 | 266 | /** 267 | * Creates an empty stop list with the word frequencies given. 268 | * m_word_frequency must be filled for all the words in the vocabulary. 269 | * This function is called by GetWordWeightsAndCreateStopList, but 270 | * can be also called by subclasses if they have filled m_word_frequency. 271 | */ 272 | void CreateStopList(); 273 | 274 | protected: 275 | 276 | // Says if the vocabulary was already created 277 | // Must be flagged by subclasses 278 | bool m_created; 279 | 280 | // Number of words stopped 281 | int m_frequent_words_stopped; 282 | int m_infrequent_words_stopped; 283 | 284 | // Stores the frequency of each word: m_word_frequency[word_id] = fr 285 | vector m_word_frequency; 286 | 287 | private: 288 | 289 | // Vocabulary parameters 290 | VocParams *m_params; 291 | 292 | // Allows quick access to check if a word is stopped 293 | // m_word_stopped[word_id] = true/false 294 | vector m_word_stopped; 295 | 296 | // Stores ids of currently stopped words 297 | vector m_stop_list; 298 | 299 | // Stores the ids of the words in order of ascending frequency 300 | vector m_words_in_order; 301 | 302 | }; 303 | 304 | } 305 | 306 | #endif 307 | 308 | -------------------------------------------------------------------------------- /DBowLibAndDemo.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 10.00 3 | # Visual C++ Express 2008 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DUtils", 
"DUtils\DUtils.vcproj", "{908E3813-0881-4967-AADD-710B987867D5}" 5 | EndProject 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DBow", "DBow\DBow.vcproj", "{0017AE01-4E95-4ED9-98E7-D1225894F01C}" 7 | ProjectSection(ProjectDependencies) = postProject 8 | {908E3813-0881-4967-AADD-710B987867D5} = {908E3813-0881-4967-AADD-710B987867D5} 9 | EndProjectSection 10 | EndProject 11 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Demo", "Demo\Demo.vcproj", "{6A18314B-D5FC-4A05-A0A1-43C077E4C948}" 12 | ProjectSection(ProjectDependencies) = postProject 13 | {0017AE01-4E95-4ED9-98E7-D1225894F01C} = {0017AE01-4E95-4ED9-98E7-D1225894F01C} 14 | {908E3813-0881-4967-AADD-710B987867D5} = {908E3813-0881-4967-AADD-710B987867D5} 15 | EndProjectSection 16 | EndProject 17 | Global 18 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 19 | Debug|Win32 = Debug|Win32 20 | Release|Win32 = Release|Win32 21 | EndGlobalSection 22 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 23 | {908E3813-0881-4967-AADD-710B987867D5}.Debug|Win32.ActiveCfg = Debug|Win32 24 | {908E3813-0881-4967-AADD-710B987867D5}.Debug|Win32.Build.0 = Debug|Win32 25 | {908E3813-0881-4967-AADD-710B987867D5}.Release|Win32.ActiveCfg = Release|Win32 26 | {908E3813-0881-4967-AADD-710B987867D5}.Release|Win32.Build.0 = Release|Win32 27 | {0017AE01-4E95-4ED9-98E7-D1225894F01C}.Debug|Win32.ActiveCfg = Debug|Win32 28 | {0017AE01-4E95-4ED9-98E7-D1225894F01C}.Debug|Win32.Build.0 = Debug|Win32 29 | {0017AE01-4E95-4ED9-98E7-D1225894F01C}.Release|Win32.ActiveCfg = Release|Win32 30 | {0017AE01-4E95-4ED9-98E7-D1225894F01C}.Release|Win32.Build.0 = Release|Win32 31 | {6A18314B-D5FC-4A05-A0A1-43C077E4C948}.Debug|Win32.ActiveCfg = Debug|Win32 32 | {6A18314B-D5FC-4A05-A0A1-43C077E4C948}.Debug|Win32.Build.0 = Debug|Win32 33 | {6A18314B-D5FC-4A05-A0A1-43C077E4C948}.Release|Win32.ActiveCfg = Release|Win32 34 | {6A18314B-D5FC-4A05-A0A1-43C077E4C948}.Release|Win32.Build.0 = Release|Win32 35 | EndGlobalSection 36 | 
GlobalSection(SolutionProperties) = preSolution 37 | HideSolutionNode = FALSE 38 | EndGlobalSection 39 | EndGlobal 40 | -------------------------------------------------------------------------------- /DUtils/BinaryFile.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: BinaryFile.cpp 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: April 2010 6 | * Description: reads and writes binary files in network byte order. 7 | * Manages endianness and data size automatically. 8 | */ 9 | 10 | #include "FileModes.h" 11 | #include "BinaryFile.h" 12 | 13 | #ifdef WIN32 14 | #include 15 | #else 16 | #include 17 | #endif 18 | 19 | #ifdef _MSC_VER 20 | // Microsoft Visual Studio does not ship stdint.h 21 | typedef __int32 int32_t; 22 | typedef unsigned __int32 uint32_t; 23 | typedef __int64 int64_t; 24 | typedef unsigned __int32 uint64_t; 25 | #else 26 | #include 27 | #endif 28 | 29 | 30 | using namespace DUtils; 31 | 32 | BinaryFile::BinaryFile(void): m_is_little_endian(-1) 33 | { 34 | setEndianness(); 35 | } 36 | 37 | BinaryFile::~BinaryFile(void) 38 | { 39 | Close(); 40 | } 41 | 42 | void BinaryFile::Close() 43 | { 44 | if(m_f.is_open()) m_f.close(); 45 | } 46 | 47 | BinaryFile::BinaryFile(const char *filename, const FILE_MODES mode) 48 | { 49 | Init(filename, mode); 50 | } 51 | 52 | BinaryFile::BinaryFile(const string &filename, const FILE_MODES mode) 53 | { 54 | Init(filename.c_str(), mode); 55 | } 56 | 57 | void BinaryFile::Init(const char *filename, const FILE_MODES mode) 58 | { 59 | m_is_little_endian = -1; 60 | setEndianness(); 61 | 62 | if(mode & READ){ 63 | OpenForReading(filename); 64 | }else if((mode & WRITE) && (mode & APPEND)){ 65 | OpenForAppending(filename); 66 | }else if(mode & WRITE){ 67 | OpenForWriting(filename); 68 | }else{ 69 | throw DException("Wrong access mode"); 70 | } 71 | } 72 | 73 | void BinaryFile::OpenForReading(const char *filename) 74 | { 75 | Close(); 76 | 77 | 
m_f.open(filename, ios::in | ios::binary); 78 | if(!m_f.is_open()){ 79 | throw DException(string("Cannot open ") + filename + " for reading"); 80 | }else{ 81 | m_mode = READ; 82 | } 83 | } 84 | 85 | void BinaryFile::OpenForWriting(const char *filename) 86 | { 87 | Close(); 88 | 89 | m_f.open(filename, ios::out | ios::binary); 90 | if(!m_f.is_open()){ 91 | throw DException(string("Cannot open ") + filename + " for writing"); 92 | }else{ 93 | m_mode = WRITE; 94 | } 95 | } 96 | 97 | void BinaryFile::OpenForAppending(const char *filename) 98 | { 99 | Close(); 100 | 101 | m_f.open(filename, ios::out | ios::app | ios::binary); 102 | if(!m_f.is_open()){ 103 | throw DException(string("Cannot open ") + filename + " for writing at the end"); 104 | }else{ 105 | m_mode = DUtils::FILE_MODES(WRITE | APPEND); 106 | } 107 | } 108 | 109 | void BinaryFile::DiscardBytes(int count) 110 | { 111 | if(!m_f.is_open()) throw DException("File is not open"); 112 | 113 | if(m_mode & READ){ 114 | m_f.ignore(count); 115 | }else 116 | throw DException("Wrong access mode"); 117 | } 118 | 119 | inline bool BinaryFile::Eof() 120 | { 121 | return(!m_f.is_open() || m_f.eof()); 122 | } 123 | 124 | unsigned int BinaryFile::BytesRead() 125 | { 126 | if(m_mode & READ){ 127 | return (unsigned int)m_f.tellg(); 128 | }else 129 | throw DException("Wrong access mode"); 130 | } 131 | 132 | BinaryFile& BinaryFile::operator<< (char v) 133 | { 134 | if(!m_f.is_open()) throw DException("File is not open"); 135 | 136 | if(m_mode & WRITE){ 137 | m_f.write(&v, 1); 138 | }else 139 | throw DException("Wrong access mode"); 140 | 141 | return *this; 142 | } 143 | 144 | BinaryFile& BinaryFile::operator<< (int v) 145 | { 146 | if(!m_f.is_open()) throw DException("File is not open"); 147 | 148 | if(m_mode & WRITE){ 149 | uint32_t w = htonl(v); 150 | m_f.write((const char *)&w, 4); 151 | }else 152 | throw DException("Wrong access mode"); 153 | 154 | return *this; 155 | } 156 | 157 | BinaryFile& BinaryFile::operator<< (float 
v) 158 | { 159 | if(!m_f.is_open()) throw DException("File is not open"); 160 | 161 | if(m_mode & WRITE){ 162 | hton_f(v, m_aux); 163 | m_f.write(m_aux, 4); 164 | }else 165 | throw DException("Wrong access mode"); 166 | 167 | return *this; 168 | } 169 | 170 | BinaryFile& BinaryFile::operator<< (double v) 171 | { 172 | if(!m_f.is_open()) throw DException("File is not open"); 173 | 174 | if(m_mode & WRITE){ 175 | hton_d(v, m_aux); 176 | m_f.write(m_aux, 8); 177 | }else 178 | throw DException("Wrong access mode"); 179 | 180 | return *this; 181 | } 182 | 183 | BinaryFile& BinaryFile::operator>>(char &v) 184 | { 185 | if(!m_f.is_open()) throw DException("File is not open"); 186 | 187 | if(m_mode & READ){ 188 | m_f.read(&v, 1); 189 | }else 190 | throw DException("Wrong access mode"); 191 | 192 | return *this; 193 | } 194 | 195 | BinaryFile& BinaryFile::operator>>(int &v) 196 | { 197 | if(!m_f.is_open()) throw DException("File is not open"); 198 | 199 | if(m_mode & READ){ 200 | m_f.read(m_aux, 4); 201 | uint32_t w = *((uint32_t*)&m_aux[0]); 202 | v = (int)htonl(w); 203 | }else 204 | throw DException("Wrong access mode"); 205 | 206 | return *this; 207 | } 208 | 209 | BinaryFile& BinaryFile::operator>>(float &v) 210 | { 211 | if(!m_f.is_open()) throw DException("File is not open"); 212 | 213 | if(m_mode & READ){ 214 | m_f.read(m_aux, 4); 215 | v = ntoh_f(m_aux); 216 | }else 217 | throw DException("Wrong access mode"); 218 | 219 | return *this; 220 | } 221 | 222 | BinaryFile& BinaryFile::operator>>(double &v) 223 | { 224 | if(!m_f.is_open()) throw DException("File is not open"); 225 | 226 | if(m_mode & READ){ 227 | m_f.read(m_aux, 8); 228 | v = ntoh_d(m_aux); 229 | }else 230 | throw DException("Wrong access mode"); 231 | 232 | return *this; 233 | } 234 | 235 | 236 | void BinaryFile::hton_f(float v, char buf[8]) const 237 | { 238 | unsigned char *w = (unsigned char *)&v; 239 | 240 | // network order is big endian 241 | if(isLittleEndian()){ 242 | buf[0] = w[3]; 243 | buf[1] = 
w[2]; 244 | buf[2] = w[1]; 245 | buf[3] = w[0]; 246 | }else{ 247 | buf[0] = w[0]; 248 | buf[1] = w[1]; 249 | buf[2] = w[2]; 250 | buf[3] = w[3]; 251 | } 252 | } 253 | 254 | float BinaryFile::ntoh_f(char buf[8]) const 255 | { 256 | float v; 257 | unsigned char *w = (unsigned char*)&v; 258 | 259 | // network order is big endian 260 | if(isLittleEndian()){ 261 | w[3] = buf[0]; 262 | w[2] = buf[1]; 263 | w[1] = buf[2]; 264 | w[0] = buf[3]; 265 | }else{ 266 | w[0] = buf[0]; 267 | w[1] = buf[1]; 268 | w[2] = buf[2]; 269 | w[3] = buf[3]; 270 | } 271 | 272 | return v; 273 | } 274 | 275 | void BinaryFile::hton_d(double v, char buf[8]) const 276 | { 277 | unsigned char *w = (unsigned char *)&v; 278 | 279 | // network order is big endian 280 | if(isLittleEndian()){ 281 | buf[0] = w[7]; 282 | buf[1] = w[6]; 283 | buf[2] = w[5]; 284 | buf[3] = w[4]; 285 | buf[4] = w[3]; 286 | buf[5] = w[2]; 287 | buf[6] = w[1]; 288 | buf[7] = w[0]; 289 | }else{ 290 | buf[0] = w[0]; 291 | buf[1] = w[1]; 292 | buf[2] = w[2]; 293 | buf[3] = w[3]; 294 | buf[4] = w[4]; 295 | buf[5] = w[5]; 296 | buf[6] = w[6]; 297 | buf[7] = w[7]; 298 | } 299 | } 300 | 301 | double BinaryFile::ntoh_d(char buf[8]) const 302 | { 303 | double v; 304 | unsigned char *w = (unsigned char*)&v; 305 | 306 | // network order is big endian 307 | if(isLittleEndian()){ 308 | w[7] = buf[0]; 309 | w[6] = buf[1]; 310 | w[5] = buf[2]; 311 | w[4] = buf[3]; 312 | w[3] = buf[4]; 313 | w[2] = buf[5]; 314 | w[1] = buf[6]; 315 | w[0] = buf[7]; 316 | }else{ 317 | w[0] = buf[0]; 318 | w[1] = buf[1]; 319 | w[2] = buf[2]; 320 | w[3] = buf[3]; 321 | w[4] = buf[4]; 322 | w[5] = buf[5]; 323 | w[6] = buf[6]; 324 | w[7] = buf[7]; 325 | } 326 | 327 | return v; 328 | } 329 | 330 | void BinaryFile::setEndianness() 331 | { 332 | if(m_is_little_endian == -1){ 333 | char SwapTest[2] = { 1, 0 }; 334 | m_is_little_endian = (*(short *) SwapTest == 1 ? 
1 : 0); 335 | } 336 | } 337 | 338 | -------------------------------------------------------------------------------- /DUtils/BinaryFile.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: BinaryFile.h 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: April 2010 6 | * Description: reads and writes binary files in network byte order. 7 | * Manages endianness and data size automatically. 8 | */ 9 | 10 | #pragma once 11 | #ifndef __D_BINARY_FILE__ 12 | #define __D_BINARY_FILE__ 13 | 14 | #include "DException.h" 15 | #include "FileModes.h" 16 | #include 17 | using namespace std; 18 | 19 | namespace DUtils { 20 | 21 | class BinaryFile 22 | { 23 | public: 24 | 25 | /* Creates a binary file with no file 26 | */ 27 | BinaryFile(void); 28 | 29 | /* Closes any opened file 30 | */ 31 | ~BinaryFile(void); 32 | 33 | /* Creates a binary file by opening a file 34 | * @param filename 35 | * @param mode: READ or WRITE 36 | * @throws DException if cannot open the file 37 | */ 38 | BinaryFile(const char *filename, const FILE_MODES mode); 39 | BinaryFile(const string &filename, const FILE_MODES mode); 40 | 41 | /* Opens a file for reading. It closes any other opened file 42 | * @param filename 43 | * @throws DException if cannot open the file 44 | */ 45 | void OpenForReading(const char *filename); 46 | inline void OpenForReading(const string &filename) 47 | { 48 | OpenForReading(filename.c_str()); 49 | } 50 | 51 | /* Opens a file for writing. It closes any other opened file 52 | * @param filename 53 | * @throws DException if cannot create the file 54 | */ 55 | void OpenForWriting(const char *filename); 56 | inline void OpenForWriting(const string &filename) 57 | { 58 | OpenForWriting(filename.c_str()); 59 | } 60 | 61 | /* Opens a file for writing at the end. 
It closes any other opened file 62 | * @param filename 63 | * @throws DException if cannot open the file 64 | */ 65 | void OpenForAppending(const char *filename); 66 | inline void OpenForAppending(const string &filename) 67 | { 68 | OpenForAppending(filename.c_str()); 69 | } 70 | 71 | /* Says whether the end of the file has been reached. Requires to 72 | * read the end of the file to return true 73 | * @return true iif the end of the file has been already read 74 | * @throws DException if wrong access mode 75 | */ 76 | inline bool Eof(); 77 | 78 | /* Closes any opened file. It is not necessary to call this function 79 | * explicitly 80 | */ 81 | void Close(); 82 | 83 | /** 84 | * Reads the next byte and throws it away 85 | * @throws DException if wrong access mode 86 | */ 87 | inline void DiscardNextByte(){ 88 | DiscardBytes(1); 89 | } 90 | 91 | /** 92 | * Reads n bytes and discards them 93 | * @param count number of bytes to discard 94 | * @throws DException if wrong access mode 95 | */ 96 | void DiscardBytes(int count); 97 | 98 | /** 99 | * Returns the number of bytes read in reading mode 100 | * @return number of bytes read 101 | */ 102 | unsigned int BytesRead(); 103 | 104 | /* Writes a byte char 105 | * @throws DException if wrong access mode 106 | */ 107 | BinaryFile& operator<< (char v); 108 | 109 | /* Writes a 4 byte integer value 110 | * @throws DException if wrong access mode 111 | */ 112 | BinaryFile& operator<< (int v); 113 | 114 | /* Writes a 4 byte float value 115 | * @throws DException if wrong access mode 116 | */ 117 | BinaryFile& operator<< (float v); 118 | 119 | /* Writes a 8 byte float value 120 | * @throws DException if wrong access mode 121 | */ 122 | BinaryFile& operator<< (double v); 123 | 124 | /* Reads a byte char 125 | * @throws DException if wrong access mode 126 | */ 127 | BinaryFile& operator>>(char &v); 128 | 129 | /* Reads a 4 byte integer value 130 | * @throws DException if wrong access mode 131 | */ 132 | BinaryFile& operator>>(int 
&v); 133 | 134 | /* Reads a 4 byte float value 135 | * @throws DException if wrong access mode 136 | */ 137 | BinaryFile& operator>>(float &v); 138 | 139 | /* Reads a 8 byte float value 140 | * @throws DException if wrong access mode 141 | */ 142 | BinaryFile& operator>>(double &v); 143 | 144 | protected: 145 | 146 | /** 147 | * Initializes the object by opening a file 148 | * @param filename file to open 149 | * @param mode opening mode 150 | * @throws DException if cannot open the file 151 | */ 152 | void Init(const char *filename, const FILE_MODES mode); 153 | 154 | /** 155 | * Checks the endianness of this machine 156 | */ 157 | void setEndianness(); 158 | 159 | /** 160 | * Converts a float into 4 bytes in network order 161 | * @param v float value 162 | * @param buf (out) byte buffer output. Only buf[0..3] is used 163 | */ 164 | void hton_f(float v, char buf[8]) const; 165 | 166 | /** 167 | * Converts a double into 8 bytes in network order 168 | * @param v double value 169 | * @param buf (out) byte buffer output 170 | */ 171 | void hton_d(double d, char buf[8]) const; 172 | 173 | /** 174 | * Converts an array of bytes in network order into a 4 byte float 175 | * @param buf byte array. only buf[0..3] is used 176 | * @return float value 177 | */ 178 | float ntoh_f(char buf[8]) const; 179 | 180 | /** 181 | * Converts an array of bytes in network order into a 8 byte double 182 | * @param buf byte array 183 | * @return double value 184 | */ 185 | double ntoh_d(char buf[8]) const; 186 | 187 | /** 188 | * Returns if this machine uses little endian 189 | * @return true iif little endian 190 | */ 191 | inline bool isLittleEndian() const 192 | { 193 | return (m_is_little_endian == 1 ? 
/* General exception of the library. It only carries a text message,
 * which is returned by what()
 */
class DException :
  public std::exception
{
public:
  /* Creates an exception with a general error message
   */
  DException(void) throw()
    : m_message("DUtils exception")
  {
  }

  /* Creates an exception with a custom error message
   * @param msg: message
   */
  DException(const char *msg) throw()
    : m_message(msg)
  {
  }

  DException(const std::string &msg) throw()
    : m_message(msg)
  {
  }

  ~DException(void) throw()
  {
  }

  /* Returns the exception message
   * @overrides exception::what
   */
  virtual const char* what() const throw()
  {
    return m_message.c_str();
  }

protected:
  // text returned by what()
  std::string m_message;
};
11 | 12 | #ifndef __D_UTILS__ 13 | #define __D_UTILS__ 14 | 15 | // Exception 16 | #include "DException.h" 17 | 18 | // Files 19 | #include "FileModes.h" 20 | #include "LineFile.h" 21 | #include "BinaryFile.h" 22 | #include "FileFunctions.h" 23 | 24 | // Timestamp 25 | #include "Timestamp.h" 26 | 27 | // Random numbers 28 | #include "Random.h" 29 | 30 | // MAth 31 | #include "Math.hpp" 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /DUtils/DUtils.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 67 | 70 | 73 | 76 | 79 | 82 | 83 | 91 | 94 | 97 | 100 | 103 | 106 | 118 | 121 | 124 | 127 | 132 | 135 | 138 | 141 | 144 | 147 | 148 | 149 | 150 | 151 | 152 | 155 | 158 | 159 | 160 | 163 | 166 | 167 | 170 | 171 | 174 | 175 | 178 | 179 | 182 | 183 | 186 | 187 | 190 | 191 | 192 | 195 | 198 | 199 | 202 | 203 | 204 | 207 | 210 | 211 | 214 | 215 | 216 | 219 | 222 | 223 | 224 | 227 | 228 | 229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /DUtils/FileFunctions.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: FileFunctions.cpp 3 | * Author: Dorian Galvez 4 | * Date: June 2009 5 | * Description: file system functions 6 | */ 7 | 8 | #include "FileFunctions.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #ifdef WIN32 15 | #include 16 | #include "dirent_win.h" 17 | #define mkdir(a) _mkdir(a) 18 | #else 19 | #include 20 | #include 21 | #endif 22 | 23 | using namespace std; 24 | 25 | void FileFunctions::MkDir(const char *path) 26 | { 27 | mkdir(path); 28 | } 29 | 30 | void FileFunctions::RmDir(const char *path) 31 | { 32 | // empty path 33 | vector files = FileFunctions::Dir(path, ""); 34 | for(vector::iterator it = files.begin(); it != files.end(); it++){ 35 | 
remove(it->c_str()); 36 | } 37 | rmdir(path); 38 | } 39 | 40 | void FileFunctions::RmFile(const char *path) 41 | { 42 | remove(path); 43 | } 44 | 45 | bool FileFunctions::FileExists(const char *filename) 46 | { 47 | std::fstream f(filename, ios::in); 48 | 49 | if(f.is_open()){ 50 | f.close(); 51 | return true; 52 | }else 53 | return false; 54 | } 55 | 56 | bool FileFunctions::DirExists(const char *path) 57 | { 58 | DIR *dirp; 59 | if(dirp = opendir(path)){ 60 | closedir(dirp); 61 | return true; 62 | }else 63 | return false; 64 | } 65 | 66 | std::vector FileFunctions::Dir(const char *path, const char *right) 67 | { 68 | DIR *dirp; 69 | struct dirent *entry; 70 | vector ret; 71 | 72 | if(dirp = opendir(path)){ 73 | while(entry = readdir(dirp)){ 74 | string name(entry->d_name); 75 | string r(right); 76 | if((name.length() >= r.length()) && 77 | (name.substr(name.length() - r.length()).compare(r) == 0)) 78 | { 79 | ret.push_back(string(path) + "/" + entry->d_name); 80 | } 81 | } 82 | closedir(dirp); 83 | } 84 | return ret; 85 | } 86 | std::string FileFunctions::FileName(const std::string filepath) 87 | { 88 | string::size_type p = filepath.find_last_of('/'); 89 | string::size_type p2 = filepath.find_last_of('\\'); 90 | if(p2 != string::npos && p2 > p) p = p2; 91 | return filepath.substr(p+1); 92 | } 93 | 94 | void FileFunctions::FileParts(const std::string filepath, std::string &path, 95 | std::string &filename, std::string &ext) 96 | { 97 | string::size_type p = filepath.find_last_of('/'); 98 | string::size_type p2 = filepath.find_last_of('\\'); 99 | if(p == string::npos || (p2 != string::npos && p2 > p)) p = p2; 100 | 101 | std::string filext; 102 | 103 | if(p == string::npos){ 104 | path = ""; 105 | filext = filepath; 106 | }else{ 107 | path = filepath.substr(0, p); 108 | filext = filepath.substr(p+1); 109 | } 110 | 111 | p = filext.find_last_of('.'); 112 | if(p == string::npos){ 113 | filename = filext; 114 | ext = ""; 115 | }else{ 116 | filename = filext.substr(0, 
p); 117 | ext = filext.substr(p+1); 118 | } 119 | } 120 | 121 | -------------------------------------------------------------------------------- /DUtils/FileFunctions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: FileFunctions.h 3 | * Author: Dorian Galvez 4 | * Date: June 2009 5 | * Description: file system functions 6 | */ 7 | 8 | #ifndef __D_FILE_FUNCTIONS__ 9 | #define __D_FILE_FUNCTIONS__ 10 | 11 | #pragma once 12 | 13 | #include 14 | #include 15 | 16 | class FileFunctions 17 | { 18 | public: 19 | 20 | /* Creates the directory 'path'. The parent directory must exist 21 | * @param path 22 | */ 23 | static void MkDir(const char *path); 24 | 25 | /* Removes a directory and its content 26 | * @param path 27 | */ 28 | static void RmDir(const char *path); 29 | 30 | /* Removes a file 31 | * @param path 32 | */ 33 | static void RmFile(const char *path); 34 | 35 | /* Checks the existence of a folder 36 | * @return true iif the directory exists 37 | */ 38 | static bool DirExists(const char *path); 39 | 40 | /* Checks the existence of a file 41 | * @returns true iif the file exists 42 | */ 43 | static bool FileExists(const char *filename); 44 | 45 | /* Returns the relative path of the files located in the path given and 46 | * whose right path of the name matches 'right' 47 | * @param path: path to directory 48 | * @param right: string like "_L.png" 49 | * @return path list 50 | */ 51 | static std::vector Dir(const char *path, const char *right); 52 | 53 | /* Extracts the filename of the given path 54 | * @param file path 55 | * @return file name 56 | */ 57 | static std::string FileName(const std::string filepath); 58 | 59 | /* Extracts the path, file name and extension of the given path 60 | * @param filepath 61 | * @param path (out): path to file 62 | * @param filename (out): filename without extension or dot 63 | * @param ext (out): extension without dot 64 | */ 65 | static void FileParts(const std::string 
filepath, std::string &path, 66 | std::string &filename, std::string &ext); 67 | 68 | }; 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /DUtils/FileModes.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: FileModes.h 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: April 2010 6 | * Description: types used with file managers 7 | * 8 | */ 9 | 10 | #pragma once 11 | #ifndef __D_FILE_MODES__ 12 | #define __D_FILE_MODES__ 13 | 14 | namespace DUtils { 15 | 16 | enum FILE_MODES { 17 | READ = 1, 18 | WRITE = 2, 19 | APPEND = 4 20 | }; 21 | 22 | } 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /DUtils/LineFile.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: LineFile.cpp 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: October 6, 2009 6 | * Description: reads and writes text line files 7 | * 8 | */ 9 | 10 | #include "LineFile.h" 11 | #include "DException.h" 12 | #include "FileModes.h" 13 | #include 14 | #include 15 | using namespace std; 16 | 17 | using namespace DUtils; 18 | 19 | LineFile::LineFile(void): m_next_line("") 20 | { 21 | } 22 | 23 | LineFile::~LineFile(void) 24 | { 25 | Close(); 26 | } 27 | 28 | LineFile::LineFile(const char *filename, const FILE_MODES mode) 29 | { 30 | Init(filename, mode); 31 | } 32 | 33 | LineFile::LineFile(const string &filename, const FILE_MODES mode) 34 | { 35 | Init(filename.c_str(), mode); 36 | } 37 | 38 | void LineFile::Init(const char *filename, const FILE_MODES mode) 39 | { 40 | m_next_line = ""; 41 | 42 | if(mode & READ){ 43 | OpenForReading(filename); 44 | }else if((mode & WRITE) && (mode & APPEND)){ 45 | OpenForAppending(filename); 46 | }else if(mode & WRITE){ 47 | OpenForWriting(filename); 48 | }else{ 49 | throw DException("Wrong access mode"); 50 | } 51 | } 52 | 53 | 
void LineFile::OpenForReading(const char *filename) 54 | { 55 | Close(); m_f.clear(); m_next_line.clear(); m_f.open(filename, ios::in); // closes any file still open and resets stream state first 56 | if(!m_f.is_open()){ 57 | throw DException(string("Cannot open ") + filename + " for reading"); 58 | }else{ 59 | m_mode = READ; 60 | } 61 | } 62 | 63 | void LineFile::OpenForWriting(const char *filename) 64 | { 65 | Close(); m_f.clear(); m_next_line.clear(); m_f.open(filename, ios::out); // closes any file still open and resets stream state first 66 | if(!m_f.is_open()){ 67 | throw DException(string("Cannot open ") + filename + " for writing"); 68 | }else{ 69 | m_mode = WRITE; 70 | } 71 | } 72 | 73 | void LineFile::OpenForAppending(const char *filename) 74 | { 75 | Close(); m_f.clear(); m_next_line.clear(); m_f.open(filename, ios::out | ios::app); // closes any file still open and resets stream state first 76 | if(!m_f.is_open()){ 77 | throw DException(string("Cannot open ") + filename + " for appending"); 78 | }else{ 79 | m_mode = DUtils::FILE_MODES(WRITE | APPEND); 80 | } 81 | } 82 | 83 | void LineFile::Close() 84 | { 85 | if(m_f.is_open()) m_f.close(); 86 | } 87 | 88 | bool LineFile::Eof() 89 | { 90 | if(!m_f.is_open()) return true; 91 | 92 | if(m_mode & READ){ 93 | if(!m_next_line.empty()) // a look-ahead line is still buffered 94 | return false; 95 | else if(m_f.eof()) 96 | return true; 97 | else{ 98 | getline(m_f, m_next_line); // look ahead one line; operator>> hands it out later 99 | return m_next_line.empty() && !m_f.good(); // a final line without trailing newline still counts as data 100 | } 101 | }else 102 | throw DException("Wrong access mode"); 103 | 104 | } 105 | 106 | LineFile& LineFile::operator<< (const char *s) 107 | { 108 | if(!m_f.is_open()) throw DException("File is not open"); 109 | 110 | if(m_mode & WRITE) 111 | m_f << s << endl; 112 | else 113 | throw DException("Wrong access mode"); 114 | 115 | return *this; 116 | } 117 | 118 | LineFile& LineFile::operator>> (string &s) 119 | { 120 | if(!m_f.is_open()) throw DException("File is not open"); 121 | 122 | if(m_mode & READ){ 123 | if(!m_next_line.empty()){ // serve the line buffered by Eof() first, even if the stream already hit eof 124 | s = m_next_line; 125 | m_next_line.clear(); 126 | }else if(m_f.eof()){ 127 | s.clear(); 128 | }else{ 129 | getline(m_f, s); 130 | if(m_f.fail()){ // nothing could be extracted; eof alone may still carry a last unterminated line 131 | s.clear(); 132 | } 133 | } 134 | 135 | }else 136 | throw DException("Wrong access mode"); 137 | 138 | return *this; 139 | } 140 | 141 | void LineFile::Dump(const vector &v) 
142 | { 143 | if(!m_f.is_open()) throw DException("File is not open"); 144 | 145 | if(m_mode & WRITE){ 146 | vector::const_iterator it; 147 | for(it = v.begin(); it != v.end(); it++){ 148 | m_f << *it << endl; 149 | } 150 | }else 151 | throw DException("Wrong access mode"); 152 | } 153 | 154 | void LineFile::DiscardLine() 155 | { 156 | string nul; 157 | *this >> nul; 158 | } 159 | 160 | 161 | -------------------------------------------------------------------------------- /DUtils/LineFile.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: LineFile.h 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: October 6, 2009 6 | * Description: reads and writes text line files 7 | * 8 | */ 9 | 10 | #pragma once 11 | #ifndef __D_LINE_FILE__ 12 | #define __D_LINE_FILE__ 13 | 14 | #include "DException.h" 15 | #include "FileModes.h" 16 | #include 17 | #include 18 | using namespace std; 19 | 20 | namespace DUtils { 21 | 22 | class LineFile 23 | { 24 | public: 25 | /* Creates a linefile with no file 26 | */ 27 | LineFile(void); 28 | 29 | /* Closes any opened file 30 | */ 31 | ~LineFile(void); 32 | 33 | /* Creates a linefile by opening a file 34 | * @param filename 35 | * @param mode: READ or WRITE 36 | * @throws DException if cannot open the file 37 | */ 38 | LineFile(const char *filename, const FILE_MODES mode); 39 | LineFile(const string &filename, const FILE_MODES mode); 40 | 41 | /* Opens a file for reading. It closes any other opened file 42 | * @param filename 43 | * @throws DException if cannot create the file 44 | */ 45 | void OpenForReading(const char *filename); 46 | inline void OpenForReading(const string &filename) 47 | { 48 | OpenForReading(filename.c_str()); 49 | } 50 | 51 | /* Opens a file for writing. 
It closes any other opened file 52 | * @param filename 53 | * @throws DException if cannot create the file 54 | */ 55 | void OpenForWriting(const char *filename); 56 | inline void OpenForWriting(const string &filename) 57 | { 58 | OpenForWriting(filename.c_str()); 59 | } 60 | 61 | /* Opens a file for writing at the end. It closes any other opened file 62 | * @param filename 63 | * @throws DException if cannot open the file 64 | */ 65 | void OpenForAppending(const char *filename); 66 | inline void OpenForAppending(const string &filename) 67 | { 68 | OpenForAppending(filename.c_str()); 69 | } 70 | 71 | /* Says whether the end of the file has been reached. It is not necessary 72 | * to read a last empty line to reach eof 73 | * @return true iff there is nothing else to read (also when no file is open) 74 | * @throws DException if wrong access mode 75 | */ 76 | bool Eof(); 77 | 78 | /* Closes any opened file. It is not necessary to call this function 79 | * explicitly 80 | */ 81 | void Close(); 82 | 83 | /* Writes a line 84 | * @throws DException if the file is not open or if wrong access mode 85 | */ 86 | LineFile& operator<< (const char *s); 87 | inline LineFile& operator<< (const string &s) 88 | { 89 | return this->operator <<(s.c_str()); 90 | } 91 | 92 | /* Reads a line 93 | * @param s[]: buffer to store the string. 
Memory must be allocated 94 | * @param s: string to write on 95 | * @thows DException if wrong access mode 96 | */ 97 | LineFile& operator>> (string &s); 98 | 99 | /* Writes several lines at a time 100 | * @v: vector of line strings 101 | * @throws DException if wrong access mode 102 | */ 103 | void Dump(const vector &v); 104 | inline LineFile& operator<< (const vector &v) 105 | { 106 | Dump(v); 107 | return *this; 108 | } 109 | 110 | /* In reading mode, reads and throws away the next line 111 | */ 112 | void DiscardLine(); 113 | 114 | protected: 115 | 116 | /** 117 | * Initializes the object by opening a file 118 | * @param filename file to open 119 | * @param mode opening mode 120 | * @throws DException if cannot open the file 121 | */ 122 | void Init(const char *filename, const FILE_MODES mode); 123 | 124 | protected: 125 | FILE_MODES m_mode; // opening mode 126 | fstream m_f; // fstream 127 | string m_next_line; // next line to read 128 | }; 129 | 130 | } 131 | 132 | #endif 133 | 134 | -------------------------------------------------------------------------------- /DUtils/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | DEPS=BinaryFile.h DUtils.h LineFile.h Random.h Timestamp.h DException.h FileModes.h Math.hpp 3 | OBJS=BinaryFile.o LineFile.o Random.o Timestamp.o 4 | 5 | %.o: %.cpp $(DEPS) 6 | $(CC) -fPIC -O3 -Wall -c $< -o $@ 7 | 8 | libDUtils.so: $(OBJS) 9 | $(CC) $^ -shared -o $@ 10 | 11 | clean: 12 | rm -f *.o *.so 13 | 14 | 15 | -------------------------------------------------------------------------------- /DUtils/Math.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Math.h 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: April 2010 6 | * Description: some math functions 7 | * 8 | */ 9 | 10 | #pragma once 11 | #ifndef __D_MATH__ 12 | #define __D_MATH__ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 
| namespace DUtils { 20 | 21 | class Math { 22 | 23 | public: 24 | 25 | /* Returns the mean of a population 26 | * @param v 27 | */ 28 | template 29 | static double Mean(const std::vector &v) 30 | { 31 | if(v.empty()) 32 | return 0; 33 | else{ 34 | double sum = 0; 35 | typename std::vector::const_iterator it; 36 | for(it = v.begin(); it != v.end(); it++){ 37 | sum += *it; 38 | } 39 | return sum/double(v.size()); 40 | } 41 | } 42 | 43 | /* Returns the standard deviation of a population 44 | * @param v 45 | * @param mean (optional): the mean of the population 46 | */ 47 | template 48 | static double Stdev(const std::vector &v) 49 | { 50 | return Math::Stdev(v, Math::Mean(v)); 51 | } 52 | 53 | template 54 | static double Stdev(const std::vector &v, double mean) 55 | { 56 | if(v.size() <= 1) 57 | return 0; 58 | else{ 59 | // stdev = sqrt( Sum{ (x_i - mean)^2 } / (N-1) ) 60 | double sum = 0; 61 | typename std::vector::const_iterator it; 62 | for(it = v.begin(); it != v.end(); it++){ 63 | sum += pow(*it - mean, 2); 64 | } 65 | return sqrt(sum/double(v.size()-1)); 66 | } 67 | } 68 | 69 | }; 70 | 71 | } 72 | 73 | #endif 74 | 75 | -------------------------------------------------------------------------------- /DUtils/Random.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Random.cpp 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: April 2010 6 | * Description: manages pseudo-random numbers 7 | * 8 | */ 9 | 10 | #include "Random.h" 11 | #include "Timestamp.h" 12 | #include 13 | using namespace std; 14 | 15 | using namespace DUtils; 16 | 17 | void Random::SeedRand(){ 18 | Timestamp time; 19 | time.setToCurrentTime(); 20 | srand((unsigned)time.getFloatTime()); 21 | } 22 | 23 | void Random::SeedRand(int seed) 24 | { 25 | srand(seed); 26 | } 27 | 28 | int Random::RandomInt(int min, int max){ 29 | int d = max - min + 1; 30 | return int(((double)rand()/((double)RAND_MAX + 1.0)) * d) + min; 31 | } 32 | 
-------------------------------------------------------------------------------- /DUtils/Random.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Random.h 3 | * Project: DUtils library 4 | * Author: Dorian Galvez 5 | * Date: April 2010 6 | * Description: manages pseudo-random numbers 7 | * 8 | */ 9 | 10 | #pragma once 11 | #ifndef __D_RANDOM__ 12 | #define __D_RANDOM__ 13 | 14 | #include 15 | 16 | namespace DUtils { 17 | 18 | class Random 19 | { 20 | public: 21 | /** 22 | * Sets the random number seed to the current time 23 | */ 24 | static void SeedRand(); 25 | 26 | /** 27 | * Sets the given random number seed 28 | * @param seed 29 | */ 30 | static void SeedRand(int seed); 31 | 32 | /** 33 | * Returns a random number in the range [0..1] 34 | * @return random T number in [0..1] 35 | */ 36 | template 37 | static T RandomValue(){ 38 | return (T)rand()/(T)RAND_MAX; 39 | } 40 | 41 | /** 42 | * Returns a random number in the range [min..max] 43 | * @param min 44 | * @param max 45 | * @return random T number in [min..max] 46 | */ 47 | template 48 | static T RandomValue(T min, T max){ 49 | return Random::RandomValue() * (max - min) + min; 50 | } 51 | 52 | /** 53 | * Returns a random int in the range [min..max] 54 | * @param min 55 | * @param max 56 | * @return random int in [min..max] 57 | */ 58 | static int RandomInt(int min, int max); 59 | }; 60 | 61 | } 62 | 63 | #endif 64 | 65 | -------------------------------------------------------------------------------- /DUtils/Timestamp.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Timestamp.cpp 3 | * Author: Dorian Galvez 4 | * Date: March 2009 5 | * Description: timestamping functions 6 | * 7 | * Note: in windows, this class has a 1ms resolution 8 | * 9 | */ 10 | 11 | #include 12 | #include 13 | 14 | #ifdef WIN32 15 | #include 16 | #include 17 | #define sprintf sprintf_s 18 | #else 19 | #include 20 | #endif 21 | 22 | 
#include "Timestamp.h" 23 | 24 | #include 25 | #include 26 | #include 27 | using namespace std; 28 | 29 | using namespace DUtils; 30 | 31 | Timestamp::Timestamp(void): m_secs(0), m_usecs(0) 32 | { 33 | } 34 | 35 | Timestamp::~Timestamp(void) 36 | { 37 | } 38 | 39 | void Timestamp::setToCurrentTime(){ 40 | 41 | #ifdef WIN32 42 | struct __timeb32 timebuffer; 43 | _ftime32_s( &timebuffer ); // C4996 44 | // Note: _ftime is deprecated; consider using _ftime_s instead 45 | m_secs = timebuffer.time; 46 | m_usecs = timebuffer.millitm * 1000; 47 | #else 48 | struct timeval now; 49 | gettimeofday(&now, NULL); 50 | m_secs = now.tv_sec; 51 | m_usecs = now.tv_usec; 52 | #endif 53 | 54 | } 55 | 56 | void Timestamp::setTime(const string &stime){ 57 | string::size_type p = stime.find('.'); 58 | if(p == string::npos){ 59 | m_secs = atol(stime.c_str()); m_usecs = 0; // no fractional part: do not keep stale microseconds 60 | }else{ 61 | m_secs = atol(stime.substr(0, p).c_str()); 62 | 63 | string s_usecs = stime.substr(p+1, 6); // keep at most microsecond precision 64 | m_usecs = atol(s_usecs.c_str()); // parse only the digits kept above, so the value stays below one second 65 | m_usecs *= (unsigned long)pow(10.0, double(6 - s_usecs.length())); // right-pad short fractions, e.g. ".5" -> 500000 66 | } 67 | } 68 | 69 | double Timestamp::getFloatTime() const { 70 | return double(m_secs) + double(m_usecs)/1000000.0; 71 | } 72 | 73 | string Timestamp::getStringTime() const { 74 | char buf[32]; 75 | sprintf(buf, "%.6lf", this->getFloatTime()); 76 | return string(buf); 77 | } 78 | 79 | double Timestamp::operator- (const Timestamp &t) const { 80 | return this->getFloatTime() - t.getFloatTime(); 81 | } 82 | 83 | Timestamp Timestamp::operator+ (double s) const 84 | { 85 | unsigned long secs = (long)floor(s); 86 | unsigned long usecs = (long)((s - (double)secs) * 1e6); 87 | 88 | Timestamp t; 89 | 90 | const unsigned long max = 1000000ul; 91 | 92 | if(m_usecs + usecs >= max) 93 | t.setTime(m_secs + secs + 1, m_usecs + usecs - max); 94 | else 95 | t.setTime(m_secs + secs, m_usecs + usecs); 96 | 97 | return t; 98 | } 99 | 100 | Timestamp Timestamp::operator- (double s) const 101 | { 102 | unsigned long secs = (long)floor(s); 103 | 
unsigned long usecs = (long)((s - (double)secs) * 1e6); 104 | 105 | Timestamp t; 106 | 107 | const unsigned long max = 1000000ul; 108 | 109 | if(m_usecs < usecs) 110 | t.setTime(m_secs - secs - 1, max - (usecs - m_usecs)); 111 | else 112 | t.setTime(m_secs - secs, m_usecs - usecs); 113 | 114 | return t; 115 | } 116 | 117 | bool Timestamp::operator> (const Timestamp &t) const 118 | { 119 | if(m_secs > t.m_secs) return true; 120 | else if(m_secs == t.m_secs) return m_usecs > t.m_usecs; 121 | else return false; 122 | } 123 | 124 | bool Timestamp::operator>= (const Timestamp &t) const 125 | { 126 | if(m_secs > t.m_secs) return true; 127 | else if(m_secs == t.m_secs) return m_usecs >= t.m_usecs; 128 | else return false; 129 | } 130 | 131 | bool Timestamp::operator< (const Timestamp &t) const 132 | { 133 | if(m_secs < t.m_secs) return true; 134 | else if(m_secs == t.m_secs) return m_usecs < t.m_usecs; 135 | else return false; 136 | } 137 | 138 | bool Timestamp::operator<= (const Timestamp &t) const 139 | { 140 | if(m_secs < t.m_secs) return true; 141 | else if(m_secs == t.m_secs) return m_usecs <= t.m_usecs; 142 | else return false; 143 | } 144 | 145 | bool Timestamp::operator== (const Timestamp &t) const 146 | { 147 | return(m_secs == t.m_secs && m_usecs == t.m_usecs); 148 | } 149 | 150 | 151 | string Timestamp::Format() const { 152 | return Timestamp::Format(getFloatTime()); 153 | } 154 | 155 | string Timestamp::Format(double s) { 156 | int days = int(s / (24. * 3600.0)); 157 | s -= days * (24. 
* 3600.0); 158 | int hours = int(s / 3600.0); 159 | s -= hours * 3600; 160 | int minutes = int(s / 60.0); 161 | s -= minutes * 60; 162 | int seconds = int(s); 163 | int ms = int((s - seconds)*1e6); 164 | 165 | stringstream ss; 166 | ss.fill('0'); 167 | bool b; 168 | if(b = (days > 0)) ss << days << "d "; 169 | if(b = (b || hours > 0)) ss << setw(2) << hours << ":"; 170 | if(b = (b || minutes > 0)) ss << setw(2) << minutes << ":"; 171 | if(b) ss << setw(2); 172 | ss << seconds; 173 | if(!b) ss << "." << setw(6) << ms; 174 | 175 | return ss.str(); 176 | } 177 | 178 | 179 | -------------------------------------------------------------------------------- /DUtils/Timestamp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Timestamp.h 3 | * Author: Dorian Galvez 4 | * Date: March 2009 5 | * Description: timestamping functions 6 | */ 7 | 8 | #ifndef __D_TIMESTAMP__ 9 | #define __D_TIMESTAMP__ 10 | 11 | #include 12 | using namespace std; 13 | 14 | namespace DUtils { 15 | 16 | class Timestamp 17 | { 18 | public: 19 | Timestamp(void); 20 | ~Timestamp(void); 21 | 22 | /* Sets this instance to the current time 23 | */ 24 | void setToCurrentTime(); 25 | 26 | /* Sets the timestamp from seconds and microseconds 27 | * @param secs: seconds 28 | * @param usecs: microseconds 29 | */ 30 | inline void setTime(unsigned long secs, unsigned long usecs){ 31 | m_secs = secs; 32 | m_usecs = usecs; 33 | } 34 | 35 | /* Sets the timestamp from a string with the time in seconds 36 | * @param stime: string such as "1235603336.036609" 37 | */ 38 | void setTime(const string &stime); 39 | 40 | /* Returns this timestamp as the number of seconds in (long) float format 41 | */ 42 | double getFloatTime() const; 43 | 44 | /* Returns this timestamp as the number of seconds in fixed length string format 45 | */ 46 | string getStringTime() const; 47 | 48 | /* Returns the difference in seconds between this timestamp (greater) and t (smaller) 49 | * If the 
order is swapped, a negative number is returned 50 | * @param t: timestamp to subtract from this timestamp 51 | * @return difference in seconds 52 | */ 53 | double operator- (const Timestamp &t) const; 54 | 55 | /* Returns a copy of this timestamp + s seconds 56 | * @param s: seconds 57 | */ 58 | Timestamp operator+ (double s) const; 59 | 60 | /* Returns a copy of this timestamp - s seconds 61 | * @param s: seconds 62 | */ 63 | Timestamp operator- (double s) const; 64 | 65 | /* Returns whether this timestamp is at the future of t 66 | * @param t 67 | */ 68 | bool operator> (const Timestamp &t) const; 69 | 70 | /* Returns whether this timestamp is at the future of (or is the same as) t 71 | * @param t 72 | */ 73 | bool operator>= (const Timestamp &t) const; 74 | 75 | /* Returns whether this timestamp and t represent the same instant 76 | * @param t 77 | */ 78 | bool operator== (const Timestamp &t) const; 79 | 80 | /* Returns whether this timestamp is at the past of t 81 | * @param t 82 | */ 83 | bool operator< (const Timestamp &t) const; 84 | 85 | /* Returns whether this timestamp is at the past of (or is the same as) t 86 | * @param t 87 | */ 88 | bool operator<= (const Timestamp &t) const; 89 | 90 | /* Returns a string version of the timestamp, with the format 91 | * xd hh:mm:ss, hh:mm:ss, mm:ss or s.us 92 | * @param s: timestamp in long format (given by getFloatTime) to format 93 | */ 94 | string Format() const; 95 | static string Format(double s); 96 | 97 | 98 | protected: 99 | unsigned long m_secs; // seconds 100 | unsigned long m_usecs; // microseconds 101 | }; 102 | 103 | } 104 | 105 | #endif 106 | 107 | -------------------------------------------------------------------------------- /DUtils/dirent_win.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * dirent.h - dirent API for Microsoft Visual Studio 3 | * 4 | * Copyright (C) 2006 Toni 
Ronkko 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining 7 | * a copy of this software and associated documentation files (the 8 | * ``Software''), to deal in the Software without restriction, including 9 | * without limitation the rights to use, copy, modify, merge, publish, 10 | * distribute, sublicense, and/or sell copies of the Software, and to 11 | * permit persons to whom the Software is furnished to do so, subject to 12 | * the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included 15 | * in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 | * IN NO EVENT SHALL TONI RONKKO BE LIABLE FOR ANY CLAIM, DAMAGES OR 21 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 22 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 23 | * OTHER DEALINGS IN THE SOFTWARE. 24 | * 25 | * Dec 15, 2009, John Cunningham 26 | * Added rewinddir member function 27 | * 28 | * Jan 18, 2008, Toni Ronkko 29 | * Using FindFirstFileA and WIN32_FIND_DATAA to avoid converting string 30 | * between multi-byte and unicode representations. This makes the 31 | * code simpler and also allows the code to be compiled under MingW. Thanks 32 | * to Azriel Fasten for the suggestion. 33 | * 34 | * Mar 4, 2007, Toni Ronkko 35 | * Bug fix: due to the strncpy_s() function this file only compiled in 36 | * Visual Studio 2005. Using the new string functions only when the 37 | * compiler version allows. 
38 | * 39 | * Nov 2, 2006, Toni Ronkko 40 | * Major update: removed support for Watcom C, MS-DOS and Turbo C to 41 | * simplify the file, updated the code to compile cleanly on Visual 42 | * Studio 2005 with both unicode and multi-byte character strings, 43 | * removed rewinddir() as it had a bug. 44 | * 45 | * Aug 20, 2006, Toni Ronkko 46 | * Removed all remarks about MSVC 1.0, which is antiqued now. Simplified 47 | * comments by removing SGML tags. 48 | * 49 | * May 14 2002, Toni Ronkko 50 | * Embedded the function definitions directly to the header so that no 51 | * source modules need to be included in the Visual Studio project. Removed 52 | * all the dependencies to other projects so that this very header can be 53 | * used independently. 54 | * 55 | * May 28 1998, Toni Ronkko 56 | * First version. 57 | *****************************************************************************/ 58 | #ifndef DIRENT_H 59 | #define DIRENT_H 60 | 61 | #include 62 | #include 63 | #include 64 | 65 | 66 | typedef struct dirent 67 | { 68 | char d_name[MAX_PATH + 1]; /* current dir entry (multi-byte char string) */ 69 | WIN32_FIND_DATAA data; /* file attributes */ 70 | } dirent; 71 | 72 | 73 | typedef struct DIR 74 | { 75 | dirent current; /* Current directory entry */ 76 | int cached; /* Indicates un-processed entry in memory */ 77 | HANDLE search_handle; /* File search handle */ 78 | char patt[MAX_PATH + 3]; /* search pattern (3 = pattern + "\\*\0") */ 79 | } DIR; 80 | 81 | 82 | /* Forward declarations */ 83 | static DIR *opendir (const char *dirname); 84 | static struct dirent *readdir (DIR *dirp); 85 | static int closedir (DIR *dirp); 86 | static void rewinddir(DIR* dirp); 87 | 88 | 89 | /* Use the new safe string functions introduced in Visual Studio 2005 */ 90 | #if defined(_MSC_VER) && _MSC_VER >= 1400 91 | # define STRNCPY(dest,src,size) strncpy_s((dest),(size),(src),_TRUNCATE) 92 | #else 93 | # define STRNCPY(dest,src,size) strncpy((dest),(src),(size)) 94 | #endif 95 | 96 | 
97 | /***************************************************************************** 98 | * Open directory stream DIRNAME for read and return a pointer to the 99 | * internal working area that is used to retrieve individual directory 100 | * entries. 101 | */ 102 | static DIR *opendir(const char *dirname) 103 | { 104 | DIR *dirp; 105 | assert (dirname != NULL); 106 | assert (strlen (dirname) < MAX_PATH); 107 | 108 | /* construct new DIR structure */ 109 | dirp = (DIR*) malloc (sizeof (struct DIR)); 110 | if (dirp != NULL) { 111 | char *p; 112 | 113 | /* take directory name... */ 114 | STRNCPY (dirp->patt, dirname, sizeof(dirp->patt)); 115 | dirp->patt[MAX_PATH] = '\0'; 116 | 117 | /* ... and append search pattern to it */ 118 | p = strchr (dirp->patt, '\0'); 119 | if (dirp->patt < p && *(p-1) != '\\' && *(p-1) != ':') { 120 | *p++ = '\\'; 121 | } 122 | *p++ = '*'; 123 | *p = '\0'; 124 | 125 | /* open stream and retrieve first file */ 126 | dirp->search_handle = FindFirstFileA (dirp->patt, &dirp->current.data); 127 | if (dirp->search_handle == INVALID_HANDLE_VALUE) { 128 | /* invalid search pattern? */ 129 | free (dirp); 130 | return NULL; 131 | } 132 | 133 | /* there is an un-processed directory entry in memory now */ 134 | dirp->cached = 1; 135 | } 136 | 137 | return dirp; 138 | } 139 | 140 | 141 | /***************************************************************************** 142 | * Read a directory entry, and return a pointer to a dirent structure 143 | * containing the name of the entry in d_name field. Individual directory 144 | * entries returned by this very function include regular files, 145 | * sub-directories, pseudo-directories "." and "..", but also volume labels, 146 | * hidden files and system files may be returned. 
147 | */ 148 | static struct dirent *readdir(DIR *dirp) 149 | { 150 | assert (dirp != NULL); 151 | 152 | if (dirp->search_handle == INVALID_HANDLE_VALUE) { 153 | /* directory stream was opened/rewound incorrectly or ended normally */ 154 | return NULL; 155 | } 156 | 157 | /* get next directory entry */ 158 | if (dirp->cached != 0) { 159 | /* a valid directory entry already in memory */ 160 | dirp->cached = 0; 161 | } else { 162 | /* read next directory entry from disk */ 163 | if (FindNextFileA (dirp->search_handle, &dirp->current.data) == FALSE) { 164 | /* the very last file has been processed or an error occured */ 165 | FindClose (dirp->search_handle); 166 | dirp->search_handle = INVALID_HANDLE_VALUE; 167 | return NULL; 168 | } 169 | } 170 | 171 | /* copy as a multibyte character string */ 172 | STRNCPY ( dirp->current.d_name, 173 | dirp->current.data.cFileName, 174 | sizeof(dirp->current.d_name) ); 175 | dirp->current.d_name[MAX_PATH] = '\0'; 176 | 177 | return &dirp->current; 178 | } 179 | 180 | 181 | /***************************************************************************** 182 | * Close directory stream opened by opendir() function. Close of the 183 | * directory stream invalidates the DIR structure as well as any previously 184 | * read directory entry. 185 | */ 186 | static int closedir(DIR *dirp) 187 | { 188 | assert (dirp != NULL); 189 | 190 | /* release search handle */ 191 | if (dirp->search_handle != INVALID_HANDLE_VALUE) { 192 | FindClose (dirp->search_handle); 193 | dirp->search_handle = INVALID_HANDLE_VALUE; 194 | } 195 | 196 | /* release directory handle */ 197 | free (dirp); 198 | return 0; 199 | } 200 | 201 | 202 | /***************************************************************************** 203 | * Resets the position of the directory stream to which dirp refers to the 204 | * beginning of the directory. 
It also causes the directory stream to refer 205 | * to the current state of the corresponding directory, as a call to opendir() 206 | * would have done. If dirp does not refer to a directory stream, the effect 207 | * is undefined. 208 | */ 209 | static void rewinddir(DIR* dirp) 210 | { 211 | /* release search handle */ 212 | if (dirp->search_handle != INVALID_HANDLE_VALUE) { 213 | FindClose (dirp->search_handle); 214 | dirp->search_handle = INVALID_HANDLE_VALUE; 215 | } 216 | 217 | /* open new search handle and retrieve first file */ 218 | dirp->search_handle = FindFirstFileA (dirp->patt, &dirp->current.data); 219 | if (dirp->search_handle == INVALID_HANDLE_VALUE) { 220 | /* invalid search pattern? */ 221 | free (dirp); 222 | return; 223 | } 224 | 225 | /* there is an un-processed directory entry in memory now */ 226 | dirp->cached = 1; 227 | } 228 | 229 | 230 | #endif /*DIRENT_H*/ 231 | -------------------------------------------------------------------------------- /Demo/Demo.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | // OpenCV 6 | #include 7 | #include 8 | 9 | // DBow 10 | #include "DUtils.h" 11 | #include "DBow.h" 12 | 13 | using namespace DBow; 14 | using namespace DUtils; 15 | using namespace std; 16 | 17 | void loadFeatures(vector > &features); 18 | void testVocCreation(const vector > &features); 19 | void testDatabase(const vector > &features); 20 | 21 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 22 | 23 | // number of training images 24 | const int Nimages = 4; 25 | 26 | // extended surf gives 128-dimensional vectors 27 | const bool extended_surf = false; 28 | 29 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 30 | 31 | void wait() 32 | { 33 | cout << endl << "Press enter to continue" << endl; 34 | getchar(); 35 | } 36 | 37 | int main() 38 | { 39 | vector > features; 40 | loadFeatures(features); 41 | 42 | 
testVocCreation(features); 43 | 44 | wait(); 45 | 46 | testDatabase(features); 47 | 48 | wait(); 49 | 50 | return 0; 51 | } 52 | 53 | void loadFeatures(vector > &features) 54 | { 55 | features.clear(); 56 | features.reserve(Nimages); 57 | 58 | cv::SURF surf(400, 4, 2, extended_surf); 59 | 60 | cout << "Extracting SURF features..." << endl; 61 | for(int i = 1; i <= Nimages; i++){ 62 | stringstream ss; 63 | ss << "image" << i << ".png"; 64 | 65 | cv::Mat image = cv::imread(ss.str(), 0); 66 | cv::Mat mask; 67 | vector keypoints; 68 | 69 | features.push_back(vector()); 70 | surf(image, mask, keypoints, features.back()); 71 | } 72 | } 73 | 74 | void testVocCreation(const vector > &features) 75 | { 76 | // branching factor and depth levels 77 | const int k = 9; 78 | const int L = 3; 79 | 80 | HVocParams params(k, L, (extended_surf ? 128 : 64)); 81 | HVocabulary voc(params); 82 | 83 | cout << "Creating a small " << k << "^" << L << " vocabulary..." << endl; 84 | voc.Create(features); 85 | cout << "... done!" << endl; 86 | 87 | cout << "Stopping some words..." << endl; 88 | voc.StopWords(0.01f); 89 | 90 | cout << "Vocabulary information: " << endl; 91 | cout << endl << voc.RetrieveInfo().toString() << endl; 92 | 93 | // lets do something with this vocabulary 94 | cout << "Matching images against themselves (0 low, 1 high): " << endl; 95 | BowVector v1, v2; 96 | for(int i = 0; i < Nimages; i++){ 97 | voc.Transform(features[i], v1); 98 | for(int j = i+1; j < Nimages; j++){ 99 | voc.Transform(features[j], v2); 100 | 101 | double score = voc.Score(v1, v2); 102 | cout << "Image " << i+1 << " vs Image " << j+1 << ": " << score << endl; 103 | } 104 | } 105 | 106 | cout << endl << "Saving vocabulary..." << endl; 107 | voc.Save("small_vocabulary.txt", false); 108 | cout << "Done" << endl; 109 | 110 | } 111 | 112 | void testDatabase(const vector > &features) 113 | { 114 | cout << "Creating a small database..." 
<< endl; 115 | 116 | HVocabulary *voc = new HVocabulary("small_vocabulary.txt"); 117 | Database db(*voc); 118 | delete voc; // db maintains its own vocabulary instance 119 | 120 | for(int i = 0; i < Nimages; i++){ 121 | db.AddEntry(features[i]); 122 | } 123 | 124 | cout << "... done!" << endl; 125 | 126 | cout << "Database information: " << endl; 127 | cout << endl << db.RetrieveInfo().toString() << endl; 128 | 129 | cout << "Querying the database: " << endl; 130 | 131 | QueryResults ret; 132 | for(int i = 0; i < Nimages; i++){ 133 | db.Query(ret, features[i], 2); 134 | 135 | // ret[0] is always the same image in this case, because we added it to the 136 | // database. ret[1] is the second best match. 137 | 138 | cout << "Searching for Image " << i+1 << ". Best match: " 139 | << ret[1].Id + 1 << ", score: " << ret[1].Score << endl; 140 | } 141 | 142 | } 143 | -------------------------------------------------------------------------------- /Demo/Demo.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 25 | 28 | 31 | 34 | 37 | 40 | 50 | 53 | 56 | 59 | 66 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 88 | 96 | 99 | 102 | 105 | 108 | 111 | 121 | 124 | 127 | 130 | 140 | 143 | 146 | 149 | 152 | 155 | 158 | 161 | 162 | 163 | 164 | 165 | 166 | 171 | 174 | 175 | 176 | 181 | 182 | 187 | 188 | 189 | 190 | 191 | 192 | -------------------------------------------------------------------------------- /Demo/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-I../DUtils -I../DBow $(OPENCV_CFLAGS) 3 | LFLAGS=-L../DUtils -L../DBow $(OPENCV_LFLAGS) 4 | LIBS=-lDUtils -lDBow $(OPENCV_LIBS) 5 | DEPS= 6 | OBJS=Demo.o 7 | 8 | %.o: %.cpp $(DEPS) 9 | $(CC) $(CFLAGS) -O3 -Wall -c $< -o $@ 10 | 11 | Demo: $(OBJS) 12 | $(CC) $^ $(LFLAGS) $(LIBS) -o $@ 13 | 14 | clean: 15 | rm -f *.o Demo 16 | 17 | 
-------------------------------------------------------------------------------- /Demo/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dorian3d/DBow/275d4504c8a008100f9a038abfe745a0c067e913/Demo/image1.png -------------------------------------------------------------------------------- /Demo/image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dorian3d/DBow/275d4504c8a008100f9a038abfe745a0c067e913/Demo/image2.png -------------------------------------------------------------------------------- /Demo/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dorian3d/DBow/275d4504c8a008100f9a038abfe745a0c067e913/Demo/image3.png -------------------------------------------------------------------------------- /Demo/image4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dorian3d/DBow/275d4504c8a008100f9a038abfe745a0c067e913/Demo/image4.png -------------------------------------------------------------------------------- /INSTALL.txt: -------------------------------------------------------------------------------- 1 | The library is delivered with installation files for Visual Studio 9 (at 2 | least) and simple Makefiles. It has been tested on Windows with Visual 3 | Studio and STLport, and on Ubuntu with gcc 4.2.4. To install in Windows, 4 | open the Visual Studio sln file, open the Property page of the Demo 5 | project, change the include and library path of OpenCV if it is 6 | necessary, and compile all. If you do not have OpenCV installed or do 7 | not want to build the Demo application, disable that project. 8 | 9 | To install in *nix, just type make nocv or make install-nocv to build 10 | the libraries. 
The latter command also copies them to the lib directory 11 | (not in the system directory). These commands do not build the demo 12 | application, which requires OpenCV2. To build also the demo, first make 13 | sure that pkg-config can find the OpenCV paths. If it cannot, you can 14 | modify the root Makefile and manually set the OPENCV_CFLAGS and 15 | OPENCV_LFLAGS macros. It should look like this: 16 | 17 | Demo/Demo: 18 | make -C Demo \ 19 | OPENCV_CFLAGS='/.../opencv/include/opencv' \ 20 | OPENCV_LFLAGS='/.../opencv/lib' \ 21 | OPENCV_LIBS='-lcxcore -lcv -lhighgui -lcvaux -lml' 22 | 23 | Then, type make to build the demo, or make install to copy it to the bin 24 | directory too. A more portable installation system is not available, 25 | sorry. 26 | 27 | Two lib/so library files are created. Your program must link against 28 | both of them (DBow and DUtils). 29 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | DBow: bag-of-words library for C++ 2 | 3 | Copyright (c) 2015 Dorian Galvez-Lopez. http://doriangalvez.com 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions 8 | are met: 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 2. Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 3. The original author of the work must be notified of any 15 | redistribution of source code or in binary form. 16 | 4. 
Neither the name of copyright holders nor the names of its 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS 24 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 
31 | 32 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 33 | 34 | This software includes the dirent API for Microsoft Visual Studio, by 35 | Toni Ronkko, and has its own license, reproduced below: 36 | 37 | /***************************************************************************** 38 | * dirent.h - dirent API for Microsoft Visual Studio 39 | * 40 | * Copyright (C) 2006 Toni Ronkko 41 | * 42 | * Permission is hereby granted, free of charge, to any person obtaining 43 | * a copy of this software and associated documentation files (the 44 | * ``Software''), to deal in the Software without restriction, including 45 | * without limitation the rights to use, copy, modify, merge, publish, 46 | * distribute, sublicense, and/or sell copies of the Software, and to 47 | * permit persons to whom the Software is furnished to do so, subject to 48 | * the following conditions: 49 | * 50 | * The above copyright notice and this permission notice shall be included 51 | * in all copies or substantial portions of the Software. 52 | * 53 | * THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 54 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 55 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 56 | * IN NO EVENT SHALL TONI RONKKO BE LIABLE FOR ANY CLAIM, DAMAGES OR 57 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 58 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 59 | * OTHER DEALINGS IN THE SOFTWARE. 
60 | *****************************************************************************/ 61 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: libraries demo 3 | 4 | libraries: DUtils/libDUtils.so DBow/libDBow.so 5 | demo: Demo/Demo 6 | 7 | DUtils/libDUtils.so: 8 | make -C DUtils 9 | 10 | DBow/libDBow.so: 11 | make -C DBow 12 | 13 | Demo/Demo: 14 | make -C Demo \ 15 | OPENCV_CFLAGS='`pkg-config --cflags opencv`' \ 16 | OPENCV_LFLAGS='`pkg-config --libs-only-L opencv`' \ 17 | OPENCV_LIBS='-lcxcore -lcv -lhighgui -lcvaux -lml' 18 | 19 | nocv: libraries 20 | 21 | install-nocv: libraries 22 | mkdir -p lib && cp DUtils/*.so lib && cp DBow/*.so lib 23 | 24 | install: all 25 | mkdir -p lib && cp DUtils/*.so lib && cp DBow/*.so lib && \ 26 | mkdir -p bin && cp Demo/Demo bin && cp Demo/*.png bin 27 | 28 | clean: 29 | make -C DUtils clean && make -C DBow clean && make -C Demo clean 30 | 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DBow 2 | ==== 3 | 4 | Note: this library is old. Consider using the more mature DBoW2 library instead. 5 | 6 | ## Overview 7 | 8 | DBow is an open source C++ library for indexing and converting images into a bag-of-word representation. It implements a hierarchical tree for approximating nearest neighbours in the image feature space and creating a visual vocabulary. DBow also implements an image database, based on an inverted file structure, for indexing images and enabling quick queries. 9 | 10 | DBow does not require OpenCV (except for the demo application), but they are fully compatible. You can check the demo included with the library to see how to use SURF features effortlessly. 
11 | 12 | DBow has been tested on a real dataset collected by the Rawseeds FP6-project, for a loop-closing application. In this test, 1755 images of an outdoor route were indexed in their bag-of-word representation and checked for matches in real time. On an Intel Quad CPU at 2.82 GHz, building a vocabulary with 95 words from a set of 1300 images took 3 minutes (without considering the feature extraction). The average time of adding a new image to the database was 1.9 ms, whereas querying the database took 7.2 ms on average. 13 | 14 | ## Installation notes 15 | 16 | The library is delivered with installation files for Visual Studio 9 (at least) and simple Makefiles. It has been tested on Windows with Visual Studio and STLport, and on Ubuntu with gcc 4.2.4. To install in Windows, open the Visual Studio sln file, open the Property page of the Demo project, change the include and library path of OpenCV if it is necessary, and compile all. If you do not have OpenCV installed or do not want to build the Demo application, disable that project. 17 | 18 | To install in *nix, just type `make nocv` or `make install-nocv` to build the libraries. The latter command also copies them to the lib directory (not in the system directory). These commands do not build the demo application, which requires OpenCV2. To also build the demo, first make sure that `pkg-config` can find the OpenCV paths. If it cannot, you can modify the root Makefile and manually set the `OPENCV_CFLAGS` and `OPENCV_LFLAGS` macros. It should look like this: 19 | 20 | Demo/Demo: 21 | make -C Demo \ 22 | OPENCV_CFLAGS='/.../opencv/include/opencv' \ 23 | OPENCV_LFLAGS='/.../opencv/lib' \ 24 | OPENCV_LIBS='-lcxcore -lcv -lhighgui -lcvaux -lml' 25 | 26 | Then, type `make` to build the demo, or `make install` to copy it to the `bin` directory too. A more portable installation system is not yet available, sorry. 27 | 28 | Two lib/so library files are created.
Your program must link against both of them (`DBow` and `DUtils`). 29 | 30 | ## Implementation and usage notes 31 | 32 | The library is composed of two main classes: `Vocabulary` and `Database`. The former is a base class for several types of vocabularies, but only a hierarchical one is implemented (class `HVocabulary`). The `Database` class allows indexing image features in an inverted file to find matches. 33 | 34 | ### Weighting 35 | 36 | Words in the vocabulary and in bag-of-words vectors are weighted. There are four weighting measures implemented to set a word weight *w<sub>i</sub>*: 37 | 38 | * Term frequency (*tf*): ![w_i = \frac{n_{id}}{n_d}](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/tf.gif), 39 | ![n_{id}](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/nid.gif): number of occurrences of word *i* in document *d*, 40 | ![n_d](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/nd.gif): number of words in document *d*. 41 | * Inverse document frequency (*idf*): ![w_i = log(\frac{N}{N_i})](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/idf.gif), 42 | ![N](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/N.gif): number of documents, 43 | ![N_i](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/Ni.gif): number of documents containing word *i*. 44 | * Term frequency -- inverse document frequency (*tf-idf*): ![w_i = \frac{n_{id}}{n_d} log(\frac{N}{N_i})](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/tf-idf.gif). 45 | * Binary: ![w_i = 1 if word i is present; 0 otherwise](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/binary.gif) 46 | 47 | **Note:** DBow calculates *N* and *N<sub>i</sub>* according to the number of images provided when the vocabulary is created.
These values are not changed and are independent of how many entries a `Database` object contains. 48 | 49 | ### Scoring 50 | 51 | A score is calculated when two vectors are compared by means of a `Vocabulary` or when a `Database` is queried. There are several scoring methods implemented. Note that the meaning of the numerical value of the score depends on the metric you are using. However, some of these metrics can be scaled to the interval [0..1], where 0 means no match at all, and 1 a perfect match (see below). If you plan to modify the scoring code, note that for efficiency reasons there is an implementation in the `Vocabulary` class, and another in the `Database` class. 52 | These are the metrics implemented to calculate the score *s* between two vectors *v* and *w* (from now on, *v\** and *w\** denote vectors normalized with the L1-norm): 53 | 54 | * Dot product: ![Dot product](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/dot.gif) 55 | * L1-norm: ![L1-norm](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/L1.gif) 56 | * L2-norm: ![L2-norm](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/L2.gif) 57 | * Bhattacharyya coefficient: ![Bhattacharyya coefficient](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/bhat.gif) 58 | * χ² distance: ![Chi square distance](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/chisq_extended.gif) 59 | * KL-divergence: ![KL-divergence](https://raw.githubusercontent.com/dorian3d/dorian3d.github.io/master/other/images/kl_extended.gif) 60 | 61 | Note that with some methods, vectors are normalized before applying the metric. Since vectors are usually sparse and contain several zero entries, the χ² distance and the KL-divergence cannot be applied all along the vectors. For that reason, the entries that cause numerical problems are avoided.
In the case of the KL-divergence, ε denotes the epsilon value of the computer (it is usually the smallest positive floating-point number). When calculating a score, you can activate a flag to obtain it linearly scaled to [0..1], where 1 is the highest and 0 the lowest. You can activate this flag when using the L1-norm, the L2-norm and the χ² distance. Note that the Bhattacharyya coefficient is always in [0..1] independently of the scaling flag. 62 | 63 | **Note**: for efficiency reasons, the implementation of the χ² distance assumes that weights are never negative (this is true when using *tf*, *idf*, *tf-idf* and binary vectors). 64 | 65 | The default configuration when creating a vocabulary is *tf-idf*, L1-norm. 66 | 67 | ### Save & Load 68 | 69 | All vocabularies and databases can be saved to and loaded from disk with the `Save` and `Load` member functions. When a database is saved, the vocabulary it is associated with is also embedded in the file, so that vocabulary and database files are completely independent. 70 | 71 | Both structures can be saved in binary or text format. Binary files are smaller and faster to read and write than text files. DBow deals with the byte order, so that binary files should be machine independent (to some extent). You can use text files for debugging or for interoperating with your own vocabularies. You can check the file format in the `HVocabulary::Save` and `Database::Save` functions. 72 | --------------------------------------------------------------------------------