├── FileSearch.sln ├── FileSearch.suo ├── FileSearch ├── CDriveIndex.cpp ├── CDriveIndex.h ├── FileSearch.def ├── FileSearch.vcxproj ├── FileSearch.vcxproj.filters ├── FileSearch.vcxproj.user ├── dllmain.cpp ├── stdafx.cpp ├── stdafx.h └── targetver.h ├── README.md ├── Release ├── FileSearch.dll └── FileSearchTest.ahk └── x64 └── Release └── FileSearch.dll /FileSearch.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FileSearch", "FileSearch\FileSearch.vcxproj", "{417925A9-AA49-4063-A2C1-26C9AC0884F6}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Win32 = Debug|Win32 9 | Debug|x64 = Debug|x64 10 | Release|Win32 = Release|Win32 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Debug|Win32.ActiveCfg = Debug|Win32 15 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Debug|Win32.Build.0 = Debug|Win32 16 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Debug|x64.ActiveCfg = Debug|x64 17 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Debug|x64.Build.0 = Debug|x64 18 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Release|Win32.ActiveCfg = Release|Win32 19 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Release|Win32.Build.0 = Release|Win32 20 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Release|x64.ActiveCfg = Release|x64 21 | {417925A9-AA49-4063-A2C1-26C9AC0884F6}.Release|x64.Build.0 = Release|x64 22 | EndGlobalSection 23 | GlobalSection(SolutionProperties) = preSolution 24 | HideSolutionNode = FALSE 25 | EndGlobalSection 26 | EndGlobal 27 | -------------------------------------------------------------------------------- /FileSearch.suo: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ChrisS85/FastFileSearch/63dc6700fe6af179171b597455bda125735d3e99/FileSearch.suo -------------------------------------------------------------------------------- /FileSearch/CDriveIndex.cpp: -------------------------------------------------------------------------------- 1 | /********************************************************************************** 2 | Module name: CDriveIndex.cpp 3 | Written by: Christian Sander 4 | Credits for original code this is based on: Jeffrey Cooperstein & Jeffrey Richter 5 | **********************************************************************************/ 6 | 7 | #include "stdafx.h" 8 | #include "CDriveIndex.h" 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | 15 | // Exported function to create the index of a drive 16 | CDriveIndex* _stdcall CreateIndex(WCHAR cDrive) 17 | { 18 | CDriveIndex *di = new CDriveIndex(); 19 | di->Init(cDrive); 20 | di->PopulateIndex(); 21 | return di; 22 | } 23 | 24 | 25 | 26 | // Exported function to delete the index of a drive 27 | void _stdcall DeleteIndex(CDriveIndex *di) 28 | { 29 | if(dynamic_cast(di)) 30 | delete di; 31 | } 32 | 33 | 34 | 35 | // Exported function to search in the index of a drive. 36 | // Returns a string that contains the filepaths of the results, 37 | // separated by newlines for easier processing in non-C++ languages. 38 | // nResults is -1 if more results than the limit were found and 0 if an error occured. In this case the return value is NULL. 39 | WCHAR* _stdcall Search(CDriveIndex *di, WCHAR *szQuery, WCHAR *szPath, BOOL bSort, BOOL bEnhancedSearch, int maxResults, int *nResults) 40 | { 41 | if(dynamic_cast(di) && szQuery) 42 | { 43 | vector results; 44 | wstring result; 45 | int numResults = di->Find(&wstring(szQuery), szPath != NULL ? 
&wstring(szPath) : NULL, &results, bSort, bEnhancedSearch, maxResults); 46 | if(nResults != NULL) 47 | *nResults = numResults; 48 | for(unsigned int i = 0; i != results.size(); i++) 49 | result += (i == 0 ? TEXT("") : TEXT("\n")) + results[i].Path + results[i].Filename; 50 | WCHAR * szOutput = new WCHAR[result.length() + 1]; 51 | ZeroMemory(szOutput, (result.length() + 1) * sizeof(szOutput[0])); 52 | _snwprintf(szOutput, result.length(), TEXT("%s"), result.c_str()); 53 | return szOutput; 54 | } 55 | if(nResults != NULL) 56 | *nResults = 0; 57 | return NULL; 58 | } 59 | 60 | 61 | 62 | // Exported function to clear the memory of the string returned by Search(). 63 | // This needs to be called after every call to Search to avoid memory leaks. 64 | void _stdcall FreeResultsBuffer(WCHAR *szResults) 65 | { 66 | if(szResults) 67 | delete[] szResults; 68 | } 69 | 70 | 71 | 72 | // Exported function that loads the database from disk 73 | CDriveIndex* _stdcall LoadIndexFromDisk(WCHAR *szPath) 74 | { 75 | if(szPath) 76 | return new CDriveIndex(wstring(szPath)); 77 | return NULL; 78 | } 79 | 80 | 81 | 82 | // Exported function that saves the database to disk 83 | BOOL _stdcall SaveIndexToDisk(CDriveIndex *di, WCHAR *szPath) 84 | { 85 | if(dynamic_cast(di) && szPath) 86 | return di->SaveToDisk(wstring(szPath)); 87 | return false; 88 | } 89 | 90 | 91 | // Exported function that returns the number of files and directories 92 | void _stdcall GetDriveInfo(CDriveIndex *di, DriveInfo *driveInfo) 93 | { 94 | if(dynamic_cast(di)) 95 | *driveInfo = di->GetInfo(); 96 | } 97 | 98 | 99 | 100 | // Constructor 101 | CDriveIndex::CDriveIndex() 102 | { 103 | // Initialize member variables 104 | m_hVol = INVALID_HANDLE_VALUE; 105 | } 106 | 107 | 108 | 109 | // Destructor 110 | CDriveIndex::~CDriveIndex() 111 | { 112 | CleanUp(); 113 | } 114 | 115 | 116 | 117 | // Cleanup function to free resources 118 | void CDriveIndex::CleanUp() 119 | { 120 | // Cleanup the memory and handles we were using 
121 | if (m_hVol != INVALID_HANDLE_VALUE) 122 | CloseHandle(m_hVol); 123 | } 124 | 125 | 126 | 127 | // This is a helper function that opens a handle to the volume specified 128 | // by the cDriveLetter parameter. 129 | HANDLE CDriveIndex::Open(TCHAR cDriveLetter, DWORD dwAccess) 130 | { 131 | TCHAR szVolumePath[_MAX_PATH]; 132 | wsprintf(szVolumePath, TEXT("\\\\.\\%c:"), cDriveLetter); 133 | HANDLE hCJ = CreateFile(szVolumePath, dwAccess, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL); 134 | return(hCJ); 135 | } 136 | 137 | 138 | // This function creates a journal on the volume. If a journal already 139 | // exists this function will adjust the MaximumSize and AllocationDelta 140 | // parameters of the journal 141 | BOOL CDriveIndex::Create(DWORDLONG MaximumSize, DWORDLONG AllocationDelta) 142 | { 143 | DWORD cb; 144 | CREATE_USN_JOURNAL_DATA cujd; 145 | cujd.MaximumSize = MaximumSize; 146 | cujd.AllocationDelta = AllocationDelta; 147 | BOOL fOk = DeviceIoControl(m_hVol, FSCTL_CREATE_USN_JOURNAL, 148 | &cujd, sizeof(cujd), NULL, 0, &cb, NULL); 149 | return(fOk); 150 | } 151 | 152 | // Return statistics about the journal on the current volume 153 | BOOL CDriveIndex::Query(PUSN_JOURNAL_DATA pUsnJournalData) 154 | { 155 | DWORD cb; 156 | BOOL fOk = DeviceIoControl(m_hVol, FSCTL_QUERY_USN_JOURNAL, NULL, 0, 157 | pUsnJournalData, sizeof(*pUsnJournalData), &cb, NULL); 158 | return(fOk); 159 | } 160 | 161 | // Call this to initialize the structure. The cDrive parameter 162 | // specifies the drive that this instance will access. The cbBuffer 163 | // parameter specifies the size of the interal buffer used to read records 164 | // from the journal. This should be large enough to hold several records 165 | // (for example, 10 kilobytes will allow this class to buffer several 166 | // dozen journal records at a time) 167 | BOOL CDriveIndex::Init(WCHAR cDrive) 168 | { 169 | // You should not call this function twice for one instance. 
170 | if (m_hVol != INVALID_HANDLE_VALUE) 171 | DebugBreak(); 172 | m_cDrive = cDrive; 173 | ClearLastResult(); 174 | BOOL fOk = FALSE; 175 | __try { 176 | // Open a handle to the volume 177 | m_hVol = Open(m_cDrive, GENERIC_WRITE | GENERIC_READ); 178 | if (INVALID_HANDLE_VALUE == m_hVol) 179 | __leave; 180 | fOk = TRUE; 181 | } 182 | __finally { 183 | if (!fOk) 184 | CleanUp(); 185 | } 186 | return(fOk); 187 | } 188 | 189 | void CDriveIndex::ClearLastResult() 190 | { 191 | LastResult = SearchResult(); 192 | } 193 | 194 | // Adds a file to the database 195 | BOOL CDriveIndex::Add(DWORDLONG Index, wstring *szName, DWORDLONG ParentIndex, DWORDLONG Filter) 196 | { 197 | IndexedFile i; 198 | i.Index = Index; 199 | if(!Filter) 200 | Filter = MakeFilter(szName); 201 | i.Filter = Filter; 202 | rgFiles.insert(rgFiles.end(), i); 203 | return(TRUE); 204 | } 205 | 206 | 207 | 208 | // Adds a directory to the database 209 | BOOL CDriveIndex::AddDir(DWORDLONG Index, wstring *szName, DWORDLONG ParentIndex, DWORDLONG Filter) 210 | { 211 | IndexedDirectory i; 212 | i.Index = Index; 213 | if(!Filter) 214 | Filter = MakeFilter(szName); 215 | i.Filter = Filter; 216 | i.nFiles = 0; 217 | rgDirectories.insert(rgDirectories.end(), i); 218 | return(TRUE); 219 | } 220 | 221 | 222 | 223 | // Calculates a 64bit value that is used to filter out many files before comparing their filenames 224 | // This method gives a huge speed boost. 225 | DWORDLONG CDriveIndex::MakeFilter(wstring *szName) 226 | { 227 | /* 228 | Creates an address that is used to filter out strings that don't contain the queried characters 229 | Explanation of the meaning of the single bits: 230 | 0-25 a-z 231 | 26-35 0-9 232 | 36 . 233 | 37 space 234 | 38 !#$&'()+,-~_ 235 | 39 2 same characters 236 | 40 3 same characters 237 | The fields below indicate the presence of 2-character sequences. 
Based off http://en.wikipedia.org/wiki/Letter_frequency 238 | 41 TH 239 | 42 HE 240 | 43 AN 241 | 44 RE 242 | 45 ER 243 | 46 IN 244 | 47 ON 245 | 48 AT 246 | 49 ND 247 | 50 ST 248 | 51 ES 249 | 52 EN 250 | 53 OF 251 | 54 TE 252 | 55 ED 253 | 56 OR 254 | 57 TI 255 | 58 HI 256 | 59 AS 257 | 60 TO 258 | 61-63 length (max. 8 characters. Queries are usually shorter than this) 259 | */ 260 | if(!(szName->length() > 0)) 261 | return 0; 262 | DWORDLONG Address = 0; 263 | WCHAR c; 264 | wstring szlower(*szName); 265 | transform(szlower.begin(), szlower.end(), szlower.begin(), tolower); 266 | int counts[26] = {0}; //This array is used to check if characters occur two or three times in the string 267 | wstring::size_type l = szlower.length(); 268 | for(unsigned int i = 0; i != l; i++) 269 | { 270 | c = szlower[i]; 271 | if(c > 96 && c < 123) //a-z 272 | { 273 | Address |= 1ui64 << (DWORDLONG)((DWORDLONG)c - 97ui64); 274 | counts[c-97]++; 275 | if(i < l - 1) 276 | { 277 | if(c == L't' && szlower[i+1] == L'h') //th 278 | Address |= 1ui64 << 41; 279 | else if(c == L'h' && szlower[i+1] == L'e') //he 280 | Address |= 1ui64 << 41; 281 | else if(c == L'a' && szlower[i+1] == L'n') //an 282 | Address |= 1ui64 << 41; 283 | else if(c == L'r' && szlower[i+1] == L'e') //re 284 | Address |= 1ui64 << 41; 285 | else if(c == L'e' && szlower[i+1] == L'r') //er 286 | Address |= 1ui64 << 41; 287 | else if(c == L'i' && szlower[i+1] == L'n') //in 288 | Address |= 1ui64 << 41; 289 | else if(c == L'o' && szlower[i+1] == L'n') //on 290 | Address |= 1ui64 << 41; 291 | else if(c == L'a' && szlower[i+1] == L't') //at 292 | Address |= 1ui64 << 41; 293 | else if(c == L'n' && szlower[i+1] == L'd') //nd 294 | Address |= 1ui64 << 41; 295 | else if(c == L's' && szlower[i+1] == L't') //st 296 | Address |= 1ui64 << 41; 297 | else if(c == L'e' && szlower[i+1] == L's') //es 298 | Address |= 1ui64 << 41; 299 | else if(c == L'e' && szlower[i+1] == L'n') //en 300 | Address |= 1ui64 << 41; 301 | else if(c == L'o' && 
szlower[i+1] == L'f') //of 302 | Address |= 1ui64 << 41; 303 | else if(c == L't' && szlower[i+1] == L'e') //te 304 | Address |= 1ui64 << 41; 305 | else if(c == L'e' && szlower[i+1] == L'd') //ed 306 | Address |= 1ui64 << 41; 307 | else if(c == L'o' && szlower[i+1] == L'r') //or 308 | Address |= 1ui64 << 41; 309 | else if(c == L't' && szlower[i+1] == L'i') //ti 310 | Address |= 1ui64 << 41; 311 | else if(c == L'h' && szlower[i+1] == L'i') //hi 312 | Address |= 1ui64 << 41; 313 | else if(c == L'a' && szlower[i+1] == L's') //as 314 | Address |= 1ui64 << 41; 315 | else if(c == L't' && szlower[i+1] == L'o') //to 316 | Address |= 1ui64 << 41; 317 | } 318 | } 319 | else if(c >= L'0' && c <= '9') //0-9 320 | Address |= 1ui64 << (c - L'0' + 26ui64); 321 | else if(c == L'.') //. 322 | Address |= 1ui64 << 36; 323 | else if(c == L' ') // space 324 | Address |= 1ui64 << 37; 325 | else if(c == L'!' || c == L'#' || c == L'$' || c == L'&' || c == L'\'' || c == L'(' || c == L')' || c == L'+' || c == L',' || c == L'-' || c == L'~' || c == L'_') 326 | Address |= 1ui64 << 38; // !#$&'()+,-~_ 327 | } 328 | for(unsigned int i = 0; i != 26; i++) 329 | { 330 | if(counts[i] == 2) 331 | Address |= 1ui64 << 39; 332 | else if(counts[i] > 2) 333 | Address |= 1ui64 << 40; 334 | } 335 | DWORDLONG length = (szlower.length() > 7 ? 7ui64 : (DWORDLONG)szlower.length()) & 0x00000007ui64; //3 bits for length -> 8 max 336 | Address |= length << 61ui64; 337 | return Address; 338 | } 339 | 340 | 341 | 342 | // Internal function for searching in the database. 343 | // For projects in C++ which use this project it might be preferable to use this function 344 | // to skip the wrapper. 345 | // Returns: number of results, -1 if maxResults != -1 and not all results were found 346 | int CDriveIndex::Find(wstring *strQuery, wstring *strQueryPath, vector *rgsrfResults, BOOL bSort, BOOL bEnhancedSearch, int maxResults) 347 | { 348 | //These variables are used to control the flow of execution in this function. 
349 | 350 | //Indicates where results should be searched 351 | unsigned int SearchWhere = IN_FILES; 352 | //Offset for vector marked by SearchWhere 353 | unsigned int iOffset = 0; 354 | //Used to skip the search when the previous two properties should be carried over to the next search without actually using them now. 355 | BOOL bSkipSearch = false; 356 | 357 | //Number of results in this search. -1 if more than maximum number of results. 358 | int nResults = 0; 359 | 360 | //No query, just ignore this call 361 | if(strQuery->length() == 0) 362 | { 363 | // Store this query 364 | LastResult.Query = wstring(TEXT("")); 365 | LastResult.Results = vector(); 366 | return nResults; 367 | } 368 | 369 | if(strQueryPath != NULL) 370 | { 371 | //Check if the path actually matches the drive of this index 372 | WCHAR szDrive[_MAX_DRIVE]; 373 | _wsplitpath(strQueryPath->c_str(), szDrive, NULL, NULL, NULL); 374 | for(unsigned int j = 0; j != _MAX_DRIVE; j++) 375 | szDrive[j] = toupper(szDrive[j]); 376 | if(wstring(szDrive).compare(wstring(1,toupper(m_cDrive))) == 0) 377 | return 0; 378 | } 379 | 380 | //Create lower query string for case-insensitive search 381 | wstring strQueryLower(*strQuery); 382 | for(unsigned int j = 0; j != strQueryLower.length(); j++) 383 | strQueryLower[j] = tolower(strQueryLower[j]); 384 | const WCHAR *szQueryLower = strQueryLower.c_str(); 385 | 386 | //Create lower query path string for case-insensitive search 387 | wstring strQueryPathLower(strQueryPath != NULL ? *strQueryPath : TEXT("")); 388 | for(unsigned int j = 0; j != strQueryPathLower.length(); j++) 389 | strQueryPathLower[j] = tolower((*strQueryPath)[j]); 390 | wstring* pstrQueryPathLower = strQueryPath != NULL && strQueryPathLower.length() > 0 ? 
&strQueryPathLower : NULL; 391 | 392 | //If the query path is different from the last query so that the results are not valid anymore, the last query needs to be dropped 393 | if(!(strQueryPath != NULL && (LastResult.maxResults == -1 || LastResult.iOffset == 0) && (LastResult.SearchPath.length() == 0 || strQueryPathLower.find(LastResult.SearchPath) == 0))) 394 | LastResult = SearchResult(); 395 | 396 | //Calculate Filter value and length of the current query which are compared with the cached ones to skip many of them 397 | DWORDLONG QueryFilter = MakeFilter(&strQueryLower); 398 | DWORDLONG QueryLength = (QueryFilter & 0xE000000000000000ui64) >> 61ui64; //Bits 61-63 for storing lengths up to 8 399 | QueryFilter = QueryFilter & 0x1FFFFFFFFFFFFFFFui64; //All but the last 3 bits 400 | 401 | //If the same query string as in the last query was used 402 | if(strQueryLower.compare(LastResult.Query) == 0 && LastResult.Results.size() > 0 && (LastResult.SearchEndedWhere == NO_WHERE && iOffset != 1)) // need proper condition here to skip 403 | { 404 | //Keep the position of the last result 405 | SearchWhere = LastResult.SearchEndedWhere; 406 | iOffset = LastResult.iOffset; 407 | bSkipSearch = true; 408 | for(int i = 0; i != LastResult.Results.size(); i++) 409 | { 410 | BOOL bFound = true; 411 | if(pstrQueryPathLower != NULL) 412 | { 413 | wstring strPathLower(LastResult.Results[i].Path); 414 | for(unsigned int j = 0; j != strPathLower.length(); j++) 415 | strPathLower[j] = tolower(LastResult.Results[i].Path[j]); 416 | bFound = strPathLower.find(strQueryPathLower) != -1; 417 | } 418 | if(bFound) 419 | { 420 | nResults++; 421 | //If the result limit has decreased and we have found all (shouldn't happen in common scenarios) 422 | if(maxResults != -1 && nResults > maxResults) 423 | { 424 | nResults = -1; 425 | 426 | //If we get here, the next incremental should start fresh, but only if it requires more results than this one. 
427 | //To accomplish this we make this result contain no information about the origin of these results. 428 | SearchWhere = NO_WHERE; 429 | iOffset = 1; 430 | break; 431 | } 432 | rgsrfResults->insert(rgsrfResults->end(), LastResult.Results[i]); 433 | } 434 | } 435 | //if the last search was limited and didn't finish because it found enough files and we don't have the maximum number of results yet 436 | //we need to continue the search where the last one stopped. 437 | if(LastResult.maxResults != -1 && LastResult.SearchEndedWhere != NO_WHERE && (maxResults == -1 || nResults < maxResults)) 438 | bSkipSearch = false; 439 | } 440 | //If this query is more specific than the previous one, it can use the results from the previous query 441 | else if(strQueryLower.find(LastResult.Query) != -1 && LastResult.Results.size() > 0) 442 | { 443 | bSkipSearch = true; 444 | //Keep the position of the last result 445 | SearchWhere = LastResult.SearchEndedWhere; 446 | iOffset = LastResult.iOffset; 447 | FindInPreviousResults(*strQuery, szQueryLower, QueryFilter, QueryLength, pstrQueryPathLower, *rgsrfResults, 0, bEnhancedSearch, maxResults, nResults); 448 | 449 | //if the last search was limited and didn't finish because it found enough files and we don't have the maximum number of results yet 450 | //we need to continue the search where the last one stopped. 
451 | if(LastResult.maxResults != -1 && LastResult.SearchEndedWhere != NO_WHERE && (maxResults == -1 || nResults < maxResults)) 452 | bSkipSearch = false; 453 | } 454 | DWORDLONG FRNPath; 455 | long long nFilesInDir = -1; 456 | if(strQueryPath != NULL && strQueryPath->length()) 457 | { 458 | FRNPath = PathToFRN(strQueryPath); 459 | wstring strPath2; 460 | GetDir(FRNPath, &strPath2); 461 | int iOffset = (int) FindDirOffsetByIndex(FRNPath); 462 | if(iOffset != -1) 463 | nFilesInDir = rgDirectories[iOffset].nFiles; 464 | } 465 | if(SearchWhere == IN_FILES && iOffset == 0 && nFilesInDir != -1 && nFilesInDir < 10000 && !bSkipSearch) 466 | { 467 | FindRecursively(*strQuery, szQueryLower, QueryFilter, QueryLength, strQueryPath, *rgsrfResults, bEnhancedSearch, maxResults, nResults); 468 | SearchWhere = NO_WHERE; 469 | } 470 | else if(SearchWhere == IN_FILES && !bSkipSearch) 471 | { 472 | //Find in file index 473 | FindInJournal(*strQuery, szQueryLower, QueryFilter, QueryLength, (strQueryPath != NULL ? 
&strQueryPathLower : NULL), rgFiles, *rgsrfResults, iOffset, bEnhancedSearch, maxResults, nResults); 474 | //If we found the maximum number of results in the file index we stop here 475 | if(maxResults != -1 && nResults == -1) 476 | iOffset++; //Start with next entry on the next incremental search 477 | else //Search isn't limited or not all results found yet, continue in directory index 478 | { 479 | SearchWhere = IN_DIRECTORIES; 480 | iOffset = 0; 481 | } 482 | } 483 | 484 | if(SearchWhere == IN_DIRECTORIES && !bSkipSearch) 485 | { 486 | //Find in directory index 487 | FindInJournal(*strQuery, szQueryLower, QueryFilter, QueryLength, pstrQueryPathLower, rgDirectories, *rgsrfResults, iOffset, bEnhancedSearch, maxResults, nResults); 488 | //If we found the maximum number of results in the directory index we stop here 489 | if(maxResults != -1 && nResults == -1) 490 | iOffset++; //Start with next entry on the next incremental search 491 | else //Search isn't limited or less than the maximum number of results found 492 | { 493 | SearchWhere = NO_WHERE; 494 | iOffset = 0; 495 | } 496 | } 497 | 498 | //Sort by match quality and name 499 | if(bSort) 500 | sort(rgsrfResults->begin(), rgsrfResults->end()); 501 | 502 | // Store this query 503 | LastResult.Query = wstring(strQueryLower); 504 | 505 | // Store search path 506 | LastResult.SearchPath = strQueryPathLower; 507 | 508 | //Clear old results, they will be replaced with the current ones 509 | LastResult.Results = vector(); 510 | 511 | //Store number of results (Needed for incremental search) 512 | LastResult.iOffset = iOffset; 513 | 514 | //Store if this search was limited 515 | LastResult.maxResults = maxResults; 516 | 517 | //Store where the current search ended due to file limit (or if it didn't); 518 | LastResult.SearchEndedWhere = SearchWhere; 519 | 520 | //Update last results 521 | for(unsigned int i = 0; i != rgsrfResults->size(); i++) 522 | LastResult.Results.insert(LastResult.Results.end(), 
(*rgsrfResults)[i]); 523 | 524 | return nResults; 525 | } 526 | 527 | void CDriveIndex::FindRecursively(wstring &strQuery, const WCHAR* &szQueryLower, DWORDLONG QueryFilter, DWORDLONG QueryLength, wstring* strQueryPath, vector &rgsrfResults, BOOL bEnhancedSearch, int maxResults, int &nResults) 528 | { 529 | WIN32_FIND_DATA ffd; 530 | size_t length_of_arg; 531 | HANDLE hFind = INVALID_HANDLE_VALUE; 532 | 533 | // Check that the input path plus 3 is not longer than MAX_PATH. 534 | // Three characters are for the "\*" plus NULL appended below. 535 | length_of_arg = strQueryPath->length(); 536 | if (length_of_arg > (MAX_PATH - 3)) 537 | return; 538 | 539 | // Prepare string for use with FindFile functions. First, copy the 540 | // string to a buffer, then append '\*' to the directory name. 541 | wstring strPath = wstring(*strQueryPath); 542 | if((*strQueryPath)[strQueryPath->length() - 1] != L'\\') 543 | strPath += wstring(TEXT("\\*")); 544 | else 545 | strPath += wstring(TEXT("*")); 546 | 547 | const WCHAR* szDir = strPath.c_str(); 548 | 549 | // Find the first file in the directory. 550 | hFind = FindFirstFile(szDir, &ffd); 551 | 552 | if (hFind == INVALID_HANDLE_VALUE) 553 | return; 554 | unsigned int nFiles = 0; 555 | // List all the files in the directory with some info about them. 
556 | do 557 | { 558 | if(ffd.dwFileAttributes & FILE_ATTRIBUTE_VIRTUAL || ffd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) 559 | continue; 560 | float MatchQuality; 561 | wstring strFilename(ffd.cFileName); 562 | if(strFilename.compare(TEXT(".")) == 0 || strFilename.compare(TEXT("..")) == 0) 563 | continue; 564 | nFiles++; 565 | if(bEnhancedSearch) 566 | MatchQuality = FuzzySearch(strFilename, strQuery); 567 | else 568 | { 569 | wstring szLower(strFilename); 570 | for(unsigned int j = 0; j != szLower.length(); j++) 571 | szLower[j] = tolower(szLower[j]); 572 | MatchQuality = szLower.find(strQuery) != -1; 573 | } 574 | 575 | if(MatchQuality > 0.6f) 576 | { 577 | nResults++; 578 | if(maxResults != -1 && nResults > maxResults) 579 | { 580 | nResults = -1; 581 | break; 582 | } 583 | SearchResultFile srf; 584 | srf.Filename = strFilename; 585 | srf.Path = *strQueryPath + TEXT("\\"); 586 | srf.Filter = MAXULONG64; 587 | srf.MatchQuality = MatchQuality; 588 | rgsrfResults.insert(rgsrfResults.end(), srf); 589 | } 590 | 591 | if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) 592 | { 593 | wstring strSubPath = wstring(*strQueryPath); 594 | if((*strQueryPath)[strQueryPath->length() - 1] != L'\\') 595 | strSubPath += L'\\'; 596 | strSubPath += ffd.cFileName; 597 | FindRecursively(strQuery, szQueryLower, QueryFilter, QueryLength, &strSubPath, rgsrfResults, bEnhancedSearch, maxResults, nResults); 598 | if(nResults == -1) 599 | break; 600 | } 601 | } 602 | while (FindNextFile(hFind, &ffd) != 0); 603 | FindClose(hFind); 604 | } 605 | 606 | //T needs to be IndexedFile or IndexedDirectory 607 | template 608 | void CDriveIndex::FindInJournal(wstring &strQuery, const WCHAR* &szQueryLower, DWORDLONG QueryFilter, DWORDLONG QueryLength, wstring * strQueryPath, vector &rgJournalIndex, vector &rgsrfResults, unsigned int iOffset, BOOL bEnhancedSearch, int maxResults, int &nResults) 609 | { 610 | for(unsigned int j = 0; j != rgJournalIndex.size(); j++) 611 | { 612 | IndexedFile* i = 
(IndexedFile*)&rgJournalIndex[j]; 613 | DWORDLONG Length = (i->Filter & 0xE000000000000000ui64) >> 61ui64; //Bits 61-63 for storing lengths up to 8 614 | DWORDLONG Filter = i->Filter & 0x1FFFFFFFFFFFFFFFui64; //All but the last 3 bits 615 | if((Filter & QueryFilter) == QueryFilter && QueryLength <= Length) 616 | { 617 | USNEntry file = FRNToName(i->Index); 618 | float MatchQuality; 619 | if(bEnhancedSearch) 620 | MatchQuality = FuzzySearch(file.Name, strQuery); 621 | else 622 | { 623 | wstring szLower(file.Name); 624 | for(unsigned int j = 0; j != szLower.length(); j++) 625 | szLower[j] = tolower(szLower[j]); 626 | MatchQuality = szLower.find(strQuery) != -1; 627 | } 628 | 629 | if(MatchQuality > 0.6f) 630 | { 631 | nResults++; 632 | if(maxResults != -1 && nResults > maxResults) 633 | { 634 | nResults = -1; 635 | break; 636 | } 637 | SearchResultFile srf; 638 | srf.Filename = file.Name; 639 | srf.Path.reserve(MAX_PATH); 640 | Get(i->Index, &srf.Path); 641 | BOOL bFound = true; 642 | if(strQueryPath != NULL) 643 | { 644 | wstring strPathLower(srf.Path); 645 | for(unsigned int j = 0; j != strPathLower.length(); j++) 646 | strPathLower[j] = tolower(strPathLower[j]); 647 | bFound = strPathLower.find(*strQueryPath) != -1; 648 | } 649 | if(bFound) 650 | { 651 | //split path 652 | WCHAR szDrive[_MAX_DRIVE]; 653 | WCHAR szPath[_MAX_PATH]; 654 | WCHAR szName[_MAX_FNAME]; 655 | WCHAR szExt[_MAX_EXT]; 656 | _wsplitpath(srf.Path.c_str(), szDrive, szPath, szName, szExt); 657 | srf.Path = wstring(szDrive) + wstring(szPath); 658 | srf.Filter = i->Filter; 659 | srf.MatchQuality = MatchQuality; 660 | rgsrfResults.insert(rgsrfResults.end(), srf); 661 | } 662 | } 663 | } 664 | } 665 | } 666 | void CDriveIndex::FindInPreviousResults(wstring &strQuery, const WCHAR* &szQueryLower, DWORDLONG QueryFilter, DWORDLONG QueryLength, wstring * strQueryPath, vector &rgsrfResults, unsigned int iOffset, BOOL bEnhancedSearch, int maxResults, int &nResults) 667 | { 668 | for(int i = 0; i != 
LastResult.Results.size() && (maxResults == -1 || i < maxResults); i++) 669 | { 670 | SearchResultFile *srf = & LastResult.Results[i]; 671 | DWORDLONG Length = (srf->Filter & 0xE000000000000000ui64) >> 61ui64; //Bits 61-63 for storing lengths up to 8 672 | DWORDLONG Filter = srf->Filter & 0x1FFFFFFFFFFFFFFFui64; //All but the last 3 bits 673 | if((Filter & QueryFilter) == QueryFilter && QueryLength <= Length) 674 | { 675 | if(bEnhancedSearch) 676 | srf->MatchQuality = FuzzySearch(srf->Filename, strQuery); 677 | else 678 | { 679 | wstring szLower(srf->Filename); 680 | for(unsigned int j = 0; j != szLower.length(); j++) 681 | szLower[j] = tolower(szLower[j]); 682 | srf->MatchQuality = szLower.find(szQueryLower) != -1; 683 | } 684 | if(srf->MatchQuality > 0.6f) 685 | { 686 | BOOL bFound = true; 687 | if(strQueryPath != NULL) 688 | { 689 | wstring strPathLower(srf->Path); 690 | for(unsigned int j = 0; j != srf->Path.length(); j++) 691 | strPathLower[j] = tolower(srf->Path[j]); 692 | bFound = strPathLower.find(*strQueryPath) != -1; 693 | } 694 | if(bFound) 695 | { 696 | nResults++; 697 | if(maxResults != -1 && nResults > maxResults) 698 | { 699 | nResults = -1; 700 | break; 701 | } 702 | rgsrfResults.insert(rgsrfResults.end(), *srf); 703 | } 704 | } 705 | } 706 | } 707 | } 708 | 709 | 710 | // Clears the database 711 | BOOL CDriveIndex::Empty() 712 | { 713 | rgFiles.clear(); 714 | rgDirectories.clear(); 715 | return(TRUE); 716 | } 717 | 718 | 719 | 720 | // Constructs a path for a file 721 | BOOL CDriveIndex::Get(DWORDLONG Index, wstring *sz) 722 | { 723 | *sz = TEXT(""); 724 | int n = 0; 725 | do { 726 | USNEntry file = FRNToName(Index); 727 | *sz = file.Name + ((n != 0) ? 
TEXT("\\") : TEXT("")) + *sz; 728 | Index = file.ParentIndex; 729 | n++; 730 | } while (Index != 0); 731 | return(TRUE); 732 | } 733 | 734 | 735 | 736 | // Constructs a path for a directory 737 | BOOL CDriveIndex::GetDir(DWORDLONG Index, wstring *sz) 738 | { 739 | *sz = TEXT(""); 740 | do { 741 | USNEntry file = FRNToName(Index); 742 | *sz = file.Name + ((sz->length() != 0) ? TEXT("\\") : TEXT("")) + *sz; 743 | Index = file.ParentIndex; 744 | } while (Index != 0); 745 | return(TRUE); 746 | } 747 | 748 | 749 | 750 | //Finds the position of a file in the database by the FileReferenceNumber 751 | INT64 CDriveIndex::FindOffsetByIndex(DWORDLONG Index) { 752 | 753 | vector::difference_type pos; 754 | IndexedFile i; 755 | i.Index = Index; 756 | pos = distance(rgFiles.begin(), lower_bound(rgFiles.begin(), rgFiles.end(), i)); 757 | return (INT64) (pos == rgFiles.size() ? -1 : pos); // this is valid because the number of files doesn't exceed the range of INT64 758 | } 759 | 760 | 761 | 762 | //Finds the position of a directory in the database by the FileReferenceNumber 763 | INT64 CDriveIndex::FindDirOffsetByIndex(DWORDLONG Index) 764 | { 765 | vector::difference_type pos; 766 | IndexedDirectory i; 767 | i.Index = Index; 768 | pos = distance(rgDirectories.begin(), lower_bound(rgDirectories.begin(), rgDirectories.end(), i)); 769 | return (INT64) (pos == rgDirectories.size() ? -1 : pos); // this is valid because the number of files doesn't exceed the range of INT64 770 | } 771 | 772 | DWORDLONG PathToFRN(wstring* strPath) 773 | { 774 | HANDLE hDir = CreateFile(strPath->c_str(), 0, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); 775 | if(hDir == INVALID_HANDLE_VALUE) 776 | return 0; 777 | BY_HANDLE_FILE_INFORMATION fi; 778 | GetFileInformationByHandle(hDir, &fi); 779 | CloseHandle(hDir); 780 | return (((DWORDLONG) fi.nFileIndexHigh) << 32) | fi.nFileIndexLow; 781 | } 782 | 783 | // Enumerate the MFT for all entries. 
Store the file reference numbers of 784 | // any directories in the database. 785 | void CDriveIndex::PopulateIndex() 786 | { 787 | Empty(); 788 | 789 | vector FileParents; 790 | vector DirectoryParents; 791 | 792 | USN_JOURNAL_DATA ujd; 793 | Query(&ujd); 794 | 795 | // Get the FRN of the root directory 796 | // This had BETTER work, or we can't do anything 797 | 798 | WCHAR szRoot[_MAX_PATH]; 799 | wsprintf(szRoot, TEXT("%c:\\"), m_cDrive); 800 | HANDLE hDir = CreateFile(szRoot, 0, FILE_SHARE_READ | FILE_SHARE_WRITE, 801 | NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); 802 | 803 | BY_HANDLE_FILE_INFORMATION fi; 804 | GetFileInformationByHandle(hDir, &fi); 805 | CloseHandle(hDir); 806 | DWORDLONG IndexRoot = (((DWORDLONG) fi.nFileIndexHigh) << 32) | fi.nFileIndexLow; 807 | wsprintf(szRoot, TEXT("%c:"), m_cDrive); 808 | AddDir(IndexRoot, &wstring(szRoot), 0); 809 | DirectoryParents.insert(DirectoryParents.end(), 0); 810 | m_dwDriveFRN = IndexRoot; 811 | 812 | MFT_ENUM_DATA med; 813 | med.StartFileReferenceNumber = 0; 814 | med.LowUsn = 0; 815 | med.HighUsn = ujd.NextUsn; 816 | 817 | // Process MFT in 64k chunks 818 | BYTE pData[sizeof(DWORDLONG) + 0x10000]; 819 | DWORDLONG fnLast = 0; 820 | DWORD cb; 821 | unsigned int num = 0; 822 | unsigned int numDirs = 1; 823 | while (DeviceIoControl(m_hVol, FSCTL_ENUM_USN_DATA, &med, sizeof(med), pData, sizeof(pData), &cb, NULL) != FALSE) { 824 | 825 | PUSN_RECORD pRecord = (PUSN_RECORD) &pData[sizeof(USN)]; 826 | while ((PBYTE) pRecord < (pData + cb)) { 827 | if ((pRecord->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) 828 | numDirs++; 829 | else 830 | num++; 831 | pRecord = (PUSN_RECORD) ((PBYTE) pRecord + pRecord->RecordLength); 832 | } 833 | med.StartFileReferenceNumber = * (DWORDLONG *) pData; 834 | } 835 | 836 | FileParents.reserve(num); 837 | DirectoryParents.reserve(numDirs); 838 | rgFiles.reserve(num); 839 | rgDirectories.reserve(numDirs); 840 | hash_map hmFiles; 841 | hash_map hmDirectories; 842 | 
hash_map::iterator it; 843 | med.StartFileReferenceNumber = 0; 844 | while (DeviceIoControl(m_hVol, FSCTL_ENUM_USN_DATA, &med, sizeof(med), pData, sizeof(pData), &cb, NULL) != FALSE) 845 | { 846 | PUSN_RECORD pRecord = (PUSN_RECORD) &pData[sizeof(USN)]; 847 | while ((PBYTE) pRecord < (pData + cb)) 848 | { 849 | wstring sz((LPCWSTR) ((PBYTE) pRecord + pRecord->FileNameOffset), pRecord->FileNameLength / sizeof(WCHAR)); 850 | if ((pRecord->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) 851 | { 852 | AddDir(pRecord->FileReferenceNumber, &sz, pRecord->ParentFileReferenceNumber); 853 | //DirectoryParents.insert(DirectoryParents.end(), pRecord->ParentFileReferenceNumber); 854 | HashMapEntry hme; 855 | hme.iOffset = rgDirectories.size() - 1; 856 | hme.ParentFRN = pRecord->ParentFileReferenceNumber; 857 | hmDirectories[pRecord->FileReferenceNumber] = hme; 858 | } 859 | else 860 | { 861 | Add(pRecord->FileReferenceNumber, &sz, pRecord->ParentFileReferenceNumber); 862 | HashMapEntry hme; 863 | hme.iOffset = rgFiles.size() - 1; 864 | hme.ParentFRN = pRecord->ParentFileReferenceNumber; 865 | //FileParents.insert(FileParents.end(), pRecord->ParentFileReferenceNumber); 866 | hmFiles[pRecord->FileReferenceNumber] = hme; 867 | } 868 | pRecord = (PUSN_RECORD) ((PBYTE) pRecord + pRecord->RecordLength); 869 | } 870 | med.StartFileReferenceNumber = * (DWORDLONG *) pData; 871 | } 872 | 873 | //Calculate files per directory. This takes most of the indexing time, but this information can be useful to reduce the time needed 874 | //for searching in directories with few files (less than 10k). 875 | for ( it=hmFiles.begin() ; it != hmFiles.end(); it++ ) 876 | { 877 | HashMapEntry* hme = &hmDirectories[it->second.ParentFRN]; 878 | do 879 | { 880 | rgDirectories[hme->iOffset].nFiles++; 881 | HashMapEntry* hme2 = &hmDirectories[it->second.ParentFRN]; 882 | 883 | if(hme != hme2) 884 | hme = hme2; 885 | else // This must not happen, otherwise a directory is its own parent! 
886 | break; 887 | } while(hme->ParentFRN != 0); 888 | } 889 | //for(unsigned int i = 0; i != FileParents.size(); i++) 890 | //{ 891 | // DWORDLONG dwIndex = FileParents[i]; 892 | // while(dwIndex != 0) 893 | // { 894 | // int iOffset = -1; 895 | // for(unsigned int j = 0; j != rgDirectories.size(); j++) 896 | // if(rgDirectories[j].Index == dwIndex) 897 | // { 898 | // iOffset = j; 899 | // break; 900 | // } 901 | // if(iOffset == -1) 902 | // break; 903 | // rgDirectories[iOffset].nFiles++; 904 | // DWORDLONG dwIndex2 = DirectoryParents[iOffset]; 905 | 906 | // if(dwIndex != dwIndex2) 907 | // dwIndex = dwIndex2; 908 | // else // This must not happen, otherwise a directory is its own parent! 909 | // break; 910 | // } 911 | // //wstring strPath; 912 | // //GetDir(dwIndex, &strPath); 913 | 914 | // //do { 915 | // // //USNEntry file = FRNToName(dwIndex); 916 | // // int iOffset = -1; 917 | // // for(int j = 0; j != rgDirectories.size(); j++) 918 | // // if(rgDirectories[j].Index == dwIndex) 919 | // // { 920 | // // iOffset = j; 921 | // // break; 922 | // // } 923 | // // if(iOffset == -1) 924 | // // break; 925 | // // //USNEntry parent = FRNToName(file.ParentIndex); 926 | // // //USNEntry parent2 = FRNToName(rgDirectories[iOffset].Index); 927 | // // rgDirectories[iOffset].nFiles++; 928 | // // dwIndex = file.ParentIndex; 929 | // //} while (dwIndex != 0); 930 | //} 931 | rgFiles.shrink_to_fit(); 932 | rgDirectories.shrink_to_fit(); 933 | sort(rgFiles.begin(), rgFiles.end()); 934 | sort(rgDirectories.begin(), rgDirectories.end()); 935 | } 936 | 937 | // Resolve FRN to filename by enumerating USN journal with StartFileReferenceNumber=FRN 938 | USNEntry CDriveIndex::FRNToName(DWORDLONG FRN) 939 | { 940 | if(FRN == m_dwDriveFRN) 941 | return USNEntry(wstring(1, m_cDrive) + wstring(TEXT(":")), 0); 942 | USN_JOURNAL_DATA ujd; 943 | Query(&ujd); 944 | 945 | MFT_ENUM_DATA med; 946 | med.StartFileReferenceNumber = FRN; 947 | med.LowUsn = 0; 948 | med.HighUsn = 
ujd.NextUsn; 949 | 950 | // The structure only needs a single entry so it can be pretty small 951 | BYTE pData[sizeof(DWORDLONG) + 0x300]; 952 | DWORD cb; 953 | while (DeviceIoControl(m_hVol, FSCTL_ENUM_USN_DATA, &med, sizeof(med), pData, sizeof(pData), &cb, NULL) != FALSE) { 954 | 955 | PUSN_RECORD pRecord = (PUSN_RECORD) &pData[sizeof(USN)]; 956 | while ((PBYTE) pRecord < (pData + cb)) { 957 | if(pRecord->FileReferenceNumber == FRN) 958 | return USNEntry(wstring((LPCWSTR) ((PBYTE) pRecord + pRecord->FileNameOffset), pRecord->FileNameLength / sizeof(WCHAR)), pRecord->ParentFileReferenceNumber); 959 | pRecord = (PUSN_RECORD) ((PBYTE) pRecord + pRecord->RecordLength); 960 | } 961 | med.StartFileReferenceNumber = * (DWORDLONG *) pData; 962 | } 963 | return USNEntry(wstring(TEXT("")), 0); 964 | } 965 | 966 | 967 | 968 | // Saves the database to disk. The file can be used to create an instance of CDriveIndex. 969 | BOOL CDriveIndex::SaveToDisk(wstring &strPath) 970 | { 971 | ofstream::pos_type size; 972 | ofstream file (strPath.c_str(), ios::out|ios::binary|ios::trunc); 973 | if (file.is_open()) 974 | { 975 | //Drive character 976 | file.write((char*) &m_cDrive, sizeof(m_cDrive)); 977 | 978 | //Drive FileReferenceNumber 979 | file.write((char*) &m_dwDriveFRN, sizeof(m_dwDriveFRN)); 980 | 981 | unsigned int size = rgFiles.size(); 982 | //Number of files 983 | file.write((char*) &size, sizeof(rgFiles.size())); 984 | //indexed files 985 | file.write((char*) &(rgFiles[0]), sizeof(IndexedFile) * rgFiles.size()); 986 | 987 | size = rgDirectories.size(); 988 | //Number of directories 989 | file.write((char*) &size, sizeof(rgDirectories.size())); 990 | //indexed directories 991 | file.write((char*) &(rgDirectories[0]), sizeof(IndexedDirectory) * rgDirectories.size()); 992 | file.close(); 993 | return true; 994 | } 995 | return false; 996 | } 997 | 998 | 999 | 1000 | // Constructor for loading the index from a previously saved file 1001 | CDriveIndex::CDriveIndex(wstring 
&strPath) 1002 | { 1003 | m_hVol = INVALID_HANDLE_VALUE; 1004 | Empty(); 1005 | 1006 | ifstream::pos_type size; 1007 | 1008 | ifstream file (strPath.c_str(), ios::in | ios::binary); 1009 | if (file.is_open()) 1010 | { 1011 | //Drive 1012 | WCHAR cDrive; 1013 | file.read((char*) &cDrive, sizeof(WCHAR)); 1014 | 1015 | if(Init(cDrive)) 1016 | { 1017 | // Drive FileReferenceNumber 1018 | file.read((char*) &m_dwDriveFRN, sizeof(m_dwDriveFRN)); 1019 | 1020 | //Number of files 1021 | unsigned int numFiles = 0; 1022 | file.read((char*) &numFiles, sizeof(numFiles)); 1023 | rgFiles.reserve(numFiles); 1024 | 1025 | //indexed files 1026 | for(unsigned int j = 0; j != numFiles; j++) 1027 | { 1028 | IndexedFile i; 1029 | file.read((char*) &i, sizeof(IndexedFile)); 1030 | rgFiles.insert(rgFiles.end(), i); 1031 | } 1032 | 1033 | //Number of directories 1034 | unsigned int numDirs = 0; 1035 | file.read((char*) &numDirs, sizeof(numDirs)); 1036 | rgDirectories.reserve(numDirs); 1037 | 1038 | //indexed directories 1039 | for(unsigned int j = 0; j != numDirs; j++) 1040 | { 1041 | IndexedDirectory i; 1042 | file.read((char*) &i, sizeof(IndexedDirectory)); 1043 | rgDirectories.insert(rgDirectories.end(), i); 1044 | } 1045 | } 1046 | file.close(); 1047 | } 1048 | return; 1049 | } 1050 | 1051 | 1052 | 1053 | // Returns the number of files and folders on this drive 1054 | DriveInfo CDriveIndex::GetInfo() 1055 | { 1056 | DriveInfo di; 1057 | di.NumFiles = (DWORDLONG) rgFiles.size(); 1058 | di.NumDirectories = (DWORDLONG) rgDirectories.size(); 1059 | return di; 1060 | } 1061 | 1062 | 1063 | 1064 | 1065 | //Performs a fuzzy search for shorter in longer. 1066 | //return values range from 0.0 = identical to 1.0 = completely different. 0.4 seems appropriate 1067 | float FuzzySearch(wstring &longer, wstring &shorter) 1068 | { 1069 | //Note: All string lengths are shorter than MAX_PATH, so an uint is perfectly fitted. 
1070 | unsigned int lenl = (unsigned int) longer.length(); 1071 | unsigned int lens = (unsigned int) shorter.length(); 1072 | 1073 | if(lens > lenl) 1074 | return 0.0f; 1075 | 1076 | //Check if the shorter string is a substring of the longer string 1077 | unsigned int Contained = (unsigned int) longer.find(shorter); 1078 | if(Contained != wstring::npos) 1079 | return Contained == 0 ? 1.0f : 0.8f; 1080 | 1081 | wstring longerlower(longer); 1082 | wstring shorterlower(shorter); 1083 | for(unsigned int i = 0; i != lenl; i++) 1084 | longerlower[i] = tolower(longer[i]); 1085 | for(unsigned int i = 0; i != lens; i++) 1086 | shorterlower[i] = tolower(shorter[i]); 1087 | 1088 | //Check if the shorter string is a substring of the longer string 1089 | Contained = (unsigned int) longerlower.find(shorterlower); 1090 | if(Contained != wstring::npos) 1091 | return Contained == 0 ? 0.9f : 0.7f; 1092 | 1093 | //Check if string can be matched by omitting characters 1094 | if(lens < 5) 1095 | { 1096 | unsigned int pos = 0; 1097 | unsigned int matched = 0; 1098 | for(unsigned int i = 0; i != lens; i++) 1099 | { 1100 | WCHAR c = toupper(shorter[i]); //only look for capital letters in longer string, (e.g. 
match tc in TrueCrypt) 1101 | for(unsigned int j = 0; j != lenl - pos; j++) 1102 | { 1103 | if(longer[pos + j] == c) 1104 | { 1105 | pos = j; 1106 | matched++; 1107 | break; 1108 | } 1109 | else 1110 | continue; 1111 | } 1112 | } 1113 | if(matched == lens) 1114 | return 0.9f; //Slightly worse than direct matches 1115 | } 1116 | return 0; 1117 | } -------------------------------------------------------------------------------- /FileSearch/CDriveIndex.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | Module name: CDriveIndex.cpp 3 | Written by: Christian Sander 4 | Credits for original code this is based on: Jeffrey Cooperstein & Jeffrey Richter 5 | ******************************************************************************/ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | using namespace std; 17 | 18 | #define NO_WHERE 0 19 | #define IN_FILES 1 20 | #define IN_DIRECTORIES 2 21 | struct HashMapEntry 22 | { 23 | DWORDLONG ParentFRN; 24 | unsigned int iOffset; 25 | }; 26 | struct IndexedFile 27 | { 28 | DWORDLONG Index; 29 | //DWORDLONG ParentIndex; 30 | DWORDLONG Filter; 31 | bool operator<(const IndexedFile& i) 32 | { 33 | return Index < i.Index; 34 | } 35 | IndexedFile() 36 | { 37 | Index = 0; 38 | Filter = 0; 39 | } 40 | }; 41 | struct IndexedDirectory 42 | { 43 | DWORDLONG Index; 44 | //DWORDLONG ParentIndex; 45 | DWORDLONG Filter; 46 | unsigned int nFiles; 47 | bool operator<(const IndexedDirectory& i) 48 | { 49 | return Index < i.Index; 50 | } 51 | IndexedDirectory() 52 | { 53 | Index = 0; 54 | Filter = 0; 55 | nFiles = 0; 56 | } 57 | }; 58 | struct USNEntry 59 | { 60 | DWORDLONG ParentIndex; 61 | wstring Name; 62 | USNEntry(wstring aName, DWORDLONG aParentIndex) 63 | { 64 | Name = aName; 65 | ParentIndex = aParentIndex; 66 | } 67 | USNEntry() 68 | { 69 | 
ParentIndex = 0; 70 | Name = wstring(); 71 | } 72 | }; 73 | 74 | struct DriveInfo 75 | { 76 | DWORDLONG NumFiles; 77 | DWORDLONG NumDirectories; 78 | DriveInfo() 79 | { 80 | NumFiles = 0; 81 | NumDirectories = 0; 82 | } 83 | }; 84 | 85 | struct SearchResultFile 86 | { 87 | wstring Filename; 88 | wstring Path; 89 | DWORDLONG Filter; 90 | float MatchQuality; 91 | SearchResultFile() 92 | { 93 | Filename = wstring(); 94 | Path = wstring(); 95 | Filter = 0; 96 | MatchQuality = 0.0f; 97 | } 98 | SearchResultFile(const SearchResultFile &srf) 99 | { 100 | Filename = srf.Filename; 101 | Path = srf.Path; 102 | Filter = srf.Filter; 103 | MatchQuality = srf.MatchQuality; 104 | } 105 | SearchResultFile(wstring aPath, wstring aFilename, DWORDLONG aFilter, float aMatchQuality = 1) 106 | { 107 | Filename = aFilename; 108 | Path = aPath; 109 | Filter = aFilter; 110 | MatchQuality = aMatchQuality; 111 | } 112 | bool operator<(const SearchResultFile& i) 113 | { 114 | return MatchQuality == i.MatchQuality ? 
Path + Filename < i.Path + i.Filename : MatchQuality > i.MatchQuality; 115 | } 116 | }; 117 | struct SearchResult 118 | { 119 | wstring Query; 120 | wstring SearchPath; 121 | vector Results; 122 | int iOffset; //0 when finished 123 | unsigned int SearchEndedWhere; 124 | int maxResults; 125 | SearchResult() 126 | { 127 | Query = wstring(); 128 | SearchPath = wstring(); 129 | Results = vector(); 130 | iOffset = 0; 131 | SearchEndedWhere = NO_WHERE; 132 | maxResults = -1; 133 | } 134 | }; 135 | class CDriveIndex { 136 | public: 137 | CDriveIndex(); 138 | CDriveIndex(wstring &strPath); 139 | ~CDriveIndex(); 140 | BOOL Init(WCHAR cDrive); 141 | int Find(wstring *strQuery, wstring *strPath, vector *rgsrfResults, BOOL bSort = true, BOOL bEnhancedSearch = true, int maxResults = -1); 142 | void PopulateIndex(); 143 | BOOL SaveToDisk(wstring &strPath); 144 | DriveInfo GetInfo(); 145 | 146 | protected: 147 | BOOL Empty(); 148 | HANDLE Open(WCHAR cDriveLetter, DWORD dwAccess); 149 | BOOL Create(DWORDLONG MaximumSize, DWORDLONG AllocationDelta); 150 | BOOL Query(PUSN_JOURNAL_DATA pUsnJournalData); 151 | void FindRecursively(wstring &strQuery, const WCHAR* &szQueryLower, DWORDLONG QueryFilter, DWORDLONG QueryLength, wstring * strQueryPath, vector &rgsrfResults, BOOL bEnhancedSearch, int maxResults, int &nResults); 152 | template 153 | void FindInJournal(wstring &strQuery, const WCHAR* &szQueryLower, DWORDLONG QueryFilter, DWORDLONG QueryLength, wstring * strQueryPath, vector &rgJournalIndex, vector &rgsrfResults, unsigned int iOffset, BOOL bEnhancedSearch, int maxResults, int &nResults); 154 | void FindInPreviousResults(wstring &strQuery, const WCHAR* &szQueryLower, DWORDLONG QueryFilter, DWORDLONG QueryLength, wstring * strQueryPath, vector &rgsrfResults, unsigned int iOffset, BOOL bEnhancedSearch, int maxResults, int &nResults); 155 | 156 | INT64 FindOffsetByIndex(DWORDLONG Index); 157 | INT64 FindDirOffsetByIndex(DWORDLONG Index); 158 | DWORDLONG MakeFilter(wstring *szName); 
159 | USNEntry FRNToName(DWORDLONG FRN); 160 | void CleanUp(); 161 | BOOL Add(DWORDLONG Index, wstring *szName, DWORDLONG ParentIndex, DWORDLONG Address = 0); 162 | BOOL AddDir(DWORDLONG Index, wstring *szName, DWORDLONG ParentIndex, DWORDLONG Address = 0); 163 | BOOL Get(DWORDLONG Index, wstring *sz); 164 | BOOL GetDir(DWORDLONG Index, wstring *sz); 165 | unsigned int GetParentDirectory(DWORDLONG Index); 166 | void ClearLastResult(); 167 | // Members used to enumerate journal records 168 | HANDLE m_hVol; // handle to volume 169 | WCHAR m_cDrive; // drive letter of volume 170 | DWORDLONG m_dwDriveFRN; // drive FileReferenceNumber 171 | 172 | //Database containers 173 | vector rgFiles; 174 | vector rgDirectories; 175 | SearchResult LastResult; 176 | }; 177 | float FuzzySearch(wstring &longer, wstring &shorter); 178 | DWORDLONG PathToFRN(wstring* strPath); 179 | 180 | //Exported functions 181 | CDriveIndex* _stdcall CreateIndex(WCHAR Drive); 182 | void _stdcall DeleteIndex(CDriveIndex *di); 183 | WCHAR* _stdcall Search(CDriveIndex *di, WCHAR *szQuery, WCHAR *szPath, BOOL bSort, BOOL bEnhancedSearch, int maxResults, BOOL *bFoundAll); 184 | void _stdcall FreeResultsBuffer(WCHAR *szResults); 185 | BOOL _stdcall SaveIndexToDisk(CDriveIndex *di, WCHAR *szPath); 186 | CDriveIndex* _stdcall LoadIndexFromDisk(WCHAR *szPath); 187 | void _stdcall GetDriveInfo(CDriveIndex *di, DriveInfo *driveInfo); -------------------------------------------------------------------------------- /FileSearch/FileSearch.def: -------------------------------------------------------------------------------- 1 | LIBRARY FileSearch 2 | DESCRIPTION "NTFS Indexing and searching" 3 | EXPORTS 4 | CreateIndex @1 5 | DeleteIndex @2 6 | Search @3 7 | FreeResultsBuffer @4 8 | LoadIndexFromDisk @5 9 | SaveIndexToDisk @6 10 | GetDriveInfo @7 -------------------------------------------------------------------------------- /FileSearch/FileSearch.vcxproj: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {417925A9-AA49-4063-A2C1-26C9AC0884F6} 23 | Win32Proj 24 | FileSearch 25 | 26 | 27 | 28 | DynamicLibrary 29 | true 30 | Unicode 31 | 32 | 33 | DynamicLibrary 34 | true 35 | Unicode 36 | 37 | 38 | DynamicLibrary 39 | false 40 | true 41 | Unicode 42 | 43 | 44 | DynamicLibrary 45 | false 46 | true 47 | Unicode 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | true 67 | 68 | 69 | true 70 | 71 | 72 | false 73 | 74 | 75 | false 76 | 77 | 78 | 79 | Use 80 | Level3 81 | Disabled 82 | WIN32;_DEBUG;_WINDOWS;_USRDLL;FILESEARCH_EXPORTS;%(PreprocessorDefinitions) 83 | 84 | 85 | Windows 86 | true 87 | FileSearch.def 88 | 89 | 90 | 91 | 92 | Use 93 | Level3 94 | Disabled 95 | WIN32;_DEBUG;_WINDOWS;_USRDLL;FILESEARCH_EXPORTS;%(PreprocessorDefinitions) 96 | 97 | 98 | Windows 99 | true 100 | FileSearch.def 101 | 102 | 103 | 104 | 105 | Level3 106 | Use 107 | MaxSpeed 108 | true 109 | true 110 | WIN32;NDEBUG;_WINDOWS;_USRDLL;FILESEARCH_EXPORTS;%(PreprocessorDefinitions) 111 | Speed 112 | 113 | 114 | Windows 115 | true 116 | true 117 | true 118 | FileSearch.def 119 | true 120 | 121 | 122 | 123 | 124 | Level3 125 | Use 126 | MaxSpeed 127 | true 128 | true 129 | WIN32;NDEBUG;_WINDOWS;_USRDLL;FILESEARCH_EXPORTS;%(PreprocessorDefinitions) 130 | Speed 131 | 132 | 133 | Windows 134 | true 135 | true 136 | true 137 | FileSearch.def 138 | true 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | false 153 | false 154 | 155 | 156 | 157 | 158 | false 159 | false 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | Create 168 | Create 169 | Create 170 | Create 171 | 172 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- 
/FileSearch/FileSearch.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | Quelldateien 21 | 22 | 23 | 24 | 25 | Headerdateien 26 | 27 | 28 | Headerdateien 29 | 30 | 31 | Headerdateien 32 | 33 | 34 | 35 | 36 | Quelldateien 37 | 38 | 39 | Quelldateien 40 | 41 | 42 | Quelldateien 43 | 44 | 45 | -------------------------------------------------------------------------------- /FileSearch/FileSearch.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | C:\Program Files\Autohotkey\Autohotkey.exe 5 | WindowsLocalDebugger 6 | $(OutDir)FileSearchTest.ahk 7 | $(OutDir) 8 | 9 | 10 | C:\Program Files\Autohotkey\AutoHotkey.exe 11 | WindowsLocalDebugger 12 | $(OutDir)FileSearchTest.ahk 13 | $(OutDir) 14 | 15 | -------------------------------------------------------------------------------- /FileSearch/dllmain.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrisS85/FastFileSearch/63dc6700fe6af179171b597455bda125735d3e99/FileSearch/dllmain.cpp -------------------------------------------------------------------------------- /FileSearch/stdafx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrisS85/FastFileSearch/63dc6700fe6af179171b597455bda125735d3e99/FileSearch/stdafx.cpp -------------------------------------------------------------------------------- /FileSearch/stdafx.h: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ChrisS85/FastFileSearch/63dc6700fe6af179171b597455bda125735d3e99/FileSearch/stdafx.h -------------------------------------------------------------------------------- /FileSearch/targetver.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChrisS85/FastFileSearch/63dc6700fe6af179171b597455bda125735d3e99/FileSearch/targetver.h -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | FastFileSearch 2 | ============== 3 | 4 | Extremely fast file search using the NTFS USN journal. The performance is comparable to the (probably well-known) program Everything (www.voidtools.com). Since this program wasn't open source and I didn't know of any free library for this task I wrote this one. 5 | 6 | It is targeted at being usable from other languages than C++, so the data types in the exported functions of the DLL are a bit friendlier to use. 7 | 8 | The project includes a test AutoHotkey script file ("FileSearchTest.ahk") in the Release directory that can be used to see how the library is used. 
--------------------------------------------------------------------------------
/Release/FileSearch.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisS85/FastFileSearch/63dc6700fe6af179171b597455bda125735d3e99/Release/FileSearch.dll
--------------------------------------------------------------------------------
/Release/FileSearchTest.ahk:
--------------------------------------------------------------------------------
; Test GUI for FileSearch.dll: indexes one drive, searches it while the query
; is typed, and can save the index to / load it from "<Drive>Index.dat" next
; to the script.
#SingleInstance off
; The script restarts itself elevated when it isn't running as administrator.
; The path is quoted so that a script directory containing spaces still works.
if(!A_IsAdmin)
{
	run *runas "%A_ScriptFullPath%"
	ExitApp
}
Drive := "D"
DllPath := A_ScriptDir "\FileSearch.dll"
hModule := DllCall("LoadLibrary", "Str", DllPath, "PTR")
tmIndex := A_TickCount
DriveIndex := DllCall(DllPath "\CreateIndex", "ushort", NumGet(Drive, "ushort"), "PTR")
tmIndex := (A_TickCount - tmIndex) / 1000
; DriveInfo holds two 64-bit counters: files at offset 0, directories at offset 8
VarSetCapacity(DriveInfo, 16, 0)
DllCall(DllPath "\GetDriveInfo", "PTR", DriveIndex, "PTR", &DriveInfo, "UINT")
NumFiles := NumGet(DriveInfo, 0, "uint64")
NumDirectories := NumGet(DriveInfo, 8, "uint64")
Gui, Add, Edit, section w200 gButton vQueryString, .exe
Gui, Add, Text,, In this path:
Gui, Add, Edit, x+10 w200 vQueryPath,
Gui, Add, Button, xs+0 gButton Default, Search
Gui, Add, CheckBox, xs+0 vLimitResults, Limit Results to 1000
Gui, Add, Edit, xs+0 w500 h500 vResults Multi, Index time: %tmIndex% seconds`n%NumFiles% files total, %NumDirectories% directories total
Gui, Add, Button, xs+0 gLoad, Load Index
Gui, Add, Button, xs+0 gSave, Save Index
Gui, Show
GoSub Button
return

; Runs a search for the current query string. Queries of fewer than three
; characters are not searched.
Button:
Gui, Submit, NoHide
tmSearch := A_TickCount
SearchString := QueryString
if(StrLen(QueryString) > 2)
	results := Query(QueryString, QueryPath, LimitResults ? 1000 : -1, nResults)
else
{
	results := ""
	nResults := 0
}
tmSearch := (A_TickCount - tmSearch ) / 1000
GuiControl,, Results, Index time: %tmIndex% seconds`n%NumFiles% files total, %NumDirectories% directories total`nSearch time for "%QueryString%": %tmSearch% seconds`n%nResults% Result(s)`n%results%

;It may happen that the search takes long enough to swallow this g-label notification while typing. To fix this we simply run it again if the query string was changed.
Gui, Submit, NoHide
if(SearchString != QueryString)
	GoSub Button
return

; Replaces the current index with the one stored in "<Drive>Index.dat"
Load:
if(DriveIndex)
	DllCall(DllPath "\DeleteIndex", "PTR", DriveIndex)
tmLoad := A_TickCount
DriveIndex := DllCall(DllPath "\LoadIndexFromDisk", "wstr", A_ScriptDir "\" Drive "Index.dat", "PTR")
; A_TickCount is in milliseconds, the label below says seconds
tmLoad := (A_TickCount - tmLoad) / 1000
DllCall(DllPath "\GetDriveInfo", "PTR", DriveIndex, "PTR", &DriveInfo, "UINT")
NumFiles := NumGet(DriveInfo, 0, "uint64")
NumDirectories := NumGet(DriveInfo, 8, "uint64")
GuiControl,, Results, Load time: %tmLoad% seconds`n%NumFiles% files total, %NumDirectories% directories total
return

; Writes the current index to "<Drive>Index.dat"
Save:
tmSave := A_TickCount
Path := A_ScriptDir "\" Drive "Index.dat"
result := DllCall(DllPath "\SaveIndexToDisk", "PTR", DriveIndex, "wstr", Path, "UINT")
; A_TickCount is in milliseconds, the label below says seconds
tmSave := (A_TickCount - tmSave) / 1000
GuiControl,, Results, Save time: %tmSave% seconds`n%NumFiles% files total, %NumDirectories% directories total
return

; Searches the index and returns the newline-separated result paths.
; nResults receives the number of results; when the DLL reports -1 a hint
; that there were more results is appended.
Query(String, QueryPath, LimitResults, ByRef nResults)
{
	global DriveIndex, DllPath
	pResult := DllCall(DllPath "\Search", "PTR", DriveIndex, "wstr", String, "wstr", QueryPath, "int", true, "int", true, "int", LimitResults, "int*", nResults, "PTR")
	; Search returns NULL on error; there is nothing to read or free then
	if(!pResult)
		return ""
	strResult := StrGet(pResult + 0) (nResults = -1 ? "`nThere were more results..." : "")
	; The function name needs the "\" separator like every other DllCall here,
	; otherwise the call fails and the buffer is never released
	DllCall(DllPath "\FreeResultsBuffer", "PTR", pResult)
	return strResult
}

GuiClose:
if(DriveIndex)
	DllCall(DllPath "\DeleteIndex", "PTR", DriveIndex)
DllCall("FreeLibrary", "PTR", hModule)
ExitApp
--------------------------------------------------------------------------------
/x64/Release/FileSearch.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChrisS85/FastFileSearch/63dc6700fe6af179171b597455bda125735d3e99/x64/Release/FileSearch.dll
--------------------------------------------------------------------------------