├── Wrappers ├── WinRT │ ├── pch.cpp │ ├── pch.h │ ├── SPTAG.def │ ├── packages.config │ ├── PropertySheet.props │ ├── AnnIndex.idl │ ├── SPTAG.WinRT.targets │ └── AnnIndex.h ├── inc │ ├── PythonClient.i │ ├── JavaClient.i │ ├── CsharpClient.i │ ├── JavaFileIO.i │ ├── PythonCore.i │ ├── CsharpCore.i │ ├── JavaCore.i │ ├── TransferDataType.h │ ├── ClientInterface.h │ └── ManagedObject.h ├── packages.config ├── src │ └── AssemblyInfo.cpp ├── CLRCore.vcxproj.filters ├── JavaCore.vcxproj.filters ├── CsharpCore.vcxproj.filters ├── PythonCore.vcxproj.filters ├── JavaClient.vcxproj.filters ├── CsharpClient.vcxproj.filters └── PythonClient.vcxproj.filters ├── setup.txt ├── .gitattributes ├── SPTAG.sdf ├── MANIFEST.in ├── docs ├── img │ ├── sptag.png │ ├── swigpath.PNG │ └── visualstudio.png ├── examples │ └── requirements.txt ├── LinuxInstallation.md └── WindowsInstallation.md ├── Script_AE ├── Figure1 │ ├── plot_motivation_result.sh │ └── motivation.sh ├── Figure7 │ ├── plot_iops_result.sh │ ├── iops_limitation.sh │ ├── process_iopslimit.py │ └── limits.p ├── Figure11 │ ├── plot_balance_result.sh │ ├── parameter_study_balance.sh │ ├── process_balance.py │ └── foreground_background.p ├── Figure9 │ ├── plot_shifting_result.sh │ ├── data_shifting.sh │ └── parameter_study_shifting.p ├── Figure10 │ ├── plot_range_result.sh │ ├── parameter_study_range.sh │ ├── process_para_range.py │ └── parameter_study_range.p ├── Figure8 │ ├── plot_stress_result.sh │ └── stress_spfresh.sh ├── Figure6 │ ├── plot_overall_result.sh │ ├── overall_spacev_spann.sh │ ├── overall_spacev_spfresh.sh │ └── overall_spacev_diskann.sh ├── bdev.json ├── iniFile │ ├── genTruth.ini │ ├── genTruth_clustering.ini │ ├── build_SPANN_sift1b.ini │ ├── build_SPANN_spacev100m.ini │ ├── build_sift1m.ini │ ├── build_clustering_1m.ini │ └── build_clustering_2m.ini ├── generate_dataset.py └── generateOverallPerformanceTraceAndTruth.sh ├── Tools ├── nni-auto-tune │ ├── picture │ │ ├── glove-25-angular.png │ │ ├── 
glove-100-angular.png │ │ ├── nytimes-256-angular.png │ │ ├── sift-128-euclidean.png │ │ └── fashion-mnist-784-euclidean.png │ ├── search_space_small.json │ ├── config.yml │ ├── config_aml.yml │ ├── search_space.json │ └── runner.py └── OPQ │ └── README.md ├── datasets └── SPACEV1B │ ├── query.bin │ ├── truth.bin │ ├── query_log.bin │ ├── vectors.bin │ ├── vectors_1.bin │ ├── vectors_10.bin │ ├── vectors_11.bin │ ├── vectors_12.bin │ ├── vectors_13.bin │ ├── vectors_14.bin │ ├── vectors_15.bin │ ├── vectors_16.bin │ ├── vectors_17.bin │ ├── vectors_18.bin │ ├── vectors_19.bin │ ├── vectors_2.bin │ ├── vectors_20.bin │ ├── vectors_21.bin │ ├── vectors_22.bin │ ├── vectors_23.bin │ ├── vectors_24.bin │ ├── vectors_25.bin │ ├── vectors_26.bin │ ├── vectors_27.bin │ ├── vectors_28.bin │ ├── vectors_29.bin │ ├── vectors_3.bin │ ├── vectors_30.bin │ ├── vectors_31.bin │ ├── vectors_32.bin │ ├── vectors_33.bin │ ├── vectors_4.bin │ ├── vectors_5.bin │ ├── vectors_6.bin │ ├── vectors_7.bin │ ├── vectors_8.bin │ └── vectors_9.bin │ └── README.md ├── Test ├── WinRTTest │ ├── packages.config │ └── WinRTTest.vcxproj.filters ├── inc │ └── Test.h ├── packages.config ├── cuda │ ├── cuda_tests.cpp │ └── common.hxx ├── src │ ├── main.cpp │ ├── Base64HelperTest.cpp │ ├── IniReaderTest.cpp │ ├── SIMDTest.cpp │ └── make_gist_sptag.py └── CMakeLists.txt ├── AnnService ├── src │ ├── Socket │ │ └── Common.cpp │ ├── Server │ │ ├── ServiceSettings.cpp │ │ └── main.cpp │ ├── Aggregator │ │ ├── AggregatorSettings.cpp │ │ ├── main.cpp │ │ └── AggregatorExecutionContext.cpp │ ├── Core │ │ └── Common │ │ │ ├── NeighborhoodGraph.cpp │ │ │ └── CommonUtils.cpp │ ├── Client │ │ ├── Options.cpp │ │ └── main.cpp │ ├── SPFresh │ │ └── main.cpp │ └── Helper │ │ ├── Concurrent.cpp │ │ ├── ArgumentsParser.cpp │ │ └── VectorSetReader.cpp ├── inc │ ├── Socket │ │ ├── Common.h │ │ ├── Server.h │ │ ├── Client.h │ │ ├── ConnectionManager.h │ │ └── RemoteSearchQuery.h │ ├── Server │ │ ├── ServiceSettings.h │ │ 
├── ServiceContext.h │ │ ├── SearchExecutor.h │ │ ├── SearchService.h │ │ ├── QueryParser.h │ │ └── SearchExecutionContext.h │ ├── Client │ │ ├── Options.h │ │ └── ClientWrapper.h │ ├── Aggregator │ │ ├── AggregatorSettings.h │ │ ├── AggregatorExecutionContext.h │ │ ├── AggregatorContext.h │ │ └── AggregatorService.h │ ├── Helper │ │ ├── Base64Encode.h │ │ ├── VectorSetReaders │ │ │ ├── XvecReader.h │ │ │ ├── DefaultReader.h │ │ │ └── MemoryReader.h │ │ ├── DynamicNeighbors.h │ │ ├── CommonHelper.h │ │ ├── VectorSetReader.h │ │ └── Concurrent.h │ ├── SSDServing │ │ ├── main.h │ │ └── Utils.h │ └── Core │ │ ├── ResultIterator.h │ │ ├── Common │ │ ├── InstructionUtils.h │ │ ├── Checksum.h │ │ ├── WorkSpacePool.h │ │ ├── KNearestNeighborhoodGraph.h │ │ ├── FineGrainedLock.h │ │ └── IQuantizer.h │ │ ├── SPANN │ │ └── PersistentBuffer.h │ │ ├── VectorSet.h │ │ ├── SearchResult.h │ │ └── MultiIndexScan.h ├── packages.config ├── GPUIndexBuilder.vcxproj.filters ├── IndexBuilder.vcxproj.filters ├── IndexSearcher.vcxproj.filters ├── BalancedDataPartition.vcxproj.filters ├── GPUSSDServing.vcxproj.filters ├── SSDServing.vcxproj.filters ├── Quantizer.vcxproj.filters ├── Client.vcxproj.filters ├── Aggregator.vcxproj.filters └── Server.vcxproj.filters ├── bdev.json ├── .gitmodules ├── SPTAG.targets ├── .github └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── AnnService.users.props ├── Dockerfile ├── LICENSE ├── SPTAG.WinRT.nuspec └── Dockerfile.cuda /Wrappers/WinRT/pch.cpp: -------------------------------------------------------------------------------- 1 | #include "pch.h" 2 | -------------------------------------------------------------------------------- /setup.txt: -------------------------------------------------------------------------------- 1 | pip < 21.4 2 | setuptools < 61 3 | wheel < 0.38 -------------------------------------------------------------------------------- /.gitattributes: 
-------------------------------------------------------------------------------- 1 | *.bin filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /SPTAG.sdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/SPTAG.sdf -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include sptag *.py _SPTAG* *.so *.dll Server.exe server -------------------------------------------------------------------------------- /docs/img/sptag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/docs/img/sptag.png -------------------------------------------------------------------------------- /docs/img/swigpath.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/docs/img/swigpath.PNG -------------------------------------------------------------------------------- /Script_AE/Figure1/plot_motivation_result.sh: -------------------------------------------------------------------------------- 1 | python process_motivation.py 2 | gnuplot motivation.p -------------------------------------------------------------------------------- /Script_AE/Figure7/plot_iops_result.sh: -------------------------------------------------------------------------------- 1 | python process_iopslimit.py log_searchthread 2 | gnuplot limits.p -------------------------------------------------------------------------------- /docs/img/visualstudio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/docs/img/visualstudio.png 
-------------------------------------------------------------------------------- /Script_AE/Figure11/plot_balance_result.sh: -------------------------------------------------------------------------------- 1 | python process_balance.py log_ 2 | gnuplot foreground_background.p -------------------------------------------------------------------------------- /Script_AE/Figure9/plot_shifting_result.sh: -------------------------------------------------------------------------------- 1 | python process_shifting.py 2 | gnuplot parameter_study_shifting.p -------------------------------------------------------------------------------- /Script_AE/Figure10/plot_range_result.sh: -------------------------------------------------------------------------------- 1 | python process_para_range.py log_top 2 | gnuplot parameter_study_range.p -------------------------------------------------------------------------------- /Script_AE/Figure8/plot_stress_result.sh: -------------------------------------------------------------------------------- 1 | python process_stress_test.py log_stress_spfresh.log 2 | gnuplot stress_test_new.p -------------------------------------------------------------------------------- /docs/examples/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.24.2 2 | matplotlib==2.2.2 3 | Keras==2.11.0 4 | Pillow==10.0.1 5 | scikit_learn==0.24.2 6 | -------------------------------------------------------------------------------- /Tools/nni-auto-tune/picture/glove-25-angular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/Tools/nni-auto-tune/picture/glove-25-angular.png -------------------------------------------------------------------------------- /Tools/nni-auto-tune/picture/glove-100-angular.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/Tools/nni-auto-tune/picture/glove-100-angular.png -------------------------------------------------------------------------------- /Tools/nni-auto-tune/picture/nytimes-256-angular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/Tools/nni-auto-tune/picture/nytimes-256-angular.png -------------------------------------------------------------------------------- /Tools/nni-auto-tune/picture/sift-128-euclidean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/Tools/nni-auto-tune/picture/sift-128-euclidean.png -------------------------------------------------------------------------------- /Wrappers/WinRT/pch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | -------------------------------------------------------------------------------- /Tools/nni-auto-tune/picture/fashion-mnist-784-euclidean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SPTAG/HEAD/Tools/nni-auto-tune/picture/fashion-mnist-784-euclidean.png -------------------------------------------------------------------------------- /Wrappers/WinRT/SPTAG.def: -------------------------------------------------------------------------------- 1 | EXPORTS 2 | DllCanUnloadNow = WINRT_CanUnloadNow PRIVATE 3 | DllGetActivationFactory = WINRT_GetActivationFactory PRIVATE 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/query.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6a531887daca721f633da2b92dacc0aa4c3512aef8ae671031e825ce7134a9e6 3 | size 2931608 4 | 
-------------------------------------------------------------------------------- /datasets/SPACEV1B/truth.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:94d385c2f34b1f2899f276d87c14a67ef969dcb473e44753261af05ad62461bc 3 | size 23452808 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/query_log.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d4768ea964b595302b08f0b9ef484d7003b2e7848f2f7baf9e2b4bbb3bb36c49 3 | size 9416208 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_1.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:802f735e6bd1472bacdd93eedf8d1305291659117c2b5e3ca9ac38feb89509af 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_10.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ca7053c8582853c7a9984b56be8b9954cd8cbeda159b9b63fa5a56f776561be2 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_11.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b42ee98dbdf215d9c03e99d3c3ac401e615433d70119c7411c689884159773d1 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_12.bin: -------------------------------------------------------------------------------- 1 | version 
https://git-lfs.github.com/spec/v1 2 | oid sha256:0dd7f1a25a6aa53622fa3d66a35af760bccd187a82bbe39a0bf79c681b646289 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_13.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:69d09f33ba230ee9b38e6fc1bf949c7f57b755c49ae8cdaf13ebf14b12272fe5 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_14.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d575a0ec126bfd3ffb6a3dfb7c36ad94c71a398c0a77b0462620feaa7181cbfb 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_15.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0286768443fbdd55ec0ffd25cecf85121aafef6c65226dc2008dda2ece5c7c8e 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_16.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b600774407a1446fcac0748c7beb5fa4214737bf406c2a162ccbb84254c5041d 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_17.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:5fb2167810380da46d894428ff8f2c61de05c9616a6343f1fa6d084759abb062 3 | size 4294967295 4 | 
-------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_18.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d2e69b2f20fb99063944379667b8b9dc33d672079fe6e744d83076c2de5b1ecc 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_19.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1942f09f15c34c1e141a2bdb31138efcfda9a9fe5671c757e6b6a2e509864d65 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_2.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c8f772d430aa817bbd60319486dce7390924a7b71c3038312c89ab93a6382d43 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_20.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2becd2dcedb147d6c3927fa3b6f2d0f5ede03870efad8c8d04055e25694d60b2 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_21.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0cc41b1a8f6b3fb8ef87dd918b8f12433f1a4bac5711260e6722e9172de14754 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_22.bin: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8cb1780b449120a124d71d4a6e5268c1ac2ec57bf27bb37c23a859b41702f3b8 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_23.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:b04df15421deb05670f21f34a26b4c0d108e5dd0bc8e3024701e46a90bd2d222 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_24.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2ec09e657776aec02783886de7448642a85ddb004d70df0d10537a7837d076f5 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_25.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a4a3c0ccf4da399397f01748b54f686a416cd5719c126ec93ce9effa124debe3 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_26.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:20c541f53d1e0772a270df12543dc0d715b4edb17ca415ebdf45065d2440525f 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_27.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:69c820cd585cb7883e6489e1e96f8a943be7800bb21e25ed88cff1b63a11a660 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_28.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d82070201cb0b34fa38579118ad01ba13efb62d8b45e30867ab763e7794ced54 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_29.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a11ce9f677543ab2f4b71d199e41e2026020c23a47147f536cbdd93a4266fcd8 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_3.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:787c77685a1603d4aa7f2b4b7f9e19ddff2d828ea21554aca52c82e8d7a6a755 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_30.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e426936bcc481e878b6f7ec32c1cb85438d23f63c0dd27330149f64053475a8d 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_31.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:351d25192805efc17f8fc588c872b907ca2b907ca9cb0d3f1413ea1d8fc09a8f 3 | size 4294967295 4 | -------------------------------------------------------------------------------- 
/datasets/SPACEV1B/vectors.bin/vectors_32.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:142c672c7c1f1e0df93ac87a3d39466c9a7fb199309761ba21771bb1f5d2de86 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_33.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e7bb8962700f6b240481d462a6e0f1a997d085857690cf63b5b5be9270e17b5e 3 | size 2763118568 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_4.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d2f3526062048781bdc36d3b0c80ce49c26a4874206109ff79064de2b3acc4dc 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_5.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7b94a7e08eb31cf2fbe7d24b1e12b23f1341a75aeed96e4885f288333fdfdfbc 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_6.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a2b2699313be8e6ca09dc67b3267b6bf42ab2ac350c1198041b2fa9778b7905c 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_7.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:7960bac8a7579fcaf27a17271a49d0d61f13bbf6397712f513892e84fe89a7d0 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_8.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6f968cbd5bd1eb53722373ffedeb6bed9659a69a01df030e39f67a755418ca45 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/vectors.bin/vectors_9.bin: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f50e029134bc17703be6bc69748953ef2c9bb81b29d2286bda4d006d8bfce34c 3 | size 4294967295 4 | -------------------------------------------------------------------------------- /Test/WinRTTest/packages.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /Tools/nni-auto-tune/search_space_small.json: -------------------------------------------------------------------------------- 1 | { 2 | "BKTKmeansK": {"_type": "choice", "_value": [4,8,16,32]}, 3 | "CEF": {"_type": "choice", "_value": [1000, 1100, 1200, 1400, 1800,2000]} 4 | } 5 | 6 | -------------------------------------------------------------------------------- /Test/inc/Test.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | -------------------------------------------------------------------------------- /Script_AE/Figure8/stress_spfresh.sh: -------------------------------------------------------------------------------- 1 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_sift1b/|tee log_stress_spfresh.log -------------------------------------------------------------------------------- /Wrappers/WinRT/packages.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /Script_AE/Figure6/plot_overall_result.sh: -------------------------------------------------------------------------------- 1 | python OverallPerformance_merge_result.py log_spfresh_ log_spann_ log_diskann_ overall_performance_spacev_spfresh_result.csv overall_performance_spacev_spann_result.csv overall_performance_spacev_diskann_result.csv 2 | gnuplot gnuplot overall_performance_spacev_new.p -------------------------------------------------------------------------------- /Wrappers/inc/PythonClient.i: -------------------------------------------------------------------------------- 1 | %module SPTAGClient 2 | 3 | %{ 4 | #include "inc/ClientInterface.h" 5 | %} 6 | 7 | %include 8 | %shared_ptr(AnnClient) 9 | %shared_ptr(RemoteSearchResult) 10 | %include "PythonCommon.i" 11 | 12 | %{ 13 | #define SWIG_FILE_WITH_INIT 14 | %} 15 | 16 | %include "ClientInterface.h" -------------------------------------------------------------------------------- /Wrappers/inc/JavaClient.i: -------------------------------------------------------------------------------- 1 | %module JAVASPTAGClient 2 | 3 | %{ 4 | #include "inc/ClientInterface.h" 5 | %} 6 | 7 | %include 8 | %shared_ptr(AnnClient) 9 | 
%shared_ptr(RemoteSearchResult) 10 | %include "JavaCommon.i" 11 | 12 | %{ 13 | #define SWIG_FILE_WITH_INIT 14 | %} 15 | 16 | %include "ClientInterface.h" 17 | -------------------------------------------------------------------------------- /Wrappers/inc/CsharpClient.i: -------------------------------------------------------------------------------- 1 | %module CSHARPSPTAGClient 2 | 3 | %{ 4 | #include "inc/ClientInterface.h" 5 | %} 6 | 7 | %include 8 | %shared_ptr(AnnClient) 9 | %shared_ptr(RemoteSearchResult) 10 | %include "CsharpCommon.i" 11 | 12 | %{ 13 | #define SWIG_FILE_WITH_INIT 14 | %} 15 | 16 | %include "ClientInterface.h" 17 | -------------------------------------------------------------------------------- /AnnService/src/Socket/Common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "inc/Socket/Common.h" 5 | 6 | using namespace SPTAG::Socket; 7 | 8 | const ConnectionID SPTAG::Socket::c_invalidConnectionID = 0; 9 | 10 | const ResourceID SPTAG::Socket::c_invalidResourceID = 0; 11 | -------------------------------------------------------------------------------- /Tools/nni-auto-tune/config.yml: -------------------------------------------------------------------------------- 1 | experimentName: sift128 2 | trialConcurrency: 4 3 | maxExperimentDuration: 168h 4 | searchSpaceFile: search_space.json 5 | trialCommand: python main.py --train_file sift-128-euclidean.hdf5 6 | 7 | tuner: 8 | name: TPE 9 | classArgs: 10 | optimize_mode: maximize 11 | 12 | trainingService: 13 | platform: local 14 | 15 | -------------------------------------------------------------------------------- /AnnService/src/Server/ServiceSettings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #include "inc/Server/ServiceSettings.h" 5 | 6 | using namespace SPTAG; 7 | using namespace SPTAG::Service; 8 | 9 | ServiceSettings::ServiceSettings() : m_defaultMaxResultNumber(10), m_threadNum(12) 10 | { 11 | } 12 | -------------------------------------------------------------------------------- /bdev.json: -------------------------------------------------------------------------------- 1 | { 2 | "subsystems": [ 3 | { 4 | "subsystem": "bdev", 5 | "config": [ 6 | { 7 | "method": "bdev_nvme_attach_controller", 8 | "params": { 9 | "trtype": "pcie", 10 | "name": "Nvme1", 11 | "traddr": "0000:5c:00.0" 12 | } 13 | } 14 | ] 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /Script_AE/bdev.json: -------------------------------------------------------------------------------- 1 | { 2 | "subsystems": [ 3 | { 4 | "subsystem": "bdev", 5 | "config": [ 6 | { 7 | "method": "bdev_nvme_attach_controller", 8 | "params": { 9 | "trtype": "pcie", 10 | "name": "Nvme0", 11 | "traddr": "c636:00:00.0" 12 | } 13 | } 14 | ] 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /AnnService/src/Aggregator/AggregatorSettings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "inc/Aggregator/AggregatorSettings.h" 5 | 6 | using namespace SPTAG; 7 | using namespace SPTAG::Aggregator; 8 | 9 | AggregatorSettings::AggregatorSettings() : m_searchTimeout(100), m_threadNum(8), m_socketThreadNum(8) 10 | { 11 | } 12 | -------------------------------------------------------------------------------- /AnnService/src/Server/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #include "inc/Server/SearchService.h" 5 | 6 | SPTAG::Service::SearchService g_service; 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | if (!g_service.Initialize(argc, argv)) 11 | { 12 | return 1; 13 | } 14 | 15 | g_service.Run(); 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /AnnService/src/Aggregator/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "inc/Aggregator/AggregatorService.h" 5 | 6 | SPTAG::Aggregator::AggregatorService g_service; 7 | 8 | int main(int argc, char *argv[]) 9 | { 10 | if (!g_service.Initialize()) 11 | { 12 | return 1; 13 | } 14 | 15 | g_service.Run(); 16 | 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /Wrappers/inc/JavaFileIO.i: -------------------------------------------------------------------------------- 1 | %module JAVAFileIO 2 | 3 | %{ 4 | #include "inc/FileIOInterface.h" 5 | %} 6 | 7 | %include 8 | %include 9 | %include "std_string.i" 10 | %include "std_vector.i" 11 | // %shared_ptr(FileIOInterface) 12 | %include "JavaCommon.i" 13 | 14 | %include "../../AnnService/inc/Helper/KeyValueIO.h" 15 | %include "../../AnnService/inc/Core/SPANN/ExtraFileController.h" 16 | %include "FileIOInterface.h" -------------------------------------------------------------------------------- /Wrappers/inc/PythonCore.i: -------------------------------------------------------------------------------- 1 | %module SPTAG 2 | 3 | %{ 4 | #include "inc/CoreInterface.h" 5 | #include "inc/Core/ResultIterator.h" 6 | %} 7 | 8 | %include 9 | %include 10 | %shared_ptr(AnnIndex) 11 | %shared_ptr(QueryResult) 12 | %shared_ptr(ResultIterator) 13 | %include "PythonCommon.i" 14 | 15 | %{ 16 | #define SWIG_FILE_WITH_INIT 17 | %} 18 | 19 | %include "CoreInterface.h" 20 | 
%include "../../AnnService/inc/Core/ResultIterator.h" -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ThirdParty/zstd"] 2 | path = ThirdParty/zstd 3 | url = https://github.com/facebook/zstd 4 | branch = release 5 | [submodule "ThirdParty/spdk"] 6 | path = ThirdParty/spdk 7 | url = https://github.com/spdk/spdk 8 | [submodule "ThirdParty/isal-l_crypto"] 9 | path = ThirdParty/isal-l_crypto 10 | url = https://github.com/intel/isa-l_crypto 11 | [submodule "ThirdParty/RocksDB"] 12 | path = ThirdParty/RocksDB 13 | url = https://github.com/PtilopsisL/rocksdb 14 | -------------------------------------------------------------------------------- /Wrappers/inc/CsharpCore.i: -------------------------------------------------------------------------------- 1 | %module CSHARPSPTAG 2 | 3 | %{ 4 | #include "inc/CoreInterface.h" 5 | #include "inc/Core/ResultIterator.h" 6 | %} 7 | 8 | %include 9 | %include 10 | %shared_ptr(AnnIndex) 11 | %shared_ptr(QueryResult) 12 | %shared_ptr(ResultIterator) 13 | %include "CsharpCommon.i" 14 | 15 | %{ 16 | #define SWIG_FILE_WITH_INIT 17 | %} 18 | 19 | %include "CoreInterface.h" 20 | %include "../../AnnService/inc/Core/SearchResult.h" 21 | %include "../../AnnService/inc/Core/ResultIterator.h" -------------------------------------------------------------------------------- /Wrappers/inc/JavaCore.i: -------------------------------------------------------------------------------- 1 | %module JAVASPTAG 2 | 3 | %{ 4 | #include "inc/CoreInterface.h" 5 | #include "inc/Core/ResultIterator.h" 6 | %} 7 | 8 | %include 9 | %include 10 | %shared_ptr(AnnIndex) 11 | %shared_ptr(QueryResult) 12 | %shared_ptr(ResultIterator) 13 | %include "JavaCommon.i" 14 | 15 | %{ 16 | #define SWIG_FILE_WITH_INIT 17 | %} 18 | 19 | %include "CoreInterface.h" 20 | %include "../../AnnService/inc/Core/SearchResult.h" 21 | %include 
"../../AnnService/inc/Core/ResultIterator.h" 22 | -------------------------------------------------------------------------------- /Script_AE/iniFile/genTruth.ini: -------------------------------------------------------------------------------- 1 | [Base] 2 | ValueType=Int8 3 | DistCalcMethod=L2 4 | IndexAlgoType=BKT 5 | Dim=100 6 | VectorPath=spacev100m_update_set88 7 | VectorType=DEFAULT 8 | VectorSize=2000000 9 | VectorDelimiter= 10 | QueryPath=/home/sosp/data/spacev_data/query.i8bin 11 | QueryType=DEFAULT 12 | QuerySize=29316 13 | QueryDelimiter= 14 | WarmupPath= 15 | WarmupType=DEFAULT 16 | WarmupSize=10000 17 | WarmupDelimiter= 18 | TruthPath=spacev100m_update_truth88 19 | TruthType=DEFAULT 20 | GenerateTruth=true 21 | 22 | [SearchSSDIndex] 23 | ResultNum=100 24 | NumberOfThreads=160 -------------------------------------------------------------------------------- /AnnService/inc/Socket/Common.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_SOCKET_COMMON_H_ 5 | #define _SPTAG_SOCKET_COMMON_H_ 6 | 7 | #include 8 | 9 | namespace SPTAG 10 | { 11 | namespace Socket 12 | { 13 | 14 | typedef std::uint32_t ConnectionID; 15 | 16 | typedef std::uint32_t ResourceID; 17 | 18 | extern const ConnectionID c_invalidConnectionID; 19 | 20 | extern const ResourceID c_invalidResourceID; 21 | 22 | } // namespace Socket 23 | } // namespace SPTAG 24 | 25 | #endif // _SPTAG_SOCKET_COMMON_H_ 26 | -------------------------------------------------------------------------------- /Script_AE/Figure1/motivation.sh: -------------------------------------------------------------------------------- 1 | # static 2 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_sift_cluster_2m |tee log_static.log 3 | 4 | # nolimit 5 | cp /home/sosp/data/store_sift_cluster/indexloader_nolimit.ini /home/sosp/data/store_sift_cluster/indexloader.ini 6 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_sift_cluster |tee log_nolimit.log 7 | -------------------------------------------------------------------------------- /SPTAG.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | x64 6 | <_nugetNativeFolder>$(MSBuildThisFileDirectory)..\runtimes\win-$(Native-Platform)\native\ 7 | 8 | 9 | 10 | 11 | %(FileName)%(Extension) 12 | PreserveNewest 13 | 14 | 15 | -------------------------------------------------------------------------------- /Script_AE/iniFile/genTruth_clustering.ini: -------------------------------------------------------------------------------- 1 | [Base] 2 | ValueType=UInt8 3 | DistCalcMethod=L2 4 | IndexAlgoType=BKT 5 | Dim=128 6 | 
VectorPath=/home/sosp/data/sift_data/bigann2m_update_clustering 7 | VectorType=DEFAULT 8 | VectorSize=2000000 9 | VectorDelimiter= 10 | QueryPath=/home/sosp/data/sift_data/query.public.10K.u8bin 11 | QueryType=DEFAULT 12 | QuerySize=10000 13 | QueryDelimiter= 14 | WarmupPath= 15 | WarmupType=DEFAULT 16 | WarmupSize=10000 17 | WarmupDelimiter= 18 | TruthPath=/home/sosp/data/sift_data/bigann2m_update_clustering_origin_truth0 19 | TruthType=DEFAULT 20 | GenerateTruth=true 21 | 22 | [SearchSSDIndex] 23 | ResultNum=100 24 | NumberOfThreads=160 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /Script_AE/Figure10/parameter_study_range.sh: -------------------------------------------------------------------------------- 1 | loaderPath="/home/sosp/data/store_sift_cluster/indexloader.ini" 2 | storePath="/home/sosp/data/store_sift_cluster" 3 | ReassignLine="118c ReassignK=" 4 | logPath="log_top" 5 | 6 | cp /home/sosp/data/store_sift_cluster/indexloader_top64.ini /home/sosp/data/store_sift_cluster/indexloader.ini 7 | 8 | for i in 0 8 64 128 9 | do 10 | newReassignLine=$ReassignLine$i 11 | sed -i "$newReassignLine" $loaderPath 12 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh $storePath |tee $logPath$i 13 | done -------------------------------------------------------------------------------- /Wrappers/inc/TransferDataType.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_PW_TRANSFERDATATYPE_H_ 5 | #define _SPTAG_PW_TRANSFERDATATYPE_H_ 6 | 7 | #include "inc/Core/CommonDataStructure.h" 8 | #include "inc/Core/SearchQuery.h" 9 | #include "inc/Core/ResultIterator.h" 10 | #include "inc/Socket/RemoteSearchQuery.h" 11 | 12 | typedef SPTAG::ByteArray ByteArray; 13 | 14 | typedef SPTAG::QueryResult QueryResult; 15 | 16 | typedef SPTAG::BasicResult BasicResult; 17 | 18 | typedef SPTAG::Socket::RemoteSearchResult RemoteSearchResult; 19 | 20 | #endif // _SPTAG_PW_TRANSFERDATATYPE_H_ 21 | -------------------------------------------------------------------------------- /Wrappers/WinRT/PropertySheet.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /Tools/OPQ/README.md: -------------------------------------------------------------------------------- 1 | # OPQ GPU training and inference tool 2 | 3 | ## Package Requirements (TBD) 4 | 5 | 1. Python>=3.7 6 | 2. numpy>=1.18.1 7 | 3. faiss>=1.7.0 8 | 4. 
LibVQ 9 | 10 | 11 | ## Parameter Sample 12 | --data_file [input_path]\vectors.bin.0 --query_file [model_path]\query.bin --output_truth [output_path] --output_dir [output_path]\5474\cluster_unzip --task 0 --data_type float32 --k 5 --dim 1024 --B 1000000 --Q 1000 --D L2 --data_format DEFAULT --T 18 --train_samples 1000000 --quan_type opq --quan_dim 1024 --output_quantizer quantizer.bin --output_quan_vector_file dssm_vectors.bin --output_rec_vector_file vectors.bin --quan_test 1 --data_normalize 0 --query_normalize 0 13 | -------------------------------------------------------------------------------- /Script_AE/Figure7/iops_limitation.sh: -------------------------------------------------------------------------------- 1 | cp /home/sosp/data/store_spacev100m/indexloader_iopslimit.ini /home/sosp/data/store_spacev100m/indexloader.ini 2 | SearchThreadNumLine="107c SearchThreadNum=" 3 | loaderPath="/home/sosp/data/store_spacev100m/indexloader.ini " 4 | storePath="/home/sosp/data/store_spacev100m" 5 | logPath="log_searchthread" 6 | 7 | for i in 1 2 4 8 10 12 8 | do 9 | newSearchThreadNumLine=$SearchThreadNumLine$i 10 | sed -i "$newSearchThreadNumLine" $loaderPath 11 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh $storePath |tee $logPath$i 12 | done -------------------------------------------------------------------------------- /Tools/nni-auto-tune/config_aml.yml: -------------------------------------------------------------------------------- 1 | experimentName: sift128 2 | trialConcurrency: 4 3 | maxExperimentDuration: 168h 4 | searchSpaceFile: search_space.json 5 | 6 | trialCommand: 7 | python --version; 8 | pip --version; 9 | pip install -i https://test.pypi.org/simple/ sptag; 10 | git clone https://github.com/microsoft/SPTAG.git; 11 | cd SPTAG/tools/nni-auto-tune; 12 | wget http://ann-benchmarks.com/sift-128-euclidean.hdf5; 13 | python main.py 
--train_file sift-128-euclidean.hdf5 14 | 15 | tuner: 16 | name: TPE 17 | classArgs: 18 | optimize_mode: maximize 19 | 20 | trainingService: 21 | platform: aml 22 | dockerImage: msranni/nni 23 | subscriptionId: 24 | resourceGroup: 25 | workspaceName: 26 | computeTarget: -------------------------------------------------------------------------------- /AnnService/src/Core/Common/NeighborhoodGraph.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "inc/Core/Common/NeighborhoodGraph.h" 5 | #include "inc/Core/Common/KNearestNeighborhoodGraph.h" 6 | #include "inc/Core/Common/RelativeNeighborhoodGraph.h" 7 | 8 | using namespace SPTAG::COMMON; 9 | 10 | std::shared_ptr NeighborhoodGraph::CreateInstance(std::string type) 11 | { 12 | std::shared_ptr res; 13 | if (type == "RNG") 14 | { 15 | res.reset(new RelativeNeighborhoodGraph); 16 | } 17 | else if (type == "NNG") 18 | { 19 | res.reset(new KNearestNeighborhoodGraph); 20 | } 21 | return res; 22 | } -------------------------------------------------------------------------------- /AnnService/packages.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /Tools/nni-auto-tune/search_space.json: -------------------------------------------------------------------------------- 1 | { 2 | "BKTKmeansK": {"_type": "quniform", "_value": [4,32,8]}, 3 | "Samples": {"_type": "quniform", "_value": [1000, 10000, 2000]}, 4 | "TPTNumber": {"_type": "quniform", "_value": [32, 192, 16]}, 5 | "RefineIterations": {"_type": "choice", "_value": [2, 3]}, 6 | "NeighborhoodSize": {"_type": "quniform", "_value": [16, 192, 8]}, 7 | "CEF": {"_type": "quniform", "_value": [1000, 2000,100]}, 8 | "MaxCheckForRefineGraph": {"_type": "quniform", 
"_value": [4096, 16324, 1024]}, 9 | "NumberOfInitialDynamicPivots": {"_type": "quniform", "_value": [1, 50, 10]}, 10 | "GraphNeighborhoodScale": {"_type": "choice", "_value": [2, 3, 4]}, 11 | "NumberOfOtherDynamicPivots": {"_type": "quniform", "_value": [1, 10, 2]} 12 | } 13 | 14 | -------------------------------------------------------------------------------- /Wrappers/WinRT/AnnIndex.idl: -------------------------------------------------------------------------------- 1 | namespace SPTAG 2 | { 3 | 4 | enum LogLevel 5 | { 6 | Debug = 0, 7 | Info, 8 | Status, 9 | Warning, 10 | Error, 11 | Assert, 12 | Count, 13 | Empty 14 | }; 15 | 16 | [default_interface] 17 | runtimeclass SearchResult 18 | { 19 | UInt8[] Metadata {get; }; 20 | Single Distance{ get; }; 21 | }; 22 | 23 | [default_interface] 24 | runtimeclass AnnIndex 25 | { 26 | AnnIndex(); 27 | [default_overload] 28 | void AddWithMetadata(Single[] data, UInt8[] metadata); 29 | 30 | void Save(Windows.Storage.StorageFile file); 31 | void Load(Windows.Storage.StorageFile file); 32 | Windows.Foundation.Collections.IVector Search(Single[] vector, UInt32 neighborCount); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /AnnService.users.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | $(SystemVersionDef) %(AdditionalOptions) 8 | 9 | 10 | 11 | 12 | $(SolutionDir)\$(Platform)\$(Configuration)\ 13 | $(SolutionDir)\$(Platform)\$(Configuration)\ 14 | 15 | 16 | 3.9 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /AnnService/GPUIndexBuilder.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {c260e4c4-ec44-4d50-941f-078454da2a89} 6 | 7 | 8 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 9 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 10 | 11 | 12 | 13 | 14 | Source Files 15 | 16 | 17 | 18 | 19 | 20 | 
-------------------------------------------------------------------------------- /AnnService/inc/Server/ServiceSettings.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_SERVER_SERVICESTTINGS_H_ 5 | #define _SPTAG_SERVER_SERVICESTTINGS_H_ 6 | 7 | #include "../Core/Common.h" 8 | 9 | #include 10 | 11 | namespace SPTAG 12 | { 13 | namespace Service 14 | { 15 | 16 | struct ServiceSettings 17 | { 18 | ServiceSettings(); 19 | 20 | std::string m_vectorSeparator; 21 | 22 | std::string m_listenAddr; 23 | 24 | std::string m_listenPort; 25 | 26 | SizeType m_defaultMaxResultNumber; 27 | 28 | SizeType m_threadNum; 29 | 30 | SizeType m_socketThreadNum; 31 | }; 32 | 33 | 34 | 35 | 36 | } // namespace Service 37 | } // namespace SPTAG 38 | 39 | 40 | #endif // _SPTAG_SERVER_SERVICESTTINGS_H_ 41 | 42 | -------------------------------------------------------------------------------- /Test/packages.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /AnnService/src/Client/Options.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #include "inc/Client/Options.h" 5 | #include "inc/Helper/StringConvert.h" 6 | 7 | #include 8 | 9 | using namespace SPTAG; 10 | using namespace SPTAG::Client; 11 | 12 | ClientOptions::ClientOptions() : m_searchTimeout(9000), m_threadNum(1), m_socketThreadNum(2) 13 | { 14 | AddRequiredOption(m_serverAddr, "-s", "--server", "Server address."); 15 | AddRequiredOption(m_serverPort, "-p", "--port", "Server port."); 16 | AddOptionalOption(m_searchTimeout, "-t", "", "Search timeout."); 17 | AddOptionalOption(m_threadNum, "-cth", "", "Client Thread Number."); 18 | AddOptionalOption(m_socketThreadNum, "-sth", "", "Socket Thread Number."); 19 | } 20 | 21 | ClientOptions::~ClientOptions() 22 | { 23 | } 24 | -------------------------------------------------------------------------------- /AnnService/IndexBuilder.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Source Files 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /AnnService/inc/Client/Options.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_CLIENT_OPTIONS_H_ 5 | #define _SPTAG_CLIENT_OPTIONS_H_ 6 | 7 | #include "inc/Helper/ArgumentsParser.h" 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace SPTAG 14 | { 15 | namespace Client 16 | { 17 | 18 | class ClientOptions : public Helper::ArgumentsParser 19 | { 20 | public: 21 | ClientOptions(); 22 | 23 | virtual ~ClientOptions(); 24 | 25 | std::string m_serverAddr; 26 | 27 | std::string m_serverPort; 28 | 29 | // in milliseconds. 
30 | std::uint32_t m_searchTimeout; 31 | 32 | std::uint32_t m_threadNum; 33 | 34 | std::uint32_t m_socketThreadNum; 35 | 36 | }; 37 | 38 | 39 | } // namespace Client 40 | } // namespace SPTAG 41 | 42 | #endif // _SPTAG_CLIENT_OPTIONS_H_ 43 | -------------------------------------------------------------------------------- /Wrappers/packages.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /AnnService/src/SPFresh/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include 5 | 6 | #include "inc/Core/Common.h" 7 | #include "inc/Core/Common/TruthSet.h" 8 | #include "inc/Core/SPANN/Index.h" 9 | #include "inc/Core/VectorIndex.h" 10 | #include "inc/Helper/SimpleIniReader.h" 11 | #include "inc/Helper/StringConvert.h" 12 | #include "inc/Helper/VectorSetReader.h" 13 | 14 | #include "inc/SPFresh/SPFresh.h" 15 | 16 | using namespace SPTAG; 17 | 18 | // switch between exe and static library by _$(OutputType) 19 | #ifdef _exe 20 | 21 | int main(int argc, char *argv[]) 22 | { 23 | if (argc < 2) 24 | { 25 | SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "spfresh storePath\n"); 26 | exit(-1); 27 | } 28 | 29 | auto ret = SSDServing::SPFresh::UpdateTest(argv[1]); 30 | return ret; 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/oss/mirror/docker.io/library/ubuntu:20.04 2 | WORKDIR /app 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt-get update && apt-get -y install wget build-essential swig cmake git libnuma-dev python3.8-dev python3-distutils gcc-8 g++-8 \ 7 | 
libboost-filesystem-dev libboost-test-dev libboost-serialization-dev libboost-regex-dev libboost-serialization-dev libboost-regex-dev libboost-thread-dev libboost-system-dev 8 | 9 | RUN wget https://bootstrap.pypa.io/get-pip.py && python3.8 get-pip.py && python3.8 -m pip install numpy 10 | 11 | ENV PYTHONPATH=/app/Release 12 | 13 | COPY CMakeLists.txt ./ 14 | COPY AnnService ./AnnService/ 15 | COPY Test ./Test/ 16 | COPY Wrappers ./Wrappers/ 17 | COPY GPUSupport ./GPUSupport/ 18 | COPY ThirdParty ./ThirdParty/ 19 | 20 | RUN export CC=/usr/bin/gcc-8 && export CXX=/usr/bin/g++-8 && mkdir build && cd build && cmake .. && make -j && cd .. 21 | -------------------------------------------------------------------------------- /AnnService/inc/Aggregator/AggregatorSettings.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_AGGREGATOR_AGGREGATORSETTINGS_H_ 5 | #define _SPTAG_AGGREGATOR_AGGREGATORSETTINGS_H_ 6 | 7 | #include "../Core/Common.h" 8 | 9 | #include 10 | 11 | namespace SPTAG 12 | { 13 | namespace Aggregator 14 | { 15 | 16 | struct AggregatorSettings 17 | { 18 | AggregatorSettings(); 19 | 20 | std::string m_listenAddr; 21 | 22 | std::string m_listenPort; 23 | 24 | std::uint32_t m_searchTimeout; 25 | 26 | SizeType m_threadNum; 27 | 28 | SizeType m_socketThreadNum; 29 | 30 | std::string m_centers; 31 | 32 | VectorValueType m_valueType; 33 | 34 | SizeType m_topK; 35 | 36 | DistCalcMethod m_distMethod; 37 | }; 38 | 39 | 40 | 41 | 42 | } // namespace Aggregator 43 | } // namespace SPTAG 44 | 45 | 46 | #endif // _SPTAG_AGGREGATOR_AGGREGATORSETTINGS_H_ 47 | 48 | -------------------------------------------------------------------------------- /AnnService/inc/Helper/Base64Encode.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 
All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_HELPER_BASE64ENCODE_H_ 5 | #define _SPTAG_HELPER_BASE64ENCODE_H_ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace SPTAG 12 | { 13 | namespace Helper 14 | { 15 | namespace Base64 16 | { 17 | 18 | bool Encode(const std::uint8_t* p_in, std::size_t p_inLen, char* p_out, std::size_t& p_outLen); 19 | 20 | bool Encode(const std::uint8_t* p_in, std::size_t p_inLen, std::ostream& p_out, std::size_t& p_outLen); 21 | 22 | bool Decode(const char* p_in, std::size_t p_inLen, std::uint8_t* p_out, std::size_t& p_outLen); 23 | 24 | std::size_t CapacityForEncode(std::size_t p_inLen); 25 | 26 | std::size_t CapacityForDecode(std::size_t p_inLen); 27 | 28 | 29 | } // namespace Base64 30 | } // namespace Helper 31 | } // namespace SPTAG 32 | 33 | #endif // _SPTAG_HELPER_BASE64ENCODE_H_ 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. Go to '...' 13 | 2. Click on '....' 14 | 3. Scroll down to '....' 15 | 4. See error 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Desktop (please complete the following information):** 24 | - OS: [e.g. iOS] 25 | - Browser [e.g. chrome, safari] 26 | - Version [e.g. 22] 27 | 28 | **Smartphone (please complete the following information):** 29 | - Device: [e.g. iPhone6] 30 | - OS: [e.g. iOS8.1] 31 | - Browser [e.g. stock browser, safari] 32 | - Version [e.g. 
22] 33 | 34 | **Additional context** 35 | Add any other context about the problem here. 36 | -------------------------------------------------------------------------------- /Test/cuda/cuda_tests.cpp: -------------------------------------------------------------------------------- 1 | //#include "test_kernels.cu" 2 | 3 | #define BOOST_TEST_MODULE GPU 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | int GPUBuildKNNTest(); 13 | 14 | BOOST_AUTO_TEST_CASE(RandomTests) 15 | { 16 | BOOST_CHECK(1 == 1); 17 | 18 | int errors = GPUBuildKNNTest(); 19 | printf("outside\n"); 20 | BOOST_CHECK(errors == 0); 21 | } 22 | 23 | /* 24 | int GPUTestDistance_All(); 25 | 26 | BOOST_AUTO_TEST_CASE(DistanceTests) { 27 | int errs = GPUTestDistance_All(); 28 | BOOST_CHECK(errs == 0); 29 | } 30 | 31 | int GPUBuildTPTTest(); 32 | 33 | BOOST_AUTO_TEST_CASE(TPTreeTests) { 34 | int errs = GPUBuildTPTTest(); 35 | BOOST_CHECK(errs == 0); 36 | } 37 | 38 | int GPUBuildSSDTest_All(); 39 | 40 | BOOST_AUTO_TEST_CASE(BuildSSDTests) { 41 | int errs = GPUBuildSSDTest_All(); 42 | BOOST_CHECK(errs == 0); 43 | } 44 | */ 45 | -------------------------------------------------------------------------------- /AnnService/inc/SSDServing/main.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #pragma once 5 | #include 6 | #include 7 | #include "inc/Core/Common.h" 8 | 9 | namespace SPTAG { 10 | namespace SSDServing { 11 | 12 | int BootProgram(bool forANNIndexTestTool, 13 | std::map>* config_map, 14 | const char* configurationPath = nullptr, 15 | SPTAG::VectorValueType valueType = SPTAG::VectorValueType::Undefined, 16 | SPTAG::DistCalcMethod distCalcMethod = SPTAG::DistCalcMethod::Undefined, 17 | const char* dataFilePath = nullptr, 18 | const char* indexFilePath = nullptr); 19 | 20 | const std::string SEC_BASE = "Base"; 21 | const std::string SEC_SELECT_HEAD = "SelectHead"; 22 | const std::string SEC_BUILD_HEAD = "BuildHead"; 23 | const std::string SEC_BUILD_SSD_INDEX = "BuildSSDIndex"; 24 | const std::string SEC_SEARCH_SSD_INDEX = "SearchSSDIndex"; 25 | } 26 | } -------------------------------------------------------------------------------- /AnnService/inc/Helper/VectorSetReaders/XvecReader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_HELPER_VECTORSETREADERS_XVECREADER_H_ 5 | #define _SPTAG_HELPER_VECTORSETREADERS_XVECREADER_H_ 6 | 7 | #include "inc/Helper/VectorSetReader.h" 8 | 9 | namespace SPTAG 10 | { 11 | namespace Helper 12 | { 13 | 14 | class XvecVectorReader : public VectorSetReader 15 | { 16 | public: 17 | XvecVectorReader(std::shared_ptr p_options); 18 | 19 | virtual ~XvecVectorReader(); 20 | 21 | virtual ErrorCode LoadFile(const std::string& p_filePaths); 22 | 23 | virtual std::shared_ptr GetVectorSet(SizeType start = 0, SizeType end = -1) const; 24 | 25 | virtual std::shared_ptr GetMetadataSet() const; 26 | 27 | private: 28 | std::string m_vectorOutput; 29 | }; 30 | 31 | 32 | 33 | } // namespace Helper 34 | } // namespace SPTAG 35 | 36 | #endif // _SPTAG_HELPER_VECTORSETREADERS_XVECREADER_H_ 37 | -------------------------------------------------------------------------------- /AnnService/inc/Helper/DynamicNeighbors.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace SPTAG { 6 | namespace Helper { 7 | class DynamicNeighbors 8 | { 9 | public: 10 | DynamicNeighbors(const int* p_data, const int p_length); 11 | 12 | ~DynamicNeighbors(); 13 | 14 | int operator[](const int p_id) const; 15 | 16 | int Size() const; 17 | 18 | private: 19 | const int* const c_data; 20 | 21 | const int c_length; 22 | }; 23 | 24 | 25 | class DynamicNeighborsSet 26 | { 27 | public: 28 | DynamicNeighborsSet(const char* p_filePath); 29 | 30 | ~DynamicNeighborsSet(); 31 | 32 | DynamicNeighbors operator[](const int p_id) const; 33 | 34 | int VectorCount() const 35 | { 36 | return m_vectorCount; 37 | } 38 | 39 | private: 40 | std::unique_ptr m_data; 41 | 42 | std::unique_ptr m_neighborOffset; 43 | 44 | int m_vectorCount; 45 | }; 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /Test/src/main.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #define BOOST_TEST_MAIN 5 | #define BOOST_TEST_MODULE Main 6 | #include "inc/Test.h" 7 | 8 | #include 9 | #include 10 | 11 | using namespace boost::unit_test; 12 | 13 | class SPTAGVisitor : public test_tree_visitor 14 | { 15 | public: 16 | void visit(test_case const &test) 17 | { 18 | std::string prefix(2, '\t'); 19 | std::cout << prefix << "Case: " << test.p_name << std::endl; 20 | } 21 | 22 | bool test_suite_start(test_suite const &suite) 23 | { 24 | std::string prefix(1, '\t'); 25 | std::cout << prefix << "Suite: " << suite.p_name << std::endl; 26 | return true; 27 | } 28 | }; 29 | 30 | struct GlobalFixture 31 | { 32 | GlobalFixture() 33 | { 34 | SPTAGVisitor visitor; 35 | traverse_test_tree(framework::master_test_suite(), visitor, false); 36 | } 37 | }; 38 | 39 | BOOST_TEST_GLOBAL_FIXTURE(GlobalFixture); 40 | -------------------------------------------------------------------------------- /AnnService/inc/Server/ServiceContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_SERVER_SERVICECONTEX_H_ 5 | #define _SPTAG_SERVER_SERVICECONTEX_H_ 6 | 7 | #include "inc/Core/VectorIndex.h" 8 | #include "ServiceSettings.h" 9 | 10 | #include 11 | #include 12 | 13 | namespace SPTAG 14 | { 15 | namespace Service 16 | { 17 | 18 | class ServiceContext 19 | { 20 | public: 21 | ServiceContext(const std::string& p_configFilePath); 22 | 23 | ~ServiceContext(); 24 | 25 | const std::map>& GetIndexMap() const; 26 | 27 | const std::shared_ptr& GetServiceSettings() const; 28 | 29 | bool IsInitialized() const; 30 | 31 | private: 32 | bool m_initialized; 33 | 34 | std::shared_ptr m_settings; 35 | 36 | std::map> m_fullIndexList; 37 | }; 38 | 39 | 40 | } // namespace Service 41 | } // namespace SPTAG 42 | 43 | #endif // _SPTAG_SERVER_SERVICECONTEX_H_ 44 | 45 | -------------------------------------------------------------------------------- /AnnService/inc/Helper/VectorSetReaders/DefaultReader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_ 5 | #define _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_ 6 | 7 | #include "inc/Helper/VectorSetReader.h" 8 | 9 | namespace SPTAG 10 | { 11 | namespace Helper 12 | { 13 | 14 | class DefaultVectorReader : public VectorSetReader 15 | { 16 | public: 17 | DefaultVectorReader(std::shared_ptr p_options); 18 | 19 | virtual ~DefaultVectorReader(); 20 | 21 | virtual ErrorCode LoadFile(const std::string& p_filePaths); 22 | 23 | virtual std::shared_ptr GetVectorSet(SizeType start = 0, SizeType end = -1) const; 24 | 25 | virtual std::shared_ptr GetMetadataSet() const; 26 | 27 | private: 28 | std::string m_vectorOutput; 29 | 30 | std::string m_metadataConentOutput; 31 | 32 | std::string m_metadataIndexOutput; 33 | }; 34 | 35 | 36 | 37 | } // namespace Helper 38 | } // namespace SPTAG 39 | 40 | #endif // _SPTAG_HELPER_VECTORSETREADERS_DEFAULTREADER_H_ 41 | -------------------------------------------------------------------------------- /Wrappers/WinRT/SPTAG.WinRT.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | x86 6 | $(Platform) 7 | <_nugetNativeFolder>$(MSBuildThisFileDirectory)..\..\runtimes\win10-$(Native-Platform)\native\ 8 | 9 | 10 | 11 | 12 | SPTAG.dll 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /AnnService/IndexSearcher.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | 25 | 
-------------------------------------------------------------------------------- /AnnService/BalancedDataPartition.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Source Files 23 | 24 | 25 | -------------------------------------------------------------------------------- /Test/WinRTTest/WinRTTest.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /AnnService/GPUSSDServing.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {5bf17d80-3c16-4aef-918c-6994296bb320} 6 | 7 | 8 | {edefc9a5-628e-4695-aaad-22951d646b18} 9 | 10 | 11 | 12 | 13 | Header Files 14 | 15 | 16 | Header Files 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /AnnService/SSDServing.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {d5aba8fd-7ec7-458a-b728-ae5eaea04e43} 6 | 7 | 8 | 
{6835b7a3-6818-4a89-89c0-fc6527dbf613} 9 | 10 | 11 | 12 | 13 | Header Files 14 | 15 | 16 | Header Files 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | 26 | 27 | Source Files 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /AnnService/inc/Core/ResultIterator.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_RESULT_ITERATOR_H 5 | #define _SPTAG_RESULT_ITERATOR_H 6 | 7 | #include 8 | 9 | #include "VectorIndex.h" 10 | #include "SearchQuery.h" 11 | 12 | typedef SPTAG::VectorIndex VectorIndex; 13 | typedef SPTAG::ByteArray ByteArray; 14 | typedef SPTAG::QueryResult QueryResult; 15 | 16 | class ResultIterator 17 | { 18 | public: 19 | ResultIterator(const void* p_index, const void* p_target, bool p_searchDeleted, int p_workspaceBatch, std::function p_filterFunc, int p_maxCheck); 20 | 21 | ~ResultIterator(); 22 | 23 | void* GetWorkSpace(); 24 | 25 | virtual std::shared_ptr Next(int batch); 26 | 27 | virtual bool GetRelaxedMono(); 28 | 29 | virtual SPTAG::ErrorCode GetErrorCode(); 30 | 31 | virtual void Close(); 32 | 33 | const void* GetTarget(); 34 | 35 | protected: 36 | const VectorIndex* m_index; 37 | const void* m_target; 38 | ByteArray m_byte_target; 39 | std::shared_ptr m_queryResult; 40 | void* m_workspace; 41 | bool m_searchDeleted; 42 | bool m_isFirstResult; 43 | int m_batch = 1; 44 | }; 45 | 46 | #endif -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /AnnService/Quantizer.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /AnnService/inc/Helper/CommonHelper.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 
Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_HELPER_COMMONHELPER_H_ 5 | #define _SPTAG_HELPER_COMMONHELPER_H_ 6 | 7 | #include "inc/Core/Common.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | namespace SPTAG 18 | { 19 | namespace Helper 20 | { 21 | namespace StrUtils 22 | { 23 | 24 | void ToLowerInPlace(std::string& p_str); 25 | 26 | std::vector SplitString(const std::string& p_str, const std::string& p_separator); 27 | 28 | std::pair FindTrimmedSegment(const char* p_begin, 29 | const char* p_end, 30 | const std::function& p_isSkippedChar); 31 | 32 | bool StartsWith(const char* p_str, const char* p_prefix); 33 | 34 | bool StrEqualIgnoreCase(const char* p_left, const char* p_right); 35 | 36 | std::string ReplaceAll(const std::string& orig, const std::string& from, const std::string& to); 37 | 38 | } // namespace StrUtils 39 | } // namespace Helper 40 | } // namespace SPTAG 41 | 42 | #endif // _SPTAG_HELPER_COMMONHELPER_H_ 43 | -------------------------------------------------------------------------------- /Script_AE/Figure7/process_iopslimit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import csv 4 | 5 | 6 | process_list = [1, 2, 4, 8, 10, 12] 7 | 8 | 9 | throughput = [] 10 | KIOPS = [] 11 | 12 | throughput.append('') 13 | KIOPS.append('') 14 | 15 | line_count = 0 16 | 17 | for i in process_list: 18 | log_f = open(sys.argv[1] + str(i)) 19 | while True: 20 | line = log_f.readline() 21 | line_count+=1 22 | 23 | if line == '': 24 | break 25 | 26 | result_group = line.split() 27 | 28 | if len(result_group) > 7 and result_group[7] == "AvgQPS:": 29 | throughput.append(float(result_group[8].rstrip('.'))) 30 | while result_group[0] != 'IOPS:': 31 | line = log_f.readline() 32 | line_count+=1 33 | result_group = line.split() 34 | KIOPS.append(float((result_group[1].rstrip('k')))*1000) 35 
| break 36 | 37 | process_list_search = [] 38 | process_list_search.append('') 39 | process_list_search += process_list 40 | 41 | batch = [] 42 | batch.append('') 43 | for i in range(0, 6): 44 | batch.append(i) 45 | 46 | print(KIOPS) 47 | 48 | with open("IOPS_limit.csv", 'w') as f: 49 | writer = csv.writer(f, delimiter=',') 50 | writer.writerows(zip(batch, throughput, KIOPS, process_list_search)) -------------------------------------------------------------------------------- /Script_AE/generate_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import struct 4 | 5 | def process_args(): 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--src", help="The input file (.fvecs)") 8 | parser.add_argument("--dst", help="The output file (.fvecs)") 9 | parser.add_argument("--topk", type=int, help="The number of element to pick up") 10 | return parser.parse_args() 11 | 12 | 13 | if __name__ == "__main__": 14 | args = process_args() 15 | 16 | # Read topk vector one by one 17 | vecs = "" 18 | row_bin = ""; 19 | dim_bin = ""; 20 | with open(args.src, "rb") as f: 21 | 22 | row_bin = f.read(4) 23 | assert row_bin != b'' 24 | row, = struct.unpack('i', row_bin) 25 | 26 | dim_bin = f.read(4) 27 | assert dim_bin != b'' 28 | dim, = struct.unpack('i', dim_bin) 29 | 30 | i = 0 31 | while 1: 32 | 33 | # The next 4 * dim byte is for a vector 34 | vec = f.read(dim) 35 | 36 | # Store it 37 | vecs += vec 38 | i += 1 39 | if i == args.topk: 40 | break 41 | 42 | with open(args.dst, "wb") as f: 43 | f.write(struct.pack('i', args.topk)) 44 | f.write(dim_bin) 45 | f.write(vecs) -------------------------------------------------------------------------------- /Wrappers/src/AssemblyInfo.cpp: -------------------------------------------------------------------------------- 1 | using namespace System; 2 | using namespace System::Reflection; 3 | using namespace System::Runtime::CompilerServices; 4 | 
using namespace System::Runtime::InteropServices; 5 | using namespace System::Security::Permissions; 6 | 7 | // 8 | // General Information about an assembly is controlled through the following 9 | // set of attributes. Change these attribute values to modify the information 10 | // associated with an assembly. 11 | // 12 | [assembly:AssemblyTitleAttribute(L"CLRCore")]; 13 | [assembly:AssemblyDescriptionAttribute(L"")]; 14 | [assembly:AssemblyConfigurationAttribute(L"")]; 15 | [assembly:AssemblyCompanyAttribute(L"")]; 16 | [assembly:AssemblyProductAttribute(L"CLRCore")]; 17 | [assembly:AssemblyCopyrightAttribute(L"Copyright (c) 2019")]; 18 | [assembly:AssemblyTrademarkAttribute(L"")]; 19 | [assembly:AssemblyCultureAttribute(L"")]; 20 | 21 | // 22 | // Version information for an assembly consists of the following four values: 23 | // 24 | // Major Version 25 | // Minor Version 26 | // Build Number 27 | // Revision 28 | // 29 | // You can specify all the value or you can default the Revision and Build Numbers 30 | // by using the '*' as shown below: 31 | 32 | [assembly:AssemblyVersionAttribute("1.0.*")]; 33 | 34 | [assembly:ComVisible(false)]; 35 | 36 | [assembly:CLSCompliantAttribute(true)]; -------------------------------------------------------------------------------- /AnnService/Client.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Source Files 16 | 17 | 18 | Source Files 19 | 20 | 21 | Source Files 22 | 23 | 24 | 25 | 26 | Header Files 27 | 28 | 29 | Header Files 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /Script_AE/iniFile/build_SPANN_sift1b.ini: -------------------------------------------------------------------------------- 1 | 
[Base] 2 | ValueType=UInt8 3 | DistCalcMethod=L2 4 | IndexAlgoType=BKT 5 | Dim=128 6 | VectorPath=/home/sosp/data/sift_data/base.1B.u8bin 7 | VectorType=DEFAULT 8 | VectorSize=1000000000 9 | VectorDelimiter= 10 | QueryPath=/home/sosp/data/sift_data/query.public.10K.u8bin 11 | QueryType=DEFAULT 12 | QuerySize=10000 13 | QueryDelimiter= 14 | WarmupPath= 15 | WarmupType=DEFAULT 16 | WarmupSize=10000 17 | WarmupDelimiter= 18 | TruthPath=/home/sosp/data/ 19 | TruthType=DEFAULT 20 | GenerateTruth=false 21 | HeadVectorIDs=head_vectors_ID_Int8_L2_base_DEFUALT.bin 22 | HeadVectors=head_vectors_Int8_L2_base_DEFUALT.bin 23 | IndexDirectory=/home/sift/data/store_sift1b/ 24 | 25 | [SelectHead] 26 | isExecute=true 27 | TreeNumber=1 28 | BKTKmeansK=32 29 | BKTLeafSize=8 30 | SamplesNumber=1000 31 | NumberOfThreads=80 32 | SaveBKT=false 33 | AnalyzeOnly=false 34 | CalcStd=true 35 | SelectDynamically=true 36 | NoOutput=false 37 | SelectThreshold=12 38 | SplitFactor=9 39 | SplitThreshold=18 40 | Ratio=0.1 41 | RecursiveCheckSmallCluster=true 42 | PrintSizeCount=true 43 | 44 | [BuildHead] 45 | isExecute=true 46 | NumberOfThreads=160 47 | 48 | [BuildSSDIndex] 49 | isExecute=true 50 | BuildSsdIndex=true 51 | InternalResultNum=64 52 | NumberOfThreads=40 53 | ReplicaCount=8 54 | PostingPageLimit=4 55 | OutputEmptyReplicaID=1 56 | TmpDir=/home/sosp/sift/data/store_sift1b/tmpdir -------------------------------------------------------------------------------- /Wrappers/CLRCore.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {ba4289c4-f872-4dbc-a57f-7b415614afb3} 14 | 15 | 16 | 17 | 18 | Header Files 19 | 20 | 21 | Header Files 22 | 23 | 24 | 25 | 26 | Source Files 27 | 28 | 29 | Source Files 30 | 31 | 32 | 33 | 34 | 35 | 
-------------------------------------------------------------------------------- /AnnService/inc/Socket/Server.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_SOCKET_SERVER_H_ 5 | #define _SPTAG_SOCKET_SERVER_H_ 6 | 7 | #include "Connection.h" 8 | #include "ConnectionManager.h" 9 | #include "Packet.h" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace SPTAG 16 | { 17 | namespace Socket 18 | { 19 | 20 | class Server 21 | { 22 | public: 23 | Server(const std::string& p_address, 24 | const std::string& p_port, 25 | const PacketHandlerMapPtr& p_handlerMap, 26 | std::size_t p_threadNum); 27 | 28 | ~Server(); 29 | 30 | void StartListen(); 31 | 32 | void SendPacket(ConnectionID p_connection, Packet p_packet, std::function p_callback); 33 | 34 | void SetEventOnConnectionClose(std::function p_event); 35 | 36 | private: 37 | void StartAccept(); 38 | 39 | private: 40 | boost::asio::io_context m_ioContext; 41 | 42 | boost::asio::ip::tcp::acceptor m_acceptor; 43 | 44 | std::shared_ptr m_connectionManager; 45 | 46 | std::vector m_threadPool; 47 | 48 | const PacketHandlerMapPtr m_requestHandlerMap; 49 | }; 50 | 51 | 52 | } // namespace Socket 53 | } // namespace SPTAG 54 | 55 | #endif // _SPTAG_SOCKET_SERVER_H_ 56 | -------------------------------------------------------------------------------- /AnnService/src/Aggregator/AggregatorExecutionContext.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #include "inc/Aggregator/AggregatorExecutionContext.h" 5 | 6 | using namespace SPTAG; 7 | using namespace SPTAG::Aggregator; 8 | 9 | AggregatorExecutionContext::AggregatorExecutionContext(std::size_t p_totalServerNumber, 10 | Socket::PacketHeader p_requestHeader) 11 | : m_requestHeader(std::move(p_requestHeader)) 12 | { 13 | m_results.clear(); 14 | m_results.resize(p_totalServerNumber); 15 | 16 | m_unfinishedCount = static_cast(p_totalServerNumber); 17 | } 18 | 19 | AggregatorExecutionContext::~AggregatorExecutionContext() 20 | { 21 | } 22 | 23 | std::size_t AggregatorExecutionContext::GetServerNumber() const 24 | { 25 | return m_results.size(); 26 | } 27 | 28 | AggregatorResult &AggregatorExecutionContext::GetResult(std::size_t p_num) 29 | { 30 | return m_results[p_num]; 31 | } 32 | 33 | const Socket::PacketHeader &AggregatorExecutionContext::GetRequestHeader() const 34 | { 35 | return m_requestHeader; 36 | } 37 | 38 | bool AggregatorExecutionContext::IsCompletedAfterFinsh(std::uint32_t p_finishedCount) 39 | { 40 | auto lastCount = m_unfinishedCount.fetch_sub(p_finishedCount); 41 | return lastCount <= p_finishedCount; 42 | } 43 | -------------------------------------------------------------------------------- /Script_AE/iniFile/build_SPANN_spacev100m.ini: -------------------------------------------------------------------------------- 1 | [Base] 2 | ValueType=Int8 3 | DistCalcMethod=L2 4 | IndexAlgoType=BKT 5 | Dim=100 6 | VectorPath=/home/sosp/data/spacev_data/spacev100m_base.i8bin 7 | VectorType=DEFAULT 8 | VectorSize=100000000 9 | VectorDelimiter= 10 | QueryPath=/home/sosp/data/spacev_data/query.i8bin 11 | QueryType=DEFAULT 12 | QuerySize=29316 13 | QueryDelimiter= 14 | WarmupPath= 15 | WarmupType=DEFAULT 16 | WarmupSize=29316 17 | WarmupDelimiter= 18 | TruthPath=/home/sosp/data/ 19 | TruthType=DEFAULT 20 | GenerateTruth=false 21 | HeadVectorIDs=head_vectors_ID_UInt8_L2_base_DEFUALT.bin 22 | HeadVectors=head_vectors_UInt8_L2_base_DEFUALT.bin 23 | 
IndexDirectory=/home/sosp/data/store_spacev100m 24 | HeadIndexFolder=head_index 25 | 26 | [SelectHead] 27 | isExecute=true 28 | TreeNumber=1 29 | BKTKmeansK=32 30 | BKTLeafSize=8 31 | SamplesNumber=1000 32 | NumberOfThreads=80 33 | SaveBKT=false 34 | AnalyzeOnly=false 35 | CalcStd=true 36 | SelectDynamically=true 37 | NoOutput=false 38 | SelectThreshold=12 39 | SplitFactor=9 40 | SplitThreshold=18 41 | Ratio=0.1 42 | RecursiveCheckSmallCluster=true 43 | PrintSizeCount=true 44 | 45 | [BuildHead] 46 | isExecute=true 47 | 48 | [BuildSSDIndex] 49 | isExecute=true 50 | BuildSsdIndex=true 51 | InternalResultNum=64 52 | NumberOfThreads=160 53 | ReplicaCount=8 54 | PostingPageLimit=4 55 | OutputEmptyReplicaID=1 56 | TmpDir=/home/sosp/data/store_spacev100m/tmpdir -------------------------------------------------------------------------------- /Script_AE/Figure9/data_shifting.sh: -------------------------------------------------------------------------------- 1 | # noreassign/static/top64/inplace 2 | 3 | # noreassign 4 | cp /home/sosp/data/store_sift_cluster/indexloader_noreassign.ini /home/sosp/data/store_sift_cluster/indexloader.ini 5 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_sift_cluster |tee log_noreassign.log 6 | 7 | # static 8 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_sift_cluster_2m |tee log_static.log 9 | 10 | # inplace 11 | cp /home/sosp/data/store_sift_cluster/indexloader_inplace.ini /home/sosp/data/store_sift_cluster/indexloader.ini 12 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_sift_cluster |tee log_inplace.log 13 
| 14 | # top64 15 | cp /home/sosp/data/store_sift_cluster/indexloader_top64.ini /home/sosp/data/store_sift_cluster/indexloader.ini 16 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_sift_cluster |tee log_split+reassign.log -------------------------------------------------------------------------------- /AnnService/inc/Aggregator/AggregatorExecutionContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_AGGREGATOR_AGGREGATOREXECUTIONCONTEXT_H_ 5 | #define _SPTAG_AGGREGATOR_AGGREGATOREXECUTIONCONTEXT_H_ 6 | 7 | #include "inc/Socket/RemoteSearchQuery.h" 8 | #include "inc/Socket/Packet.h" 9 | 10 | #include 11 | #include 12 | 13 | namespace SPTAG 14 | { 15 | namespace Aggregator 16 | { 17 | 18 | typedef std::shared_ptr AggregatorResult; 19 | 20 | class AggregatorExecutionContext 21 | { 22 | public: 23 | AggregatorExecutionContext(std::size_t p_totalServerNumber, 24 | Socket::PacketHeader p_requestHeader); 25 | 26 | ~AggregatorExecutionContext(); 27 | 28 | std::size_t GetServerNumber() const; 29 | 30 | AggregatorResult& GetResult(std::size_t p_num); 31 | 32 | const Socket::PacketHeader& GetRequestHeader() const; 33 | 34 | bool IsCompletedAfterFinsh(std::uint32_t p_finishedCount); 35 | 36 | private: 37 | std::atomic m_unfinishedCount; 38 | 39 | std::vector m_results; 40 | 41 | Socket::PacketHeader m_requestHeader; 42 | 43 | }; 44 | 45 | 46 | 47 | 48 | } // namespace Aggregator 49 | } // namespace SPTAG 50 | 51 | 52 | #endif // _SPTAG_AGGREGATOR_AGGREGATOREXECUTIONCONTEXT_H_ 53 | 54 | -------------------------------------------------------------------------------- /AnnService/inc/Server/SearchExecutor.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_SERVER_SEARCHEXECUTOR_H_ 5 | #define _SPTAG_SERVER_SEARCHEXECUTOR_H_ 6 | 7 | #include "ServiceContext.h" 8 | #include "ServiceSettings.h" 9 | #include "SearchExecutionContext.h" 10 | #include "QueryParser.h" 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace SPTAG 17 | { 18 | namespace Service 19 | { 20 | 21 | class SearchExecutor 22 | { 23 | public: 24 | typedef std::function)> CallBack; 25 | 26 | SearchExecutor(std::string p_queryString, 27 | std::shared_ptr p_serviceContext, 28 | const CallBack& p_callback); 29 | 30 | ~SearchExecutor(); 31 | 32 | void Execute(); 33 | 34 | private: 35 | void ExecuteInternal(); 36 | 37 | void SelectIndex(); 38 | 39 | private: 40 | CallBack m_callback; 41 | 42 | const std::shared_ptr c_serviceContext; 43 | 44 | std::shared_ptr m_executionContext; 45 | 46 | std::string m_queryString; 47 | 48 | std::vector> m_selectedIndex; 49 | }; 50 | 51 | 52 | } // namespace Service 53 | } // namespace SPTAG 54 | 55 | 56 | #endif // _SPTAG_SERVER_SEARCHEXECUTOR_H_ 57 | -------------------------------------------------------------------------------- /AnnService/src/Helper/Concurrent.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #include "inc/Helper/Concurrent.h" 5 | 6 | using namespace SPTAG; 7 | using namespace SPTAG::Helper::Concurrent; 8 | 9 | // Creates a signal with no outstanding work; Wait() returns immediately until Reset() is called. 10 | WaitSignal::WaitSignal() : m_isWaiting(false), m_unfinished(0) 11 | { 12 | } 13 | 14 | // Creates a signal that expects p_unfinished calls to FinishOne(). 15 | WaitSignal::WaitSignal(std::uint32_t p_unfinished) : m_isWaiting(false), m_unfinished(p_unfinished) 16 | { 17 | } 18 | 19 | // Releases any thread still blocked in Wait() before the members are destroyed. 20 | WaitSignal::~WaitSignal() 21 | { 22 | std::lock_guard guard(m_mutex); 23 | if (m_isWaiting) 24 | { 25 | // Clear the flag first: Wait() only returns once m_isWaiting is false. 26 | m_isWaiting = false; 27 | m_cv.notify_all(); 28 | } 29 | } 30 | 31 | // Wakes current waiters and re-arms the signal with a new outstanding count. 32 | void WaitSignal::Reset(std::uint32_t p_unfinished) 33 | { 34 | std::lock_guard guard(m_mutex); 35 | if (m_isWaiting) 36 | { 37 | m_cv.notify_all(); 38 | } 39 | 40 | m_isWaiting = false; 41 | m_unfinished = p_unfinished; 42 | } 43 | 44 | // Blocks until the outstanding count reaches zero (or the signal is reset or destroyed). 45 | void WaitSignal::Wait() 46 | { 47 | std::unique_lock lock(m_mutex); 48 | if (m_unfinished > 0) 49 | { 50 | m_isWaiting = true; 51 | // Wait with a predicate: condition variables may wake spuriously, so only 52 | // return once FinishOne(), Reset() or the destructor has cleared m_isWaiting. 53 | m_cv.wait(lock, [this]() { return !m_isWaiting; }); 54 | } 55 | } 56 | 57 | // Marks one unit of work as done; the call that finishes the last unit wakes the waiters. 58 | void WaitSignal::FinishOne() 59 | { 60 | if (1 == m_unfinished.fetch_sub(1)) 61 | { 62 | std::lock_guard guard(m_mutex); 63 | if (m_isWaiting) 64 | { 65 | m_isWaiting = false; 66 | m_cv.notify_all(); 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /Test/src/Base64HelperTest.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #include "inc/Helper/Base64Encode.h" 5 | #include "inc/Test.h" 6 | 7 | #include 8 | 9 | BOOST_AUTO_TEST_SUITE(Base64Test) 10 | 11 | BOOST_AUTO_TEST_CASE(Base64EncDec) 12 | { 13 | using namespace SPTAG::Helper::Base64; 14 | 15 | const size_t bufferSize = 1 << 10; 16 | std::unique_ptr rawBuffer(new uint8_t[bufferSize]); 17 | std::unique_ptr encBuffer(new char[bufferSize]); 18 | std::unique_ptr rawBuffer2(new uint8_t[bufferSize]); 19 | 20 | for (size_t inputSize = 1; inputSize < 128; ++inputSize) 21 | { 22 | for (size_t i = 0; i < inputSize; ++i) 23 | { 24 | rawBuffer[i] = static_cast(i); 25 | } 26 | 27 | size_t encBufLen = CapacityForEncode(inputSize); 28 | BOOST_CHECK(encBufLen < bufferSize); 29 | 30 | size_t encOutLen = 0; 31 | BOOST_CHECK(Encode(rawBuffer.get(), inputSize, encBuffer.get(), encOutLen)); 32 | BOOST_CHECK(encBufLen >= encOutLen); 33 | 34 | size_t decBufLen = CapacityForDecode(encOutLen); 35 | BOOST_CHECK(decBufLen < bufferSize); 36 | 37 | size_t decOutLen = 0; 38 | BOOST_CHECK(Decode(encBuffer.get(), encOutLen, rawBuffer.get(), decOutLen)); 39 | BOOST_CHECK(decBufLen >= decOutLen); 40 | } 41 | } 42 | 43 | BOOST_AUTO_TEST_SUITE_END() -------------------------------------------------------------------------------- /Test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. All rights reserved. 2 | # Licensed under the MIT License. 
3 | 4 | if (NOT LIBRARYONLY) 5 | if(NOT WIN32) 6 | ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK) 7 | message (STATUS "BOOST_TEST_DYN_LINK") 8 | endif() 9 | 10 | # The unit tests need Boost 1.66 or newer; keep the error message below in sync with this version. 11 | find_package(Boost 1.66 COMPONENTS system thread serialization wserialization regex filesystem unit_test_framework) 12 | if (Boost_FOUND) 13 | include_directories (${Boost_INCLUDE_DIR}) 14 | link_directories (${Boost_LIBRARY_DIR}) 15 | message (STATUS "Found Boost.") 16 | message (STATUS "Include Path: ${Boost_INCLUDE_DIRS}") 17 | message (STATUS "Library Path: ${Boost_LIBRARY_DIRS}") 18 | message (STATUS "Library: ${Boost_LIBRARIES}") 19 | else() 20 | message (FATAL_ERROR "Could not find Boost 1.66 or newer!") 21 | endif() 22 | 23 | include_directories(${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Test ${PROJECT_SOURCE_DIR}/ThirdParty/spdk/build/include) 24 | 25 | file(GLOB TEST_HDR_FILES ${PROJECT_SOURCE_DIR}/Test/inc/Test.h) 26 | file(GLOB TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/Test/src/*.cpp) 27 | add_executable(SPTAGTest ${TEST_SRC_FILES} ${TEST_HDR_FILES}) 28 | target_link_libraries(SPTAGTest SPTAGLibStatic ssdservingLib ${Boost_LIBRARIES}) 29 | 30 | install(TARGETS SPTAGTest 31 | RUNTIME DESTINATION bin 32 | ARCHIVE DESTINATION lib 33 | LIBRARY DESTINATION lib) 34 | endif() 35 | -------------------------------------------------------------------------------- /AnnService/inc/Core/Common/InstructionUtils.h: -------------------------------------------------------------------------------- 1 | #ifndef _SPTAG_COMMON_INSTRUCTIONUTILS_H_ 2 | #define _SPTAG_COMMON_INSTRUCTIONUTILS_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifndef GPU 10 | 11 | #ifndef _MSC_VER 12 | #include 13 | #include 14 | #include 15 | 16 | void cpuid(int info[4], int InfoType); 17 | 18 | #else 19 | #include 20 | #define cpuid(info, x) __cpuidex(info, x, 0) 21 | #endif 22 | 23 | #endif 24 | 25 | namespace SPTAG { 26 | namespace COMMON { 27 | 28 | class InstructionSet 29 | { 30 | // forward declarations 31 | class 
InstructionSet_Internal; 32 | 33 | public: 34 | // getters 35 | static bool AVX(void); 36 | static bool SSE(void); 37 | static bool SSE2(void); 38 | static bool AVX2(void); 39 | static bool AVX512(void); 40 | static void PrintInstructionSet(void); 41 | 42 | private: 43 | static const InstructionSet_Internal CPU_Rep; 44 | 45 | class InstructionSet_Internal 46 | { 47 | public: 48 | InstructionSet_Internal(); 49 | bool HW_SSE; 50 | bool HW_SSE2; 51 | bool HW_AVX; 52 | bool HW_AVX2; 53 | bool HW_AVX512; 54 | }; 55 | }; 56 | } 57 | } 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /Script_AE/Figure6/overall_spacev_spann.sh: -------------------------------------------------------------------------------- 1 | cp /home/sosp/store_spacev100m/data/indexloader_spann.ini /home/sosp/data/store_spacev100m/indexloader.ini 2 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_spacev100m/|tee log_overall_performance_spacev_spann.log 3 | python process_spann.py log_overall_performance_spacev_spann.log overall_performance_spacev_spann_result.csv 4 | 5 | mkdir spann_result 6 | mv /home/sosp/result_overall_spacev_spann* spann_result 7 | 8 | resultnamePrefix=/spann_result/ 9 | i=-1 10 | for FILE in `ls -v1 ./spann_result/` 11 | do 12 | if [ $i -eq -1 ]; 13 | then 14 | /home/sosp/SPFresh/Release/usefultool -CallRecall true -resultNum 10 -queryPath /home/sosp/data/spacev_data/query.i8bin -searchResult $PWD$resultnamePrefix$FILE -truthType DEFAULT -truthPath /home/sosp/data/spacev_data/msspacev-100M -VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --vectortype int8 -d 100 -f DEFAULT |tee log_spann_$i 15 | else 16 | /home/sosp/SPFresh/Release/usefultool -CallRecall true -resultNum 10 -queryPath /home/sosp/data/spacev_data/query.i8bin -searchResult $PWD$resultnamePrefix$FILE 
-truthType DEFAULT -truthPath /home/sosp/data/spacev_data/spacev100m_update_truth_after$i -VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --vectortype int8 -d 100 -f DEFAULT |tee log_spann_$i 17 | fi 18 | let "i=i+1" 19 | done -------------------------------------------------------------------------------- /Script_AE/Figure6/overall_spacev_spfresh.sh: -------------------------------------------------------------------------------- 1 | cp /home/sosp/data/store_spacev100m/indexloader_spfresh.ini /home/sosp/data/store_spacev100m/indexloader.ini 2 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh /home/sosp/data/store_spacev100m/|tee log_overall_performance_spacev_spfresh.log 3 | python process_spfresh.py log_overall_performance_spacev_spfresh.log overall_performance_spacev_spfresh_result.csv 4 | 5 | mkdir spfresh_result 6 | mv /home/sosp/result_overall_spacev_spfresh* spfresh_result 7 | 8 | resultnamePrefix=/spfresh_result/ 9 | i=-1 10 | for FILE in `ls -v1 ./spfresh_result/` 11 | do 12 | if [ $i -eq -1 ]; 13 | then 14 | /home/sosp/SPFresh/Release/usefultool -CallRecall true -resultNum 10 -queryPath /home/sosp/data/spacev_data/query.i8bin -searchResult $PWD$resultnamePrefix$FILE -truthType DEFAULT -truthPath /home/sosp/data/spacev_data/msspacev-100M -VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --vectortype int8 -d 100 -f DEFAULT |tee log_spfresh_$i 15 | else 16 | /home/sosp/SPFresh/Release/usefultool -CallRecall true -resultNum 10 -queryPath /home/sosp/data/spacev_data/query.i8bin -searchResult $PWD$resultnamePrefix$FILE -truthType DEFAULT -truthPath /home/sosp/data/spacev_data/spacev100m_update_truth_after$i -VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --vectortype int8 -d 100 -f DEFAULT |tee log_spfresh_$i 17 | fi 18 | let "i=i+1" 19 | done 
-------------------------------------------------------------------------------- /Script_AE/Figure6/overall_spacev_diskann.sh: -------------------------------------------------------------------------------- 1 | /home/sosp/DiskANN_Baseline/build/tests/overall_performance int8 ~/data/spacev_data/spacev100m_base.i8bin 75 32 1.2 75 64 1.2 100000000 1 25 0 ~/testbed/store_diskann_100m/diskann_spacev_100m true false ~/data/spacev_data/spacev200m_base.i8bin ~/data/spacev_data/query.i8bin ~/data/truth 10 40 2 ~/data/spacev_data/spacev100m_update_trace 100 |tee log_overall_performance_spacev_diskann.log 2 | python process_diskann.py log_overall_performance_spacev_diskann.log overall_performance_spacev_diskann_result.csv 3 | 4 | mkdir diskann_result 5 | mv /home/sosp/result_overall_spacev_diskann* diskann_result 6 | 7 | resultnamePrefix=/diskann_result/ 8 | i=-1 9 | for FILE in `ls -v1 ./diskann_result/` 10 | do 11 | if [ $i -eq -1 ]; 12 | then 13 | /home/sosp/SPFresh/Release/usefultool -CallRecall true -resultNum 10 -queryPath /home/sosp/data/spacev_data/query.i8bin -searchResult $PWD$resultnamePrefix$FILE -truthType DEFAULT -truthPath /home/sosp/data/spacev_data/msspacev-100M -VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --vectortype int8 -d 100 -f DEFAULT |tee log_diskann_$i 14 | else 15 | /home/sosp/SPFresh/Release/usefultool -CallRecall true -resultNum 10 -queryPath /home/sosp/data/spacev_data/query.i8bin -searchResult $PWD$resultnamePrefix$FILE -truthType DEFAULT -truthPath /home/sosp/data/spacev_data/spacev100m_update_truth_after$i -VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --vectortype int8 -d 100 -f DEFAULT |tee log_diskann_$i 16 | fi 17 | let "i=i+1" 18 | done -------------------------------------------------------------------------------- /AnnService/inc/Core/Common/Checksum.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 
2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_COMMON_CHECKSUM_H_ 5 | #define _SPTAG_COMMON_CHECKSUM_H_ 6 | 7 | #include 8 | #include "inc/Core/Common.h" 9 | 10 | namespace SPTAG 11 | { 12 | typedef uint8_t ChecksumType; 13 | 14 | namespace COMMON 15 | { 16 | class Checksum 17 | { 18 | public: 19 | Checksum() : m_type(0), m_seed(0), m_skip(false) 20 | { 21 | } 22 | 23 | void Initialize(bool p_skip, uint8_t p_type, int p_seed = 0) 24 | { 25 | m_type = p_type; 26 | m_seed = p_seed; 27 | m_skip = p_skip; 28 | } 29 | 30 | ChecksumType CalcChecksum(const char *p_data, int p_length) 31 | { 32 | uint8_t cs = m_seed; 33 | for (int i = 0; i < p_length; i++) 34 | cs ^= p_data[i]; 35 | return cs; 36 | } 37 | 38 | ChecksumType AppendChecksum(ChecksumType p_checksum, const char* p_data, int p_length) 39 | { 40 | for (int i = 0; i < p_length; i++) 41 | p_checksum ^= p_data[i]; 42 | return p_checksum; 43 | } 44 | 45 | bool ValidateChecksum(const char *p_data, int p_length, ChecksumType p_checksum) 46 | { 47 | if (m_skip) return true; 48 | return (CalcChecksum(p_data, p_length) == p_checksum); 49 | } 50 | 51 | private: 52 | uint8_t m_type; 53 | int m_seed; 54 | bool m_skip; 55 | }; 56 | } // namespace COMMON 57 | } // namespace SPTAG 58 | 59 | #endif // _SPTAG_COMMON_CHECKSUM_H_ -------------------------------------------------------------------------------- /Script_AE/Figure11/parameter_study_balance.sh: -------------------------------------------------------------------------------- 1 | loaderPath="/home/sosp/data/store_sift1m/indexloader.ini" 2 | storePath="/home/sosp/data/store_sift1m" 3 | InsertLine="109c InsertThreadNum=" 4 | AppendLine="110c AppendThreadNum=" 5 | DeleteQPSLine="122c DeleteQPS=" 6 | Insert=1 7 | Append=1 8 | DeleteQPS=1000 9 | logPath="log_" 10 | newDeleteQPS=0 11 | 12 | 13 | for i in {1..4} 14 | do 15 | let 'newDeleteQPS=Insert*DeleteQPS' 16 | newInsertLine=$InsertLine$Insert 17 | newAppendLine=$AppendLine$Append 18 | 
newDeleteQPSLine=$DeleteQPSLine$newDeleteQPS 19 | sed -i "$newInsertLine" $loaderPath 20 | sed -i "$newAppendLine" $loaderPath 21 | sed -i "$newDeleteQPSLine" $loaderPath 22 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh $storePath |tee $logPath$Insert$Append 23 | let 'Insert=Insert*2' 24 | done 25 | 26 | let 'Insert=8' 27 | 28 | for i in {1..2} 29 | do 30 | let 'Append=Append*2' 31 | let 'newDeleteQPS=Insert*DeleteQPS' 32 | newInsertLine=$InsertLine$Insert 33 | newAppendLine=$AppendLine$Append 34 | newDeleteQPSLine=$DeleteQPSLine$newDeleteQPS 35 | sed -i "$newInsertLine" $loaderPath 36 | sed -i "$newAppendLine" $loaderPath 37 | sed -i "$newDeleteQPSLine" $loaderPath 38 | PCI_ALLOWED="c636:00:00.0" SPFRESH_SPDK_USE_SSD_IMPL=1 SPFRESH_SPDK_CONF=/home/sosp/SPFresh/bdev.json SPFRESH_SPDK_BDEV=Nvme0n1 sudo -E /home/sosp/SPFresh/Release/spfresh $storePath |tee $logPath$Insert$Append 39 | done -------------------------------------------------------------------------------- /AnnService/src/Core/Common/CommonUtils.cpp: -------------------------------------------------------------------------------- 1 | #include "inc/Core/Common/CommonUtils.h" 2 | #include "inc/Core/Common/DistanceUtils.h" 3 | 4 | using namespace SPTAG; 5 | using namespace SPTAG::COMMON; 6 | 7 | #define DefineVectorValueType(Name, Type) template int Utils::GetBase(); 8 | #include "inc/Core/DefinitionList.h" 9 | #undef DefineVectorValueType 10 | 11 | template void Utils::BatchNormalize(T *data, SizeType row, DimensionType col, int base, int threads) 12 | { 13 | std::vector mythreads; 14 | mythreads.reserve(threads); 15 | std::atomic_size_t sent(0); 16 | for (int tid = 0; tid < threads; tid++) 17 | { 18 | mythreads.emplace_back([&, tid]() { 19 | size_t i = 0; 20 | while (true) 21 | { 22 | i = sent.fetch_add(1); 23 | if (i < row) 24 | { 25 | SPTAG::COMMON::Utils::Normalize(data + i * 
(size_t)col, col, base); 26 | } 27 | else 28 | { 29 | return; 30 | } 31 | } 32 | }); 33 | } 34 | for (auto &t : mythreads) 35 | { 36 | t.join(); 37 | } 38 | mythreads.clear(); 39 | } 40 | 41 | #define DefineVectorValueType(Name, Type) \ 42 | template void Utils::BatchNormalize(Type * data, SizeType row, DimensionType col, int base, int threads); 43 | #include "inc/Core/DefinitionList.h" 44 | #undef DefineVectorValueType -------------------------------------------------------------------------------- /Wrappers/JavaCore.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {ba4289c4-f872-4dbc-a57f-7b415614afb3} 14 | 15 | 16 | 17 | 18 | Header Files 19 | 20 | 21 | Header Files 22 | 23 | 24 | 25 | 26 | Source Files 27 | 28 | 29 | Source Files 30 | 31 | 32 | 33 | 34 | Resources 35 | 36 | 37 | Resources 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /Wrappers/CsharpCore.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {ba4289c4-f872-4dbc-a57f-7b415614afb3} 14 | 15 | 16 | 17 | 18 | Header Files 19 | 20 | 21 | Header Files 22 | 23 | 24 | 25 | 26 | Source Files 27 | 28 | 29 | Source Files 30 | 31 | 32 | 33 | 34 | Resources 35 | 36 | 37 | Resources 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /Wrappers/PythonCore.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | 
cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {ba4289c4-f872-4dbc-a57f-7b415614afb3} 14 | 15 | 16 | 17 | 18 | Header Files 19 | 20 | 21 | Header Files 22 | 23 | 24 | 25 | 26 | Source Files 27 | 28 | 29 | Source Files 30 | 31 | 32 | 33 | 34 | Resources 35 | 36 | 37 | Resources 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /SPTAG.WinRT.nuspec: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | SPTAG.WinRT 5 | 1.2.13-mainOPQ-CoreLib-SPANN-withsource-a6b7604 6 | SPTAG.WinRT 7 | cheqi,haidwa,mingqli 8 | cheqi,haidwa,mingqli,zhah 9 | false 10 | https://github.com/microsoft/SPTAG 11 | https://github.com/microsoft/SPTAG 12 | SPTAG (Space Partition Tree And Graph) is a library for large scale vector approximate nearest neighbor search scenario released by Microsoft Research (MSR) and Microsoft Bing. 13 | publish with commit microsoft/add python version in wheel package/a6b7604 14 | Copyright © Microsoft 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /Test/src/IniReaderTest.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #include "inc/Helper/SimpleIniReader.h" 5 | #include "inc/Test.h" 6 | 7 | #include 8 | 9 | BOOST_AUTO_TEST_SUITE(IniReaderTest) 10 | 11 | BOOST_AUTO_TEST_CASE(IniReaderLoadTest) 12 | { 13 | std::ofstream tmpIni("temp.ini"); 14 | tmpIni << "[Common]" << std::endl; 15 | tmpIni << "; Comment " << std::endl; 16 | tmpIni << "Param1=1" << std::endl; 17 | tmpIni << "Param2=Exp=2" << std::endl; 18 | 19 | tmpIni.close(); 20 | 21 | SPTAG::Helper::IniReader reader; 22 | BOOST_CHECK(SPTAG::ErrorCode::Success == reader.LoadIniFile("temp.ini")); 23 | 24 | BOOST_CHECK(reader.DoesSectionExist("Common")); 25 | BOOST_CHECK(reader.DoesParameterExist("Common", "Param1")); 26 | BOOST_CHECK(reader.DoesParameterExist("Common", "Param2")); 27 | 28 | BOOST_CHECK(!reader.DoesSectionExist("NotExist")); 29 | BOOST_CHECK(!reader.DoesParameterExist("NotExist", "Param1")); 30 | BOOST_CHECK(!reader.DoesParameterExist("Common", "ParamNotExist")); 31 | 32 | BOOST_CHECK(1 == reader.GetParameter("Common", "Param1", 0)); 33 | BOOST_CHECK(0 == reader.GetParameter("Common", "ParamNotExist", 0)); 34 | 35 | BOOST_CHECK(std::string("Exp=2") == reader.GetParameter("Common", "Param2", std::string())); 36 | BOOST_CHECK(std::string("1") == reader.GetParameter("Common", "Param1", std::string())); 37 | BOOST_CHECK(std::string() == reader.GetParameter("Common", "ParamNotExist", std::string())); 38 | } 39 | 40 | BOOST_AUTO_TEST_SUITE_END() -------------------------------------------------------------------------------- /AnnService/inc/Aggregator/AggregatorContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_AGGREGATOR_AGGREGATORCONTEXT_H_ 5 | #define _SPTAG_AGGREGATOR_AGGREGATORCONTEXT_H_ 6 | 7 | #include "inc/Socket/Common.h" 8 | #include "inc/Core/VectorSet.h" 9 | #include "AggregatorSettings.h" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace SPTAG 16 | { 17 | namespace Aggregator 18 | { 19 | 20 | enum RemoteMachineStatus : uint8_t 21 | { 22 | Disconnected = 0, 23 | 24 | Connecting, 25 | 26 | Connected 27 | }; 28 | 29 | 30 | struct RemoteMachine 31 | { 32 | RemoteMachine(); 33 | 34 | std::string m_address; 35 | 36 | std::string m_port; 37 | 38 | Socket::ConnectionID m_connectionID; 39 | 40 | std::atomic m_status; 41 | }; 42 | 43 | class AggregatorContext 44 | { 45 | public: 46 | AggregatorContext(const std::string& p_filePath); 47 | 48 | ~AggregatorContext(); 49 | 50 | bool IsInitialized() const; 51 | 52 | const std::vector>& GetRemoteServers() const; 53 | 54 | const std::shared_ptr& GetSettings() const; 55 | 56 | const std::shared_ptr& GetCenters() const; 57 | 58 | private: 59 | std::vector> m_remoteServers; 60 | 61 | std::shared_ptr m_centers; 62 | 63 | std::shared_ptr m_settings; 64 | 65 | bool m_initialized; 66 | }; 67 | 68 | } // namespace Aggregator 69 | } // namespace AnnService 70 | 71 | 72 | #endif // _SPTAG_AGGREGATOR_AGGREGATORCONTEXT_H_ 73 | -------------------------------------------------------------------------------- /Script_AE/generateOverallPerformanceTraceAndTruth.sh: -------------------------------------------------------------------------------- 1 | setname="6c VectorPath=spacev100m_update_set" 2 | truthname="18c TruthPath=spacev100m_update_truth" 3 | deletesetname="spacev100m_update_set" 4 | reservesetname="spacev100m_update_reserve" 5 | currentsetname="spacev100m_update_current" 6 | for i in {0..99} 7 | do 8 | /home/sosp/SPFresh/Release/usefultool -GenTrace true --vectortype int8 --VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --filetype DEFAULT --UpdateSize 1000000 --BaseNum 100000000 
--ReserveNum 100000000 --CurrentListFileName spacev100m_update_current --ReserveListFileName spacev100m_update_reserve --TraceFileName spacev100m_update_trace -NewDataSetFileName spacev100m_update_set -d 100 --Batch $i -f DEFAULT 9 | newsetname=$setname$i 10 | newtruthname=$truthname$i 11 | newdeletesetname=$deletesetname$i 12 | newreservesetname=$reservesetname$i 13 | newcurrentsetname=$currentsetname$i 14 | sed -i "$newsetname" genTruth.ini 15 | sed -i "$newtruthname" genTruth.ini 16 | /home/sosp/SPFresh/Release/ssdserving genTruth.ini 17 | /home/sosp/SPFresh/Release/usefultool -ConvertTruth true --vectortype int8 --VectorPath /home/sosp/data/spacev_data/spacev200m_base.i8bin --filetype DEFAULT --UpdateSize 1000000 --BaseNum 100000000 --ReserveNum 100000000 --CurrentListFileName spacev100m_update_current --ReserveListFileName spacev100m_update_reserve --TraceFileName spacev100m_update_trace -NewDataSetFileName spacev100m_update_set -d 100 --Batch $i -f DEFAULT --truthPath spacev100m_update_truth --truthType DEFAULT --querySize 10000 --resultNum 100 18 | rm -rf $deletesetname 19 | rm -rf $newreservesetname 20 | rm -rf $newcurrentsetname 21 | done -------------------------------------------------------------------------------- /AnnService/inc/Server/SearchService.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_SERVER_SERVICE_H_ 5 | #define _SPTAG_SERVER_SERVICE_H_ 6 | 7 | #include "ServiceContext.h" 8 | #include "../Socket/Server.h" 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace SPTAG 18 | { 19 | namespace Service 20 | { 21 | 22 | class SearchExecutionContext; 23 | 24 | class SearchService 25 | { 26 | public: 27 | SearchService(); 28 | 29 | ~SearchService(); 30 | 31 | bool Initialize(int p_argNum, char* p_args[]); 32 | 33 | void Run(); 34 | 35 | private: 36 | void RunSocketMode(); 37 | 38 | void RunInteractiveMode(); 39 | 40 | void SearchHanlder(Socket::ConnectionID p_localConnectionID, Socket::Packet p_packet); 41 | 42 | void SearchHanlderCallback(std::shared_ptr p_exeContext, 43 | Socket::Packet p_srcPacket); 44 | 45 | private: 46 | enum class ServeMode : std::uint8_t 47 | { 48 | Interactive, 49 | 50 | Socket 51 | }; 52 | 53 | std::shared_ptr m_serviceContext; 54 | 55 | std::shared_ptr m_socketServer; 56 | 57 | bool m_initialized; 58 | 59 | ServeMode m_serveMode; 60 | 61 | std::unique_ptr m_threadPool; 62 | 63 | boost::asio::io_context m_ioContext; 64 | 65 | boost::asio::signal_set m_shutdownSignals; 66 | }; 67 | 68 | 69 | } // namespace Server 70 | } // namespace AnnService 71 | 72 | 73 | #endif // _SPTAG_SERVER_SERVICE_H_ 74 | -------------------------------------------------------------------------------- /Wrappers/JavaClient.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | 26 | 27 | Source Files 28 | 29 | 30 | Source Files 31 | 32 | 33 
| 34 | 35 | Resource Files 36 | 37 | 38 | Resource Files 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /Wrappers/CsharpClient.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | 26 | 27 | Source Files 28 | 29 | 30 | Source Files 31 | 32 | 33 | 34 | 35 | Resource Files 36 | 37 | 38 | Resource Files 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /Wrappers/PythonClient.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | 26 | 27 | Source Files 28 | 29 | 30 | Source Files 31 | 32 | 33 | 34 | 35 | Resource Files 36 | 37 | 38 | Resource Files 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /AnnService/inc/Helper/VectorSetReaders/MemoryReader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_HELPER_VECTORSETREADERS_MEMORYREADER_H_ 5 | #define _SPTAG_HELPER_VECTORSETREADERS_MEMORYREADER_H_ 6 | 7 | #include "inc/Helper/VectorSetReader.h" 8 | 9 | namespace SPTAG 10 | { 11 | namespace Helper 12 | { 13 | 14 | class MemoryVectorReader : public VectorSetReader 15 | { 16 | public: 17 | MemoryVectorReader(std::shared_ptr p_options, std::shared_ptr p_vectors) : 18 | VectorSetReader(p_options), m_vectors(p_vectors) 19 | {} 20 | 21 | virtual ~MemoryVectorReader() {} 22 | 23 | virtual ErrorCode LoadFile(const std::string& p_filePaths) { return ErrorCode::Success; } 24 | 25 | virtual std::shared_ptr GetVectorSet(SizeType start = 0, SizeType end = -1) const 26 | { 27 | if (end < 0 || end > m_vectors->Count()) end = m_vectors->Count(); 28 | return std::shared_ptr(new BasicVectorSet(ByteArray((std::uint8_t*)(m_vectors->GetVector(start)), (end - start) * m_vectors->PerVectorDataSize(), false), 29 | m_vectors->GetValueType(), 30 | m_vectors->Dimension(), 31 | end - start)); 32 | } 33 | 34 | virtual std::shared_ptr GetMetadataSet() const { return nullptr; } 35 | 36 | private: 37 | std::shared_ptr m_vectors; 38 | }; 39 | 40 | } // namespace Helper 41 | } // namespace SPTAG 42 | 43 | #endif // _SPTAG_HELPER_VECTORSETREADERS_MEMORYREADER_H_ 44 | -------------------------------------------------------------------------------- /Script_AE/Figure10/process_para_range.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import csv 4 | 5 | 6 | topkList = [0, 8, 64, 128] 7 | 8 | avg_latency = [] 9 | accuracy = [] 10 | 11 | for i in topkList: 12 | templist_latency = [] 13 | 14 | templist_accuracy = [] 15 | 16 | templist_accuracy.append('') 17 | templist_latency.append('') 18 | 19 | fileName = sys.argv[1] + str(i) 20 | 21 | log_f = open(fileName) 22 | 23 | line_count = 0 24 | 25 | while True: 26 | line = log_f.readline() 27 | line_count+=1 28 | 29 | result_group = line.split() 30 | 31 | if 
len(result_group) > 2 and result_group[1] == "Total" and result_group[2] == "Vector": 32 | break 33 | 34 | while True: 35 | line = log_f.readline() 36 | line_count+=1 37 | 38 | if line == '': 39 | break 40 | 41 | result_group = line.split() 42 | 43 | if len(result_group) > 1 and result_group[0] == "Total" and result_group[1] == "Latency": 44 | line = log_f.readline() 45 | line_count+=1 46 | line = log_f.readline() 47 | line_count+=1 48 | result_group = line.split() 49 | templist_latency.append(float(result_group[1])) 50 | if len(result_group) > 2 and result_group[1] == "Recall10@10:": 51 | templist_accuracy.append(float(result_group[2])) 52 | 53 | accuracy.append(templist_accuracy) 54 | avg_latency.append(templist_latency) 55 | 56 | 57 | with open("parameter_study_range.csv", 'w') as f: 58 | writer = csv.writer(f, delimiter=',') 59 | writer.writerows(zip(avg_latency[0], accuracy[0], avg_latency[1], accuracy[1], avg_latency[2], accuracy[2], avg_latency[3], accuracy[3])) 60 | 61 | -------------------------------------------------------------------------------- /Script_AE/Figure7/limits.p: -------------------------------------------------------------------------------- 1 | # For a single column, set the width at 3.3 inches 2 | # For across two columns, set the width at 7 inches 3 | 4 | set terminal pdfcairo size 3.3, 1.75 font 'Linux Biolinum O,12' 5 | # set terminal pdfcairo size 7, 2.07 font "UbuntuMono-Regular, 11" 6 | 7 | # set default line style 8 | set style line 1 lc rgb '#056bfa' lt 1 lw 1.7 pt 7 ps 1.5 9 | set style line 2 lc rgb '#05a8fa' lt 1 lw 1.7 pt 7 ps 1.5 10 | set style line 3 lc rgb '#fb8500' lt 1 lw 1.7 pt 7 ps 1.5 11 | set style line 4 lc rgb '#ffb703' lt 1 lw 1.7 pt 7 ps 1.5 12 | set style line 5 lc rgb '#b30018' lt 1 lw 1.7 pt 7 ps 1.5 13 | set style line 6 lc rgb '#fa3605' lt 1 lw 1.7 pt 7 ps 1.5 14 | 15 | # set grid style 16 | set style line 20 lc rgb '#dddddd' lt 1 lw 1 17 | set style fill solid 18 | 19 | set datafile separator "," 20 | set 
encoding utf8 21 | set autoscale 22 | set grid ls 20 noxtics ytics 23 | # set key box ls 20 opaque fc rgb "#3fffffff" 24 | set tics scale 0.5 25 | set xtics nomirror out autofreq offset 0,0.5,0 26 | set ytics nomirror out autofreq offset 0.5,0,0 27 | set border lw 2 28 | set yrange [0:*] 29 | 30 | # Start the first plot 31 | set output "IOPSLimit.pdf" 32 | 33 | set multiplot 34 | 35 | set xlabel "ThreadNum" offset 0,1,0 36 | set ylabel "Query per Second" offset 1,0,0 37 | 38 | set y2tics out autofreq format '%gk' 39 | set y2label "IOPS" 40 | set y2range [0:700] 41 | set key reverse Left 42 | set boxwidth 0.5 43 | 44 | set size 1.045, 0.92 45 | set origin -0.01, 0.08 46 | 47 | # set title "IOPS Limit" offset 0, -0.7 48 | plot "IOPS_limit.csv" using 1:2:xtic(4) every ::1 with boxes title 'QPS' at 0.2, 0.07, \ 49 | "IOPS_limit.csv" using 1:($3/1000.):xtic(4) every ::1 with linespoints title 'IOPS' at 0.8, 0.07 axes x1y2 ls 1 pointsize 0.5 50 | 51 | unset multiplot -------------------------------------------------------------------------------- /Test/src/SIMDTest.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | // #include 5 | #include "inc/Core/Common/SIMDUtils.h" 6 | #include "inc/Test.h" 7 | #include 8 | 9 | template static void ComputeSum(T *pX, const T *pY, SPTAG::DimensionType length) 10 | { 11 | const T *pEnd1 = pX + length; 12 | while (pX < pEnd1) 13 | { 14 | *pX++ += *pY++; 15 | } 16 | } 17 | 18 | template T random(int high = RAND_MAX, int low = 0) // Generates a random value. 
19 | { 20 | return (T)(low + float(high - low) * (std::rand() / static_cast(RAND_MAX + 1.0))); 21 | } 22 | 23 | template void test(int high) 24 | { 25 | SPTAG::DimensionType dimension = random(256, 2); 26 | T *X = new T[dimension], *Y = new T[dimension]; 27 | BOOST_ASSERT(X != nullptr && Y != nullptr); 28 | for (SPTAG::DimensionType i = 0; i < dimension; i++) 29 | { 30 | X[i] = random(high, -high); 31 | Y[i] = random(high, -high); 32 | } 33 | T *X_copy = new T[dimension]; 34 | for (SPTAG::DimensionType i = 0; i < dimension; i++) 35 | { 36 | X_copy[i] = X[i]; 37 | } 38 | ComputeSum(X, Y, dimension); 39 | SPTAG::COMMON::SIMDUtils::ComputeSum(X_copy, Y, dimension); 40 | for (SPTAG::DimensionType i = 0; i < dimension; i++) 41 | { 42 | BOOST_CHECK_CLOSE_FRACTION(double(X[i]), double(X_copy[i]), 1e-5); 43 | } 44 | 45 | delete[] X; 46 | delete[] Y; 47 | delete[] X_copy; 48 | } 49 | 50 | BOOST_AUTO_TEST_SUITE(SIMDTest) 51 | 52 | BOOST_AUTO_TEST_CASE(TestDistanceComputation) 53 | { 54 | test(1); 55 | test(127); 56 | test(32767); 57 | } 58 | 59 | BOOST_AUTO_TEST_SUITE_END() 60 | -------------------------------------------------------------------------------- /AnnService/inc/Core/SPANN/PersistentBuffer.h: -------------------------------------------------------------------------------- 1 | #include "inc/Helper/KeyValueIO.h" 2 | #include 3 | 4 | namespace SPTAG { 5 | namespace SPANN { 6 | // concurrently safe with RocksDBIO 7 | class PersistentBuffer 8 | { 9 | public: 10 | PersistentBuffer(std::shared_ptr db) : db(db), _size(0) { } 11 | 12 | ~PersistentBuffer() {} 13 | 14 | inline int GetNewAssignmentID() { return _size++; } 15 | 16 | inline int PutAssignment(std::string& assignment) 17 | { 18 | int assignmentID = GetNewAssignmentID(); 19 | db->Put(assignmentID, assignment, MaxTimeout, nullptr); 20 | return assignmentID; 21 | } 22 | 23 | inline bool StartToScan(std::string& assignment) 24 | { 25 | SizeType newSize = 0; 26 | if (db->StartToScan(newSize, &assignment) != 
ErrorCode::Success) return false; 27 | _size = newSize+1; 28 | return true; 29 | } 30 | 31 | inline bool NextToScan(std::string& assignment) 32 | { 33 | SizeType newSize = 0; 34 | if (db->NextToScan(newSize, &assignment) != ErrorCode::Success) return false; 35 | _size = newSize+1; 36 | return true; 37 | } 38 | 39 | inline void ClearPreviousRecord() 40 | { 41 | db->DeleteRange(0, _size.load()); 42 | _size = 0; 43 | } 44 | 45 | inline int StopPB() 46 | { 47 | db->ShutDown(); 48 | return 0; 49 | } 50 | 51 | private: 52 | std::shared_ptr db; 53 | std::atomic_int _size; 54 | }; 55 | } 56 | } -------------------------------------------------------------------------------- /AnnService/inc/Helper/VectorSetReader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_HELPER_VECTORSETREADER_H_ 5 | #define _SPTAG_HELPER_VECTORSETREADER_H_ 6 | 7 | #include "inc/Core/Common.h" 8 | #include "inc/Core/VectorSet.h" 9 | #include "inc/Core/MetadataSet.h" 10 | #include "inc/Helper/ArgumentsParser.h" 11 | 12 | #include 13 | 14 | namespace SPTAG 15 | { 16 | namespace Helper 17 | { 18 | 19 | class ReaderOptions : public ArgumentsParser 20 | { 21 | public: 22 | ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, VectorFileType p_fileType, std::string p_vectorDelimiter = "|", std::uint32_t p_threadNum = 32, bool p_normalized = false); 23 | 24 | ~ReaderOptions(); 25 | 26 | SPTAG::VectorValueType m_inputValueType; 27 | 28 | DimensionType m_dimension; 29 | 30 | SPTAG::VectorFileType m_inputFileType; 31 | 32 | std::string m_vectorDelimiter; 33 | 34 | std::uint32_t m_threadNum; 35 | 36 | bool m_normalized; 37 | }; 38 | 39 | class VectorSetReader 40 | { 41 | public: 42 | VectorSetReader(std::shared_ptr p_options); 43 | 44 | virtual ~VectorSetReader(); 45 | 46 | virtual ErrorCode LoadFile(const std::string& p_filePath) = 0; 
47 | 48 | virtual std::shared_ptr GetVectorSet(SizeType start = 0, SizeType end = -1) const = 0; 49 | 50 | virtual std::shared_ptr GetMetadataSet() const = 0; 51 | 52 | virtual bool IsNormalized() const { return m_options->m_normalized; } 53 | 54 | static std::shared_ptr CreateInstance(std::shared_ptr p_options); 55 | 56 | protected: 57 | std::shared_ptr m_options; 58 | }; 59 | 60 | 61 | 62 | } // namespace Helper 63 | } // namespace SPTAG 64 | 65 | #endif // _SPTAG_HELPER_VECTORSETREADER_H_ 66 | -------------------------------------------------------------------------------- /Script_AE/Figure11/process_balance.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import csv 4 | 5 | 6 | insert = [1, 2, 4, 8] 7 | append = [1, 2, 4] 8 | 9 | threadlist = [11, 21, 41, 81, 82, 84] 10 | 11 | fore_throughput = [] 12 | back_throughput = [] 13 | 14 | for i in threadlist: 15 | fileName = sys.argv[1] + str(i) 16 | 17 | log_f = open(fileName) 18 | 19 | while True: 20 | line = log_f.readline() 21 | 22 | if line == '': 23 | break 24 | 25 | result_group = line.split() 26 | 27 | if len(result_group) > 11 and result_group[1] == "Insert:" and result_group[7] == "sending": 28 | fore_throughput.append(float(result_group[10])) 29 | 30 | if len(result_group) > 11 and result_group[1] == "Insert:" and result_group[7] == "actuall": 31 | back_throughput.append(float(result_group[10].rstrip(','))) 32 | 33 | with open("foreground_background.csv", 'w') as f: 34 | writer = csv.writer(f, delimiter=',') 35 | templist = [] 36 | for i in range(0, 11): 37 | templist.append('') 38 | writer.writerow(templist) 39 | for i in range(0,4): 40 | templist = [] 41 | templist.append('') 42 | templist.append(insert[i]) 43 | if insert[i] != 8: 44 | templist.append(fore_throughput[i]) 45 | templist.append(back_throughput[i]) 46 | for j in range(0, 4): 47 | templist.append('') 48 | templist.append(insert[i]) 49 | templist.append(fore_throughput[i+3]) 
50 | templist.append(back_throughput[i+3]) 51 | else: 52 | for j in range(0,3): 53 | templist.append(fore_throughput[j+3]) 54 | templist.append(back_throughput[j+3]) 55 | for j in range(0, 3): 56 | templist.append('') 57 | writer.writerow(templist) 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /AnnService/inc/Socket/Client.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_SOCKET_CLIENT_H_ 5 | #define _SPTAG_SOCKET_CLIENT_H_ 6 | 7 | #include "inc/Core/Common.h" 8 | #include "Connection.h" 9 | #include "ConnectionManager.h" 10 | #include "Packet.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace SPTAG 18 | { 19 | namespace Socket 20 | { 21 | 22 | class Client 23 | { 24 | public: 25 | typedef std::function ConnectCallback; 26 | 27 | Client(const PacketHandlerMapPtr& p_handlerMap, 28 | std::size_t p_threadNum, 29 | std::uint32_t p_heartbeatIntervalSeconds); 30 | 31 | ~Client(); 32 | 33 | ConnectionID ConnectToServer(const std::string& p_address, 34 | const std::string& p_port, 35 | SPTAG::ErrorCode& p_ec); 36 | 37 | void AsyncConnectToServer(const std::string& p_address, 38 | const std::string& p_port, 39 | ConnectCallback p_callback); 40 | 41 | void SendPacket(ConnectionID p_connection, Packet p_packet, std::function p_callback); 42 | 43 | void SetEventOnConnectionClose(std::function p_event); 44 | 45 | private: 46 | void KeepIoContext(); 47 | 48 | private: 49 | std::atomic_bool m_stopped; 50 | 51 | std::uint32_t m_heartbeatIntervalSeconds; 52 | 53 | boost::asio::io_context m_ioContext; 54 | 55 | boost::asio::deadline_timer m_deadlineTimer; 56 | 57 | std::shared_ptr m_connectionManager; 58 | 59 | std::vector m_threadPool; 60 | 61 | const PacketHandlerMapPtr c_requestHandlerMap; 62 | }; 63 | 64 | 65 | } // namespace Socket 
66 | } // namespace SPTAG 67 | 68 | #endif // _SPTAG_SOCKET_CLIENT_H_ 69 | -------------------------------------------------------------------------------- /Wrappers/inc/ClientInterface.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_PW_CLIENTINTERFACE_H_ 5 | #define _SPTAG_PW_CLIENTINTERFACE_H_ 6 | 7 | #include "TransferDataType.h" 8 | #include "inc/Socket/Client.h" 9 | #include "inc/Socket/ResourceManager.h" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | class AnnClient 16 | { 17 | public: 18 | AnnClient(const char* p_serverAddr, const char* p_serverPort); 19 | 20 | ~AnnClient(); 21 | 22 | void SetTimeoutMilliseconds(int p_timeout); 23 | 24 | void SetSearchParam(const char* p_name, const char* p_value); 25 | 26 | void ClearSearchParam(); 27 | 28 | std::shared_ptr Search(ByteArray p_data, int p_resultNum, const char* p_valueType, bool p_withMetaData); 29 | 30 | bool IsConnected() const; 31 | 32 | private: 33 | std::string CreateSearchQuery(const ByteArray& p_data, 34 | int p_resultNum, 35 | bool p_extractMetadata, 36 | SPTAG::VectorValueType p_valueType); 37 | 38 | SPTAG::Socket::PacketHandlerMapPtr GetHandlerMap(); 39 | 40 | void SearchResponseHanlder(SPTAG::Socket::ConnectionID p_localConnectionID, 41 | SPTAG::Socket::Packet p_packet); 42 | 43 | private: 44 | typedef std::function Callback; 45 | 46 | std::uint32_t m_timeoutInMilliseconds; 47 | 48 | std::string m_server; 49 | 50 | std::string m_port; 51 | 52 | std::unique_ptr m_socketClient; 53 | 54 | std::atomic m_connectionID; 55 | 56 | SPTAG::Socket::ResourceManager m_callbackManager; 57 | 58 | std::unordered_map m_params; 59 | 60 | std::mutex m_paramMutex; 61 | }; 62 | 63 | #endif // _SPTAG_PW_CLIENTINTERFACE_H_ 64 | -------------------------------------------------------------------------------- /AnnService/inc/Server/QueryParser.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_SERVER_QUERYPARSER_H_ 5 | #define _SPTAG_SERVER_QUERYPARSER_H_ 6 | 7 | #include "../Core/Common.h" 8 | #include "../Core/CommonDataStructure.h" 9 | #include "inc/Helper/StringConvert.h" 10 | 11 | #include 12 | 13 | namespace SPTAG 14 | { 15 | namespace Service 16 | { 17 | // Converts every string in p_source into an element of a freshly allocated p_dest and sets p_dimension to the element count; on the first failed conversion it clears p_dest, zeroes p_dimension and returns ErrorCode::Fail. 18 | template 19 | ErrorCode 20 | ConvertVectorFromString(const std::vector& p_source, ByteArray& p_dest, SizeType& p_dimension) 21 | { 22 | p_dimension = (SizeType)p_source.size(); 23 | p_dest = ByteArray::Alloc(p_dimension * sizeof(ValueType)); 24 | ValueType* arr = reinterpret_cast(p_dest.Data()); 25 | for (std::size_t i = 0; i < p_source.size(); ++i) 26 | { 27 | if (!Helper::Convert::ConvertStringTo(p_source[i], arr[i])) 28 | { 29 | p_dest.Clear(); 30 | p_dimension = 0; 31 | return ErrorCode::Fail; 32 | } 33 | } 34 | return ErrorCode::Success; 35 | } 36 | // Declaration only: Parse() takes a query string and a vector separator; the getters expose the parsed options, vector elements and Base64 vector. NOTE(review): parsing rules live in the .cpp, not visible here. 37 | class QueryParser 38 | { 39 | public: 40 | typedef std::pair OptionPair; 41 | 42 | QueryParser(); 43 | 44 | ~QueryParser(); 45 | 46 | ErrorCode Parse(const std::string& p_query, const char* p_vectorSeparator); 47 | 48 | const std::vector& GetVectorElements() const; 49 | 50 | const std::vector& GetOptions() const; 51 | 52 | const char* GetVectorBase64() const; 53 | 54 | SizeType GetVectorBase64Length() const; 55 | 56 | private: 57 | std::vector m_options; 58 | 59 | std::vector m_vectorElements; 60 | 61 | const char* m_vectorBase64; 62 | 63 | SizeType m_vectorBase64Length; 64 | 65 | ByteArray m_dataHolder; 66 | 67 | static const char* c_defaultVectorSeparator; 68 | }; 69 | 70 | 71 | } // namespace Service 72 | } // namespace SPTAG 73 | 74 | 75 | #endif // _SPTAG_SERVER_QUERYPARSER_H_ 76 | -------------------------------------------------------------------------------- /AnnService/inc/Helper/Concurrent.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_HELPER_CONCURRENT_H_ 5 | #define _SPTAG_HELPER_CONCURRENT_H_ 6 | 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | namespace SPTAG 14 | { 15 | namespace Helper 16 | { 17 | namespace Concurrent 18 | { 19 | // Non-copyable spin lock over std::atomic_flag: Lock() busy-waits until test_and_set (acquire) succeeds; Unlock() clears the flag (release). 20 | class SpinLock 21 | { 22 | public: 23 | SpinLock() = default; 24 | 25 | void Lock() noexcept 26 | { 27 | while (m_lock.test_and_set(std::memory_order_acquire)) 28 | { 29 | } 30 | } 31 | 32 | void Unlock() noexcept 33 | { 34 | m_lock.clear(std::memory_order_release); 35 | } 36 | 37 | SpinLock(const SpinLock&) = delete; 38 | SpinLock& operator = (const SpinLock&) = delete; 39 | 40 | private: 41 | std::atomic_flag m_lock = ATOMIC_FLAG_INIT; 42 | }; 43 | // Scoped guard for any type with Lock()/Unlock(): locks on construction (the std::adopt_lock_t overload skips the Lock() call) and always calls Unlock() on destruction. 44 | template 45 | class LockGuard { 46 | public: 47 | LockGuard(Lock& lock) noexcept 48 | : m_lock(lock) { 49 | lock.Lock(); 50 | } 51 | 52 | LockGuard(Lock& lock, std::adopt_lock_t) noexcept 53 | : m_lock(lock) {} 54 | 55 | ~LockGuard() { 56 | m_lock.Unlock(); 57 | } 58 | 59 | LockGuard(const LockGuard&) = delete; 60 | LockGuard& operator=(const LockGuard&) = delete; 61 | 62 | private: 63 | Lock& m_lock; 64 | }; 65 | 66 | // Declaration only: holds an unfinished-work counter plus a mutex/condition_variable pair. NOTE(review): presumably Wait() blocks until FinishOne() has brought the counter to zero - confirm in Concurrent.cpp. 67 | class WaitSignal 68 | { 69 | public: 70 | WaitSignal(); 71 | 72 | WaitSignal(std::uint32_t p_unfinished); 73 | 74 | ~WaitSignal(); 75 | 76 | void Reset(std::uint32_t p_unfinished); 77 | 78 | void Wait(); 79 | 80 | void FinishOne(); 81 | 82 | private: 83 | std::atomic m_unfinished; 84 | 85 | std::atomic_bool m_isWaiting; 86 | 87 | std::mutex m_mutex; 88 | 89 | std::condition_variable m_cv; 90 | }; 91 | 92 | 93 | } // namespace Concurrent 94 | } // namespace Helper 95 | } // namespace SPTAG 96 | 97 | #endif // _SPTAG_HELPER_CONCURRENT_H_ 98 | -------------------------------------------------------------------------------- /AnnService/src/Helper/ArgumentsParser.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "inc/Helper/ArgumentsParser.h" 5 | 6 | using namespace SPTAG::Helper; 7 | 8 | ArgumentsParser::IArgument::IArgument() 9 | { 10 | } 11 | 12 | ArgumentsParser::IArgument::~IArgument() 13 | { 14 | } 15 | 16 | ArgumentsParser::ArgumentsParser() 17 | { 18 | } 19 | 20 | ArgumentsParser::~ArgumentsParser() 21 | { 22 | } 23 | 24 | bool ArgumentsParser::Parse(int p_argc, char **p_args) 25 | { 26 | while (p_argc > 0) 27 | { 28 | int last = p_argc; 29 | for (auto &option : m_arguments) 30 | { 31 | if (!option->ParseValue(p_argc, p_args)) 32 | { 33 | SPTAGLIB_LOG(Helper::LogLevel::LL_Empty, "Failed to parse args around \"%s\"\n", *p_args); 34 | PrintHelp(); 35 | return false; 36 | } 37 | } 38 | 39 | if (last == p_argc) 40 | { 41 | p_argc -= 1; 42 | p_args += 1; 43 | } 44 | } 45 | 46 | bool isValid = true; 47 | for (auto &option : m_arguments) 48 | { 49 | if (option->IsRequiredButNotSet()) 50 | { 51 | SPTAGLIB_LOG(Helper::LogLevel::LL_Empty, "Required option not set:\n "); 52 | option->PrintDescription(); 53 | SPTAGLIB_LOG(Helper::LogLevel::LL_Empty, "\n"); 54 | isValid = false; 55 | } 56 | } 57 | 58 | if (!isValid) 59 | { 60 | SPTAGLIB_LOG(Helper::LogLevel::LL_Empty, "\n"); 61 | PrintHelp(); 62 | return false; 63 | } 64 | 65 | return true; 66 | } 67 | 68 | void ArgumentsParser::PrintHelp() 69 | { 70 | SPTAGLIB_LOG(Helper::LogLevel::LL_Empty, "Usage: "); 71 | for (auto &option : m_arguments) 72 | { 73 | SPTAGLIB_LOG(Helper::LogLevel::LL_Empty, "\n "); 74 | option->PrintDescription(); 75 | } 76 | 77 | SPTAGLIB_LOG(Helper::LogLevel::LL_Empty, "\n\n"); 78 | } 79 | -------------------------------------------------------------------------------- /docs/LinuxInstallation.md: -------------------------------------------------------------------------------- 1 | # Setup SPTAG on Ubuntu 2 | 
3 | This section describes how to set up SPTAG on an Ubuntu machine. 4 | 5 | 1. Update Ubuntu and install the build environment 6 | ``` 7 | sudo apt-get update && sudo apt-get install build-essential 8 | ``` 9 | 2. Install cmake 10 | 11 | - Download cmake. You can find the cmake releases [here](https://github.com/Kitware/CMake/releases). 12 | ```bash 13 | # e.g. download cmake version 3.14.7 14 | wget https://github.com/Kitware/CMake/releases/download/v3.14.7/cmake-3.14.7-Linux-x86_64.sh -P opt/ 15 | ``` 16 | - Follow these instructions (the examples assume version 3.14.7; adjust the file name to the version you downloaded): 17 | 1. `chmod +x opt/cmake-3.14.7-Linux-x86_64.sh` (chmod makes the script executable) 18 | 2. `sudo bash opt/cmake-3.14.7-Linux-x86_64.sh` 19 | 3. The script installs to a target directory, so in order to get the `cmake` command, make a symbolic link from the target directory where cmake was extracted: `sudo ln -s TARGET_DIR/cmake-3.14.7-Linux-x86_64/bin/* /usr/local/bin` (replace `TARGET_DIR` with that directory) 20 | 4. `cmake --version` Note: If you encounter the error *The program 'cmake' is currently not installed*, please try the command from step 3 again with a full path (e.g. `sudo ln -s /home/YOUR_USER/SPTAG/opt/cmake-3.14.7-Linux-x86_64/bin/* /usr/local/bin`) 21 | 22 | 5. Install boost 23 | - Download boost 1.67 version: 24 | ```bash 25 | wget https://netix.dl.sourceforge.net/project/boost/boost/1.67.0/boost_1_67_0.tar.gz 26 | ``` 27 | (Some [version mismatch issues](https://github.com/microsoft/SPTAG/issues/26) have been reported on GitHub.) 28 | 29 | 6. Extract and install 30 | ```bash 31 | tar -xzvf boost* 32 | cd boost_1_67_0 33 | ./bootstrap.sh --prefix=/usr/local 34 | ./b2 35 | sudo ./b2 install 36 | sudo apt-get install swig 37 | ``` 38 | 39 | 7. Generate a Release folder in the code directory which will contain all the build targets: 40 | ```bash 41 | mkdir build 42 | cd build && cmake .. && make 43 | ``` 44 | 45 | 8. 
Add the SPTAG module to the Python path 46 | ```bash 47 | # so python can find the SPTAG module (replace /app with your SPTAG code directory) 48 | export PYTHONPATH=/app/Release 49 | ``` 50 | -------------------------------------------------------------------------------- /AnnService/Aggregator.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Header Files 16 | 17 | 18 | Header Files 19 | 20 | 21 | Header Files 22 | 23 | 24 | Header Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | Source Files 42 | 43 | 44 | Source Files 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /AnnService/inc/Socket/ConnectionManager.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_SOCKET_CONNECTIONMANAGER_H_ 5 | #define _SPTAG_SOCKET_CONNECTIONMANAGER_H_ 6 | 7 | #include "Connection.h" 8 | #include "inc/Helper/Concurrent.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | namespace SPTAG 19 | { 20 | namespace Socket 21 | { 22 | 23 | class ConnectionManager : public std::enable_shared_from_this 24 | { 25 | public: 26 | ConnectionManager(); 27 | 28 | ConnectionID AddConnection(boost::asio::ip::tcp::socket&& p_socket, 29 | const PacketHandlerMapPtr& p_handlerMap, 30 | std::uint32_t p_heartbeatIntervalSeconds); 31 | 32 | void RemoveConnection(ConnectionID p_connectionID); 33 | 34 | Connection::Ptr GetConnection(ConnectionID p_connectionID); 35 | 36 | void SetEventOnRemoving(std::function p_event); 37 | 38 | void StopAll(); 39 | 40 | private: 41 | inline static std::uint32_t GetPosition(ConnectionID p_connectionID); 42 | 43 | private: 44 | static constexpr std::uint32_t c_connectionPoolSize = 1 << 8; 45 | 46 | static constexpr std::uint32_t c_connectionPoolMask = c_connectionPoolSize - 1; 47 | 48 | struct ConnectionItem 49 | { 50 | ConnectionItem(); 51 | 52 | std::atomic_bool m_isEmpty; 53 | 54 | Connection::Ptr m_connection; 55 | }; 56 | 57 | // Start from 1. 0 means not assigned. 58 | std::atomic m_nextConnectionID; 59 | 60 | std::atomic m_connectionCount; 61 | 62 | std::array m_connections; 63 | 64 | Helper::Concurrent::SpinLock m_spinLock; 65 | 66 | std::function m_eventOnRemoving; 67 | }; 68 | 69 | 70 | } // namespace Socket 71 | } // namespace SPTAG 72 | 73 | #endif // _SPTAG_SOCKET_CONNECTIONMANAGER_H_ 74 | -------------------------------------------------------------------------------- /AnnService/inc/Server/SearchExecutionContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_SERVER_SEARCHEXECUTIONCONTEXT_H_ 5 | #define _SPTAG_SERVER_SEARCHEXECUTIONCONTEXT_H_ 6 | 7 | #include "inc/Core/VectorIndex.h" 8 | #include "inc/Core/SearchQuery.h" 9 | #include "inc/Socket/RemoteSearchQuery.h" 10 | #include "ServiceSettings.h" 11 | #include "QueryParser.h" 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | namespace SPTAG 19 | { 20 | namespace Service 21 | { 22 | 23 | typedef Socket::IndexSearchResult SearchResult; 24 | // Declaration only: state for one search request - owns a QueryParser, the selected index names, the query vector with its dimension, the requested result count / metadata flag, and the accumulated SearchResult list. 25 | class SearchExecutionContext 26 | { 27 | public: 28 | SearchExecutionContext(const std::shared_ptr& p_serviceSettings); 29 | 30 | ~SearchExecutionContext(); 31 | 32 | ErrorCode ParseQuery(const std::string& p_query); 33 | 34 | ErrorCode ExtractOption(); 35 | 36 | ErrorCode ExtractVector(VectorValueType p_targetType); 37 | 38 | void AddResults(std::string p_indexName, QueryResult& p_results); 39 | 40 | std::vector& GetResults(); 41 | 42 | const std::vector& GetResults() const; 43 | 44 | const ByteArray& GetVector() const; 45 | 46 | const std::vector& GetSelectedIndexNames() const; 47 | 48 | const SizeType GetVectorDimension() const; 49 | 50 | const std::vector& GetOptions() const; 51 | 52 | const SizeType GetResultNum() const; 53 | 54 | const bool GetExtractMetadata() const; 55 | 56 | private: 57 | const std::shared_ptr c_serviceSettings; 58 | 59 | QueryParser m_queryParser; 60 | 61 | std::vector m_indexNames; 62 | 63 | ByteArray m_vector; 64 | 65 | SizeType m_vectorDimension; 66 | 67 | std::vector m_results; 68 | 69 | VectorValueType m_inputValueType; 70 | 71 | bool m_extractMetadata; 72 | 73 | SizeType m_resultNum; 74 | }; 75 | 76 | } // namespace Service 77 | } // namespace SPTAG 78 | 79 | 80 | #endif // _SPTAG_SERVER_SEARCHEXECUTIONCONTEXT_H_ 81 | -------------------------------------------------------------------------------- /Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | FROM 
mcr.microsoft.com/mirror/nvcr/nvidia/cuda:11.8.0-base-ubuntu20.04 2 | WORKDIR /app 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub 7 | 8 | RUN apt-get -y install wget build-essential swig cmake git libnuma-dev python3.8-dev python3-distutils gcc-8 g++-8 \ 9 | libboost-filesystem-dev libboost-test-dev libboost-serialization-dev libboost-regex-dev libboost-serialization-dev libboost-regex-dev libboost-thread-dev libboost-system-dev 10 | 11 | RUN wget https://bootstrap.pypa.io/get-pip.py && python3.8 get-pip.py && python3.8 -m pip install numpy 12 | 13 | RUN git clone https://github.com/NVIDIA/cub && cd cub && git checkout c3cceac && cp -r cub /usr/include/ && cd .. 14 | 15 | RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 \ 16 | && wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb \ 17 | && dpkg -i cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb && cp /var/cuda-repo-ubuntu2004-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/ \ 18 | && apt-get update && apt-get install -y cuda-toolkit-11-8 19 | 20 | LABEL com.nvidia.volumes.needed="nvidia_driver" 21 | LABEL com.nvidia.cuda.version="11.8.0" 22 | 23 | ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} 24 | ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} 25 | 26 | ENV NVIDIA_VISIBLE_DEVICES all 27 | ENV NVIDIA_DRIVER_CAPABILITIES compute,utility 28 | ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0" 29 | 30 | 31 | ENV PYTHONPATH=/app/Release 32 | 33 | COPY CMakeLists.txt ./ 34 | COPY AnnService ./AnnService/ 35 | COPY Test ./Test/ 36 | COPY Wrappers ./Wrappers/ 37 | COPY GPUSupport ./GPUSupport/ 38 | COPY ThirdParty 
./ThirdParty/ 39 | 40 | RUN export CC=/usr/bin/gcc-8 && export CXX=/usr/bin/g++-8 && mkdir build && cd build && cmake .. && make -j && cd .. 41 | -------------------------------------------------------------------------------- /AnnService/inc/Client/ClientWrapper.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_CLIENT_CLIENTWRAPPER_H_ 5 | #define _SPTAG_CLIENT_CLIENTWRAPPER_H_ 6 | 7 | #include "inc/Socket/Client.h" 8 | #include "inc/Socket/RemoteSearchQuery.h" 9 | #include "inc/Socket/ResourceManager.h" 10 | #include "Options.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace SPTAG 22 | { 23 | namespace Client 24 | { 25 | 26 | class ClientWrapper 27 | { 28 | public: 29 | typedef std::function Callback; 30 | 31 | ClientWrapper(const ClientOptions& p_options); 32 | 33 | ~ClientWrapper(); 34 | 35 | void SendQueryAsync(const Socket::RemoteQuery& p_query, 36 | Callback p_callback, 37 | const ClientOptions& p_options); 38 | 39 | void WaitAllFinished(); 40 | 41 | bool IsAvailable() const; 42 | 43 | private: 44 | typedef std::pair ConnectionPair; 45 | 46 | Socket::PacketHandlerMapPtr GetHandlerMap(); 47 | 48 | void DecreaseUnfnishedJobCount(); 49 | 50 | const ConnectionPair& GetConnection(); 51 | 52 | void SearchResponseHanlder(Socket::ConnectionID p_localConnectionID, Socket::Packet p_packet); 53 | 54 | void HandleDeadConnection(Socket::ConnectionID p_cid); 55 | 56 | private: 57 | ClientOptions m_options; 58 | 59 | std::unique_ptr m_client; 60 | 61 | std::atomic m_unfinishedJobCount; 62 | 63 | std::atomic_bool m_isWaitingFinish; 64 | 65 | std::condition_variable m_waitingQueue; 66 | 67 | std::mutex m_waitingMutex; 68 | 69 | std::vector m_connections; 70 | 71 | std::atomic m_spinCountOfConnection; 72 | 73 | 
Socket::ResourceManager m_callbackManager; 74 | }; 75 | 76 | 77 | } // namespace Client 78 | } // namespace SPTAG 79 | 80 | #endif // _SPTAG_CLIENT_CLIENTWRAPPER_H_ 81 | -------------------------------------------------------------------------------- /AnnService/inc/Core/VectorSet.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_VECTORSET_H_ 5 | #define _SPTAG_VECTORSET_H_ 6 | 7 | #include "CommonDataStructure.h" 8 | 9 | namespace SPTAG 10 | { 11 | // Abstract interface for a set of vectors: every accessor, both save operations and Normalize() are pure virtual. 12 | class VectorSet 13 | { 14 | public: 15 | VectorSet(); 16 | 17 | virtual ~VectorSet(); 18 | 19 | virtual VectorValueType GetValueType() const = 0; 20 | 21 | virtual void* GetVector(SizeType p_vectorID) const = 0; 22 | 23 | virtual void* GetData() const = 0; 24 | 25 | virtual DimensionType Dimension() const = 0; 26 | 27 | virtual SizeType Count() const = 0; 28 | 29 | virtual bool Available() const = 0; 30 | 31 | virtual ErrorCode Save(const std::string& p_vectorFile) const = 0; 32 | 33 | virtual ErrorCode AppendSave(const std::string& p_vectorFile) const = 0; 34 | 35 | virtual SizeType PerVectorDataSize() const = 0; 36 | 37 | virtual void Normalize(int p_threads) = 0; 38 | }; 39 | 40 | // Concrete VectorSet constructed from a ByteArray plus value type, dimension and vector count; declares an override for every pure-virtual member of VectorSet. 41 | class BasicVectorSet : public VectorSet 42 | { 43 | public: 44 | BasicVectorSet(const ByteArray& p_bytesArray, 45 | VectorValueType p_valueType, 46 | DimensionType p_dimension, 47 | SizeType p_vectorCount); 48 | 49 | virtual ~BasicVectorSet(); 50 | 51 | virtual VectorValueType GetValueType() const; 52 | 53 | virtual void* GetVector(SizeType p_vectorID) const; 54 | 55 | virtual void* GetData() const; 56 | 57 | virtual DimensionType Dimension() const; 58 | 59 | virtual SizeType Count() const; 60 | 61 | virtual bool Available() const; 62 | 63 | virtual ErrorCode Save(const std::string& p_vectorFile) const; 64 | 65 | virtual ErrorCode AppendSave(const std::string& p_vectorFile) 
const; 66 | 67 | virtual SizeType PerVectorDataSize() const; 68 | 69 | virtual void Normalize(int p_threads); 70 | 71 | private: 72 | ByteArray m_data; 73 | 74 | VectorValueType m_valueType; 75 | 76 | DimensionType m_dimension; 77 | 78 | SizeType m_vectorCount; 79 | 80 | size_t m_perVectorDataSize; 81 | }; 82 | 83 | } // namespace SPTAG 84 | 85 | #endif // _SPTAG_VECTORSET_H_ 86 | -------------------------------------------------------------------------------- /Test/src/make_gist_sptag.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import faiss 4 | 5 | def DEFAULT_read(fname, np_type): 6 | buf = np.fromfile(fname, dtype='int32') 7 | n = buf[0] 8 | d = buf[1] 9 | print(buf.shape) 10 | print(n) 11 | print(d) 12 | #assert((n*d) % (buf.shape[0] - 2) == 0) 13 | return buf[2:].view(np_type).reshape(n, d).copy() 14 | 15 | def DEFAULT_write(fname, m): 16 | with open(fname, 'wb') as f: 17 | np.array(m.shape[0], dtype=np.int32).tofile(f) 18 | np.array(m.shape[1], dtype=np.int32).tofile(f) 19 | m.tofile(f) 20 | 21 | def codebooks_write(fname, m): 22 | with open(fname, 'wb') as f: 23 | np.array(m.shape[0], dtype=np.int32).tofile(f) 24 | np.array(m.shape[1], dtype=np.int32).tofile(f) 25 | np.array(m.shape[2], dtype=np.int32).tofile(f) 26 | m.tofile(f) 27 | 28 | def ivecs_write(fname, m): 29 | n, d = m.shape 30 | m1 = np.empty((n, d + 1), dtype='int32') 31 | m1[:, 0] = d 32 | m1[:, 1:] = m 33 | m1.tofile(fname) 34 | 35 | 36 | def fvecs_write(fname, m): 37 | m = m.astype('float32') 38 | ivecs_write(fname, m.view('int32')) 39 | 40 | def ivecs_read(fname): 41 | a = np.fromfile(fname, dtype='int32') 42 | d = a[0] 43 | return a.reshape(-1, d + 1)[:, 1:].copy() 44 | 45 | 46 | def fvecs_read(fname): 47 | return ivecs_read(fname).view('float32') 48 | 49 | 50 | def sanitize(x): 51 | return np.ascontiguousarray(x, dtype='float32') 52 | 53 | def main(): 54 | xd = fvecs_read("D:\data\gist\gist_base.fvecs") 55 | 
xq = fvecs_read("D:\data\gist\gist_query.fvecs") 56 | xt = fvecs_read("D:\data\gist\gist_learn.fvecs") 57 | 58 | 59 | index = faiss.IndexFlatL2(xd.shape[1]) 60 | index.add(sanitize(xd)) 61 | 62 | pqidx = faiss.IndexPQ(xd.shape[1], int(xd.shape[1] / 2), 8) 63 | pqidx.train(sanitize(xt)) 64 | centroids = faiss.vector_to_array(pqidx.pq.centroids) 65 | print(centroids.shape) 66 | centroids = centroids.reshape(int(xd.shape[1] / 2), 256, 2) 67 | 68 | DEFAULT_write("gist_vector.bin", xd) 69 | DEFAULT_write("gist_query.bin", xq) 70 | codebooks_write("gist_codebook.bin", centroids) 71 | 72 | main() -------------------------------------------------------------------------------- /Script_AE/Figure11/foreground_background.p: -------------------------------------------------------------------------------- 1 | # For a single column, set the width at 3.3 inches 2 | # For across two columns, set the width at 7 inches 3 | 4 | set terminal pdfcairo size 3.3, 1.75 font 'Linux Biolinum O,12' 5 | # set terminal pdfcairo size 7, 2.07 font "UbuntuMono-Regular, 11" 6 | 7 | # set default line style 8 | set style line 1 lc rgb '#056bfa' lt 1 lw 1.7 pt 7 ps 1.5 9 | set style line 2 lc rgb '#05a8fa' lt 1 lw 1.7 pt 7 ps 1.5 10 | set style line 3 lc rgb '#fb8500' lt 1 lw 1.7 pt 7 ps 1.5 11 | set style line 4 lc rgb '#ffb703' lt 1 lw 1.7 pt 7 ps 1.5 12 | set style line 5 lc rgb '#b30018' lt 1 lw 1.7 pt 7 ps 1.5 13 | set style line 6 lc rgb '#fa3605' lt 1 lw 1.7 pt 7 ps 1.5 14 | 15 | # set grid style 16 | set style line 20 lc rgb '#dddddd' lt 1 lw 1 17 | set style fill solid 18 | 19 | set datafile separator "," 20 | set encoding utf8 21 | set autoscale 22 | set grid ls 20 noxtics ytics 23 | # set key box ls 20 opaque fc rgb "#3fffffff" 24 | set tics scale 0.5 25 | set xtics nomirror out autofreq offset 0,0.5,0 26 | set ytics nomirror out offset 0.5,0,0 27 | set border lw 2 28 | set yrange [0:6000] 29 | set ytics 1500 30 | 31 | set style data histogram 32 | set style histogram cluster gap 1 33 | 
set style fill solid 34 | set boxwidth 0.9 35 | 36 | # Start the first plot 37 | set output "Scalability.pdf" 38 | 39 | set multiplot 40 | 41 | set xlabel "Foreground Update\nThread Num" offset 0,1,0 42 | set ylabel "Throughput" offset 1,0,0 43 | 44 | set key reverse Left 45 | 46 | set size 0.6, 0.88 47 | set origin -0.02, 0.12 48 | 49 | # set title "Scalability (Background Thread = 1)" offset 0, -0.7 50 | plot "foreground_background.csv" using 3:xtic(2) every ::1 title 'Foreground' at 0.2, 0.07, \ 51 | "foreground_background.csv" using 4 every ::1 title 'Background' at 0.75, 0.07 52 | 53 | set size 0.5, 0.88 54 | set origin 0.52, 0.12 55 | set xlabel "Background Update\nThread Num" offset 0,1,0 56 | unset ylabel 57 | set ytics format "" 58 | 59 | # set title "Scalability (Background Thread = 8)" offset 0, -0.7 60 | plot "foreground_background.csv" using 10:xtic(9) every ::1::3 title 'Foreground' at 0.2, 0.07, \ 61 | "foreground_background.csv" using 11 every ::1::3 title 'Background' at 0.75, 0.07 62 | 63 | unset multiplot -------------------------------------------------------------------------------- /AnnService/inc/Core/Common/WorkSpacePool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_COMMON_WORKSPACEPOOL_H_ 5 | #define _SPTAG_COMMON_WORKSPACEPOOL_H_ 6 | 7 | #include "WorkSpace.h" 8 | #include "inc/Helper/ConcurrentSet.h" 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace SPTAG 15 | { 16 | namespace COMMON 17 | { 18 | 19 | template 20 | class WorkSpacePool 21 | { 22 | public: 23 | WorkSpacePool() {} 24 | 25 | ~WorkSpacePool() 26 | { 27 | std::shared_ptr workspace; 28 | while (m_workSpacePool.try_pop(workspace)) 29 | { 30 | workspace.reset(); 31 | } 32 | T::Reset(); 33 | } 34 | 35 | std::shared_ptr Rent() 36 | { 37 | std::shared_ptr workSpace; 38 | { 39 | if (m_workSpacePool.try_pop(workSpace)) 40 | { 41 | } 42 | else 43 | { 44 | workSpace.reset(new T(m_workSpace)); 45 | } 46 | } 47 | return workSpace; 48 | } 49 | 50 | void Return(const std::shared_ptr& p_workSpace) 51 | { 52 | m_workSpacePool.push(p_workSpace); 53 | } 54 | 55 | void Init(int size, ...) 56 | { 57 | va_list args; 58 | va_start(args, size); 59 | m_workSpace.Initialize(args); 60 | va_end(args); 61 | for (int i = 0; i < size; i++) 62 | { 63 | std::shared_ptr workSpace(new T(m_workSpace)); 64 | m_workSpacePool.push(std::move(workSpace)); 65 | } 66 | } 67 | 68 | private: 69 | Helper::Concurrent::ConcurrentQueue> m_workSpacePool; 70 | T m_workSpace; 71 | }; 72 | 73 | } 74 | } 75 | 76 | #endif // _SPTAG_COMMON_WORKSPACEPOOL_H_ 77 | -------------------------------------------------------------------------------- /docs/WindowsInstallation.md: -------------------------------------------------------------------------------- 1 | ## Requirements 2 | 3 | ### swig >= 3.0 4 | 5 | Steps to install SWIG: 6 | 7 | 1. Download the latest SWIG binary from http://www.swig.org/download.html, if you need a specific version, windows binaries are located at https://sourceforge.net/projects/swig/files/swigwin/. Once downloaded, please unzip to a local folder (prefarably inside the SPTAG folder) 8 | 2. 
Add the path where it was unzipped to the PATH environment variable in System Properties, for example `C:\Sptag\swigwin-3.0.12\` 9 | 10 | ### cmake >= 3.12.0 11 | 12 | 1. If you have Visual Studio 2019, you already have the required tools to compile. Please ensure the `Desktop Development with C++` workload is selected in the installation workloads to get all the tools 13 | 2. If not, please download cmake 14 | 15 | ![Visual Studio](img/visualstudio.png) 16 | 17 | ### boost == 1.67.0 18 | 19 | Boost's precompiled binaries are available at https://sourceforge.net/projects/boost/files/boost-binaries/1.67.0/ (to avoid compiling from scratch) 20 | 21 | 1. Select the latest version for 1.67.0 (boost_1_67_0-msvc-14.1-64.exe) 22 | 2. Launch the installation 23 | 3. Add the folder path to the PATH environment variable, for instance `C:\Sptag\boost_1_67_0\` 24 | 25 | If you need to build boost yourself because you encountered the message `Could NOT find Boost (missing: system thread serialization wserialization regex filesystem)` during the Build step, try the following: 26 | 1. Locate and run `bootstrap.bat`; this creates a file called `b2.exe` 27 | 2. Run the generated exe (in this case `b2.exe`); be patient, this may take over an hour 28 | 3. Once complete, the built libraries will be located in the `stage\lib` folder inside the boost directory (or a similar location) 29 | 30 | ## Build 31 | 32 | 1. git clone [microsoft/SPTAG](https://github.com/microsoft/SPTAG/) 33 | 2. Go to the folder where you cloned the repo in the command prompt and execute the following commands: 34 | ``` 35 | mkdir build 36 | cd build 37 | cmake -A x64 .. 38 | ``` 39 | 3. From the build folder, open the SPTAGLib.sln solution in Visual Studio and compile all projects 40 | 41 | 4. 
[..]/build/release contains now all components needed, add this path to PYTHONPATH environment variable to reference the required modules 42 | -------------------------------------------------------------------------------- /AnnService/src/Helper/VectorSetReader.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "inc/Helper/VectorSetReader.h" 5 | #include "inc/Helper/VectorSetReaders/DefaultReader.h" 6 | #include "inc/Helper/VectorSetReaders/TxtReader.h" 7 | #include "inc/Helper/VectorSetReaders/XvecReader.h" 8 | 9 | using namespace SPTAG; 10 | using namespace SPTAG::Helper; 11 | 12 | ReaderOptions::ReaderOptions(VectorValueType p_valueType, DimensionType p_dimension, VectorFileType p_fileType, 13 | std::string p_vectorDelimiter, std::uint32_t p_threadNum, bool p_normalized) 14 | : m_inputValueType(p_valueType), m_dimension(p_dimension), m_inputFileType(p_fileType), 15 | m_vectorDelimiter(p_vectorDelimiter), m_threadNum(p_threadNum), m_normalized(p_normalized) 16 | { 17 | AddOptionalOption(m_threadNum, "-t", "--thread", "Thread Number."); 18 | AddOptionalOption(m_vectorDelimiter, "-dl", "--delimiter", "Vector delimiter."); 19 | AddOptionalOption(m_normalized, "-norm", "--normalized", "Vector is normalized."); 20 | AddRequiredOption(m_dimension, "-d", "--dimension", "Dimension of vector."); 21 | AddRequiredOption(m_inputValueType, "-v", "--vectortype", "Input vector data type. Default is float."); 22 | AddRequiredOption(m_inputFileType, "-f", "--filetype", "Input file type (DEFAULT, TXT, XVEC). 
Default is DEFAULT."); 23 | } 24 | 25 | ReaderOptions::~ReaderOptions() 26 | { 27 | } 28 | 29 | VectorSetReader::VectorSetReader(std::shared_ptr p_options) : m_options(p_options) 30 | { 31 | } 32 | 33 | VectorSetReader::~VectorSetReader() 34 | { 35 | } 36 | 37 | std::shared_ptr VectorSetReader::CreateInstance(std::shared_ptr p_options) 38 | { 39 | if (p_options->m_inputFileType == VectorFileType::DEFAULT) 40 | { 41 | return std::make_shared(p_options); 42 | } 43 | else if (p_options->m_inputFileType == VectorFileType::TXT) 44 | { 45 | return std::make_shared(p_options); 46 | } 47 | else if (p_options->m_inputFileType == VectorFileType::XVEC) 48 | { 49 | return std::make_shared(p_options); 50 | } 51 | return nullptr; 52 | } 53 | -------------------------------------------------------------------------------- /Test/cuda/common.hxx: -------------------------------------------------------------------------------- 1 | //#include "inc/Core/Common/cuda/KNN.hxx" 2 | #include 3 | #include 4 | 5 | #define CHECK_ERRS(errs) \ 6 | if(errs > 0) { \ 7 | SPTAGLIB_LOG(SPTAG::Helper::LogLevel::LL_Info, "%d errors found\n", errs); \ 8 | } 9 | 10 | #define CHECK_VAL(val,exp,errs) \ 11 | if(val != exp) { \ 12 | errs++; \ 13 | SPTAGLIB_LOG(SPTAG::Helper::LogLevel::LL_Error, "%s != %s\n",#val,#exp); \ 14 | } 15 | 16 | #define CHECK_VAL_LT(val,exp,errs) \ 17 | if(val > exp) { \ 18 | errs++; \ 19 | SPTAGLIB_LOG(SPTAG::Helper::LogLevel::LL_Error, "%s > %s\n",#val,#exp); \ 20 | } 21 | 22 | #define GPU_CHECK_VAL(val,exp,dtype,errs) \ 23 | dtype temp; \ 24 | CUDA_CHECK(cudaMemcpy(&temp, val, sizeof(dtype), cudaMemcpyDeviceToHost)); \ 25 | float eps = 0.01; \ 26 | if((float)temp>0.0 && ((float)temp*(1.0+eps) < (float)(exp) || (float)temp*(1.0-eps) > (float)(exp))) { \ 27 | errs++; \ 28 | SPTAGLIB_LOG(SPTAG::Helper::LogLevel::LL_Error, "%s != %s\n",#val,#exp); \ 29 | } 30 | 31 | 32 | template 33 | T* create_dataset(size_t rows, int dim) { 34 | 35 | srand(0); 36 | T* h_data = new T[rows*dim]; 37 
| for(size_t i=0; i::value) { 39 | h_data[i] = (rand()/(float)RAND_MAX); 40 | } 41 | else if(std::is_same::value) { 42 | h_data[i] = static_cast((rand()%INT_MAX)); 43 | } 44 | else if(std::is_same::value) { 45 | h_data[i] = static_cast((rand()%127)); 46 | } 47 | else if(std::is_same::value) { 48 | h_data[i] = static_cast((rand()%127)); 49 | } 50 | } 51 | return h_data; 52 | } 53 | /* 54 | __global__ void count_leaf_sizes(LeafNode* leafs, int* node_ids, int N, int internal_nodes); 55 | __global__ void assign_leaf_points_in_batch(LeafNode* leafs, int* leaf_points, int* node_ids, int N, int internal_nodes, int min_id, int max_id); 56 | __global__ void assign_leaf_points_out_batch(LeafNode* leafs, int* leaf_points, int* node_ids, int N, int internal_nodes, int min_id, int max_id); 57 | __global__ void compute_mean(KEYTYPE* split_keys, int* node_sizes, int num_nodes); 58 | __global__ void initialize_rands(curandState* states, int iter); 59 | */ 60 | -------------------------------------------------------------------------------- /Script_AE/Figure9/parameter_study_shifting.p: -------------------------------------------------------------------------------- 1 | # For a single column, set the width at 3.3 inches 2 | # For across two columns, set the width at 7 inches 3 | 4 | set terminal pdfcairo size 3.3, 1.75 font 'Linux Biolinum O,12' 5 | # set terminal pdfcairo size 7, 1.75 font "UbuntuMono-Regular, 11" 6 | 7 | # set default line style 8 | set style line 1 lc rgb '#056bfa' lt 1 lw 1.7 pt 7 ps 1.5 9 | set style line 2 lc rgb '#05a8fa' lt 1 lw 1.7 pt 7 ps 1.5 10 | set style line 3 lc rgb '#fb8500' lt 1 lw 1.7 pt 7 ps 1.5 11 | set style line 4 lc rgb '#ffb703' lt 1 lw 1.7 pt 7 ps 1.5 12 | set style line 5 lc rgb '#b30018' lt 1 lw 1.7 pt 7 ps 1.5 13 | set style line 6 lc rgb '#fa3605' lt 1 lw 1.7 pt 7 ps 1.5 14 | set style line 7 lc rgb '#80d653' lt 1 lw 1.7 pt 7 ps 1.5 15 | # set style line 1 lc rgb '#00d5ff' lt 1 lw 1.5 pt 7 ps 1.5 16 | # set style line 2 lc rgb '#000080' 
lt 1 lw 1.5 pt 7 ps 1.5 17 | # set style line 3 lc rgb '#ff7f0e' lt 1 lw 1.5 pt 7 ps 1.5 18 | # set style line 4 lc rgb '#008176' lt 1 lw 1.5 pt 7 ps 1.5 19 | # set style line 5 lc rgb '#b3b3b3' lt 1 lw 1.5 pt 7 ps 1.5 20 | # set style line 6 lc rgb '#000000' lt 1 lw 1.5 pt 7 ps 1.5 21 | 22 | # set grid style 23 | set style line 20 lc rgb '#dddddd' lt 1 lw 1 24 | 25 | set datafile separator "," 26 | set encoding utf8 27 | set autoscale 28 | set grid ls 20 29 | set key box ls 20 opaque width -2 30 | set tics scale 0.5 31 | set xtics nomirror out autofreq offset 0,0.5,0 32 | set ytics nomirror out autofreq offset 0.5,0,0 33 | set border lw 2 34 | 35 | # Start the first plot 36 | set output "ParameterStudy1.pdf" 37 | 38 | set xlabel "Latency (ms)" offset 0,1,0 39 | set ylabel 'Recall 10\@10' offset 1.5,0,0 40 | set key bottom right reverse Left 41 | 42 | set size 1, 1 43 | set origin 0, 0 44 | # set title "Search Latency" offset 0, -0.7 45 | unset title 46 | set yrange [0.92:1] 47 | set ytics 0.02 48 | plot "parameter_study_shifting.csv" using 3:4 every ::3 with lines title 'Static' ls 7, \ 49 | "parameter_study_shifting.csv" using 1:2 every ::3 with lines title 'In-place Update' ls 4, \ 50 | "parameter_study_shifting.csv" using 5:6 every ::3 with lines title 'In-place Update + Split Only' ls 3, \ 51 | "parameter_study_shifting.csv" using 7:8 every ::3 with lines title 'In-place Update + Split/Reassign' ls 1 52 | 53 | unset key -------------------------------------------------------------------------------- /Script_AE/iniFile/build_sift1m.ini: -------------------------------------------------------------------------------- 1 | [Base] 2 | ValueType=UInt8 3 | DistCalcMethod=L2 4 | IndexAlgoType=BKT 5 | Dim=128 6 | VectorPath=/home/sosp/data/sift_data/bigann1m_base.u8bin 7 | VectorType=DEFAULT 8 | VectorSize=1000000 9 | VectorDelimiter= 10 | QueryPath=/home/sosp/data/sift_data/query.public.10K.u8bin 11 | QueryType=DEFAULT 12 | QuerySize=10000 13 | QueryDelimiter= 14 | 
WarmupPath= 15 | WarmupType=DEFAULT 16 | WarmupSize=10000 17 | WarmupDelimiter= 18 | TruthPath=/home/sosp/data/sift_data/1m_trace/bigann-1M 19 | TruthType=DEFAULT 20 | GenerateTruth=false 21 | IndexDirectory=/home/sosp/data/store_sift1m 22 | HeadIndexFolder=head_index 23 | 24 | [SelectHead] 25 | isExecute=true 26 | TreeNumber=1 27 | BKTKmeansK=32 28 | BKTLeafSize=8 29 | SamplesNumber=1000 30 | NumberOfThreads=80 31 | SaveBKT=false 32 | AnalyzeOnly=false 33 | CalcStd=true 34 | SelectDynamically=true 35 | NoOutput=false 36 | SelectThreshold=12 37 | SplitFactor=9 38 | SplitThreshold=18 39 | Ratio=0.15 40 | RecursiveCheckSmallCluster=true 41 | PrintSizeCount=true 42 | 43 | [BuildHead] 44 | isExecute=true 45 | TreeFilePath=tree.bin 46 | GraphFilePath=graph.bin 47 | VectorFilePath=vectors.bin 48 | DeleteVectorFilePath=deletes.bin 49 | EnableBfs=0 50 | BKTNumber=1 51 | BKTKmeansK=32 52 | BKTLeafSize=8 53 | Samples=1000 54 | BKTLambdaFactor=100.000000 55 | TPTNumber=32 56 | TPTLeafSize=2000 57 | NumTopDimensionTpTreeSplit=5 58 | NeighborhoodSize=32 59 | GraphNeighborhoodScale=2.000000 60 | GraphCEFScale=2.000000 61 | RefineIterations=2 62 | EnableRebuild=0 63 | CEF=1000 64 | AddCEF=500 65 | MaxCheckForRefineGraph=8192 66 | RNGFactor=1.000000 67 | GPUGraphType=2 68 | GPURefineSteps=0 69 | GPURefineDepth=30 70 | GPULeafSize=500 71 | HeadNumGPUs=1 72 | TPTBalanceFactor=2 73 | NumberOfThreads=160 74 | DistCalcMethod=L2 75 | DeletePercentageForRefine=0.400000 76 | AddCountForRebuild=1000 77 | MaxCheck=4096 78 | ThresholdOfNumberOfContinuousNoBetterPropagation=3 79 | NumberOfInitialDynamicPivots=50 80 | NumberOfOtherDynamicPivots=4 81 | HashTableExponent=2 82 | DataBlockSize=1048576 83 | DataCapacity=2147483647 84 | MetaRecordSize=10 85 | 86 | [BuildSSDIndex] 87 | isExecute=true 88 | BuildSsdIndex=true 89 | NumberOfThreads=40 90 | InternalResultNum=64 91 | ReplicaCount=8 92 | PostingPageLimit=3 93 | OutputEmptyReplicaID=1 94 | TmpDir=/home/sosp/data/store_sift1m/tmpdir 
-------------------------------------------------------------------------------- /AnnService/Server.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Header Files 16 | 17 | 18 | Header Files 19 | 20 | 21 | Header Files 22 | 23 | 24 | Header Files 25 | 26 | 27 | Header Files 28 | 29 | 30 | Header Files 31 | 32 | 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | Source Files 42 | 43 | 44 | Source Files 45 | 46 | 47 | Source Files 48 | 49 | 50 | Source Files 51 | 52 | 53 | Source Files 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /Script_AE/Figure10/parameter_study_range.p: -------------------------------------------------------------------------------- 1 | # For a single column, set the width at 3.3 inches 2 | # For across two columns, set the width at 7 inches 3 | 4 | set terminal pdfcairo size 3.3, 1.75 font 'Linux Biolinum O,12' 5 | # set terminal pdfcairo size 7, 1.75 font "UbuntuMono-Regular, 11" 6 | 7 | # set default line style 8 | set style line 1 lc rgb '#056bfa' lt 1 lw 1.7 pt 7 ps 1.5 9 | set style line 2 lc rgb '#05a8fa' lt 1 lw 1.7 pt 7 ps 1.5 10 | set style line 3 lc rgb '#fb8500' lt 1 lw 1.7 pt 7 ps 1.5 11 | set style line 4 lc rgb '#ffb703' lt 1 lw 1.7 pt 7 ps 1.5 12 | set style line 5 lc rgb '#b30018' lt 1 lw 1.7 pt 7 ps 1.5 13 | set style line 6 lc rgb '#fa3605' lt 1 lw 1.7 pt 7 ps 1.5 14 | set style line 7 lc rgb '#80d653' lt 1 lw 1.7 pt 7 ps 1.5 15 | # set style line 1 lc rgb '#00d5ff' lt 1 lw 1.5 pt 7 ps 1.5 16 | # set style line 2 lc rgb '#000080' lt 1 lw 1.5 pt 7 ps 1.5 17 | # set style line 3 lc rgb '#ff7f0e' lt 1 lw 1.5 pt 7 ps 1.5 18 | # set style line 4 lc rgb '#008176' lt 1 lw 1.5 pt 7 ps 1.5 19 | 
# set style line 5 lc rgb '#b3b3b3' lt 1 lw 1.5 pt 7 ps 1.5 20 | # set style line 6 lc rgb '#000000' lt 1 lw 1.5 pt 7 ps 1.5 21 | 22 | # set grid style 23 | set style line 20 lc rgb '#dddddd' lt 1 lw 1 24 | 25 | set datafile separator "," 26 | set encoding utf8 27 | set autoscale 28 | set grid ls 20 29 | # set key box ls 20 opaque fc rgb "#3fffffff" width -2 30 | set tics scale 0.5 31 | set xtics nomirror out autofreq offset 0,0.5,0 32 | set ytics nomirror out autofreq offset 0.5,0,0 33 | set border lw 2 34 | 35 | # Start the second plot 36 | 37 | set output "ParameterStudyRange.pdf" 38 | 39 | set xlabel "Latency (ms)" offset 0,1,0 40 | set ylabel 'Recall 10\@10' offset 1.5,0,0 41 | set key bottom right reverse Left 42 | 43 | set size 1, 1 44 | set origin 0, 0 45 | # set title "Search Latency" offset 0, -0.7 46 | unset title 47 | set yrange [0.93:1] 48 | set ytics ("" 0.93, "0.94" 0.94, "" 0.95, "0.96" 0.96, "" 0.97, "0.98" 0.98, "" 0.99, "1" 1) 49 | plot "parameter_study_range.csv" using 1:2 every ::3 with lines title 'Reassign top0' ls 1, \ 50 | "parameter_study_range.csv" using 3:4 every ::3 with lines title 'Reassign top8' ls 2, \ 51 | "parameter_study_range.csv" using 5:6 every ::3 with lines title 'Reassign top64' ls 3, \ 52 | "parameter_study_range.csv" using 7:8 every ::3 with lines title 'Reassign top128' ls 4 53 | unset key -------------------------------------------------------------------------------- /Tools/nni-auto-tune/runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from model import metrics 5 | import time 6 | import os 7 | import psutil 8 | 9 | 10 | def run_individual_query(algo, 11 | X_train, 12 | X_test, 13 | distance, 14 | k, 15 | run_count=1, 16 | max_mem=-1): 17 | 18 | best_search_time = float('inf') 19 | for i in range(run_count): 20 | print('Run %d/%d...' 
% (i + 1, run_count)) 21 | # a bit dumb but can't be a scalar since of Python's scoping rules 22 | n_items_processed = [0] 23 | 24 | def single_query(v): 25 | start = time.time() 26 | candidates = algo.query(v, k) 27 | if max_mem > 0 and psutil.Process( 28 | os.getpid()).memory_info().rss > max_mem: 29 | raise MemoryError 30 | total = (time.time() - start) 31 | 32 | candidates = [ 33 | (int(idx), 34 | float(metrics[distance]['distance'](v, X_train[idx]))) # noqa 35 | for idx in candidates 36 | ] 37 | n_items_processed[0] += 1 38 | if n_items_processed[0] % 1000 == 0: 39 | print('Processed %d/%d queries...' % 40 | (n_items_processed[0], len(X_test))) 41 | if len(candidates) > k: 42 | print('warning: algorithm returned %d results, but k' 43 | ' is only %d)' % (len(candidates), k)) 44 | return (total, candidates) 45 | 46 | results = [single_query(x) for x in X_test] 47 | 48 | total_time = sum(t for t, _ in results) 49 | total_candidates = sum(len(candidates) for _, candidates in results) 50 | search_time = total_time / len(X_test) 51 | avg_candidates = total_candidates / len(X_test) 52 | best_search_time = min(best_search_time, search_time) 53 | 54 | attrs = { 55 | "best_search_time": best_search_time, 56 | "candidates": avg_candidates, 57 | "name": 'BKT', 58 | "run_count": run_count, 59 | "distance": distance, 60 | "count": int(k) 61 | } 62 | 63 | return (attrs, results) 64 | -------------------------------------------------------------------------------- /AnnService/inc/SSDServing/Utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
#pragma once
#include <chrono>

namespace SPTAG {
    namespace SSDServing {
        namespace Utils {
            typedef std::chrono::steady_clock SteadClock;

            // NOTE(review): the duration units below (microseconds / milliseconds / seconds)
            // are inferred from each function's name and divisor - confirm against upstream.
            //
            // All three helpers are `inline` because they are defined in a header: a
            // non-inline definition would be emitted by every translation unit that includes
            // this file and collide at link time.

            /// Elapsed time from start to end in milliseconds.
            /// The interval is truncated to whole microseconds before being scaled.
            inline double getMsInterval(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point end) {
                return (std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() * 1.0) / 1000.0;
            }

            /// Elapsed time from start to end in seconds.
            /// The interval is truncated to whole milliseconds before being scaled.
            inline double getSecInterval(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point end) {
                return (std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() * 1.0) / 1000.0;
            }

            /// Elapsed time from start to end in minutes.
            /// The interval is truncated to whole seconds before being scaled.
            inline double getMinInterval(std::chrono::steady_clock::time_point start, std::chrono::steady_clock::time_point end) {
                return (std::chrono::duration_cast<std::chrono::seconds>(end - start).count() * 1.0) / 60.0;
            }

            /// Clock class: a stopwatch that remembers when it was constructed (or last
            /// reset) and reports the steady-clock time elapsed since then.
            class StopW {
            private:
                std::chrono::steady_clock::time_point time_begin;
            public:
                /// Starts timing at construction.
                StopW() {
                    time_begin = std::chrono::steady_clock::now();
                }

                /// Milliseconds elapsed since construction or the last reset().
                double getElapsedMs() const {
                    std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now();
                    return getMsInterval(time_begin, time_end);
                }

                /// Seconds elapsed since construction or the last reset().
                double getElapsedSec() const {
                    std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now();
                    return getSecInterval(time_begin, time_end);
                }

                /// Minutes elapsed since construction or the last reset().
                double getElapsedMin() const {
                    std::chrono::steady_clock::time_point time_end = std::chrono::steady_clock::now();
                    return getMinInterval(time_begin, time_end);
                }

                /// Restarts timing from the current instant.
                void reset() {
                    time_begin = std::chrono::steady_clock::now();
                }
            };
        }
    }
}
// -------------------------------------------------------------------------------- /AnnService/inc/Core/Common/KNearestNeighborhoodGraph.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License.
3 | 4 | #ifndef _SPTAG_COMMON_KNG_H_ 5 | #define _SPTAG_COMMON_KNG_H_ 6 | 7 | #include "NeighborhoodGraph.h" 8 | 9 | namespace SPTAG 10 | { 11 | namespace COMMON 12 | { 13 | class KNearestNeighborhoodGraph : public NeighborhoodGraph 14 | { 15 | public: 16 | KNearestNeighborhoodGraph() { m_pNeighborhoodGraph.SetName("NNG"); } 17 | 18 | void RebuildNeighbors(VectorIndex* index, const SizeType node, SizeType* nodes, const BasicResult* queryResults, const int numResults) { 19 | DimensionType count = 0; 20 | for (int j = 0; j < numResults && count < m_iNeighborhoodSize; j++) { 21 | const BasicResult& item = queryResults[j]; 22 | if (item.VID < 0) break; 23 | if (item.VID == node) continue; 24 | nodes[count++] = item.VID; 25 | } 26 | for (DimensionType j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1; 27 | } 28 | 29 | void InsertNeighbors(VectorIndex* index, const SizeType node, SizeType insertNode, float insertDist) 30 | { 31 | std::lock_guard lock(m_dataUpdateLock[node]); 32 | 33 | SizeType* nodes = m_pNeighborhoodGraph[node]; 34 | SizeType tmpNode; 35 | float tmpDist; 36 | for (DimensionType k = 0; k < m_iNeighborhoodSize; k++) 37 | { 38 | tmpNode = nodes[k]; 39 | if (tmpNode < -1) break; 40 | 41 | if (tmpNode < 0 || (tmpDist = index->ComputeDistance(index->GetSample(node), index->GetSample(tmpNode))) > insertDist 42 | || (insertDist == tmpDist && insertNode < tmpNode)) 43 | { 44 | nodes[k] = insertNode; 45 | while (tmpNode >= 0 && ++k < m_iNeighborhoodSize && nodes[k] >= -1) 46 | { 47 | std::swap(tmpNode, nodes[k]); 48 | } 49 | break; 50 | } 51 | } 52 | } 53 | }; 54 | } 55 | } 56 | #endif -------------------------------------------------------------------------------- /AnnService/inc/Core/Common/FineGrainedLock.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_COMMON_FINEGRAINEDLOCK_H_ 5 | #define _SPTAG_COMMON_FINEGRAINEDLOCK_H_ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace SPTAG 13 | { 14 | namespace COMMON 15 | { 16 | class FineGrainedLock { 17 | public: 18 | FineGrainedLock() { 19 | m_locks.reset(new std::mutex[PoolSize + 1]); 20 | } 21 | ~FineGrainedLock() {} 22 | 23 | std::mutex& operator[](SizeType idx) { 24 | unsigned index = hash_func((unsigned)idx); 25 | return m_locks[index]; 26 | } 27 | 28 | const std::mutex& operator[](SizeType idx) const { 29 | unsigned index = hash_func((unsigned)idx); 30 | return m_locks[index]; 31 | } 32 | 33 | static inline unsigned hash_func(unsigned idx) 34 | { 35 | return ((unsigned)(idx * 99991) + _rotl(idx, 2) + 101) & PoolSize; 36 | } 37 | 38 | private: 39 | static const int PoolSize = 32767; 40 | std::unique_ptr m_locks; 41 | }; 42 | 43 | class FineGrainedRWLock { 44 | public: 45 | FineGrainedRWLock() { 46 | m_locks.reset(new std::shared_timed_mutex[PoolSize + 1]); 47 | } 48 | ~FineGrainedRWLock() {} 49 | 50 | std::shared_timed_mutex& operator[](SizeType idx) { 51 | unsigned index = hash_func((unsigned)idx); 52 | return m_locks[index]; 53 | } 54 | 55 | const std::shared_timed_mutex& operator[](SizeType idx) const { 56 | unsigned index = hash_func((unsigned)idx); 57 | return m_locks[index]; 58 | } 59 | 60 | static inline unsigned hash_func(unsigned idx) 61 | { 62 | return ((unsigned)(idx * 99991) + _rotl(idx, 2) + 101) & PoolSize; 63 | } 64 | private: 65 | static const int PoolSize = 32767; 66 | std::unique_ptr m_locks; 67 | }; 68 | } 69 | } 70 | 71 | #endif // _SPTAG_COMMON_FINEGRAINEDLOCK_H_ -------------------------------------------------------------------------------- /AnnService/inc/Aggregator/AggregatorService.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_AGGREGATOR_AGGREGATORSERVICE_H_ 5 | #define _SPTAG_AGGREGATOR_AGGREGATORSERVICE_H_ 6 | 7 | #include "AggregatorContext.h" 8 | #include "AggregatorExecutionContext.h" 9 | #include "inc/Socket/Server.h" 10 | #include "inc/Socket/Client.h" 11 | #include "inc/Socket/ResourceManager.h" 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace SPTAG 21 | { 22 | namespace Aggregator 23 | { 24 | 25 | class AggregatorService 26 | { 27 | public: 28 | AggregatorService(); 29 | 30 | ~AggregatorService(); 31 | 32 | bool Initialize(); 33 | 34 | void Run(); 35 | 36 | private: 37 | 38 | void StartClient(); 39 | 40 | void StartListen(); 41 | 42 | void WaitForShutdown(); 43 | 44 | void ConnectToPendingServers(); 45 | 46 | void AddToPendingServers(std::shared_ptr p_remoteServer); 47 | 48 | void SearchRequestHanlder(Socket::ConnectionID p_localConnectionID, Socket::Packet p_packet); 49 | 50 | void SearchResponseHanlder(Socket::ConnectionID p_localConnectionID, Socket::Packet p_packet); 51 | 52 | void AggregateResults(std::shared_ptr p_exectionContext); 53 | 54 | std::shared_ptr GetContext(); 55 | 56 | private: 57 | typedef std::function AggregatorCallback; 58 | 59 | std::shared_ptr m_aggregatorContext; 60 | 61 | std::shared_ptr m_socketServer; 62 | 63 | std::shared_ptr m_socketClient; 64 | 65 | bool m_initalized; 66 | 67 | std::unique_ptr m_threadPool; 68 | 69 | boost::asio::io_context m_ioContext; 70 | 71 | boost::asio::signal_set m_shutdownSignals; 72 | 73 | std::vector> m_pendingConnectServers; 74 | 75 | std::mutex m_pendingConnectServersMutex; 76 | 77 | boost::asio::deadline_timer m_pendingConnectServersTimer; 78 | 79 | Socket::ResourceManager m_aggregatorCallbackManager; 80 | }; 81 | 82 | 83 | 84 | } // namespace Aggregator 85 | } // namespace SPTAG 86 | 87 | 88 | #endif // _SPTAG_AGGREGATOR_AGGREGATORSERVICE_H_ 89 | --------------------------------------------------------------------------------
/AnnService/inc/Socket/RemoteSearchQuery.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_SOCKET_REMOTESEARCHQUERY_H_ 5 | #define _SPTAG_SOCKET_REMOTESEARCHQUERY_H_ 6 | 7 | #include "inc/Core/CommonDataStructure.h" 8 | #include "inc/Core/SearchQuery.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace SPTAG 17 | { 18 | namespace Socket 19 | { 20 | 21 | // TODO: use Bond replace below structures. 22 | 23 | struct RemoteQuery 24 | { 25 | static constexpr std::uint16_t MajorVersion() { return 1; } 26 | static constexpr std::uint16_t MirrorVersion() { return 0; } 27 | 28 | enum class QueryType : std::uint8_t 29 | { 30 | String = 0 31 | }; 32 | 33 | RemoteQuery(); 34 | 35 | std::size_t EstimateBufferSize() const; 36 | 37 | std::uint8_t* Write(std::uint8_t* p_buffer) const; 38 | 39 | const std::uint8_t* Read(const std::uint8_t* p_buffer); 40 | 41 | 42 | QueryType m_type; 43 | 44 | std::string m_queryString; 45 | }; 46 | 47 | 48 | struct IndexSearchResult 49 | { 50 | std::string m_indexName; 51 | 52 | QueryResult m_results; 53 | }; 54 | 55 | 56 | struct RemoteSearchResult 57 | { 58 | static constexpr std::uint16_t MajorVersion() { return 1; } 59 | static constexpr std::uint16_t MirrorVersion() { return 0; } 60 | 61 | enum class ResultStatus : std::uint8_t 62 | { 63 | Success = 0, 64 | 65 | Timeout = 1, 66 | 67 | FailedNetwork = 2, 68 | 69 | FailedExecute = 3, 70 | 71 | Dropped = 4 72 | }; 73 | 74 | RemoteSearchResult(); 75 | 76 | RemoteSearchResult(const RemoteSearchResult& p_right); 77 | 78 | RemoteSearchResult(RemoteSearchResult&& p_right); 79 | 80 | RemoteSearchResult& operator=(RemoteSearchResult&& p_right); 81 | 82 | std::size_t EstimateBufferSize() const; 83 | 84 | std::uint8_t* Write(std::uint8_t* p_buffer) const; 85 | 86 | const std::uint8_t* Read(const 
std::uint8_t* p_buffer); 87 | 88 | 89 | ResultStatus m_status; 90 | 91 | std::vector m_allIndexResults; 92 | }; 93 | 94 | 95 | 96 | } // namespace Socket 97 | } // namespace SPTAG 98 | 99 | #endif // _SPTAG_SOCKET_REMOTESEARCHQUERY_H_ 100 | -------------------------------------------------------------------------------- /Script_AE/iniFile/build_clustering_1m.ini: -------------------------------------------------------------------------------- 1 | [Base] 2 | ValueType=UInt8 3 | DistCalcMethod=L2 4 | IndexAlgoType=BKT 5 | Dim=128 6 | VectorPath=/home/sosp/data/sift_data/bigann1m_update_clustering 7 | VectorType=DEFAULT 8 | VectorSize=1049411 9 | VectorDelimiter= 10 | QueryPath=/home/sosp/data/sift_data/query.public.10K.u8bin 11 | QueryType=DEFAULT 12 | QuerySize=10000 13 | QueryDelimiter= 14 | WarmupPath= 15 | WarmupType=DEFAULT 16 | WarmupSize=10000 17 | WarmupDelimiter= 18 | TruthPath=/home/sosp/data/sift_data/bigann1m_update_clustering_origin_truth 19 | TruthType=DEFAULT 20 | GenerateTruth=false 21 | HeadVectorIDs=head_vectors_ID_UInt8_L2_base_DEFUALT.bin 22 | HeadVectors=head_vectors_UInt8_L2_base_DEFUALT.bin 23 | IndexDirectory=/home/sosp/data/store_sift_cluster 24 | HeadIndexFolder=head_index 25 | 26 | [SelectHead] 27 | isExecute=true 28 | TreeNumber=1 29 | BKTKmeansK=32 30 | BKTLeafSize=8 31 | SamplesNumber=1000 32 | NumberOfThreads=80 33 | SaveBKT=false 34 | AnalyzeOnly=false 35 | CalcStd=true 36 | SelectDynamically=true 37 | NoOutput=false 38 | SelectThreshold=12 39 | SplitFactor=9 40 | SplitThreshold=18 41 | Ratio=0.15 42 | RecursiveCheckSmallCluster=true 43 | PrintSizeCount=true 44 | 45 | [BuildHead] 46 | isExecute=true 47 | TreeFilePath=tree.bin 48 | GraphFilePath=graph.bin 49 | VectorFilePath=vectors.bin 50 | DeleteVectorFilePath=deletes.bin 51 | EnableBfs=0 52 | BKTNumber=1 53 | BKTKmeansK=32 54 | BKTLeafSize=8 55 | Samples=1000 56 | BKTLambdaFactor=100.000000 57 | TPTNumber=32 58 | TPTLeafSize=2000 59 | NumTopDimensionTpTreeSplit=5 60 |
NeighborhoodSize=32 61 | GraphNeighborhoodScale=2.000000 62 | GraphCEFScale=2.000000 63 | RefineIterations=2 64 | EnableRebuild=1 65 | CEF=1000 66 | AddCEF=500 67 | MaxCheckForRefineGraph=8192 68 | RNGFactor=1.000000 69 | GPUGraphType=2 70 | GPURefineSteps=0 71 | GPURefineDepth=30 72 | GPULeafSize=500 73 | HeadNumGPUs=1 74 | TPTBalanceFactor=2 75 | NumberOfThreads=80 76 | DistCalcMethod=L2 77 | DeletePercentageForRefine=0.400000 78 | AddCountForRebuild=1000 79 | MaxCheck=4096 80 | ThresholdOfNumberOfContinuousNoBetterPropagation=3 81 | NumberOfInitialDynamicPivots=50 82 | NumberOfOtherDynamicPivots=4 83 | HashTableExponent=2 84 | DataBlockSize=1048576 85 | DataCapacity=2147483647 86 | MetaRecordSize=10 87 | 88 | [BuildSSDIndex] 89 | isExecute=true 90 | BuildSsdIndex=true 91 | NumberOfThreads=10 92 | InternalResultNum=64 93 | ReplicaCount=8 94 | PostingPageLimit=3 95 | OutputEmptyReplicaID=1 96 | TmpDir=/home/sosp/data/store_sift_cluster/tmpdir -------------------------------------------------------------------------------- /AnnService/inc/Core/SearchResult.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #ifndef _SPTAG_SEARCHRESULT_H_ 5 | #define _SPTAG_SEARCHRESULT_H_ 6 | 7 | #include "CommonDataStructure.h" 8 | 9 | namespace SPTAG 10 | { 11 | struct NodeDistPair 12 | { 13 | SizeType node; 14 | float distance; 15 | 16 | NodeDistPair(SizeType _node = -1, float _distance = MaxDist) : node(_node), distance(_distance) {} 17 | 18 | inline bool operator < (const NodeDistPair& rhs) const 19 | { 20 | return distance < rhs.distance; 21 | } 22 | 23 | inline bool operator > (const NodeDistPair& rhs) const 24 | { 25 | return distance > rhs.distance; 26 | } 27 | }; 28 | 29 | struct Edge 30 | { 31 | SizeType node; 32 | float distance; 33 | SizeType tonode; 34 | Edge() : node(MaxSize), distance(MaxDist), tonode(MaxSize) {} 35 | }; 36 | 37 | struct EdgeCompare 38 | { 39 | inline bool operator()(const Edge& a, int b) const 40 | { 41 | return a.node < b; 42 | }; 43 | 44 | inline bool operator()(int a, const Edge& b) const 45 | { 46 | return a < b.node; 47 | }; 48 | 49 | inline bool operator()(const Edge& a, const Edge& b) const 50 | { 51 | if (a.node == b.node) 52 | { 53 | if (a.distance == b.distance) 54 | { 55 | return a.tonode < b.tonode; 56 | } 57 | 58 | return a.distance < b.distance; 59 | } 60 | 61 | return a.node < b.node; 62 | }; 63 | }; 64 | 65 | struct BasicResult 66 | { 67 | SizeType VID; 68 | float Dist; 69 | ByteArray Meta; 70 | bool RelaxedMono; 71 | 72 | BasicResult() : VID(-1), Dist(MaxDist), RelaxedMono(false) {} 73 | 74 | BasicResult(SizeType p_vid, float p_dist) : VID(p_vid), Dist(p_dist), RelaxedMono(false) {} 75 | 76 | BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta) : VID(p_vid), Dist(p_dist), Meta(p_meta), RelaxedMono(false) {} 77 | BasicResult(SizeType p_vid, float p_dist, ByteArray p_meta, bool p_relaxedMono) : VID(p_vid), Dist(p_dist), Meta(p_meta), RelaxedMono(p_relaxedMono) {} 78 | }; 79 | 80 | } // namespace SPTAG 81 | 82 | #endif // _SPTAG_SEARCHRESULT_H_ 83 | 
-------------------------------------------------------------------------------- /AnnService/inc/Core/Common/IQuantizer.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_COMMON_QUANTIZER_H_ 5 | #define _SPTAG_COMMON_QUANTIZER_H_ 6 | 7 | #include "inc/Core/Common.h" 8 | #include "inc/Helper/DiskIO.h" 9 | #include 10 | #include "inc/Core/CommonDataStructure.h" 11 | #include "DistanceUtils.h" 12 | 13 | namespace SPTAG 14 | { 15 | namespace COMMON 16 | { 17 | class IQuantizer 18 | { 19 | public: 20 | virtual float L2Distance(const std::uint8_t* pX, const std::uint8_t* pY) const = 0; 21 | 22 | virtual float CosineDistance(const std::uint8_t* pX, const std::uint8_t* pY) const = 0; 23 | 24 | template 25 | std::function DistanceCalcSelector(SPTAG::DistCalcMethod p_method) const; 26 | 27 | virtual void QuantizeVector(const void* vec, std::uint8_t* vecout, bool ADC = true) const = 0; 28 | 29 | virtual SizeType QuantizeSize() const = 0; 30 | 31 | virtual void ReconstructVector(const std::uint8_t* qvec, void* vecout) const = 0; 32 | 33 | virtual SizeType ReconstructSize() const = 0; 34 | 35 | virtual DimensionType ReconstructDim() const = 0; 36 | 37 | virtual std::uint64_t BufferSize() const = 0; 38 | 39 | virtual ErrorCode SaveQuantizer(std::shared_ptr p_out) const = 0; 40 | 41 | virtual ErrorCode LoadQuantizer(std::shared_ptr p_in) = 0; 42 | 43 | virtual ErrorCode LoadQuantizer(uint8_t* raw_bytes) = 0; 44 | 45 | static std::shared_ptr LoadIQuantizer(std::shared_ptr p_in); 46 | 47 | static std::shared_ptr LoadIQuantizer(SPTAG::ByteArray bytes); 48 | 49 | virtual bool GetEnableADC() const = 0; 50 | 51 | virtual void SetEnableADC(bool enableADC) = 0; 52 | 53 | virtual QuantizerType GetQuantizerType() const = 0; 54 | 55 | virtual VectorValueType GetReconstructType() const = 0; 56 | 57 | virtual DimensionType 
GetNumSubvectors() const = 0; 58 | 59 | virtual int GetBase() const = 0; 60 | 61 | virtual float* GetL2DistanceTables() = 0; 62 | 63 | template 64 | T* GetCodebooks(); 65 | }; 66 | } 67 | } 68 | 69 | #endif // _SPTAG_COMMON_QUANTIZER_H_ 70 | -------------------------------------------------------------------------------- /AnnService/src/Client/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "inc/Client/ClientWrapper.h" 5 | #include "inc/Client/Options.h" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace SPTAG; 12 | 13 | std::unique_ptr g_client; 14 | 15 | int main(int argc, char **argv) 16 | { 17 | SPTAG::Client::ClientOptions options; 18 | if (!options.Parse(argc - 1, argv + 1)) 19 | { 20 | return 1; 21 | } 22 | 23 | g_client.reset(new SPTAG::Client::ClientWrapper(options)); 24 | if (!g_client->IsAvailable()) 25 | { 26 | return 1; 27 | } 28 | 29 | g_client->WaitAllFinished(); 30 | SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "connection done\n"); 31 | 32 | std::string line; 33 | std::cout << "Query: " << std::flush; 34 | while (std::getline(std::cin, line)) 35 | { 36 | if (line.empty()) 37 | { 38 | break; 39 | } 40 | 41 | SPTAG::Socket::RemoteQuery query; 42 | query.m_type = SPTAG::Socket::RemoteQuery::QueryType::String; 43 | query.m_queryString = std::move(line); 44 | 45 | SPTAG::Socket::RemoteSearchResult result; 46 | auto callback = [&result](SPTAG::Socket::RemoteSearchResult p_result) { result = std::move(p_result); }; 47 | 48 | g_client->SendQueryAsync(query, callback, options); 49 | g_client->WaitAllFinished(); 50 | 51 | std::cout << "Status: " << static_cast(result.m_status) << std::endl; 52 | 53 | for (const auto &indexRes : result.m_allIndexResults) 54 | { 55 | std::cout << "Index: " << indexRes.m_indexName << std::endl; 56 | 57 | int idx = 0; 58 | for (const auto &res : 
indexRes.m_results) 59 | { 60 | std::cout << "------------------" << std::endl; 61 | std::cout << "DocIndex: " << res.VID << " Distance: " << res.Dist; 62 | if (indexRes.m_results.WithMeta()) 63 | { 64 | const auto &metadata = indexRes.m_results.GetMetadata(idx); 65 | std::cout << " MetaData: " << std::string((char *)metadata.Data(), metadata.Length()); 66 | } 67 | std::cout << std::endl; 68 | ++idx; 69 | } 70 | } 71 | 72 | std::cout << "Query: " << std::flush; 73 | } 74 | 75 | return 0; 76 | } 77 | -------------------------------------------------------------------------------- /Wrappers/WinRT/AnnIndex.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "AnnIndex.g.h" 4 | #include "SearchResult.g.h" 5 | #include "inc/Core/VectorIndex.h" 6 | #include "inc/Core/SearchQuery.h" 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | template 13 | struct ReadOnlyProperty { 14 | ReadOnlyProperty() = default; 15 | ReadOnlyProperty(const T& value) : m_value(value) {} 16 | T operator()() const noexcept { return m_value; } 17 | T m_value{}; 18 | }; 19 | 20 | template 21 | struct Property : ReadOnlyProperty { 22 | void operator()(const T& value) { m_value = value; } 23 | }; 24 | 25 | namespace sptag = ::SPTAG; 26 | namespace winrt::SPTAG::implementation 27 | { 28 | using EmbeddingVector = winrt::array_view; 29 | 30 | struct SearchResult : SearchResultT { 31 | winrt::com_array Metadata() { return winrt::com_array(m_metadata); } 32 | std::vector m_metadata; 33 | ReadOnlyProperty Distance; 34 | 35 | SearchResult() = default; 36 | SearchResult(winrt::array_view metadata, float d) : m_metadata(metadata.begin(), metadata.end()), Distance(d) {} 37 | SearchResult(uint8_t* metadata, size_t length, float d) : m_metadata(metadata, metadata + length), Distance(d) {} 38 | }; 39 | 40 | 41 | struct AnnIndex : AnnIndexT 42 | { 43 | sptag::DimensionType m_dimension{ }; 44 | sptag::VectorValueType m_inputValueType{ 
sptag::VectorValueType::Float }; 45 | 46 | 47 | AnnIndex() { 48 | sptag::SetLogger(std::make_shared(sptag::Helper::LogLevel::LL_Empty)); 49 | m_index = sptag::VectorIndex::CreateInstance(sptag::IndexAlgoType::BKT, sptag::GetEnumValueType()); 50 | } 51 | 52 | void AddWithMetadata(array_view data, array_view metadata); 53 | 54 | void Save(winrt::Windows::Storage::StorageFile file); 55 | void Load(winrt::Windows::Storage::StorageFile file); 56 | 57 | SPTAG::SearchResult GetResultFromMetadata(const sptag::BasicResult& r) const; 58 | 59 | winrt::Windows::Foundation::Collections::IVector Search(EmbeddingVector p_data, uint32_t p_resultNum) const; 60 | 61 | std::shared_ptr m_index; 62 | template 63 | void _AddWithMetadataImpl(EmbeddingVector p_data, T metadata); 64 | }; 65 | 66 | } 67 | 68 | namespace winrt::SPTAG::factory_implementation 69 | { 70 | struct AnnIndex : AnnIndexT {}; 71 | } 72 | -------------------------------------------------------------------------------- /Wrappers/inc/ManagedObject.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #pragma once 5 | #include "inc/Helper/StringConvert.h" 6 | 7 | using namespace System; 8 | using namespace System::Runtime::InteropServices; 9 | 10 | namespace Microsoft 11 | { 12 | namespace ANN 13 | { 14 | namespace SPTAGManaged 15 | { 16 | /// 17 | /// Holds a pointer to an unmanaged object from the core project. 18 | /// 19 | template 20 | public ref class ManagedObject 21 | { 22 | protected: 23 | T* m_Instance; 24 | 25 | public: 26 | ManagedObject(T* instance) 27 | :m_Instance(instance) 28 | { 29 | } 30 | 31 | ManagedObject(T& instance) 32 | { 33 | m_Instance = new T(instance); 34 | } 35 | 36 | /// 37 | /// Destructor, which is called whenever the object is deleted with the delete keyword. 38 | /// 39 | virtual ~ManagedObject() 40 | { 41 | if (m_Instance != nullptr) 42 | { 43 | delete m_Instance; 44 | } 45 | } 46 | 47 | /// 48 | /// Finalizer, which is called by the garbage collector whenever it destroys the wrapper object. 49 | /// 50 | !ManagedObject() 51 | { 52 | if (m_Instance != nullptr) 53 | { 54 | delete m_Instance; 55 | } 56 | } 57 | 58 | T* GetInstance() 59 | { 60 | return m_Instance; 61 | } 62 | 63 | static const char* string_to_char_array(String^ string) 64 | { 65 | const char* str = (const char*)(Marshal::StringToHGlobalAnsi(string)).ToPointer(); 66 | return str; 67 | } 68 | 69 | template 70 | static T string_to(String^ string) 71 | { 72 | T data; 73 | SPTAG::Helper::Convert::ConvertStringTo(string_to_char_array(string), data); 74 | return data; 75 | } 76 | }; 77 | } 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /datasets/SPACEV1B/README.md: -------------------------------------------------------------------------------- 1 | # SPACEV1B: A billion-scale vector dataset for text descriptors 2 | This is a dataset released by Microsoft from SpaceV, the Bing web vector search scenario, for research on large-scale vector search.
It consists of more than one billion document vectors 3 | and 29K+ query vectors encoded by the Microsoft SpaceV Superior model. This model is trained to capture a generic intent representation for both documents and queries. 4 | The goal is to match each query vector to its closest document vectors in order to retrieve the top-k relevant documents for each query. 5 | 6 | ## Introduction 7 | 8 | This dataset contains: 9 | 10 | * [vectors.bin](vectors.bin): It contains 1,402,020,720 100-dimensional int8-type document descriptors. 11 | * [query.bin](query.bin): It contains 29,316 100-dimensional int8-type query descriptors. 12 | * [truth.bin](truth.bin): It contains the 100 nearest ground-truth neighbors (including vector ids and distances) for each of the 29,316 queries, according to L2 distance. 13 | * [query_log.bin](query_log.bin): It contains 94,162 100-dimensional int8-type historical query descriptors. 14 | 15 | ## How to read the vectors, queries, and truth 16 | 17 | ```python 18 | import struct 19 | import numpy as np 20 | import os 21 | 22 | part_count = len(os.listdir('vectors.bin')) 23 | for i in range(1, part_count + 1): 24 | fvec = open(os.path.join('vectors.bin', 'vectors_%d.bin' % i), 'rb') 25 | if i == 1: 26 | vec_count = struct.unpack('i', fvec.read(4))[0] 27 | vec_dimension = struct.unpack('i', fvec.read(4))[0] 28 | vecbuf = bytearray(vec_count * vec_dimension) 29 | vecbuf_offset = 0 30 | while True: 31 | part = fvec.read(1048576) 32 | if len(part) == 0: break 33 | vecbuf[vecbuf_offset: vecbuf_offset + len(part)] = part 34 | vecbuf_offset += len(part) 35 | fvec.close() 36 | X = np.frombuffer(vecbuf, dtype=np.int8).reshape((vec_count, vec_dimension)) 37 | 38 | fq = open('query.bin', 'rb') 39 | q_count = struct.unpack('i', fq.read(4))[0] 40 | q_dimension = struct.unpack('i', fq.read(4))[0] 41 | queries = np.frombuffer(fq.read(q_count * q_dimension), dtype=np.int8).reshape((q_count, q_dimension)) 42 | 43 | ftruth = open('truth.bin', 'rb') 44 | t_count = struct.unpack('i', ftruth.read(4))[0] 45 | 
topk = struct.unpack('i', ftruth.read(4))[0] 46 | truth_vids = np.frombuffer(ftruth.read(t_count * topk * 4), dtype=np.int32).reshape((t_count, topk)) 47 | truth_distances = np.frombuffer(ftruth.read(t_count * topk * 4), dtype=np.float32).reshape((t_count, topk)) 48 | ``` 49 | 50 | ## License 51 | 52 | The entire dataset is under [O-UDA license](LICENSE) -------------------------------------------------------------------------------- /AnnService/inc/Core/MultiIndexScan.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #ifndef _SPTAG_MULTI_INDEX_SCAN_H 5 | #define _SPTAG_MULTI_INDEX_SCAN_H 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "ResultIterator.h" 17 | #include "VectorIndex.h" 18 | #include 19 | namespace SPTAG 20 | { 21 | class MultiIndexScan // NOTE(review): presumably merges the result iterators of several indices into one ranked stream - confirm against the implementation file 22 | { 23 | public: 24 | MultiIndexScan(); 25 | MultiIndexScan(std::vector> vecIndices, 26 | std::vector p_targets, 27 | unsigned int k, 28 | float (*rankFunction)(std::vector), 29 | bool useTimer, 30 | int termCondVal, 31 | int searchLimit 32 | ); 33 | ~MultiIndexScan(); 34 | void Init(std::vector> vecIndices, 35 | std::vector p_targets, 36 | std::vector weight, 37 | unsigned int k, 38 | bool useTimer, 39 | int termCondVal, 40 | int searchLimit); 41 | bool Next(BasicResult& result); // NOTE(review): presumably fills result and returns false once the scan is exhausted - TODO confirm 42 | void Close(); 43 | 44 | private: 45 | std::vector> indexIters; 46 | std::vector> fwdLUTs; 47 | std::unordered_set seenSet; 48 | std::vector p_data_array; 49 | std::vector weight; 50 | 51 | unsigned int k; 52 | 53 | 54 | 55 | bool useTimer; 56 | unsigned int termCondVal; // NOTE(review): stored unsigned although the constructor and Init receive it as int 57 | int searchLimit; 58 | std::chrono::time_point t_start; 59 | 60 | float (*func)(std::vector); 61 | 62 | unsigned int consecutive_drops; 63 | 64 | bool terminate; 65 | using pq_item = std::pair; 66 | class pq_item_compare // comparator used by the priority queue pq declared below 67 | { 68 | public: 69 | 
bool operator()(const pq_item& lhs, const pq_item& rhs) const // const-qualified so the comparator can also be invoked on a const comparator object 70 | { 71 | return lhs.first < rhs.first; // ascending on first, so pq.top() is the entry with the largest first 72 | } 73 | }; 74 | std::priority_queue, pq_item_compare> pq; 75 | std::stack outputStk; 76 | float WeightedRankFunc(std::vector); 77 | 78 | }; 79 | } // namespace SPTAG 80 | #endif 81 | -------------------------------------------------------------------------------- /Script_AE/iniFile/build_clustering_2m.ini: -------------------------------------------------------------------------------- 1 | [Index] 2 | IndexAlgoType=SPANN 3 | ValueType=UInt8 4 | 5 | [Base] 6 | ValueType=UInt8 7 | DistCalcMethod=L2 8 | IndexAlgoType=BKT 9 | Dim=128 10 | VectorPath=/home/sosp/data/sift_data/bigann2m_update_clustering 11 | VectorType=DEFAULT 12 | VectorSize=2000000 13 | VectorDelimiter= 14 | QueryPath=/home/sosp/data/sift_data/query.public.10K.u8bin 15 | QueryType=DEFAULT 16 | QuerySize=10000 17 | QueryDelimiter= 18 | WarmupPath= 19 | WarmupType=DEFAULT 20 | WarmupSize=10000 21 | WarmupDelimiter= 22 | TruthPath=/home/sosp/data/sift_data/bigann2m_update_clustering_origin_truth0 23 | TruthType=DEFAULT 24 | GenerateTruth=false 25 | HeadVectorIDs=head_vectors_ID_UInt8_L2_base_DEFUALT.bin 26 | HeadVectors=head_vectors_UInt8_L2_base_DEFUALT.bin 27 | IndexDirectory=/home/sosp/data/store_sift_cluster_2m 28 | HeadIndexFolder=head_index 29 | 30 | [SelectHead] 31 | isExecute=true 32 | TreeNumber=1 33 | BKTKmeansK=32 34 | BKTLeafSize=8 35 | SamplesNumber=1000 36 | NumberOfThreads=80 37 | SaveBKT=false 38 | AnalyzeOnly=false 39 | CalcStd=true 40 | SelectDynamically=true 41 | NoOutput=false 42 | SelectThreshold=12 43 | SplitFactor=9 44 | SplitThreshold=18 45 | Ratio=0.15 46 | RecursiveCheckSmallCluster=true 47 | PrintSizeCount=true 48 | 49 | [BuildHead] 50 | isExecute=true 51 | TreeFilePath=tree.bin 52 | GraphFilePath=graph.bin 53 | VectorFilePath=vectors.bin 54 | DeleteVectorFilePath=deletes.bin 55 | EnableBfs=0 56 | BKTNumber=1 57 | BKTKmeansK=32 58 | BKTLeafSize=8 59 | Samples=1000 60 | 
BKTLambdaFactor=100.000000 61 | TPTNumber=32 62 | TPTLeafSize=2000 63 | NumTopDimensionTpTreeSplit=5 64 | NeighborhoodSize=32 65 | GraphNeighborhoodScale=2.000000 66 | GraphCEFScale=2.000000 67 | RefineIterations=2 68 | EnableRebuild=1 69 | CEF=1000 70 | AddCEF=500 71 | MaxCheckForRefineGraph=8192 72 | RNGFactor=1.000000 73 | GPUGraphType=2 74 | GPURefineSteps=0 75 | GPURefineDepth=30 76 | GPULeafSize=500 77 | HeadNumGPUs=1 78 | TPTBalanceFactor=2 79 | NumberOfThreads=80 80 | DistCalcMethod=L2 81 | DeletePercentageForRefine=0.400000 82 | AddCountForRebuild=1000 83 | MaxCheck=4096 84 | ThresholdOfNumberOfContinuousNoBetterPropagation=3 85 | NumberOfInitialDynamicPivots=50 86 | NumberOfOtherDynamicPivots=4 87 | HashTableExponent=2 88 | DataBlockSize=1048576 89 | DataCapacity=2147483647 90 | MetaRecordSize=10 91 | 92 | [BuildSSDIndex] 93 | isExecute=true 94 | BuildSsdIndex=true 95 | NumberOfThreads=10 96 | InternalResultNum=64 97 | ReplicaCount=8 98 | PostingPageLimit=3 99 | OutputEmptyReplicaID=1 100 | TmpDir=/home/sosp/data/store_sift_cluster/tmpdir 101 | --------------------------------------------------------------------------------