├── VERSION ├── python ├── ngt │ ├── __init__.py │ └── README.md ├── MANIFEST.in ├── pyproject.toml ├── README-jp.md ├── README.md ├── sample │ └── sample.py ├── setup.py ├── README-ngtpy-jp.md └── README-ngtpy.md ├── assets ├── logo.png └── logo.svg ├── .gitignore ├── lib ├── CMakeLists.txt └── NGT │ ├── MmapManagerException.h │ ├── SharedMemoryAllocator.cpp │ ├── Version.cpp │ ├── Version.h │ ├── CMakeLists.txt │ ├── MmapManagerDefs.h │ ├── ArrayFile.cpp │ ├── MmapManager.h │ ├── defines.h.in │ ├── Thread.cpp │ ├── HashBasedBooleanSet.h │ ├── ObjectSpace.cpp │ ├── NGTQ │ ├── QuantizedGraph.cpp │ ├── QbgCli.h │ └── Capi.h │ ├── Command.h │ ├── ArrayFile.h │ ├── SharedMemoryAllocator.h │ ├── Thread.h │ └── Node.cpp ├── tests ├── ann-benchmarks-results │ ├── glove-100-angular.png │ ├── glove-25-angular.png │ ├── glove-50-angular.png │ ├── gist-960-euclidean.png │ ├── nytimes-256-angular.png │ ├── sift-128-euclidean.png │ └── fashion-mnist-784-euclidean.png └── datasets │ └── ann-benchmarks │ ├── sift-128-euclidean.tsv │ ├── sift-128-euclidean_gt.tsv │ ├── sift-128-euclidean_gtdist.tsv │ └── sift-128-euclidean_query.tsv ├── data ├── lorentz-query.tsv ├── poincare-query.tsv ├── poincare_input_generator.py ├── lorentz_input_generator.py ├── sift-query-3.tsv └── poincare-input-20.tsv ├── README.jp ├── samples ├── qg-capi │ ├── CMakeLists.txt │ └── qg-capi.cpp ├── l2-uint8 │ ├── CMakeLists.txt │ └── l2-uint8.cpp ├── qbg-capi │ ├── CMakeLists.txt │ └── qbg-capi.cpp ├── qg-l2-float │ ├── CMakeLists.txt │ └── qg-l2-float.cpp ├── cosine-float │ ├── CMakeLists.txt │ └── cosine-float.cpp ├── hamming-uint8 │ ├── CMakeLists.txt │ └── hamming-uint8.cpp ├── jaccard-sparse │ ├── CMakeLists.txt │ └── jaccard-sparse.cpp ├── l2-uint8-range-search │ ├── CMakeLists.txt │ └── l2-uint8-range-search.cpp └── CMakeLists.txt ├── bin ├── CMakeLists.txt ├── qbg │ ├── CMakeLists.txt │ └── qbg.cpp └── ngt │ ├── CMakeLists.txt │ └── ngt.cpp ├── utils └── mk_version_defs_h.sh ├── CMakeLists.txt ├── 
README-jp.md ├── README.md └── LICENSE /VERSION: -------------------------------------------------------------------------------- 1 | 2.3.13 2 | -------------------------------------------------------------------------------- /python/ngt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include VERSION 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/assets/logo.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | python/VERSION 3 | python/dist/ 4 | python/ngt.egg-info/ 5 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | add_subdirectory(${PROJECT_SOURCE_DIR}/lib/NGT) 3 | endif() 4 | -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "pybind11"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /tests/ann-benchmarks-results/glove-100-angular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/tests/ann-benchmarks-results/glove-100-angular.png 
-------------------------------------------------------------------------------- /tests/ann-benchmarks-results/glove-25-angular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/tests/ann-benchmarks-results/glove-25-angular.png -------------------------------------------------------------------------------- /tests/ann-benchmarks-results/glove-50-angular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/tests/ann-benchmarks-results/glove-50-angular.png -------------------------------------------------------------------------------- /tests/ann-benchmarks-results/gist-960-euclidean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/tests/ann-benchmarks-results/gist-960-euclidean.png -------------------------------------------------------------------------------- /tests/ann-benchmarks-results/nytimes-256-angular.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/tests/ann-benchmarks-results/nytimes-256-angular.png -------------------------------------------------------------------------------- /tests/ann-benchmarks-results/sift-128-euclidean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/tests/ann-benchmarks-results/sift-128-euclidean.png -------------------------------------------------------------------------------- /data/lorentz-query.tsv: -------------------------------------------------------------------------------- 1 | 4.2284383085 2.1138341877 0.1087792225 -0.7132611282 -1.8140892833 -0.2704006454 2.0958643952 2.0094021157 0.2327657764 0.2055734200 -------------------------------------------------------------------------------- 
/data/poincare-query.tsv: -------------------------------------------------------------------------------- 1 | 0.1707004946 -0.0515373831 0.0113428903 -0.0843610949 -0.0245908908 0.0036906201 0.0009854802 -0.0181909946 -0.0781970177 0.1162063167 -------------------------------------------------------------------------------- /tests/ann-benchmarks-results/fashion-mnist-784-euclidean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irokaru/NGT/main/tests/ann-benchmarks-results/fashion-mnist-784-euclidean.png -------------------------------------------------------------------------------- /README.jp: -------------------------------------------------------------------------------- 1 | 2 | NGT - Neighborhood Graph and Tree for Indexing 3 | 4 | READMEは以下の場所に移動致しました。 5 | 6 | https://github.com/yahoojapan/NGT/blob/master/README-jp.md 7 | 8 | --- 9 | 10 | -------------------------------------------------------------------------------- /tests/datasets/ann-benchmarks/sift-128-euclidean.tsv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:16d310aa376a141ed9b573386d6766b29ba3faeef5f6892e931c431f35923373 3 | size 586929838 4 | -------------------------------------------------------------------------------- /tests/datasets/ann-benchmarks/sift-128-euclidean_gt.tsv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0b0247bfe4d7f28794e55b5ce8c68708657ff76d24fd1bc7d3d540947e051964 3 | size 6887118 4 | -------------------------------------------------------------------------------- /tests/datasets/ann-benchmarks/sift-128-euclidean_gtdist.tsv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:e505b950e8b5e93e3ff47bfd3f8d34586d5e7659968fd8f913ad5449f2ea2917 3 | size 9763708 4 | -------------------------------------------------------------------------------- /tests/datasets/ann-benchmarks/sift-128-euclidean_query.tsv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8737c7d49404a9bcd4272b1acca91848d97869b62b1e5e35940b473b8701d7c5 3 | size 5873068 4 | -------------------------------------------------------------------------------- /samples/qg-capi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(qg-capi qg-capi.cpp) 7 | add_dependencies(qg-capi ngt) 8 | target_link_libraries(qg-capi ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /bin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(${UNIX}) 2 | include_directories("${PROJECT_SOURCE_DIR}/lib" "${PROJECT_BINARY_DIR}/lib/") 3 | link_directories("${PROJECT_BINARY_DIR}/lib/NGT") 4 | add_subdirectory("${PROJECT_SOURCE_DIR}/bin/ngt") 5 | if(NOT DEFINED NGT_QBG_DISABLED OR (NOT ${NGT_QBG_DISABLED})) 6 | add_subdirectory("${PROJECT_SOURCE_DIR}/bin/qbg") 7 | endif() 8 | endif() 9 | -------------------------------------------------------------------------------- /samples/l2-uint8/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(l2-uint8 l2-uint8.cpp) 7 | add_dependencies(l2-uint8 ngt) 8 | 
target_link_libraries(l2-uint8 ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /samples/qbg-capi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(qbg-capi qbg-capi.cpp) 7 | add_dependencies(qbg-capi ngt) 8 | target_link_libraries(qbg-capi ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /data/poincare_input_generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | n = 20 5 | d = 10 6 | 7 | for i in range(n): 8 | data = np.random.randn(d) 9 | data = data / np.linalg.norm(data) * np.random.rand() # norm must be within 1 10 | for j in range(d): 11 | print('%.10f' % data[j], end="") 12 | if j < d-1: 13 | print("\t", end="") 14 | if i < n-1: 15 | print("") -------------------------------------------------------------------------------- /samples/qg-l2-float/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(qg-l2-float qg-l2-float.cpp) 7 | add_dependencies(qg-l2-float ngt) 8 | target_link_libraries(qg-l2-float ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /samples/cosine-float/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | 
link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(cosine-float cosine-float.cpp) 7 | add_dependencies(cosine-float ngt) 8 | target_link_libraries(cosine-float ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /data/lorentz_input_generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | n = 5000 5 | d = 10 6 | 7 | for i in range(n): 8 | data = np.random.randn(d-1) 9 | h = np.linalg.norm(data) 10 | h = np.sqrt(1 + h*h) 11 | print('%.10f\t' % h, end="") 12 | for j in range(d-1): 13 | print('%.10f' % data[j], end="") 14 | if j < d-2: 15 | print("\t", end="") 16 | if i < n-1: 17 | print("") -------------------------------------------------------------------------------- /samples/hamming-uint8/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(hamming-uint8 hamming-uint8.cpp) 7 | add_dependencies(hamming-uint8 ngt) 8 | target_link_libraries(hamming-uint8 ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /samples/jaccard-sparse/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(jaccard-sparse jaccard-sparse.cpp) 7 | add_dependencies(jaccard-sparse ngt) 8 | target_link_libraries(jaccard-sparse ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /samples/l2-uint8-range-search/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(l2-uint8-range-search l2-uint8-range-search.cpp) 7 | add_dependencies(l2-uint8-range-search ngt) 8 | target_link_libraries(l2-uint8-range-search ngt pthread) 9 | endif() 10 | -------------------------------------------------------------------------------- /bin/qbg/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(qbg_exe qbg.cpp) 7 | add_dependencies(qbg_exe ngt) 8 | set_target_properties(qbg_exe PROPERTIES OUTPUT_NAME qbg) ## set the output name to qbg 9 | target_link_libraries(qbg_exe ngt pthread) 10 | 11 | install(TARGETS qbg_exe RUNTIME DESTINATION bin) 12 | 13 | endif() 14 | -------------------------------------------------------------------------------- /bin/ngt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_BINARY_DIR}/lib") 3 | include_directories("${PROJECT_SOURCE_DIR}/lib") 4 | link_directories("${PROJECT_SOURCE_DIR}/lib/NGT") 5 | 6 | add_executable(ngt_exe ngt.cpp) 7 | add_dependencies(ngt_exe ngt) 8 | set_target_properties(ngt_exe PROPERTIES OUTPUT_NAME ngt) 9 | if(CMAKE_VERSION VERSION_LESS 3.1) 10 | target_link_libraries(ngt_exe ngt pthread) 11 | else() 12 | target_link_libraries(ngt_exe ngt Threads::Threads) 13 | endif() 14 | install(TARGETS ngt_exe RUNTIME DESTINATION bin) 15 | endif() 16 | -------------------------------------------------------------------------------- /utils/mk_version_defs_h.sh: 
-------------------------------------------------------------------------------- 1 | rm -f $2 2 | echo "//" >> $2 3 | echo "// Do *NOT* edit this file." >> $2 4 | echo "//" >> $2 5 | if which date > /dev/null 2>&1; then 6 | echo "#define NGT_BUILD_DATE \"`date +'%Y/%m/%d %H:%M:%S'`\"" >> $2 7 | fi 8 | if which git > /dev/null 2>&1; then 9 | echo "#define NGT_GIT_HASH \"`git log -1 --format='%H'`\"" >> $2 10 | echo "#define NGT_GIT_DATE \"`git log -1 --format='%cd'`\"" >> $2 11 | echo "#define NGT_GIT_TAG \"`git describe --abbrev=0`\"" >> $2 12 | fi 13 | if which cat > /dev/null 2>&1; then 14 | echo "#define NGT_VERSION \"`cat $1/VERSION`\"" >> $2 15 | fi 16 | 17 | touch -r $1/VERSION $2 18 | -------------------------------------------------------------------------------- /samples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | include_directories("${PROJECT_SOURCE_DIR}/lib" "${PROJECT_BINARY_DIR}/lib/") 3 | link_directories("${PROJECT_BINARY_DIR}/lib/NGT") 4 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/hamming-uint8") 5 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/l2-uint8") 6 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/l2-uint8-range-search") 7 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/cosine-float") 8 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/jaccard-sparse") 9 | if(NOT DEFINED NGT_QBG_DISABLED) 10 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/qg-l2-float") 11 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/qg-capi") 12 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples/qbg-capi") 13 | endif() 14 | endif() 15 | -------------------------------------------------------------------------------- /lib/NGT/MmapManagerException.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use 
this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | namespace MemoryManager { 24 | class MmapManagerException : public std::domain_error { 25 | public: 26 | MmapManagerException(const std::string &msg) : std::domain_error(msg) {} 27 | }; 28 | } // namespace MemoryManager 29 | -------------------------------------------------------------------------------- /bin/qbg/qbg.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2020 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #include "NGT/NGTQ/QbgCli.h" 18 | 19 | int main(int argc, char **argv) { 20 | NGT::Args args(argc, argv); 21 | 22 | QBG::CLI ngt; 23 | 24 | try { 25 | ngt.execute(args); 26 | } catch (NGT::Exception &err) { 27 | cerr << "qbg: Error: " << err.what() << endl; 28 | return 1; 29 | } catch (...) 
{ 30 | cerr << "qbg: Error: " << endl; 31 | return 1; 32 | } 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /data/sift-query-3.tsv: -------------------------------------------------------------------------------- 1 | 17 21 18 17 31 33 25 26 19 42 31 25 26 49 30 19 23 29 29 22 19 28 27 28 19 13 12 25 21 25 21 35 12 44 36 19 49 104 33 29 77 43 36 28 44 90 46 52 37 65 42 33 40 104 103 44 26 50 43 18 20 48 68 28 16 104 27 6 36 98 32 7 53 81 40 36 61 104 44 27 42 84 55 54 49 53 28 27 103 42 27 28 24 53 60 66 7 42 14 6 32 69 15 3 4 79 27 7 30 82 26 3 15 27 18 6 19 52 21 16 104 72 30 40 22 36 19 22 2 | 1 2 2 1 0 0 37 19 3 3 2 1 3 3 101 47 27 11 3 8 24 22 104 67 29 11 4 32 31 13 104 77 42 37 49 24 6 13 98 99 34 31 20 9 19 14 102 57 98 45 16 41 102 55 64 54 100 43 7 56 93 43 20 42 67 49 57 24 31 32 33 88 47 60 42 25 40 38 34 27 85 48 26 35 69 39 36 24 77 26 13 14 77 39 13 9 37 58 76 34 23 14 53 61 38 46 43 23 18 10 23 21 44 36 13 10 52 9 10 9 35 20 16 14 49 10 4 5 3 | 65 38 2 1 45 45 4 7 104 48 2 2 95 42 4 10 96 18 1 9 95 10 2 18 51 9 4 6 36 36 9 13 73 36 3 5 62 64 6 10 116 59 3 7 116 61 7 20 116 28 1 39 116 16 1 52 27 0 0 5 15 2 2 17 65 42 7 12 58 37 8 8 116 45 9 32 116 46 4 22 116 14 3 34 74 11 4 49 2 0 0 11 9 1 1 3 52 53 6 9 57 36 6 5 116 30 3 28 116 17 2 24 101 3 1 17 50 2 1 29 1 0 0 24 18 4 5 6 4 | -------------------------------------------------------------------------------- /python/README-jp.md: -------------------------------------------------------------------------------- 1 | 2 | # python NGT 3 | 4 | 5 | ## インストール 6 | 次のコマンドでpybind11 (ngtpy) の python binding がインストールできます。 7 | ``` 8 | pip3 install ngt 9 | ``` 10 | また、ソースコードからもpython bindingsをインストールできます。ただし、python bindingsをインストールする前にNGTライブラリを[README-jp](../README-jp.md#build)にしたがって**必ず**インストールしてください。 11 | ``` 12 | cd NGT_ROOT/python 13 | pip3 install . 
14 | ``` 15 | なお、古いCPUでも動作するように、PyPIのngtpyは個々のコンピュータ上でビルドしたngtpyよりも検索が遅い点、ご注意ください。 16 | 17 | ## ドキュメント 18 | 19 | [ngtpy (pybind11) リファレンス](README-ngtpy-jp.md) 20 | 21 | ## サンプルコード 22 | 23 | ### ngtpy (pybind11) 24 | 25 | ngtpy(pybind11)はngt(ctypes)より処理時間を削減できます。特に短時間の検索において効果があります。 26 | 27 | ```python 28 | import ngtpy 29 | import random 30 | 31 | dim = 10 32 | objects = [] 33 | for i in range(0, 100) : 34 | vector = random.sample(range(100), dim) 35 | objects.append(vector) 36 | 37 | query = objects[0] 38 | 39 | ngtpy.create(b"tmp", dim) 40 | index = ngtpy.Index(b"tmp") 41 | index.batch_insert(objects) 42 | index.save() 43 | 44 | result = index.search(query, 3) 45 | 46 | for i, o in enumerate(result) : 47 | print(str(i) + ": " + str(o[0]) + ", " + str(o[1])) 48 | object = index.get_object(o[0]) 49 | print(object) 50 | ``` 51 | 52 | ご参考: [sample.py](sample/sample.py). 53 | 54 | -------------------------------------------------------------------------------- /lib/NGT/SharedMemoryAllocator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | #include "NGT/SharedMemoryAllocator.h" 18 | 19 | 20 | 21 | void *operator new(size_t size, SharedMemoryAllocator &allocator) { 22 | void *addr = allocator.allocate(size); 23 | #ifdef MEMORY_ALLOCATOR_INFO 24 | std::cerr << "new:" << size << " " << addr << " " << allocator.getTotalSize() << std::endl; 25 | #endif 26 | return addr; 27 | } 28 | 29 | void *operator new[](size_t size, SharedMemoryAllocator &allocator) { 30 | 31 | void *addr = allocator.allocate(size); 32 | #ifdef MEMORY_ALLOCATOR_INFO 33 | std::cerr << "new[]:" << size << " " << addr << " " << allocator.getTotalSize() << std::endl; 34 | #endif 35 | return addr; 36 | } 37 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | 2 | # python NGT 3 | 4 | [日本語](README-jp.md) 5 | 6 | ## Install 7 | Python binding with pybind11 (ngtpy) is installed as follows. 8 | ``` 9 | pip3 install ngt 10 | ``` 11 | You can install the python bindings from source code. You **MUST** install the NGT library according to the [README](../README.md#build) before installing the python bindings as follows. 12 | ``` 13 | cd NGT_ROOT/python 14 | pip3 install . 15 | ``` 16 | Please note that the ngtpy package from PyPI is built so that it can run on older CPUs; as a result, its search speed is slower than that of an ngtpy built on your own computer. 17 | 18 | ## Documents 19 | 20 | [ngtpy (pybind11) reference](README-ngtpy.md) 21 | 22 | ## Simple samples 23 | 24 | ### ngtpy (pybind11) 25 | 26 | ngtpy (pybind11) takes less processing time than ngt (ctypes). The reduction is most noticeable when the search time itself is short. 
27 | 28 | ```python 29 | import ngtpy 30 | import random 31 | 32 | 33 | dim = 10 34 | nb = 100 35 | vectors = [[random.random() for _ in range(dim)] for _ in range(nb)] 36 | query = vectors[0] 37 | 38 | ngtpy.create(b"tmp", dim) 39 | index = ngtpy.Index(b"tmp") 40 | index.batch_insert(vectors) 41 | index.save() 42 | 43 | results = index.search(query, 3) 44 | for i, (id, distance) in enumerate(results) : 45 | print(str(i) + ": " + str(id) + ", " + str(distance)) 46 | object = index.get_object(id) 47 | print(object) 48 | 49 | ``` 50 | 51 | See also [sample.py](sample/sample.py). 52 | 53 | -------------------------------------------------------------------------------- /lib/NGT/Version.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | #include "NGT/Version.h" 18 | 19 | void NGT::Version::get(std::ostream &os) { 20 | os << " Version:" << NGT::Version::getVersion() << std::endl; 21 | os << " Built date:" << NGT::Version::getBuildDate() << std::endl; 22 | os << " The last git tag:" << Version::getGitTag() << std::endl; 23 | os << " The last git commit hash:" << Version::getGitHash() << std::endl; 24 | os << " The last git commit date:" << Version::getGitDate() << std::endl; 25 | } 26 | 27 | const std::string NGT::Version::getVersion() { return NGT_VERSION; } 28 | 29 | const std::string NGT::Version::getBuildDate() { return NGT_BUILD_DATE; } 30 | 31 | const std::string NGT::Version::getGitHash() { return NGT_GIT_HASH; } 32 | 33 | const std::string NGT::Version::getGitDate() { return NGT_GIT_DATE; } 34 | 35 | const std::string NGT::Version::getGitTag() { return NGT_GIT_TAG; } 36 | -------------------------------------------------------------------------------- /lib/NGT/Version.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include "NGT/version_defs.h" 22 | 23 | #ifndef NGT_VERSION 24 | #define NGT_VERSION "-" 25 | #endif 26 | #ifndef NGT_BUILD_DATE 27 | #define NGT_BUILD_DATE "-" 28 | #endif 29 | #ifndef NGT_GIT_HASH 30 | #define NGT_GIT_HASH "-" 31 | #endif 32 | #ifndef NGT_GIT_DATE 33 | #define NGT_GIT_DATE "-" 34 | #endif 35 | #ifndef NGT_GIT_TAG 36 | #define NGT_GIT_TAG "-" 37 | #endif 38 | 39 | namespace NGT { 40 | class Version { 41 | public: 42 | static void get(std::ostream &os); 43 | static const std::string getVersion(); 44 | static const std::string getBuildDate(); 45 | static const std::string getGitHash(); 46 | static const std::string getGitDate(); 47 | static const std::string getGitTag(); 48 | static const std::string get(); 49 | }; 50 | 51 | }; // namespace NGT 52 | 53 | #ifdef NGT_VERSION_FOR_HEADER 54 | #include "Version.cpp" 55 | #endif 56 | -------------------------------------------------------------------------------- /lib/NGT/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if( ${UNIX} ) 2 | option(NGT_SHARED_MEMORY_ALLOCATOR "enable shared memory" OFF) 3 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/defines.h.in ${CMAKE_CURRENT_BINARY_DIR}/defines.h) 4 | include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/lib" "${PROJECT_BINARY_DIR}/lib/") 5 | 6 | if(${NGT_QBG_DISABLED}) 7 | file(GLOB NGT_SOURCES *.cpp) 8 | else() 9 | file(GLOB NGT_SOURCES *.cpp NGTQ/*.cpp) 10 | endif() 11 | file(GLOB HEADER_FILES *.h *.hpp) 12 | file(GLOB NGTQ_HEADER_FILES NGTQ/*.h NGTQ/*.hpp) 13 | 14 | add_library(ngtstatic STATIC ${NGT_SOURCES}) 15 | set_target_properties(ngtstatic PROPERTIES OUTPUT_NAME ngt) 16 | set_target_properties(ngtstatic PROPERTIES COMPILE_FLAGS "-fPIC") 17 | target_link_libraries(ngtstatic) 18 | if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") 19 | target_link_libraries(ngtstatic OpenMP::OpenMP_CXX) 20 | endif() 21 | 22 | 
add_library(ngt SHARED ${NGT_SOURCES}) 23 | set_target_properties(ngt PROPERTIES VERSION ${ngt_VERSION}) 24 | set_target_properties(ngt PROPERTIES SOVERSION ${ngt_SOVERSION}) 25 | add_dependencies(ngt ngtstatic) 26 | if(NOT DEFINED NGT_QBG_DISABLED OR (NOT ${NGT_QBG_DISABLED})) 27 | target_link_libraries(ngt lapack blas) 28 | endif() 29 | if(${APPLE}) 30 | if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") 31 | target_link_libraries(ngt OpenMP::OpenMP_CXX) 32 | else() 33 | target_link_libraries(ngt gomp) 34 | endif() 35 | else(${APPLE}) 36 | target_link_libraries(ngt gomp rt) 37 | endif(${APPLE}) 38 | 39 | add_custom_command(OUTPUT command DEPENDS ${NGT_SOURCES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND sh ${PROJECT_SOURCE_DIR}/utils/mk_version_defs_h.sh ${PROJECT_SOURCE_DIR} version_defs.h) 40 | add_custom_target(versiondef DEPENDS command) 41 | add_dependencies(ngtstatic versiondef) 42 | install(TARGETS 43 | ngt 44 | ngtstatic 45 | RUNTIME DESTINATION bin 46 | LIBRARY DESTINATION lib 47 | ARCHIVE DESTINATION lib) 48 | install(FILES ${HEADER_FILES} ${CMAKE_CURRENT_BINARY_DIR}/defines.h ${CMAKE_CURRENT_BINARY_DIR}/version_defs.h DESTINATION include/NGT) 49 | install(FILES ${NGTQ_HEADER_FILES} DESTINATION include/NGT/NGTQ) 50 | endif() 51 | -------------------------------------------------------------------------------- /python/sample/sample.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import ngtpy 3 | 4 | # create an index framework in the filesystem. 5 | ngtpy.create(path=b'index', dimension=128, distance_type="L2") 6 | 7 | # load objects. 8 | objects = [] 9 | with open(b'../../data/sift-dataset-5k.tsv', 'r') as fp: 10 | for object in csv.reader(fp, delimiter='\t'): 11 | objects.append(object[0:128]) 12 | 13 | # open index. 14 | index = ngtpy.Index(b'index') 15 | 16 | # insert the objects. 17 | index.batch_insert(objects) 18 | 19 | # save the index. 20 | index.save() 21 | 22 | # close the index. 
23 | index.close() 24 | 25 | # open the index. 26 | index = ngtpy.Index(b'index') 27 | 28 | # load query data. 29 | with open(b'../../data/sift-query-3.tsv', 'r') as fp: 30 | query = list(csv.reader(fp, delimiter='\t')) 31 | 32 | # search for the index with the first query. 33 | results = index.search(query[0], size=5) 34 | 35 | print('ID\tDistance') 36 | for result in results: 37 | print('{}\t{}'.format(*result)) 38 | print('# of distance computations=' + str(index.get_num_of_distance_computations())) 39 | 40 | # get an object in the index. 41 | object = index.get_object(4078) 42 | 43 | # search with the object in the index. 44 | results = index.search(object, size=5) 45 | 46 | print('\nID\tDistance') 47 | for result in results: 48 | print('{}\t{}'.format(*result)) 49 | print('# of distance computations=' + str(index.get_num_of_distance_computations())) 50 | 51 | # insert the same objects individually. not build the index for them. 52 | with open(b'../../data/sift-dataset-5k.tsv', 'r') as fp: 53 | for object in csv.reader(fp, delimiter='\t'): 54 | objectID = index.insert(object[0:128]) 55 | if objectID % 1000 == 0: 56 | print('Processed {} objects.'.format(objectID)) 57 | 58 | # build the index for the inserted objects to search. 59 | index.build_index() 60 | 61 | # search with the first query to confirm the insertion. 62 | results = index.search(query[0], size=6) 63 | 64 | # get the search results. 65 | print('\nID\tDistance') 66 | for result in results: 67 | print('{}\t{}'.format(*result)) 68 | print('# of distance computations=' + str(index.get_num_of_distance_computations())) 69 | 70 | # remove an object. 71 | index.remove(3030) 72 | 73 | # search with the first query to confirm the removal. 
74 | results = index.search(query[0], size=6) 75 | print('\nID\tDistance') 76 | for result in results: 77 | print('{}\t{}'.format(*result)) 78 | print('# of distance computations=' + str(index.get_num_of_distance_computations())) 79 | 80 | index.save() 81 | index.close() 82 | -------------------------------------------------------------------------------- /lib/NGT/MmapManagerDefs.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
// Constants and plain-data structures used by the mmap-based memory manager.
//
// NOTE(review): the structs below look like records stored inside the mapped
// control/data files, so field order and field types are presumably part of
// the on-disk format -- confirm before changing any of them.
namespace MemoryManager {
// Version number of the manager.
// NOTE(review): this constant is 64-bit while boot_st::version is 32-bit.
const uint64_t MMAP_MANAGER_VERSION = 5;

const bool MMAP_DEFAULT_ALLOW_EXPAND = false;
// Size of the control file, expressed in pages ...
const uint64_t MMAP_CNTL_FILE_RANGE = 16;
// ... and in bytes. The page size is queried with sysconf(), so this constant
// is initialised at program start-up rather than at compile time.
const size_t MMAP_CNTL_FILE_SIZE = MMAP_CNTL_FILE_RANGE * sysconf(_SC_PAGESIZE);
const uint64_t MMAP_MAX_FILE_NAME_LENGTH = 1024;
const std::string MMAP_CNTL_FILE_SUFFIX = "c";

const size_t MMAP_LOWER_SIZE = 1;
// Alignment in bytes and, below, the matching exponent (2^3 == 8).
const size_t MMAP_MEMORY_ALIGN = 8;
const size_t MMAP_MEMORY_ALIGN_EXP = 3;

// Capacity of control_st::data_headers; reduced when MMANAGER_TEST_MODE is
// defined.
#ifndef MMANAGER_TEST_MODE
const uint64_t MMAP_MAX_UNIT_NUM = 1024;
#else
const uint64_t MMAP_MAX_UNIT_NUM = 8;
#endif

const uint64_t MMAP_FREE_QUEUE_SIZE = 1024;

// Number of entries in free_st::free_lists.
const uint64_t MMAP_FREE_LIST_NUM = 64;

// Version / size record.
typedef struct _boot_st {
  uint32_t version;
  uint64_t reserve;
  size_t size;
} boot_st;

// Per-unit header; control_st holds MMAP_MAX_UNIT_NUM of these.
typedef struct _head_st {
  off_t break_p;
  uint64_t chunk_num;
  uint64_t reserve;
} head_st;

// Head and tail offsets of one free list.
typedef struct _free_list_st {
  off_t free_p;
  off_t free_last_p;
} free_list_st;

// One "large" free list plus MMAP_FREE_LIST_NUM further free lists.
typedef struct _free_st {
  free_list_st large_list;
  free_list_st free_lists[MMAP_FREE_LIST_NUM];
} free_st;

// Bookkeeping of the free queue: data offset, capacity and tail index.
typedef struct _free_queue_st {
  off_t data;
  size_t capacity;
  uint64_t tail;
} free_queue_st;

// Top-level control record tying together the options, the free-space
// bookkeeping and the per-unit headers.
typedef struct _control_st {
  bool use_expand;
  uint16_t unit_num;
  uint16_t active_unit;
  uint64_t reserve;
  size_t base_size;
  off_t entry_p;
  option_reuse_t reuse_type;
  free_st free_data;
  free_queue_st free_queue;
  head_st data_headers[MMAP_MAX_UNIT_NUM];
} control_st;

// Header of a single chunk: deletion flag, owning unit, link to the next free
// chunk and the chunk size.
typedef struct _chunk_head_st {
  bool delete_flg;
  uint16_t unit_id;
  off_t free_next;
  size_t size;
} chunk_head_st;
} // namespace MemoryManager
-------------------------------------------------------------------------------- /data/poincare-input-20.tsv: -------------------------------------------------------------------------------- 1 | 0.0539693764 0.0303956118 -0.0074227113 -0.0497335689 -0.0091653618 0.0091111956 -0.0393322269 0.0127646100 0.0030987307 -0.0049561696 2 | -0.6750574905 -0.2949394379 -0.1235979314 -0.1028807002 0.1503880859 -0.2583549310 0.1160592930 0.2628373440 0.3862675675 0.0224174436 3 | -0.0360671359 -0.0073441587 -0.2107548171 -0.2461925668 -0.1312595171 -0.1775549223 -0.2856538448 -0.0522342692 -0.1868687535 0.2869020291 4 | 0.3544083190 -0.1456367023 -0.2698970586 -0.1690473044 0.2203370384 0.4871266798 -0.1195049554 0.2266318301 -0.2888911204 0.3176602006 5 | 0.0550967214 0.0098574821 -0.0034901847 0.0579247169 0.0086909763 -0.0086247956 0.0318097279 -0.0386925694 0.0144537781 -0.0104913130 6 | -0.2491678183 -0.0535075273 -0.1843838826 -0.2668354264 -0.1877963653 0.1777929842 0.6013758073 0.4088135539 -0.1874556730 -0.1062331322 7 | -0.0289513964 0.0382303098 -0.1046881085 0.1309216420 -0.1671726185 0.3960495060 -0.0785376837 0.2680107142 -0.1984678832 0.2166010613 8 | 0.0941946301 -0.2120171654 -0.2406071617 0.4089505903 0.1902111849 0.2704775502 -0.3887895406 -0.1190278597 0.1232601443 0.2210340412 9 | 0.1576399731 0.0239737002 -0.1341368741 0.1858752864 -0.2567138149 0.0485849136 -0.0575836257 -0.0135160436 -0.0116894253 -0.1602031679 10 | 0.0384355185 -0.2463410437 -0.1118858788 -0.1807598026 0.0536810125 -0.1794429572 0.0439627386 -0.0945290238 0.2232658508 -0.0159557001 11 | 0.1866130010 0.0000944770 0.0742050547 0.0230911711 -0.1000465695 -0.0887608103 -0.0771175265 0.0142893792 -0.0447475843 -0.0740124400 12 | -0.0013748639 -0.0061211299 -0.0018128171 -0.0010133893 0.0008707639 -0.0035298443 -0.0028321847 0.0045917970 -0.0025764327 0.0002247128 13 | -0.4102633599 -0.0406506479 -0.2715863283 0.1658512569 -0.2367607359 -0.1535345741 -0.1064446543 -0.0599810978 0.1394121064 
0.3970657142 14 | -0.1776582881 0.0772218212 0.0051801714 -0.1041740413 0.0077542395 0.0832179452 0.0318777475 -0.0238848073 -0.0081999377 -0.0793602879 15 | 0.0029746490 -0.0315098618 0.0247696707 0.0295409728 -0.0229715138 -0.0600329791 -0.0027417619 -0.0013954853 -0.0683863952 -0.0109046039 16 | -0.1825406561 -0.2480434733 0.2780168065 -0.0184406361 0.0189940169 0.1529574592 0.1580279176 0.5964094693 0.0931508009 -0.2074990905 17 | -0.1835463471 0.2868305950 -0.0123933100 -0.1115274745 0.1223087460 -0.0894914393 0.2548559506 -0.2158974447 -0.3464066569 -0.1108353433 18 | 0.0850437934 0.0319097754 -0.0129088445 -0.1181493024 0.3841676941 0.1034544110 -0.0095688560 0.0603214717 -0.0156859487 -0.1329866479 19 | -0.3046320335 0.0510537378 0.2711974353 0.2183829419 -0.1050398017 -0.0907717146 -0.0091389744 0.1906129697 0.0515529098 -0.3253357657 20 | 0.0357817962 0.0636046226 -0.2935027675 0.2700788871 0.0700831486 -0.0183280392 0.0494729314 -0.1308073317 0.0062290551 -0.1253784571 -------------------------------------------------------------------------------- /lib/NGT/ArrayFile.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | #include "ArrayFile.h" 18 | #include 19 | #include 20 | 21 | class ItemID { 22 | public: 23 | void serialize(std::ostream &os, NGT::ObjectSpace *ospace = 0) { os.write((char *)&value, sizeof(value)); } 24 | void deserialize(std::istream &is, NGT::ObjectSpace *ospace = 0) { is.read((char *)&value, sizeof(value)); } 25 | static size_t getSerializedDataSize() { return sizeof(uint64_t); } 26 | uint64_t value; 27 | }; 28 | 29 | void sampleForUsage() { 30 | { 31 | ArrayFile itemIDFile; 32 | itemIDFile.create("test.data", ItemID::getSerializedDataSize()); 33 | itemIDFile.open("test.data"); 34 | ItemID itemID; 35 | size_t id; 36 | 37 | id = 1; 38 | itemID.value = 4910002490100; 39 | itemIDFile.put(id, itemID); 40 | itemID.value = 0; 41 | itemIDFile.get(id, itemID); 42 | std::cerr << "value=" << itemID.value << std::endl; 43 | assert(itemID.value == 4910002490100); 44 | 45 | id = 2; 46 | itemID.value = 4910002490101; 47 | itemIDFile.put(id, itemID); 48 | itemID.value = 0; 49 | itemIDFile.get(id, itemID); 50 | std::cerr << "value=" << itemID.value << std::endl; 51 | assert(itemID.value == 4910002490101); 52 | 53 | itemID.value = 4910002490102; 54 | id = itemIDFile.insert(itemID); 55 | itemID.value = 0; 56 | itemIDFile.get(id, itemID); 57 | std::cerr << "value=" << itemID.value << std::endl; 58 | assert(itemID.value == 4910002490102); 59 | 60 | itemIDFile.close(); 61 | } 62 | { 63 | ArrayFile itemIDFile; 64 | itemIDFile.create("test.data", ItemID::getSerializedDataSize()); 65 | itemIDFile.open("test.data"); 66 | ItemID itemID; 67 | size_t id; 68 | 69 | id = 10; 70 | itemIDFile.get(id, itemID); 71 | std::cerr << "value=" << itemID.value << std::endl; 72 | assert(itemID.value == 4910002490100); 73 | 74 | id = 20; 75 | itemIDFile.get(id, itemID); 76 | std::cerr << "value=" << itemID.value << std::endl; 77 | assert(itemID.value == 4910002490101); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- 
// Public interface of the mmap-based memory manager. All behaviour is
// implemented by the private Impl class, which is not visible in this header.
namespace MemoryManager {

// Strategy for reusing freed space.
// NOTE(review): this enum is stored in control_st::reuse_type
// (MmapManagerDefs.h), so the enumerator order is presumably significant.
typedef enum _option_reuse_t {
  REUSE_DATA_CLASSIFY,
  REUSE_DATA_QUEUE,
  REUSE_DATA_QUEUE_PLUS,
} option_reuse_t;

// Outcome reported by MmapManager::reuse() through its out-parameter.
typedef enum _reuse_state_t {
  REUSE_STATE_OK,
  REUSE_STATE_FALSE,
  REUSE_STATE_ALLOC,
} reuse_state_t;

// Selector for the kind of statistic to compute.
typedef enum _check_statistics_t {
  CHECK_STATS_USE_SIZE,
  CHECK_STATS_USE_NUM,
  CHECK_STATS_FREE_SIZE,
  CHECK_STATS_FREE_NUM,
} check_statistics_t;

// Options accepted by MmapManager::init().
typedef struct _init_option_st {
  bool use_expand;
  option_reuse_t reuse_type;
} init_option_st;

class MmapManager {
 public:
  MmapManager();
  ~MmapManager();

  // Set-up and tear-down. `optionst` may be NULL (the default).
  // NOTE(review): init() is declared const although it takes part in set-up.
  bool init(const std::string &filePath, size_t size, const init_option_st *optionst = NULL) const;
  bool openMemory(const std::string &filePath);
  void closeMemory(const bool force = false);
  // Allocation interface. Positions are expressed as off_t offsets rather
  // than pointers; reuse() reports its outcome through `reuse_state`.
  off_t alloc(const size_t size, const bool not_reuse_flag = false);
  void free(const off_t p);
  off_t reuse(const size_t size, reuse_state_t &reuse_state);
  // Conversions between offsets and addresses.
  void *getAbsAddr(off_t p) const;
  off_t getRelAddr(const void *p) const;

  // Usage statistics.
  size_t getTotalSize() const;
  size_t getUseSize() const;
  uint64_t getUseNum() const;
  size_t getFreeSize() const;
  uint64_t getFreeNum() const;
  uint16_t getUnitNum() const;
  size_t getQueueCapacity() const;
  uint64_t getQueueNum() const;
  uint64_t getLargeListNum() const;

  void dumpHeap() const;

  bool isOpen() const;
  // Accessors for the entry hook pointer.
  void *getEntryHook() const;
  void setEntryHook(const void *entry_p);

  // static method ---
  static void setDefaultOptionValue(init_option_st &optionst);
  static size_t getAlignSize(size_t size);

 private:
  class Impl;
  // NOTE(review): the template argument appears to have been lost in this
  // copy of the file; it is presumably std::unique_ptr<Impl> -- confirm.
  std::unique_ptr _impl;
};

// Returns a message for the error number `err_num`.
std::string getErrorStr(int32_t err_num);
} // namespace MemoryManager
// Begin of cmake defines
// These lines are filled in by CMake's configure_file(): each one becomes a
// #define when the corresponding CMake variable is set.
#cmakedefine NGT_SHARED_MEMORY_ALLOCATOR // use shared memory for indexes
#cmakedefine NGT_GRAPH_CHECK_VECTOR // use vector to check whether accessed
#cmakedefine NGT_AVX_DISABLED // not use avx
#cmakedefine NGT_AVX2 // use avx2
#cmakedefine NGT_LARGE_DATASET // more than 10M objects
#cmakedefine NGT_DISTANCE_COMPUTATION_COUNT // count # of distance computations
#cmakedefine NGT_QBG_DISABLED
#cmakedefine NGTQG_ZERO_GLOBAL
#cmakedefine NGTQG_NO_ROTATION
#cmakedefine NGT_BFLOAT_DISABLED // not use bfloat
#cmakedefine NGT_BFLOAT_ENABLED
#cmakedefine NGT_GRAPH_COMPACT_READ_ONLY_GRAPH
#cmakedefine NGT_ENABLE_TIME_SEED_FOR_RANDOM
// End of cmake defines

//////////////////////////////////////////////////////////////////////////
// Release Definitions for OSS

//#define NGT_DISTANCE_COMPUTATION_COUNT
//#define NGT_SEARCH_TIMER

#define NGT_CREATION_EDGE_SIZE 10
#define NGT_EXPLORATION_COEFFICIENT 1.1
#define NGT_INSERTION_EXPLORATION_COEFFICIENT 1.1
#define NGT_SHARED_MEMORY_MAX_SIZE 1024 // MB
#define NGT_FORCED_REMOVE // When errors occur due to the index inconsistency, ignore them.

#define NGT_COMPACT_VECTOR
#define NGT_GRAPH_READ_ONLY_GRAPH
#define NGT_HALF_FLOAT

// NGT_BFLOAT ends up defined only when NGT_BFLOAT_ENABLED is set and
// NGT_BFLOAT_DISABLED is not: the #undef comes last, so DISABLED wins.
#ifdef NGT_BFLOAT_ENABLED
#define NGT_BFLOAT
#endif
#ifdef NGT_BFLOAT_DISABLED
#undef NGT_BFLOAT
#endif

#define RESULT_DEFINED_RANGE

// Select how visited nodes are tracked: NGT_LARGE_DATASET switches to the
// hash-based boolean set, otherwise the vector variant is used.
// NOTE(review): if CMake defines both NGT_GRAPH_CHECK_VECTOR and
// NGT_LARGE_DATASET, both selection macros end up defined.
#ifdef NGT_LARGE_DATASET
#define NGT_GRAPH_CHECK_HASH_BASED_BOOLEAN_SET
#else
#define NGT_GRAPH_CHECK_VECTOR
#endif

// Building with the shared memory allocator always disables QBG.
#ifdef NGT_SHARED_MEMORY_ALLOCATOR
#define NGT_QBG_DISABLED
#endif

#define NGT_REFINEMENT


// SIMD level selection. An explicit NGT_AVX_DISABLED or NGT_AVX2 takes
// precedence; otherwise the level follows the compiler's predefined macros
// (AVX-512 F+DQ, then AVX2, then none).
// NOTE(review): the nested __AVX512VNNI__ conditional is empty and has no
// effect.
#if defined(NGT_AVX_DISABLED)
#define NGT_NO_AVX
#elif defined(NGT_AVX2)
#undef NGT_AVX512
#else
#if defined(__AVX512F__) && defined(__AVX512DQ__)
#if defined(__AVX512VNNI__)
#endif
#define NGT_AVX512
#elif defined(__AVX2__)
#define NGT_AVX2
#else
#define NGT_NO_AVX
#endif
#endif
15 | // 16 | 17 | #include 18 | 19 | #include "Thread.h" 20 | 21 | using namespace std; 22 | using namespace NGT; 23 | 24 | namespace NGT { 25 | class ThreadInfo { 26 | public: 27 | pthread_t threadid; 28 | pthread_attr_t threadAttr; 29 | }; 30 | 31 | class ThreadMutex { 32 | public: 33 | pthread_mutex_t mutex; 34 | pthread_cond_t condition; 35 | }; 36 | } // namespace NGT 37 | 38 | Thread::Thread() { 39 | threadInfo = new ThreadInfo; 40 | threadInfo->threadid = 0; 41 | threadNo = -1; 42 | isTerminate = false; 43 | } 44 | 45 | Thread::~Thread() { 46 | if (threadInfo != 0) { 47 | delete threadInfo; 48 | } 49 | } 50 | 51 | ThreadMutex *Thread::constructThreadMutex() { return new ThreadMutex; } 52 | 53 | void Thread::destructThreadMutex(ThreadMutex *t) { 54 | if (t != 0) { 55 | pthread_mutex_destroy(&(t->mutex)); 56 | pthread_cond_destroy(&(t->condition)); 57 | delete t; 58 | } 59 | } 60 | 61 | int Thread::start() { 62 | pthread_attr_init(&(threadInfo->threadAttr)); 63 | size_t stackSize = 0; 64 | pthread_attr_getstacksize(&(threadInfo->threadAttr), &stackSize); 65 | if (stackSize < 0xa00000) { // 64bit stack size 66 | stackSize *= 4; 67 | } 68 | pthread_attr_setstacksize(&(threadInfo->threadAttr), stackSize); 69 | pthread_attr_getstacksize(&(threadInfo->threadAttr), &stackSize); 70 | return pthread_create(&(threadInfo->threadid), &(threadInfo->threadAttr), Thread::startThread, this); 71 | } 72 | 73 | int Thread::join() { return pthread_join(threadInfo->threadid, 0); } 74 | 75 | void Thread::lock(ThreadMutex &m) { pthread_mutex_lock(&m.mutex); } 76 | void Thread::unlock(ThreadMutex &m) { pthread_mutex_unlock(&m.mutex); } 77 | void Thread::signal(ThreadMutex &m) { pthread_cond_signal(&m.condition); } 78 | 79 | void Thread::wait(ThreadMutex &m) { 80 | if (pthread_cond_wait(&m.condition, &m.mutex) != 0) { 81 | cerr << "waitForSignalFromThread: internal error" << endl; 82 | NGTThrowException("waitForSignalFromThread: internal error"); 83 | } 84 | } 85 | 86 | void 
Thread::broadcast(ThreadMutex &m) { pthread_cond_broadcast(&m.condition); } 87 | 88 | void Thread::mutexInit(ThreadMutex &m) { 89 | if (pthread_mutex_init(&m.mutex, NULL) != 0) { 90 | NGTThrowException("Thread::mutexInit: Cannot initialize mutex"); 91 | } 92 | if (pthread_cond_init(&m.condition, NULL) != 0) { 93 | NGTThrowException("Thread::mutexInit: Cannot initialize condition"); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /lib/NGT/HashBasedBooleanSet.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
// A set of integer-like values, used as a fast "already seen" marker.
//
// Each value is mapped to one slot of a power-of-two sized table through
// `value & mask`. A value that finds its slot taken by a different value is
// kept in an std::unordered_set overflow instead.
//
// Limitations that follow from the representation:
//  - 0 marks an empty slot, so 0 cannot be stored reliably: operator[](0)
//    returns true whenever slot 0 is empty.
//  - reset() leaves UINT_MAX in the slot as a tombstone, so that colliding
//    values are still looked up in the overflow set.
//  - A default-constructed set has no table; call initialize() before use.
//  - NOTE(review): the class owns a raw array but declares no copy
//    operations, so copying an instance would free the table twice.
template <typename TYPE> class HashBasedBooleanSet {
 private:
  TYPE *_table;
  uint32_t _tableSize;
  uint32_t _mask;

  // Values whose slot is occupied by a different value.
  std::unordered_set<TYPE> _stlHash;

  inline uint32_t _hash1(const TYPE value) { return value & _mask; }

 public:
  HashBasedBooleanSet() : _table(NULL), _tableSize(0), _mask(0) {}

  // Sizes the table from the expected number of elements:
  // 2^((bits(size) + 4) / 2 + 3) slots.
  HashBasedBooleanSet(const uint64_t size) : _table(NULL), _tableSize(0), _mask(0) {
    size_t bitSize = 0;
    for (uint64_t rest = size; rest != 0; rest >>= 1) {
      bitSize++;
    }
    size_t shift = (bitSize + 4) / 2 + 3;
    if (shift > 31) {
      shift = 31; // initialize() takes a 32-bit table size
    }
    // Shift an unsigned 32-bit one so that large exponents cannot overflow a
    // signed int.
    initialize(static_cast<uint32_t>(1) << shift);
  }

  // (Re)creates an empty table with `tableSize` slots. `tableSize` should be
  // a power of two; a warning is printed otherwise. Any previous content,
  // including the overflow entries, is discarded.
  void initialize(const uint32_t tableSize) {
    TYPE *fresh = new TYPE[tableSize](); // value-initialised, i.e. all zero
    delete[] _table;                     // release the table of an earlier call
    _table = fresh;
    _stlHash.clear(); // stale overflow entries must not survive
    _tableSize = tableSize;
    _mask      = _tableSize - 1;
    const uint32_t checkValue = _hash1(tableSize);
    if (checkValue != 0) {
      std::cerr << "[WARN] table size is not 2^N : " << tableSize << std::endl;
    }
  }

  virtual ~HashBasedBooleanSet() {
    delete[] _table;
    _table = 0;
    _stlHash.clear();
  }

  // Returns true if `num` is in the set.
  inline bool operator[](const TYPE num) {
    const TYPE stored = _table[_hash1(num)];
    if (stored == num) {
      return true;
    }
    if (stored == 0) {
      return false; // slot never used, so nothing overflowed from it
    }
    return _stlHash.count(num) > 0;
  }

  // Adds `num` to the set.
  inline void set(const TYPE num) {
    TYPE &slot = _table[_hash1(num)];
    if (slot == 0) {
      slot = num;
    } else if (slot != num) {
      _stlHash.insert(num);
    }
  }

  inline void insert(const TYPE num) { set(num); }

  // Removes `num` from the set.
  inline void reset(const TYPE num) {
    TYPE &slot = _table[_hash1(num)];
    if (slot == 0) {
      return;
    }
    if (slot != num) {
      _stlHash.erase(num);
    } else {
      slot = UINT_MAX; // tombstone, see the class comment
    }
  }
};
" 40 | << getObjectType().name(); 41 | NGTThrowException(msg); 42 | } 43 | return d; 44 | } 45 | 46 | #ifdef NGT_SHARED_MEMORY_ALLOCATOR 47 | NGT::Distance NGT::ObjectSpace::compareWithL1(NGT::Object &o1, NGT::PersistentObject &o2) { 48 | auto dim = getPaddedDimension(); 49 | NGT::Distance d; 50 | if (getObjectType() == typeid(uint8_t)) { 51 | d = PrimitiveComparator::compareL1( 52 | reinterpret_cast(o1.getPointer()), 53 | reinterpret_cast(o2.getPointer(getRepository().getAllocator())), dim); 54 | #ifdef NGT_HALF_FLOAT 55 | } else if (getObjectType() == typeid(float16)) { 56 | d = PrimitiveComparator::compareL1( 57 | reinterpret_cast(o1.getPointer()), 58 | reinterpret_cast(o2.getPointer(getRepository().getAllocator())), dim); 59 | #endif 60 | } else if (getObjectType() == typeid(float)) { 61 | d = PrimitiveComparator::compareL1( 62 | reinterpret_cast(o1.getPointer()), 63 | reinterpret_cast(o2.getPointer(getRepository().getAllocator())), dim); 64 | } else { 65 | std::stringstream msg; 66 | msg << "ObjectSpace::compareWithL1: Fatal Inner Error! 
Unexpected object type."; 67 | NGTThrowException(msg); 68 | } 69 | return d; 70 | } 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /samples/cosine-float/cosine-float.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NGT/Index.h" 3 | 4 | using namespace std; 5 | 6 | int main(int argc, char **argv) { 7 | string indexPath = "index"; 8 | string objectFile = "./data/sift-dataset-5k.tsv"; 9 | string queryFile = "./data/sift-query-3.tsv"; 10 | // index construction 11 | try { 12 | NGT::Property property; 13 | property.dimension = 128; 14 | property.objectType = NGT::ObjectSpace::ObjectType::Float; 15 | property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeCosine; 16 | NGT::Index::create(indexPath, property); 17 | NGT::Index index(indexPath); 18 | ifstream is(objectFile); 19 | string line; 20 | while (getline(is, line)) { 21 | vector obj; 22 | stringstream linestream(line); 23 | while (!linestream.eof()) { 24 | float value; 25 | linestream >> value; 26 | if (linestream.fail()) { 27 | obj.clear(); 28 | break; 29 | } 30 | obj.push_back(value); 31 | } 32 | if (obj.empty()) { 33 | cerr << "An empty line or invalid value: " << line << endl; 34 | continue; 35 | } 36 | obj.resize(property.dimension); // cut off additional data in the file. 37 | index.append(obj); 38 | } 39 | index.createIndex(16); 40 | index.save(); 41 | } catch (NGT::Exception &err) { 42 | cerr << "Error " << err.what() << endl; 43 | return 1; 44 | } catch (...) 
{ 45 | cerr << "Error" << endl; 46 | return 1; 47 | } 48 | 49 | // nearest neighbor search 50 | try { 51 | NGT::Index index(indexPath); 52 | NGT::Property property; 53 | index.getProperty(property); 54 | ifstream is(queryFile); 55 | string line; 56 | while (getline(is, line)) { 57 | vector query; 58 | { 59 | stringstream linestream(line); 60 | while (!linestream.eof()) { 61 | float value; 62 | linestream >> value; 63 | query.push_back(value); 64 | } 65 | query.resize(property.dimension); 66 | cout << "Query : "; 67 | for (size_t i = 0; i < 5; i++) { 68 | cout << query[i] << " "; 69 | } 70 | cout << "..."; 71 | } 72 | NGT::SearchQuery sc(query); 73 | NGT::ObjectDistances objects; 74 | sc.setResults(&objects); 75 | sc.setSize(10); 76 | sc.setEpsilon(0.1); 77 | 78 | index.search(sc); 79 | cout << endl << "Rank\tID\tDistance" << std::showbase << endl; 80 | for (size_t i = 0; i < objects.size(); i++) { 81 | cout << i + 1 << "\t" << objects[i].id << "\t" << objects[i].distance << "\t: "; 82 | NGT::ObjectSpace &objectSpace = index.getObjectSpace(); 83 | float *object = static_cast(objectSpace.getObject(objects[i].id)); 84 | for (size_t idx = 0; idx < 5; idx++) { 85 | cout << object[idx] << " "; 86 | } 87 | cout << endl; 88 | } 89 | cout << endl; 90 | } 91 | } catch (NGT::Exception &err) { 92 | cerr << "Error " << err.what() << endl; 93 | return 1; 94 | } catch (...) 
{ 95 | cerr << "Error" << endl; 96 | return 1; 97 | } 98 | 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /samples/l2-uint8/l2-uint8.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NGT/Index.h" 3 | 4 | using namespace std; 5 | 6 | int main(int argc, char **argv) { 7 | string indexPath = "index"; 8 | string objectFile = "./data/sift-dataset-5k.tsv"; 9 | string queryFile = "./data/sift-query-3.tsv"; 10 | // index construction 11 | try { 12 | NGT::Property property; 13 | property.dimension = 128; 14 | property.objectType = NGT::ObjectSpace::ObjectType::Uint8; 15 | property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; 16 | NGT::Index::create(indexPath, property); 17 | NGT::Index index(indexPath); 18 | ifstream is(objectFile); 19 | string line; 20 | while (getline(is, line)) { 21 | vector obj; 22 | stringstream linestream(line); 23 | while (!linestream.eof()) { 24 | int value; 25 | linestream >> value; 26 | if (linestream.fail()) { 27 | obj.clear(); 28 | break; 29 | } 30 | obj.push_back(value); 31 | } 32 | if (obj.empty()) { 33 | cerr << "An empty line or invalid value: " << line << endl; 34 | continue; 35 | } 36 | obj.resize(property.dimension); // cut off additional data in the file. 37 | index.append(obj); 38 | } 39 | index.createIndex(16); 40 | index.save(); 41 | } catch (NGT::Exception &err) { 42 | cerr << "Error " << err.what() << endl; 43 | return 1; 44 | } catch (...) 
{ 45 | cerr << "Error" << endl; 46 | return 1; 47 | } 48 | 49 | // nearest neighbor search 50 | try { 51 | NGT::Index index(indexPath); 52 | NGT::Property property; 53 | index.getProperty(property); 54 | ifstream is(queryFile); 55 | string line; 56 | while (getline(is, line)) { 57 | vector query; 58 | { 59 | stringstream linestream(line); 60 | while (!linestream.eof()) { 61 | int value; 62 | linestream >> value; 63 | query.push_back(value); 64 | } 65 | query.resize(property.dimension); 66 | cout << "Query : "; 67 | for (size_t i = 0; i < 5; i++) { 68 | cout << static_cast(query[i]) << " "; 69 | } 70 | cout << "..."; 71 | } 72 | 73 | NGT::SearchQuery sc(query); 74 | NGT::ObjectDistances objects; 75 | sc.setResults(&objects); 76 | sc.setSize(10); 77 | sc.setEpsilon(0.1); 78 | 79 | index.search(sc); 80 | cout << endl << "Rank\tID\tDistance" << std::showbase << endl; 81 | for (size_t i = 0; i < objects.size(); i++) { 82 | cout << i + 1 << "\t" << objects[i].id << "\t" << objects[i].distance << "\t: "; 83 | NGT::ObjectSpace &objectSpace = index.getObjectSpace(); 84 | uint8_t *object = static_cast(objectSpace.getObject(objects[i].id)); 85 | for (size_t idx = 0; idx < 5; idx++) { 86 | cout << static_cast(object[idx]) << " "; 87 | } 88 | cout << "..." << endl; 89 | } 90 | cout << endl; 91 | } 92 | } catch (NGT::Exception &err) { 93 | cerr << "Error " << err.what() << endl; 94 | return 1; 95 | } catch (...) 
{ 96 | cerr << "Error" << endl; 97 | return 1; 98 | } 99 | 100 | return 0; 101 | } 102 | -------------------------------------------------------------------------------- /samples/hamming-uint8/hamming-uint8.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NGT/Index.h" 3 | 4 | using namespace std; 5 | 6 | int main(int argc, char **argv) { 7 | string indexPath = "index"; 8 | string objectFile = "./data/sift-dataset-5k.tsv"; 9 | string queryFile = "./data/sift-query-3.tsv"; 10 | size_t bitSize = 64; 11 | // index construction 12 | try { 13 | NGT::Property property; 14 | property.dimension = bitSize / 8; 15 | property.objectType = NGT::ObjectSpace::ObjectType::Uint8; 16 | property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeHamming; 17 | NGT::Index::create(indexPath, property); 18 | NGT::Index index(indexPath); 19 | ifstream is(objectFile); 20 | string line; 21 | while (getline(is, line)) { 22 | vector obj; 23 | stringstream linestream(line); 24 | while (!linestream.eof()) { 25 | int value; 26 | linestream >> value; 27 | if (linestream.fail()) { 28 | obj.clear(); 29 | break; 30 | } 31 | obj.push_back(value); 32 | } 33 | if (obj.empty()) { 34 | cerr << "An empty line or invalid value: " << line << endl; 35 | continue; 36 | } 37 | obj.resize(property.dimension); // cut off additional data in the file. 38 | index.append(obj); 39 | } 40 | index.createIndex(16); 41 | index.save(); 42 | } catch (NGT::Exception &err) { 43 | cerr << "Error " << err.what() << endl; 44 | return 1; 45 | } catch (...) 
{ 46 | cerr << "Error" << endl; 47 | return 1; 48 | } 49 | 50 | // nearest neighbor search 51 | try { 52 | NGT::Index index(indexPath); 53 | NGT::Property property; 54 | index.getProperty(property); 55 | ifstream is(queryFile); 56 | string line; 57 | while (getline(is, line)) { 58 | vector query; 59 | { 60 | stringstream linestream(line); 61 | while (!linestream.eof()) { 62 | int value; 63 | linestream >> value; 64 | query.push_back(value); 65 | } 66 | query.resize(property.dimension); 67 | cout << "Query : "; 68 | for (size_t i = 0; i < query.size(); i++) { 69 | cout << std::bitset<8>(query[i]) << " "; 70 | } 71 | } 72 | 73 | NGT::SearchQuery sc(query); 74 | NGT::ObjectDistances objects; 75 | sc.setResults(&objects); 76 | sc.setSize(10); 77 | sc.setEpsilon(0.2); 78 | 79 | index.search(sc); 80 | cout << endl << "Rank\tID\tDistance" << std::showbase << endl; 81 | for (size_t i = 0; i < objects.size(); i++) { 82 | cout << i + 1 << "\t" << objects[i].id << "\t" << objects[i].distance << "\t: "; 83 | NGT::ObjectSpace &objectSpace = index.getObjectSpace(); 84 | uint8_t *object = static_cast(objectSpace.getObject(objects[i].id)); 85 | for (size_t idx = 0; idx < objectSpace.getDimension(); idx++) { 86 | cout << std::bitset<8>(object[idx]) << " "; 87 | } 88 | cout << endl; 89 | } 90 | cout << endl; 91 | } 92 | } catch (NGT::Exception &err) { 93 | cerr << "Error " << err.what() << endl; 94 | return 1; 95 | } catch (...) 
{ 96 | cerr << "Error" << endl; 97 | return 1; 98 | } 99 | 100 | return 0; 101 | } 102 | -------------------------------------------------------------------------------- /samples/l2-uint8-range-search/l2-uint8-range-search.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NGT/Index.h" 3 | 4 | using namespace std; 5 | 6 | int main(int argc, char **argv) { 7 | string indexPath = "index"; 8 | string objectFile = "./data/sift-dataset-5k.tsv"; 9 | string queryFile = "./data/sift-query-3.tsv"; 10 | // index construction 11 | try { 12 | NGT::Property property; 13 | property.dimension = 128; 14 | property.objectType = NGT::ObjectSpace::ObjectType::Uint8; 15 | property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; 16 | NGT::Index::create(indexPath, property); 17 | NGT::Index index(indexPath); 18 | ifstream is(objectFile); 19 | string line; 20 | while (getline(is, line)) { 21 | vector obj; 22 | stringstream linestream(line); 23 | while (!linestream.eof()) { 24 | int value; 25 | linestream >> value; 26 | if (linestream.fail()) { 27 | obj.clear(); 28 | break; 29 | } 30 | obj.push_back(value); 31 | } 32 | if (obj.empty()) { 33 | cerr << "An empty line or invalid value: " << line << endl; 34 | continue; 35 | } 36 | obj.resize(property.dimension); // cut off additional data in the file. 37 | index.append(obj); 38 | } 39 | index.createIndex(16); 40 | index.save(); 41 | } catch (NGT::Exception &err) { 42 | cerr << "Error " << err.what() << endl; 43 | return 1; 44 | } catch (...) 
{ 45 | cerr << "Error" << endl; 46 | return 1; 47 | } 48 | 49 | // nearest neighbor search 50 | try { 51 | NGT::Index index(indexPath); 52 | NGT::Property property; 53 | index.getProperty(property); 54 | ifstream is(queryFile); 55 | string line; 56 | while (getline(is, line)) { 57 | vector query; 58 | { 59 | stringstream linestream(line); 60 | while (!linestream.eof()) { 61 | int value; 62 | linestream >> value; 63 | query.push_back(value); 64 | } 65 | query.resize(property.dimension); 66 | cout << "Query : "; 67 | for (size_t i = 0; i < 5; i++) { 68 | cout << static_cast(query[i]) << " "; 69 | } 70 | cout << "..."; 71 | } 72 | 73 | NGT::SearchQuery sc(query); 74 | NGT::ObjectDistances objects; 75 | sc.setResults(&objects); 76 | sc.setRadius(250.0); 77 | sc.setSize(10); 78 | sc.setEpsilon(0.6); 79 | 80 | index.search(sc); 81 | cout << endl << "Rank\tID\tDistance" << std::showbase << endl; 82 | for (size_t i = 0; i < objects.size(); i++) { 83 | cout << i + 1 << "\t" << objects[i].id << "\t" << objects[i].distance << "\t: "; 84 | NGT::ObjectSpace &objectSpace = index.getObjectSpace(); 85 | uint8_t *object = static_cast(objectSpace.getObject(objects[i].id)); 86 | for (size_t idx = 0; idx < 5; idx++) { 87 | cout << static_cast(object[idx]) << " "; 88 | } 89 | cout << "..." << endl; 90 | } 91 | cout << endl; 92 | } 93 | } catch (NGT::Exception &err) { 94 | cerr << "Error " << err.what() << endl; 95 | return 1; 96 | } catch (...) { 97 | cerr << "Error" << endl; 98 | return 1; 99 | } 100 | 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /lib/NGT/NGTQ/QuantizedGraph.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2020 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #include "NGT/NGTQ/QuantizedGraph.h" 18 | #include "NGT/NGTQ/QuantizedBlobGraph.h" 19 | #include "NGT/NGTQ/Optimizer.h" 20 | 21 | #ifdef NGTQ_QBG 22 | void NGTQG::Index::quantize(const std::string indexPath, size_t dimensionOfSubvector, size_t maxNumOfEdges, 23 | bool verbose) { 24 | { 25 | NGT::Index index(indexPath); 26 | const std::string quantizedIndexPath = indexPath + "/qg"; 27 | struct stat st; 28 | if (stat(quantizedIndexPath.c_str(), &st) != 0) { 29 | NGT::Property ngtProperty; 30 | index.getProperty(ngtProperty); 31 | QBG::BuildParameters buildParameters; 32 | buildParameters.creation.dimensionOfSubvector = dimensionOfSubvector; 33 | buildParameters.setVerbose(verbose); 34 | 35 | NGTQG::Index::create(indexPath, buildParameters); 36 | 37 | NGTQG::Index::append(indexPath, buildParameters); 38 | 39 | QBG::Optimizer optimizer(buildParameters); 40 | #ifdef NGTQG_NO_ROTATION 41 | if (optimizer.rotation || optimizer.repositioning) { 42 | std::cerr << "build-qg: Warning! Although rotation or repositioning is specified, turn off rotation " 43 | "and repositioning because of unavailable options." 44 | << std::endl; 45 | optimizer.rotation = false; 46 | optimizer.repositioning = false; 47 | } 48 | #endif 49 | 50 | if (optimizer.globalType == QBG::Optimizer::GlobalTypeNone) { 51 | if (verbose) 52 | std::cerr 53 | << "build-qg: Warning! None is unavailable for the global type. Zero is set to the global type." 
54 | << std::endl; 55 | optimizer.globalType = QBG::Optimizer::GlobalTypeZero; 56 | } 57 | 58 | optimizer.optimize(quantizedIndexPath); 59 | 60 | QBG::Index::buildNGTQ(quantizedIndexPath, verbose); 61 | 62 | NGTQG::Index::realign(indexPath, maxNumOfEdges, verbose); 63 | } 64 | } 65 | } 66 | 67 | void NGTQG::Index::create(const std::string indexPath, QBG::BuildParameters &buildParameters) { 68 | auto dimensionOfSubvector = buildParameters.creation.dimensionOfSubvector; 69 | auto dimension = buildParameters.creation.dimension; 70 | if (dimension != 0 && buildParameters.creation.numOfSubvectors != 0) { 71 | if (dimension % buildParameters.creation.numOfSubvectors != 0) { 72 | std::stringstream msg; 73 | msg << "NGTQBG:Index::create: Invalid dimension and local division No. " << dimension << ":" 74 | << buildParameters.creation.numOfSubvectors; 75 | NGTThrowException(msg); 76 | } 77 | dimensionOfSubvector = dimension / buildParameters.creation.numOfSubvectors; 78 | } 79 | create(indexPath, dimensionOfSubvector, dimension); 80 | } 81 | 82 | void NGTQG::Index::append(const std::string indexPath, QBG::BuildParameters &buildParameters) { 83 | QBG::Index::appendFromObjectRepository(indexPath, indexPath + "/qg", buildParameters.verbose); 84 | } 85 | #endif 86 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(APPLE) 2 | cmake_minimum_required(VERSION 3.0) 3 | else() 4 | cmake_minimum_required(VERSION 2.8) 5 | endif() 6 | 7 | project(ngt) 8 | 9 | file(STRINGS "VERSION" ngt_VERSION) 10 | message(STATUS "VERSION: ${ngt_VERSION}") 11 | string(REGEX MATCH "^[0-9]+" ngt_VERSION_MAJOR ${ngt_VERSION}) 12 | 13 | set(ngt_VERSION ${ngt_VERSION}) 14 | set(ngt_SOVERSION ${ngt_VERSION_MAJOR}) 15 | 16 | if(NOT CMAKE_BUILD_TYPE) 17 | set(CMAKE_BUILD_TYPE "Release") 18 | endif(NOT CMAKE_BUILD_TYPE) 19 | string(TOLOWER ${CMAKE_BUILD_TYPE} 
CMAKE_BUILD_TYPE_LOWER) 20 | message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") 21 | message(STATUS "CMAKE_BUILD_TYPE_LOWER: ${CMAKE_BUILD_TYPE_LOWER}") 22 | 23 | if(${NGT_SHARED_MEMORY_ALLOCATOR}) 24 | set(NGT_QBG_DISABLED TRUE) 25 | endif(${NGT_SHARED_MEMORY_ALLOCATOR}) 26 | 27 | if(${UNIX}) 28 | set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) 29 | 30 | if(CMAKE_VERSION VERSION_LESS 3.1) 31 | set(BASE_OPTIONS "-Wall -std=gnu++0x -lrt") 32 | 33 | if(${NGT_AVX_DISABLED}) 34 | message(STATUS "AVX will not be used to compute distances.") 35 | endif() 36 | 37 | if(${NGT_OPENMP_DISABLED}) 38 | message(STATUS "OpenMP is disabled.") 39 | else() 40 | set(BASE_OPTIONS "${BASE_OPTIONS} -fopenmp") 41 | endif() 42 | 43 | if(${NGT_ASSERT_DISABLED}) 44 | message(STATUS "assert() is disabled.") 45 | set(BASE_OPTIONS "${BASE_OPTIONS} -DNDEBUG") 46 | endif() 47 | 48 | set(CMAKE_CXX_FLAGS_DEBUG "-g ${BASE_OPTIONS}") 49 | 50 | if(${NGT_MARCH_NATIVE_DISABLED}) 51 | message(STATUS "Compile option -march=native is disabled.") 52 | set(CMAKE_CXX_FLAGS_RELEASE "-O2 ${BASE_OPTIONS}") 53 | else() 54 | set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native ${BASE_OPTIONS}") 55 | endif() 56 | else() 57 | if (CMAKE_BUILD_TYPE_LOWER STREQUAL "release") 58 | set(CMAKE_CXX_FLAGS_RELEASE "") 59 | if(${NGT_MARCH_NATIVE_DISABLED}) 60 | message(STATUS "Compile option -march=native is disabled.") 61 | add_compile_options(-O2 -DNDEBUG) 62 | elseif(${NGT_AVX2}) 63 | add_compile_options(-Ofast -march=haswell -DNDEBUG) 64 | elseif(${NGT_AVX_DISABLED}) 65 | message(STATUS "AVX will not be used to compute distances.") 66 | add_compile_options(-Ofast -march=x86-64 -DNDEBUG) 67 | else() 68 | add_compile_options(-Ofast -march=native -DNDEBUG) 69 | endif() 70 | endif() 71 | add_compile_options(-Wall) 72 | if(${NGT_OPENMP_DISABLED}) 73 | message(STATUS "OpenMP is disabled.") 74 | else() 75 | if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") 76 | if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8.1.0") 77 | message(FATAL_ERROR 
"Insufficient AppleClang version") 78 | endif() 79 | cmake_minimum_required(VERSION 3.16) 80 | endif() 81 | find_package(OpenMP REQUIRED) 82 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 83 | endif() 84 | set(CMAKE_CXX_STANDARD 11) # for std::unordered_set, std::unique_ptr 85 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 86 | find_package(Threads REQUIRED) 87 | if(${NGT_QBG_DISABLED}) 88 | message(STATUS "QBG is disabled.") 89 | else() 90 | find_package(BLAS REQUIRED) 91 | find_package(LAPACK REQUIRED) 92 | endif() 93 | endif() 94 | 95 | if(${NGT_BFLOAT_DISABLED}) 96 | message(STATUS "bfloat is disabled.") 97 | endif() 98 | 99 | add_subdirectory("${PROJECT_SOURCE_DIR}/lib") 100 | add_subdirectory("${PROJECT_SOURCE_DIR}/bin") 101 | add_subdirectory("${PROJECT_SOURCE_DIR}/samples") 102 | endif(${UNIX}) 103 | -------------------------------------------------------------------------------- /samples/qg-l2-float/qg-l2-float.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NGT/NGTQ/QuantizedGraph.h" 3 | 4 | int main(int argc, char **argv) { 5 | #ifdef NGTQ_QBG 6 | string indexPath = "index"; 7 | string objectFile = "./data/sift-dataset-5k.tsv"; 8 | string queryFile = "./data/sift-query-3.tsv"; 9 | 10 | // index construction 11 | try { 12 | NGT::Property property; 13 | property.dimension = 128; 14 | property.objectType = NGT::ObjectSpace::ObjectType::Uint8; 15 | property.distanceType = NGT::Index::Property::DistanceType::DistanceTypeL2; 16 | std::cout << "creating the index framework..." << std::endl; 17 | NGT::Index::create(indexPath, property); 18 | NGT::Index index(indexPath); 19 | ifstream is(objectFile); 20 | string line; 21 | std::cout << "appending the objects..." 
<< std::endl; 22 | while (getline(is, line)) { 23 | vector obj; 24 | stringstream linestream(line); 25 | while (!linestream.eof()) { 26 | int value; 27 | linestream >> value; 28 | if (linestream.fail()) { 29 | obj.clear(); 30 | break; 31 | } 32 | obj.push_back(value); 33 | } 34 | if (obj.empty()) { 35 | cerr << "An empty line or invalid value: " << line << endl; 36 | continue; 37 | } 38 | obj.resize(property.dimension); // cut off additional data in the file. 39 | index.insert(obj); 40 | } 41 | std::cout << "building the index..." << std::endl; 42 | index.createIndex(16); 43 | index.save(); 44 | } catch (NGT::Exception &err) { 45 | cerr << "Error " << err.what() << endl; 46 | return 1; 47 | } catch (...) { 48 | cerr << "Error" << endl; 49 | return 1; 50 | } 51 | 52 | // quantization 53 | size_t dimensionOfSubvector = 1; 54 | size_t maxNumberOfEdges = 50; 55 | try { 56 | std::cout << "quantizing the index..." << std::endl; 57 | NGTQG::Index::quantize(indexPath, dimensionOfSubvector, maxNumberOfEdges, true); 58 | } catch (NGT::Exception &err) { 59 | cerr << "Error " << err.what() << endl; 60 | return 1; 61 | } catch (...) { 62 | cerr << "Error" << endl; 63 | return 1; 64 | } 65 | 66 | // nearest neighbor search 67 | try { 68 | NGTQG::Index index(indexPath); 69 | NGT::Property property; 70 | index.getProperty(property); 71 | ifstream is(queryFile); 72 | string line; 73 | std::cout << "searching the index..." 
<< std::endl; 74 | while (getline(is, line)) { 75 | vector query; 76 | { 77 | stringstream linestream(line); 78 | while (!linestream.eof()) { 79 | int value; 80 | linestream >> value; 81 | query.push_back(value); 82 | } 83 | query.resize(property.dimension); 84 | cout << "Query : "; 85 | for (size_t i = 0; i < 5; i++) { 86 | cout << static_cast(query[i]) << " "; 87 | } 88 | cout << "..."; 89 | } 90 | 91 | NGTQG::SearchQuery sc(query); 92 | NGT::ObjectDistances objects; 93 | sc.setResults(&objects); 94 | sc.setSize(10); 95 | sc.setEpsilon(0.1); 96 | 97 | index.search(sc); 98 | cout << endl << "Rank\tID\tDistance: Object" << std::showbase << endl; 99 | for (size_t i = 0; i < objects.size(); i++) { 100 | cout << i + 1 << "\t" << objects[i].id << "\t" << objects[i].distance << "\t: "; 101 | NGT::ObjectSpace &objectSpace = index.getObjectSpace(); 102 | uint8_t *object = static_cast(objectSpace.getObject(objects[i].id)); 103 | for (size_t idx = 0; idx < 5; idx++) { 104 | cout << static_cast(object[idx]) << " "; 105 | } 106 | cout << "..." << endl; 107 | } 108 | cout << endl; 109 | } 110 | } catch (NGT::Exception &err) { 111 | cerr << "Error " << err.what() << endl; 112 | return 1; 113 | } catch (...) { 114 | cerr << "Error" << endl; 115 | return 1; 116 | } 117 | #endif 118 | return 0; 119 | } 120 | -------------------------------------------------------------------------------- /bin/ngt/ngt.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #include "NGT/Command.h" 18 | #include "NGT/Optimizer.h" 19 | 20 | #define NGT_VERSION_FOR_HEADER 21 | #include "NGT/Version.h" 22 | 23 | 24 | #ifndef BUILD_DATE 25 | #define BUILD_DATE "-" 26 | #endif 27 | #ifndef GIT_HASH 28 | #define GIT_HASH "-" 29 | #endif 30 | #ifndef GIT_DATE 31 | #define GIT_DATE "-" 32 | #endif 33 | #ifndef GIT_TAG 34 | #define GIT_TAG "-" 35 | #endif 36 | 37 | using namespace std; 38 | 39 | static void version(ostream &os) { 40 | os << "ngt:" << endl; 41 | NGT::Version::get(os); 42 | } 43 | 44 | void help() { 45 | cerr << "Usage : ngt command [options] index [data]" << endl; 46 | cerr << " command : info create search remove append export import prune reconstruct-graph " 47 | "optimize-search-parameters optimize-#-of-edges repair rebuild" 48 | << endl; 49 | cerr << "Version : " << NGT::Index::getVersion() << endl; 50 | if (NGT::Index::getVersion() != NGT::Version::getVersion()) { 51 | version(cerr); 52 | NGT::Index::version(cerr); 53 | } 54 | } 55 | 56 | int main(int argc, char **argv) { 57 | NGT::Args args(argc, argv); 58 | 59 | NGT::Command ngt; 60 | 61 | string command; 62 | try { 63 | command = args.get("#0"); 64 | } catch (...) 
{ 65 | help(); 66 | return 0; 67 | } 68 | 69 | ngt.setDebugLevel(args.getl("X", 0)); 70 | 71 | try { 72 | if (ngt.getDebugLevel() >= 1) { 73 | cerr << "ngt: command=" << command << endl; 74 | } 75 | if (command == "search") { 76 | ngt.search(args); 77 | } else if (command == "create") { 78 | ngt.create(args); 79 | } else if (command == "append") { 80 | ngt.append(args); 81 | } else if (command == "remove") { 82 | ngt.remove(args); 83 | } else if (command == "export") { 84 | ngt.exportIndex(args); 85 | } else if (command == "import") { 86 | ngt.importIndex(args); 87 | } else if (command == "prune") { 88 | ngt.prune(args); 89 | } else if (command == "reconstruct-graph") { 90 | ngt.reconstructGraph(args); 91 | } else if (command == "eval") { 92 | NGT::Optimizer::evaluate(args); 93 | } else if (command == "optimize-search-parameters") { 94 | ngt.optimizeSearchParameters(args); 95 | } else if (command == "refine-anng") { 96 | ngt.refineANNG(args); 97 | } else if (command == "repair") { 98 | ngt.repair(args); 99 | } else if (command == "optimize-#-of-edges") { 100 | ngt.optimizeNumberOfEdgesForANNG(args); 101 | } else if (command == "export-graph") { 102 | ngt.exportGraph(args); 103 | } else if (command == "export-objects") { 104 | ngt.exportObjects(args); 105 | } else if (command == "rebuild") { 106 | ngt.rebuild(args); 107 | #ifndef NGT_SHARED_MEMORY_ALLOCATOR 108 | } else if (command == "extract-query") { 109 | NGT::Optimizer::extractQueries(args); 110 | } else if (command == "adjust-edge-size") { 111 | NGT::Optimizer::adjustSearchEdgeSize(args); 112 | #endif 113 | } else if (command == "info") { 114 | if (NGT::Index::getVersion() != NGT::Version::getVersion()) { 115 | version(cerr); 116 | NGT::Index::version(cerr); 117 | } 118 | ngt.info(args); 119 | } else { 120 | cerr << "ngt: Error: Illegal command. 
" << command << endl; 121 | help(); 122 | } 123 | } catch (NGT::Exception &err) { 124 | cerr << "ngt: Error: " << err.what() << endl; 125 | return 1; 126 | } 127 | return 0; 128 | } 129 | -------------------------------------------------------------------------------- /python/ngt/README.md: -------------------------------------------------------------------------------- 1 |

ngt

2 | 3 | 4 |

Index

5 | 6 | ```python 7 | Index(self, path) 8 | ``` 9 | 10 | NGT: Neighborhood Graph and Tree for Indexing High-dimensional Data 11 | NGT provides the functionality of searching for approximate nearest neighbors in high-dimensional data. 12 | 13 | Example: 14 | ```python 15 | from ngt import base as ngt 16 | import random 17 | 18 | dim = 10 19 | objects = [] 20 | for i in range(0, 100) : 21 | vector = random.sample(range(100), dim) 22 | objects.append(vector) 23 | 24 | query = objects[0] 25 | index = ngt.Index.create("tmp", dim) 26 | index.insert(objects) 27 | # You can also insert objects from a file like this. 28 | # index.insert_from_tsv('list.dat') 29 | 30 | index.save() 31 | 32 | result = index.search(query, 3) 33 | 34 | for i, o in enumerate(result) : 35 | print(str(i) + ": " + str(o.id) + ", " + str(o.distance)) 36 | object = index.get_object(o.id) 37 | print(object) 38 | ``` 39 | 40 |

insert

41 | 42 | ```python 43 | Index.insert(self, objects, num_threads=8) 44 | ``` 45 | 46 | insert the specified objects into the index and build the index. 47 | 48 | objects : Inserted objects. 49 | return : List of the IDs of the inserted objects. 50 | 51 |

get_object

52 | 53 | ```python 54 | Index.get_object(self, id) 55 | ``` 56 | 57 | get the specified object by id. 58 | 59 | id : Object id. 60 | 61 |

insert_object

62 | 63 | ```python 64 | Index.insert_object(self, object) 65 | ``` 66 | 67 | insert the specified object into the index. 68 | build_index must be called after calling this method. 69 | 70 | object : Inserted object. 71 | return : The ID of the inserted object. 72 | 73 |

insert_from_tsv

74 | 75 | ```python 76 | Index.insert_from_tsv(self, path, num_threads=8, dlmt='\t') 77 | ``` 78 | 79 | insert objects in the specified file and build the index. 80 | 81 | path : Path of the object file. 82 | num_threads : Number of threads in building index. 83 | dlmt : Delimiter to separate each element in the object file. 84 | 85 |

remove

86 | 87 | ```python 88 | Index.remove(self, id) 89 | ``` 90 | 91 | remove the specified object by id 92 | 93 | id : Object id. 94 | 95 |

build_index

96 | 97 | ```python 98 | Index.build_index(self, num_threads=8) 99 | ``` 100 | 101 | build the inserted objects into the index. 102 | 103 | num_threads : Number of threads in building index. 104 | 105 |

search

106 | 107 | ```python 108 | Index.search(self, query, k=20, epsilon=0.1) 109 | ``` 110 | 111 | search for the k nearest neighbors of the specified query object. 112 | 113 | query : Query object. 114 | k : Number of searched objects. 115 | epsilon : Epsilon defines a search range. 116 | 117 |

create

118 | 119 | ```python 120 | Index.create(path, dimension, edge_size_for_creation=10, edge_size_for_search=40, object_type='Float', distance_type='L2') 121 | ``` 122 | 123 | create an empty index with the specified parameters. 124 | edge_size_for_creation : Number of edges for each node in the graph. 125 | edge_size_for_search : Number of edges to search. 126 | object_type : Type of the data object. (Float, Integer [Integer is 1 byte]) 127 | distance_type : Type of the distance function. (L1,L2,Angle,Hamming,Jaccard) 128 | 129 |

save

130 | 131 | ```python 132 | Index.save(self, path=None) 133 | ``` 134 | 135 | save the index. 136 | 137 | path : Path to save the index. By default, the existing index files are overwritten. 138 | 139 |

insert_blob

140 | 141 | ```python 142 | Index.insert_blob(self, objects, num_threads=8) 143 | ``` 144 | 145 | insert the specified objects into the index and build the index. 146 | Although this is the same as the fucntion, both implementations are different. 147 | 148 | objects : Inserted objects. 149 | num_threads : Number of threads in building index. 150 | 151 | -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- 1 | logo_ngt -------------------------------------------------------------------------------- /samples/qbg-capi/qbg-capi.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NGT/Index.h" 3 | #include "NGT/NGTQ/Capi.h" 4 | int main(int argc, char **argv) { 5 | #if !defined(NGT_SHARED_MEMORY_ALLOCATOR) 6 | std::string indexPath = "qbg-index"; 7 | std::string objectFile = "sift-128-euclidean.tsv"; 8 | std::string queryFile = "query.tsv"; 9 | 10 | std::cerr << "run the following commands to prepare data for this sample program." << std::endl; 11 | std::cerr 12 | << " curl -L -O " 13 | "https://github.com/yahoojapan/NGT/raw/main/tests/datasets/ann-benchmarks/sift-128-euclidean.tsv" 14 | << std::endl; 15 | std::cerr << " curl -L -O " 16 | "https://github.com/yahoojapan/NGT/raw/main/tests/datasets/ann-benchmarks/" 17 | "sift-128-euclidean_query.tsv" 18 | << std::endl; 19 | std::cerr << " head -1 sift-128-euclidean_query.tsv > query.tsv" << std::endl; 20 | std::cerr << std::endl; 21 | std::cerr << "index path=" << indexPath << std::endl; 22 | std::cerr << "object file=" << objectFile << std::endl; 23 | std::cerr << "query file=" << queryFile << std::endl; 24 | std::cerr << std::endl; 25 | 26 | { 27 | std::cerr << "remove the existing index. " << indexPath << std::endl; 28 | const std::string com = "rm -rf " + indexPath; 29 | if (system(com.c_str()) == -1) { 30 | std::cerr << "Cannot exec. 
" << com << std::endl; 31 | } 32 | } 33 | 34 | size_t dimension = 128; 35 | NGTError err = ngt_create_error_object(); 36 | 37 | std::cerr << "create an empty index..." << std::endl; 38 | QBGConstructionParameters constructionParameters; 39 | qbg_initialize_construction_parameters(&constructionParameters); 40 | constructionParameters.dimension = dimension; 41 | constructionParameters.number_of_subvectors = 64; 42 | constructionParameters.number_of_blobs = 0; 43 | if (!qbg_create(indexPath.c_str(), &constructionParameters, err)) { 44 | std::cerr << "Cannot create" << std::endl; 45 | std::cerr << ngt_get_error_string(err) << std::endl; 46 | return 1; 47 | } 48 | 49 | std::cerr << "append objects..." << std::endl; 50 | auto index = qbg_open_index(indexPath.c_str(), false, err); 51 | if (index == 0) { 52 | std::cerr << "Cannot open" << std::endl; 53 | std::cerr << ngt_get_error_string(err) << std::endl; 54 | return 1; 55 | } 56 | 57 | try { 58 | std::ifstream is(objectFile); 59 | std::string line; 60 | while (getline(is, line)) { 61 | std::vector obj; 62 | std::stringstream linestream(line); 63 | while (!linestream.eof()) { 64 | float value; 65 | linestream >> value; 66 | if (linestream.fail()) { 67 | obj.clear(); 68 | break; 69 | } 70 | obj.push_back(value); 71 | } 72 | if (obj.empty()) { 73 | std::cerr << "An empty line or invalid value: " << line << std::endl; 74 | return 1; 75 | } 76 | if (qbg_append_object(index, obj.data(), dimension, err) == 0) { 77 | std::cerr << ngt_get_error_string(err) << std::endl; 78 | return 1; 79 | } 80 | } 81 | } catch (...) { 82 | std::cerr << "Error" << std::endl; 83 | return 1; 84 | } 85 | 86 | qbg_save_index(index, err); 87 | qbg_close_index(index); 88 | 89 | std::cerr << "building the index..." 
<< std::endl; 90 | QBGBuildParameters buildParameters; 91 | qbg_initialize_build_parameters(&buildParameters); 92 | buildParameters.number_of_objects = 500; 93 | auto status = qbg_build_index(indexPath.c_str(), &buildParameters, err); 94 | if (!status) { 95 | std::cerr << "Cannot build. " << ngt_get_error_string(err) << std::endl; 96 | return 1; 97 | } 98 | 99 | index = qbg_open_index(indexPath.c_str(), true, err); 100 | if (index == 0) { 101 | std::cerr << "Cannot open. " << ngt_get_error_string(err) << std::endl; 102 | return 1; 103 | } 104 | 105 | std::ifstream is(queryFile); 106 | if (!is) { 107 | std::cerr << "Cannot open the specified file. " << queryFile << std::endl; 108 | return 1; 109 | } 110 | 111 | std::string line; 112 | float queryVector[dimension]; 113 | if (getline(is, line)) { 114 | std::vector queryObject; 115 | { 116 | std::vector tokens; 117 | NGT::Common::tokenize(line, tokens, " \t"); 118 | if (tokens.size() != dimension) { 119 | std::cerr << "dimension of the query is invalid. dimesion=" << tokens.size() << ":" << dimension 120 | << std::endl; 121 | return 1; 122 | } 123 | for (std::vector::iterator ti = tokens.begin(); ti != tokens.end(); ++ti) { 124 | queryVector[distance(tokens.begin(), ti)] = NGT::Common::strtod(*ti); 125 | } 126 | } 127 | QBGObjectDistances result = ngt_create_empty_results(err); 128 | QBGQuery query; 129 | qbg_initialize_query(&query); 130 | query.query = &queryVector[0]; 131 | std::cerr << "search the index for the specified query..." << std::endl; 132 | auto status = qbg_search_index(index, query, result, err); 133 | if (!status) { 134 | std::cerr << "Cannot search. 
" << ngt_get_error_string(err) << std::endl; 135 | return 1; 136 | } 137 | auto rsize = qbg_get_result_size(result, err); 138 | std::cout << "Rank\tID\tDistance" << std::endl; 139 | for (size_t i = 0; i < rsize; i++) { 140 | NGTObjectDistance object = qbg_get_result(result, i, err); 141 | std::cout << i + 1 << "\t" << object.id << "\t" << object.distance << std::endl; 142 | } 143 | 144 | qbg_destroy_results(result); 145 | } 146 | 147 | qbg_close_index(index); 148 | ngt_destroy_error_object(err); 149 | #endif 150 | return 0; 151 | } 152 | -------------------------------------------------------------------------------- /lib/NGT/Command.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | #pragma once 17 | 18 | #include "NGT/Index.h" 19 | 20 | namespace NGT { 21 | // Declares the ngt command-line operations and the parameter holders they read from Args. 22 | class Command { 23 | public: 24 | class CreateParameters { 25 | public: 26 | CreateParameters() : numOfObjects(0), indexType('t') {} 27 | CreateParameters(Args &args); 28 | // The default constructor gives the scalar members defined values ('t' matches SearchParameters' default). 29 | std::string index; 30 | std::string objectPath; 31 | size_t numOfObjects; 32 | NGT::Property property; 33 | char indexType; 34 | }; 35 | // Search options; parse() fills them from Args. 36 | class SearchParameters { 37 | public: 38 | SearchParameters() { 39 | openMode = 'r'; 40 | query = ""; 41 | querySize = 0; 42 | indexType = 't'; 43 | size = 20; 44 | edgeSize = -1; 45 | outputMode = "-"; 46 | radius = FLT_MAX; 47 | step = 0; 48 | trial = 1; 49 | beginOfEpsilon = endOfEpsilon = stepOfEpsilon = 0.1; 50 | accuracy = 0.0; 51 | #ifdef NGT_REFINEMENT 52 | refinementExpansion = 0.0; 53 | #endif 54 | #ifdef RESULT_DEFINED_RANGE 55 | expandedSizeByEpsilon = false; 56 | #endif 57 | } 58 | SearchParameters(Args &args, const std::string epsilonDefault = "0.1") { parse(args, epsilonDefault); } 59 | void parse(Args &args, const std::string epsilonDefault) { 60 | openMode = args.getChar("m", 'r'); 61 | try { 62 | query = args.get("#2"); 63 | } catch (...) { 64 | NGTThrowException("ngt: Error: Query is not specified"); 65 | } 66 | querySize = args.getl("Q", 0); 67 | indexType = args.getChar("i", 't'); 68 | size = args.getl("n", 20); 69 | // edgeSize 70 | // -1(default) : using the size which was specified at the index creation. 71 | // 0 : no limitation for the edge size. 72 | // -2('e') : automatically set it according to epsilon. 
73 | if (args.getChar("E", '-') == 'e') { 74 | edgeSize = -2; 75 | } else { 76 | edgeSize = args.getl("E", -1); 77 | } 78 | outputMode = args.getString("o", "-"); 79 | radius = args.getf("r", FLT_MAX); 80 | trial = args.getl("t", 1); 81 | { 82 | beginOfEpsilon = endOfEpsilon = stepOfEpsilon = 0.1; 83 | std::string epsilon = args.getString("e", epsilonDefault.c_str()); 84 | std::vector tokens; 85 | NGT::Common::tokenize(epsilon, tokens, ":"); 86 | if (tokens.size() >= 1) { 87 | beginOfEpsilon = endOfEpsilon = NGT::Common::strtod(tokens[0]); 88 | } 89 | if (tokens.size() >= 2) { 90 | endOfEpsilon = NGT::Common::strtod(tokens[1]); 91 | } 92 | if (tokens.size() >= 3) { 93 | stepOfEpsilon = NGT::Common::strtod(tokens[2]); 94 | } 95 | step = 0; 96 | if (tokens.size() >= 4) { 97 | step = NGT::Common::strtol(tokens[3]); 98 | } 99 | } 100 | accuracy = args.getf("a", 0.0); 101 | #ifdef NGT_REFINEMENT 102 | refinementExpansion = args.getf("R", 0.0); 103 | #endif 104 | #ifdef RESULT_DEFINED_RANGE 105 | expandedSizeByEpsilon = args.getBool("N"); 106 | std::cerr << "expandedSizeByEpsilon=" << (expandedSizeByEpsilon ? 
"True" : "False") << std::endl; 107 | #endif 108 | } 109 | char openMode; 110 | std::string query; 111 | size_t querySize; 112 | char indexType; 113 | int size; 114 | long edgeSize; 115 | std::string outputMode; 116 | float radius; 117 | float beginOfEpsilon; 118 | float endOfEpsilon; 119 | float stepOfEpsilon; 120 | float accuracy; 121 | size_t step; 122 | size_t trial; 123 | #ifdef NGT_REFINEMENT 124 | float refinementExpansion; 125 | #endif 126 | #ifdef RESULT_DEFINED_RANGE 127 | bool expandedSizeByEpsilon; 128 | #endif 129 | }; 130 | 131 | Command() : debugLevel(0) {} 132 | 133 | void create(Args &args); 134 | void append(Args &args); 135 | static void search(NGT::Index &index, SearchParameters &searchParameters, std::ostream &stream) { 136 | std::ifstream is(searchParameters.query); 137 | if (!is) { 138 | std::stringstream msg; 139 | msg << "Cannot open the specified query file. " << searchParameters.query; 140 | NGTThrowException(msg); 141 | } 142 | search(index, searchParameters, is, stream); 143 | } 144 | static void search(NGT::Index &index, SearchParameters &searchParameters, std::istream &is, 145 | std::ostream &stream); 146 | void search(Args &args); 147 | void remove(Args &args); 148 | void exportIndex(Args &args); 149 | void importIndex(Args &args); 150 | void prune(Args &args); 151 | void reconstructGraph(Args &args); 152 | void optimizeSearchParameters(Args &args); 153 | void optimizeNumberOfEdgesForANNG(Args &args); 154 | void refineANNG(Args &args); 155 | void repair(Args &args); 156 | void exportGraph(Args &args); 157 | void exportObjects(Args &args); 158 | void rebuild(Args &args); 159 | 160 | void info(Args &args); 161 | void setDebugLevel(int level) { debugLevel = level; } 162 | int getDebugLevel() { return debugLevel; } 163 | 164 | protected: 165 | int debugLevel; 166 | }; 167 | 168 | }; // namespace NGT 169 | -------------------------------------------------------------------------------- /lib/NGT/NGTQ/QbgCli.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2021 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #pragma once 18 | 19 | #include "NGT/NGTQ/QuantizedBlobGraph.h" 20 | #include "NGT/Command.h" 21 | 22 | namespace QBG { 23 | // Dispatches qbg subcommands by name; see execute(). 24 | class CLI { 25 | public: 26 | int debugLevel; 27 | CLI() : debugLevel(0) {} // defined value for getDebugLevel() before execute() assigns it 28 | #if !defined(NGTQ_QBG) || defined(NGTQ_SHARED_INVERTED_INDEX) 29 | void create(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 30 | void load(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 31 | void append(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 32 | void insert(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 33 | void remove(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 34 | void expandBlob(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 35 | void buildIndex(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 36 | void hierarchicalKmeans(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 37 | void search(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 38 | void assign(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 39 | void extract(NGT::Args &args) { std::cerr << "not implemented." 
<< std::endl; }; 40 | void gt(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 41 | void gtRange(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 42 | void optimize(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 43 | void build(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 44 | void rebuild(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 45 | void createQG(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 46 | void buildQG(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 47 | void appendQG(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 48 | void searchQG(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 49 | void info(NGT::Args &args) { std::cerr << "not implemented." << std::endl; }; 50 | #else 51 | void create(NGT::Args &args); 52 | void load(NGT::Args &args); 53 | void append(NGT::Args &args); 54 | void insert(NGT::Args &args); 55 | void remove(NGT::Args &args); 56 | void expandBlob(NGT::Args &args); 57 | void buildIndex(NGT::Args &args); 58 | void hierarchicalKmeans(NGT::Args &args); 59 | void search(NGT::Args &args); 60 | void assign(NGT::Args &args); 61 | void extract(NGT::Args &args); 62 | void gt(NGT::Args &args); 63 | void gtRange(NGT::Args &args); 64 | void optimize(NGT::Args &args); 65 | void build(NGT::Args &args); 66 | void rebuild(NGT::Args &args); 67 | void createQG(NGT::Args &args); 68 | void buildQG(NGT::Args &args); 69 | void appendQG(NGT::Args &args); 70 | void searchQG(NGT::Args &args); 71 | void info(NGT::Args &args); 72 | #endif 73 | 74 | void setDebugLevel(int level) { debugLevel = level; } 75 | int getDebugLevel() { return debugLevel; } 76 | 77 | void help() { 78 | cerr << "Usage : qbg command database [data]" << endl; 79 | cerr << " command : create build quantize search" << endl; 80 | } 81 | // Reads the subcommand from positional argument "#0" and the debug level from option "X", then dispatches. 82 | void execute(NGT::Args args) { 83 | string command; 84 | try { 85 | 
command = args.get("#0"); 86 | } catch (...) { 87 | help(); 88 | return; 89 | } 90 | 91 | debugLevel = args.getl("X", 0); 92 | 93 | if (debugLevel >= 1) { 94 | cerr << "ngt::command=" << command << endl; 95 | } 96 | if (command == "search") { 97 | search(args); 98 | } else if (command == "create") { 99 | create(args); 100 | } else if (command == "load") { 101 | load(args); 102 | } else if (command == "append") { 103 | append(args); 104 | } else if (command == "insert") { 105 | insert(args); 106 | } else if (command == "remove") { 107 | remove(args); 108 | } else if (command == "expand-blob") { 109 | expandBlob(args); 110 | } else if (command == "build-index") { 111 | buildIndex(args); 112 | } else if (command == "kmeans") { 113 | hierarchicalKmeans(args); 114 | } else if (command == "assign") { 115 | assign(args); 116 | } else if (command == "extract") { 117 | extract(args); 118 | } else if (command == "gt") { 119 | gt(args); 120 | } else if (command == "gt-range") { 121 | gtRange(args); 122 | } else if (command == "optimize") { 123 | optimize(args); 124 | } else if (command == "build") { 125 | build(args); 126 | } else if (command == "rebuild") { 127 | rebuild(args); 128 | } else if (command == "create-qg") { 129 | createQG(args); 130 | } else if (command == "build-qg") { 131 | buildQG(args); 132 | } else if (command == "append-qg") { 133 | appendQG(args); 134 | } else if (command == "search-qg") { 135 | searchQG(args); 136 | } else if (command == "info") { 137 | info(args); 138 | } else if (command == "-h") { 139 | help(); 140 | } else { 141 | help(); 142 | std::stringstream msg; 143 | msg << "qbg: Illegal command. 
" << command << endl; 144 | NGTThrowException(msg); 145 | } 146 | } 147 | }; 148 | 149 | }; // namespace QBG 150 | -------------------------------------------------------------------------------- /samples/qg-capi/qg-capi.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "NGT/Index.h" 3 | #include "NGT/NGTQ/Capi.h" 4 | int main(int argc, char **argv) { 5 | #if !defined(NGT_SHARED_MEMORY_ALLOCATOR) 6 | std::string indexPath = "qg-index"; 7 | std::string objectFile = "sift-128-euclidean.tsv"; 8 | std::string queryFile = "query.tsv"; 9 | 10 | std::cerr << "run the following commands to prepare data for this sample program." << std::endl; 11 | std::cerr 12 | << " curl -L -O " 13 | "https://github.com/yahoojapan/NGT/raw/main/tests/datasets/ann-benchmarks/sift-128-euclidean.tsv" 14 | << std::endl; 15 | std::cerr << " curl -L -O " 16 | "https://github.com/yahoojapan/NGT/raw/main/tests/datasets/ann-benchmarks/" 17 | "sift-128-euclidean_query.tsv" 18 | << std::endl; 19 | std::cerr << " head -1 sift-128-euclidean_query.tsv > query.tsv" << std::endl; 20 | std::cerr << std::endl; 21 | std::cerr << "index path=" << indexPath << std::endl; 22 | std::cerr << "object file=" << objectFile << std::endl; 23 | std::cerr << "query file=" << queryFile << std::endl; 24 | std::cerr << std::endl; 25 | 26 | { 27 | std::cerr << "remove the existing index. " << indexPath << std::endl; 28 | const std::string com = "rm -rf " + indexPath; 29 | if (system(com.c_str()) == -1) { 30 | std::cerr << "Cannot exec. " << com << std::endl; 31 | } 32 | } 33 | 34 | NGTError err = ngt_create_error_object(); 35 | NGTProperty prop = ngt_create_property(err); 36 | if (prop == NULL) { 37 | std::cerr << ngt_get_error_string(err) << std::endl; 38 | return 1; 39 | } 40 | size_t dimension = 128; 41 | ngt_set_property_dimension(prop, dimension, err); 42 | 43 | std::cerr << "create an empty index..." 
<< std::endl; 44 | NGTIndex index = ngt_create_graph_and_tree(indexPath.c_str(), prop, err); 45 | if (index == NULL) { 46 | std::cerr << ngt_get_error_string(err) << std::endl; 47 | return 1; 48 | } 49 | 50 | std::cerr << "insert objects..." << std::endl; 51 | try { 52 | std::ifstream is(objectFile); 53 | std::string line; 54 | while (getline(is, line)) { 55 | std::vector obj; 56 | std::stringstream linestream(line); 57 | while (!linestream.eof()) { 58 | float value; 59 | linestream >> value; 60 | if (linestream.fail()) { 61 | obj.clear(); 62 | break; 63 | } 64 | obj.push_back(value); 65 | } 66 | if (obj.empty()) { 67 | std::cerr << "An empty line or invalid value: " << line << std::endl; 68 | return 1; 69 | } 70 | if (ngt_insert_index(index, obj.data(), dimension, err) == 0) { 71 | std::cerr << ngt_get_error_string(err) << std::endl; 72 | return 1; 73 | } 74 | } 75 | } catch (NGT::Exception &err) { 76 | std::cerr << "Error " << err.what() << std::endl; 77 | return 1; 78 | } catch (...) { 79 | std::cerr << "Error" << std::endl; 80 | return 1; 81 | } 82 | 83 | std::cerr << "build the index..." << std::endl; 84 | if (ngt_create_index(index, 100, err) == false) { 85 | std::cerr << "Error:" << ngt_get_error_string(err) << std::endl; 86 | return 1; 87 | } 88 | 89 | std::cerr << "save the index..." << std::endl; 90 | if (ngt_save_index(index, indexPath.c_str(), err) == false) { 91 | std::cerr << ngt_get_error_string(err) << std::endl; 92 | return 1; 93 | } 94 | 95 | std::cerr << "close the index..." << std::endl; 96 | ngt_close_index(index); 97 | 98 | NGTQGQuantizationParameters quantizationParameters; 99 | ngtqg_initialize_quantization_parameters(&quantizationParameters); 100 | 101 | std::cerr << "quantize the index..." << std::endl; 102 | if (ngtqg_quantize(indexPath.c_str(), quantizationParameters, err) == false) { 103 | std::cerr << ngt_get_error_string(err) << std::endl; 104 | return 1; 105 | } 106 | 107 | std::cerr << "open the quantized index..." 
<< std::endl; 108 | index = ngtqg_open_index(indexPath.c_str(), err); 109 | if (index == NULL) { 110 | std::cerr << ngt_get_error_string(err) << std::endl; 111 | return 1; 112 | } 113 | 114 | std::ifstream is(queryFile); 115 | if (!is) { 116 | std::cerr << "Cannot open the specified file. " << queryFile << std::endl; 117 | return 1; 118 | } 119 | // A std::vector replaces the former variable-length array, which is not standard C++. 120 | std::string line; 121 | std::vector<float> queryVector(dimension); 122 | if (getline(is, line)) { 123 | // Only the first line of the query file is parsed; it must hold exactly `dimension` values. 124 | { 125 | std::vector tokens; 126 | NGT::Common::tokenize(line, tokens, " \t"); 127 | if (tokens.size() != dimension) { 128 | std::cerr << "dimension of the query is invalid. dimesion=" << tokens.size() << ":" << dimension 129 | << std::endl; 130 | return 1; 131 | } 132 | for (std::vector::iterator ti = tokens.begin(); ti != tokens.end(); ++ti) { 133 | queryVector[distance(tokens.begin(), ti)] = NGT::Common::strtod(*ti); 134 | } 135 | } 136 | NGTObjectDistances result = ngt_create_empty_results(err); 137 | NGTQGQuery query; 138 | ngtqg_initialize_query(&query); 139 | query.query = queryVector.data(); 140 | query.size = 10; 141 | query.result_expansion = 100; 142 | query.epsilon = 0.1; 143 | std::cerr << "search the index for the specified query..." 
<< std::endl; 144 | ngtqg_search_index(index, query, result, err); 145 | 146 | auto rsize = ngt_get_result_size(result, err); 147 | std::cout << "Rank\tID\tDistance" << std::endl; 148 | for (size_t i = 0; i < rsize; i++) { 149 | NGTObjectDistance object = ngt_get_result(result, i, err); 150 | std::cout << i + 1 << "\t" << object.id << "\t" << object.distance << std::endl; 151 | } 152 | 153 | ngt_destroy_results(result); 154 | } 155 | 156 | std::cerr << "close the quantized index" << std::endl; 157 | ngtqg_close_index(index); 158 | ngt_destroy_error_object(err); 159 | #endif 160 | return 0; 161 | } 162 | -------------------------------------------------------------------------------- /lib/NGT/ArrayFile.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace NGT { 29 | class ObjectSpace; 30 | }; 31 | 32 | template class ArrayFile { 33 | protected: 34 | struct FileHeadStruct { 35 | size_t recordSize; 36 | uint64_t extraData; // reserve 37 | }; 38 | 39 | struct RecordStruct { 40 | bool deleteFlag; 41 | uint64_t extraData; // reserve 42 | }; 43 | 44 | bool _isOpen; 45 | std::fstream _stream; 46 | FileHeadStruct _fileHead; 47 | 48 | bool _readFileHead(); 49 | pthread_mutex_t _mutex; 50 | 51 | public: 52 | ArrayFile(); 53 | ~ArrayFile(); 54 | bool create(const std::string &file, size_t recordSize); 55 | bool open(const std::string &file); 56 | void close(); 57 | size_t insert(TYPE &data, NGT::ObjectSpace *objectSpace = 0); 58 | void put(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace = 0); 59 | bool get(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace = 0); 60 | void remove(const size_t id); 61 | bool isOpen() const; 62 | size_t size(); 63 | size_t getRecordSize() { return _fileHead.recordSize; } 64 | }; 65 | 66 | // constructor 67 | template 68 | ArrayFile::ArrayFile() : _isOpen(false), _mutex((pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER) { 69 | if (pthread_mutex_init(&_mutex, NULL) < 0) throw std::runtime_error("pthread init error."); 70 | } 71 | 72 | // destructor 73 | template ArrayFile::~ArrayFile() { 74 | pthread_mutex_destroy(&_mutex); 75 | close(); 76 | } 77 | 78 | template bool ArrayFile::create(const std::string &file, size_t recordSize) { 79 | std::fstream tmpstream; 80 | tmpstream.open(file.c_str()); 81 | if (tmpstream) { 82 | return false; 83 | } 84 | 85 | tmpstream.open(file.c_str(), std::ios::out); 86 | tmpstream.seekp(0, std::ios::beg); 87 | FileHeadStruct fileHead = {recordSize, 0}; 88 | tmpstream.write((char *)(&fileHead), sizeof(FileHeadStruct)); 89 | tmpstream.close(); 90 | 91 | return true; 92 | } 93 | 94 | template 
bool ArrayFile::open(const std::string &file) { 95 | _stream.open(file.c_str(), std::ios::in | std::ios::out); 96 | if (!_stream) { 97 | _isOpen = false; 98 | return false; 99 | } 100 | _isOpen = true; 101 | 102 | bool ret = _readFileHead(); 103 | return ret; 104 | } 105 | 106 | template void ArrayFile::close() { 107 | _stream.close(); 108 | _isOpen = false; 109 | } 110 | 111 | template size_t ArrayFile::insert(TYPE &data, NGT::ObjectSpace *objectSpace) { 112 | _stream.seekp(sizeof(RecordStruct), std::ios::end); 113 | int64_t write_pos = _stream.tellg(); 114 | for (size_t i = 0; i < _fileHead.recordSize; i++) { 115 | _stream.write("", 1); 116 | } 117 | _stream.seekp(write_pos, std::ios::beg); 118 | data.serialize(_stream, objectSpace); 119 | 120 | int64_t offset_pos = _stream.tellg(); 121 | offset_pos -= sizeof(FileHeadStruct); 122 | size_t id = offset_pos / (sizeof(RecordStruct) + _fileHead.recordSize); 123 | if (offset_pos % (sizeof(RecordStruct) + _fileHead.recordSize) == 0) { 124 | id -= 1; 125 | } 126 | 127 | return id; 128 | } 129 | 130 | template void ArrayFile::put(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace) { 131 | uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); 132 | offset_pos += sizeof(RecordStruct); 133 | _stream.seekp(offset_pos, std::ios::beg); 134 | 135 | for (size_t i = 0; i < _fileHead.recordSize; i++) { 136 | _stream.write("", 1); 137 | } 138 | _stream.seekp(offset_pos, std::ios::beg); 139 | data.serialize(_stream, objectSpace); 140 | } 141 | 142 | template bool ArrayFile::get(const size_t id, TYPE &data, NGT::ObjectSpace *objectSpace) { 143 | pthread_mutex_lock(&_mutex); 144 | 145 | if (size() <= id) { 146 | pthread_mutex_unlock(&_mutex); 147 | return false; 148 | } 149 | 150 | uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); 151 | offset_pos += sizeof(RecordStruct); 152 | _stream.seekg(offset_pos, std::ios::beg); 153 | if 
(!_stream.fail()) { 154 | data.deserialize(_stream, objectSpace); 155 | } 156 | if (_stream.fail()) { 157 | const int trialCount = 10; 158 | for (int tc = 0; tc < trialCount; tc++) { 159 | _stream.clear(); 160 | _stream.seekg(offset_pos, std::ios::beg); 161 | if (_stream.fail()) { 162 | continue; 163 | } 164 | data.deserialize(_stream, objectSpace); 165 | if (_stream.fail()) { 166 | continue; 167 | } else { 168 | break; 169 | } 170 | } 171 | if (_stream.fail()) { 172 | throw std::runtime_error("ArrayFile::get: Error!"); 173 | } 174 | } 175 | 176 | pthread_mutex_unlock(&_mutex); 177 | return true; 178 | } 179 | 180 | template void ArrayFile::remove(const size_t id) { 181 | uint64_t offset_pos = (id * (sizeof(RecordStruct) + _fileHead.recordSize)) + sizeof(FileHeadStruct); 182 | _stream.seekp(offset_pos, std::ios::beg); 183 | RecordStruct recordHead = {1, 0}; 184 | _stream.write((char *)(&recordHead), sizeof(RecordStruct)); 185 | } 186 | 187 | template bool ArrayFile::isOpen() const { return _isOpen; } 188 | 189 | template size_t ArrayFile::size() { 190 | _stream.seekp(0, std::ios::end); 191 | int64_t offset_pos = _stream.tellg(); 192 | offset_pos -= sizeof(FileHeadStruct); 193 | size_t num = offset_pos / (sizeof(RecordStruct) + _fileHead.recordSize); 194 | 195 | return num; 196 | } 197 | 198 | template bool ArrayFile::_readFileHead() { 199 | _stream.seekp(0, std::ios::beg); 200 | _stream.read((char *)(&_fileHead), sizeof(FileHeadStruct)); 201 | if (_stream.bad()) { 202 | return false; 203 | } 204 | return true; 205 | } 206 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | import json 5 | import shutil 6 | import glob 7 | import setuptools 8 | import pybind11 9 | import platform 10 | 11 | static_library_option = '--static-library' 12 | static_library_native_option = 
'--static-library-native' 13 | static_library_avx2_option = '--static-library-avx2' 14 | included_library_option = '--included-library' 15 | shared_library_without_avx_option = '--shared-library-without-avx' 16 | shared_library_option = '--shared-library' 17 | shared_library_avx2_option = '--shared-library-avx2' 18 | version_file = 'VERSION' 19 | package = 'ngt' 20 | module = 'ngtpy' 21 | 22 | static_library = False 23 | if static_library_option in sys.argv: 24 | print('use the NGT static library') 25 | sys.argv.remove(static_library_option) 26 | static_library = True 27 | 28 | static_library_native = False 29 | if static_library_native_option in sys.argv: 30 | print('use the NGT static library with native') 31 | sys.argv.remove(static_library_native_option) 32 | static_library_native = True 33 | 34 | static_library_avx2 = False 35 | if static_library_avx2_option in sys.argv: 36 | print('use the NGT static library with avx2') 37 | sys.argv.remove(static_library_avx2_option) 38 | static_library_avx2 = True 39 | package = 'ngt_avx2' 40 | module = 'ngtpy_avx2' 41 | 42 | included_library = False 43 | if included_library_option in sys.argv: 44 | print('use the NGT included library') 45 | sys.argv.remove(included_library_option) 46 | included_library = True 47 | 48 | shared_library_without_avx = False 49 | if shared_library_without_avx_option in sys.argv: 50 | print('use the shared library without avx') 51 | sys.argv.remove(shared_library_without_avx_option) 52 | shared_library_without_avx = True 53 | 54 | shared_library = False 55 | if shared_library_option in sys.argv: 56 | print('use the shared library') 57 | sys.argv.remove(shared_library_option) 58 | shared_library = True 59 | 60 | shared_library_avx2 = False 61 | if shared_library_avx2_option in sys.argv: 62 | print('use the shared library with avx2') 63 | sys.argv.remove(shared_library_avx2_option) 64 | shared_library_avx2 = True 65 | package = 'ngt_avx2' 66 | module = 'ngtpy_avx2' 67 | 68 | if 
sys.version_info.major >= 3: 69 | from setuptools import Extension 70 | 71 | if os.path.isfile('../' + version_file): 72 | shutil.copyfile('../' + version_file, version_file) 73 | 74 | with open(version_file, 'r') as fh: 75 | version = fh.read().rstrip('\n') 76 | 77 | basedir = os.path.abspath(os.path.dirname(__file__)) 78 | 79 | gcc_compiler = True 80 | if platform.system() == 'Darwin': 81 | gcc_compiler = False 82 | if 'CC' in os.environ: 83 | if 'gcc' in os.environ['CC']: 84 | gcc_compiler = True 85 | 86 | if gcc_compiler: 87 | openmplib = 'gomp' 88 | else: 89 | openmplib = 'omp' 90 | 91 | openmp_root = None 92 | if 'OpenMP_ROOT' in os.environ: 93 | openmp_root = os.environ['OpenMP_ROOT'] 94 | 95 | with open('README.md', 'r', encoding='utf-8') as fh: 96 | long_description = fh.read() 97 | 98 | args = { 99 | 'name': package, 100 | 'version': version, 101 | 'author': 'Yahoo! JAPAN research', 102 | 'author_email': 'miwasaki@yahoo-corp.jp', 103 | 'url': 'https://github.com/yahoojapan/NGT', 104 | 'description': 'python NGT', 105 | 'long_description': long_description, 106 | 'long_description_content_type': 'text/markdown', 107 | 'license': 'Apache License Version 2.0', 108 | 'install_requires': ['numpy'] 109 | } 110 | 111 | if sys.version_info.major >= 3: 112 | if static_library or included_library or shared_library_without_avx: 113 | params = { 114 | 'include_dirs': ['/usr/local/include', 115 | pybind11.get_include(True), 116 | pybind11.get_include(False)], 117 | 'extra_compile_args': ['-std=c++11', '-Ofast', 118 | '-march=x86-64' if platform.processor() == 'x86-64' or 119 | platform.processor() == 'x86_64' else '', 120 | '-DNDEBUG'], 121 | 'sources': ['src/ngtpy.cpp'] 122 | } 123 | elif static_library_avx2 or shared_library_avx2: 124 | params = { 125 | 'include_dirs': ['/usr/local/include', 126 | pybind11.get_include(True), 127 | pybind11.get_include(False)], 128 | 'extra_compile_args': ['-std=c++11', '-Ofast', 129 | '-march=haswell' if platform.processor() == 
'x86-64' or 130 | platform.processor() == 'x86_64' else '', 131 | '-DNDEBUG'], 132 | 'sources': ['src/ngtpy_avx2.cpp'] 133 | } 134 | else: 135 | params = { 136 | 'include_dirs': ['/usr/local/include', 137 | pybind11.get_include(True), 138 | pybind11.get_include(False)], 139 | 'extra_compile_args': ['-std=c++11', '-Ofast', '-march=native', '-DNDEBUG'], 140 | 'sources': ['src/ngtpy.cpp'] 141 | } 142 | if gcc_compiler: 143 | params['extra_compile_args'].append('-fopenmp') 144 | params['extra_compile_args'].append('-lrt') 145 | else: 146 | params['extra_compile_args'].append('-Xpreprocessor') 147 | params['extra_compile_args'].append('-fopenmp') 148 | 149 | shared_lib_params = { 150 | 'library_dirs': ['/usr/local/lib', '/usr/local/lib64'], 151 | 'libraries': ['ngt', openmplib, 'blas', 'lapack'] 152 | } 153 | included_lib_params = { 154 | 'library_dirs': ['/usr/local/lib', '/usr/local/lib64'], 155 | 'libraries': ['ngt', openmplib, 'blas', 'lapack'], 156 | 'extra_link_args': ['-static-libstdc++'] 157 | } 158 | static_lib_params = { 159 | 'library_dirs': ['/usr/local/lib', '/usr/local/lib64'], 160 | 'extra_objects': ['../build-ngtpy-release/lib/NGT/libngt.a'], 161 | 'libraries': [openmplib, 'blas', 'lapack'], 162 | } 163 | if static_library or static_library_native or static_library_avx2: 164 | params.update(static_lib_params) 165 | elif included_library: 166 | params.update(included_lib_params) 167 | else: 168 | params.update(shared_lib_params) 169 | 170 | if openmp_root: 171 | params['include_dirs'].append(openmp_root + '/include') 172 | params['library_dirs'].append(openmp_root + '/lib') 173 | 174 | module1 = Extension(module, **params) 175 | args['ext_modules'] = [module1] 176 | 177 | setup_arguments = args 178 | 179 | if os.path.isdir('scripts'): 180 | setup_arguments['scripts'] = [ 181 | os.path.join('scripts', f) for f in os.listdir('scripts') 182 | ] 183 | 184 | if __name__ == '__main__': 185 | setuptools.setup(**setup_arguments) 186 | 
-------------------------------------------------------------------------------- /lib/NGT/SharedMemoryAllocator.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #pragma once 18 | 19 | #include "NGT/defines.h" 20 | #include "NGT/MmapManager.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #define MMAP_MANAGER 32 | 33 | // Allocator that hands out memory from a MemoryManager::MmapManager opened on a file path by construct(). 34 | // Constructors leave the object invalid (isValid == false, mmanager == 0) until construct() succeeds. 35 | /////////////////////////////////////////////////////////////////////// 36 | class SharedMemoryAllocator { 37 | public: 38 | enum GetMemorySizeType { GetTotalMemorySize = 0, GetAllocatedMemorySize = 1, GetFreedMemorySize = 2 }; 39 | 40 | SharedMemoryAllocator() : isValid(false), mmanager(0) { 41 | #ifdef SMA_TRACE 42 | std::cerr << "SharedMemoryAllocatorSiglton::constructor" << std::endl; 43 | #endif 44 | } 45 | SharedMemoryAllocator(const SharedMemoryAllocator &a) : isValid(false), mmanager(0) {} 46 | SharedMemoryAllocator &operator=(const SharedMemoryAllocator &a) { return *this; } 47 | 48 | public: 49 | void *allocate(size_t size) { 50 | if (isValid == false) { 51 | std::cerr << "SharedMemoryAllocator::allocate: Fatal error! 
" << std::endl; 52 | assert(isValid); 53 | } 54 | #ifdef SMA_TRACE 55 | std::cerr << "SharedMemoryAllocator::allocate: size=" << size << std::endl; 56 | std::cerr << "SharedMemoryAllocator::allocate: before " << getTotalSize() << ":" << getAllocatedSize() 57 | << ":" << getFreedSize() << std::endl; 58 | #endif 59 | #if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) 60 | if (!isValid) { 61 | return NULL; 62 | } 63 | off_t file_offset = mmanager->alloc(size, true); 64 | if (file_offset == -1) { 65 | std::cerr << "Fatal Error: Allocating memory size is too big for this settings." << std::endl; 66 | std::cerr << " Max allocation size should be enlarged." << std::endl; 67 | abort(); 68 | } 69 | void *p = mmanager->getAbsAddr(file_offset); 70 | std::memset(p, 0, size); 71 | #ifdef SMA_TRACE 72 | std::cerr << "SharedMemoryAllocator::allocate: end" << std::endl; 73 | #endif 74 | return p; 75 | #else 76 | void *ptr = std::malloc(size); 77 | std::memset(ptr, 0, size); 78 | return ptr; 79 | #endif 80 | } 81 | void free(void *ptr) { 82 | #ifdef SMA_TRACE 83 | std::cerr << "SharedMemoryAllocator::free: ptr=" << ptr << std::endl; 84 | #endif 85 | if (ptr == 0) { 86 | std::cerr << "SharedMemoryAllocator::free: ptr is invalid! ptr=" << ptr << std::endl; 87 | } 88 | if (ptr == 0) { 89 | return; 90 | } 91 | #if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) 92 | off_t file_offset = mmanager->getRelAddr(ptr); 93 | mmanager->free(file_offset); 94 | #else 95 | std::free(ptr); 96 | #endif 97 | } 98 | 99 | void *construct(const std::string &filePath, size_t memorysize = 0) { 100 | file = filePath; // debug 101 | #ifdef SMA_TRACE 102 | std::cerr << "ObjectSharedMemoryAllocator::construct: file " << filePath << std::endl; 103 | #endif 104 | void *hook = 0; 105 | #ifdef MMAP_MANAGER 106 | mmanager = new MemoryManager::MmapManager(); 107 | // msize is the maximum allocated size (M byte) at once. 
108 | size_t msize = memorysize; 109 | if (msize == 0) { 110 | msize = NGT_SHARED_MEMORY_MAX_SIZE; 111 | } 112 | size_t bsize = msize * 1048576 / sysconf(_SC_PAGESIZE) + 1; // 1048576=1M 113 | uint64_t size = bsize * sysconf(_SC_PAGESIZE); 114 | MemoryManager::init_option_st option; 115 | MemoryManager::MmapManager::setDefaultOptionValue(option); 116 | option.use_expand = true; 117 | option.reuse_type = MemoryManager::REUSE_DATA_CLASSIFY; 118 | bool create = true; 119 | if (!mmanager->init(filePath, size, &option)) { 120 | #ifdef SMA_TRACE 121 | std::cerr << "SMA: info. already existed." << std::endl; 122 | #endif 123 | create = false; 124 | } else { 125 | #ifdef SMA_TRACE 126 | std::cerr << "SMA::construct: msize=" << msize << ":" << memorysize << std::endl; 127 | #endif 128 | } 129 | if (!mmanager->openMemory(filePath)) { 130 | std::cerr << "SMA: open error" << std::endl; 131 | return 0; 132 | } 133 | if (!create) { 134 | #ifdef SMA_TRACE 135 | std::cerr << "SMA: get hook to initialize data structure" << std::endl; 136 | #endif 137 | hook = mmanager->getEntryHook(); 138 | assert(hook != 0); 139 | } 140 | #endif 141 | isValid = true; 142 | #ifdef SMA_TRACE 143 | std::cerr << "SharedMemoryAllocator::construct: " << filePath << " total=" << getTotalSize() 144 | << " allocated=" << getAllocatedSize() << " freed=" << getFreedSize() << " (" 145 | << (double)getFreedSize() / (double)getTotalSize() << ") " << std::endl; 146 | #endif 147 | return hook; 148 | } 149 | void destruct() { 150 | if (!isValid) { 151 | return; 152 | } 153 | isValid = false; 154 | #ifdef MMAP_MANAGER 155 | mmanager->closeMemory(); 156 | delete mmanager; 157 | #endif 158 | }; 159 | void setEntry(void *entry) { 160 | #ifdef MMAP_MANAGER 161 | mmanager->setEntryHook(entry); 162 | #endif 163 | } 164 | void *getAddr(off_t oft) { 165 | if (oft == 0) { 166 | return 0; 167 | } 168 | assert(oft > 0); 169 | #if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) 170 | return mmanager->getAbsAddr(oft); 
171 | #else 172 | return (void *)oft; 173 | #endif 174 | } 175 | off_t getOffset(void *adr) { 176 | if (adr == 0) { 177 | return 0; 178 | } 179 | #if defined(MMAP_MANAGER) && !defined(NOT_USE_MMAP_ALLOCATOR) 180 | return mmanager->getRelAddr(adr); 181 | #else 182 | return (off_t)adr; 183 | #endif 184 | } 185 | size_t getMemorySize(GetMemorySizeType t) { 186 | switch (t) { 187 | case GetTotalMemorySize: return getTotalSize(); 188 | case GetAllocatedMemorySize: return getAllocatedSize(); 189 | case GetFreedMemorySize: return getFreedSize(); 190 | } 191 | return getTotalSize(); 192 | } 193 | size_t getTotalSize() { return mmanager->getTotalSize(); } 194 | size_t getAllocatedSize() { return mmanager->getUseSize(); } 195 | size_t getFreedSize() { return mmanager->getFreeSize(); } 196 | 197 | bool isValid; 198 | std::string file; 199 | #ifdef MMAP_MANAGER 200 | MemoryManager::MmapManager *mmanager; 201 | #endif 202 | }; 203 | 204 | ///////////////////////////////////////////////////////////////////////// 205 | 206 | void *operator new(size_t size, SharedMemoryAllocator &allocator); 207 | void *operator new[](size_t size, SharedMemoryAllocator &allocator); 208 | -------------------------------------------------------------------------------- /python/README-ngtpy-jp.md: -------------------------------------------------------------------------------- 1 | ngtpy 2 | ===== 3 | 4 | NGT python 5 | 6 | Class Index 7 | =========== 8 | 9 | ## Member Functions 10 | 11 | ### \_\_init\_\_ 12 | 指定されたインデックスをオープンし、そのインデックスのオブジェクトを生成します。 13 | 14 | __init__(self: ngtpy.Index, path: str, read_only: bool=False, zero_based_numbering: bool=True, log_disabled: bool=False) 15 | 16 | **Returns** 17 | なし 18 | 19 | **path** 20 | オープンするインデックスのパスを指定します。 21 | 22 | **read_only** 23 | 書込み不可でインデックスをオープンします。Falseは書込み可を意味します。 24 | 25 | **zero_based_numbering** 26 | オブジェクトIDが0から始まることを指定します。Falseは1から始まることを意味します。 27 | 28 | **log_disabled** 29 | 処理の進捗に関する標準エラーのメッセージを無効にします。 30 | 31 | ### close 32 | 
インデックスをクローズします。 33 | 34 | close(self: ngtpy.Index) 35 | 36 | **Returns** 37 | なし 38 | 39 | ### insert 40 | 指定されたオブジェクトをインデックスに登録します。そのオブジェクトのインデックスは生成されません。この関数でオブジェクトを登録後に以下の関数build_indexを呼び出してインデックスを生成しなければなりません。 41 | 42 | int insert(self: ngtpy.Index, object: numpy.ndarray[float64]) 43 | 44 | **Returns** 45 | 登録したオブジェクトのID 46 | 47 | **object** 48 | 登録するオブジェクトを指定します。 49 | 50 | ### build_index 51 | 関数insertで登録されたオブジェクトのインデックスを生成します。 52 | 53 | build_index(self: ngtpy.Index, num_threads: int=8) 54 | 55 | **Returns** 56 | なし 57 | 58 | **num_threads** 59 | インデックスを生成する時に利用するスレッド数を指定します。 60 | 61 | ### batch_insert 62 | 指定した複数のオブジェクトを登録し、そのオブジェクトのインデックスを生成します。この関数はngtコマンドの"ngt append"を実行するのとほぼ同じです。この関数を呼び出す代わりにngtコマンドを使っても構いません。 63 | 64 | batch_insert(self: ngtpy.Index, objects: numpy.ndarray[float64], num_threads: int=8) 65 | 66 | **Returns** 67 | なし 68 | 69 | **objects** 70 | 登録する複数のオブジェクトを指定します。 71 | 72 | **num_threads** 73 | インデックスを生成する時に利用するスレッド数を指定します。 74 | 75 | 76 | ### remove 77 | 指定されたオブジェクトを削除します。 78 | 79 | remove(self: ngtpy.Index, object_id: int) 80 | 81 | **Returns** 82 | なし 83 | 84 | **object_id** 85 | 削除するオブジェクトのIDを指定します。 86 | 87 | ### save 88 | インデックスを保存します。 89 | 90 | save(self: ngtpy.Index) 91 | 92 | **Returns** 93 | なし 94 | 95 | ### get_object 96 | 指定されたオブジェクトを取得します。 97 | 98 | List[float] get_object(self: ngtpy.Index, object_id: int) 99 | 100 | **Returns** 101 | 指定されたオブジェクト 102 | 103 | ### get_num_of_objects 104 | 登録済みのオブジェクト数を返します。 105 | 106 | int get_num_of_objects() 107 | 108 | **Returns** 109 | 登録済みのオブジェクト数 110 | 111 | ### search 112 | 指定されたクエリオブジェクトに対する近傍のオブジェクトを検索します。 113 | 114 | object search(self: ngtpy.Index, query: object, size: int, epsilon: float, edge_size: int, with_distance: bool=True) 115 | 116 | **Returns** 117 | 検索結果としてタプル(ID、距離)のリスト 118 | 119 | **query** 120 | クエリオブジェクトを指定します。 121 | 122 | **size** 123 | 検索結果として返るオブジェクトの数を指定します。 124 | 125 | **epsilon** 126 | グラフの探索範囲を決定する変数イプシロンを指定します。 127 | 128 | **edge_size** 129 | 
グラフを探索するのに利用する各ノードのエッジ数を指定します。 130 | 131 | **with_distance** 132 | 距離付きオブジェクトIDの検索結果を返すことを指定します。Falseは結果がオブジェクトIDのみのリストとなることを意味します。 133 | 134 | ### set 135 | 検索パラメータのデフォルト値を指定します。 136 | 137 | set(self: ngtpy.Index, num_of_search_objects: int, epsilon: float, search_radius: float) 138 | 139 | **Returns** 140 | なし。 141 | 142 | **num_of_search_objects** 143 | 検索結果数を指定します。初期デフォルト値は20です。 144 | 145 | **epsilon** 146 | グラフの探索範囲を決定する変数イプシロンを指定します。初期デフォルト値は0.1です。 147 | 148 | **search_radius** 149 | 検索範囲を指定します。初期デフォルト値は無限です。 150 | 151 | ### export_index 152 | インデックスをエクスポートします。 153 | 154 | export_index(self: ngtpy.Index, path: str) 155 | 156 | **Returns** 157 | なし。 158 | 159 | **path** 160 | エクスポートで保存されるパスを指定します。 161 | 162 | ### import_index 163 | インデックスをインポートします。 164 | 165 | import_index(self: ngtpy.Index, path: str) 166 | 167 | **Returns** 168 | なし。 169 | 170 | **path** 171 | インポートするパスを指定します。 172 | 173 | FUNCTIONS 174 | ========= 175 | 176 | ### create 177 | 空のインデックスを生成します。この関数はngtコマンドの"ngt create"を実行するのとほぼ同じです。この関数を呼び出す代わりにngtコマンドを使っても構いません。 178 | 179 | create(path: str, dimension: int, edge_size_for_creation: int=10, edge_size_for_search: int=40, distance_type: str='L2', object_type: str='Float') 180 | 181 | **Returns** 182 | なし 183 | 184 | **path** 185 | インデックスのパスを指定します。 186 | 187 | **dimension** 188 | 登録するオブジェクトの次元数を指定します。 189 | 190 | **edge_size_for_creation** 191 | 各ノードの初期エッジ数を指定します。 192 | 193 | **edge_size_for_search** 194 | 検索時にグラフを探索するためのノードのエッジ数を指定します。 195 | 196 | **distance_type** 197 | オブジェクトの距離関数を指定します。 198 | - __L1__: L1 距離 199 | - __L2__: L2 距離(デフォルト) 200 | - __Angle__: 角度距離 201 | - __Normalized Angle__: 正規化角度距離。指定されたデータは自動的に正規化された上でインデックスに登録されます。 202 | - __Cosine__: コサイン類似度 203 | - __Normalized Cosine__: 正規化コサイン類似度。指定されたデータは自動的に正規化された上でインデックスに登録されます。 204 | - __Hamming__: ハミング距離 205 | - __Jaccard__: ジャッカード距離 206 | - __Inner Product__: 内積 207 | 208 | **object_type** 209 | オブジェクトのデータタイプを指定します。 210 | - __Float__: 4 バイト浮動小数点 211 | - __Float16__: 2 バイト浮動小数点 212 | 
- __Byte__: 1 バイト符号なし整数 213 | 214 | Class Optimizer 215 | =============== 216 | 217 | ### \_\_init\_\_ 218 | 指定されたパラメータを設定したoptimizerオブジェクトを生成します。 219 | 220 | __init__(self: ngtpy.Optimizer, num_of_outgoings: int=10, num_of_incomings: int=120, log_disabled: bool=False) 221 | 222 | **Returns** 223 | なし 224 | 225 | **num_of_outgoings** 226 | 入力のグラフから再構築のグラフへ加える各ノードの出力エッジ数を指定します。指定された値は再構築グラフの出次数の下限値を意味します。 227 | 228 | **num_of_incomings** 229 | 入力のグラフから再構築のグラフへ加える各ノードの入力エッジ数を指定します。*num_of_outgoings*とは異なり、エッジの方向を逆転した後、再構築のグラフに加えます。この値は再構築グラフの入次数の下限値を意味します。 230 | 231 | **log_disabled** 232 | 処理の進捗に関する標準エラーのメッセージを無効にします。 233 | 234 | ### execute 235 | 事前に指定されたパラメータを用いて指定されたインデックスから新たなインデックスを再構築し、検索時の係数を最適化します。この最適化は*adjust_search_coefficients*を呼び出すのと同じです。 236 | 237 | execute(self: ngtpy.Optimizer, in_index_path: str, out_index_path: str) 238 | 239 | 240 | **in_index_path** 241 | 入力のインデックスを指定します。 242 | 243 | 244 | **out_index_path** 245 | 出力のインデックスを指定します。 246 | 247 | ### adjust_search_coefficients 248 | 検索係数を最適化します。 249 | 250 | adjust_search_coefficients(self: ngtpy.Optimizer, index_path: str) 251 | 252 | **index_path** 253 | 最適化するインデックスを指定します。 254 | 255 | Class QuantizedIndex 256 | =========== 257 | 258 | ## Member Functions 259 | 260 | ### \_\_init\_\_ 261 | 指定された量子化インデックスをオープンし、そのインデックスのオブジェクトを生成します。 262 | 263 | __init__(self: ngtpy.QuantizedIndex, path: str, zero_based_numbering: bool=True, log_disabled: bool=False) 264 | 265 | **Returns** 266 | なし 267 | 268 | **path** 269 | オープンするインデックスのパスを指定します。量子化インデックスは`qbg create-qg`と`qbg build-qg`で事前にONNGやANNGからビルドしてください。pythonでの量子化する関数はまだ利用できません。 270 | 271 | **zero_based_numbering** 272 | オブジェクトIDが0から始まることを指定します。Falseは1から始まることを意味します。 273 | 274 | **log_disabled** 275 | 処理の進捗に関する標準エラーのメッセージを無効にします。 276 | 277 | ### search 278 | 指定されたクエリオブジェクトに対する近傍のオブジェクトを検索します。 279 | 280 | object search(self: ngtpy.QuantizedIndex, query: object, size: int=20, epsilon: float, result_expansion: float) 281 | 282 | **Returns** 283 | 
検索結果としてタプル(ID、距離)のリスト 284 | 285 | **query** 286 | クエリオブジェクトを指定します。 287 | 288 | **size** 289 | 検索結果として返るオブジェクトの数を指定します。 290 | 291 | **epsilon** 292 | グラフの探索範囲を決定する変数イプシロンを指定します。 293 | 294 | **result_expansion** 295 | 検索結果数に対する内部の近似検索結果数の拡張割合を指定します。例えば、この割合が10で検索結果数が20の場合には、検索処理中の近似検索数は200に設定されます。大きな値ほど精度は高くなりますが、検索に時間がかかります。 296 | 297 | ### set 298 | 検索パラメータのデフォルト値を指定します。 299 | 300 | set(self: ngtpy.QuantizedIndex, num_of_search_objects: int, epsilon: float, result_expansion: float) 301 | 302 | **Returns** 303 | なし。 304 | 305 | **num_of_search_objects** 306 | 検索結果数を指定します。初期デフォルト値は20です。 307 | 308 | **epsilon** 309 | グラフの探索範囲を決定する変数イプシロンを指定します。初期デフォルト値は0.02です。 310 | 311 | **result_expansion** 312 | 検索結果数に対する内部の近似検索結果数の拡張割合を指定します。初期デフォルト値は3.0です。 313 | -------------------------------------------------------------------------------- /lib/NGT/NGTQ/Capi.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2021 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | #pragma once 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "NGT/Capi.h" 28 | 29 | typedef void *NGTQGIndex; 30 | typedef NGTObjectDistance NGTObjectDistance; 31 | typedef NGTError NGTQGError; 32 | 33 | typedef struct { 34 | float *query; 35 | size_t size; // # of returned objects 36 | float epsilon; 37 | float result_expansion; 38 | float radius; 39 | } NGTQGQuery; 40 | 41 | typedef struct { 42 | size_t size; // # of returned objects 43 | float epsilon; 44 | float result_expansion; 45 | float radius; 46 | } NGTQGQueryParameters; 47 | 48 | typedef struct { 49 | float *query; 50 | NGTQGQueryParameters params; 51 | } NGTQGQueryFloat; 52 | 53 | typedef struct { 54 | uint8_t *query; 55 | NGTQGQueryParameters params; 56 | } NGTQGQueryUint8; 57 | 58 | typedef struct { 59 | NGTFloat16 *query; 60 | NGTQGQueryParameters params; 61 | } NGTQGQueryFloat16; 62 | 63 | typedef struct { 64 | float dimension_of_subvector; 65 | size_t max_number_of_edges; 66 | } NGTQGQuantizationParameters; 67 | 68 | NGTQGIndex ngtqg_open_index(const char *, NGTQGError); 69 | 70 | void ngtqg_close_index(NGTQGIndex); 71 | 72 | void ngtqg_initialize_quantization_parameters(NGTQGQuantizationParameters *); 73 | 74 | bool ngtqg_quantize(const char *, NGTQGQuantizationParameters, NGTQGError); 75 | 76 | void ngtqg_initialize_query_parameters(NGTQGQueryParameters *); 77 | 78 | void ngtqg_initialize_query(NGTQGQuery *); 79 | 80 | bool ngtqg_search_index(NGTQGIndex, NGTQGQuery, NGTObjectDistances, NGTQGError); 81 | 82 | bool ngtqg_search_index_float(NGTQGIndex, NGTQGQueryFloat, NGTObjectDistances, NGTQGError); 83 | 84 | bool ngtqg_search_index_uint8(NGTQGIndex, NGTQGQueryUint8, NGTObjectDistances, NGTQGError); 85 | 86 | bool ngtqg_search_index_float16(NGTQGIndex, NGTQGQueryFloat16, NGTObjectDistances, NGTQGError); 87 | 88 | // QBG CAPI 89 | 90 | typedef void *QBGIndex; 91 | typedef NGTError QBGError; 92 | typedef 
NGTObjectDistances QBGObjectDistances; 93 | 94 | uint32_t qbg_get_result_size(QBGObjectDistances results, NGTError error); 95 | 96 | NGTObjectDistance qbg_get_result(const QBGObjectDistances results, const uint32_t idx, NGTError error); 97 | 98 | void qbg_destroy_results(QBGObjectDistances results); 99 | 100 | typedef struct { 101 | size_t extended_dimension; 102 | size_t dimension; 103 | size_t number_of_subvectors; 104 | size_t number_of_blobs; 105 | int internal_data_type; 106 | int data_type; 107 | int distance_type; 108 | } QBGConstructionParameters; 109 | 110 | typedef struct { 111 | // hierarchical kmeans 112 | int hierarchical_clustering_init_mode; 113 | size_t number_of_first_objects; 114 | size_t number_of_first_clusters; 115 | size_t number_of_second_objects; 116 | size_t number_of_second_clusters; 117 | size_t number_of_third_clusters; 118 | // optimization 119 | size_t number_of_objects; 120 | size_t number_of_subvectors; 121 | int optimization_clustering_init_mode; 122 | size_t rotation_iteration; 123 | size_t subvector_iteration; 124 | size_t number_of_matrices; 125 | bool rotation; 126 | bool repositioning; 127 | } QBGBuildParameters; 128 | 129 | typedef struct { 130 | size_t number_of_results; 131 | float epsilon; 132 | float blob_epsilon; 133 | float result_expansion; 134 | size_t number_of_explored_blobs; 135 | size_t number_of_edges; 136 | float radius; 137 | } QBGQueryParameters; 138 | 139 | typedef struct { 140 | float *query; 141 | size_t number_of_results; 142 | float epsilon; 143 | float blob_epsilon; 144 | float result_expansion; 145 | size_t number_of_explored_blobs; 146 | size_t number_of_edges; 147 | float radius; 148 | } QBGQuery; 149 | 150 | typedef struct { 151 | float *query; 152 | QBGQueryParameters params; 153 | } QBGQueryFloat; 154 | 155 | typedef struct { 156 | uint8_t *query; 157 | QBGQueryParameters params; 158 | } QBGQueryUint8; 159 | 160 | typedef struct { 161 | NGTFloat16 *query; 162 | QBGQueryParameters params; 163 | } 
QBGQueryFloat16; 164 | 165 | void qbg_initialize_construction_parameters(QBGConstructionParameters *parameters); 166 | 167 | bool qbg_create(const char *indexPath, QBGConstructionParameters *parameters, QBGError error); 168 | 169 | QBGIndex qbg_open_index(const char *index_path, bool prebuilt, QBGError error); 170 | 171 | void qbg_close_index(QBGIndex index); 172 | 173 | bool qbg_save_index(QBGIndex index, QBGError error); 174 | 175 | ObjectID qbg_append_object(QBGIndex index, float *object, uint32_t obj_dim, QBGError error); 176 | 177 | ObjectID qbg_append_object_as_uint8(QBGIndex index, uint8_t *object, uint32_t obj_dim, QBGError error); 178 | 179 | ObjectID qbg_append_object_as_float16(QBGIndex index, NGTFloat16 *object, uint32_t obj_dim, QBGError error); 180 | 181 | ObjectID qbg_insert_object(QBGIndex index, float *object, uint32_t obj_dim, QBGError error); 182 | 183 | ObjectID qbg_insert_object_as_uint8(QBGIndex index, uint8_t *object, uint32_t obj_dim, QBGError error); 184 | 185 | ObjectID qbg_insert_object_as_float16(QBGIndex index, NGTFloat16 *object, uint32_t obj_dim, QBGError error); 186 | 187 | uint32_t qbg_insert_objects(QBGIndex index, float *objects, uint32_t n_of_objects, ObjectID *ids, 188 | NGTError error); 189 | 190 | uint32_t qbg_insert_objects_as_uint8(QBGIndex index, uint8_t *objects, uint32_t n_of_objects, ObjectID *ids, 191 | NGTError error); 192 | 193 | uint32_t qbg_insert_objects_as_float16(QBGIndex index, NGTFloat16 *objects, uint32_t n_of_objects, 194 | ObjectID *ids, NGTError error); 195 | 196 | bool qbg_remove_object(QBGIndex index, ObjectID id, QBGError error); 197 | 198 | bool qbg_remove_objects(QBGIndex index, ObjectID *ids, uint32_t n_of_ids, QBGError error); 199 | 200 | void qbg_initialize_build_parameters(QBGBuildParameters *parameters); 201 | 202 | bool qbg_build_index(const char *index_path, QBGBuildParameters *parameters, QBGError error); 203 | 204 | void qbg_initialize_query_parameters(QBGQueryParameters *parameters); 205 | 
206 | void qbg_initialize_query(QBGQuery *parameters); 207 | 208 | bool qbg_search_index(QBGIndex index, QBGQuery query, NGTObjectDistances results, QBGError error); 209 | 210 | bool qbg_search_index_float(QBGIndex index, QBGQueryFloat query, NGTObjectDistances results, QBGError error); 211 | 212 | bool qbg_search_index_uint8(QBGIndex index, QBGQueryUint8 query, NGTObjectDistances results, QBGError error); 213 | 214 | bool qbg_search_index_float16(QBGIndex index, QBGQueryFloat16 query, NGTObjectDistances results, 215 | QBGError error); 216 | 217 | float *qbg_get_object(QBGIndex index, ObjectID id, QBGError error); 218 | 219 | uint8_t *qbg_get_object_as_uint8(QBGIndex index, ObjectID id, QBGError error); 220 | 221 | NGTFloat16 *qbg_get_object_as_float16(QBGIndex index, ObjectID id, QBGError error); 222 | 223 | size_t qbg_get_dimension(QBGIndex index, QBGError error); 224 | 225 | #ifdef __cplusplus 226 | } 227 | #endif 228 | -------------------------------------------------------------------------------- /samples/jaccard-sparse/jaccard-sparse.cpp: -------------------------------------------------------------------------------- 1 | 2 | // sort -R sparse_binary.tsv |head -10 > sparse_binary_query_10.tsv 3 | // ./jaccard-sparse create -d 100 -D J sparse 4 | // ./jaccard-sparse append sparse sparse_binary.tsv 5 | // ./jaccard-sparse search sparse sparse_binary_query_10.tsv 6 | // 7 | 8 | #include "NGT/Command.h" 9 | 10 | using namespace std; 11 | 12 | void help() { 13 | cerr << "Usage : jaccard-sparse command index [data]" << endl; 14 | cerr << " command : info create search append" << endl; 15 | } 16 | 17 | void append(NGT::Args &args) { 18 | const string usage = "Usage: jaccard-sparse append [-p #-of-thread] [-n data-size] " 19 | "index(output) [data.tsv(input)]"; 20 | string database; 21 | try { 22 | database = args.get("#1"); 23 | } catch (...) { 24 | cerr << "jaccard-sparse: Error: DB is not specified." 
<< endl; 25 | cerr << usage << endl; 26 | return; 27 | } 28 | string data; 29 | try { 30 | data = args.get("#2"); 31 | } catch (...) { 32 | cerr << "jaccard-sparse: Warning: No specified object file. Just build an index for the existing objects." 33 | << endl; 34 | } 35 | 36 | int threadSize = args.getl("p", 50); 37 | size_t dataSize = args.getl("n", 0); 38 | 39 | std::istream *is; 40 | std::ifstream *ifs = 0; 41 | 42 | try { 43 | NGT::Index index(database); 44 | if (data == "-") { 45 | is = &std::cin; 46 | } else { 47 | ifs = new std::ifstream; 48 | ifs->std::ifstream::open(data); 49 | if (!(*ifs)) { 50 | cerr << "Cannot open the specified data file. " << data << endl; 51 | return; 52 | } 53 | is = ifs; 54 | } 55 | string line; 56 | size_t count = 0; 57 | while (getline(*is, line)) { 58 | if (dataSize > 0 && count >= dataSize) { 59 | break; 60 | } 61 | count++; 62 | vector object; 63 | stringstream linestream(line); 64 | while (!linestream.eof()) { 65 | uint32_t value; 66 | linestream >> value; 67 | if (linestream.fail()) { 68 | object.clear(); 69 | break; 70 | } 71 | object.push_back(value); 72 | } 73 | if (object.empty()) { 74 | std::cerr << "jaccard-sparse: Empty line or invalid value. " << count << ":" << line << std::endl; 75 | continue; 76 | } 77 | } 78 | if (data != "-") { 79 | delete ifs; 80 | } 81 | index.createIndex(threadSize); 82 | index.saveIndex(database); 83 | } catch (NGT::Exception &err) { 84 | if (data != "-") { 85 | delete ifs; 86 | } 87 | cerr << "jaccard-sparse: Error " << err.what() << endl; 88 | cerr << usage << endl; 89 | } 90 | return; 91 | } 92 | 93 | void search(NGT::Index &index, NGT::Command::SearchParameters &searchParameters, ostream &stream) { 94 | 95 | std::ifstream is(searchParameters.query); 96 | if (!is) { 97 | std::cerr << "Cannot open the specified file. 
" << searchParameters.query << std::endl; 98 | return; 99 | } 100 | 101 | if (searchParameters.outputMode[0] == 'e') { 102 | stream << "# Beginning of Evaluation" << endl; 103 | } 104 | 105 | string line; 106 | double totalTime = 0; 107 | size_t queryCount = 0; 108 | double epsilon = searchParameters.beginOfEpsilon; 109 | 110 | while (getline(is, line)) { 111 | if (searchParameters.querySize > 0 && queryCount >= searchParameters.querySize) { 112 | break; 113 | } 114 | vector query; 115 | stringstream linestream(line); 116 | while (!linestream.eof()) { 117 | uint32_t value; 118 | linestream >> value; 119 | query.push_back(value); 120 | } 121 | auto sparseQuery = index.makeSparseObject(query); 122 | queryCount++; 123 | NGT::SearchQuery sc(sparseQuery); 124 | NGT::ObjectDistances objects; 125 | sc.setResults(&objects); 126 | sc.setSize(searchParameters.size); 127 | sc.setRadius(searchParameters.radius); 128 | if (searchParameters.accuracy > 0.0) { 129 | sc.setExpectedAccuracy(searchParameters.accuracy); 130 | } else { 131 | sc.setEpsilon(epsilon); 132 | } 133 | sc.setEdgeSize(searchParameters.edgeSize); 134 | NGT::Timer timer; 135 | switch (searchParameters.indexType) { 136 | case 't': 137 | timer.start(); 138 | index.search(sc); 139 | timer.stop(); 140 | break; 141 | case 'g': 142 | timer.start(); 143 | index.searchUsingOnlyGraph(sc); 144 | timer.stop(); 145 | break; 146 | case 's': 147 | timer.start(); 148 | index.linearSearch(sc); 149 | timer.stop(); 150 | break; 151 | } 152 | totalTime += timer.time; 153 | if (searchParameters.outputMode[0] == 'e') { 154 | stream << "# Query No.=" << queryCount << endl; 155 | stream << "# Query=" << line.substr(0, 20) + " ..." 
<< endl; 156 | stream << "# Index Type=" << searchParameters.indexType << endl; 157 | stream << "# Size=" << searchParameters.size << endl; 158 | stream << "# Radius=" << searchParameters.radius << endl; 159 | stream << "# Epsilon=" << epsilon << endl; 160 | stream << "# Query Time (msec)=" << timer.time * 1000.0 << endl; 161 | stream << "# Distance Computation=" << sc.distanceComputationCount << endl; 162 | stream << "# Visit Count=" << sc.visitCount << endl; 163 | } else { 164 | stream << "Query No." << queryCount << endl; 165 | stream << "Rank\tID\tDistance" << endl; 166 | } 167 | for (size_t i = 0; i < objects.size(); i++) { 168 | stream << i + 1 << "\t" << objects[i].id << "\t"; 169 | stream << objects[i].distance << endl; 170 | } 171 | if (searchParameters.outputMode[0] == 'e') { 172 | stream << "# End of Search" << endl; 173 | } else { 174 | stream << "Query Time= " << timer.time << " (sec), " << timer.time * 1000.0 << " (msec)" << endl; 175 | } 176 | if (searchParameters.outputMode[0] == 'e') { 177 | stream << "# End of Query" << endl; 178 | } 179 | } 180 | if (searchParameters.outputMode[0] == 'e') { 181 | stream << "# Average Query Time (msec)=" << totalTime * 1000.0 / (double)queryCount << endl; 182 | stream << "# Number of queries=" << queryCount << endl; 183 | stream << "# End of Evaluation" << endl; 184 | } else { 185 | stream << "Average Query Time= " << totalTime / (double)queryCount << " (sec), " 186 | << totalTime * 1000.0 / (double)queryCount << " (msec), (" << totalTime << "/" << queryCount << ")" 187 | << endl; 188 | } 189 | } 190 | 191 | void search(NGT::Args &args) { 192 | const string usage = 193 | "Usage: ngt search [-i index-type(g|t|s)] [-n result-size] [-e epsilon] [-E edge-size] " 194 | "[-m open-mode(r|w)] [-o output-mode] index(input) query.tsv(input)"; 195 | 196 | string database; 197 | try { 198 | database = args.get("#1"); 199 | } catch (...) 
{ 200 | cerr << "jaccard-sparse: Error: DB is not specified" << endl; 201 | cerr << usage << endl; 202 | return; 203 | } 204 | 205 | NGT::Command::SearchParameters searchParameters(args); 206 | 207 | try { 208 | NGT::Index index(database, searchParameters.openMode == 'r'); 209 | search(index, searchParameters, cout); 210 | } catch (NGT::Exception &err) { 211 | cerr << "jaccard-sparse: Error " << err.what() << endl; 212 | cerr << usage << endl; 213 | } catch (...) { 214 | cerr << "jaccard-sparse: Error" << endl; 215 | cerr << usage << endl; 216 | } 217 | } 218 | 219 | int main(int argc, char **argv) { 220 | 221 | NGT::Args args(argc, argv); 222 | 223 | NGT::Command ngt; 224 | 225 | string command; 226 | try { 227 | command = args.get("#0"); 228 | } catch (...) { 229 | help(); 230 | return 0; 231 | } 232 | 233 | try { 234 | if (command == "create") { 235 | ngt.create(args); 236 | } else if (command == "append") { 237 | append(args); 238 | } else if (command == "search") { 239 | search(args); 240 | } else { 241 | cerr << "jaccard-sparse: Error: Illegal command. " << command << endl; 242 | help(); 243 | } 244 | } catch (NGT::Exception &err) { 245 | cerr << "jaccard-sparse: Error: " << err.what() << endl; 246 | help(); 247 | return 0; 248 | } 249 | return 0; 250 | } 251 | -------------------------------------------------------------------------------- /lib/NGT/Thread.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #pragma once 18 | 19 | #include "NGT/Common.h" 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | namespace NGT { 30 | void *evaluate_responce(void *); 31 | 32 | class ThreadTerminationException : public Exception { 33 | public: 34 | ThreadTerminationException(const std::string &file, const std::string &function, size_t line, 35 | std::stringstream &m) { 36 | set(file, function, line, m.str()); 37 | } 38 | ThreadTerminationException(const std::string &file, const std::string &function, size_t line, 39 | const std::string &m) { 40 | set(file, function, line, m); 41 | } 42 | }; 43 | 44 | class ThreadInfo; 45 | class ThreadMutex; 46 | 47 | class Thread { 48 | public: 49 | Thread(); 50 | 51 | virtual ~Thread(); 52 | virtual int start(); 53 | 54 | virtual int join(); 55 | 56 | static ThreadMutex *constructThreadMutex(); 57 | static void destructThreadMutex(ThreadMutex *t); 58 | 59 | static void mutexInit(ThreadMutex &m); 60 | 61 | static void lock(ThreadMutex &m); 62 | static void unlock(ThreadMutex &m); 63 | static void signal(ThreadMutex &m); 64 | static void wait(ThreadMutex &m); 65 | static void broadcast(ThreadMutex &m); 66 | 67 | protected: 68 | virtual int run() { return 0; } 69 | 70 | private: 71 | static void *startThread(void *thread) { 72 | if (thread == 0) { 73 | return 0; 74 | } 75 | Thread *p = (Thread *)thread; 76 | p->run(); 77 | return thread; 78 | } 79 | 80 | public: 81 | int threadNo; 82 | bool isTerminate; 83 | 84 | protected: 85 | 
ThreadInfo *threadInfo; 86 | }; 87 | 88 | template class ThreadPool { 89 | public: 90 | class JobQueue : public std::deque { 91 | public: 92 | JobQueue() { 93 | threadMutex = Thread::constructThreadMutex(); 94 | Thread::mutexInit(*threadMutex); 95 | } 96 | ~JobQueue() { Thread::destructThreadMutex(threadMutex); } 97 | bool isDeficient() { return std::deque::size() <= requestSize; } 98 | bool isEmpty() { return std::deque::size() == 0; } 99 | bool isFull() { return std::deque::size() >= maxSize; } 100 | void setRequestSize(int s) { requestSize = s; } 101 | void setMaxSize(int s) { maxSize = s; } 102 | void lock() { Thread::lock(*threadMutex); } 103 | void unlock() { Thread::unlock(*threadMutex); } 104 | void signal() { Thread::signal(*threadMutex); } 105 | void wait() { Thread::wait(*threadMutex); } 106 | void wait(JobQueue &q) { wait(*q.threadMutex); } 107 | void broadcast() { Thread::broadcast(*threadMutex); } 108 | unsigned int requestSize; 109 | unsigned int maxSize; 110 | ThreadMutex *threadMutex; 111 | }; 112 | class InputJobQueue : public JobQueue { 113 | public: 114 | InputJobQueue() { 115 | isTerminate = false; 116 | underPushing = false; 117 | pushedSize = 0; 118 | } 119 | 120 | void popFront(JOB &d) { 121 | JobQueue::lock(); 122 | while (JobQueue::isEmpty()) { 123 | if (isTerminate) { 124 | JobQueue::unlock(); 125 | NGTThrowSpecificException("Thread::termination", ThreadTerminationException); 126 | } 127 | JobQueue::wait(); 128 | } 129 | d = std::deque::front(); 130 | std::deque::pop_front(); 131 | JobQueue::unlock(); 132 | return; 133 | } 134 | 135 | void popFront(std::deque &d, size_t s) { 136 | JobQueue::lock(); 137 | while (JobQueue::isEmpty()) { 138 | if (isTerminate) { 139 | JobQueue::unlock(); 140 | NGTThrowSpecificException("Thread::termination", ThreadTerminationException); 141 | } 142 | JobQueue::wait(); 143 | } 144 | for (size_t i = 0; i < s; i++) { 145 | d.push_back(std::deque::front()); 146 | std::deque::pop_front(); 147 | if 
(JobQueue::isEmpty()) { 148 | break; 149 | } 150 | } 151 | JobQueue::unlock(); 152 | return; 153 | } 154 | 155 | void pushBack(JOB &data) { 156 | JobQueue::lock(); 157 | if (!underPushing) { 158 | underPushing = true; 159 | pushedSize = 0; 160 | } 161 | pushedSize++; 162 | std::deque::push_back(data); 163 | JobQueue::unlock(); 164 | JobQueue::signal(); 165 | } 166 | 167 | void pushBackEnd() { underPushing = false; } 168 | 169 | void terminate() { 170 | JobQueue::lock(); 171 | if (underPushing || !JobQueue::isEmpty()) { 172 | JobQueue::unlock(); 173 | NGTThrowException("Thread::teminate:Under pushing!"); 174 | } 175 | isTerminate = true; 176 | JobQueue::unlock(); 177 | JobQueue::broadcast(); 178 | } 179 | 180 | bool isTerminate; 181 | bool underPushing; 182 | size_t pushedSize; 183 | }; 184 | 185 | class OutputJobQueue : public JobQueue { 186 | public: 187 | void waitForFull() { 188 | JobQueue::wait(); 189 | JobQueue::unlock(); 190 | } 191 | 192 | void pushBack(JOB &data) { 193 | JobQueue::lock(); 194 | std::deque::push_back(data); 195 | if (!JobQueue::isFull()) { 196 | JobQueue::unlock(); 197 | return; 198 | } 199 | JobQueue::unlock(); 200 | JobQueue::signal(); 201 | } 202 | }; 203 | 204 | class SharedData { 205 | public: 206 | SharedData() : isAvailable(false) { 207 | inputJobs.requestSize = 5; 208 | inputJobs.maxSize = 50; 209 | } 210 | SHARED_DATA sharedData; 211 | InputJobQueue inputJobs; 212 | OutputJobQueue outputJobs; 213 | bool isAvailable; 214 | }; 215 | 216 | class Thread : public THREAD { 217 | public: 218 | SHARED_DATA &getSharedData() { 219 | if (threadPool->sharedData.isAvailable) { 220 | return threadPool->sharedData.sharedData; 221 | } else { 222 | NGTThrowException("Thread::getSharedData: Shared data is unavailable. 
No set yet."); 223 | } 224 | } 225 | InputJobQueue &getInputJobQueue() { return threadPool->sharedData.inputJobs; } 226 | OutputJobQueue &getOutputJobQueue() { return threadPool->sharedData.outputJobs; } 227 | ThreadPool *threadPool; 228 | }; 229 | 230 | ThreadPool(int s) { 231 | size = s; 232 | threads = new Thread[s]; 233 | } 234 | 235 | ~ThreadPool() { delete[] threads; } 236 | 237 | void setSharedData(SHARED_DATA d) { 238 | sharedData.sharedData = d; 239 | sharedData.isAvailable = true; 240 | } 241 | 242 | void create() { 243 | for (unsigned int i = 0; i < size; i++) { 244 | threads[i].threadPool = this; 245 | threads[i].threadNo = i; 246 | threads[i].start(); 247 | } 248 | } 249 | 250 | void pushInputQueue(JOB &data) { 251 | if (!sharedData.inputJobs.underPushing) { 252 | sharedData.outputJobs.lock(); 253 | } 254 | sharedData.inputJobs.pushBack(data); 255 | } 256 | 257 | void waitForFinish() { 258 | sharedData.inputJobs.pushBackEnd(); 259 | sharedData.outputJobs.setMaxSize(sharedData.inputJobs.pushedSize); 260 | sharedData.inputJobs.pushedSize = 0; 261 | sharedData.outputJobs.waitForFull(); 262 | } 263 | 264 | void terminate() { 265 | sharedData.inputJobs.terminate(); 266 | for (unsigned int i = 0; i < size; i++) { 267 | threads[i].join(); 268 | } 269 | } 270 | 271 | InputJobQueue &getInputJobQueue() { return sharedData.inputJobs; } 272 | OutputJobQueue &getOutputJobQueue() { return sharedData.outputJobs; } 273 | 274 | SharedData sharedData; // shared data 275 | Thread *threads; // thread set 276 | unsigned int size; // thread size 277 | }; 278 | 279 | } // namespace NGT 280 | -------------------------------------------------------------------------------- /README-jp.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 | Neighborhood Graph and Tree for Indexing High-dimensional Data 6 | 7 | [トップ](/README-jp.md) / [インストール](/README-jp.md#インストール) / [コマンド](/bin/ngt/README-jp.md) / [ライセンス](/README-jp.md#ライセンス) / [関連文献](/README-jp.md#関連文献) / [About Us](http://research-lab.yahoo.co.jp/) / [English](/README.md) 8 | 9 | 大量(数百万から数千万)の高次元ベクトルデータ(数十~数千次元)に対して高速な近似近傍検索を可能とするコマンド及びライブラリを提供します。 10 | 11 | ニュース 12 | ------- 13 | - 2024/04/10 内積が利用可能になりました。(v2.1.0) 14 | - 2022/08/10 QBG(Quantized Blob Graph)およびQG(NGTQGの改良版)が利用可能となりました。ngtqおよびngtqgは[qbg](https://github.com/yahoojapan/NGT/blob/main/bin/qbg/README.md)で置き換えられました。 15 | - 2022/02/04 FP16(半精度浮動小数点)が利用可能になりました。(v1.14.0) 16 | - 2021/03/12 READMEに量子化グラフの結果を追加しました。 17 | - 2021/01/15 [量子化グラフ (NGTQG)](bin/ngtqg/README.md)を実装した NGT v1.13.0 をリリースしました。 18 | - 2019/11/04 [NGT チュートリアル](https://github.com/yahoojapan/NGT/wiki) をリリースしました。 19 | - 2019/06/26 Jaccard距離が利用可能になりました。(v1.7.6) 20 | - 2019/06/10 PyPI NGT パッケージ v1.7.5 が利用可能になりました。 21 | - 2019/01/17 Python NGTはPYPIからpipでインストールが可能になりました。(v1.5.1) 22 | - 2018/12/14 [NGTQ](bin/ngtq/README-jp.md) (NGT with Quantization) が利用可能になりました。(v1.5.0) 23 | - 2018/08/08 [ONNG](README-jp.md#onng)が利用可能になりました。(v1.4.0) 24 | 25 | 手法 26 | --- 27 | このリポジトリは次の手法を提供します。 28 | - NGT: Graph and tree-based method 29 | - QG: Quantized graph-based method 30 | - QBG: Quantized blob graph-based method 31 | 32 | 注:QGおよびQBGはBLASおよびLAPACKライブラリを必要としますので、もし、V1のようにNGT (Graph and tree-based method)のみしか利用しない場合には、QGおよびQBGを[このオプション](#QGおよびQBGの無効化)により無効化できます。 33 | 34 | インストール 35 | ----------- 36 | 37 | ### ダウンロード 38 | 39 | - [Releases](https://github.com/yahoojapan/NGT/releases) 40 | 41 | ### ビルド 42 | 43 | #### Linux (QG QBGの無効化) 44 | 45 | $ unzip NGT-x.x.x.zip 46 | $ cd NGT-x.x.x 47 | $ mkdir build 48 | $ cd build 49 | $ cmake -DNGT_QBG_DISABLED=ON .. 
50 | $ make 51 | $ make install 52 | $ ldconfig /usr/local/lib 53 | 54 | #### CentOS 55 | 56 | $ yum install blas-devel lapack-devel 57 | $ unzip NGT-x.x.x.zip 58 | $ cd NGT-x.x.x 59 | $ mkdir build 60 | $ cd build 61 | $ cmake .. 62 | $ make 63 | $ make install 64 | $ ldconfig /usr/local/lib 65 | 66 | #### Ubuntu 67 | 68 | $ apt install libblas-dev liblapack-dev 69 | $ unzip NGT-x.x.x.zip 70 | $ cd NGT-x.x.x 71 | $ mkdir build 72 | $ cd build 73 | $ cmake .. 74 | $ make 75 | $ make install 76 | $ ldconfig /usr/local/lib 77 | 78 | #### macOS 79 | 80 | $ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" 81 | $ brew install cmake 82 | $ brew install libomp 83 | $ unzip NGT-x.x.x.zip 84 | $ cd NGT-x.x.x 85 | $ mkdir build 86 | $ cd build 87 | $ export OpenMP_ROOT=$(brew --prefix)/opt/libomp 88 | $ cmake .. 89 | $ make 90 | $ make install 91 | 92 | ### ビルド済み 93 | 94 | #### macOS 95 | 96 | $ brew install ngt 97 | 98 | NGT (Graph and tree-based method) 99 | ================================= 100 | 101 | 特徴 102 | ---- 103 | - OS:Linux、macOS 104 | - データの追加削除が可能 105 | - [共有メモリ(マップドメモリ)](README-jp.md#共有メモリの利用)のオプションによるNGTではメモリサイズを超えるデータが利用可能 106 | - データ型:1バイト整数、4バイト単精度浮動小数点、2バイト半精度浮動小数点 107 | - 距離関数:L1、L2、コサイン類似度、角度、ハミング、ジャッカード、ポアンカレ、ローレンツ、内積 108 | - 対応言語:[Python](/python/README-jp.md)、[Ruby](https://github.com/ankane/ngt)、[PHP](https://github.com/ankane/ngt-php)、[Rust](https://crates.io/crates/ngt)、[Go](https://github.com/yahoojapan/gongt)、C、C++ 109 | - 分散サーバ:[ngtd](https://github.com/yahoojapan/ngtd), [vald](https://github.com/vdaas/vald) 110 | 111 | ドキュメント 112 | ----------- 113 | 114 | - [NGT チュートリアル](https://github.com/yahoojapan/NGT/wiki) 115 | 116 | 117 | ユーティリティ 118 | ------------- 119 | 120 | - コマンド : [ngt](/bin/ngt/README-jp.md#command) 121 | - サーバ : [ngtd](https://github.com/yahoojapan/ngtd), [vald](https://github.com/vdaas/vald) 122 | 123 | 対応言語 124 | -------- 125 | 126 | - [Python](/python/README-jp.md) 127 | - 
[Ruby](https://github.com/ankane/ngt) (Thanks Andrew!) 128 | - [PHP](https://github.com/ankane/ngt-php) (Thanks Andrew!) 129 | - [Rust](https://crates.io/crates/ngt) (Thanks Romain!) 130 | - JavaScript/NodeJS : [ngt-tool](https://www.npmjs.com/package/ngt-tool), [spatial-db-ngt](https://www.npmjs.com/package/spatial-db-ngt) (Thanks stonkpunk!) 131 | - [Go](https://github.com/yahoojapan/gongt) 132 | - C 133 | - C++([sample code](samples)) 134 | 135 | ビルドパラメータ 136 | ----------- 137 | 138 | #### 共有メモリの利用 139 | 140 | メモリマップドファイルを用いた共有メモリにインデックスを配置することが可能です。共有メモリを利用することにより複数のプロセスが同一のインデックスを利用する場合にメモリ使用量を抑制することが可能です。さらに、メモリにロードできないような大量のオブジェクトを有するインデックスを扱うことが可能なだけでなく、インデックスをオープンする時間を削減することも可能です。共有メモリを利用するにはビルド時の変更が必要となりますので、cmake実行時に以下のパラメータを追加してください。 141 | 142 | $ cmake -DNGT_SHARED_MEMORY_ALLOCATOR=ON .. 143 | 144 | 注:ロック機能はありませんので、複数プロセスで同一のインデックスを利用する場合には参照のみでご使用ください。 145 | 146 | #### 大規模データの利用 147 | 148 | 約500万以上のオブジェクトをNGTに登録する場合には、検索速度向上のために以下のパラメータを追加してください。 149 | 150 | $ cmake -DNGT_LARGE_DATASET=ON .. 151 | 152 | #### QGおよびQBGの無効化 153 | 154 | QGおよびQBGはBLASおよびLAPACKライブラリを必要とします。もし、これらのライブラリをインストールしたくなく、かつ、QGやQBGを利用しない場合には、QGおよびQBGを無効化できます。 155 | 156 | $ cmake -DNGT_QBG_DISABLED=ON .. 157 | 158 | QG (Quantized graph-based method) 159 | ================================= 160 | 161 | 特徴 162 | ---- 163 | - NGTよりも高性能 164 | - OS:Linux、macOS 165 | - 距離関数:L2、コサイン類似度 166 | - 対応言語:C++, C, Python 167 | 168 | ユーティリティ 169 | ------------- 170 | 171 | - コマンド : [qbg](bin/qbg/README.md) 172 | 173 | 対応言語 174 | -------- 175 | 176 | - C++ 177 | - C 178 | - Python (検索のみ対応) 179 | 180 | ビルドパラメータ 181 | ----------- 182 | 183 | QGでは性能向上のためにベクトル空間の回転や残差ベクトルを無効化することを推奨しています。 184 | 185 | $ cmake -DNGTQG_NO_ROTATION=ON -DNGTQG_ZERO_GLOBAL=ON .. 
186 | 187 | QBG (Quantized blob graph-based method) 188 | ======================================= 189 | 190 | 特徴 191 | ---- 192 | - 10億ものオブジェクトの検索が可能 193 | - OS:Linux、macOS 194 | - 距離関数:L2 195 | - 対応言語:C++, C, Python 196 | 197 | ユーティリティ 198 | ------------- 199 | - コマンド : [qbg](bin/qbg/README.md) 200 | 201 | 対応言語 202 | -------- 203 | 204 | - C++ 205 | - C 206 | - Python (検索のみ対応) 207 | 208 | ベンチマーク結果 209 | --------------- 210 | 211 | 以下はAWS c5.4xlargeのインスタンス上で測定したNGT vのベンチマーク([ann benchmarks](https://github.com/erikbern/ann-benchmarks))の結果です。 212 | 213 | #### glove-100-angular 214 | 215 | 216 | #### gist-960-euclidean 217 | 218 | 219 | #### fashion-mnist-784-euclidean 220 | 221 | 222 | #### nytimes-256-angular 223 | 224 | 225 | #### sift-128-euclidean 226 | 227 | 228 | ライセンス 229 | ---------- 230 | 231 | Copyright (C) 2015 Yahoo Japan Corporation 232 | 233 | ヤフー株式会社はApacheライセンスバージョン2.0の下で本ソフトウェアを公開致します。以下のサイトよりライセンスの内容をご確認頂けます。 234 | 235 | http://www.apache.org/licenses/LICENSE-2.0 236 | 237 | 238 | 貢献者ライセンス同意(CLA) 239 | ------------------------- 240 | 241 | 本ソフトウェアへのソースコードのご提供者は[貢献者ライセンス](https://gist.github.com/yahoojapanoss/9bf8afd6ea67f32d29b4082abf220340)に同意して頂きます。 242 | 243 | なお、GitHub (https://github.com/yahoojapan/NGT) へのご提供の場合のみ、個別の同意書面なしに、上記貢献者ライセンスに同意して頂いたと見なしますので、ご注意ください。 244 | 245 | お問い合わせ 246 | ------------ 247 | 248 | [masajiro](https://github.com/masajiro) 249 | 250 | 関連文献 251 | -------- 252 | ##### [ONNG](bin/ngt/README-jp.md#onng) 253 | - Iwasaki, M., Miyazaki, D.: Optimization of Indexing Based on k-Nearest Neighbor Graph for Proximity. arXiv:1810.07355 [cs] (2018). ([pdf](https://arxiv.org/abs/1810.07355)) 254 | 255 | ##### [PANNG](bin/ngt/README-jp.md#panng) 256 | - Iwasaki, M.: Pruned Bi-directed K-nearest Neighbor Graph for Proximity Search. Proc. of SISAP2016 (2016) 20-33. ([pdf](https://link.springer.com/chapter/10.1007/978-3-319-46759-7_2)) 257 | - Sugawara, K., Kobayashi, H. 
and Iwasaki, M.: On Approximately Searching for Similar Word Embeddings. Proc. of ACL2016 (2016) 2265-2275. ([pdf](https://aclweb.org/anthology/P/P16/P16-1214.pdf)) 258 | 259 | ##### [ANNGT](bin/ngt/README-jp.md#anngt) 260 | - Iwasaki, M.: Applying a Graph-Structured Index to Product Image Search (in Japanese). IIEEJ Journal 42(5) (2013) 633-641. ([pdf](https://s.yimg.jp/i/docs/research_lab/articles/miwasaki-iieej-jnl-2013.pdf)) 261 | - Iwasaki, M.: Proximity search using approximate k nearest neighbor graph with a tree structured index (in Japanese). IPSJ Journal 52(2) (2011) 817-828. ([pdf](https://s.yimg.jp/i/docs/research_lab/articles/miwasaki-ipsj-jnl-2011.pdf)) 262 | 263 | ##### [ANNG](bin/ngt/README-jp.md#anng) 264 | - Iwasaki, M.: Proximity search in metric spaces using approximate k nearest neighbor graph (in Japanese). IPSJ Trans. on Database 3(1) (2010) 18-28. ([pdf](https://s.yimg.jp/i/docs/research_lab/articles/miwasaki-ipsj-tod-2010.pdf)) 265 | 266 | 267 | -------------------------------------------------------------------------------- /python/README-ngtpy.md: -------------------------------------------------------------------------------- 1 | ngtpy 2 | ===== 3 | 4 | NGT python 5 | 6 | Class Index 7 | =========== 8 | 9 | ## Member Functions 10 | 11 | ### \_\_init\_\_ 12 | Open the specified index and create the index object for the index. 13 | 14 | __init__(self: ngtpy.Index, path: str, read_only: bool=False, zero_based_numbering: bool=True, log_disabled: bool=False) 15 | 16 | **Returns** 17 | None. 18 | 19 | **path** 20 | Specify the path of the index to open. 21 | 22 | **read_only** 23 | Open the index as read-only. False means opening as read-write. 24 | 25 | **zero_based_numbering** 26 | Specify zero-based numbering for object IDs. False means one-based numbering. 27 | 28 | **log_disabled** 29 | Disable stderr messages about the progression of an operation. 30 | 31 | ### close 32 | Close the index. 
33 | 34 | close(self: ngtpy.Index) 35 | 36 | **Returns** 37 | None. 38 | 39 | ### insert 40 | Insert the specified object into the index. This function does not build the index for the object. The function build_index below should be called to build the index after inserting objects by using this function. 41 | 42 | int insert(self: ngtpy.Index, object: numpy.ndarray[float64]) 43 | 44 | **Returns** 45 | ID for the inserted object. 46 | 47 | **object** 48 | Specify the inserted object. 49 | 50 | ### build_index 51 | Build the index for the objects that have been inserted by using the function insert. 52 | 53 | build_index(self: ngtpy.Index, num_threads: int=8) 54 | 55 | **Returns** 56 | None. 57 | 58 | **num_threads** 59 | Specify the number of threads to build the index. 60 | 61 | ### batch_insert 62 | Insert the specified objects and build the index for the objects. This function is almost the same as executing the ngt command "ngt append". You may execute the ngt command instead of calling this function. 63 | 64 | batch_insert(self: ngtpy.Index, objects: numpy.ndarray[float64], num_threads: int=8) 65 | 66 | **Returns** 67 | None. 68 | 69 | **objects** 70 | Specify the inserted objects. 71 | 72 | **num_threads** 73 | Specify the number of threads to build the index. 74 | 75 | 76 | ### remove 77 | Remove the specified object. 78 | 79 | remove(self: ngtpy.Index, object_id: int) 80 | 81 | **Returns** 82 | None. 83 | 84 | **object_id** 85 | Specify the removed object ID. 86 | 87 | ### save 88 | Save the index. 89 | 90 | save(self: ngtpy.Index) 91 | 92 | **Returns** 93 | None. 94 | 95 | ### get_object 96 | Get the specified object. 97 | 98 | List[float] get_object(self: ngtpy.Index, object_id: int) 99 | 100 | **Returns** 101 | The specified object. 102 | 103 | 104 | ### get_num_of_objects 105 | Get the number of the registered objects. 106 | 107 | int get_num_of_objects() 108 | 109 | **Returns** 110 | The number of the registered objects. 
111 | 112 | 113 | ### search 114 | Search the nearest objects to the specified query object. 115 | 116 | object search(self: ngtpy.Index, query: object, size: int, epsilon: float, edge_size: int, with_distance: bool=True) 117 | 118 | **Returns** 119 | The list of tuples(object ID, distance) as the search result. 120 | 121 | **query** 122 | Specify the query object. 123 | 124 | **size** 125 | Specify the number of the objects as the search result. 126 | 127 | **epsilon** 128 | Specify epsilon which defines the explored range for the graph. 129 | 130 | **edge_size** 131 | Specify the number of edges for each node to explore the graph. 132 | 133 | **with_distance** 134 | Specify object IDs with distances as the result. False means that the result is a list of only object IDs. 135 | 136 | 137 | ### set 138 | Specify the default search parameters. 139 | 140 | set(self: ngtpy.Index, num_of_search_objects: int, epsilon: float, search_radius: float) 141 | 142 | **Returns** 143 | None. 144 | 145 | **num_of_search_objects** 146 | Specify the number of search objects. The initial default is 20. 147 | 148 | **epsilon** 149 | Specify the epsilon which defines the explored range for the graph. The initial default is 0.1. 150 | 151 | **search_radius** 152 | Specify the search radius. The initial default is infinity. 153 | 154 | ### export_index 155 | Exports the index to a file. 156 | 157 | export_index(self: ngtpy.Index, path: str) 158 | 159 | **Returns** 160 | None. 161 | 162 | **path** 163 | Path to file in which the exported index will be stored. 164 | 165 | ### import_index 166 | Imports the index from a file 167 | 168 | import_index(self: ngtpy.Index, path: str) 169 | 170 | **Returns** 171 | None. 172 | 173 | **path** 174 | Path to file from which to load the index. 175 | 176 | 177 | FUNCTIONS 178 | ========= 179 | 180 | ### create 181 | Create an empty index. This function is almost the same as executing the ngt command "ngt create" with an empty inserted object file. 
You may execute the ngt command instead of calling this function. 182 | 183 | create(path: str, dimension: int, edge_size_for_creation: int=10, edge_size_for_search: int=40, distance_type: str='L2', object_type: str='Float') 184 | 185 | 186 | **Returns** 187 | None. 188 | 189 | **path** 190 | Specify the path of the index. 191 | 192 | **dimension** 193 | Specify the dimensionality of the inserted object. 194 | 195 | **edge_size_for_creation** 196 | Specify the initial number of edges for each node. 197 | 198 | **edge_size_for_search** 199 | Specify the number of edges for each node to explore the graph for the search processing. 200 | 201 | **distance_type** 202 | Specify the distance function for the objects. 203 | - __L1__: L1 distance 204 | - __L2__: L2 distance (default) 205 | - __Normalized L2__: Normalized L2 distance. The specified data are automatically normalized to be appended to the index. 206 | - __Angle__: Angle distance 207 | - __Normalized Angle__: Normalized angle distance. The specified data are automatically normalized to be appended to the index. 208 | - __Cosine__: Cosine similarity 209 | - __Normalized Cosine__: Normalized cosine similarity. The specified data are automatically normalized to be appended to the index. 210 | - __Hamming__: Hamming distance 211 | - __Jaccard__: Jaccard distance 212 | - __Inner Product__: Inner product (or dot product) 213 | 214 | **object_type** 215 | Specify the data type of the objects. 216 | - __Float__: 4 byte floating point number 217 | - __Float16__: 2 byte floating point number 218 | - __Byte__: 1 byte unsigned integer 219 | 220 | Class Optimizer 221 | =============== 222 | 223 | ### \_\_init\_\_ 224 | Create the optimizer object with the specified parameters. 225 | 226 | __init__(self: ngtpy.Optimizer, num_of_outgoings: int=10, num_of_incomings: int=120, log_disabled: bool=False) 227 | 228 | **Returns** 229 | None. 
230 | 231 | **num_of_outgoings** 232 | Specify the number of outgoing edges for each node to add to the reconstructed graph from the input graph. The specified number also means the lower bound of the outdegrees of the reconstructed graph. 233 | 234 | **num_of_incomings** 235 | Specify the number of incoming edges for each node to add to the reconstructed graph from the input graph. Unlike *num_of_outgoings*, after the direction of the edges are reversed, the edges are added to the reconstructed graph. The specified number also means the lower bound of the indegrees of the reconstructed graph. 236 | 237 | **log_disabled** 238 | Disable stderr messages about the progression of an operation. 239 | 240 | ### execute 241 | Reconstruct an index from the specified index with the previously specified parameters, and optimize search coefficients, which is the same as call *adjust_search_coefficients* below. 242 | 243 | 244 | execute(self: ngtpy.Optimizer, in_index_path: str, out_index_path: str) 245 | 246 | 247 | **in_index_path** 248 | Specify the input index path. 249 | 250 | **out_index_path** 251 | Specify the output index path. 252 | 253 | ### adjust_search_coefficients 254 | Optimize search coefficients. 255 | 256 | adjust_search_coefficients(self: ngtpy.Optimizer, index_path: str) 257 | 258 | **index_path** 259 | Specify the index which is optimized. 260 | 261 | Class QuantizedIndex 262 | =========== 263 | 264 | ## Member Functions 265 | 266 | ### \_\_init\_\_ 267 | Open the specified quantized index and create the index object for the index. 268 | 269 | __init__(self: ngtpy.QuantizedIndex, path: str, zero_based_numbering: bool=True, log_disabled: bool=False) 270 | 271 | **Returns** 272 | None. 273 | 274 | **path** 275 | Specify the path of the quantized index to open. The quantized index should be built by using the command `ngtqg quantize` from ONNG or ANNG in advance. The python function for quantization is not available yet. 
276 | 277 | **zero_based_numbering** 278 | Specify zero-based numbering for object IDs. False means one-based numbering. 279 | 280 | **log_disabled** 281 | Disable stderr messages about the progression of an operation. 282 | 283 | ### search 284 | Search the nearest objects to the specified query object. 285 | 286 | object search(self: ngtpy.QuantizedIndex, query: object, size: int, epsilon: float, result_expansion: float) 287 | 288 | **Returns** 289 | The list of tuples(object ID, distance) as the search result. 290 | 291 | **query** 292 | Specify the query object. 293 | 294 | **size** 295 | Specify the number of the objects as the search result. 296 | 297 | **epsilon** 298 | Specify epsilon which defines the explored range for the quantized graph. 299 | 300 | **result_expansion** 301 | Specify the expansion ratio of the number of approximate inner search objects to the number of search objects. For example, when the ratio is 10 and the number of search objects is 20, the number of the approximate search objects is set to 200 inside the search processing. A larger value brings higher accuracy but slower searching. 302 | 303 | ### set 304 | Specify the default search parameters. 305 | 306 | set(self: ngtpy.QuantizedIndex, num_of_search_objects: int, search_radius: float, result_expansion: float) 307 | 308 | **Returns** 309 | None. 310 | 311 | **num_of_search_objects** 312 | Specify the number of search objects. The initial default is 20. 313 | 314 | **epsilon** 315 | Specify epsilon which defines the explored range for the graph. The initial default is 0.02. 316 | 317 | **result_expansion** 318 | Specify the expansion ratio of the number of approximate inner search objects to the number of search objects. The initial default is 3.0. 
319 | 320 | -------------------------------------------------------------------------------- /lib/NGT/Node.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (C) 2015 Yahoo Japan Corporation 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | #include "NGT/defines.h" 18 | 19 | #include "NGT/Node.h" 20 | #include "NGT/Tree.h" 21 | 22 | #include 23 | 24 | using namespace std; 25 | 26 | const double NGT::Node::Object::Pivot = -1.0; 27 | 28 | using namespace NGT; 29 | 30 | void 31 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 32 | InternalNode::updateChild(DVPTree &dvptree, Node::ID src, Node::ID dst, 33 | SharedMemoryAllocator &allocator) { 34 | #else 35 | InternalNode::updateChild(DVPTree &dvptree, Node::ID src, Node::ID dst) { 36 | #endif 37 | int cs = dvptree.internalChildrenSize; 38 | for (int i = 0; i < cs; i++) { 39 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 40 | if (getChildren(allocator)[i] == src) { 41 | getChildren(allocator)[i] = dst; 42 | #else 43 | if (getChildren()[i] == src) { 44 | getChildren()[i] = dst; 45 | #endif 46 | return; 47 | } 48 | } 49 | } 50 | 51 | int LeafNode::selectPivotByMaxDistance(Container &c, Node::Objects &fs) { 52 | DVPTree::InsertContainer &iobj = (DVPTree::InsertContainer &)c; 53 | int fsize = fs.size(); 54 | Distance maxd = 0.0; 55 | int maxid = 0; 56 | for (int i = 1; i < fsize; i++) { 57 | Distance d = 
iobj.vptree->objectSpace->getComparator()(*fs[0].object, *fs[i].object); 58 | if (d >= maxd) { 59 | maxd = d; 60 | maxid = i; 61 | } 62 | } 63 | 64 | int aid = maxid; 65 | maxd = 0.0; 66 | maxid = 0; 67 | for (int i = 0; i < fsize; i++) { 68 | Distance d = iobj.vptree->objectSpace->getComparator()(*fs[aid].object, *fs[i].object); 69 | if (i == aid) { 70 | continue; 71 | } 72 | if (d >= maxd) { 73 | maxd = d; 74 | maxid = i; 75 | } 76 | } 77 | 78 | int bid = maxid; 79 | maxd = 0.0; 80 | maxid = 0; 81 | for (int i = 0; i < fsize; i++) { 82 | Distance d = iobj.vptree->objectSpace->getComparator()(*fs[bid].object, *fs[i].object); 83 | if (i == bid) { 84 | continue; 85 | } 86 | if (d >= maxd) { 87 | maxd = d; 88 | maxid = i; 89 | } 90 | } 91 | return maxid; 92 | } 93 | 94 | int LeafNode::selectPivotByMaxVariance(Container &c, Node::Objects &fs) { 95 | DVPTree::InsertContainer &iobj = (DVPTree::InsertContainer &)c; 96 | 97 | int fsize = fs.size(); 98 | Distance *distance = new Distance[fsize * fsize]; 99 | 100 | for (int i = 0; i < fsize; i++) { 101 | distance[i * fsize + i] = 0; 102 | } 103 | 104 | for (int i = 0; i < fsize; i++) { 105 | for (int j = i + 1; j < fsize; j++) { 106 | Distance d = iobj.vptree->objectSpace->getComparator()(*fs[i].object, *fs[j].object); 107 | distance[i * fsize + j] = d; 108 | distance[j * fsize + i] = d; 109 | } 110 | } 111 | 112 | double *variance = new double[fsize]; 113 | for (int i = 0; i < fsize; i++) { 114 | double avg = 0.0; 115 | for (int j = 0; j < fsize; j++) { 116 | avg += distance[i * fsize + j]; 117 | } 118 | avg /= (double)fsize; 119 | 120 | double v = 0.0; 121 | for (int j = 0; j < fsize; j++) { 122 | v += pow(distance[i * fsize + j] - avg, 2.0); 123 | } 124 | variance[i] = v / (double)fsize; 125 | } 126 | 127 | double maxv = variance[0]; 128 | int maxid = 0; 129 | for (int i = 0; i < fsize; i++) { 130 | if (variance[i] > maxv) { 131 | maxv = variance[i]; 132 | maxid = i; 133 | } 134 | } 135 | delete[] variance; 136 | 
delete[] distance; 137 | 138 | return maxid; 139 | } 140 | 141 | void LeafNode::splitObjects(Container &c, Objects &fs, int pv) { 142 | DVPTree::InsertContainer &iobj = (DVPTree::InsertContainer &)c; 143 | 144 | // sort the objects by distance 145 | int fsize = fs.size(); 146 | for (int i = 0; i < fsize; i++) { 147 | if (i == pv) { 148 | fs[i].distance = 0; 149 | } else { 150 | Distance d = iobj.vptree->objectSpace->getComparator()(*fs[pv].object, *fs[i].object); 151 | fs[i].distance = d; 152 | } 153 | } 154 | 155 | sort(fs.begin(), fs.end()); 156 | 157 | int childrenSize = iobj.vptree->internalChildrenSize; 158 | int cid = childrenSize - 1; 159 | int cms = (fsize * cid) / childrenSize; 160 | 161 | // divide the objects into child clusters. 162 | fs[fsize - 1].clusterID = cid; 163 | for (int i = fsize - 2; i >= 0; i--) { 164 | if (i < cms && cid > 0) { 165 | if (fs[i].distance != fs[i + 1].distance) { 166 | cid--; 167 | cms = (fsize * cid) / childrenSize; 168 | } 169 | } 170 | fs[i].clusterID = cid; 171 | } 172 | 173 | if (cid != 0) { 174 | // the required number of child nodes could not be acquired 175 | stringstream msg; 176 | msg << "LeafNode::splitObjects: Too many same distances. Reduce internal children size for the tree " 177 | "index or not use the tree index." 178 | << endl; 179 | msg << " internalChildrenSize=" << childrenSize << endl; 180 | msg << " # of the children=" << (childrenSize - cid) << endl; 181 | msg << " Size=" << fsize << endl; 182 | msg << " pivot=" << pv << endl; 183 | msg << " cluster id=" << cid << endl; 184 | msg << " Show distances for debug." << endl; 185 | for (int i = 0; i < fsize; i++) { 186 | msg << " " << fs[i].id << ":" << fs[i].distance << endl; 187 | msg << " "; 188 | PersistentObject &po = *fs[i].object; 189 | iobj.vptree->objectSpace->show(msg, po); 190 | msg << endl; 191 | } 192 | if (fs[fsize - 1].clusterID == cid) { 193 | msg << "LeafNode::splitObjects: All of the object distances are the same!" 
<< endl; 194 | ; 195 | NGTThrowException(msg.str()); 196 | } else { 197 | cerr << msg.str() << endl; 198 | cerr << "LeafNode::splitObjects: Anyway, continue..." << endl; 199 | // sift the cluster IDs to start from 0 to continue. 200 | for (int i = 0; i < fsize; i++) { 201 | fs[i].clusterID -= cid; 202 | } 203 | } 204 | } 205 | 206 | long long *pivots = new long long[childrenSize]; 207 | for (int i = 0; i < childrenSize; i++) { 208 | pivots[i] = -1; 209 | } 210 | 211 | // find the boundaries for the subspaces 212 | for (int i = 0; i < fsize; i++) { 213 | if (pivots[fs[i].clusterID] == -1) { 214 | pivots[fs[i].clusterID] = i; 215 | fs[i].leafDistance = Object::Pivot; 216 | } else { 217 | Distance d = 218 | iobj.vptree->objectSpace->getComparator()(*fs[pivots[fs[i].clusterID]].object, *fs[i].object); 219 | fs[i].leafDistance = d; 220 | } 221 | } 222 | delete[] pivots; 223 | 224 | return; 225 | } 226 | 227 | void 228 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 229 | LeafNode::removeObject(size_t id, size_t replaceId, SharedMemoryAllocator &allocator) { 230 | #else 231 | LeafNode::removeObject(size_t id, size_t replaceId) { 232 | #endif 233 | 234 | size_t fsize = getObjectSize(); 235 | size_t idx; 236 | if (replaceId != 0) { 237 | for (idx = 0; idx < fsize; idx++) { 238 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 239 | if (getObjectIDs(allocator)[idx].id == replaceId) { 240 | #else 241 | if (getObjectIDs()[idx].id == replaceId) { 242 | #endif 243 | std::cerr << " Warning. found the same ID as the replaced ID. " << id << ":" << replaceId 244 | << std::endl; 245 | std::cerr << " ignore it, if normalized distance." 
<< std::endl; 246 | replaceId = 0; 247 | break; 248 | } 249 | } 250 | } 251 | for (idx = 0; idx < fsize; idx++) { 252 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 253 | if (getObjectIDs(allocator)[idx].id == id) { 254 | if (replaceId != 0) { 255 | getObjectIDs(allocator)[idx].id = replaceId; 256 | #else 257 | if (getObjectIDs()[idx].id == id) { 258 | if (replaceId != 0) { 259 | getObjectIDs()[idx].id = replaceId; 260 | #endif 261 | return; 262 | } else { 263 | break; 264 | } 265 | } 266 | } 267 | if (idx == fsize) { 268 | if (pivot == 0) { 269 | NGTThrowException("LeafNode::removeObject: Internal error!. the pivot is illegal."); 270 | } 271 | stringstream msg; 272 | msg << "VpTree::Leaf::remove: Warning. Cannot find the specified object. ID=" << id << "," << replaceId 273 | << " idx=" << idx << " If the same objects were inserted into the index, ignore this message."; 274 | NGTThrowException(msg.str()); 275 | } 276 | 277 | #ifdef NGT_NODE_USE_VECTOR 278 | for (; idx < objectIDs.size() - 1; idx++) { 279 | getObjectIDs()[idx] = getObjectIDs()[idx + 1]; 280 | } 281 | objectIDs.pop_back(); 282 | #else 283 | objectSize--; 284 | for (; idx < objectSize; idx++) { 285 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 286 | getObjectIDs(allocator)[idx] = getObjectIDs(allocator)[idx + 1]; 287 | #else 288 | getObjectIDs()[idx] = getObjectIDs()[idx + 1]; 289 | #endif 290 | } 291 | #endif 292 | 293 | return; 294 | } 295 | 296 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 297 | bool InternalNode::verify(PersistentRepository &internalNodes, 298 | PersistentRepository &leafNodes, SharedMemoryAllocator &allocator) { 299 | #else 300 | bool InternalNode::verify(Repository &internalNodes, Repository &leafNodes) { 301 | #endif 302 | size_t isize = internalNodes.size(); 303 | size_t lsize = leafNodes.size(); 304 | bool valid = true; 305 | for (size_t i = 0; i < childrenSize; i++) { 306 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 307 | size_t nid = getChildren(allocator)[i].getID(); 308 | ID::Type 
type = getChildren(allocator)[i].getType(); 309 | #else 310 | size_t nid = getChildren()[i].getID(); 311 | ID::Type type = getChildren()[i].getType(); 312 | #endif 313 | size_t size = type == ID::Leaf ? lsize : isize; 314 | if (nid >= size) { 315 | cerr << "Error! Internal children node id is too big." << nid << ":" << size << endl; 316 | valid = false; 317 | } 318 | try { 319 | if (type == ID::Leaf) { 320 | leafNodes.get(nid); 321 | } else { 322 | internalNodes.get(nid); 323 | } 324 | } catch (...) { 325 | cerr << "Error! Cannot get the node. " << ((type == ID::Leaf) ? "Leaf" : "Internal") << endl; 326 | valid = false; 327 | } 328 | } 329 | return valid; 330 | } 331 | 332 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 333 | bool LeafNode::verify(size_t nobjs, vector &status, SharedMemoryAllocator &allocator) { 334 | #else 335 | bool LeafNode::verify(size_t nobjs, vector &status) { 336 | #endif 337 | bool valid = true; 338 | for (size_t i = 0; i < objectSize; i++) { 339 | #if defined(NGT_SHARED_MEMORY_ALLOCATOR) 340 | size_t nid = getObjectIDs(allocator)[i].id; 341 | #else 342 | size_t nid = getObjectIDs()[i].id; 343 | #endif 344 | if (nid > nobjs) { 345 | cerr << "Error! Object id is too big. " << nid << ":" << nobjs << endl; 346 | valid = false; 347 | continue; 348 | } 349 | status[nid] |= 0x04; 350 | } 351 | return valid; 352 | } 353 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 | Neighborhood Graph and Tree for Indexing High-dimensional Data 6 | 7 | [Home](/README.md) / [Installation](/README.md#Installation) / [Command](/bin/ngt/README.md#command) / [License](/README.md#license) / [Publications](/README.md#publications) / [About Us](http://research-lab.yahoo.co.jp/en/) / [日本語](/README-jp.md) 8 | 9 | **NGT** provides commands and a library for performing high-speed approximate nearest neighbor searches against a large volume of data in high dimensional vector data space (several tens to several thousands of dimensions). 10 | 11 | News 12 | ---- 13 | - 04/10/2024 Inner product (or dot product) is now available. (v2.1.0) 14 | - 08/10/2022 [QBG](https://github.com/yahoojapan/NGT#qbg-quantized-blob-graph-based-method) (Quantized Blob Graph) and [QG](https://github.com/yahoojapan/NGT#qg-quantized-graph-based-method) (renewed NGTQG) are now available. The command-line interfaces ngtq and ngtqg are now obsolete and have been replaced by [qbg](bin/qbg/README.md). (v2.0.0) 15 | - 02/04/2022 FP16 (half-precision floating point) is now available. (v1.14.0) 16 | - 03/12/2021 The results for the quantized graph are added to this README. 17 | - 01/15/2021 NGT v1.13.0, which provides the [quantized graph (NGTQG)](bin/ngtqg/README.md), has been released. 18 | - 11/04/2019 [NGT tutorial](https://github.com/yahoojapan/NGT/wiki) has been released. 19 | - 06/26/2019 Jaccard distance is available. (v1.7.6) 20 | - 06/10/2019 PyPI NGT package v1.7.5 is now available. 21 | - 01/17/2019 Python NGT can be installed via pip from PyPI. (v1.5.1) 22 | - 12/14/2018 [NGTQ](bin/ngtq/README.md) (NGT with Quantization) is now available. (v1.5.0) 23 | - 08/08/2018 [ONNG](README.md#onng) is now available. (v1.4.0) 24 | 25 | Methods 26 | ------- 27 | This repository provides the following methods. 
28 | - NGT: Graph and tree-based method 29 | - QG: Quantized graph-based method 30 | - QBG: Quantized blob graph-based method 31 | 32 | Note: Since QG and QBG require BLAS and LAPACK libraries, if you use only NGT (Graph and tree-based method) without the additional libraries, as in V1, you can disable QG and QBG with [this option](#disable-qg-and-qbg). 33 | 34 | Installation 35 | ------------ 36 | 37 | ### Build 38 | 39 | #### Downloads 40 | 41 | - [Releases](https://github.com/yahoojapan/NGT/releases) 42 | 43 | #### On Linux without QG and QBG 44 | 45 | $ unzip NGT-x.x.x.zip 46 | $ cd NGT-x.x.x 47 | $ mkdir build 48 | $ cd build 49 | $ cmake -DNGT_QBG_DISABLED=ON .. 50 | $ make 51 | $ make install 52 | $ ldconfig /usr/local/lib 53 | 54 | #### On CentOS 55 | 56 | $ yum install blas-devel lapack-devel 57 | $ unzip NGT-x.x.x.zip 58 | $ cd NGT-x.x.x 59 | $ mkdir build 60 | $ cd build 61 | $ cmake .. 62 | $ make 63 | $ make install 64 | $ ldconfig /usr/local/lib 65 | 66 | #### On Ubuntu 67 | 68 | $ apt install libblas-dev liblapack-dev 69 | $ unzip NGT-x.x.x.zip 70 | $ cd NGT-x.x.x 71 | $ mkdir build 72 | $ cd build 73 | $ cmake .. 74 | $ make 75 | $ make install 76 | $ ldconfig /usr/local/lib 77 | 78 | #### On macOS using homebrew 79 | 80 | $ /usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" 81 | $ brew install cmake 82 | $ brew install libomp 83 | $ unzip NGT-x.x.x.zip 84 | $ cd NGT-x.x.x 85 | $ mkdir build 86 | $ cd build 87 | $ export OpenMP_ROOT=$(brew --prefix)/opt/libomp 88 | $ cmake .. 89 | $ make 90 | $ make install 91 | 92 | ### Pre-Built 93 | 94 | #### On macOS 95 | 96 | $ brew install ngt 97 | 98 | NGT (Graph and tree-based method) 99 | ================================= 100 | 101 | Key Features 102 | ------------ 103 | - Supported operating systems: Linux and macOS 104 | - Objects can be added to and removed from an existing index. 
105 | - Objects beyond the memory size can be handled using [the shared memory (memory mapped file) option](README.md#shared-memory-use). 106 | - Supported distance functions: L1, L2, Cosine similarity, Angular, Hamming, Jaccard, Poincare, Lorentz, and Inner product 107 | - Data Types: 4 byte floating point number, 2 byte floating point number, and 1 byte unsigned integer 108 | - Supported languages: [Python](/python/README.md), [Ruby](https://github.com/ankane/ngt), [PHP](https://github.com/ankane/ngt-php), [Rust](https://crates.io/crates/ngt), [Go](https://github.com/yahoojapan/gongt), C, and C++ 109 | - Distributed servers: [ngtd](https://github.com/yahoojapan/ngtd) and [vald](https://github.com/vdaas/vald) 110 | 111 | Documents 112 | --------- 113 | 114 | - [NGT tutorial](https://github.com/yahoojapan/NGT/wiki) 115 | 116 | Utilities 117 | --------- 118 | 119 | - Command : [ngt](/bin/ngt/README.md#command) and [qbg](bin/qbg/README.md) 120 | - Server : [ngtd](https://github.com/yahoojapan/ngtd) and [vald](https://github.com/vdaas/vald) 121 | 122 | Supported Programming Languages 123 | ------------------------------- 124 | 125 | - [Python](/python/README.md) 126 | - [Ruby](https://github.com/ankane/ngt) (Thanks Andrew!) 127 | - [PHP](https://github.com/ankane/ngt-php) (Thanks Andrew!) 128 | - [Rust](https://crates.io/crates/ngt) (Thanks Romain!) 129 | - JavaScript/NodeJS : [ngt-tool](https://www.npmjs.com/package/ngt-tool) and [spatial-db-ngt](https://www.npmjs.com/package/spatial-db-ngt) (Thanks stonkpunk!) 130 | - [Go](https://github.com/yahoojapan/gongt) 131 | - C 132 | - C++([sample code](samples)) 133 | 134 | 135 | The following build parameters are available 136 | 137 | Build parameters 138 | ---------------- 139 | 140 | #### Shared memory use 141 | 142 | The index can be placed in shared memory with memory mapped files. Using shared memory can reduce the amount of memory needed when multiple processes are using the same index. 
In addition, it can not only handle an index with a large number of objects that cannot be loaded into memory, but also reduce time to open it. Since changes become necessary at build time, please add the following parameter when executing "cmake" in order to use shared memory. 143 | 144 | $ cmake -DNGT_SHARED_MEMORY_ALLOCATOR=ON .. 145 | 146 | Note: Since there is no lock function, the index should be used only for reference when multiple processes are using the same index. 147 | 148 | #### Large-scale data use 149 | 150 | When you insert more than about 5 million objects for the graph-based method, please add the following parameter to improve the search time. 151 | 152 | $ cmake -DNGT_LARGE_DATASET=ON .. 153 | 154 | #### Disable QG and QBG 155 | QG and QBG require BLAS and LAPACK libraries. If you would not like to install these libraries and do not use QG and QBG, you can disable QG and QBG. 156 | 157 | $ cmake -DNGT_QBG_DISABLED=ON .. 158 | 159 | QG (Quantized graph-based method) 160 | ================================= 161 | 162 | Key Features 163 | ------------ 164 | - Higher performance than the graph and tree-based method 165 | - Supported operating systems: Linux and macOS 166 | - Supported distance functions: L2 and Cosine similarity 167 | 168 | Documents 169 | --------- 170 | 171 | - [QG tutorial](https://medium.com/@masajiro.iwasaki/fusion-of-graph-based-indexing-and-product-quantization-for-ann-search-7d1f0336d0d0) 172 | 173 | 174 | Utilities 175 | --------- 176 | - Command : [qbg](bin/qbg/README.md) 177 | 178 | Supported Programming Languages 179 | ------------------------------- 180 | 181 | - C++ 182 | - C 183 | - Python only for search 184 | 185 | Build parameters 186 | ---------------- 187 | 188 | For QG, it is recommended to disable rotation of the vector space and residual vectors to improve performance as follows. 189 | 190 | $ cmake -DNGTQG_NO_ROTATION=ON -DNGTQG_ZERO_GLOBAL=ON .. 
191 | 192 | QBG (Quantized blob graph-based method) 193 | ======================================= 194 | 195 | Key Features 196 | ------------ 197 | - [QBG](bin/qbg/README.md) can handle billions of objects. 198 | - Supported operating systems: Linux and macOS 199 | - Supported distance functions: L2 200 | 201 | Utilities 202 | --------- 203 | - Command : [qbg](bin/qbg/README.md) 204 | 205 | Supported Programming Languages 206 | ------------------------------- 207 | 208 | - C++ 209 | - C 210 | - Python only for search 211 | 212 | Benchmark Results 213 | ----------------- 214 | The following are the results of [ann benchmarks](https://github.com/erikbern/ann-benchmarks) for NGT v2.0.0 where the timeout is 5 hours on an AWS c5.4xlarge instance. 215 | 216 | #### glove-100-angular 217 | 218 | 219 | #### gist-960-euclidean 220 | 221 | 222 | #### fashion-mnist-784-euclidean 223 | 224 | 225 | #### nytimes-256-angular 226 | 227 | 228 | #### sift-128-euclidean 229 | 230 | 231 | 232 | License 233 | ------- 234 | 235 | Copyright (C) 2015 Yahoo Japan Corporation 236 | 237 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this software except in compliance with the License. You may obtain a copy of the License at 238 | 239 | http://www.apache.org/licenses/LICENSE-2.0 240 | 241 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 242 | See the License for the specific language governing permissions and limitations under the License. 243 | 244 | Contributor License Agreement 245 | ----------------------------- 246 | 247 | This project requires contributors to accept the terms in the [Contributor License Agreement (CLA)](https://gist.github.com/yahoojapanoss/9bf8afd6ea67f32d29b4082abf220340). 
248 | 249 | Please note that contributors to the NGT repository on GitHub (https://github.com/yahoojapan/NGT) shall be deemed to have accepted the CLA without individual written agreements. 250 | 251 | Contact Person 252 | -------------- 253 | [masajiro](https://github.com/masajiro) 254 | 255 | Publications 256 | ------------ 257 | ##### [ONNG](bin/ngt/README.md#onng) 258 | - Iwasaki, M., Miyazaki, D.: Optimization of Indexing Based on k-Nearest Neighbor Graph for Proximity. arXiv:1810.07355 [cs] (2018). ([pdf](https://arxiv.org/abs/1810.07355)) 259 | 260 | ##### [PANNG](bin/ngt/README.md#panng) 261 | - Iwasaki, M.: Pruned Bi-directed K-nearest Neighbor Graph for Proximity Search. Proc. of SISAP2016 (2016) 20-33. ([pdf](https://link.springer.com/chapter/10.1007/978-3-319-46759-7_2)) 262 | - Sugawara, K., Kobayashi, H. and Iwasaki, M.: On Approximately Searching for Similar Word Embeddings. Proc. of ACL2016 (2016) 2265-2275. ([pdf](https://aclweb.org/anthology/P/P16/P16-1214.pdf)) 263 | 264 | ##### [ANNGT](bin/ngt/README.md#anngt) 265 | - Iwasaki, M.: Applying a Graph-Structured Index to Product Image Search (in Japanese). IIEEJ Journal 42(5) (2013) 633-641. ([pdf](https://s.yimg.jp/i/docs/research_lab/articles/miwasaki-iieej-jnl-2013.pdf)) 266 | - Iwasaki, M.: Proximity search using approximate k nearest neighbor graph with a tree structured index (in Japanese). IPSJ Journal 52(2) (2011) 817-828. ([pdf](https://s.yimg.jp/i/docs/research_lab/articles/miwasaki-ipsj-jnl-2011.pdf)) 267 | 268 | ##### [ANNG](bin/ngt/README.md#anng) 269 | - Iwasaki, M.: Proximity search in metric spaces using approximate k nearest neighbor graph (in Japanese). IPSJ Trans. on Database 3(1) (2010) 18-28. 
([pdf](https://s.yimg.jp/i/docs/research_lab/articles/miwasaki-ipsj-tod-2010.pdf)) 270 | 271 | 272 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | --------------------------------------------------------------------------------