16 | {% endif %}
17 | {% endif %}
18 | {% for plot in plot_data[item] %}
19 | {{ plot }}
20 | {% endfor %}
21 |
22 | {% endfor %}
23 | {% endblock %}
24 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/annoy.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import annoy
3 | from ann_benchmarks.algorithms.base import BaseANN
4 |
5 |
class Annoy(BaseANN):
    """Benchmark adapter around ``annoy.AnnoyIndex``."""

    def __init__(self, metric, n_trees):
        """Remember the build parameters; the index is created in ``fit``.

        ``metric`` is forwarded to ``annoy.AnnoyIndex`` and ``n_trees`` to
        ``AnnoyIndex.build``.
        """
        self._n_trees = n_trees
        # Stays None until set_query_arguments() has been called.
        self._search_k = None
        self._metric = metric

    def fit(self, X):
        """Insert every row of ``X`` (keyed by row number) and build the index."""
        self._annoy = annoy.AnnoyIndex(X.shape[1], metric=self._metric)
        for i, x in enumerate(X):
            self._annoy.add_item(i, x.tolist())
        self._annoy.build(self._n_trees)

    def set_query_arguments(self, search_k):
        """Set the ``search_k`` value passed to every subsequent query."""
        self._search_k = search_k

    def query(self, v, n):
        """Return the ids of the ``n`` nearest items to vector ``v``."""
        # Annoy expects an integer here; -1 is its own "use the default"
        # value, so fall back to it when no search_k has been configured.
        search_k = -1 if self._search_k is None else self._search_k
        return self._annoy.get_nns_by_vector(v.tolist(), n, search_k)

    def __str__(self):
        # '%d' cannot format None, so describe the unconfigured state
        # explicitly instead of raising TypeError.
        if self._search_k is None:
            return 'Annoy(n_trees=%d, search_k=None)' % self._n_trees
        return 'Annoy(n_trees=%d, search_k=%d)' % (self._n_trees,
                                                   self._search_k)
27 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import psutil
3 |
4 |
class BaseANN(object):
    """Base class for algorithm wrappers; every hook has a neutral default."""

    def done(self):
        """Hook that does nothing in the base class."""
        return None

    def get_memory_usage(self):
        """Return the current memory usage of this algorithm instance
        (in kilobytes), or None if this information is not available.

        The base implementation reports the resident set size of the
        current process.
        """
        rss_bytes = psutil.Process().memory_info().rss
        # Reported in kB for backwards compatibility.
        return rss_bytes / 1024

    def fit(self, X):
        """Hook for building an index from ``X``; a no-op here."""
        return None

    def query(self, q, n):
        """Return candidate indices for ``q``; the base class finds none."""
        candidates = []
        return candidates

    def batch_query(self, X, n):
        """Run ``query`` on each element of ``X`` and keep the answers in
        ``self.res`` for ``get_batch_results``."""
        self.res = collected = []
        for vector in X:
            collected.append(self.query(vector, n))

    def get_batch_results(self):
        """Return the list stored by the last ``batch_query`` call."""
        return self.res

    def get_additional(self):
        """Return extra per-run information; empty by default."""
        return dict()

    def __str__(self):
        # Relies on the subclass (or caller) having set ``self.name``.
        return self.name
34 |
--------------------------------------------------------------------------------
/templates/latex.template:
--------------------------------------------------------------------------------
1 |
2 | \begin{figure}
3 | \centering
4 | \begin{tikzpicture}
5 | \begin{axis}[
6 | xlabel={ {{xlabel}} },
7 | ylabel={ {{ylabel}} },
8 | ymode = log,
9 | yticklabel style={/pgf/number format/fixed,
10 | /pgf/number format/precision=3},
11 | legend style = { anchor=west},
12 | cycle list name = black white
13 | ]
14 | {% for algo in plot_data %}
15 | {% if algo.scatter %}
16 | \addplot [only marks] coordinates {
17 | {% else %}
18 | \addplot coordinates {
19 | {% endif %}
20 | {% for coord in algo.coords %}
21 | ({{ coord[0]}}, {{ coord[1] }})
22 | {% endfor %}
23 | };
24 | \addlegendentry{ {{algo.name}} };
25 | {% endfor %}
26 | \end{axis}
27 | \end{tikzpicture}
28 | \caption{ {{caption}} }
29 | \label{}
30 | \end{figure}
31 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/n2.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import n2
3 | from ann_benchmarks.algorithms.base import BaseANN
4 |
5 |
class N2(BaseANN):
    """Benchmark adapter around ``n2.HnswIndex``."""

    def __init__(self, metric, method_param):
        """Read ``M`` and ``efConstruction`` from ``method_param``."""
        self._metric = metric
        self._m = method_param['M']
        # The level-0 degree bound is fixed at twice M.
        self._m0 = self._m * 2
        self._ef_construction = method_param['efConstruction']
        self._n_threads = 1
        self._ef_search = -1

    def fit(self, X):
        """Feed every row of ``X`` to a new index and build it."""
        index = n2.HnswIndex(X.shape[1], self._metric)
        self._n2 = index
        for row in X:
            index.add_data(row)
        build_options = dict(
            m=self._m,
            max_m0=self._m0,
            ef_construction=self._ef_construction,
            n_threads=self._n_threads,
            graph_merging='merge_level0',
        )
        index.build(**build_options)

    def set_query_arguments(self, ef):
        """Set the ``ef`` value handed to every subsequent search."""
        self._ef_search = ef

    def query(self, v, n):
        """Search the index for the ``n`` nearest neighbours of ``v``."""
        neighbours = self._n2.search_by_vector(v, n, self._ef_search)
        return neighbours

    def __str__(self):
        description = (self._m, self._ef_construction)
        return "N2 (M%d_efCon%d)" % description
29 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/sptag.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import SPTAG
3 | from ann_benchmarks.algorithms.base import BaseANN
4 |
5 |
class Sptag(BaseANN):
    """Benchmark adapter around ``SPTAG.AnnIndex``."""

    def __init__(self, metric, algo):
        """Store the algorithm name and translate the benchmark metric.

        Raises ``KeyError`` for metrics other than 'angular'/'euclidean'.
        """
        self._algo = str(algo)
        sptag_metric_names = {'angular': 'Cosine', 'euclidean': 'L2'}
        self._metric = sptag_metric_names[metric]

    def fit(self, X):
        """Create a 'Float' index sized to ``X`` and build it."""
        index = SPTAG.AnnIndex(self._algo, 'Float', X.shape[1])
        self._sptag = index
        index.SetBuildParam("NumberOfThreads", '32')
        index.SetBuildParam("DistCalcMethod", self._metric)
        index.Build(X, X.shape[0])

    def set_query_arguments(self, MaxCheck):
        """Record ``MaxCheck`` and push it to the index as a search parameter."""
        self._maxCheck = MaxCheck
        self._sptag.SetSearchParam("MaxCheck", str(MaxCheck))

    def query(self, v, k):
        """Return the first element of ``Search``'s result for ``v``."""
        search_result = self._sptag.Search(v, k)
        return search_result[0]

    def __str__(self):
        fields = (self._metric, self._algo, self._maxCheck)
        return 'Sptag(metric=%s, algo=%s, check=%d)' % fields
28 |
29 |
--------------------------------------------------------------------------------
/install/Dockerfile.scann:
--------------------------------------------------------------------------------
FROM ann-benchmarks

# Refresh the package index in the same layer as the install so the build
# does not depend on whatever apt lists the base image happens to carry.
RUN apt-get update && apt-get install -y software-properties-common curl gnupg rsync

# Register the Bazel apt repository and install the pinned Bazel release.
RUN curl https://bazel.build/bazel-release.pub.gpg | apt-key add -
RUN echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list
RUN apt-get update && apt-get install -y bazel-3.4.1

# Toolchain PPA providing g++-9 / clang-8; update and install share a layer
# so a cached, stale "apt-get update" layer can never be reused on its own.
RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test
RUN apt-get update && apt-get install -y g++-9 clang-8

# Build the ScaNN wheel from source and install it.
RUN pip3 install --upgrade pip
RUN git clone https://github.com/google-research/google-research.git --depth=1
RUN cd google-research/scann && python3 configure.py
RUN PY3="$(which python3)" && cd google-research/scann && PYTHON_BIN_PATH=$PY3 CC=clang-8 bazel-3.4.1 build -c opt --copt=-mavx2 --copt=-mfma --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --cxxopt="-std=c++17" --copt=-fsized-deallocation --copt=-w :build_pip_pkg
RUN cd google-research/scann && PYTHON=python3 ./bazel-bin/build_pip_pkg && pip3 install *.whl
# Fail the image build early if the module cannot be imported.
RUN python3 -c 'import scann'
19 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/flann.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import pyflann
3 | import numpy
4 | import sklearn.preprocessing
5 | from ann_benchmarks.algorithms.base import BaseANN
6 |
7 |
class FLANN(BaseANN):
    """Benchmark adapter around an autotuned ``pyflann.FLANN`` index."""

    def __init__(self, metric, target_precision):
        """Store the target precision and metric; derive the display name."""
        self._target_precision = target_precision
        self.name = 'FLANN(target_precision=%f)' % target_precision
        self._metric = metric

    def fit(self, X):
        """Build the index, L2-normalising the rows first for 'angular'."""
        self._flann = pyflann.FLANN(
            target_precision=self._target_precision,
            algorithm='autotuned',
            log_level='info')
        points = X
        if self._metric == 'angular':
            points = sklearn.preprocessing.normalize(points, axis=1, norm='l2')
        self._flann.build_index(points)

    def query(self, v, n):
        """Return the first row of neighbour indices for query vector ``v``."""
        needle = v
        if self._metric == 'angular':
            normalised = sklearn.preprocessing.normalize(
                [needle], axis=1, norm='l2')
            needle = normalised[0]
        # Cast to float32 before the lookup when the query has another dtype.
        if needle.dtype != numpy.float32:
            needle = needle.astype(numpy.float32)
        lookup = self._flann.nn_index(needle, n)
        return lookup[0][0]
28 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/lshf.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import sklearn.neighbors
3 | import sklearn.preprocessing
4 | from ann_benchmarks.algorithms.base import BaseANN
5 |
6 |
class LSHF(BaseANN):
    """Benchmark adapter around ``sklearn.neighbors.LSHForest``."""

    def __init__(self, metric, n_estimators=10, n_candidates=50):
        """Store the forest parameters and derive the display name."""
        label_fields = (n_estimators, n_candidates)
        self.name = 'LSHF(n_est=%d, n_cand=%d)' % label_fields
        self._metric = metric
        self._n_estimators = n_estimators
        self._n_candidates = n_candidates

    def fit(self, X):
        """Fit the forest, L2-normalising the rows first for 'angular'."""
        forest = sklearn.neighbors.LSHForest(
            n_estimators=self._n_estimators,
            n_candidates=self._n_candidates)
        self._lshf = forest
        training = X
        if self._metric == 'angular':
            training = sklearn.preprocessing.normalize(
                training, axis=1, norm='l2')
        forest.fit(training)

    def query(self, v, n):
        """Return the neighbour indices found for the single query ``v``."""
        needle = v
        if self._metric == 'angular':
            needle = sklearn.preprocessing.normalize(
                [needle], axis=1, norm='l2')[0]
        found = self._lshf.kneighbors(
            [needle], return_distance=False, n_neighbors=n)
        return found[0]
26 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/dolphinnpy.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import sys
3 | sys.path.append("install/lib-dolphinnpy") # noqa
4 | import numpy
5 | import ctypes
6 | from dolphinn import Dolphinn
7 | from utils import findmean, isotropize
8 | from ann_benchmarks.algorithms.base import BaseANN
9 |
10 |
class DolphinnPy(BaseANN):
    """Benchmark adapter around the ``Dolphinn`` index."""

    def __init__(self, num_probes):
        """Store ``num_probes``; the index is created in ``fit``."""
        self.name = 'Dolphinn(num_probes={} )'.format(num_probes)
        self.num_probes = num_probes
        # Placeholder until fit() stores the value returned by findmean().
        self.m = 1
        self._index = None

    def fit(self, X):
        """Convert ``X`` to float32 if needed, isotropize it and index it."""
        data = X
        if data.dtype != numpy.float32:
            data = numpy.array(data, dtype=numpy.float32)
        dim = data.shape[1]
        self.m = findmean(data, dim, 10)
        data = isotropize(data, dim, self.m)
        # Hypercube dimension is floor(log2(number of points)) minus two.
        cube_dim = int(numpy.log2(len(data))) - 2
        self._index = Dolphinn(data, dim, cube_dim)

    def query(self, v, n):
        """Isotropize ``v`` with the stored mean and return its result row."""
        batch = isotropize(numpy.array([v]), len(v), self.m)
        answers = self._index.queries(batch, n, self.num_probes)
        return answers[0]
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Erik Bernhardsson
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/datasketch.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from datasketch import MinHashLSHForest, MinHash
3 | from ann_benchmarks.algorithms.base import BaseANN
4 |
5 |
class DataSketch(BaseANN):
    """Benchmark adapter around ``datasketch.MinHashLSHForest`` (Jaccard only)."""

    def __init__(self, metric, n_perm, n_rep):
        """Store the MinHash parameters.

        Raises ``NotImplementedError`` for any metric other than 'jaccard'.
        """
        # Compare for equality: ``metric not in ('jaccard')`` tested against
        # a plain string, so any substring such as 'jac' or '' was accepted.
        if metric != 'jaccard':
            raise NotImplementedError(
                "Datasketch doesn't support metric %s" % metric)
        self._n_perm = n_perm
        self._n_rep = n_rep
        self._metric = metric
        self.name = 'Datasketch(n_perm=%d, n_rep=%d)' % (n_perm, n_rep)

    def _sketch(self, elements):
        """Return a MinHash updated with the UTF-8 text of every element."""
        m = MinHash(num_perm=self._n_perm)
        for e in elements:
            m.update(str(e).encode('utf8'))
        return m

    def fit(self, X):
        """Add one MinHash per entry of ``X`` (keyed by its position) and
        index the forest."""
        self._index = MinHashLSHForest(num_perm=self._n_perm, l=self._n_rep)
        for i, x in enumerate(X):
            self._index.add(str(i), self._sketch(x))
        self._index.index()

    def query(self, v, n):
        """Return the keys matched for ``v`` as a list of ints."""
        # Materialise the result: on Python 3 ``map`` is a one-shot iterator
        # without a length, which callers expecting a sequence cannot use.
        return [int(key) for key in self._index.query(self._sketch(v), n)]
30 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/scann.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import numpy as np
4 | import scann
5 | from ann_benchmarks.algorithms.base import BaseANN
6 |
class Scann(BaseANN):
    """Benchmark adapter around a ScaNN searcher."""

    def __init__(self, n_leaves, avq_threshold, dims_per_block):
        """Store the build parameters and derive the display name."""
        name_template = "scann n_leaves={} avq_threshold={:.02f} dims_per_block={}"
        self.name = name_template.format(
            n_leaves, avq_threshold, dims_per_block)
        self.n_leaves = n_leaves
        self.avq_threshold = avq_threshold
        self.dims_per_block = dims_per_block

    def fit(self, X):
        """Normalise ``X`` in place and build the searcher.

        Note that ``X`` itself is modified: all-zero rows are overwritten
        and every row is then divided by its norm.
        """
        # Give zero rows a constant value so the division below is defined.
        zero_rows = np.linalg.norm(X, axis=1) == 0
        X[zero_rows] = 1.0 / np.sqrt(X.shape[1])
        row_norms = np.linalg.norm(X, axis=1)
        X /= row_norms[:, np.newaxis]

        builder = scann.scann_ops_pybind.builder(X, 10, "dot_product")
        builder = builder.tree(
            self.n_leaves, 1, training_sample_size=350000,
            spherical=True, quantize_centroids=True)
        builder = builder.score_ah(
            self.dims_per_block,
            anisotropic_quantization_threshold=self.avq_threshold)
        builder = builder.reorder(1)
        self.searcher = builder.build()

    def set_query_arguments(self, leaves_reorder):
        """Unpack ``leaves_reorder`` into ``(leaves_to_search, reorder)``."""
        leaves, reorder = leaves_reorder
        self.leaves_to_search = leaves
        self.reorder = reorder

    def query(self, v, n):
        """Return the first element of the searcher's result for ``v``."""
        outcome = self.searcher.search(
            v, n, self.reorder, self.leaves_to_search)
        return outcome[0]
30 |
--------------------------------------------------------------------------------
/install/Dockerfile.sptag:
--------------------------------------------------------------------------------
# Adapted from https://github.com/microsoft/SPTAG/blob/master/Dockerfile
# Builds SPTAG from source on top of the ann-benchmarks base image.

FROM ann-benchmarks

RUN git clone https://github.com/microsoft/SPTAG
RUN apt-get update && apt-get -y install wget build-essential libtbb-dev software-properties-common swig

# cmake >= 3.12 is required
# The release tarball is unpacked straight into /usr/local.
RUN wget "https://github.com/Kitware/CMake/releases/download/v3.14.4/cmake-3.14.4-Linux-x86_64.tar.gz" -q -O - \
| tar -xz --strip-components=1 -C /usr/local

# specific version of boost
# NOTE(review): Bintray has been shut down, so this download URL has probably
# stopped resolving -- confirm and switch to a current Boost archive mirror.
RUN wget "https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.gz" -q -O - \
| tar -xz && \
cd boost_1_67_0 && \
./bootstrap.sh && \
./b2 install && \
# update ld cache so it finds boost in /usr/local/lib
ldconfig && \
cd .. && rm -rf boost_1_67_0

# SPTAG defaults to Python 2 if it's found on the system, so as a hack, we remove it. See https://github.com/microsoft/SPTAG/blob/master/Wrappers/CMakeLists.txt
RUN apt-get -y remove libpython2.7

# Compile
RUN cd SPTAG && mkdir build && cd build && cmake .. && make && cd ..

# so python can find the SPTAG module
ENV PYTHONPATH=/home/app/SPTAG/Release
# Fail the image build early if the module cannot be imported.
RUN python3 -c 'import SPTAG'
31 |
--------------------------------------------------------------------------------
/install/Dockerfile.milvus:
--------------------------------------------------------------------------------
# Install Milvus
FROM milvusdb/milvus:0.6.0-cpu-d120719-2b40dd as milvus
# Update and install share one layer so a cached, stale package index is
# never reused for a later install.
RUN apt-get update && apt-get install -y wget
RUN wget https://raw.githubusercontent.com/milvus-io/docs/master/v0.6.0/assets/server_config.yaml
RUN sed -i 's/cpu_cache_capacity: 16/cpu_cache_capacity: 4/' server_config.yaml # otherwise my Docker blows up
RUN mv server_config.yaml /var/lib/milvus/conf/server_config.yaml

# Switch back to ANN-benchmarks base image and copy all files
FROM ann-benchmarks
COPY --from=milvus /var/lib/milvus /var/lib/milvus
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/var/lib/milvus/lib"
RUN apt-get update && apt-get install -y libmysqlclient-dev

# Python client
RUN pip3 install pymilvus==0.2.7

# Fixing some version incompatibility thing
RUN pip3 install numpy==1.18 scipy==1.1.0 scikit-learn==0.21

# Entrypoint script that starts the Milvus daemon, waits five seconds, then
# runs the benchmark with the container's arguments.
RUN echo '#!/bin/bash' >> entrypoint.sh
RUN echo '/var/lib/milvus/bin/milvus_server -d -c /var/lib/milvus/conf/server_config.yaml -l /var/lib/milvus/conf/log_config.conf' >> entrypoint.sh
RUN echo 'sleep 5' >> entrypoint.sh
RUN echo 'python3 run_algorithm.py "$@"' >> entrypoint.sh
RUN chmod u+x entrypoint.sh
ENTRYPOINT ["/home/app/entrypoint.sh"]
29 |
--------------------------------------------------------------------------------
/ann_benchmarks/data.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy
3 |
4 |
def float_parse_entry(line):
    """Parse a whitespace-separated line into a list of floats."""
    tokens = line.strip().split()
    return list(map(float, tokens))
7 |
8 |
def float_unparse_entry(entry):
    """Render the values of ``entry`` as one space-separated string."""
    return " ".join(str(value) for value in entry)
11 |
12 |
def int_parse_entry(line):
    """Parse a whitespace-separated line into a frozenset of ints."""
    tokens = line.strip().split()
    return frozenset(map(int, tokens))
15 |
16 |
def int_unparse_entry(entry):
    """Render the values of ``entry``, coerced to int, space-separated."""
    return " ".join(str(int(value)) for value in entry)
19 |
20 |
def bit_parse_entry(line):
    """Parse a line of digits into a list of booleans.

    Spaces and tabs between digits are ignored; each remaining character
    is converted with ``int`` and then ``bool``.
    """
    digits = line.strip().replace(" ", "").replace("\t", "")
    return [bool(int(ch)) for ch in digits]
25 |
26 |
def bit_unparse_entry(entry):
    """Render truthy values as "1" and falsy ones as "0", space-separated."""
    return " ".join("1" if bit else "0" for bit in entry)
29 |
30 |
# Per point-type settings: the element type plus the line parser/formatter.
# "float" additionally names the function used to combine parsed entries.
type_info = {
    "float": {
        # ``numpy.float`` was only an alias for the builtin and has been
        # removed from NumPy; the builtin is the same type.
        "type": float,
        "parse_entry": float_parse_entry,
        "unparse_entry": float_unparse_entry,
        "finish_entries": numpy.vstack
    },
    "bit": {
        "type": numpy.bool_,
        "parse_entry": bit_parse_entry,
        "unparse_entry": bit_unparse_entry
    },
    "int": {
        # ``numpy.object`` was likewise an alias for the builtin ``object``.
        "type": object,
        "parse_entry": int_parse_entry,
        "unparse_entry": int_unparse_entry,
    },
}
49 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/faiss_hnsw.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import faiss
4 | import numpy as np
5 | from ann_benchmarks.constants import INDEX_DIR
6 | from ann_benchmarks.algorithms.base import BaseANN
7 | from ann_benchmarks.algorithms.faiss import Faiss
8 |
9 |
class FaissHNSW(Faiss):
    """Benchmark adapter around ``faiss.IndexHNSWFlat``."""

    def __init__(self, metric, method_param):
        """Store the metric and the ``M`` / ``efConstruction`` parameters."""
        self._metric = metric
        self.method_param = method_param

    def fit(self, X):
        """Create the HNSW index and add ``X`` to it as float32.

        For the 'angular' metric every row is divided by its norm first.
        """
        self.index = faiss.IndexHNSWFlat(len(X[0]), self.method_param["M"])
        self.index.hnsw.efConstruction = self.method_param["efConstruction"]
        self.index.verbose = True

        if self._metric == 'angular':
            X = X / np.linalg.norm(X, axis=1)[:, np.newaxis]
        if X.dtype != np.float32:
            X = X.astype(np.float32)

        self.index.add(X)
        # Set only after the index has been populated.
        faiss.omp_set_num_threads(1)

    def set_query_arguments(self, ef):
        """Reset the HNSW statistics and set ``efSearch`` for later queries."""
        faiss.cvar.hnsw_stats.reset()
        self.index.hnsw.efSearch = ef

    def get_additional(self):
        """Report the ``ndis`` counter accumulated since the last reset."""
        return {"dist_comps": faiss.cvar.hnsw_stats.ndis}

    def __str__(self):
        return 'faiss (%s, ef: %d)' % (self.method_param, self.index.hnsw.efSearch)

    def freeIndex(self):
        """Drop the reference to the index built by ``fit``."""
        # The index lives in ``self.index``; this class never sets
        # ``self.p``, so deleting that raised AttributeError.
        del self.index
40 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/hnswlib.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import hnswlib
4 | import numpy as np
5 | from ann_benchmarks.constants import INDEX_DIR
6 | from ann_benchmarks.algorithms.base import BaseANN
7 |
8 |
class HnswLib(BaseANN):
    """Benchmark adapter around ``hnswlib.Index``."""

    def __init__(self, metric, method_param):
        """Translate the benchmark metric to an hnswlib space name.

        Raises ``KeyError`` for metrics other than 'angular'/'euclidean'.
        """
        space_names = {'angular': 'cosine', 'euclidean': 'l2'}
        self.metric = space_names[metric]
        self.method_param = method_param
        self.name = 'hnswlib (%s)' % self.method_param

    def fit(self, X):
        """Create an index sized for ``X`` and add every row to it."""
        point_count = len(X)
        index = hnswlib.Index(space=self.metric, dim=len(X[0]))
        self.p = index
        index.init_index(
            max_elements=point_count,
            ef_construction=self.method_param["efConstruction"],
            M=self.method_param["M"])
        # Label each point with its row number.
        labels = np.arange(point_count)
        index.add_items(np.asarray(X), labels)
        index.set_num_threads(1)

    def set_query_arguments(self, ef):
        """Forward ``ef`` to the index."""
        self.p.set_ef(ef)

    def query(self, v, n):
        """Query with ``v`` as a one-row batch and return its label row."""
        single_row_batch = np.expand_dims(v, axis=0)
        labels = self.p.knn_query(single_row_batch, k=n)[0]
        return labels[0]

    def freeIndex(self):
        """Drop the reference to the index built by ``fit``."""
        del self.p
37 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/kgraph.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import numpy
4 | import pykgraph
5 | from ann_benchmarks.constants import INDEX_DIR
6 | from ann_benchmarks.algorithms.base import BaseANN
7 |
8 |
class KGraph(BaseANN):
    """Benchmark adapter around ``pykgraph.KGraph`` with an on-disk index."""

    def __init__(self, metric, index_params, save_index):
        """Store the metric (as ``str``), build parameters and save flag."""
        metric_name = str(metric)
        self.name = 'KGraph(%s)' % metric_name
        self._metric = metric_name
        self._index_params = index_params
        self._save_index = save_index

    def fit(self, X):
        """Load the index saved for this metric, or build and save one."""
        data = X if X.dtype == numpy.float32 else X.astype(numpy.float32)
        self._kgraph = pykgraph.KGraph(data, self._metric)
        index_path = os.path.join(
            INDEX_DIR, 'kgraph-index-%s' % self._metric)
        if os.path.exists(index_path):
            self._kgraph.load(index_path)
            return
        # No saved index for this metric: build with the configured
        # parameters, then store it under INDEX_DIR.
        self._kgraph.build(**self._index_params)
        if not os.path.exists(INDEX_DIR):
            os.makedirs(INDEX_DIR)
        self._kgraph.save(index_path)

    def set_query_arguments(self, P):
        """Set the ``P`` value handed to every subsequent search."""
        self._P = P

    def query(self, v, n):
        """Search with ``v`` as a one-row float32 batch; return its row."""
        needle = v if v.dtype == numpy.float32 else v.astype(numpy.float32)
        rows = self._kgraph.search(
            numpy.array([needle]), K=n, threads=1, P=self._P)
        return rows[0]
40 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/mrpt.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy
3 | import sklearn.preprocessing
4 | import mrpt
5 | from ann_benchmarks.algorithms.base import BaseANN
6 |
7 |
class MRPT(BaseANN):
    """Benchmark adapter around an autotuned ``mrpt.MRPTIndex``."""

    def __init__(self, metric, count):
        """Store the metric and ``count``, the ``k`` used for autotuning."""
        self._metric = metric
        self._k = count

    def fit(self, X):
        """Build the autotuned index over float32 (and, for 'angular',
        L2-normalised) data."""
        data = X
        if data.dtype != numpy.float32:
            data = data.astype(numpy.float32)
        if self._metric == 'angular':
            data = sklearn.preprocessing.normalize(data, axis=1, norm='l2')

        autotuned = mrpt.MRPTIndex(data)
        self._index_autotuned = autotuned
        autotuned.build_autotune_sample(
            target_recall=None, k=self._k, n_test=1000)

    def set_query_arguments(self, target_recall):
        """Select the sub-index for ``target_recall`` and cache its
        parameters for ``__str__``."""
        self._target_recall = target_recall
        chosen = self._index_autotuned.subset(target_recall)
        self._index = chosen
        self._par = chosen.parameters()

    def query(self, v, n):
        """Query the selected sub-index; ``n`` is not passed to it."""
        needle = v
        if needle.dtype != numpy.float32:
            needle = needle.astype(numpy.float32)
        if self._metric == 'angular':
            as_row = needle.reshape(1, -1)
            needle = sklearn.preprocessing.normalize(
                as_row, axis=1, norm='l2').flatten()
        return self._index.ann(needle)

    def __str__(self):
        par = self._par
        fields = (self._target_recall, par['n_trees'], par['depth'],
                  par['votes'], par['estimated_recall'])
        return ('MRPT(target recall=%.3f, trees=%d, depth=%d, '
                'vote threshold=%d, estimated recall=%.3f)') % fields
42 |
--------------------------------------------------------------------------------
/install/Dockerfile.elasticsearch:
--------------------------------------------------------------------------------
FROM ann-benchmarks

WORKDIR /home/app

# Install elasticsearch.
ENV DEBIAN_FRONTEND=noninteractive
# Use apt-get (the stable scripting interface) and refresh the package index
# in the same layer so the install does not rely on cached apt lists.
RUN apt-get update && apt-get install -y wget curl htop
RUN wget --quiet https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-amd64.deb \
    && dpkg -i elasticsearch-7.9.2-amd64.deb \
    && rm elasticsearch-7.9.2-amd64.deb

# Install python client.
RUN python3 -m pip install --upgrade elasticsearch==7.9.1

# Configure elasticsearch and JVM for single-node, single-core.
RUN echo '\
discovery.type: single-node\n\
network.host: 0.0.0.0\n\
node.master: true\n\
node.data: true\n\
node.processors: 1\n\
thread_pool.write.size: 1\n\
thread_pool.search.size: 1\n\
thread_pool.search.queue_size: 1\n\
path.data: /var/lib/elasticsearch\n\
path.logs: /var/log/elasticsearch\n\
' > /etc/elasticsearch/elasticsearch.yml

RUN echo '\
-Xms3G\n\
-Xmx3G\n\
-XX:+UseG1GC\n\
-XX:G1ReservePercent=25\n\
-XX:InitiatingHeapOccupancyPercent=30\n\
-XX:+HeapDumpOnOutOfMemoryError\n\
-XX:HeapDumpPath=/var/lib/elasticsearch\n\
-XX:ErrorFile=/var/log/elasticsearch/hs_err_pid%p.log\n\
-Xlog:gc*,gc+age=trace,safepoint:file=/var/log/elasticsearch/gc.log:utctime,pid,tags:filecount=32,filesize=64m' > /etc/elasticsearch/jvm.options

# Make sure you can start the service.
RUN service elasticsearch start && service elasticsearch stop

# Custom entrypoint that also starts the Elasticsearch server.
RUN echo 'service elasticsearch start && python3 -u run_algorithm.py "$@"' > entrypoint.sh
ENTRYPOINT ["/bin/bash", "/home/app/entrypoint.sh"]
46 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/puffinn.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import puffinn
3 | from ann_benchmarks.algorithms.base import BaseANN
4 | import numpy
5 |
class Puffinn(BaseANN):
    """Benchmark adapter around ``puffinn.Index``."""

    def __init__(self, metric, space=10**6, hash_function="fht_crosspolytope", hash_source='pool', hash_args=None):
        """Store the index settings.

        Raises ``NotImplementedError`` unless ``metric`` is 'jaccard' or
        'angular'.
        """
        supported = ['jaccard', 'angular']
        if metric not in supported:
            raise NotImplementedError(
                "Puffinn doesn't support metric %s" % metric)
        self.metric = metric
        self.space = space
        self.hash_function = hash_function
        self.hash_source = hash_source
        self.hash_args = hash_args

    def fit(self, X):
        """Create the index, insert every entry of ``X`` and rebuild it."""
        options = dict(hash_function=self.hash_function,
                       hash_source=self.hash_source)
        # hash_args is only forwarded when it holds something truthy.
        if self.hash_args:
            options['hash_args'] = self.hash_args
        self.index = puffinn.Index(
            self.metric, len(X[0]), self.space, **options)

        as_list = self.metric == 'angular'
        for point in X:
            # Angular points are converted to plain lists before insertion.
            self.index.insert(point.tolist() if as_list else point)
        self.index.rebuild()

    def set_query_arguments(self, recall):
        """Set the recall value handed to every subsequent search."""
        self.recall = recall

    def query(self, v, n):
        """Search for ``n`` neighbours of ``v`` at the configured recall."""
        needle = v.tolist() if self.metric == 'angular' else v
        return self.index.search(needle, n, self.recall)

    def __str__(self):
        fields = (self.space, self.recall, self.hash_function,
                  self.hash_source)
        return 'PUFFINN(space=%d, recall=%f, hf=%s, hashsource=%s)' % fields
41 |
42 |
--------------------------------------------------------------------------------
/ann_benchmarks/distance.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from scipy.spatial.distance import pdist as scipy_pdist
3 | import itertools
4 | import numpy as np
5 |
def pdist(a, b, metric):
    """Return the scipy ``metric`` distance between the two points."""
    pairwise = scipy_pdist([a, b], metric=metric)
    # Two points yield exactly one pairwise distance.
    return pairwise[0]
8 |
9 | # Need own implementation of jaccard because scipy's
10 | # implementation is different
11 |
def jaccard(a, b):
    """Return the Jaccard similarity of ``a`` and ``b``.

    The result is 0 when either input is empty. The denominator uses
    ``len(a) + len(b)`` minus the number of distinct shared elements.
    """
    size_a, size_b = len(a), len(b)
    if size_a == 0 or size_b == 0:
        return 0
    shared = len(set(a).intersection(set(b)))
    return shared / float(size_a + size_b - shared)
17 |
def transform_dense_to_sparse(X):
    """Converts the n * m dataset into a sparse format
    that only holds the non-zero entries (Jaccard).

    Returns one list of column indices per row. A row without any
    non-zero entry produces no list, which trips the assertion below.
    """
    # (row, column) pairs of the non-zero elements
    nonzero_pairs = np.transpose(np.where(X))
    columns_per_row = [
        [column for _, column in row_group]
        for _, row_group in itertools.groupby(
            nonzero_pairs, lambda pair: pair[0])
    ]

    assert len(X) == len(columns_per_row)

    return columns_per_row
30 |
# For each supported metric: 'distance' computes the distance between two
# points and 'distance_valid' says whether a distance value is acceptable.
metrics = dict(
    hamming=dict(
        distance=lambda a, b: pdist(a, b, "hamming"),
        distance_valid=lambda a: True,
    ),
    # return 1 - jaccard similarity, because smaller distances are better.
    jaccard=dict(
        distance=lambda a, b: 1 - jaccard(a, b),
        distance_valid=lambda a: a < 1 - 1e-5,
    ),
    euclidean=dict(
        distance=lambda a, b: pdist(a, b, "euclidean"),
        distance_valid=lambda a: True,
    ),
    angular=dict(
        distance=lambda a, b: pdist(a, b, "cosine"),
        distance_valid=lambda a: True,
    ),
)
50 |
# Per-metric dataset preprocessing: only 'jaccard' changes the data (to the
# sparse list-of-indices form); the other metrics pass it through untouched.
dataset_transform = dict(
    hamming=lambda X: X,
    euclidean=lambda X: X,
    angular=lambda X: X,
    jaccard=lambda X: transform_dense_to_sparse(X),
)
57 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/nearpy.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import nearpy
3 | from nearpy.filters import NearestFilter
4 | import sklearn.preprocessing
5 | from ann_benchmarks.algorithms.base import BaseANN
6 |
7 |
class NearPy(BaseANN):
    """Benchmark adapter around a ``nearpy.Engine`` using random binary
    projection hashes."""

    def __init__(self, metric, n_bits, hash_counts):
        """Store the hashing parameters and create the shared result filter."""
        self._n_bits = n_bits
        self._hash_counts = hash_counts
        self._metric = metric
        # query() rewrites this filter's N on every call, so the same
        # object must be the one installed in the engine.
        self._filter = NearestFilter(10)
        self.name = 'NearPy(n_bits=%d, hash_counts=%d)' % (
            self._n_bits, self._hash_counts)

    def fit(self, X):
        """Create the engine and store every row of ``X`` under its row number.

        For the 'angular' metric the rows are L2-normalised first.
        """
        hashes = [
            nearpy.hashes.RandomBinaryProjections('rbp_%d' % k, self._n_bits)
            for k in range(self._hash_counts)
        ]

        # Install self._filter for every metric. The euclidean engine was
        # previously created without it, so the per-query ``N = n`` update
        # never reached the filter actually used by that engine.
        engine_options = {'lshashes': hashes,
                          'vector_filters': [self._filter]}
        if self._metric == 'euclidean':
            engine_options['distance'] = nearpy.distances.EuclideanDistance()
        # Any other metric keeps the engine's default distance
        # (angular = cosine distance).
        self._nearpy_engine = nearpy.Engine(X.shape[1], **engine_options)

        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        for i, x in enumerate(X):
            self._nearpy_engine.store_vector(x, i)

    def query(self, v, n):
        """Return the stored row numbers of the neighbours found for ``v``."""
        # XXX: This feels like an unpleasant hack, but it's not clear how to do
        # better without making changes to NearPy
        self._filter.N = n
        if self._metric == 'angular':
            v = sklearn.preprocessing.normalize([v], axis=1, norm='l2')[0]
        return [y for x, y, z in self._nearpy_engine.neighbours(v)]
49 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
# Travis CI configuration: for each LIBRARY/DATASET pair below, run the
# benchmark in that library's Docker image, plot the results and run the
# unit tests.
sudo: required

language: python
python:
- "3.6"

services:
- docker

# Each entry supplies the LIBRARY and DATASET variables used by the script
# commands below (presumably one CI job per entry -- confirm against the
# Travis build-matrix documentation).
env:
- LIBRARY=annoy DATASET=random-xs-20-angular
- LIBRARY=dolphinn DATASET=random-xs-20-angular
- LIBRARY=faiss DATASET=random-xs-20-angular
- LIBRARY=flann DATASET=random-xs-20-angular
- LIBRARY=kgraph DATASET=random-xs-20-angular
- LIBRARY=milvus DATASET=random-xs-20-angular
- LIBRARY=mrpt DATASET=random-xs-20-angular
- LIBRARY=n2 DATASET=random-xs-20-angular
- LIBRARY=nearpy DATASET=random-xs-20-angular
- LIBRARY=ngt DATASET=random-xs-20-angular
- LIBRARY=nmslib DATASET=random-xs-20-angular
- LIBRARY=hnswlib DATASET=random-xs-20-angular
- LIBRARY=puffinn DATASET=random-xs-20-angular
- LIBRARY=pynndescent DATASET=random-xs-20-angular
- LIBRARY=rpforest DATASET=random-xs-20-angular
- LIBRARY=sklearn DATASET=random-xs-20-angular
- LIBRARY=sptag DATASET=random-xs-20-angular
- LIBRARY=mih DATASET=random-xs-16-hamming
- LIBRARY=datasketch DATASET=random-s-jaccard
- LIBRARY=scann DATASET=random-xs-20-angular
- LIBRARY=elasticsearch DATASET=random-xs-20-angular
- LIBRARY=elastiknn DATASET=random-xs-20-angular

before_install:
- pip install -r requirements.txt
- python install.py

# The benchmark runs twice per job: once in the default mode and once with
# --batch; both runs are then plotted.
script:
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled --timeout 300
- python run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 5 --dataset $DATASET --run-disabled --batch --timeout 300
- sudo chmod -R 777 results/
- python plot.py --dataset $DATASET --output plot.png
- python plot.py --dataset $DATASET --output plot-batch.png --batch
- python -m unittest test/test-metrics.py
- python -m unittest test/test-jaccard.py
- python create_website.py --outputdir . --scatter --latex
47 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/milvus.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import milvus
3 | import numpy
4 | import sklearn.preprocessing
5 | from ann_benchmarks.algorithms.base import BaseANN
6 |
7 |
class Milvus(BaseANN):
    """Benchmark wrapper that drives a Milvus server through its Python client.

    The constructor connects to ``localhost:19530``; all vectors are stored
    in a single table named ``test01``.
    """

    def __init__(self, metric, index_type, nlist):
        """Store the index settings and connect to the local Milvus server.

        Args:
            metric: distance name; ``'angular'`` triggers L2 normalisation.
            index_type: attribute name looked up on ``milvus.IndexType``.
            nlist: ``nlist`` value passed to ``create_index``.
        """
        self._nlist = nlist
        self._nprobe = None
        self._metric = metric
        self._milvus = milvus.Milvus()
        self._milvus.connect(host='localhost', port='19530')
        self._table_name = 'test01'
        self._index_type = index_type

    def fit(self, X):
        """Create the table, insert every row of ``X`` and build the index."""
        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')

        self._milvus.create_table(
            {'table_name': self._table_name, 'dimension': X.shape[1]})
        # Row positions double as vector ids, so search results map straight
        # back to indices into X.
        vector_ids = list(range(len(X)))
        self._milvus.insert(
            table_name=self._table_name, records=X.tolist(), ids=vector_ids)
        index_type = getattr(milvus.IndexType, self._index_type)  # a bit hacky but works
        self._milvus.create_index(
            self._table_name, {'index_type': index_type, 'nlist': self._nlist})

    def set_query_arguments(self, nprobe):
        """Set ``nprobe`` for subsequent queries, clamped to ``nlist``."""
        if nprobe > self._nlist:
            print('warning! nprobe > nlist')
            nprobe = self._nlist
        self._nprobe = nprobe

    def query(self, v, n):
        """Return the ids of the ``n`` nearest stored vectors to ``v``.

        Returns an empty list when the server reports no results.
        """
        if self._metric == 'angular':
            # Out-of-place division: the caller's array must not be modified,
            # and in-place true division fails for integer-typed arrays.
            v = v / numpy.linalg.norm(v)
        v = v.tolist()
        status, results = self._milvus.search(
            table_name=self._table_name, query_records=[v], top_k=n,
            nprobe=self._nprobe)
        if not results:
            return []  # Seems to happen occasionally, not sure why
        return [result.id for result in results[0]]

    def __str__(self):
        # _nprobe stays None until set_query_arguments() has been called;
        # '%d' would raise TypeError on None, so format it separately.
        nprobe = 'None' if self._nprobe is None else '%d' % self._nprobe
        return 'Milvus(index_type=%s, nlist=%d, nprobe=%s)' % (
            self._index_type, self._nlist, nprobe)
46 |
--------------------------------------------------------------------------------
/protocol/ext-query-parameters.md:
--------------------------------------------------------------------------------
1 | (This document describes an extension that front-ends aren't required to implement. Front-ends that don't implement this extension should reject attempts to set the `query-parameters` front-end configuration option.)
2 |
3 | Many algorithms expose parameters that can be changed to adjust their search strategies without requiring that training data be resubmitted. When the front-end configuration option `query-parameters` is set to `1`, a new command will be added to query mode allowing these query configuration parameters to be changed.
4 |
5 | (Front-ends that support other optional query modes, such as prepared or batch queries, should also add this command to those modes.)
6 |
7 | ## Commands
8 |
9 | ### Configuration mode
10 |
11 | #### `frontend query-parameters V` (three tokens)
12 |
13 | If `V` is `1`, then request that query mode expose the `query-params` command. If `V` is anything else, then withdraw this request.
14 |
15 | Responses:
16 |
17 | * `epbprtv0 ok`
18 |
19 | The availability of the `query-params` command has been changed accordingly.
20 |
21 | * `epbprtv0 fail`
22 |
23 | This command has had no effect on the availability of the `query-params` command.
24 |
25 | ### Training mode
26 |
27 | This extension makes no changes to training mode.
28 |
29 | ### Query mode
30 |
31 | When the `query-parameters` front-end configuration option has been set to `1`, this extension adds one new command to query mode:
32 |
33 | #### `query-params [VALUE0, ..., VALUEk] set` (two or more tokens)
34 |
35 | Change the values of the query parameters.
36 |
37 | (The final token `set` is required. It exists for the sake of compatibility with the `batch-queries` extension, which also uses variable-length commands but which requires that the last token specify a number.)
38 |
39 | Responses:
40 |
41 | * `epbprtv0 ok`
42 |
43 | The query parameters were changed to the given values.
44 |
45 | * `epbprtv0 fail`
46 |
47 | The query parameters were not changed to the given values, perhaps because one of them was invalid.
48 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/faiss_gpu.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import sys
3 | # Assumes local installation of FAISS
4 | sys.path.append("faiss") # noqa
5 | import numpy
6 | import ctypes
7 | import faiss
8 | from ann_benchmarks.algorithms.base import BaseANN
9 |
10 | # Implementation based on
11 | # https://github.com/facebookresearch/faiss/blob/master/benchs/bench_gpu_sift1m.py # noqa
12 |
13 |
class FaissGPU(BaseANN):
    """Benchmark wrapper around a FAISS ``GpuIndexIVFFlat`` index (L2 metric)."""

    def __init__(self, n_bits, n_probes):
        """Remember the index settings and allocate the FAISS GPU resources."""
        self._n_bits = n_bits
        self._n_probes = n_probes
        self.name = 'FaissGPU(n_bits={}, n_probes={})'.format(
            n_bits, n_probes)
        self._res = faiss.StandardGpuResources()
        self._index = None

    def fit(self, X):
        """Train the index on ``X`` (cast to float32), add ``X``, set the probes."""
        data = X.astype(numpy.float32)
        dimension = len(data[0])
        # An alternative construction was sketched here in the past:
        # faiss.index_factory(dim, "IVF%d,Flat" % n_bits) moved to the GPU
        # via faiss.index_cpu_to_gpu with GpuClonerOptions.useFloat16 = True.
        self._index = faiss.GpuIndexIVFFlat(
            self._res, dimension, self._n_bits, faiss.METRIC_L2)
        self._index.train(data)
        self._index.add(data)
        self._index.setNumProbes(self._n_probes)

    def query(self, v, n):
        """Return only the labels produced by ``query_with_distances``."""
        labels = []
        for label, _ in self.query_with_distances(v, n):
            labels.append(label)
        return labels

    def query_with_distances(self, v, n):
        """Search for ``v`` and return ``(label, distance)`` pairs.

        Entries whose label is ``-1`` are dropped.
        """
        single_row = v.astype(numpy.float32).reshape(1, -1)
        distances, labels = self._index.search(single_row, n)
        return [(label, dist)
                for label, dist in zip(labels[0], distances[0])
                if label != -1]

    def batch_query(self, X, n):
        """Run one search for all rows of ``X``; keep the raw result in ``self.res``."""
        self.res = self._index.search(X.astype(numpy.float32), n)

    def get_batch_results(self):
        """Turn the stored batch result into one label list per query row."""
        D, L = self.res
        per_query = []
        for row in range(len(D)):
            kept = [label for label, _ in zip(L[row], D[row]) if label != -1]
            per_query.append(kept)
        return per_query
62 |
--------------------------------------------------------------------------------
/install/Dockerfile.elastiknn:
--------------------------------------------------------------------------------
1 | FROM ann-benchmarks
2 |
3 | WORKDIR /home/app
4 |
5 | # Install elasticsearch.
6 | ENV DEBIAN_FRONTEND noninteractive
7 | RUN apt install -y wget curl htop
8 | RUN wget --quiet https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-oss-7.9.2-amd64.deb \
9 | && dpkg -i elasticsearch-oss-7.9.2-amd64.deb \
10 | && rm elasticsearch-oss-7.9.2-amd64.deb
11 |
12 | # Install python client.
13 | RUN python3 -m pip install --upgrade elastiknn-client==0.1.0rc47
14 |
15 | # Install plugin.
16 | RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch \
17 | https://github.com/alexklibisz/elastiknn/releases/download/0.1.0-PRE47/elastiknn-0.1.0-PRE47_es7.9.2.zip
18 |
19 | # Configure elasticsearch and JVM for single-node, single-core.
20 | RUN cp /etc/elasticsearch/jvm.options /etc/elasticsearch/jvm.options.bak
21 | RUN cp /etc/elasticsearch/elasticsearch.yml /etc/elasticsearch/elasticsearch.yml.bak
22 |
23 | RUN echo '\
24 | discovery.type: single-node\n\
25 | network.host: 0.0.0.0\n\
26 | node.master: true\n\
27 | node.data: true\n\
28 | node.processors: 1\n\
29 | thread_pool.write.size: 1\n\
30 | thread_pool.search.size: 1\n\
31 | thread_pool.search.queue_size: 1\n\
32 | path.data: /var/lib/elasticsearch\n\
33 | path.logs: /var/log/elasticsearch\n\
34 | ' > /etc/elasticsearch/elasticsearch.yml
35 |
36 | RUN echo '\
37 | -Xms3G\n\
38 | -Xmx3G\n\
39 | -XX:+UseG1GC\n\
40 | -XX:G1ReservePercent=25\n\
41 | -XX:InitiatingHeapOccupancyPercent=30\n\
42 | -XX:+HeapDumpOnOutOfMemoryError\n\
43 | -XX:HeapDumpPath=/var/lib/elasticsearch\n\
44 | -XX:ErrorFile=/var/log/elasticsearch/hs_err_pid%p.log\n\
45 | -Xlog:gc*,gc+age=trace,safepoint:file=/var/log/elasticsearch/gc.log:utctime,pid,tags:filecount=32,filesize=64m\n\
46 | -Dcom.sun.management.jmxremote.ssl=false\n\
47 | -Dcom.sun.management.jmxremote.authenticate=false\n\
48 | -Dcom.sun.management.jmxremote.local.only=false\n\
49 | -Dcom.sun.management.jmxremote.port=8097\n\
50 | -Dcom.sun.management.jmxremote.rmi.port=8097\n\
51 | -Djava.rmi.server.hostname=localhost' > /etc/elasticsearch/jvm.options
52 |
53 | # JMX port. Need to also map the port when running.
54 | EXPOSE 8097
55 |
56 | # Make sure you can start the service.
57 | RUN service elasticsearch start && service elasticsearch stop
58 |
59 | # Custom entrypoint that also starts the Elasticsearch server.
60 | RUN echo 'service elasticsearch start && python3 -u run_algorithm.py "$@"' > entrypoint.sh
61 | ENTRYPOINT ["/bin/bash", "/home/app/entrypoint.sh"]
62 |
--------------------------------------------------------------------------------
/algosP.yaml:
--------------------------------------------------------------------------------
1 | float:
2 | any:
3 | bruteforce:
4 | docker-tag: ann-benchmarks-sklearn
5 | module: ann_benchmarks.algorithms.bruteforce
6 | constructor: BruteForce
7 | base-args: ["@metric"]
8 | run-groups:
9 | empty:
10 | args: []
11 | bruteforce-blas:
12 | docker-tag: ann-benchmarks-sklearn
13 | module: ann_benchmarks.algorithms.bruteforce
14 | constructor: BruteForceBLAS
15 | base-args: ["@metric"]
16 | run-groups:
17 | empty:
18 | args: []
19 | angular:
20 | pp-bruteforce-lo:
21 | module: ann_benchmarks.algorithms.subprocess
22 | docker-tag: ann-benchmarks-subprocess
23 | constructor: FloatSubprocess
24 | base-args: [["protocol/bf-runner"]]
25 | run-groups:
26 | jf-linear:
27 | args: {"point-type": "float", "distance": "angular"}
28 | pp-bruteforce-hi:
29 | module: ann_benchmarks.algorithms.subprocess
30 | docker-tag: ann-benchmarks-subprocess
31 | constructor: FloatSubprocessPrepared
32 | base-args: [["protocol/bf-runner"]]
33 | run-groups:
34 | jf-linear:
35 | args: {"point-type": "float", "distance": "angular"}
36 | pp-bruteforce-blas-lo:
37 | module: ann_benchmarks.algorithms.subprocess
38 | docker-tag: ann-benchmarks-subprocess
39 | constructor: FloatSubprocess
40 | base-args: [["protocol/bf-runner"]]
41 | run-groups:
42 | jf-linear:
43 | args: {"point-type": "float", "distance": "angular", "fast": 1}
44 | pp-bruteforce-blas-hi:
45 | module: ann_benchmarks.algorithms.subprocess
46 | docker-tag: ann-benchmarks-subprocess
47 | constructor: FloatSubprocessPrepared
48 | base-args: [["protocol/bf-runner"]]
49 | run-groups:
50 | jf-linear:
51 | args: {"point-type": "float", "distance": "angular", "fast": 1}
52 | pp-bruteforce-batch:
53 | module: ann_benchmarks.algorithms.subprocess
54 | docker-tag: ann-benchmarks-subprocess
55 | constructor: FloatSubprocessBatch
56 | base-args: [["protocol/bf-runner"]]
57 | run-groups:
58 | jf-linear:
59 | args: {"point-type": "float", "distance": "angular"}
60 | pp-bruteforce-blas-batch:
61 | module: ann_benchmarks.algorithms.subprocess
62 | docker-tag: ann-benchmarks-subprocess
63 | constructor: FloatSubprocessBatch
64 | base-args: [["protocol/bf-runner"]]
65 | run-groups:
66 | jf-linear:
67 | args: {"point-type": "float", "distance": "angular", "fast": 1}
68 |
--------------------------------------------------------------------------------
/install.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import argparse
4 | import subprocess
5 | from multiprocessing import Pool
6 | from ann_benchmarks.main import positive_int
7 |
8 |
def build(library, args):
    """Build the Docker image for one library and report the outcome.

    Runs ``docker build`` with ``install/Dockerfile.<library>`` and tags the
    image ``ann-benchmarks-<library>``.

    Args:
        library: library name used for both the Dockerfile suffix and the tag.
        args: optional iterable of ``--build-arg`` values (``None`` or empty
            for none).

    Returns:
        ``{library: 'success'}`` when the build command exits with status 0,
        otherwise ``{library: 'fail'}``.
    """
    print('Building %s...' % library)

    # Pass an argument vector instead of a shell string: build-arg values are
    # delivered to docker verbatim, whatever characters they contain, and
    # nothing is interpreted by a shell.
    command = ['docker', 'build']
    for build_arg in args or []:
        command += ['--build-arg', build_arg]
    command += ['--rm', '-t', 'ann-benchmarks-%s' % library,
                '-f', 'install/Dockerfile.%s' % library, '.']

    try:
        subprocess.check_call(command)
        return {library: 'success'}
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a docker executable that cannot be started at all;
        # with the former shell invocation that case also ended up as 'fail'.
        return {library: 'fail'}
23 |
24 |
def build_multiprocess(args):
    """Unpack ``args`` and forward its elements to ``build`` as positional arguments.

    The script's multi-process branch maps this function over
    ``(tag, build_arg)`` tuples with ``Pool.map``.
    """
    return build(*args)
27 |
28 |
if __name__ == "__main__":
    # Command-line entry point: build the base image, then either a single
    # algorithm image (--algorithm or the LIBRARY environment variable) or
    # one image per install/Dockerfile.* file.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--proc",
        default=1,
        type=positive_int,
        help="the number of process to build docker images")
    parser.add_argument(
        '--algorithm',
        metavar='NAME',
        help='build only the named algorithm image',
        default=None)
    parser.add_argument(
        '--build-arg',
        help='pass given args to all docker builds',
        nargs="+")
    args = parser.parse_args()

    print('Building base image...')
    subprocess.check_call(
        'docker build --rm -t ann-benchmarks -f install/Dockerfile .',
        shell=True)

    # --algorithm takes precedence over the LIBRARY environment variable.
    single_target = args.algorithm or os.getenv('LIBRARY')
    if single_target:
        print('Building algorithm(%s) image...' % single_target)
        # Keep the status so a failed single build is reported as well
        # instead of being silently discarded.
        install_status = [build(single_target, args.build_arg)]
    else:
        print('Building algorithm images... with (%d) processes' % args.proc)
        tags = [fn.split('.')[-1] for fn in os.listdir('install')
                if fn.startswith('Dockerfile.')]

        if args.proc == 1:
            install_status = [build(tag, args.build_arg) for tag in tags]
        else:
            pool = Pool(processes=args.proc)
            try:
                install_status = pool.map(
                    build_multiprocess,
                    [(tag, args.build_arg) for tag in tags])
            finally:
                # Shut the workers down even when map() raises.
                pool.close()
                pool.join()

    print('\n\nInstall Status:\n'
          + '\n'.join(str(algo) for algo in install_status))
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/faiss.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import sys
3 | sys.path.append("install/lib-faiss") # noqa
4 | import numpy
5 | import sklearn.preprocessing
6 | import ctypes
7 | import faiss
8 | from ann_benchmarks.algorithms.base import BaseANN
9 |
10 |
class Faiss(BaseANN):
    """Query logic shared by the FAISS-backed algorithms.

    The methods read ``self.index`` and ``self._metric``, which the
    subclasses in this module assign.
    """

    def query(self, v, n):
        """Return the labels FAISS reports for the ``n`` nearest neighbours of ``v``."""
        if self._metric == 'angular':
            # Out-of-place so the caller's vector is left untouched (in-place
            # true division also fails for integer-typed arrays).
            v = v / numpy.linalg.norm(v)
        D, I = self.index.search(
            numpy.expand_dims(v, axis=0).astype(numpy.float32), n)
        return I[0]

    def batch_query(self, X, n):
        """Search for every row of ``X`` at once; keep the raw result in ``self.res``."""
        if self._metric == 'angular':
            # Normalise each query row by its own length. Calling
            # numpy.linalg.norm(X) without an axis yields one norm for the
            # whole matrix, which does not produce unit-length rows.
            X = X / numpy.linalg.norm(X, axis=1, keepdims=True)
        self.res = self.index.search(X.astype(numpy.float32), n)

    def get_batch_results(self):
        """Return one list of labels per query row, skipping ``-1`` labels."""
        D, L = self.res
        return [[label for label in labels if label != -1] for labels in L]
34 |
35 |
class FaissLSH(Faiss):
    """FAISS ``IndexLSH`` wrapper configured by the number of hash bits."""

    def __init__(self, metric, n_bits):
        """Record the metric and bit count; the index is created in ``fit``."""
        self._metric = metric
        self._n_bits = n_bits
        self.index = None
        self.name = 'FaissLSH(n_bits={})'.format(n_bits)

    def fit(self, X):
        """Train an ``IndexLSH`` on ``X`` (as float32) and add all rows to it."""
        data = X if X.dtype == numpy.float32 else X.astype(numpy.float32)
        dimension = data.shape[1]
        self.index = faiss.IndexLSH(dimension, self._n_bits)
        lsh = self.index
        lsh.train(data)
        lsh.add(data)
50 |
51 |
class FaissIVF(Faiss):
    """FAISS ``IndexIVFFlat`` wrapper (flat L2 quantizer, L2 metric)."""

    def __init__(self, metric, n_list):
        """Record the metric and the ``n_list`` value passed to ``IndexIVFFlat``."""
        self._n_list = n_list
        self._metric = metric
        # Defined up front so __str__ works before set_query_arguments().
        self._n_probe = None

    def fit(self, X):
        """Train the IVF index on ``X`` and add all rows.

        Rows are L2-normalised first for the ``'angular'`` metric and cast
        to float32 when necessary.
        """
        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')

        if X.dtype != numpy.float32:
            X = X.astype(numpy.float32)

        self.quantizer = faiss.IndexFlatL2(X.shape[1])
        index = faiss.IndexIVFFlat(
            self.quantizer, X.shape[1], self._n_list, faiss.METRIC_L2)
        index.train(X)
        index.add(X)
        self.index = index

    def set_query_arguments(self, n_probe):
        """Reset the global IVF statistics and set ``nprobe`` on the index."""
        faiss.cvar.indexIVF_stats.reset()
        self._n_probe = n_probe
        self.index.nprobe = self._n_probe

    def get_additional(self):
        """Return ``dist_comps``: ``ndis + nq * n_list`` from the IVF statistics."""
        return {"dist_comps": faiss.cvar.indexIVF_stats.ndis +      # noqa
                faiss.cvar.indexIVF_stats.nq * self._n_list}

    def __str__(self):
        # '%d' cannot format None, so render an unset n_probe explicitly.
        n_probe = 'None' if self._n_probe is None else '%d' % self._n_probe
        return 'FaissIVF(n_list=%d, n_probe=%s)' % (self._n_list, n_probe)
83 |
--------------------------------------------------------------------------------
/protocol/ext-batch-queries.md:
--------------------------------------------------------------------------------
1 | (This document describes an extension that front-ends aren't required to implement. Front-ends that don't implement this extension should reject attempts to set the `batch-queries` front-end configuration option.)
2 |
3 | When the front-end configuration option `batch-queries` is set to `1`, after finishing training mode, the front-end will transition to batch query mode instead of query mode. In batch query mode, all queries are submitted at once, and the front-end will indicate when the queries have finished before any results are returned.
4 |
5 | ## Commands
6 |
7 | ### Configuration mode
8 |
9 | #### `frontend batch-queries V` (three tokens)
10 |
11 | If `V` is `1`, then request that the front-end transition into batch query mode, and not query mode, after training mode has finished. If `V` is anything else, then request that it transition into query mode as usual.
12 |
13 | Responses:
14 |
15 | * `epbprtv0 ok`
16 |
17 | The front-end will transition into the requested query mode after the training mode has finished.
18 |
19 | * `epbprtv0 fail`
20 |
21 | This command has had no effect on the query mode transition.
22 |
23 | ### Training mode
24 |
25 | This extension changes the behaviour of one command in training mode:
26 |
27 | #### *empty line* (zero tokens)
28 |
29 | Finish training mode and enter batch query mode.
30 |
31 | Responses:
32 |
33 | * `epbprtv0 ok COUNT1 [fail COUNT2]`
34 |
35 | `COUNT1` (potentially zero) entries were successfully interpreted and added to the data structure. (`COUNT2` entries couldn't be interpreted or couldn't be added for other reasons.)
36 |
37 | ### Batch query mode
38 |
39 | In batch query mode, front-ends should respond to three different kinds of command:
40 |
41 | #### `ENTRY0 [..., ENTRYk] N` (two or more tokens)
42 |
43 | Prepare to run a query to find at most `N` (greater than or equal to 1) close matches for each of the `k` query points from `ENTRY0` to `ENTRYk`.
44 |
45 | Responses:
46 |
47 | * `epbprtv0 ok`
48 |
49 | Preparation is complete, and the `query` command can now be used.
50 |
51 | * `epbprtv0 fail`
52 |
53 | Preparation has failed, and the `query` command should not be used. This may occur if one of the `k` query points could not be parsed.
54 |
55 | #### `query` (one token)
56 |
57 | Run the last prepared query.
58 |
59 | Responses:
60 |
61 | * `epbprtv0 ok`
62 |
63 | The query was executed successfully. `k` sets of results will appear after this line, each of them of the same form as in the normal query mode.
64 |
65 | * `epbprtv0 fail`
66 |
67 | No query has been prepared.
68 |
69 | #### *empty line* (zero tokens)
70 |
71 | Finish batch query mode and terminate the front-end.
72 |
73 | Responses:
74 |
75 | * `epbprtv0 ok`
76 |
77 | The front-end has terminated.
78 |
--------------------------------------------------------------------------------
/protocol/ext-add-query-metric.md:
--------------------------------------------------------------------------------
1 | (This document describes an extension that front-ends aren't required to implement. In fact, no front-end is *known* to implement it; this document serves as an example of how to extend the protocol. Front-ends that don't implement this extension should reject attempts to set the `add-query-metric` configuration option.)
2 |
3 | When the configuration option `add-query-metric` is set to a value other than `all`, if that value identifies a query metric known to the front-end, then the value for this metric will be appended to each query response. This option may be set several times; each one will (try to) add another query metric.
4 |
5 | Setting this option to the value `all` will cause *all* metrics known to the front-end to be included.
6 |
7 | ## Commands
8 |
9 | ### Configuration mode
10 |
11 | #### `add-query-metric METRIC` (two tokens)
12 |
13 | Request that query responses also include the value of the query metric `METRIC`, if that's recognised by the front-end.
14 |
15 | Responses:
16 |
17 | * `epbprtv0 ok`
18 |
19 | The metric `METRIC` was recognised, and query responses will include a value for it.
20 |
21 | * `epbprtv0 fail`
22 |
23 | The metric `METRIC` was not recognised; query responses will not include a value for it.
24 |
25 | #### `add-query-metric all` (two tokens)
26 |
27 | Request that query responses also include the values of all query metrics recognised by the front-end.
28 |
29 | Responses:
30 |
31 | * `epbprtv0 ok`
32 |
33 | Query responses will include the values of all metrics known to the front-end. (This may not actually change the output; the front-end could, in principle, support this extension but not recognise any query metrics.)
34 |
35 | * `epbprtv0 fail`
36 |
37 | Front-ends may choose to emit this response if they do not recognise *any* query metrics, but they may also emit `epbprtv0 ok` in these circumstances (to indicate that all zero metrics will be included in the output).
38 |
39 | ### Query mode
40 |
41 | #### `ENTRY N` (two tokens)
42 |
43 | This extension changes the behaviour of one response:
44 |
45 | * `epbprtv0 ok R [NAME0 VALUE0 ...]`
46 |
47 | `R` (greater than zero and less than or equal to `N`) close matches were found. Each of the next `R` lines, when tokenised, will consist of the token `epbprtv0` followed by a token specifying the index of a close match. (The first line should identify the *closest* close match, and the `R`-th should identify the furthest away.)
48 |
49 | If additional query metrics were specified and recognised during configuration mode, then their names and values will be provided as a number of pairs of tokens after `R`. For example, a response including the hypothetical `buckets_searched` and `candidates_checked` metrics might look like this:
50 |
51 | `epbprtv0 ok 10 buckets_searched 8 candidates_checked 507`
52 |
--------------------------------------------------------------------------------
/ann_benchmarks/results.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import h5py
4 | import json
5 | import os
6 | import re
7 | import traceback
8 |
9 |
def get_algorithm_name(name, batch_mode):
    """Return ``name``, with ``-batch`` appended when ``batch_mode`` is truthy."""
    return name + "-batch" if batch_mode else name


def is_batch(name):
    """Return True when ``name`` contains the ``-batch`` marker."""
    return "-batch" in name


def get_result_filename(dataset=None, count=None, definition=None,
                        query_arguments=None, batch_mode=False):
    """Build the path under ``results`` that belongs to a run.

    Path components are appended only for the arguments that are given:
    dataset, count, the (batch-aware) algorithm name, and a file name
    derived from the JSON dump of the constructor plus query arguments,
    with every run of non-word characters collapsed to ``_``.

    Args:
        dataset: dataset name, or a falsy value to omit it.
        count: neighbour count, or a falsy value to omit it.
        definition: object exposing ``algorithm`` and ``arguments``; when
            falsy the path stops at the directory level.
        query_arguments: query-time arguments; ``None`` means "none".
        batch_mode: whether the run was executed in batch mode.

    Returns:
        The path joined with ``os.path.join``.
    """
    parts = ['results']
    if dataset:
        parts.append(dataset)
    if count:
        parts.append(str(count))
    if definition:
        parts.append(get_algorithm_name(definition.algorithm, batch_mode))
        # query_arguments defaults to None; treat that as an empty list
        # instead of failing on ``list + None``.
        data = list(definition.arguments) + list(query_arguments or [])
        encoded = json.dumps(data, sort_keys=True)
        parts.append(re.sub(r'\W+', '_', encoded).strip('_'))
    return os.path.join(*parts)
33 |
34 |
def store_results(dataset, count, definition, query_arguments, attrs, results,
                  batch):
    """Write one run's results to its HDF5 file.

    The file gets ``attrs`` as HDF5 attributes and three datasets with one
    row per entry of ``results``: ``times``, ``neighbors`` and ``distances``.
    Rows with fewer than ``count`` neighbours are padded with ``-1``
    (neighbors) and ``inf`` (distances).

    Args:
        dataset, count, definition, query_arguments, batch: forwarded to
            ``get_result_filename`` to determine the output path.
        attrs: mapping written to the file's attributes.
        results: sequence of ``(time, [(neighbor, distance), ...])`` items.
    """
    fn = get_result_filename(
        dataset, count, definition, query_arguments, batch)
    head = os.path.dirname(fn)
    if not os.path.isdir(head):
        try:
            os.makedirs(head)
        except OSError:
            # Another process may have created the directory in between;
            # only re-raise when it really is missing.
            if not os.path.isdir(head):
                raise
    # The context manager closes the file even when a write below fails.
    with h5py.File(fn, 'w') as f:
        for k, v in attrs.items():
            f.attrs[k] = v
        times = f.create_dataset('times', (len(results),), 'f')
        neighbors = f.create_dataset('neighbors', (len(results), count), 'i')
        distances = f.create_dataset('distances', (len(results), count), 'f')
        for i, (time, ds) in enumerate(results):
            padding = count - len(ds)
            times[i] = time
            neighbors[i] = [n for n, d in ds] + [-1] * padding
            distances[i] = [d for n, d in ds] + [float('inf')] * padding
53 |
54 |
def load_all_results(dataset=None, count=None, split_batched=False,
                     batch_mode=False):
    """Yield ``(properties, file)`` for every result file below the results path.

    ``properties`` is a dict copy of the HDF5 attributes with byte values
    decoded to ``str`` where possible; ``file`` is the open ``h5py.File``
    (mode ``'r+'``), which is closed again once the consumer asks for the
    next item or stops iterating.

    Args:
        dataset, count: forwarded to ``get_result_filename`` to pick the
            directory that is walked.
        split_batched: when true, only yield files whose directory matches
            ``batch_mode`` according to ``is_batch``.
        batch_mode: the batch flag to match when ``split_batched`` is set.

    Files that cannot be read are reported on stdout (with a traceback) and
    skipped.
    """
    for root, _, files in os.walk(get_result_filename(dataset, count)):
        # The batch filter depends on the directory only, so decide once
        # per directory rather than once per file.
        if split_batched and batch_mode != is_batch(root):
            continue
        for fn in files:
            try:
                # ``with`` closes the file even if the consumer abandons
                # the generator while it is suspended at the yield.
                with h5py.File(os.path.join(root, fn), 'r+') as f:
                    properties = dict(f.attrs)
                    # TODO Fix this properly. Sometimes the hdf5 file
                    # returns bytes. This converts these bytes to strings
                    # before we work with them.
                    for k in list(properties):
                        try:
                            properties[k] = properties[k].decode()
                        except (AttributeError, UnicodeDecodeError):
                            # Not bytes (no .decode) or not decodable:
                            # keep the value as it is.
                            pass
                    yield properties, f
            except Exception:
                # Deliberately not a bare ``except``: GeneratorExit must
                # propagate, otherwise closing the generator early would be
                # reported as an unreadable file and the loop would go on
                # to yield again.
                print('Was unable to read', fn)
                traceback.print_exc()
76 |
77 |
def get_unique_algorithms():
    """Return the set of ``'algo'`` property values over all stored results."""
    return {properties['algo'] for properties, _ in load_all_results()}
83 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/nmslib.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import os
3 | import nmslib
4 | from ann_benchmarks.constants import INDEX_DIR
5 | from ann_benchmarks.algorithms.base import BaseANN
6 |
7 |
class NmslibReuseIndex(BaseANN):
    """nmslib wrapper that loads a previously saved index file when one exists."""

    @staticmethod
    def encode(d):
        """Turn a mapping into a list of ``"key=value"`` strings."""
        return ["%s=%s" % (a, b) for (a, b) in d.items()]

    def __init__(self, metric, method_name, index_param, query_param):
        """Translate the settings into nmslib form and prepare the index path.

        Args:
            metric: ``'angular'`` or ``'euclidean'`` (anything else raises
                ``KeyError``).
            method_name: nmslib method name.
            index_param: mapping of index-time parameters.
            query_param: mapping of query-time parameters, or ``False`` for
                none.
        """
        self._nmslib_metric = {
            'angular': 'cosinesimil', 'euclidean': 'l2'}[metric]
        self._method_name = method_name
        self._save_index = False
        self._index_param = NmslibReuseIndex.encode(index_param)
        if query_param is not False:
            self._query_param = NmslibReuseIndex.encode(query_param)
            self.name = ('Nmslib(method_name={}, index_param={}, '
                         'query_param={})'.format(self._method_name,
                                                  self._index_param,
                                                  self._query_param))
        else:
            self._query_param = None
            self.name = 'Nmslib(method_name=%s, index_param=%s)' % (
                self._method_name, self._index_param)

        self._index_name = os.path.join(INDEX_DIR, "nmslib_%s_%s_%s" % (
            self._method_name, metric, '_'.join(self._index_param)))

        d = os.path.dirname(self._index_name)
        if not os.path.exists(d):
            os.makedirs(d)

    def fit(self, X):
        """Load the index from ``self._index_name`` if present, else build it."""
        # Work on a copy so that repeated fit() calls do not keep appending
        # another bucketSize entry to the instance's parameter list.
        index_param = list(self._index_param)
        if self._method_name == 'vptree':
            # Pick the bucket size from the data size to avoid nmslib
            # aborting with std::runtime_error ("The data size is too small
            # or the bucket size is too big. ...").
            index_param.append('bucketSize=%d' %
                               min(int(X.shape[0] * 0.0005), 1000))

        self._index = nmslib.init(
            space=self._nmslib_metric, method=self._method_name)
        self._index.addDataPointBatch(X)

        if os.path.exists(self._index_name):
            print('Loading index from file')
            self._index.loadIndex(self._index_name)
        else:
            self._index.createIndex(index_param)
            if self._save_index:
                self._index.saveIndex(self._index_name)
        if self._query_param is not None:
            self._index.setQueryTimeParams(self._query_param)

    def set_query_arguments(self, ef):
        """Set ``efSearch`` for the ``hnsw`` and ``sw-graph`` methods only."""
        if self._method_name == 'hnsw' or self._method_name == 'sw-graph':
            self._index.setQueryTimeParams(["efSearch=%s" % (ef)])

    def query(self, v, n):
        """Return the ids from ``knnQuery`` for ``v`` (distances are dropped)."""
        ids, distances = self._index.knnQuery(v, n)
        return ids

    def batch_query(self, X, n):
        """Run ``knnQueryBatch`` and keep the raw result in ``self.res``."""
        self.res = self._index.knnQueryBatch(X, n)

    def get_batch_results(self):
        """Return the first element of every entry stored by ``batch_query``."""
        return [x for x, _ in self.res]
75 |
--------------------------------------------------------------------------------
/ann_benchmarks/algorithms/panng_ngt.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import sys
3 | import os
4 | import ngtpy
5 | import numpy as np
6 | import subprocess
7 | import time
8 | from ann_benchmarks.algorithms.base import BaseANN
9 | from ann_benchmarks.constants import INDEX_DIR
10 |
11 |
12 | class PANNG(BaseANN):
13 | def __init__(self, metric, object_type, param):
14 | metrics = {'euclidean': 'L2', 'angular': 'Cosine'}
15 | self._edge_size = int(param['edge'])
16 | self._pathadj_size = int(param['pathadj'])
17 | self._edge_size_for_search = int(param['searchedge'])
18 | self._metric = metrics[metric]
19 | self._object_type = object_type
20 | print('PANNG: edge_size=' + str(self._edge_size))
21 | print('PANNG: pathadj_size=' + str(self._pathadj_size))
22 | print('PANNG: edge_size_for_search=' + str(self._edge_size_for_search))
23 | print('PANNG: metric=' + metric)
24 | print('PANNG: object_type=' + object_type)
25 |
26 | def fit(self, X):
27 | print('PANNG: start indexing...')
28 | dim = len(X[0])
29 | print('PANNG: # of data=' + str(len(X)))
30 | print('PANNG: Dimensionality=' + str(dim))
31 | index_dir = 'indexes'
32 | if not os.path.exists(index_dir):
33 | os.makedirs(index_dir)
34 | index = os.path.join(
35 | index_dir,
36 | 'PANNG-' + str(self._edge_size) + '-' + str(self._pathadj_size))
37 | print(index)
38 | if os.path.exists(index):
39 | print('PANNG: index already exists! ' + str(index))
40 | else:
41 | t0 = time.time()
42 | ngtpy.create(path=index, dimension=dim,
43 | edge_size_for_creation=self._edge_size,
44 | distance_type=self._metric,
45 | object_type=self._object_type)
46 | idx = ngtpy.Index(path=index)
47 | idx.batch_insert(X, num_threads=24, debug=False)
48 | idx.save()
49 | idx.close()
50 | if self._pathadj_size > 0:
51 | print('PANNG: path adjustment')
52 | args = ['ngt', 'prune', '-s ' + str(self._pathadj_size),
53 | index]
54 | subprocess.call(args)
55 | indexingtime = time.time() - t0
56 | print('PANNG: indexing, adjustment and saving time(sec)={}'
57 | .format(indexingtime))
58 | t0 = time.time()
59 | self.index = ngtpy.Index(path=index, read_only=True)
60 | opentime = time.time() - t0
61 | print('PANNG: open time(sec)=' + str(opentime))
62 |
63 | def set_query_arguments(self, epsilon):
64 | print("PANNG: epsilon=" + str(epsilon))
65 | self._epsilon = epsilon - 1.0
66 | self.name = 'PANNG-NGT(%d, %d, %d, %1.3f)' % (
67 | self._edge_size,
68 | self._pathadj_size,
69 | self._edge_size_for_search,
70 | self._epsilon + 1.0)
71 |
72 | def query(self, v, n):
73 | results = self.index.search(
74 | v, n, self._epsilon, self._edge_size_for_search,
75 | with_distance=False)
76 | return results
77 |
def freeIndex(self):
    """Release the index handle opened by ``fit``.

    Previously this only printed a message and left the index open.  The
    handle is now closed and dropped, so the method is safe to call more
    than once and before ``fit`` has run.
    """
    print('PANNG: free')
    index = getattr(self, 'index', None)
    if index is not None:
        index.close()
        # Drop the reference so a second call does not close it twice.
        self.index = None
80 |
--------------------------------------------------------------------------------
/protocol/ext-prepared-queries.md:
--------------------------------------------------------------------------------
1 | (This document describes an extension that front-ends aren't required to implement. Front-ends that don't implement this extension should reject attempts to set the `prepared-queries` front-end configuration option.)
2 |
3 | When the front-end configuration option `prepared-queries` is set to `1`, after finishing training mode, the front-end will transition to prepared query mode instead of query mode. In prepared query mode, parsing a query point -- a potentially expensive operation -- and actually running a query are two different commands; this makes the query timings more representative of the underlying algorithm's behaviour without the overhead of this protocol.
4 |
5 | ## Commands
6 |
7 | ### Configuration mode
8 |
9 | #### `frontend prepared-queries V` (three tokens)
10 |
11 | If `V` is `1`, then request that the front-end transition into prepared query mode, and not query mode, after training mode has finished. If `V` is anything else, then request that it transition into query mode as usual.
12 |
13 | Responses:
14 |
15 | * `epbprtv0 ok`
16 |
17 | The front-end will transition into the requested query mode after the training mode has finished.
18 |
19 | * `epbprtv0 fail`
20 |
21 | This command has had no effect on the query mode transition.
22 |
23 | ### Training mode
24 |
25 | This extension changes the behaviour of one command in training mode:
26 |
27 | #### *empty line* (zero tokens)
28 |
29 | Finish training mode and enter prepared query mode.
30 |
31 | Responses:
32 |
33 | * `epbprtv0 ok COUNT1 [fail COUNT2]`
34 |
35 | `COUNT1` (potentially zero) entries were successfully interpreted and added to the data structure. (`COUNT2` entries couldn't be interpreted or couldn't be added for other reasons.)
36 |
37 | ### Prepared query mode
38 |
39 | In prepared query mode, front-ends should respond to three different kinds of command:
40 |
41 | #### `ENTRY N` (two tokens)
42 |
43 | Prepare to run a query to find at most `N` (greater than or equal to 1) close matches for `ENTRY`.
44 |
45 | Responses:
46 |
47 | * `epbprtv0 ok prepared true`
48 |
49 | Preparation is complete, the `query` command can now be used, and the underlying library wrapper has special support for prepared queries.
50 |
51 | * `epbprtv0 ok prepared false`
52 |
53 | The `query` command can now be used, but the underlying library wrapper doesn't have support for prepared queries, so the `query` command will perform the parsing of `ENTRY` as it would in normal query mode.
54 |
55 | #### `query` (one token)
56 |
57 | Run the last prepared query.
58 |
59 | Responses:
60 |
61 | * `epbprtv0 ok R`
62 |
63 | `R` (greater than zero and less than or equal to the value of `N` that was specified when the query was prepared) close matches were found. The next `R` lines, when tokenised, will consist of the token `epbprtv0` followed by a token specifying the index of a close match. (The first line should identify the *closest* close match, and the `R`-th should identify the furthest away.)
64 |
65 | * `epbprtv0 fail`
66 |
67 | Either no close matches were found, or no query has been prepared.
68 |
69 | #### *empty line* (zero tokens)
70 |
71 | Finish prepared query mode and terminate the front-end.
72 |
73 | Responses:
74 |
75 | * `epbprtv0 ok`
76 |
77 | The front-end has terminated.
78 |
--------------------------------------------------------------------------------
/templates/general.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | {{ title }}
9 |
10 |
11 |
12 |
13 |
14 |
15 |
18 |
19 |
20 |
24 |
25 |
26 |
27 |
48 |
49 | {% block content %} {% endblock %}
50 |
51 |
52 |
Contact
53 |
ANN-Benchmarks has been developed by Martin Aumueller (maau@itu.dk), Erik Bernhardsson (mail@erikbern.com), and Alec Faitfull (alef@itu.dk). Please use
54 | Github to submit your implementation or improvements.
ANN-Benchmarks is a benchmarking environment for approximate nearest neighbor search algorithms. This website contains the current benchmarking results. Please visit http://github.com/erikbern/ann-benchmarks/ to get an overview of the evaluated data sets and algorithms. Make a pull request on Github to add your own code or improvements to the
6 | benchmarking system.
7 |
8 |
9 |
Benchmarking Results
10 |
Results are split by distance measure and dataset. At the bottom, you can find an overview of an algorithm's performance on all datasets. Each dataset is annotated
11 | by (k = ...), the number of nearest neighbors an algorithm was supposed to return. The plot shown depicts Recall (the fraction
12 | of true nearest neighbors found, on average over all queries) against Queries per second. Clicking on a plot reveals detailed interactive plots, including
13 | approximate recall, index size, and build time.
14 | {% for type in ['non-batch', 'batch'] %}
15 | {% if len(dataset_with_distances[type]) > 0 %}
16 | {% if type == 'batch' %}
17 |
Benchmarks for Batched Queries
18 | {% else %}
19 |
Benchmarks for Single Queries
20 | {% endif %}
21 |
22 |
Results by Dataset
23 | {% for distance_data in dataset_with_distances[type] %}
24 |
Distance: {{ distance_data.name }}
25 | {% for entry in distance_data.entries %}
26 |
27 |