├── .dockerignore ├── .github └── workflows │ ├── neurips21.yml │ └── neurips23.yml ├── .gitignore ├── LICENSE ├── README.md ├── algos-2021.yaml ├── benchmark ├── __init__.py ├── algorithms │ ├── base.py │ ├── base_runner.py │ ├── bbann.py │ ├── buddy_t1.py │ ├── cuanns_ivfpq.py │ ├── cuanns_multigpu.py │ ├── definitions.py │ ├── diskann-t2.py │ ├── diskann-v0_3.py │ ├── elastiknn.py │ ├── faiss_inmem.py │ ├── faiss_t1.py │ ├── faiss_t3.py │ ├── gemini.py │ ├── httpann.py │ ├── httpann_example.py │ ├── kota-t2.py │ ├── kst_t1.py │ ├── puck_t1.py │ └── team11.py ├── dataset_io.py ├── datasets.py ├── distances.py ├── main.py ├── plotting │ ├── __init__.py │ ├── eval_range_search.py │ ├── metrics.py │ ├── plot_variants.py │ └── utils.py ├── results.py ├── runner.py ├── sensors │ └── power_capture.py ├── streaming │ ├── __init__.py │ ├── compute_gt.py │ ├── download_gt.py │ └── load_runbook.py └── t3 │ ├── __init__.py │ └── helper.py ├── create_dataset.py ├── data_export.py ├── dataset_preparation ├── FB_ssnpp_dataset.md ├── fb_ssnpp_images │ ├── IoU.png │ ├── distance_histogram.png │ ├── pr_compression.png │ └── result_stats.png ├── make_filtered_groundtruth.py ├── make_groundtruth.py ├── make_sparse_groundtruth.py ├── prepare_bigann.py ├── prepare_fb_ssnpp.py ├── prepare_yfcc100m.py └── sparse_dataset.md ├── eval └── show_operating_points.py ├── install.py ├── install ├── Dockerfile ├── Dockerfile.bbann ├── Dockerfile.diskann ├── Dockerfile.elastiknn ├── Dockerfile.faiss ├── Dockerfile.faissconda ├── Dockerfile.httpann_example ├── Dockerfile.kota ├── Dockerfile.kst_ann_t1 ├── Dockerfile.pqbuddy ├── Dockerfile.puck └── requirements_conda.txt ├── logging.conf ├── neurips21 ├── README.md ├── t1_t2 │ ├── README.md │ └── results │ │ ├── T1 │ │ ├── bigann-1B.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B.png │ │ ├── neurips21 │ │ │ ├── bigann-1B.png │ │ │ ├── deep-1B.png │ │ │ ├── msspacev-1B.png │ │ │ ├── msturing-1B.png │ │ │ ├── ssnpp-1B.png │ │ │ ├── t1.csv │ │ │ └── text2image-1B.png │ │ ├── ssnpp-1B.png │ │ └── text2image-1B.png │ │ └── T2 │ │ ├── bigann-1B-IO.png │ │ ├── bigann-1B.png │ │ ├── deep-1B-IO.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B-IO.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B-IO.png │ │ ├── msturing-1B.png │ │ ├── neurips21 │ │ ├── bigann-1B.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B.png │ │ ├── ssnpp-1B.png │ │ ├── t2.csv │ │ └── text2image-1B.png │ │ ├── ssnpp-1B-IO.png │ │ ├── ssnpp-1B.png │ │ ├── text2image-1B-IO.png │ │ └── text2image-1B.png ├── t3 │ ├── LB_history │ │ ├── Dec.2.2021 │ │ │ ├── LEADERBOARDS.md │ │ │ └── LEADERBOARDS_REJECT_ANOMALIES.md │ │ └── Nov.29.2021 │ │ │ ├── LEADERBOARDS.md │ │ │ └── TASKS_ISSUES_RESOLUTIONS.md │ ├── LEADERBOARDS.md │ ├── LEADERBOARDS_PRIVATE.md │ ├── LEADERBOARDS_PRIVATE_REJECT_ANOMALIES.md │ ├── LEADERBOARDS_PUBLIC.md │ ├── LEADERBOARDS_PUBLIC_REJECT_ANOMALIES.md │ ├── LEADERBOARDS_REJECT_ANOMALIES.md │ ├── RANKING.md │ ├── README.md │ ├── TASKS_ISSUES_RESOLUTIONS.md │ ├── cuanns_ivfpq │ │ ├── Dockerfile │ │ ├── README.md │ │ └── algos.yaml │ ├── cuanns_multigpu │ │ ├── Dockerfile │ │ ├── README.md │ │ └── algos.yaml │ ├── eval_2021 │ │ └── faiss_t3 │ │ │ └── prun.sh │ ├── faiss_t3 │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── algos.yaml │ │ ├── baseline_plots │ │ │ ├── bigann-1B-r-vs-p.png │ │ │ ├── bigann-1B-r-vs-t.png │ │ │ ├── deep-1B-r-vs-p.png │ │ │ ├── deep-1B-r-vs-t.png │ │ │ ├── msspacev-1B-r-vs-p.png │ │ │ ├── msspacev-1B-r-vs-t.png │ │ │ ├── msturing-1B-r-vs-p.png │ │ 
│ ├── msturing-1B-r-vs-t.png │ │ │ ├── text2image-1B-r-vs-p.png │ │ │ └── text2image-1B-r-vs-t.png │ │ ├── cost │ │ │ ├── AdvantechSky6200.pdf │ │ │ ├── GPU.pdf │ │ │ ├── RAM.pdf │ │ │ └── SSD.pdf │ │ └── faiss-gpu_requirements.txt │ └── gemini │ │ ├── .gitignore │ │ ├── README.md │ │ ├── algos.yaml │ │ ├── buildidx │ │ ├── build_index.py │ │ ├── htest.py │ │ ├── run_bin_build_index.sh │ │ └── test.py │ │ ├── cost │ │ ├── AdvantechSky6200.pdf │ │ ├── GPU.pdf │ │ ├── RAM.pdf │ │ └── SSD.pdf │ │ ├── requirements.txt │ │ ├── run_bin_python.sh │ │ └── run_conda_python.sh ├── track1_baseline_faiss │ ├── README.md │ ├── __init__.py │ ├── baseline_faiss.py │ ├── baseline_faiss_filtered.py │ ├── parse_results.py │ ├── plots │ │ ├── bigann-1B.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B.png │ │ ├── ssnpp-1B.png │ │ └── text2image-1B.png │ ├── run_baselines.bash │ └── test_bow_id_selector.py └── track3_baseline_faiss │ ├── README.md │ ├── gpu_baseline_faiss.py │ └── plots │ └── T3_deep-1B.png ├── neurips23 ├── Azure_D8lds_v5_table.md ├── Dockerfile ├── README.md ├── __init__.py ├── common.py ├── ec2_c6i.2xlarge_res.csv ├── ec2_c6i.2xlarge_table.md ├── filter │ ├── base.py │ ├── cufe │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── bow_id_selector.swig │ │ ├── config.yaml │ │ └── faissCUFE.py │ ├── dhq │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── dhq.py │ ├── faiss │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── bow_id_selector.swig │ │ ├── config.yaml │ │ └── faiss.py │ ├── faissplus │ │ ├── Dockerfile │ │ ├── bow_id_selector.swig │ │ ├── config.yaml │ │ └── faiss.py │ ├── fdufilterdiskann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── fdufilterdiskann.py │ ├── hwtl_sdu_anns_filter │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── hwtl_sdu_anns_filter.py │ ├── install_neurips23.sh │ ├── operating_points_private_queries_AzureD8lds_v5.csv │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ ├── parlayivf │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── parlayivf.py │ ├── pinecone │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── pinecone_index.py │ ├── plot_public_queries_AzureD8lds_v5.png │ ├── puck │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── puck.py │ ├── pyanns │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── pyanns.py │ ├── res_private_queries_AzureD8lds_v5.csv │ ├── res_public_queries_AzureD8lds_v5.csv │ ├── run.py │ ├── wm_filter │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── wm_filter.py │ └── zilliz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── zilliz.py ├── leaderboard.md ├── notes │ ├── README.md │ └── streaming │ │ └── hnsw_result │ │ ├── hnsw_result.md │ │ ├── recall_over_steps_10_48_hnsw.png │ │ ├── recall_over_steps_10_48_hnsw_robust_prune.png │ │ └── recall_over_steps_10_48_hnsw_search_based_repair.png ├── ongoing_leaderboard │ ├── Azure_D8lds_v5_table.md │ ├── filter │ │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ │ ├── res_public_queries_AzureD8lds_v5.csv │ │ └── yfcc-10M.png │ ├── leaderboard.md │ ├── ood │ │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ │ ├── res_public_queries_AzureD8lds_v5.csv │ │ └── text2image-10M.png │ ├── sparse │ │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ │ ├── res_public_queries_AzureD8lds_v5.csv │ │ └── sparse-full.png │ └── streaming │ │ └── res_final_runbook_AzureD8lds_v5.csv ├── ood │ ├── base.py │ ├── cufe │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── diskann-in-mem.py │ ├── diskann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── diskann-in-mem.py 
│ ├── epsearch │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── diskann-in-mem-ep-hnsw.py │ ├── hanns │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── hanns.py │ ├── install_neurips23.sh │ ├── mysteryann-dif │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── mysteryann-dif.py │ ├── mysteryann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── mysteryann.py │ ├── ngt │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── module.py │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ ├── pinecone-ood │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── s2_index.py │ ├── plot_public_queries_AzureD8lds_v5.png │ ├── puck-fizz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── puck.py │ ├── puck │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── puck.py │ ├── pyanns │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── pyanns.py │ ├── res_public_queries_AzureD8lds_v5.csv │ ├── run.py │ ├── scann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── scann.py │ ├── sustech-ood │ │ ├── Dockerfile │ │ ├── SUSTech-OOD.py │ │ └── config.yaml │ ├── vamana │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── vamana.py │ └── zilliz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── zilliz.py ├── runbooks │ ├── clustered_data_gen.py │ ├── clustered_replace_runbook.yaml │ ├── clustered_runbook.yaml │ ├── delete_runbook.yaml │ ├── final_runbook.yaml │ ├── final_runbook_gen.py │ ├── gen_expiration_time_runbook.py │ ├── gen_replace_runbooks.py │ ├── generate_msturing10m_runbooks.py │ ├── msmarco-100M_expirationtime_runbook.yaml │ ├── msturing-10M_slidingwindow_runbook.yaml │ ├── random_replace_runbook.yaml │ ├── simple_replace_runbook.yaml │ ├── simple_runbook.yaml │ ├── wikipedia-1M_expiration_time_replace_delete_runbook.yaml │ ├── wikipedia-1M_expiration_time_replace_only_runbook.yaml │ ├── wikipedia-1M_expiration_time_runbook.yaml │ ├── wikipedia-35M_expiration_time_replace_delete_runbook.yaml │ ├── wikipedia-35M_expiration_time_replace_only_runbook.yaml │ └── wikipedia-35M_expirationtime_runbook.yaml ├── sparse │ ├── base.py │ ├── cufe │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── linscan.py │ ├── install_neurips23.sh │ ├── linscan │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── linscan.py │ ├── nle │ │ ├── Dockerfile │ │ ├── config.yaml │ │ ├── interface.py │ │ └── nle.py │ ├── operating_points_private_queries_AzureD8lds_v5.csv │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ ├── pinecone_smips │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── pinecone_smips.py │ ├── plot_public_queries_AzureD8lds_v5.png │ ├── pyanns │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── pyanns.py │ ├── res_private_queries_AzureD8lds_v5.csv │ ├── res_public_queries_AzureD8lds_v5.csv │ ├── run.py │ ├── shnsw │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── shnsw.py │ ├── spmat │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── spmat.py │ ├── sustech-whu │ │ ├── Dockerfile │ │ ├── SUSTech-WHU.py │ │ └── config.yaml │ └── zilliz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── zilliz.py └── streaming │ ├── README.md │ ├── __init__.py │ ├── base.py │ ├── cufe │ ├── Dockerfile │ ├── config.yaml │ └── diskann-str.py │ ├── diskann │ ├── Dockerfile │ ├── config.yaml │ └── diskann-str.py │ ├── hwtl_sdu_anns_stream │ ├── Dockerfile │ ├── config.yaml │ └── hwtl_sdu_anns_stream.py │ ├── pinecone │ ├── Dockerfile │ ├── README.md │ ├── config.yaml │ └── pinecone.py │ ├── puck │ ├── Dockerfile │ ├── config.yaml │ └── puck.py │ ├── pyanns │ ├── Dockerfile │ ├── config.yaml │ └── pyanns.py │ ├── res_final_runbook_AzureD8lds_v5.csv │ ├── run.py │ 
└── scann │ ├── Dockerfile │ ├── config.yaml │ └── scann.py ├── plot.py ├── preparation ├── neurips21 │ └── notebooks │ │ ├── check_1B_groundtruth.ipynb │ │ ├── compare_track1_1B_vs_2x500M.ipynb │ │ ├── eval_t2i_results.ipynb │ │ ├── find_suitable_nq.ipynb │ │ └── large_coarse_quantizer.ipynb └── neurips23 │ ├── parse_filtered_results.ipynb │ └── sparse_algorithms │ ├── basic_sparse_index.py │ └── eval_sparse.py ├── requirements.txt ├── requirements_py3.10.txt ├── requirements_py38.txt ├── run.py ├── run_algorithm.py ├── setup_links.sh └── tests ├── recall_tests.py └── tests.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | data 2 | results 3 | *.bvecs 4 | venv 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | *.o 4 | 5 | data/* 6 | *.class 7 | 8 | *.log 9 | 10 | results/* 11 | !results/*.png 12 | 13 | venv 14 | 15 | .idea 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Martin Aumüller 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Big ANN Benchmarks 2 | 3 | 4 | 5 | ## Datasets 6 | 7 | See <http://big-ann-benchmarks.com/> for details on the different datasets. 8 | 9 | ## NeurIPS 2023 competition: Practical Vector Search 10 | 11 | Please see [this readme](./neurips23/README.md) for a guide to the NeurIPS 23 competition. 12 | 13 | ## NeurIPS 2021 competition: Billion-Scale ANN 14 | 15 | Please see [this readme](./neurips21/README.md) for a guide to running billion-scale benchmarks and a summary of the results from the NeurIPS 21 competition. 16 | 17 | # Credits 18 | 19 | This project is a version of [ann-benchmarks](https://github.com/erikbern/ann-benchmarks) by [Erik Bernhardsson](https://erikbern.com/) and contributors targeting evaluation of algorithms and hardware for newer billion-scale datasets and practical variants of nearest neighbor search.
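For orientation, the sketch below chains together the helper scripts used throughout this repository (install.py, create_dataset.py, run.py), using the NeurIPS'21 T3 cuanns_ivfpq submission as the example; the Dockerfile path, definitions file, and dataset name are illustrative only, and each track's README documents the exact invocation for that track.

```
# Build the Docker image for a submission
python3 install.py --dockerfile t3/cuanns_ivfpq/Dockerfile

# Download and prepare a dataset
python3 create_dataset.py --dataset bigann-1B

# Run the submission on that dataset
python3 run.py --t3 --definitions t3/cuanns_ivfpq/algos.yaml --dataset bigann-1B
```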
20 | -------------------------------------------------------------------------------- /benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /benchmark/algorithms/base_runner.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import time 3 | 4 | class BaseRunner(): 5 | def build(algo, dataset): 6 | t0 = time.time() 7 | algo.fit(dataset) 8 | return time.time() - t0 9 | 10 | def run_task(algo, ds, distance, count, run_count, search_type, private_query, runbook=None): 11 | best_search_time = float('inf') 12 | search_times = [] 13 | 14 | if not private_query: 15 | X = ds.get_queries() 16 | else: 17 | X = ds.get_private_queries() 18 | 19 | print(f"Got {X.shape[0]} queries") 20 | 21 | for i in range(run_count): 22 | print('Run %d/%d...' % (i + 1, run_count)) 23 | 24 | start = time.time() 25 | if search_type == "knn": 26 | algo.query(X, count) 27 | total = (time.time() - start) 28 | results = algo.get_results() 29 | assert results.shape[0] == X.shape[0] 30 | elif search_type == "range": 31 | algo.range_query(X, count) 32 | total = (time.time() - start) 33 | results = algo.get_range_results() 34 | else: 35 | raise NotImplementedError(f"Search type {search_type} not available.") 36 | 37 | search_time = total 38 | if search_time < best_search_time: 39 | best_search_time = search_time 40 | best_results = results 41 | 42 | search_times.append( search_time ) 43 | 44 | attrs = { 45 | "best_search_time": best_search_time, 46 | "name": str(algo), 47 | "run_count": run_count, 48 | "distance": distance, 49 | "type": search_type, 50 | "count": int(count), 51 | "search_times": search_times, 52 | "private_queries": private_query, 53 | } 54 | additional = algo.get_additional() 55 | for k in additional: 56 | attrs[k] = additional[k] 57 | return (attrs, best_results) 58 | -------------------------------------------------------------------------------- /benchmark/algorithms/elastiknn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/benchmark/algorithms/elastiknn.py -------------------------------------------------------------------------------- /benchmark/algorithms/faiss_inmem.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | #import sys 3 | #sys.path.append("install/lib-faiss") # noqa 4 | import numpy 5 | import sklearn.preprocessing 6 | import ctypes 7 | import faiss 8 | import os 9 | from benchmark.algorithms.base import BaseANN 10 | from benchmark.datasets import DATASETS 11 | 12 | 13 | class Faiss(BaseANN): 14 | def query(self, X, n): 15 | if self._metric == 'angular': 16 | X /= numpy.linalg.norm(X, axis=1, keepdims=True) 17 | self.res = self.index.search(X.astype(numpy.float32), n) 18 | 19 | def get_results(self): 20 | D, I = self.res 21 | return I 22 | # res = [] 23 | # for i in range(len(D)): 24 | # r = [] 25 | # for l, d in zip(L[i], D[i]): 26 | # if l != -1: 27 | # r.append(l) 28 | # res.append(r) 29 | # return res 30 | 31 | 32 | class FaissIVF(Faiss): 33 | def __init__(self, metric, n_list): 34 | self._n_list = n_list 35 | self._metric = metric 36 | 37 | def index_name(self, name): 38 | return f"data/ivf_{name}_{self._n_list}_{self._metric}" 39 |
40 | def fit(self, dataset): 41 | X = DATASETS[dataset]().get_dataset() # assumes it fits into memory 42 | 43 | if self._metric == 'angular': 44 | X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') 45 | 46 | if X.dtype != numpy.float32: 47 | X = X.astype(numpy.float32) 48 | 49 | self.quantizer = faiss.IndexFlatL2(X.shape[1]) 50 | index = faiss.IndexIVFFlat( 51 | self.quantizer, X.shape[1], self._n_list, faiss.METRIC_L2) 52 | index.train(X) 53 | index.add(X) 54 | faiss.write_index(index, self.index_name(dataset)) 55 | self.index = index 56 | 57 | def load_index(self, dataset): 58 | if not os.path.exists(self.index_name(dataset)): 59 | return False 60 | 61 | self.index = faiss.read_index(self.index_name(dataset)) 62 | return True 63 | 64 | def set_query_arguments(self, n_probe): 65 | faiss.cvar.indexIVF_stats.reset() 66 | self._n_probe = n_probe 67 | self.index.nprobe = self._n_probe 68 | 69 | def get_additional(self): 70 | return {"dist_comps": faiss.cvar.indexIVF_stats.ndis + # noqa 71 | faiss.cvar.indexIVF_stats.nq * self._n_list} 72 | 73 | def __str__(self): 74 | return 'FaissIVF(n_list=%d, n_probe=%d)' % (self._n_list, 75 | self._n_probe) 76 | -------------------------------------------------------------------------------- /benchmark/distances.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial.distance import pdist as scipy_pdist 2 | import itertools 3 | import numpy as np 4 | 5 | def pdist(a, b, metric): 6 | return scipy_pdist([a, b], metric=metric)[0] 7 | 8 | metrics = { 9 | 'euclidean': { 10 | 'distance': lambda a, b: pdist(a, b, "euclidean"), 11 | }, 12 | 'angular': { 13 | 'distance': lambda a, b: pdist(a, b, "cosine"), 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /benchmark/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from benchmark.plotting import * 3 | -------------------------------------------------------------------------------- /benchmark/plotting/plot_variants.py: -------------------------------------------------------------------------------- 1 | from benchmark.plotting.metrics import all_metrics as metrics 2 | 3 | all_plot_variants = { 4 | "recall/time": ("k-nn", "qps"), 5 | "recall/buildtime": ("k-nn", "build"), 6 | "recall/indexsize": ("k-nn", "indexsize"), 7 | "recall/distcomps": ("k-nn", "distcomps"), 8 | "recall/candidates": ("k-nn", "candidates"), 9 | "recall/qpssize": ("k-nn", "queriessize"), 10 | } 11 | -------------------------------------------------------------------------------- /benchmark/streaming/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /benchmark/streaming/download_gt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from benchmark.datasets import DATASETS 5 | from benchmark.dataset_io import download 6 | from benchmark.streaming.load_runbook import load_runbook, get_gt_url 7 | from benchmark.streaming.compute_gt import gt_dir 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser( 12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 13 | 14 | parser.add_argument( 15 | '--dataset', 16 | choices=DATASETS.keys(), 17 | help=f'Dataset to benchmark on.', 18 | 
required=True) 19 | parser.add_argument( 20 | '--runbook_file', 21 | help='Runbook yaml file path' 22 | ) 23 | args = parser.parse_args() 24 | 25 | ds = DATASETS[args.dataset]() 26 | print(args.runbook_file) 27 | max_pts, runbook = load_runbook(args.dataset, ds.nb, args.runbook_file) 28 | gt_url = get_gt_url(args.dataset, args.runbook_file) 29 | 30 | download_dir = gt_dir(ds, args.runbook_file) 31 | os.makedirs(download_dir, exist_ok=True) 32 | for step, entry in enumerate(runbook): 33 | if entry['operation'] == 'search': 34 | step_filename = 'step' + str(step+1) + '.gt100' 35 | step_url = gt_url + '/' + step_filename 36 | download(step_url, os.path.join(download_dir, step_filename)) 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /benchmark/t3/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /create_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from benchmark.datasets import DATASETS 3 | 4 | if __name__ == "__main__": 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( 7 | '--dataset', 8 | choices=DATASETS.keys(), 9 | required=True) 10 | parser.add_argument( 11 | '--skip-data', 12 | action='store_true', 13 | help='skip downloading base vectors') 14 | args = parser.parse_args() 15 | ds = DATASETS[args.dataset]() 16 | ds.prepare(True if args.skip_data else False) 17 | -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/IoU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/IoU.png -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/distance_histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/distance_histogram.png -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/pr_compression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/pr_compression.png -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/result_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/result_stats.png -------------------------------------------------------------------------------- /dataset_preparation/prepare_bigann.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Prepare the bigann dataset in the format expected for the 1B ANN competition 4 | 5 | """ 6 | 7 | import sys 8 | 9 | from faiss.contrib import datasets as faiss_datasets 10 | import numpy 
as np 11 | 12 | 13 | # source data is in the native Faiss format 14 | ds = faiss_datasets.DatasetBigANN() 15 | 16 | stage = int(sys.argv[1]) 17 | 18 | outdir = "/scratch/matthijs/bigann_competiton_format/" 19 | 20 | def u8bin_write(x, fname): 21 | assert x.dtype == 'uint8' 22 | f = open(fname, "wb") 23 | n, d = x.shape 24 | np.array([n, d], dtype='uint32').tofile(f) 25 | x.tofile(f) 26 | 27 | def ibin_write(x, fname): 28 | assert x.dtype == 'int32' 29 | f = open(fname, "wb") 30 | n, d = x.shape 31 | np.array([n, d], dtype='uint32').tofile(f) 32 | x.tofile(f) 33 | 34 | 35 | if stage == 1: # convert query format 36 | # xq = ds.get_queries() 37 | xq = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_query.bvecs') 38 | xq = np.ascontiguousarray(xq) 39 | u8bin_write(xq, outdir + "query.public.10K.u8bin") 40 | 41 | elif stage == 2: # sample new queries from train set 42 | secretkey = int(sys.argv[2]) 43 | rs = np.random.RandomState(secretkey) 44 | xt = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_learn.bvecs') 45 | print("size", xt.shape) 46 | selection = rs.choice(len(xt), 10000, replace=False) 47 | u8bin_write(xt[selection], outdir + f"query.private.{secretkey}.10K.u8bin") 48 | 49 | elif stage == 3: # convert 10M subset 50 | 51 | xb = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_base.bvecs') 52 | u8bin_write(xb[:10**7], outdir + "base.10M.u8bin") 53 | 54 | elif stage == 4: # write the 1B vectors... 55 | 56 | xb = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_base.bvecs') 57 | bs = 10**6 58 | f = open(outdir + "base.1B.u8bin", "wb") 59 | np.array(xb.shape, dtype='uint32').tofile(f) 60 | for i in range(1000): 61 | print(i, end="\r", flush=True) 62 | xb[i * bs : (i + 1) * bs].tofile(f) 63 | 64 | elif stage == 5: # convert the training vectors 65 | 66 | xb = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_learn.bvecs') 67 | bs = 10**6 68 | f = open(outdir + "learn.100M.u8bin", "wb") 69 | np.array(xb.shape, dtype='uint32').tofile(f) 70 | for i in range(100): 71 | print(i, end="\r", flush=True) 72 | xb[i * bs : (i + 1) * bs].tofile(f) 73 | 74 | elif stage == 6: 75 | # convert ground-truth files for public queries 76 | gt = ds.get_groundtruth() 77 | ibin_write(gt, outdir + "GT.public.1B.ibin") 78 | 79 | ds10M = faiss_datasets.DatasetBigANN(10) 80 | gt = ds10M.get_groundtruth() 81 | ibin_write(gt, outdir + "GT.public.10M.ibin") 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /dataset_preparation/prepare_fb_ssnpp.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Prepare the FB SSN++ dataset in the format expected for the 1B ANN competition 4 | 5 | The datafiles have already been produced on the prod side: 6 | 7 | - FB_ssnpp_database.u8bin: the 1B database vectors, deduplicated, already 8 | in correct format 9 | 10 | - 1M_queries_no_bursts_compressed.npy: a little less than 1M query vectors, 11 | selected not to be bursty 12 | 13 | """ 14 | import sys 15 | import numpy as np 16 | 17 | secret_suffix = sys.argv[1] 18 | 19 | basedir = "/checkpoint/matthijs/billion-scale-ann-benchmarks/FB_ssnpp/" 20 | 21 | def u8bin_write(x, fname): 22 | assert x.dtype == 'uint8' 23 | f = open(fname, "wb") 24 | n, d = x.shape 25 | np.array([n, d], dtype='uint32').tofile(f) 26 | x.tofile(f) 27 | 28 | xqall_fp32 = np.load(basedir + "1M_queries_no_bursts_compressed.npy") 29 | xqall = xqall_fp32.astype('uint8') 30 | assert np.all(xqall == xqall_fp32) 31 | u8bin_write( 32 | xqall[:10**5], 33 | basedir +
"FB_ssnpp_public_queries.u8bin" 34 | ) 35 | u8bin_write( 36 | xqall[10**5: 2 * 10**5], 37 | basedir + "FB_ssnpp_heldout_queries_" + secret_suffix + ".u8bin" 38 | ) 39 | 40 | -------------------------------------------------------------------------------- /eval/show_operating_points.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | 4 | if __name__ == "__main__": 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( 7 | '--algorithm', 8 | required=False) 9 | parser.add_argument( 10 | '--metric', 11 | choices=['qps', 'recall'], 12 | default='recall') 13 | parser.add_argument( 14 | '--threshold', 15 | default=0.9, 16 | help='threshold', 17 | type=float) 18 | parser.add_argument( 19 | '--csv', 20 | metavar='CSV', 21 | help='input csv') 22 | parser.add_argument( 23 | '--dataset', 24 | required=False) 25 | 26 | args = parser.parse_args() 27 | df = pd.read_csv(args.csv) 28 | 29 | if args.algorithm: 30 | df = df[df.algorithm == args.algorithm] 31 | if args.dataset: 32 | df = df[df.dataset == args.dataset] 33 | 34 | if args.metric == "qps": 35 | print(df[(df.qps > args.threshold)].groupby(['dataset', 'algorithm']).max()[['recall/ap']]) 36 | elif args.metric == "recall": 37 | print(df[(df['recall/ap'] > args.threshold)].groupby(['dataset', 'algorithm']).max()[['qps']].sort_values(by=["dataset", "qps"], ascending=False)) 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /install/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt-get update && apt-get install -y python3-numpy python3-scipy python3-pip build-essential git axel wget 4 | RUN wget https://aka.ms/downloadazcopy-v10-linux && mv downloadazcopy-v10-linux azcopy.tgz && tar xzf azcopy.tgz --transform 's!^[^/]\+\($\|/\)!azcopy_folder\1!' 5 | RUN cp azcopy_folder/azcopy /usr/bin 6 | 7 | RUN pip3 install -U pip 8 | 9 | WORKDIR /home/app 10 | COPY requirements_py3.10.txt run_algorithm.py ./ 11 | RUN pip3 install -r requirements_py3.10.txt 12 | 13 | ENTRYPOINT ["python3", "-u", "run_algorithm.py"] 14 | -------------------------------------------------------------------------------- /install/Dockerfile.bbann: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev libboost-dev python3 python3-setuptools python3-pip libomp-dev 5 | RUN pip3 install pybind11 numpy 6 | 7 | RUN git clone --single-branch --branch master https://github.com/zilliztech/BBAnn.git 8 | 9 | RUN mkdir -p BBAnn/build 10 | RUN cd BBAnn/build && cmake -DCMAKE_BUILD_TYPE=Release .. 11 | RUN cd BBAnn/build && make -j 12 | RUN cd BBAnn/python && pip install -e . 
13 | RUN python3 -c 'import bbannpy' 14 | -------------------------------------------------------------------------------- /install/Dockerfile.diskann: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 5 | RUN pip3 install pybind11 numpy 6 | 7 | RUN cd /tmp && wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 8 | RUN cd /tmp && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 9 | RUN cd /tmp && rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 10 | RUN cd /tmp && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' 11 | RUN apt-get update 12 | RUN apt-get install -y intel-mkl-64bit-2020.0-088 13 | 14 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so libblas.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 15 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so.3 libblas.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 16 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so liblapack.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 17 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so.3 liblapack.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 18 | 19 | RUN echo "/opt/intel/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 20 | RUN echo "/opt/intel/mkl/lib/intel64" >> /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN echo "MKL_THREADING_LAYER=GNU" >> /etc/environment 23 | 24 | RUN git clone --single-branch --branch python_bindings_diskann https://github.com/microsoft/diskann 25 | RUN mkdir -p diskann/build 26 | RUN cd diskann/build && cmake -DCMAKE_BUILD_TYPE=Release .. 27 | RUN cd diskann/build && make -j 28 | RUN cd diskann/python && pip install -e . 29 | RUN python3 -c 'import diskannpy' 30 | -------------------------------------------------------------------------------- /install/Dockerfile.elastiknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/install/Dockerfile.elastiknn -------------------------------------------------------------------------------- /install/Dockerfile.faiss: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update && apt-get install -y libopenblas-base libopenblas-dev libpython3-dev swig python3-dev libssl-dev wget 4 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.18.3/cmake-3.18.3-Linux-x86_64.sh && mkdir cmake && sh cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=cmake && rm cmake-3.18.3-Linux-x86_64.sh 5 | RUN git clone https://github.com/facebookresearch/faiss lib-faiss 6 | RUN cd lib-faiss && ../cmake/bin/cmake -DFAISS_OPT_LEVEL=avx2 -DCMAKE_BUILD_TYPE=Release -DFAISS_ENABLE_GPU=OFF -DPython_EXECUTABLE=/usr/bin/python3 -B build . 
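# The next steps compile the Faiss C++ library, install its Python bindings, and confirm that the faiss module imports.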
7 | RUN cd lib-faiss && make -C build -j4 8 | RUN cd lib-faiss && cd build && cd faiss && cd python && python3 setup.py install && cd && rm -rf cmake 9 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 10 | -------------------------------------------------------------------------------- /install/Dockerfile.faissconda: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt update && apt install -y wget 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-1-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-1-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | 9 | RUN conda install -c pytorch faiss-cpu 10 | COPY install/requirements_conda.txt ./ 11 | # conda doesn't like some of our packages, use pip 12 | RUN python3 -m pip install -r requirements_conda.txt 13 | 14 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /install/Dockerfile.httpann_example: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN python3 -m pip install flask==2.0.1 4 | -------------------------------------------------------------------------------- /install/Dockerfile.kota: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 5 | RUN pip3 install pybind11 numpy 6 | 7 | RUN cd /tmp && wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 8 | RUN cd /tmp && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 9 | RUN cd /tmp && rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 10 | RUN cd /tmp && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' 11 | RUN apt-get update 12 | RUN apt-get install -y intel-mkl-64bit-2020.0-088 13 | 14 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so libblas.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 15 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so.3 libblas.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 16 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so liblapack.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 17 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so.3 liblapack.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 18 | 19 | RUN echo "/opt/intel/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 20 | RUN echo "/opt/intel/mkl/lib/intel64" >> /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN echo "MKL_THREADING_LAYER=GNU" >> /etc/environment 23 | 24 | RUN git clone --branch python_binding https://github.com/LLLjun/DiskANN_NICS 25 | RUN mkdir -p DiskANN_NICS/build 26 | RUN cd DiskANN_NICS/build && cmake -DCMAKE_BUILD_TYPE=Release .. 27 | RUN cd DiskANN_NICS/build && make -j 28 | RUN cd DiskANN_NICS/python && pip install -e . 
29 | RUN python3 -c 'import diskannpy' 30 | -------------------------------------------------------------------------------- /install/Dockerfile.kst_ann_t1: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update && apt-get install -y libopenblas-base libopenblas-dev libpython3-dev swig python3-dev libssl-dev wget 4 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.18.3/cmake-3.18.3-Linux-x86_64.sh && mkdir cmake && sh cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=cmake && rm cmake-3.18.3-Linux-x86_64.sh 5 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2021.05-Linux-x86_64.sh 6 | RUN bash Anaconda3-2021.05-Linux-x86_64.sh -b 7 | 8 | ENV PATH /root/anaconda3/bin:$PATH 9 | RUN git clone https://github.com/NJU-yasuo/faiss_t faiss 10 | RUN cd faiss && bash build-lib.sh 11 | RUN cd faiss/_build/faiss/python && python3 setup.py install && cd && rm -rf cmake 12 | COPY install/requirements_conda.txt ./ 13 | # conda doesn't like some of our packages, use pip 14 | RUN python3 -m pip install -r requirements_conda.txt 15 | 16 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 17 | -------------------------------------------------------------------------------- /install/Dockerfile.pqbuddy: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt update && apt install -y wget 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh 5 | RUN bash Anaconda3-2020.11-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | 9 | RUN conda install -c pytorch faiss-cpu 10 | COPY install/requirements_conda.txt ./ 11 | # conda doesn't like some of our packages, use pip 12 | RUN python3 -m pip install -r requirements_conda.txt 13 | 14 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /install/Dockerfile.puck: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN wget https://gips-test-bucket-0-gz.gz.bcebos.com/similar/puck_to_python.tar.gz 4 | RUN tar zxvf puck_to_python.tar.gz 5 | RUN mv lib puck 6 | RUN python3 -c 'from puck import py_puck_api' 7 | -------------------------------------------------------------------------------- /install/requirements_conda.txt: -------------------------------------------------------------------------------- 1 | ansicolors 2 | docker-py 3 | h5py==3.8.0 4 | matplotlib 5 | numpy 6 | pyyaml 7 | psutil 8 | scipy 9 | scikit-learn 10 | jinja2 11 | pandas 12 | -------------------------------------------------------------------------------- /logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,annb 3 | 4 | [handlers] 5 | keys=consoleHandler,fileHandler 6 | 7 | [formatters] 8 | keys=simpleFormatter 9 | 10 | [formatter_simpleFormatter] 11 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s 12 | datefmt= 13 | 14 | [handler_consoleHandler] 15 | class=StreamHandler 16 | level=INFO 17 | formatter=simpleFormatter 18 | args=(sys.stdout,) 19 | 20 | [handler_fileHandler] 21 | class=FileHandler 22 | level=INFO 23 | formatter=simpleFormatter 24 | args=('annb.log','w') 25 | 26 | [logger_root] 27 | level=WARN 28 | handlers=consoleHandler 29 | 30 | [logger_annb] 31 | level=INFO 32 | handlers=consoleHandler,fileHandler 33 | 
qualname=annb 34 | propagate=0 35 | -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/text2image-1B.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/bigann-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/bigann-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/deep-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/deep-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msspacev-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msspacev-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msturing-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msturing-1B-IO.png 
-------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/ssnpp-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/ssnpp-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/text2image-1B-IO.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/text2image-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t3/LEADERBOARDS.md: -------------------------------------------------------------------------------- 1 | 2 | # BigANN Challenge T3 Leaderboards and Winners 3 | 4 | We rank participants based on 4 different but inter-related benchmarks: 5 | * One based on recall/average precision 6 | * One based on throughput 7 | * One based on power consumption 8 | * One based on hardware cost 9 | 10 | We maintain two sets of leaderboards that rank participants on all benchmarks: 11 | * [Leaderboards based on a public query dataset](LEADERBOARDS_PUBLIC.md) to which participants had access during the competition. 12 | * [Leaderboards based on a private query dataset](LEADERBOARDS_PRIVATE.md) on which submissions are currently being evaluated. 13 | 14 | Please consult the main [T3 track README](README.md) for more details about benchmarks and ranking methodology. 15 | 16 | ## Public Dataset Leaderboards And Winners 17 | 18 | The leaderboards and rankings on the public dataset live [here](LEADERBOARDS_PUBLIC.md). 19 | 20 | We would like to congratulate all the winners of this part of the competition, teams from Intel and NVidia: 21 | * Sourabh Dongaonkar (Intel Corporate) 22 | * Mariano Tepper (Intel Labs) 23 | * Yong Wong (NVidia) 24 | * Akira Naruse (NVidia) 25 | * Jingrong Zhang (NVidia) 26 | * Mahesh Doijade (NVidia) 27 | 28 | We are in the process of resolving the remaining issues and tasks. 29 | 30 | Upon completion, we will make the rankings and winners official. 31 | 32 | Please revisit this page again soon for more updates. 33 | 34 | ## Private Dataset Leaderboards Status 35 | 36 | The status of the leaderboards and rankings on the private dataset lives [here](LEADERBOARDS_PRIVATE.md). 37 | 38 | All submissions are currently being evaluated using the private data sets, so the scores (and rankings) could change as evaluation proceeds. 39 | 40 | Please revisit this page again soon for more updates.
41 | 42 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_ivfpq/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.4.2-devel-ubuntu20.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install --no-install-recommends -y build-essential wget git python3-dev python3-pip 5 | RUN pip3 install -U pip 6 | 7 | RUN mkdir /home/soft 8 | RUN cd /home/soft && wget -nv "https://drive.google.com/uc?export=download&id=1IybRBhZPQzMqQ2HRX9KSL7woftO8WZdV" -O pkg-libcuann-0.0.7.tgz && tar xfz pkg-libcuann-0.0.7.tgz 9 | RUN pip3 install /home/soft/libcuann/python/dist/cuann-0.0.7-cp38-cp38-linux_x86_64.whl 10 | RUN rm /home/soft/pkg-libcuann-0.0.7.tgz 11 | RUN rm -rf /home/soft/libcuann/python 12 | 13 | WORKDIR /home/app 14 | COPY requirements_py38.txt run_algorithm.py ./ 15 | RUN pip3 install -r requirements_py38.txt 16 | RUN pip3 install 'Cython>=0.29.*' 'numpy>=1.19.*' cupy-cuda114 17 | 18 | ENV LD_LIBRARY_PATH="/home/soft/libcuann/lib:${LD_LIBRARY_PATH}" 19 | ENV NVIDIA_TF32_OVERRIDE=1 20 | # ENV OMP_NUM_THREADS=32 21 | 22 | ENTRYPOINT ["python3", "run_algorithm.py"] 23 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_ivfpq/README.md: -------------------------------------------------------------------------------- 1 | # T3: Cuanns IVFPQ (Single GPU) 2 | 3 | ## Hardware Configuration And Cost 4 | 5 | |Part |Model |No. |Unit Price |Total Price| 6 | |---------------|-------------------------------------------|----|------------------------------------|-----------| 7 | |System |[NVIDIA DGX A100 640GB] | 1| | | 8 | |Total | | 1| | | 9 | 10 | Details of the system can be found at https://www.nvidia.com/en-us/data-center/dgx-a100/. However, no price information is provided. Therefore, we will not participate in the leaderboards based on hardware cost. 11 | 12 | ## Hardware Access 13 | 14 | SSH access to the system will be provided to competition organizers. 15 | 16 | ## No Source Code Declarations 17 | 18 | This submission requires the following software components where source-code is not available and/or not part of the source-code for this submission: 19 | * NVIDIA docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 20 | * NVIDIA CUDA libraries (including library for fast ANNS) and host drivers 21 | 22 | ## Hardware Setup And Software Installation 23 | 24 | ## Prerequisites 25 | 26 | * Linux Ubuntu 20.04 27 | * CUDA 11.4 or above 28 | * The NVidia docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 29 | * This cloned project repository 30 | 31 | ### Setup and Installation Instructions 32 | 33 | Note that all the subsequent commands must be run in the top-level directory of this repo on your machine. 34 | 35 | First build the cuanns_ivfpq docker container: 36 | ``` 37 | python3 install.py --dockerfile t3/cuanns_ivfpq/Dockerfile 38 | ``` 39 | Setup links to downloaded dataset and pre-built index files. 40 | ``` 41 | ./setup_links.sh 42 | ``` 43 | Otherwise, download datasets for competitions supported by cuanns_ivfpq. 44 | ``` 45 | python3 create_dataset.py --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B|text2image-1B] 46 | ``` 47 | 48 | ## Run Competition Algorithm 49 | 50 | You can run this algorithm on the competition dataset using the run.py script. 
51 | ``` 52 | python3 run.py --t3 --definitions t3/cuanns_ivfpq/algos.yaml --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B|text2image-1B] 53 | ``` 54 | 55 | #### Known Issues 56 | 57 | The program to build the index file from the competition dataset is not yet implemented in this repo. When the program is ready, we will describe how to build the index. 58 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_multigpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.4.2-devel-ubuntu20.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install --no-install-recommends -y build-essential wget git python3-dev python3-pip libopenblas-dev 5 | RUN pip3 install -U pip 6 | 7 | RUN mkdir /home/soft 8 | RUN cd /home/soft && wget -nv https://github.com/Kitware/CMake/releases/download/v3.21.3/cmake-3.21.3-linux-x86_64.tar.gz && tar xzf cmake-3.21.3-linux-x86_64.tar.gz 9 | RUN cd /home/soft && wget -nv https://github.com/facebookresearch/faiss/archive/refs/tags/v1.7.1.tar.gz -O faiss-1.7.1.tar.gz && tar xzf faiss-1.7.1.tar.gz 10 | RUN cd /home/soft/faiss-1.7.1 \ 11 | && ../cmake-3.21.3-linux-x86_64/bin/cmake -B build\ 12 | -DFAISS_ENABLE_GPU=ON \ 13 | -DFAISS_ENABLE_PYTHON=OFF -DBUILD_TESTING=OFF \ 14 | -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release \ 15 | -DCMAKE_CUDA_ARCHITECTURES="80" \ 16 | -DCMAKE_INSTALL_PREFIX=/home/soft/faiss . \ 17 | && cd build && make -j 8 && make install 18 | 19 | 20 | 21 | WORKDIR /home/app 22 | COPY requirements_py38.txt run_algorithm.py ./ 23 | RUN pip3 install -r requirements_py38.txt 24 | 25 | RUN cd /home/soft/ && wget -nv "https://drive.google.com/uc?export=download&id=1jU4aLrihX6cPPzOB9oRQrrSYZkyUGoQ5" -O pycuann.tar.gz && tar xf pycuann.tar.gz pycuann.so 26 | 27 | ENV LD_LIBRARY_PATH="/home/soft:${LD_LIBRARY_PATH}" 28 | ENV PATH="/home/soft:${PATH}" 29 | ENTRYPOINT ["python3", "run_algorithm.py"] 30 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_multigpu/README.md: -------------------------------------------------------------------------------- 1 | # T3: Cuanns MultiGPU 2 | 3 | ## Hardware Configuration And Cost 4 | 5 | |Part |Model |No. |Unit Price |Total Price| 6 | |---------------|-------------------------------------------|----|------------------------------------|-----------| 7 | |System |[NVIDIA DGX A100 640GB] | 1| | | 8 | |Total | | 1| | | 9 | 10 | Details of the system can be found at https://www.nvidia.com/en-us/data-center/dgx-a100/. However, no price information is provided. Therefore, we will not participate in the leaderboards based on hardware cost. 11 | 12 | ## Hardware Access 13 | 14 | SSH access to the system will be provided to competition organizers. 
15 | 16 | ## No Source Code Declarations 17 | 18 | This submission requires the following software components whose source code is not available and/or is not part of the source code for this submission: 19 | * NVIDIA docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 20 | * NVIDIA CUDA libraries and host drivers 21 | * The algorithm implementation, which is provided as a library 22 | 23 | ## Hardware Setup And Software Installation 24 | 25 | ### Prerequisites 26 | 27 | * Linux Ubuntu 20.04 28 | * CUDA 11.4 or above 29 | * The NVidia docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 30 | * This cloned project repository 31 | 32 | ### Setup and Installation Instructions 33 | 34 | Note that all the subsequent commands must be run in the top-level directory of this repo on your machine. 35 | 36 | First build the docker container: 37 | ``` 38 | python install.py --dockerfile t3/cuanns_multigpu/Dockerfile 39 | ``` 40 | Download datasets for competitions supported by cuanns_multigpu. Note that even if you do not build the index files, you still need to download the datasets. 41 | ``` 42 | python create_dataset.py --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B] 43 | ``` 44 | Download index files. The download instructions will be updated as soon as the location of the index files is determined. 45 | ``` 46 | (to be updated) 47 | ``` 48 | 49 | ## Run Competition Algorithm 50 | 51 | You can run this algorithm on the competition dataset using the run.py script. 52 | ``` 53 | python run.py --t3 --definitions t3/cuanns_multigpu/algos.yaml --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B] 54 | ``` 55 | 56 | #### Known Issues 57 | 58 | The program to build the index file from the competition dataset is not yet implemented in this repo. When the program is ready, we will describe how to build the index.
59 | -------------------------------------------------------------------------------- /neurips21/t3/eval_2021/faiss_t3/prun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset deep-1B --nodocker 4 | python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset bigann-1B --nodocker 5 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset text2image-1B --nodocker 6 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset msturing-1B --nodocker 7 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset msspacev-1B --nodocker 8 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset ssnpp-1B --nodocker 9 | 10 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset deep-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 11 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset bigann-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 12 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset text2image-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 13 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset msturing-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 14 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset msspacev-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 15 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset ssnpp-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 16 | -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | #FROM nvidia/cuda:11.0-devel-ubuntu18.04 3 | FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04 4 | 5 | # CONDA 6 | 7 | ENV PATH="/root/miniconda3/bin:${PATH}" 8 | ARG PATH="/root/miniconda3/bin:${PATH}" 9 | 10 | RUN apt-get update 11 | RUN apt-get install -y wget 12 | RUN apt-get install -y build-essential 13 | RUN apt-get install -y git 14 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py38_23.1.0-1-Linux-x86_64.sh 15 | RUN bash Miniconda3-py38_23.1.0-1-Linux-x86_64.sh -b 16 | RUN rm -f Miniconda3-py38_23.1.0-1-Linux-x86_64.sh 17 | RUN conda --version 18 | RUN conda create -n py38 python=3.8 -y 19 | RUN echo "source activate env" > ~/.bashrc 20 | RUN ls /opt/ 21 | ENV PATH /opt/conda/envs/py38/bin:$PATH 22 | RUN conda config --set remote_read_timeout_secs 300 23 | RUN conda install -c pytorch faiss-gpu 24 | #RUN conda install cudatoolkit=11.0 25 | RUN conda --version && which conda && which python && which pip3 26 | 27 | # BIGANN 28 | 29 | RUN pip3 install -U pip 30 | 31 | WORKDIR /home/app 32 | COPY t3/faiss_t3/faiss-gpu_requirements.txt run_algorithm.py ./ 33 | RUN pip3 install -r faiss-gpu_requirements.txt 34 | 35 | ENTRYPOINT ["python3", "run_algorithm.py"] 36 | 37 | ## For the following RUN command to work, we need to initiate docker build 38 | ## with a gpu device request much like what's done with docker eval run. 
39 | # RUN python3 -c 'import faiss; print("gpus=", faiss.get_num_gpus())' 40 | 41 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 42 | -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-p.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/AdvantechSky6200.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/AdvantechSky6200.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/GPU.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/GPU.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/RAM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/RAM.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/SSD.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/SSD.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/faiss-gpu_requirements.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==2.6.1 3 | h5py==2.10.0 4 | matplotlib 5 | numpy 6 | pyyaml==5.1 7 | psutil==5.6.6 8 | scipy 9 | scikit-learn 10 | jinja2==2.10.1 11 | pandas 12 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/.gitignore: -------------------------------------------------------------------------------- 1 | 1b 2 | centroids_2m 3 | records_weights 4 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/algos.yaml: -------------------------------------------------------------------------------- 1 | deep-1B: 2 | gemini-t3: 3 | docker-tag: billion-scale-benchmark-faissconda 4 | module: benchmark.algorithms.gemini 5 | constructor: GeminiT3 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [ 11 | "{'nbits': 512, 'qbits':768, 'nlist':2097152, 'nt':83886080, 'num_apuc':3, 'f16':True }" 12 | ] 13 | query-args: | 14 | [ 15 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 5000, 'average_clstr_size_factor': 0.0}", 16 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 2000, 'average_clstr_size_factor': 0.0}", 17 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 18 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 500, 'average_clstr_size_factor': 0.0}", 19 | 
"{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 250, 'average_clstr_size_factor': 0.0}", 20 | "{'nprobe': 800, 'nprobe_refine': 400, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 21 | "{'nprobe': 800, 'nprobe_refine': 300, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 22 | "{'nprobe': 700, 'nprobe_refine': 380, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 23 | "{'nprobe': 600, 'nprobe_refine': 280, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 24 | "{'nprobe': 500, 'nprobe_refine': 180, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}" 25 | ] 26 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/buildidx/run_bin_build_index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PATH=/usr/bin:$PATH python3 build_index.py 4 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/buildidx/test.py: -------------------------------------------------------------------------------- 1 | import faiss 2 | import numpy as np 3 | 4 | qd=768 5 | d=512 6 | nlist=5 7 | 8 | nb=1000 9 | db = np.empty((nb, d // 8), dtype='uint8') 10 | 11 | quantizer = faiss.IndexBinaryFlat( d ) 12 | index = faiss.IndexBinaryIVF( quantizer, d, nlist ) 13 | index.train(db) 14 | index.add(db) 15 | 16 | def convert_index_to_cluster_and_ids_lists(index, nbits): 17 | cluster_list = np.empty(index.invlists.nlist, dtype=object) 18 | ids_list = np.empty(index.invlists.nlist, dtype=object) 19 | 20 | zero_count = 0 21 | 22 | for i in range(index.invlists.nlist): 23 | list_sz = index.invlists.list_size(i) 24 | 25 | if list_sz == 0: 26 | zero_count = zero_count + 1 27 | ids = None 28 | else: 29 | ids_ptr = index.invlists.get_ids(i) 30 | ids = np.array(faiss.rev_swig_ptr(ids_ptr, list_sz)).reshape(-1, 1).astype(np.uint32) # GSL requires a 2d arrray for some reason 31 | index.invlists.release_ids(ids_ptr) 32 | #GW index.invlists.release_ids(list_sz, ids_ptr) 33 | ids_list[i] = ids 34 | 35 | codes_ptr = index.invlists.get_codes(i) 36 | codes = np.array(faiss.rev_swig_ptr(codes_ptr, list_sz * nbits // 8)).reshape(list_sz, nbits//8) 37 | index.invlists.release_codes(codes_ptr) 38 | #GW index.invlists.release_codes(list_sz * nbits // 8, codes_ptr) 39 | cluster_list[i] = codes 40 | 41 | print('zero_count =', zero_count) 42 | return cluster_list, ids_list 43 | 44 | cls, ids = convert_index_to_cluster_and_ids_lists(index,d) 45 | print("cls", cls) 46 | print("ids", ids) 47 | 48 | # Querying the index 49 | nq = 10 50 | queries = np.empty((nq, d // 8), dtype='uint8') 51 | print("queries", queries) 52 | k = 1 53 | D, I = index.search(queries, k) 54 | print("di",D,I) 55 | 56 | 57 | quantizer = faiss.downcast_IndexBinary(index.quantizer) 58 | print("Quantizer", type(quantizer)) 59 | centroids = faiss.vector_to_array(quantizer.xb) 60 | print("Centroids", type(centroids), centroids.shape) 61 | centroids = np.reshape(centroids, (quantizer.ntotal, quantizer.d//8)) 62 | print("Centroids", type(centroids), centroids.shape) 63 | print('centroids (binary):', centroids.shape, centroids.dtype) 64 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/AdvantechSky6200.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/AdvantechSky6200.pdf 
-------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/GPU.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/GPU.pdf -------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/RAM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/RAM.pdf -------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/SSD.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/SSD.pdf -------------------------------------------------------------------------------- /neurips21/t3/gemini/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.16.0 2 | scipy==1.0.0 3 | scikit-learn==0.19.1 4 | faiss==1.5.3 5 | docker==2.6.1 6 | psutil==5.6.6 7 | h5py==2.10.0 8 | ansicolors==1.1.8 9 | tqdm==4.62.2 10 | dataclasses==0.8 11 | pyyaml 12 | matplotlib 13 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/run_bin_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PATH=/usr/bin:$PATH which python3 4 | PATH=/usr/bin:$PATH python3 -c "import numpy;print('numpy',numpy.version.version)" 5 | PATH=/usr/bin:$PATH pip3 show numpy 6 | 7 | PATH=/usr/bin:$PATH python3 -c "import scipy;print('scipy',scipy.version.version)" 8 | PATH=/usr/bin:$PATH pip3 show scipy 9 | 10 | PATH=/usr/bin:$PATH python3 -c "import sklearn;print('sklearn',sklearn.__version__)" 11 | PATH=/usr/bin:$PATH pip3 show sklearn 12 | 13 | PATH=/usr/bin:$PATH python3 -c "import faiss;print('faiss',faiss.__version__)" 14 | PATH=/usr/bin:$PATH pip3 show faiss 15 | 16 | PATH=/usr/bin:$PATH LD_LIBRARY_PATH="./gsl_resources:$HOME/.local/lib/python3.6/site-packages/faiss" PYTHONPATH="./gsl_resources:$HOME/.local/lib/python3.6/site-packages/faiss" python3 run.py --t3 --nodocker --definitions t3/gemini/algos.yaml --dataset deep-1B --runs 1 17 | 18 | #PATH=/usr/bin:$PATH LD_LIBRARY_PATH="/home/silo/BigANN/big-ann-benchmarks/gsl_resources:/home/silo/.local/lib/python3.6/site-packages/faiss" PYTHONPATH="/home/silo/BigANN/big-ann-benchmarks/gsl_resources:/home/silo/.local/lib/python3.6/site-packages/faiss" python3 run.py --t3 --nodocker --definitions t3/gemini/algos.yaml --dataset deep-1B --runs 1 19 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/run_conda_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | 4 | #conda activate bigann-silo-py369 5 | which python3 6 | which pip3 7 | 8 | python3 -c "import numpy;print('numpy',numpy.version.version)" 9 | pip3 show numpy 10 | 11 | python3 -c "import scipy;print('scipy',scipy.version.version)" 12 | pip3 show scipy 13 | 14 | python3 -c "import sklearn;print('sklearn',sklearn.__version__)" 15 | pip3 show sklearn 16 | 17 | python3 -c "import faiss;print('faiss',faiss.__version__)" 18 | pip3 show 
faiss 19 | 20 | LD_LIBRARY_PATH=./gsl_resources PYTHONPATH=./gsl_resources python3 run.py --t3 --nodocker --definitions t3/gemini/algos.yaml --dataset deep-1B --runs 1 21 | -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/__init__.py -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/deep-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/test_bow_id_selector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import faiss 3 | import bow_id_selector 4 | 5 | sp = faiss.swig_ptr 6 | 7 | from benchmark.datasets import DATASETS 8 | 9 | ds = DATASETS["yfcc-10M"]() 10 | 11 | print("load dataset + query metadata") 12 | meta_b = ds.get_dataset_metadata() 13 | meta_q = ds.get_queries_metadata() 14 | print("Sort") 15 | meta_b.sort_indices() 16 | 17 | #size_t nb, const int64_t *lims, const int32_t *indices, 18 | # int32_t w1, int32_t w2): 19 | 20 | 21 | print(meta_b.indptr.dtype) 22 | 23 | rs = 
np.random.RandomState(123) 24 | 25 | 26 | 27 | def csr_get_row_indices(m, i): 28 | """ get the non-0 column indices for row i in matrix m """ 29 | return m.indices[m.indptr[i] : m.indptr[i + 1]] 30 | 31 | print("TEST") 32 | 33 | for i in range(500): 34 | j = rs.choice(meta_b.shape[0]) 35 | row = csr_get_row_indices(meta_b, j) 36 | 37 | if len(row) < 3: 38 | continue 39 | 40 | w12 = rs.choice(row, 2) 41 | 42 | cc = rs.choice(3) 43 | if cc == 1: 44 | w12[1] = -1 45 | 46 | if cc == 2: 47 | w12[i % 2] += 1 48 | if w12[i % 2] in row: 49 | continue 50 | 51 | sel = bow_id_selector.IDSelectorBOW( 52 | meta_b.shape[0], sp(meta_b.indptr), 53 | sp(meta_b.indices)) 54 | 55 | sel.set_query_words(int(w12[0]), int(w12[1])) 56 | 57 | if cc == 0 or cc == 1: 58 | assert sel.is_member(int(j)) 59 | else: 60 | assert not sel.is_member(int(j)) 61 | 62 | 63 | def intersect_sorted(a1, a2): 64 | n1, = a1.shape 65 | n2, = a2.shape 66 | res = np.empty(n1 + n2, dtype=a1.dtype) 67 | nres = bow_id_selector.intersect_sorted( 68 | n1, faiss.swig_ptr(a1), 69 | n2, faiss.swig_ptr(a2), 70 | faiss.swig_ptr(res) 71 | ) 72 | return res[:nres] 73 | 74 | 75 | print(intersect_sorted( 76 | np.array([1, 3, 6, 8, 10], dtype='int32'), 77 | np.array([1, 5,8, 10], dtype='int32') 78 | )) -------------------------------------------------------------------------------- /neurips21/track3_baseline_faiss/plots/T3_deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track3_baseline_faiss/plots/T3_deep-1B.png -------------------------------------------------------------------------------- /neurips23/Azure_D8lds_v5_table.md: -------------------------------------------------------------------------------- 1 | | dataset | algorithm | qps | 2 | |-----------------|-----------------------|----------------| 3 | | yfcc-10M | parlayivf | 37670.703774 | 4 | | (filter track) | puck | 19153.425169 | 5 | | | hwtl_sdu_anns_filter | 15188.577106 | 6 | | | wm_filter | 14076.445534 | 7 | | | dhq | 13517.047874 | 8 | | | fdufilterdiskann | 5752.463409 | 9 | | | pyanns | 5335.916507 | 10 | | | faissplus | 3625.027286 | 11 | | | faiss | 3252.682553 | 12 | | | cufe | 2291.031703 | 13 | | text2image-10M | mysteryann | 22555.248017 | 14 | | (OOD track) | pyanns [1] | 22295.584534 | 15 | | | mysteryann-dif | 22491.577263 | 16 | | | sustech-ood | 13772.370641 | 17 | | | puck | 8699.573200 | 18 | | | vamana | 6753.344080 | 19 | | | ngt | 6373.934425 | 20 | | | epsearch | 5876.982706 | 21 | | | diskann | 4132.829728 | 22 | | | cufe | 3561.416286 | 23 | | sparse-full | pyanns [1] | 6499.652881 | 24 | | (sparse track) | shnsw | 5078.449772 | 25 | | | NLE-Full | 1314.194166 | 26 | | | nle | 1312.961060 | 27 | | | sustech-whu [2] | 788.168885 | 28 | | | cufe | 97.860465 | 29 | | | linscan | 95.098871 | 30 | 31 | 32 | [1] The entry was from an author affiliated with Zilliz, a company involved in the organizing team. The conflict was not disclosed by the author, and was discovered post evaluation. 33 | [2] Build time exceeded 12 hours 34 | 35 | Table lists highest QPS measured with at least 90% recall@10. Private queries were used for yfcc-10M and sparse-full and public queries for text2image-10M. 
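As a worked illustration of how such an operating point is derived: for each algorithm, keep only the runs whose recall@10 meets the 90% threshold and report the maximum QPS among them. A minimal sketch (the function name and the numbers below are made up for illustration; this is not the benchmark's own export code):

```python
def best_qps_at_recall(runs, min_recall=0.9):
    """Highest QPS among runs with recall@10 >= min_recall, else None.

    `runs` is a list of (recall_at_10, qps) pairs, e.g. one pair per
    query-args configuration of a single algorithm.
    """
    qualifying = [qps for recall, qps in runs if recall >= min_recall]
    return max(qualifying) if qualifying else None

# Illustrative numbers only, not leaderboard data:
print(best_qps_at_recall([(0.87, 41000.0), (0.92, 37000.0), (0.95, 25000.0)]))
# prints 37000.0 -- the 41000.0 run is discarded because its recall is below 0.9
```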
-------------------------------------------------------------------------------- /neurips23/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:jammy 2 | 3 | RUN apt-get update && apt-get install -y python3-numpy python3-scipy python3-pip build-essential git axel wget 4 | RUN wget https://aka.ms/downloadazcopy-v10-linux && mv downloadazcopy-v10-linux azcopy.tgz && tar xzf azcopy.tgz --transform 's!^[^/]\+\($\|/\)!azcopy_folder\1!' 5 | RUN cp azcopy_folder/azcopy /usr/bin 6 | 7 | RUN pip3 install -U pip 8 | 9 | WORKDIR /home/app 10 | COPY requirements_py3.10.txt run_algorithm.py ./ 11 | RUN pip3 install -r requirements_py3.10.txt 12 | 13 | ENTRYPOINT ["python3", "-u", "run_algorithm.py"] 14 | -------------------------------------------------------------------------------- /neurips23/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/__init__.py -------------------------------------------------------------------------------- /neurips23/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | from neurips23.filter.run import FilterRunner 5 | from neurips23.sparse.run import SparseRunner 6 | from neurips23.ood.run import OODRunner 7 | from neurips23.streaming.run import StreamingRunner 8 | 9 | def docker_tag_base(): 10 | return 'neurips23' 11 | 12 | def basedir(): 13 | return 'neurips23' 14 | 15 | def docker_tag(track, algo): 16 | return docker_tag_base() + '-' + track + '-' + algo 17 | 18 | def dockerfile_path_base(): 19 | return os.path.join('neurips23', 'Dockerfile') 20 | 21 | def track_path(track): 22 | return os.path.join('neurips23', track) 23 | 24 | def dockerfile_path(track, algo): 25 | return os.path.join(track_path(track), algo, 'Dockerfile') 26 | 27 | def yaml_path(track, algo): 28 | return os.path.join(track_path(track), algo, 'config.yaml') 29 | 30 | def get_definitions(track, algo): 31 | return yaml.load(yaml_path(track, algo)) 32 | 33 | RUNNERS = { 34 | "filter": FilterRunner, 35 | "sparse": SparseRunner, 36 | "ood": OODRunner, 37 | "streaming": StreamingRunner 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /neurips23/ec2_c6i.2xlarge_table.md: -------------------------------------------------------------------------------- 1 | | dataset | algorithm | qps | 2 | |-----------------|-----------------------|----------------| 3 | | yfcc-10M | parlayivf | 30744.178014 | 4 | | (filter track) | puck | 17160.390356 | 5 | | | hwtl_sdu_anns_filter | 15433.837498 | 6 | | | wm_filter | 13723.065895 | 7 | | | fdufilterdiskann | 6085.293763 | 8 | | | pyanns | 5260.477613 | 9 | | | faissplus | 3851.283822 | 10 | | | rubignn | 3289.234566 | 11 | | | faiss | 3254.200190 | 12 | | text2image-10M | pyanns [1] | 22476.070400 | 13 | | (OOD track) | mysteryann-dif | 17764.966620 | 14 | | | mysteryann | 17665.716757 | 15 | | | sustech-ood | 11594.134313 | 16 | | | puck-fizz | 8460.214238 | 17 | | | puck | 8167.845988 | 18 | | | vamana | 6322.589569 | 19 | | | cufe | 3414.617622 | 20 | | sparse-full | pyanns [1] | 6280.871386 | 21 | | (sparse track) | shnsw | 4359.145718 | 22 | | | nle | 1297.986119 | 23 | | | sustech-whu [2] | 670.864748 | 24 | | | cufe | 64.665603 | 25 | | | linscan | 63.026394 | 26 | 27 | [1] The entry was from an author affiliated with 
Zilliz, a company involved in the organizing team. The conflict was not disclosed by the author, and was discovered post evaluation. 28 | [2] Build time exceeded 12 hours (13 hours, 34 minutes) 29 | 30 | Table lists highest QPS measured with at least 90% recall@10. Private queries were used for yfcc-10M and sparse-full and public queries for text2image-10M. 31 | -------------------------------------------------------------------------------- /neurips23/filter/base.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base import BaseANN 2 | 3 | class BaseFilterANN(BaseANN): 4 | def filtered_query(self, X, filter, k): 5 | """ 6 | Carry out a batch query for k-NN of query set X with associated filter. 7 | Query X[i] asks for the k-NN in the index that pass all filters in filter[i]. 8 | """ 9 | raise NotImplementedError() 10 | 11 | def track(self): 12 | return "filter" -------------------------------------------------------------------------------- /neurips23/filter/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | 15 | COPY neurips23/filter/cufe/bow_id_selector.swig ./ 16 | 17 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 18 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 19 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 20 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 21 | 22 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /neurips23/filter/cufe/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | cufe: 3 | docker-tag: neurips23-filter-cufe 4 | module: neurips23.filter.cufe.faissCUFE 5 | constructor: faissCUFE 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | cufe: 17 | docker-tag: neurips23-filter-cufe 18 | module: neurips23.filter.cufe.faissCUFE 19 | constructor: faissCUFE 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | cufe: 31 | docker-tag: neurips23-filter-cufe 32 | module: neurips23.filter.cufe.faissCUFE 33 | constructor: faissCUFE 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | cufe: 43 | docker-tag: neurips23-filter-cufe 44 | module: neurips23.filter.cufe.faissCUFE 45 | constructor: faissCUFE 46 | base-args: ["@metric"] 47 | run-groups: 48 | base: 49 |
args: | 50 | [{"indexkey": "IVF4096,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [{"nprobe": 4, "mt_threshold":0.0003}, 56 | {"nprobe": 16, "mt_threshold":0.0003}, 57 | {"nprobe": 4, "mt_threshold":0.0001}, 58 | {"nprobe": 16, "mt_threshold":0.0001}, 59 | {"nprobe": 10, "mt_threshold":0.0001}, 60 | {"nprobe": 8, "mt_threshold": 0.0003}, 61 | {"nprobe": 32, "mt_threshold": 0.00033}, 62 | {"nprobe": 30, "mt_threshold": 0.00033}, 63 | {"nprobe": 12, "mt_threshold": 0.0002}, 64 | {"nprobe": 16, "mt_threshold": 0.00033} 65 | ] 66 | 67 | -------------------------------------------------------------------------------- /neurips23/filter/dhq/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 python3-numpy python3-scipy python3-pip build-essential git libblas-dev liblapack-dev wget libaio-dev libgoogle-perftools-dev clang-format libboost-all-dev libopenblas-dev liblapacke-dev 8 | 9 | 10 | 11 | RUN apt-get update; DEBIAN_FRONTEND=noninteractive apt install intel-mkl python3-setuptools wget python3-matplotlib build-essential checkinstall libssl-dev swig4.0 python3-dev python3-numpy python3-numpy-dev -y 12 | COPY install/requirements_conda.txt ./ 13 | # conda doesn't like some of our packages, use pip 14 | RUN python3 -m pip install -r requirements_conda.txt 15 | 16 | RUN pip3 install -U pip numpy pybind11 tqdm 17 | RUN git clone https://github.com/SDU-L/DHQ.git 18 | RUN chmod -R +777 DHQ/ 19 | RUN cp /home/app/DHQ/faiss/build/faiss/python/libfaiss_python_callbacks.so /lib 20 | RUN cp /home/app/DHQ/faiss/build/faiss/libfaiss.so /lib 21 | RUN cp /home/app/DHQ/faiss/build/faiss/libfaiss_avx2.so /lib 22 | RUN cp /home/app/DHQ/faiss/build/faiss/python/_swigfaiss_avx2.so /lib 23 | RUN cp /home/app/DHQ/faiss/build/faiss/python/_swigfaiss.so /lib 24 | #RUN 25 | RUN cd /home/app/DHQ/faiss/build/faiss/python/ && python3 setup.py install 26 | 27 | RUN cd /home/app 28 | 29 | ENV PYTHONPATH=DHQ/faiss/build/faiss/python/build/lib/ 30 | RUN python3 -c 'import faiss; print(faiss.IDSelectorFilterWise); print(faiss.__version__)' 31 | RUN pip3 install DHQ/DHQ-1.0.3-cp310-cp310-linux_x86_64.whl 32 | RUN python3 -c 'import DHQ' 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /neurips23/filter/dhq/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | dhq: 3 | docker-tag: neurips23-filter-dhq 4 | module: neurips23.filter.dhq.dhq 5 | constructor: DHQINDEX 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"L": 200, "R": 24, "level": 2, "indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [ 13 | {"nprobe": 110, "num": 2100, "ef": 11, "random": 40}, 14 | {"nprobe": 110, "num": 2100, "ef": 13, "random": 35}, 15 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 30}, 16 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 45}, 17 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 42}, 18 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 41}, 19 | {"nprobe": 145, "num": 2100, "ef": 10, "random": 35}, 20 | {"nprobe": 115, "num": 2400, "ef": 
10, "random": 30}, 21 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 36}, 22 | {"nprobe": 155, "num": 2100, "ef": 10, "random": 35} 23 | ] 24 | 25 | yfcc-10M: 26 | dhq: 27 | docker-tag: neurips23-filter-dhq 28 | module: neurips23.filter.dhq.dhq 29 | constructor: DHQINDEX 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{"L": 200, "R": 24, "level": 2}] 35 | query-args: | 36 | [ 37 | {"nprobe": 110, "num": 2100, "ef": 11, "random": 40}, 38 | {"nprobe": 110, "num": 2100, "ef": 13, "random": 35}, 39 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 30}, 40 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 45}, 41 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 42}, 42 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 41}, 43 | {"nprobe": 145, "num": 2100, "ef": 10, "random": 35}, 44 | {"nprobe": 115, "num": 2400, "ef": 10, "random": 30}, 45 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 36}, 46 | {"nprobe": 155, "num": 2100, "ef": 10, "random": 35} 47 | ] 48 | -------------------------------------------------------------------------------- /neurips23/filter/faiss/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | 15 | COPY neurips23/filter/faiss/bow_id_selector.swig ./ 16 | 17 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 18 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 19 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 20 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 21 | 22 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /neurips23/filter/faiss/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | faiss: 3 | docker-tag: neurips23-filter-faiss 4 | module: neurips23.filter.faiss.faiss 5 | constructor: FAISS 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | faiss: 17 | docker-tag: neurips23-filter-faiss 18 | module: neurips23.filter.faiss.faiss 19 | constructor: FAISS 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | faiss: 31 | docker-tag: neurips23-filter-faiss 32 | module: neurips23.filter.faiss.faiss 33 | constructor: FAISS 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | faiss: 43 | docker-tag: neurips23-filter-faiss 44 | module: neurips23.filter.faiss.faiss 45 | constructor: FAISS 46 | 
base-args: ["@metric"] 47 | run-groups: 48 | base: 49 | args: | 50 | [{"indexkey": "IVF16384,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [{"nprobe": 1, "mt_threshold":0.0003}, 56 | {"nprobe": 4, "mt_threshold":0.0003}, 57 | {"nprobe": 16, "mt_threshold":0.0003}, 58 | {"nprobe": 32, "mt_threshold":0.0003}, 59 | {"nprobe": 64, "mt_threshold":0.0003}, 60 | {"nprobe": 96, "mt_threshold":0.0003}, 61 | {"nprobe": 1, "mt_threshold":0.0001}, 62 | {"nprobe": 4, "mt_threshold":0.0001}, 63 | {"nprobe": 16, "mt_threshold":0.0001}, 64 | {"nprobe": 32, "mt_threshold":0.0001}, 65 | {"nprobe": 64, "mt_threshold":0.0001}, 66 | {"nprobe": 96, "mt_threshold":0.0001}, 67 | {"nprobe": 1, "mt_threshold":0.01}, 68 | {"nprobe": 4, "mt_threshold":0.01}, 69 | {"nprobe": 16, "mt_threshold":0.01}, 70 | {"nprobe": 32, "mt_threshold":0.01}, 71 | {"nprobe": 64, "mt_threshold":0.01}, 72 | {"nprobe": 96, "mt_threshold":0.01} 73 | ] 74 | 75 | -------------------------------------------------------------------------------- /neurips23/filter/faissplus/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | 15 | COPY neurips23/filter/faissplus/bow_id_selector.swig ./ 16 | 17 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 18 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 19 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 20 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 21 | 22 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 23 | 24 | -------------------------------------------------------------------------------- /neurips23/filter/faissplus/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | faissplus: 3 | docker-tag: neurips23-filter-faissplus 4 | module: neurips23.filter.faissplus.faiss 5 | constructor: FAISS 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | faissplus: 17 | docker-tag: neurips23-filter-faissplus 18 | module: neurips23.filter.faissplus.faiss 19 | constructor: FAISS 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | faissplus: 31 | docker-tag: neurips23-filter-faissplus 32 | module: neurips23.filter.faissplus.faiss 33 | constructor: FAISS 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | faissplus: 43 | docker-tag: neurips23-filter-faissplus 44 | module: neurips23.filter.faissplus.faiss 45 | constructor: FAISS 46 | 
base-args: ["@metric"] 47 | run-groups: 48 | base: 49 | args: | 50 | [{"indexkey": "IVF11264,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [ 56 | {"nprobe": 34, "mt_threshold": 0.00031}, 57 | {"nprobe": 32, "mt_threshold": 0.0003}, 58 | {"nprobe": 32, "mt_threshold": 0.00031}, 59 | {"nprobe": 34, "mt_threshold": 0.0003}, 60 | {"nprobe": 34, "mt_threshold": 0.00035}, 61 | {"nprobe": 32, "mt_threshold": 0.00033}, 62 | {"nprobe": 30, "mt_threshold": 0.00033}, 63 | {"nprobe": 32, "mt_threshold": 0.00035}, 64 | {"nprobe": 34, "mt_threshold": 0.00033}, 65 | {"nprobe": 40, "mt_threshold": 0.0003} 66 | ] 67 | -------------------------------------------------------------------------------- /neurips23/filter/fdufilterdiskann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | # COPY FilterDiskann /home/app/FilterDiskann 10 | WORKDIR /home/app 11 | RUN git clone --recursive --branch main https://github.com/PUITAR/FduFilterDiskANN.git 12 | WORKDIR /home/app/FduFilterDiskANN/pybindings 13 | 14 | RUN pip3 install virtualenv build 15 | RUN pip3 install pybind11[global] 16 | RUN pip3 install . 17 | WORKDIR /home/app 18 | -------------------------------------------------------------------------------- /neurips23/filter/fdufilterdiskann/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | fdufilterdiskann: 3 | docker-tag: neurips23-filter-fdufilterdiskann 4 | module: neurips23.filter.fdufilterdiskann.fdufilterdiskann 5 | constructor: fdufilterdiskann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":2, "L":10, "buildthreads":16, "alpha":1.2}] 11 | query-args: | 12 | [{"Ls":10, "T":1, "threshold_1":20000, "threshold_2":40000}] 13 | yfcc-10M: 14 | fdufilterdiskann: 15 | docker-tag: neurips23-filter-fdufilterdiskann 16 | module: neurips23.filter.fdufilterdiskann.fdufilterdiskann 17 | constructor: fdufilterdiskann 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":60, "L":80, "buildthreads":16, "alpha":1.0}] 23 | query-args: | 24 | [ 25 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5000}, 26 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5005}, 27 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5010}, 28 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5015}, 29 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5020}, 30 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5025}, 31 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5030}, 32 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5035}, 33 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5040}, 34 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5045} 35 | ] 36 | 37 | -------------------------------------------------------------------------------- /neurips23/filter/hwtl_sdu_anns_filter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install apt-utils 5 | RUN apt update && apt install -y software-properties-common 6 | RUN 
add-apt-repository -y ppa:git-core/ppa 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 python3-numpy python3-scipy python3-pip build-essential git libblas-dev liblapack-dev wget libaio-dev libgoogle-perftools-dev clang-format libboost-all-dev libopenblas-dev liblapacke-dev 8 | 9 | 10 | 11 | RUN apt update && apt install -y wget swig 12 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 13 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 14 | 15 | ENV PATH /root/anaconda3/bin:$PATH 16 | ENV CONDA_PREFIX /root/anaconda3/ 17 | 18 | RUN conda install -c pytorch faiss-cpu 19 | COPY install/requirements_conda.txt ./ 20 | # conda doesn't like some of our packages, use pip 21 | RUN python3 -m pip install -r requirements_conda.txt 22 | WORKDIR /home/app/ 23 | RUN pip3 install -U pip pybind11 numpy 24 | 25 | RUN git clone https://github.com/WPJiang/HWTL_SDU-ANNS-filter.git 26 | RUN cp ./HWTL_SDU-ANNS-filter/bow_id_selector.py ./ 27 | RUN cp ./HWTL_SDU-ANNS-filter/_bow_id_selector.so ./ 28 | 29 | ENV LD_PRELOAD /root/anaconda3/lib/libmkl_core.so:/root/anaconda3/lib/libmkl_sequential.so 30 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /neurips23/filter/hwtl_sdu_anns_filter/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | hwtl_sdu_anns_filter: 3 | docker-tag: neurips23-filter-hwtl_sdu_anns_filter 4 | module: neurips23.filter.hwtl_sdu_anns_filter.hwtl_sdu_anns_filter 5 | constructor: hwtl_sdu_anns_filter 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"default": 1}] 11 | query-args: | 12 | [{"nprobe": 4, "expansion":3, "threshold":3000}, 13 | {"nprobe": 8, "expansion":3, "threshold":3000}, 14 | {"nprobe": 12, "expansion":3, "threshold":3000}, 15 | {"nprobe": 16, "expansion":3, "threshold":3000}, 16 | {"nprobe": 32, "expansion":3, "threshold":3000} 17 | ] 18 | 19 | yfcc-10M: 20 | hwtl_sdu_anns_filter: 21 | docker-tag: neurips23-filter-hwtl_sdu_anns_filter 22 | module: neurips23.filter.hwtl_sdu_anns_filter.hwtl_sdu_anns_filter 23 | constructor: hwtl_sdu_anns_filter 24 | base-args: ["@metric"] 25 | run-groups: 26 | base: 27 | args: | 28 | [{"L": 200, "R": 32, "level": 2, "threads": 16}] 29 | query-args: | 30 | [{"nprobe": 4, "expansion":3, "threshold":3000}, 31 | {"nprobe": 8, "expansion":3, "threshold":3000}, 32 | {"nprobe": 12, "expansion":3, "threshold":3000}, 33 | {"nprobe": 16, "expansion":3, "threshold":3000}, 34 | {"nprobe": 32, "expansion":3, "threshold":3000}, 35 | {"nprobe": 4, "expansion":4, "threshold":3000}, 36 | {"nprobe": 8, "expansion":4, "threshold":3000}, 37 | {"nprobe": 12, "expansion":4, "threshold":3000}, 38 | {"nprobe": 16, "expansion":4, "threshold":3000}, 39 | {"nprobe": 32, "expansion":4, "threshold":3000} 40 | ] 41 | -------------------------------------------------------------------------------- /neurips23/filter/install_neurips23.sh: -------------------------------------------------------------------------------- 1 | python install.py --neurips23track filter --algorithm cufe 2 | python install.py --neurips23track filter --algorithm dhq 3 | python install.py --neurips23track filter --algorithm faiss 4 | python install.py --neurips23track filter 
--algorithm faissplus 5 | python install.py --neurips23track filter --algorithm fdufilterdiskann 6 | python install.py --neurips23track filter --algorithm hwtl_sdu_anns_filter 7 | python install.py --neurips23track filter --algorithm parlayivf 8 | python install.py --neurips23track filter --algorithm puck 9 | python install.py --neurips23track filter --algorithm pyanns 10 | python install.py --neurips23track filter --algorithm wm_filter 11 | -------------------------------------------------------------------------------- /neurips23/filter/operating_points_private_queries_AzureD8lds_v5.csv: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | yfcc-10M cufe 2291.031703 4 | dhq 13517.047874 5 | faiss 3252.682553 6 | faissplus 3625.027286 7 | fdufilterdiskann 5752.463409 8 | hwtl_sdu_anns_filter 15188.577106 9 | parlayivf 37670.703774 10 | puck 19153.425169 11 | pyanns 5335.916507 12 | wm_filter 14076.445534 13 | -------------------------------------------------------------------------------- /neurips23/filter/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | yfcc-10M cufe 2917.132715 4 | dhq 13670.864704 5 | faiss 3032.534357 6 | faissplus 3776.539092 7 | fdufilterdiskann 5679.748583 8 | hwtl_sdu_anns_filter 15059.124141 9 | parlayivf 37902.113726 10 | puck 19193.294823 11 | pyanns 5184.844352 12 | wm_filter 14467.961514 13 | -------------------------------------------------------------------------------- /neurips23/filter/parlayivf/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | # RUN echo "docker caching is so painful pt. 31" 10 | 11 | RUN git clone -b filter https://github.com/cmuparlay/ParlayANN.git && cd ParlayANN && git checkout f7208ba && git submodule update --init --recursive && cd python && pip install pybind11 && bash compile.sh 12 | # WORKDIR /home/app/ParlayANN 13 | # RUN git submodule update --init --recursive 14 | # WORKDIR /home/app/ParlayANN/python 15 | 16 | # RUN pip install pybind11 17 | 18 | # RUN bash compile.sh 19 | 20 | ENV PYTHONPATH=$PYTHONPATH:/home/app/ParlayANN/python 21 | 22 | # ENV PARLAY_NUM_THREADS=8 23 | 24 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/filter/parlayivf/README.md: -------------------------------------------------------------------------------- 1 | # ParlayANN's IVF² 2 | ## Introduction 3 | This submission is from the team at Carnegie Mellon and the University of Maryland responsible for the [ParlayANN library](https://github.com/cmuparlay/ParlayANN), a collection of algorithms for approximate nearest neighbor search implemented with [ParlayLib](https://github.com/cmuparlay/parlaylib), an efficient library for shared memory parallelism. 
4 | 5 | ## Approach 6 | We leverage the fact that filtered search, especially when using 'and' queries, provides beneficial constraints on the set of points needing to be searched, and the fact that IVF indices can be stored in relatively little memory. The name IVF² refers to the fact we treat the filters like an actual inverted file index (in contrast to the 'IVF' indices used by faiss, pgvector, etc. for vector search), and the posting lists of the filters are (above a size threshold) themselves IVF indices. 7 | 8 | More details to come. 9 | -------------------------------------------------------------------------------- /neurips23/filter/pinecone/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | # install MKL support 4 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libmkl-full-dev 5 | 6 | # copy and install the pys2 python package 7 | RUN git clone --branch filter https://github.com/pinecone-io/bigann.git 8 | RUN pip install ./bigann/*.whl 9 | # verify that the build worked 10 | RUN python3 -c 'import pys2;' 11 | 12 | -------------------------------------------------------------------------------- /neurips23/filter/pinecone/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone filter ANN algorithm 2 | 3 | Our algorithm is based on the classical IVF architecture, where the data is first divided into geometrical clusters, 4 | combined with a metadata inverted index: for every metadata tag, we store a list of vectors with that tag. 5 | 6 | Given a query, we first evaluate its level of selectivity (i.e., the count of vectors that pass the filter), 7 | and scan a varying number of clusters for that query. 8 | We efficiently scan only the relevant vectors based on the inverted index, so the number of operations is 9 | O(query selectivity) rather than O(# of vectors in the selected clusters). 10 | The intuition is that for wide queries, the closest vectors are in neighboring clusters, 11 | and for more selective queries there is a need to scan more clusters. 12 | Additionally, we make sure that a minimal number of relevant vectors have been scanned, 13 | to account for queries whose selectivity is less localized. 14 | 15 | To accelerate the search, we pre-compute some of the intersections of the inverted lists (based on their size), 16 | and use AVX for efficient computation of distances. 17 | To optimize the hyperparameters on the public query set, we formalized the problem as a constrained convex 18 | optimization problem, assigning the optimal recall value for each selectivity bucket. 19 | For the most selective queries, it turns out that it is beneficial to simply scan all relevant vectors 20 | (and ignore the geometrical clustering). 
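To make the selectivity-driven probing concrete, here is a minimal sketch under stated assumptions. It is not Pinecone's (closed-source) code: the probing rule, data layout, and function names are illustrative guesses that loosely mirror the `fraction_coefficient`, `fraction_exponent` and `skip_clustering_threshold` knobs in the config.yaml that follows.

```python
# Illustrative sketch only -- not the Pinecone submission.
# inverted_index: dict tag -> sorted numpy array of vector ids
# cluster_of:     (n,) array giving the IVF cluster of each vector
# centroids:      (n_clusters, d) array; xb: (n, d) base vectors
import numpy as np

def matching_ids(tags, inverted_index):
    """Ids passing an AND filter over `tags`; their count is the query's selectivity."""
    ids = inverted_index[tags[0]]
    for t in tags[1:]:
        ids = np.intersect1d(ids, inverted_index[t], assume_unique=True)
    return ids

def filtered_search(q, tags, xb, cluster_of, centroids, inverted_index, k=10,
                    fraction_coefficient=13.0, fraction_exponent=0.65,
                    skip_clustering_threshold=2000):
    ids = matching_ids(tags, inverted_index)
    if len(ids) > skip_clustering_threshold:
        # Guess at the probing rule: the more selective the query (fewer
        # matching ids), the more clusters get scanned.
        nprobe = int(round(fraction_coefficient * (len(xb) / len(ids)) ** fraction_exponent))
        nprobe = min(max(nprobe, 1), len(centroids))
        top = np.argsort(np.linalg.norm(centroids - q, axis=1))[:nprobe]
        ids = ids[np.isin(cluster_of[ids], top)]
    # Exact scan over the surviving candidates only: O(selectivity), not O(cluster sizes).
    dists = np.linalg.norm(xb[ids] - q, axis=1)
    return ids[np.argsort(dists)[:k]]
```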
-------------------------------------------------------------------------------- /neurips23/filter/pinecone/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | pinecone: 3 | docker-tag: neurips23-filter-pinecone 4 | module: neurips23.filter.pinecone.pinecone_index 5 | constructor: PineconeIndex 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "FilterIVFFlatU8", "num_clusters": "128", "precompute_intersection_threshold": "5000"}] 11 | query-args: | 12 | [ 13 | {"fraction_coefficient": "0.3", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000}, 14 | {"fraction_coefficient": "0.7", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000}, 15 | {"fraction_coefficient": "1.0", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000}, 16 | {"fraction_coefficient": "2.0", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000} 17 | ] 18 | yfcc-10M: 19 | pinecone: 20 | docker-tag: neurips23-filter-pinecone 21 | module: neurips23.filter.pinecone.pinecone_index 22 | constructor: PineconeIndex 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: | 27 | [{"indexkey": "FilterIVFFlatU8", "num_clusters": "2048", "precompute_intersection_threshold": "1600"}] 28 | query-args: | 29 | [ 30 | {"fraction_coefficient": "13.0", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 31 | {"fraction_coefficient": "12.0", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 32 | {"fraction_coefficient": "11.5", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 33 | {"fraction_coefficient": "11.0", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 34 | {"fraction_coefficient": "10.5", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 35 | {"fraction_coefficient": "10.5", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 36 | {"fraction_coefficient": "10.0", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 37 | {"fraction_coefficient": "9.5", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 38 | {"fraction_coefficient": "9.0", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 39 | {"fraction_coefficient": "8.5", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000} 40 | ] 41 | -------------------------------------------------------------------------------- /neurips23/filter/plot_public_queries_AzureD8lds_v5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/filter/plot_public_queries_AzureD8lds_v5.png -------------------------------------------------------------------------------- /neurips23/filter/puck/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 13 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 14 | ENV PATH /home/app/cmake/bin:$PATH 15 | 16 | #mkl 17 
| RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 18 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 19 | 20 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN touch /etc/profile.d/intel.sh 23 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 24 | RUN . /etc/profile.d/intel.sh 25 | 26 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 27 | #RUN git config --global http.sslVerify false 28 | 29 | RUN git clone -b filter https://github.com/baidu/puck.git 30 | RUN cd puck && . /etc/profile.d/intel.sh && python3 setup.py install 31 | RUN python3 -c 'from puck import py_puck_api' 32 | -------------------------------------------------------------------------------- /neurips23/filter/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | RUN python3 -m pip install pybind11 15 | 16 | COPY neurips23/filter/faiss/bow_id_selector.swig ./ 17 | 18 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 19 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 20 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 21 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 22 | 23 | RUN git clone https://github.com/veaaaab/uint8_knn.git 24 | WORKDIR /home/app/uint8_knn 25 | RUN bash build.sh 26 | 27 | WORKDIR /home/app 28 | 29 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /neurips23/filter/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | pyanns: 3 | docker-tag: neurips23-filter-pyanns 4 | module: neurips23.filter.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | pyanns: 17 | docker-tag: neurips23-filter-pyanns 18 | module: neurips23.filter.pyanns.pyanns 19 | constructor: Pyanns 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | pyanns: 31 | docker-tag: neurips23-filter-pyanns 32 | module: neurips23.filter.pyanns.pyanns 33 | constructor: Pyanns 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | pyanns: 43 | 
docker-tag: neurips23-filter-pyanns 44 | module: neurips23.filter.pyanns.pyanns 45 | constructor: Pyanns 46 | base-args: ["@metric"] 47 | run-groups: 48 | base: 49 | args: | 50 | [{"indexkey": "IVF16384,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [ 56 | {"nprobe": 16, "mt_threshold":0.0032}, 57 | {"nprobe": 16, "mt_threshold":0.0035}, 58 | {"nprobe": 32, "mt_threshold":0.001} 59 | ] 60 | -------------------------------------------------------------------------------- /neurips23/filter/run.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base_runner import BaseRunner 2 | import time 3 | 4 | class FilterRunner(BaseRunner): 5 | def run_task(algo, ds, distance, count, run_count, search_type, private_query): 6 | best_search_time = float('inf') 7 | search_times = [] 8 | 9 | if not private_query: 10 | X = ds.get_queries() 11 | else: 12 | X = ds.get_private_queries() 13 | 14 | print(fr"Got {X.shape[0]} queries") 15 | 16 | for i in range(run_count): 17 | print('Run %d/%d...' % (i + 1, run_count)) 18 | 19 | start = time.time() 20 | if search_type == "knn": 21 | algo.query(X, count) 22 | total = (time.time() - start) 23 | results = algo.get_results() 24 | assert results.shape[0] == X.shape[0] 25 | elif search_type == "knn_filtered": 26 | if not private_query: 27 | metadata = ds.get_queries_metadata() 28 | else: 29 | metadata = ds.get_private_queries_metadata() 30 | algo.filtered_query(X, metadata, count) 31 | total = (time.time() - start) 32 | results = algo.get_results() 33 | assert results.shape[0] == X.shape[0] 34 | else: 35 | raise NotImplementedError() 36 | 37 | search_time = total 38 | best_search_time = min(best_search_time, search_time) 39 | search_times.append( search_time ) 40 | 41 | attrs = { 42 | "best_search_time": best_search_time, 43 | "name": str(algo), 44 | "run_count": run_count, 45 | "distance": distance, 46 | "type": search_type, 47 | "count": int(count), 48 | "search_times": search_times, 49 | "private_queries": private_query, 50 | } 51 | additional = algo.get_additional() 52 | for k in additional: 53 | attrs[k] = additional[k] 54 | return (attrs, results) 55 | 56 | -------------------------------------------------------------------------------- /neurips23/filter/wm_filter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update; DEBIAN_FRONTEND=noninteractive apt install intel-mkl python3-setuptools wget python3-matplotlib build-essential checkinstall libssl-dev swig4.0 python3-dev python3-numpy python3-numpy-dev -y 4 | COPY install/requirements_conda.txt ./ 5 | # conda doesn't like some of our packages, use pip 6 | RUN python3 -m pip install -r requirements_conda.txt 7 | 8 | 9 | # CMAKE with good enough version 10 | RUN mkdir /build && wget https://github.com/Kitware/CMake/archive/refs/tags/v3.27.1.tar.gz && mv v3.27.1.tar.gz /build 11 | RUN cd /build; tar -zxvf v3.27.1.tar.gz 12 | RUN cd /build/CMake-3.27.1 && ./bootstrap && make && make install 13 | 14 | 15 | RUN cd / && git clone https://github.com/alemagnani/faiss.git && cd /faiss && git pull && git checkout wm_filter 16 | 17 | RUN cd /faiss && rm -rf ./build 18 | RUN cd /faiss/; cmake -B build /faiss/ -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_PYTHON=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DFAISS_OPT_LEVEL=avx2 -DBLA_VENDOR=Intel10_64_dyn -DBUILD_TESTING=ON -DPython_EXECUTABLE=/usr/bin/python3 
-DMKL_LIBRARIES=/usr/lib/x86_64-linux-gnu/libmkl_rt.so 19 | RUN cd /faiss/; make -C build -j faiss faiss_avx2 swigfaiss swigfaiss_avx2 20 | RUN (cd /faiss/build/faiss/python && python3 setup.py install) 21 | 22 | #RUN pip install tritonclient[all] 23 | ENV PYTHONPATH=/faiss/build/faiss/python/build/lib/ 24 | 25 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /neurips23/filter/wm_filter/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Submission for Neurips23 Filter track of WM_filter team 3 | This submission leverages the IVF index to run the filter in a fast way. 4 | 5 | More info to come... 6 | 7 | 8 | -------------------------------------------------------------------------------- /neurips23/filter/wm_filter/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | wm_filter: 3 | docker-tag: neurips23-filter-wm_filter 4 | module: neurips23.filter.wm_filter.wm_filter 5 | constructor: FAISS 6 | base-args: [ "@metric" ] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8", 11 | "threads": 8, 12 | "train_size": 2000000, 13 | "type": "direct" 14 | }] 15 | query-args: | 16 | [ 17 | {"nprobe": 80, "max_codes": 100, "selector_probe_limit": 80}, 18 | {"nprobe": 100, "max_codes": 500, "selector_probe_limit": 100}, 19 | {"nprobe": 120, "max_codes": 1000, "selector_probe_limit": 120}, 20 | {"nprobe": 140, "max_codes": 1800, "selector_probe_limit": 140}, 21 | {"nprobe": 160, "max_codes": 500, "selector_probe_limit": 160}, 22 | {"nprobe": 70, "max_codes": 1000, "selector_probe_limit": 70} 23 | ] 24 | yfcc-10M: 25 | wm_filter: 26 | docker-tag: neurips23-filter-wm_filter 27 | module: neurips23.filter.wm_filter.wm_filter 28 | constructor: FAISS 29 | base-args: [ "@metric" ] 30 | run-groups: 31 | base: 32 | args: | 33 | [{"indexkey": "IVF1024,SQ8", 34 | "threads": 8, 35 | "train_size": 2000000, 36 | "type": "direct" 37 | }] 38 | query-args: | 39 | [ 40 | {"nprobe": 80, "max_codes": 1800, "selector_probe_limit": 80}, 41 | {"nprobe": 100, "max_codes": 1800, "selector_probe_limit": 100}, 42 | {"nprobe": 120, "max_codes": 1800, "selector_probe_limit": 120}, 43 | {"nprobe": 140, "max_codes": 1800, "selector_probe_limit": 140}, 44 | {"nprobe": 160, "max_codes": 1800, "selector_probe_limit": 160}, 45 | {"nprobe": 70, "max_codes": 2100, "selector_probe_limit": 70}, 46 | {"nprobe": 100, "max_codes": 2100, "selector_probe_limit": 100}, 47 | {"nprobe": 130, "max_codes": 2100, "selector_probe_limit": 130}, 48 | {"nprobe": 160, "max_codes": 2100, "selector_probe_limit": 160}, 49 | {"nprobe": 200, "max_codes": 2100, "selector_probe_limit": 200} 50 | ] 51 | -------------------------------------------------------------------------------- /neurips23/filter/zilliz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | # install MKL support 4 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libmkl-full-dev libaio-dev 5 | 6 | RUN git clone https://github.com/hhy3/zilliz-bigann.git --branch filter 7 | RUN pip install ./zilliz-bigann/*.whl 8 | 9 | 10 | -------------------------------------------------------------------------------- /neurips23/filter/zilliz/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | zilliz: 3 | 
docker-tag: neurips23-filter-zilliz 4 | module: neurips23.filter.zilliz.zilliz 5 | constructor: Zilliz 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R": 12, "L": 200, "threshold": 8000 11 | }] 12 | query-args: | 13 | [ 14 | {"ef": 16}, 15 | {"ef": 18}, 16 | {"ef": 20}, 17 | {"ef": 24}, 18 | {"ef": 30}, 19 | {"ef": 40}, 20 | {"ef": 50}, 21 | {"ef": 70} 22 | ] 23 | 24 | yfcc-10M: 25 | zilliz: 26 | docker-tag: neurips23-filter-zilliz 27 | module: neurips23.filter.zilliz.zilliz 28 | constructor: Zilliz 29 | base-args: ["@metric"] 30 | run-groups: 31 | base: 32 | args: | 33 | [{"R": 12, "L": 100, "threshold": 8000, "threshold2": 10000 34 | }] 35 | query-args: | 36 | [ 37 | {"ef": 16}, 38 | {"ef": 18}, 39 | {"ef": 19}, 40 | {"ef": 20}, 41 | {"ef": 22}, 42 | {"ef": 24}, 43 | {"ef": 26}, 44 | {"ef": 28}, 45 | {"ef": 32}, 46 | {"ef": 36} 47 | ] 48 | -------------------------------------------------------------------------------- /neurips23/leaderboard.md: -------------------------------------------------------------------------------- 1 | ### Leaderboard 2 | 3 | **Note**: this is the leaderboard of the original submissions for the NeurIPS'23 competition (Dec. 2023). To view the ongoing leaderboard, which also includes new results, see [here](ongoing_leaderboard/leaderboard.md). 4 | 5 | This leaderboard is based on the recall@10 vs. throughput benchmark that has become the standard way of evaluating and comparing approximate nearest neighbor algorithms. 6 | The recall of the baselines at this QPS threshold is listed [above](#measuring_your_algorithm). 7 | 8 | For the "Filter", "Out-of-Distribution" and "Sparse" tracks, algorithms were ranked on the QPS they achieve on the track dataset, as long as the recall@10 is at least 90%. 9 | The results files for [Azure D8lds_v5](Azure_D8lds_v5_table.md) and [AWS EC2 c6i.2xlarge](ec2_c6i.2xlarge_table.md) list the maximum QPS measured for each algorithm with at least 90% recall@10. 10 | 11 | For the Streaming track, algorithms were ranked on recall@10, as long as each algorithm completes the runbook within the allotted 1 hour. The leading entry had a recall of 0.9849. 12 | The [result file](streaming/res_final_runbook_AzureD8lds_v5.csv) lists measurements for all streaming algorithms on Azure D8lds_v5. 13 | 14 | QPS vs recall@10 plots for tracks based on public queries on Azure D8lds_v5: 15 | **Filter track** 16 | ![yfcc-10M](filter/plot_public_queries_AzureD8lds_v5.png) 17 | 18 | **OOD track** 19 | ![text2image-10M](ood/plot_public_queries_AzureD8lds_v5.png) 20 | 21 | **Sparse track** 22 | ![sparse-full](sparse/plot_public_queries_AzureD8lds_v5.png) 23 | 24 | More plots to follow. 25 | -------------------------------------------------------------------------------- /neurips23/notes/README.md: -------------------------------------------------------------------------------- 1 | # Additional notes for the NeurIPS'23 challenge 2 | 3 | - [HNSW vs. Vamana streaming comparison](streaming/hnsw_result/hnsw_result.md) (contributed by [ekzhu](https://github.com/ekzhu)) 4 | 5 | -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/hnsw_result.md: -------------------------------------------------------------------------------- 1 | # Final Runbook Results of Vamana and HNSW Implementations 2 | 3 | We look at the recall stability of DiskANN's Vamana and various HNSW implementations 4 | under the streaming workload provided by the final runbook.
5 | It is important to note that Vamana and HNSW are set up with different parameters, 6 | so rather than comparing the absolute recall values, we compare the stability of 7 | recall over the duration of the workload. 8 | 9 | ## Vamana and HNSW with search-based edge repair algorithm. 10 | 11 | Graph ANN indexes that support in-place deletes all need to perform 12 | edge repair to maintain the graph structure. Edge repair is done for each 13 | in-coming neighbor of a deleted point, as the deleted point is removed from 14 | the neighbor's adjacency list. 15 | 16 | The search-based edge repair algorithm is implemented by [`hnswlib`](https://github.com/nmslib/hnswlib) 17 | in a function called [`repairConnectionsForUpdate`](https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/hnswlib/hnswalg.h#L987). The idea is to perform a "re-insert" of the node to be repaired 18 | and update its adjacency lists at all levels. 19 | 20 | ![recall over steps hnsw search-based repair](recall_over_steps_10_48_hnsw_search_based_repair.png) 21 | 22 | ## Vamana and HNSW with Vamana's edge repair algorithm. 23 | 24 | Vamana's edge repair algorithm is different from the previously described search-based edge repair 25 | algorithm. The idea is to connect each in-coming neighbor of a deleted node to the 26 | out-going neighbors of the deleted node, while applying a pruning step to maintain 27 | the maximum degree constraint. In this case, we use HNSW's original pruning 28 | algorithm. It is implemented by `hnswlib` in a function called 29 | [`getNeighborsByHeuristic2`](https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/hnswlib/hnswalg.h#L382C16-L382C16). 30 | 31 | ![recall over step hnsw vamana repair](recall_over_steps_10_48_hnsw.png) 32 | 33 | ## Vamana and HNSW with Vamana's edge repair and robust pruning algorithm. 34 | 35 | Lastly, we replace HNSW's pruning algorithm with Vamana's. Now the HNSW 36 | algorithm is exactly the same as Vamana's, except that it has multiple layers. 37 | We can call this "Multi-layer Vamana".
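For intuition, here is a minimal sketch of the Vamana-style repair described above. It is a paraphrase, not the actual hnswlib or DiskANN code; the dict-of-sets graph representation and the plain nearest-R pruning are simplifying assumptions, and Vamana's robust prune (or HNSW's heuristic) would slot in where `nearest_r` is called.

```python
# Sketch for intuition only -- a paraphrase of the Vamana-style repair,
# not the actual hnswlib or DiskANN code.
# graph: dict mapping node id -> set of out-neighbor ids
# vecs:  (n, d) numpy array of vectors; R: maximum out-degree
import numpy as np

def nearest_r(candidates, u, vecs, R):
    """Simplest possible pruning: keep the R candidates closest to u."""
    cand = np.fromiter(candidates, dtype=int)
    dist = np.linalg.norm(vecs[cand] - vecs[u], axis=1)
    return set(cand[np.argsort(dist)[:R]].tolist())

def repair_vamana_style(graph, vecs, deleted, R):
    """Connect each in-neighbor of `deleted` to the out-neighbors of `deleted`,
    then prune each patched adjacency list back to the degree bound R."""
    out_nbrs = graph[deleted]
    in_nbrs = [u for u, nbrs in graph.items() if deleted in nbrs]
    for u in in_nbrs:
        merged = (graph[u] | out_nbrs) - {deleted, u}
        graph[u] = nearest_r(merged, u, vecs, R)
    del graph[deleted]
```

The search-based variant would instead re-run a graph search from each in-neighbor's vector and rebuild that adjacency list from the search results.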
38 | 39 | ![recall over step hnsw vamana pruning](recall_over_steps_10_48_hnsw_robust_prune.png) -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw.png -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_robust_prune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_robust_prune.png -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_search_based_repair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_search_based_repair.png -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/Azure_D8lds_v5_table.md: -------------------------------------------------------------------------------- 1 | | dataset | algorithm | qps | 2 | |----------------|--------------------------|--------------| 3 | | yfcc-10M | pinecone [*] | 85491.542780 | 4 | | (filter track) | zilliz [*] | 84596.419213 | 5 | | | parlayivf | 37902.113726 | 6 | | | puck | 19193.294823 | 7 | | | hwtl_sdu_anns_filter [*] | 15059.124141 | 8 | | | wm_filter | 14467.961514 | 9 | | | dhq | 13670.864704 | 10 | | | fdufilterdiskann | 5679.748583 | 11 | | | pyanns | 5184.844352 | 12 | | | faissplus | 3776.539092 | 13 | | | faiss | 3032.534357 | 14 | | | cufe | 2917.132715 | 15 | | text2image-10M | pinecone-ood [*] | 38087.669026 | 16 | | (OOD track) | zilliz [*] | 33240.822128 | 17 | | | mysteryann | 22555.248017 | 18 | | | pyanns | 22295.584534 | 19 | | | mysteryann-dif | 22491.577263 | 20 | | | sustech-ood | 13772.370641 | 21 | | | puck | 8699.573200 | 22 | | | vamana | 6753.344080 | 23 | | | ngt | 6373.934425 | 24 | | | epsearch | 5876.982706 | 25 | | | diskann | 4132.829728 | 26 | | | cufe | 3561.416286 | 27 | | sparse-full | zilliz [*] | 10749.188262 | 28 | | (sparse track) | pinecone_smips [*] | 10439.909652 | 29 | | | pyanns | 8732.172708 | 30 | | | shnsw | 7136.927865 | 31 | | | nle | 2358.590429 | 32 | | | cufe | 104.768194 | 33 | | | linscan | 92.510615 | 34 | 35 | [*] not open source 36 | 37 | Table lists highest QPS measured with at least 90% recall@10, on the *public* query set. 
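These operating points (here and in the operating_points_*.txt files elsewhere in the tree) are simply the fastest qualifying runs. A minimal sketch of that selection, assuming a pandas DataFrame with `dataset`, `algorithm`, `recall@10` and `qps` columns; the column names and the CSV path in the example are assumptions, not the exact output of the export tooling:

```python
import pandas as pd

def operating_points(df: pd.DataFrame, min_recall: float = 0.9) -> pd.Series:
    """Highest QPS per (dataset, algorithm) among runs with recall@10 >= min_recall."""
    qualified = df[df["recall@10"] >= min_recall]
    return (qualified.groupby(["dataset", "algorithm"])["qps"]
                     .max()
                     .sort_values(ascending=False))

# e.g.: operating_points(pd.read_csv("res_public_queries_AzureD8lds_v5.csv"))
```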
38 | 39 | Last evaluation date: March 1st, 2024 (includes all submissions until March 1st, 2024, AOE) -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/filter/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | yfcc-10M pinecone 85491.542780 [*] 4 | zilliz 84596.419213 [*] 5 | parlayivf 37902.113726 6 | puck 19193.294823 7 | hwtl_sdu_anns_filter 15059.124141 [*] 8 | wm_filter 14467.961514 9 | dhq 13670.864704 10 | fdufilterdiskann 5679.748583 11 | pyanns 5184.844352 12 | faissplus 3776.539092 13 | faiss 3032.534357 14 | cufe 2917.132715 15 | 16 | [*] not open source (binary only) 17 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/filter/yfcc-10M.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ongoing_leaderboard/filter/yfcc-10M.png -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/leaderboard.md: -------------------------------------------------------------------------------- 1 | ## Ongoing Leaderboard 2 | 3 | This leaderboard tracks the performance of submitted algorithms, starting with the NeurIPS'23 entries and including newer submissions. It is evaluated periodically; see details [below](#ongoing-leaderboard-rules). 4 | 5 | It uses the recall@10 vs. throughput benchmark that has become the standard way of evaluating and comparing approximate nearest neighbor algorithms. 6 | 7 | For the "Filter", "Out-of-Distribution" and "Sparse" tracks, algorithms were ranked on the QPS they achieve on the track dataset, as long as the recall@10 is at least 90%. 8 | The results file for [Azure D8lds_v5](Azure_D8lds_v5_table.md) lists the maximum QPS measured for each algorithm with at least 90% recall@10. 9 | 10 | For the Streaming track, algorithms were ranked on recall@10, as long as each algorithm completes the runbook within the allotted 1 hour. The leading entry had a recall of 0.99786, see details [below](#streaming-track). 11 | 12 | To see the original leaderboard of the NeurIPS'23 submissions (Dec. 2023), see [here](../leaderboard.md). 13 | 14 | QPS vs recall@10 plots for tracks based on public queries on Azure D8lds_v5: 15 | ### Filter track 16 | ![yfcc-10M](filter/yfcc-10M.png) 17 | Note: "pinecone", "zilliz" and "hwtl_sdu_anns_filter" are not open source 18 | 19 | ### OOD track 20 | ![text2image-10M](ood/text2image-10M.png) 21 | Note: "pinecone-ood" and "zilliz" are not open source 22 | 23 | ### Sparse track 24 | ![sparse-full](sparse/sparse-full.png) 25 | Note: "pinecone_smips" and "zilliz" are not open source 26 | 27 | ### Streaming track 28 | The [result file](streaming/res_final_runbook_AzureD8lds_v5.csv) lists measurements for all streaming algorithms on Azure D8lds_v5. 29 | 30 | ## Ongoing Leaderboard Rules 31 | 32 | The leaderboard is evaluated periodically, collecting new algorithm submissions. The current update is from March 1st, 2024. 33 | 34 | The rules of the ongoing leaderboard are similar to the original competition, with the following exceptions: 35 | - Open source is encouraged but not enforced. Closed-source entries will be marked as such. In any case, a short description of the algorithm is required.
36 | - We will only evaluate the public query sets in the new leaderboard (on the same VM type: Azure Standard D8lds v5 with 8 vcpus, 16 GiB memory) 37 | - To participate, simply send a PR with the new algorithm (no need for a CMT entry). 38 | 39 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/ood/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | text2image-10M hanns 46033.867231 [*] 4 | scann 42854.013477 5 | pinecone-ood 38087.669026 [*] 6 | zilliz 33240.822128 [*] 7 | mysteryann 22555.248017 8 | mysteryann-dif 22491.577263 9 | pyanns 22295.584534 10 | sustech-ood 13772.370641 11 | puck 8699.573200 12 | vamana 6753.344080 13 | ngt 6373.934425 14 | epsearch 5876.982706 15 | diskann 4132.829728 16 | cufe 3561.416286 17 | 18 | [*] not open source (binary only) 19 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/ood/text2image-10M.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ongoing_leaderboard/ood/text2image-10M.png -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/sparse/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | sparse-full zilliz 10749.188262 [*] 4 | pinecone_smips 10439.909652 [*] 5 | pyanns 8732.172708 6 | shnsw 7136.927865 7 | nle 2358.590429 8 | cufe 104.768194 9 | linscan 92.510615 10 | 11 | [*] not open source (binary only) 12 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/sparse/sparse-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ongoing_leaderboard/sparse/sparse-full.png -------------------------------------------------------------------------------- /neurips23/ood/base.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base import BaseANN 2 | 3 | class BaseOODANN(BaseANN): 4 | def track(self): 5 | return "ood" -------------------------------------------------------------------------------- /neurips23/ood/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | ADD "https://github.com/AbdelrahmanMohamed129/DiskANN/tree/CUFE_OOD" latest_commit 10 | RUN git clone https://github.com/AbdelrahmanMohamed129/DiskANN --branch CUFE_OOD 11 | 12 | WORKDIR /home/app/DiskANN 13 | RUN pip3 install virtualenv build 14 | RUN python3 -m build 15 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 16 | WORKDIR /home/app 17 | 
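Aside: `BaseOODANN` above is the whole track-specific interface, and each `module`/`constructor` pair in the config.yaml files resolves to a wrapper class like the one sketched below. The `query`/`get_results` names follow the runner code shown earlier; the constructor signature, `fit`, and `set_query_arguments` are assumptions for illustration, and this is not the actual cufe (or any other) submission.

```python
# Illustrative only -- not an actual submission.
import numpy as np
from neurips23.ood.base import BaseOODANN

class ExampleOOD(BaseOODANN):
    """Referenced from config.yaml via `module:` (file path) and `constructor: ExampleOOD`."""

    def __init__(self, metric, index_params):
        self.metric = metric
        self.R = index_params.get("R", 32)      # one entry of `args`
        self.name = f"example-ood(R={self.R})"

    def fit(self, dataset):
        # Build the index from the dataset's base vectors (omitted in this sketch).
        pass

    def set_query_arguments(self, query_args):
        self.Ls = query_args.get("Ls", 50)      # one entry of `query-args`

    def query(self, X, k):
        # The runner times this call, then reads the result ids via get_results().
        self.res = np.zeros((X.shape[0], k), dtype=np.uint32)

    def get_results(self):
        return self.res
```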
-------------------------------------------------------------------------------- /neurips23/ood/cufe/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | cufe: 3 | docker-tag: neurips23-ood-cufe 4 | module: neurips23.ood.cufe.diskann-in-mem 5 | constructor: cufe 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "buildthreads":32}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | text2image-10M: 14 | cufe: 15 | docker-tag: neurips23-ood-cufe 16 | module: neurips23.ood.cufe.diskann-in-mem 17 | constructor: cufe 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":64, "L":500, "buildthreads":32}] 23 | query-args: | 24 | [{"Ls":30, "T":8}, 25 | {"Ls":50, "T":8}, 26 | {"Ls":70, "T":8}, 27 | {"Ls":100, "T":8}] 28 | -------------------------------------------------------------------------------- /neurips23/ood/diskann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/microsoft/DiskANN.git --branch 0.5.0.rc3.post1 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/ood/diskann/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | diskann: 3 | docker-tag: neurips23-ood-diskann 4 | module: neurips23.ood.diskann.diskann-in-mem 5 | constructor: diskann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "buildthreads":32}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | text2image-1M: 14 | diskann: 15 | docker-tag: neurips23-ood-diskann 16 | module: neurips23.ood.diskann.diskann-in-mem 17 | constructor: diskann 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":64, "L":500, "buildthreads":32}] 23 | query-args: | 24 | [{"Ls":30, "T":8}, 25 | {"Ls":50, "T":8}, 26 | {"Ls":70, "T":8}, 27 | {"Ls":100, "T":8}] 28 | text2image-10M: 29 | diskann: 30 | docker-tag: neurips23-ood-diskann 31 | module: neurips23.ood.diskann.diskann-in-mem 32 | constructor: diskann 33 | base-args: ["@metric"] 34 | run-groups: 35 | base: 36 | args: | 37 | [{"R":64, "L":500, "buildthreads":32}] 38 | query-args: | 39 | [{"Ls":30, "T":8}, 40 | {"Ls":50, "T":8}, 41 | {"Ls":70, "T":8}, 42 | {"Ls":100, "T":8}] 43 | -------------------------------------------------------------------------------- /neurips23/ood/epsearch/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | epsearch: 3 | docker-tag: neurips23-ood-epsearch 4 | module: neurips23.ood.epsearch.diskann-in-mem-ep-hnsw 5 | constructor: epdiskann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":30, "L":500, "alpha":1.2, "n_ep_candidates":32, "buildthreads":8, "ep_train":"id", "M":32, "efConstruction":200}] 11 | query-args: | 12 | [{"Ls":50, "T":8, "efSearch":4}] 
13 | text2image-10M: 14 | epsearch: 15 | docker-tag: neurips23-ood-epsearch 16 | module: neurips23.ood.epsearch.diskann-in-mem-ep-hnsw 17 | constructor: epdiskann 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":56, "L":500, "alpha":1.0, "n_ep_candidates":16384, "buildthreads":8, "ep_train":"id", "M":32, "efConstruction":200}] 23 | query-args: | 24 | [{"Ls":100, "T":8, "efSearch":32}, 25 | {"Ls":105, "T":8, "efSearch":32}, 26 | {"Ls":110, "T":8, "efSearch":16}, 27 | {"Ls":110, "T":8, "efSearch":32}, 28 | {"Ls":115, "T":8, "efSearch":16}, 29 | {"Ls":115, "T":8, "efSearch":32}, 30 | {"Ls":120, "T":8, "efSearch":32}, 31 | {"Ls":125, "T":8, "efSearch":32}, 32 | {"Ls":130, "T":8, "efSearch":32}, 33 | {"Ls":140, "T":8, "efSearch":32}] 34 | -------------------------------------------------------------------------------- /neurips23/ood/hanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y sudo software-properties-common python3.10 5 | RUN git clone https://github.com/AndrewHYu/Hanns.git 6 | RUN pip install ./Hanns/*.whl 7 | 8 | WORKDIR /home/app 9 | -------------------------------------------------------------------------------- /neurips23/ood/hanns/README.md: -------------------------------------------------------------------------------- 1 | # Hanns 2 | Our OOD track solution consists of a vamana index, a multi-scale spatial clustering index, and a layout-optimized quantization acceleration index. 3 | The retrieval process goes from coarse to fine. First, the vamana index is used to quickly find the nearest clusters. Then, within these clusters, the quantization-accelerated index is used for fast distance comparisons to identify the coarsely ranked candidates. Finally, SIMD instructions are used to re-rank these candidates, and the final results are returned.
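A rough sketch of that coarse-to-fine flow is below. It is illustrative only: the actual Hanns index is the prebuilt wheel installed in the Dockerfile above, none of these names come from it, and the `reorder` knob in the config.yaml that follows presumably plays the role of `rerank` here.

```python
# Illustrative coarse-to-fine re-ranking for inner-product search;
# not the Hanns implementation.
import numpy as np

def coarse_to_fine(q, candidate_ids, codes_u8, scale, xb, k=10, rerank=150):
    """candidate_ids: int array of ids from the coarse (cluster) stage.
    Score uint8-quantized codes first, keep `rerank` candidates,
    then re-score those against the full-precision vectors."""
    approx = (codes_u8[candidate_ids].astype(np.float32) * scale) @ q
    keep = candidate_ids[np.argsort(-approx)[:rerank]]
    exact = xb[keep] @ q          # exact inner product (SIMD-accelerated in practice)
    return keep[np.argsort(-exact)[:k]]
```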
4 | # Performance 5 | ![ood-track](https://github.com/AndrewHYu/Hanns/blob/main/pic/text2image-10M.png) 6 | -------------------------------------------------------------------------------- /neurips23/ood/hanns/config.yaml: -------------------------------------------------------------------------------- 1 | text2image-10M: 2 | hanns: 3 | docker-tag: neurips23-ood-hanns 4 | module: neurips23.ood.hanns.hanns 5 | constructor: Hanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | tree40k-config0: 9 | args: | 10 | [{"tree_size": 40000, "download": true, "config_id": 0}] 11 | query-args: | 12 | [{"leaves_to_search": 27, "reorder": 111}, 13 | {"leaves_to_search": 27, "reorder": 130}, 14 | {"leaves_to_search": 32, "reorder": 140}, 15 | {"leaves_to_search": 32, "reorder": 150}, 16 | {"leaves_to_search": 34, "reorder": 150}, 17 | {"leaves_to_search": 36, "reorder": 150}, 18 | {"leaves_to_search": 37, "reorder": 145}, 19 | {"leaves_to_search": 38, "reorder": 140}, 20 | {"leaves_to_search": 42, "reorder": 160}, 21 | {"leaves_to_search": 34, "reorder": 155}] 22 | -------------------------------------------------------------------------------- /neurips23/ood/install_neurips23.sh: -------------------------------------------------------------------------------- 1 | python install.py --neurips23track ood --algorithm cufe 2 | python install.py --neurips23track ood --algorithm diskann 3 | python install.py --neurips23track ood --algorithm epsearch 4 | python install.py --neurips23track ood --algorithm mysteryann 5 | python install.py --neurips23track ood --algorithm mysteryann-dif 6 | python install.py --neurips23track ood --algorithm ngt 7 | python install.py --neurips23track ood --algorithm puck 8 | python install.py --neurips23track ood --algorithm puck-fizz 9 | python install.py --neurips23track ood --algorithm pyanns 10 | python install.py --neurips23track ood --algorithm sustech-ood 11 | python install.py --neurips23track ood --algorithm vamana 12 | -------------------------------------------------------------------------------- /neurips23/ood/mysteryann-dif/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | WORKDIR /home/app 9 | RUN git clone --recursive --branch one_g_opt_diff_d_10 https://github.com/matchyc/mysteryann.git 10 | WORKDIR /home/app/mysteryann/pybindings 11 | RUN pip3 install virtualenv build 12 | RUN pip3 install pybind11[global] 13 | RUN pip3 install . 
14 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/ood/mysteryann-dif/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | mysteryann-dif: 3 | docker-tag: neurips23-ood-mysteryann-dif 4 | module: neurips23.ood.mysteryann-dif.mysteryann-dif 5 | constructor: mysteryann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M_pjbp":35, "L_pjpq":500, "NoT":5, "NoP":2, "T":8, "EoP":1}] 11 | query-args: | 12 | [{"L_pq":75, "T":8}, 13 | {"L_pq":78, "T":8}, 14 | {"L_pq":80, "T":8}, 15 | {"L_pq":82, "T":8}, 16 | {"L_pq":83, "T":8}, 17 | {"L_pq":85, "T":8}, 18 | {"L_pq":87, "T":8}, 19 | {"L_pq":89, "T":8}, 20 | {"L_pq":92, "T":8}] 21 | text2image-10M: 22 | mysteryann-dif: 23 | docker-tag: neurips23-ood-mysteryann-dif 24 | module: neurips23.ood.mysteryann-dif.mysteryann-dif 25 | constructor: mysteryann 26 | base-args: ["@metric"] 27 | run-groups: 28 | base: 29 | args: | 30 | [{"M_pjbp":45, "L_pjpq":800, "NoT": 5, "NoP": 3, "T": 8, "EoP": 1}] 31 | query-args: | 32 | [{"L_pq":80, "T":8}, 33 | {"L_pq":83, "T":8}, 34 | {"L_pq":85, "T":8}, 35 | {"L_pq":88, "T":8}, 36 | {"L_pq":90, "T":8}, 37 | {"L_pq":92, "T":8}, 38 | {"L_pq":93, "T":8}, 39 | {"L_pq":95, "T":8}, 40 | {"L_pq":100, "T":8}, 41 | {"L_pq":103, "T":8}, 42 | {"L_pq":107, "T":8}, 43 | {"L_pq":110, "T":8}, 44 | {"L_pq":115, "T":8}, 45 | {"L_pq":120, "T":8}] 46 | 47 | -------------------------------------------------------------------------------- /neurips23/ood/mysteryann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | WORKDIR /home/app 9 | RUN git clone --recursive --branch one_g_opt https://github.com/matchyc/mysteryann.git 10 | WORKDIR /home/app/mysteryann/pybindings 11 | RUN pip3 install virtualenv build 12 | RUN pip3 install pybind11[global] 13 | RUN pip3 install . 
14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/ood/mysteryann/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | mysteryann: 3 | docker-tag: neurips23-ood-mysteryann 4 | module: neurips23.ood.mysteryann.mysteryann 5 | constructor: mysteryann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M_pjbp":35, "L_pjpq":500, "NoT":5, "NoP":2, "T":8, "EoP":1}] 11 | query-args: | 12 | [{"L_pq":75, "T":8}, 13 | {"L_pq":78, "T":8}, 14 | {"L_pq":80, "T":8}, 15 | {"L_pq":82, "T":8}, 16 | {"L_pq":83, "T":8}, 17 | {"L_pq":85, "T":8}, 18 | {"L_pq":87, "T":8}, 19 | {"L_pq":89, "T":8}, 20 | {"L_pq":92, "T":8}] 21 | text2image-10M: 22 | mysteryann: 23 | docker-tag: neurips23-ood-mysteryann 24 | module: neurips23.ood.mysteryann.mysteryann 25 | constructor: mysteryann 26 | base-args: ["@metric"] 27 | run-groups: 28 | base: 29 | args: | 30 | [{"M_pjbp":35, "L_pjpq":800, "NoT": 5, "NoP": 3, "T": 8, "EoP": 1}] 31 | query-args: | 32 | [{"L_pq":100, "T":8}, 33 | {"L_pq":110, "T":8}, 34 | {"L_pq":113, "T":8}, 35 | {"L_pq":115, "T":8}, 36 | {"L_pq":117, "T":8}, 37 | {"L_pq":118, "T":8}, 38 | {"L_pq":120, "T":8}, 39 | {"L_pq":123, "T":8}, 40 | {"L_pq":125, "T":8}, 41 | {"L_pq":128, "T":8}, 42 | {"L_pq":130, "T":8}, 43 | {"L_pq":133, "T":8}, 44 | {"L_pq":135, "T":8}, 45 | {"L_pq":140, "T":8}, 46 | {"L_pq":145, "T":8}, 47 | {"L_pq":150, "T":8}] 48 | 49 | -------------------------------------------------------------------------------- /neurips23/ood/ngt/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y git cmake liblapack-dev bc 5 | RUN pip3 install wheel pybind11 6 | RUN git clone https://github.com/masajiro/NGT-neurips23.git NGT 7 | RUN cd NGT && git log -n 1 8 | RUN cd NGT && mkdir build && cd build && cmake .. 
9 | RUN cd NGT/build && make -j 8 && make install 10 | RUN ldconfig 11 | RUN cd NGT/python && python3 setup.py bdist_wheel 12 | RUN pip3 install NGT/python/dist/ngt-*-linux_x86_64.whl 13 | -------------------------------------------------------------------------------- /neurips23/ood/ngt/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | ngt: 3 | docker-tag: neurips23-ood-ngt 4 | module: neurips23.ood.ngt.module 5 | constructor: NGT 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"edge": 50, "outdegree": 10, "indegree": 100, 11 | "epsilon": 0.1, "reduction": 0.39}] 12 | # "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-random-50-10-100-0.10-0.39.tgz"}] 13 | query-args: | 14 | [{"epsilon": 1.1}] 15 | text2image-10M: 16 | ngt: 17 | docker-tag: neurips23-ood-ngt 18 | module: neurips23.ood.ngt.module 19 | constructor: NGT 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"edge": 140, "outdegree": 10, "indegree": 175, 25 | "epsilon": 0.11, "reduction": 0.38}] 26 | # "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-text2image-140-10-180-0.10-0.39.tgz"}] 27 | query-args: | 28 | [{"epsilon": 1.010}, 29 | {"epsilon": 1.014}, 30 | {"epsilon": 1.016}, 31 | {"epsilon": 1.017}, 32 | {"epsilon": 1.018}, 33 | {"epsilon": 1.020}, 34 | {"epsilon": 1.025}] 35 | -------------------------------------------------------------------------------- /neurips23/ood/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | text2image-10M cufe 3561.416286 4 | diskann 4132.829728 5 | epsearch 5876.982706 6 | mysteryann 22555.248017 7 | mysteryann-dif 22491.577263 8 | ngt 6373.934425 9 | puck 8699.573200 10 | pyanns 22295.584534 11 | sustech-ood 13772.370641 12 | vamana 6753.344080 13 | -------------------------------------------------------------------------------- /neurips23/ood/pinecone-ood/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | # copy and install the pys2 python package 10 | RUN git clone --branch ood https://github.com/pinecone-io/bigann.git 11 | RUN pip install ./bigann/*.whl 12 | # verify that the build worked 13 | RUN python3 -c 'import diskannpy;' 14 | RUN python3 -c 'import pys2;' 15 | -------------------------------------------------------------------------------- /neurips23/ood/pinecone-ood/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone OOD ANN algorithm 2 | 3 | Our solution for the OOD track is based on three main components – 4 | an inverted-file (IVF) index on the vector collection using a clustering algorithm tailored for inner-product search, 5 | a k-MIP (max inner product) graph constructed using the co-occurence of vectors as nearest neighbors for a set of 6 | training queries, and, quantization tailored for SIMD-based acceleration for fast scoring and retrieval. 7 | 8 | We perform retrieval in three stages. 
9 | First, we retrieve a small number of candidates from top clusters by scoring quantized vectors. 10 | Next, we use the k-MIP graph to “expand” the set of retrieved candidates by adding their neighbors 11 | in the graph to the candidate set. 12 | Finally, we score all the candidates by computing their distance to the query using a 13 | fine-grained quantized representation of the vectors. 14 | In addition, in order to accelerate the search, we process the queries in a batch to take advantage of cache locality. -------------------------------------------------------------------------------- /neurips23/ood/pinecone-ood/config.yaml: -------------------------------------------------------------------------------- 1 | text2image-1M: 2 | pinecone-ood: 3 | docker-tag: neurips23-ood-pinecone-ood 4 | module: neurips23.ood.pinecone-ood.s2_index 5 | constructor: S2_index 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"index_str":"OODIndex1024[HNSW32]_spillage=1"}] 11 | query-args: | 12 | [{"nprobe":"15", "kfactor":"4"}] 13 | text2image-10M: 14 | pinecone-ood: 15 | docker-tag: neurips23-ood-pinecone-ood 16 | module: neurips23.ood.pinecone-ood.s2_index 17 | constructor: S2_index 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"index_str":"OODIndex32768[HNSW32]_spillage=1"}] 23 | query-args: | 24 | [{"nprobe":"45", "kfactor":"1"}, 25 | {"nprobe":"52", "kfactor":"2"}, 26 | {"nprobe":"53", "kfactor":"2"}, 27 | {"nprobe":"54", "kfactor":"2"}, 28 | {"nprobe":"55", "kfactor":"2"}, 29 | {"nprobe":"60", "kfactor":"2"}, 30 | {"nprobe":"49", "kfactor":"3"}, 31 | {"nprobe":"50", "kfactor":"3"}, 32 | {"nprobe":"51", "kfactor":"3"}, 33 | {"nprobe":"52", "kfactor":"3"} 34 | ] 35 | -------------------------------------------------------------------------------- /neurips23/ood/plot_public_queries_AzureD8lds_v5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ood/plot_public_queries_AzureD8lds_v5.png -------------------------------------------------------------------------------- /neurips23/ood/puck-fizz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | # COPY cmake-3.22.0-linux-x86_64.sh . 13 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 14 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 15 | ENV PATH /home/app/cmake/bin:$PATH 16 | 17 | #mkl 18 | # COPY l_onemkl_p_2023.2.0.49497_offline.sh . 19 | RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 20 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 21 | 22 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 23 | RUN ldconfig 24 | RUN touch /etc/profile.d/intel.sh 25 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 26 | RUN . 
/etc/profile.d/intel.sh 27 | 28 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 29 | #RUN git config --global http.sslVerify false 30 | 31 | RUN git clone -b ood-try https://github.com/baidu/puck.git 32 | # COPY puck-ood-feature.tar.gz . 33 | # RUN tar zxvf puck-ood-feature.tar.gz 34 | RUN cd puck && . /etc/profile.d/intel.sh && python3 setup.py install 35 | RUN python3 -c 'from puck import py_puck_api' 36 | -------------------------------------------------------------------------------- /neurips23/ood/puck-fizz/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | puck-fizz: 3 | docker-tag: neurips23-ood-puck-fizz 4 | module: neurips23.ood.puck-fizz.puck 5 | constructor: Puck 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":10,"tinker_construction":200}] 10 | query-args: | 11 | [ 12 | {"search_coarse_count":50, "tinker_search_range": 100}, 13 | {"search_coarse_count":50, "tinker_search_range": 200}, 14 | {"search_coarse_count":50, "tinker_search_range": 300} 15 | ] 16 | 17 | 18 | text2image-10M: 19 | puck-fizz: 20 | docker-tag: neurips23-ood-puck-fizz 21 | module: neurips23.ood.puck-fizz.puck 22 | constructor: Puck 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":16,"tinker_construction":200}] 27 | query-args: | 28 | [ 29 | {"search_coarse_count":10, "tinker_search_range": 160}, 30 | {"search_coarse_count":10, "tinker_search_range": 170}, 31 | {"search_coarse_count":10, "tinker_search_range": 180}, 32 | {"search_coarse_count":10, "tinker_search_range": 190} 33 | ] -------------------------------------------------------------------------------- /neurips23/ood/puck/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 13 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 14 | ENV PATH /home/app/cmake/bin:$PATH 15 | 16 | #mkl 17 | RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 18 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 19 | 20 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN touch /etc/profile.d/intel.sh 23 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 24 | RUN . /etc/profile.d/intel.sh 25 | 26 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 27 | #RUN git config --global http.sslVerify false 28 | 29 | RUN git clone -b ood https://github.com/baidu/puck.git 30 | RUN cd puck && . 
/etc/profile.d/intel.sh && python3 setup.py install 31 | RUN python3 -c 'from puck import py_puck_api' 32 | -------------------------------------------------------------------------------- /neurips23/ood/puck/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | puck: 3 | docker-tag: neurips23-ood-puck 4 | module: neurips23.ood.puck.puck 5 | constructor: Puck 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":16,"tinker_construction":200}] 10 | query-args: | 11 | [ 12 | {"search_coarse_count":50, "tinker_search_range": 100}, 13 | {"search_coarse_count":50, "tinker_search_range": 200}, 14 | {"search_coarse_count":50, "tinker_search_range": 300} 15 | ] 16 | 17 | 18 | text2image-10M: 19 | puck: 20 | docker-tag: neurips23-ood-puck 21 | module: neurips23.ood.puck.puck 22 | constructor: Puck 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":16,"tinker_construction":200}] 27 | query-args: | 28 | [ 29 | {"search_coarse_count":10, "tinker_search_range": 190}, 30 | {"search_coarse_count":10, "tinker_search_range": 160}, 31 | {"search_coarse_count":10, "tinker_search_range": 165}, 32 | {"search_coarse_count":10, "tinker_search_range": 170}, 33 | {"search_coarse_count":10, "tinker_search_range": 175} 34 | ] 35 | -------------------------------------------------------------------------------- /neurips23/ood/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/veaaaab/DiskANN.git --branch bigann23_ood 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | 16 | RUN apt update 17 | RUN apt install python-is-python3 18 | RUN git clone https://github.com/veaaaab/pyanns --branch master --depth 1 19 | WORKDIR /home/app/pyanns 20 | RUN pip install -r requirements.txt 21 | RUN bash build.sh 22 | WORKDIR /home/app 23 | 24 | RUN python3 -c 'import pyanns' 25 | 26 | WORKDIR /home/app 27 | 28 | -------------------------------------------------------------------------------- /neurips23/ood/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | pyanns: 3 | docker-tag: neurips23-ood-pyanns 4 | module: neurips23.ood.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50}] 11 | query-args: | 12 | [{"ef":30}, {"ef":50}, {"ef":100}] 13 | text2image-10M: 14 | pyanns: 15 | docker-tag: neurips23-ood-pyanns 16 | module: neurips23.ood.pyanns.pyanns 17 | constructor: Pyanns 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":48, "L":500, "buildthreads" : 8}] 23 | query-args: | 24 | [ 25 | {"ef":90}, 26 | {"ef":95}, 27 | {"ef":100}, 28 | {"ef":102}, 29 | {"ef":104}, 30 | {"ef":106}, 31 | {"ef":108}, 32 | {"ef":110}, 33 | {"ef":115}, 34 | {"ef":120}, 35 | 
{"ef":125}, 36 | {"ef":130} 37 | ] 38 | -------------------------------------------------------------------------------- /neurips23/ood/run.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base_runner import BaseRunner 2 | import time 3 | 4 | class OODRunner(BaseRunner): 5 | pass -------------------------------------------------------------------------------- /neurips23/ood/scann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN pip install --no-cache-dir scann==1.3.2 6 | 7 | WORKDIR /home/app 8 | -------------------------------------------------------------------------------- /neurips23/ood/scann/config.yaml: -------------------------------------------------------------------------------- 1 | text2image-10M: 2 | scann: 3 | docker-tag: neurips23-ood-scann 4 | module: neurips23.ood.scann.scann 5 | constructor: Scann 6 | base-args: ["@metric"] 7 | run-groups: 8 | tree40k-config0: 9 | args: | 10 | [{"tree_size": 40000, "download": true, "config_id": 0}] 11 | query-args: | 12 | [{"leaves_to_search": 35, "reorder": 150}, 13 | {"leaves_to_search": 35, "reorder": 155}, 14 | {"leaves_to_search": 36, "reorder": 150}, 15 | {"leaves_to_search": 37, "reorder": 145}, 16 | {"leaves_to_search": 38, "reorder": 140}, 17 | {"leaves_to_search": 34, "reorder": 155}] 18 | tree40k-config1: 19 | args: | 20 | [{"tree_size": 40000, "download": true, "config_id": 1}] 21 | query-args: | 22 | [{"leaves_to_search": 42, "reorder": 160}] 23 | tree40k-config2: 24 | args: | 25 | [{"tree_size": 40000, "download": true, "config_id": 2}] 26 | query-args: | 27 | [{"leaves_to_search": 27, "reorder": 140}] 28 | -------------------------------------------------------------------------------- /neurips23/ood/sustech-ood/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y libssl-dev 5 | RUN wget https://cmake.org/files/v3.23/cmake-3.23.1.tar.gz 6 | RUN tar -zxvf cmake-3.23.1.tar.gz 7 | WORKDIR /home/app/cmake-3.23.1 8 | RUN ./bootstrap --parallel=8 9 | RUN make -j4 10 | RUN make install 11 | WORKDIR /home/app 12 | RUN git clone -b faiss https://github.com/whateveraname/SUSTech-OOD.git --recursive 13 | WORKDIR /home/app/SUSTech-OOD 14 | RUN cmake -DCMAKE_BUILD_TYPE=Release . 
&& make -j4 15 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/ood/sustech-ood/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | sustech-ood: 3 | docker-tag: neurips23-ood-sustech-ood 4 | module: neurips23.ood.sustech-ood.SUSTech-OOD 5 | constructor: IndexGraphOOD 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M":5, "ef":100, "cluster_num":1}] 11 | query-args: | 12 | [{"ef":20, "nprobe":5}] 13 | text2image-10M: 14 | sustech-ood: 15 | docker-tag: neurips23-ood-sustech-ood 16 | module: neurips23.ood.sustech-ood.SUSTech-OOD 17 | constructor: IndexGraphOOD 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"M":20, "ef":1200, "cluster_num":1000}] 23 | query-args: | 24 | [{"ef":95, "nprobe":30}, 25 | {"ef":115, "nprobe":30}, 26 | {"ef":125, "nprobe":30}, 27 | {"ef":130, "nprobe":30}, 28 | {"ef":135, "nprobe":30}, 29 | {"ef":140, "nprobe":30}, 30 | {"ef":145, "nprobe":30}, 31 | {"ef":155, "nprobe":30}, 32 | {"ef":175, "nprobe":30}] 33 | -------------------------------------------------------------------------------- /neurips23/ood/vamana/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | 10 | ARG CACHEBUST=1 11 | RUN git clone -b ood_v2 https://github.com/cmuparlay/ParlayANN.git && cd ParlayANN && git submodule update --init --recursive && cd python && pip install pybind11 && bash compile.sh 12 | # WORKDIR /home/app/ParlayANN 13 | # RUN git submodule update --init --recursive 14 | # WORKDIR /home/app/ParlayANN/python 15 | 16 | # RUN pip install pybind11 17 | 18 | # RUN bash compile.sh 19 | 20 | ENV PYTHONPATH=$PYTHONPATH:/home/app/ParlayANN/python 21 | 22 | # ENV PARLAY_NUM_THREADS=8 23 | 24 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/ood/vamana/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | vamana: 3 | docker-tag: neurips23-ood-vamana 4 | module: neurips23.ood.vamana.vamana 5 | constructor: vamana 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":30, "L":50, "alpha":1.2}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | text2image-10M: 14 | vamana: 15 | docker-tag: neurips23-ood-vamana 16 | module: neurips23.ood.vamana.vamana 17 | constructor: vamana 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":55, "L":500, "alpha":1.0, "two_pass":1, "use_query_data":1, "compress":1}] 23 | query-args: | 24 | [ 25 | {"Ls":70, "T":8}, 26 | {"Ls":80, "T":8}, 27 | {"Ls":90, "T":8}, 28 | {"Ls":95, "T":8}, 29 | {"Ls":100, "T":8}, 30 | {"Ls":105, "T":8}, 31 | {"Ls":110, "T":8}, 32 | {"Ls":120, "T":8}, 33 | {"Ls":125, "T":8}, 34 | {"Ls":150, "T":8}] 35 | vamana-singlepass: 36 | docker-tag: neurips23-ood-vamana 37 | module: neurips23.ood.vamana.vamana 38 | constructor: vamana 39 | base-args: ["@metric"] 40 | run-groups: 41 | base: 42 | args: | 43 | [{"R":64, "L":500}] 44 | query-args: | 45 | [{"Ls":30, "T":8}, 46 | {"Ls":50, "T":8}, 47 | 
{"Ls":70, "T":8}, 48 | {"Ls":100, "T":8}, 49 | {"Ls":113, "T":8}, 50 | {"Ls":125, "T":8}, 51 | {"Ls":150, "T":8}, 52 | {"Ls":175, "T":8}, 53 | {"Ls":200, "T":8}] 54 | 55 | -------------------------------------------------------------------------------- /neurips23/ood/zilliz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 7 | 8 | RUN apt install python-is-python3 9 | RUN git clone https://github.com/hhy3/zilliz-bigann.git --branch ood --depth 1 10 | RUN pip install ./zilliz-bigann/*.whl 11 | 12 | RUN python3 -c 'import ood_searcher' 13 | -------------------------------------------------------------------------------- /neurips23/ood/zilliz/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | zilliz: 3 | docker-tag: neurips23-ood-zilliz 4 | module: neurips23.ood.zilliz.zilliz 5 | constructor: Zilliz 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50}] 11 | query-args: | 12 | [{"ef":30}, {"ef":50}, {"ef":100}] 13 | text2image-10M: 14 | zilliz: 15 | docker-tag: neurips23-ood-zilliz 16 | module: neurips23.ood.zilliz.zilliz 17 | constructor: Zilliz 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":48, "L":500, "buildthreads" : 8}] 23 | query-args: | 24 | [ 25 | {"ef":90}, 26 | {"ef":95}, 27 | {"ef":100}, 28 | {"ef":102}, 29 | {"ef":104}, 30 | {"ef":106}, 31 | {"ef":108}, 32 | {"ef":110}, 33 | {"ef":115}, 34 | {"ef":120} 35 | ] 36 | -------------------------------------------------------------------------------- /neurips23/runbooks/generate_msturing10m_runbooks.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | 4 | 5 | dataset_name="msturing-10M" 6 | 7 | data = {dataset_name: {}} 8 | 9 | total_points=10000000 10 | 11 | num_points=0 12 | max_num_points=0 13 | 14 | 15 | max_t=200 16 | # insert 10000000/200 points per step 17 | # start deleting points after 100 steps 18 | 19 | t=1 20 | for i in range(max_t): 21 | if i>=max_t//2: 22 | data[dataset_name][t]={ 23 | 'operation': 'search', 24 | } 25 | t+=1 26 | data[dataset_name][t]={ 27 | 'operation': 'delete', 28 | 'start': (i-max_t//2)*(total_points//max_t), 29 | 'end': (i-max_t//2+1)*(total_points//max_t) 30 | } 31 | t+=1 32 | num_points-=total_points//max_t 33 | data[dataset_name][t]={ 34 | 'operation': 'insert', 35 | 'start': i*(total_points//max_t), 36 | 'end': (i+1)*(total_points//max_t) 37 | } 38 | t+=1 39 | 40 | num_points+=total_points//max_t 41 | max_num_points=max(max_num_points,num_points) 42 | 43 | data[dataset_name]["max_pts"]=max_num_points 44 | 45 | run_book_name=dataset_name+"_"+"slidingwindow_runbook.yaml" 46 | 47 | with open(run_book_name, 'w') as outfile: 48 | yaml.dump(data, outfile, default_flow_style=False) 49 | 50 | 51 | -------------------------------------------------------------------------------- /neurips23/runbooks/simple_replace_runbook.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | max_pts: 10000 3 | 1: 4 | operation: "insert" 5 | start: 0 6 | end: 7500 7 | 2: 8 | operation: 
"search" 9 | 3: 10 | operation: "replace" 11 | tags_start: 0 12 | tags_end: 2500 13 | ids_start: 7500 14 | ids_end: 10000 15 | 4: 16 | operation: "search" 17 | 5: 18 | operation: "replace" 19 | tags_start: 0 20 | tags_end: 2500 21 | ids_start: 0 22 | ids_end: 2500 23 | 6: 24 | operation: "search" 25 | 7: 26 | operation: "delete" 27 | start: 2500 28 | end: 5000 29 | 8: 30 | operation: "search" -------------------------------------------------------------------------------- /neurips23/sparse/base.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base import BaseANN 2 | 3 | class BaseSparseANN(BaseANN): 4 | def track(self): 5 | return "sparse" -------------------------------------------------------------------------------- /neurips23/sparse/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y sudo build-essential git axel wget curl 5 | 6 | 7 | # for python 3.10 8 | RUN sudo apt install software-properties-common -y 9 | RUN sudo add-apt-repository ppa:deadsnakes/ppa 10 | RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata 11 | RUN sudo apt-get -y install python3.10 12 | RUN apt-get -y install python3-numpy python3-scipy python3-pip 13 | 14 | # Get Rust; NOTE: using sh for better compatibility with other base images 15 | RUN curl https://sh.rustup.rs -sSf | sh -s -- -y 16 | 17 | # Add .cargo/bin to PATH 18 | ENV PATH="/root/.cargo/bin:${PATH}" 19 | 20 | # git clone a single branch 21 | RUN git clone --single-branch --branch main https://github.com/MichaelIbrahim-GaTech/research-bigann-linscan.git 22 | WORKDIR research-bigann-linscan/ 23 | 24 | # fix python3 link (required for pyo3) 25 | RUN ln -fs /usr/bin/python3.10 /usr/bin/python3 26 | 27 | # fix pip3 28 | RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 29 | 30 | RUN pip3 install maturin 31 | 32 | # build a whl file 33 | RUN maturin build -r 34 | 35 | # pip install the resulting whl file (regardless of the architecture) 36 | RUN for whl in target/wheels/*.whl; do pip3 install $whl; done 37 | 38 | RUN pip3 install -r requirements.txt 39 | 40 | # verify that the build worked 41 | RUN python3 -c 'import pylinscancufe; print(pylinscancufe.LinscanIndex());' 42 | 43 | WORKDIR .. 
44 | -------------------------------------------------------------------------------- /neurips23/sparse/cufe/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | cufe: 3 | docker-tag: neurips23-sparse-cufe 4 | module: neurips23.sparse.cufe.linscan 5 | constructor: LinscanCUFE 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{}] 11 | query-args: | 12 | [{"budget":1},{"budget":0.5},{"budget":0.4},{"budget":0.3},{"budget":0.25},{"budget":0.2},{"budget":0.15},{"budget":0.1},{"budget":0.075},{"budget":0.05}] 13 | sparse-1M: 14 | cufe: 15 | docker-tag: neurips23-sparse-cufe 16 | module: neurips23.sparse.cufe.linscan 17 | constructor: LinscanCUFE 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{}] 23 | query-args: | 24 | [{"budget":0.5},{"budget":1},{"budget":2},{"budget":4},{"budget":5},{"budget":6},{"budget":7},{"budget":8},{"budget":10}] 25 | sparse-full: 26 | cufe: 27 | docker-tag: neurips23-sparse-cufe 28 | module: neurips23.sparse.cufe.linscan 29 | constructor: LinscanCUFE 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{}] 35 | query-args: | 36 | [{"budget":5},{"budget":15},{"budget":35},{"budget":50},{"budget":52.5},{"budget":55},{"budget":57.5},{"budget":60},{"budget":90},{"budget":500}] 37 | 38 | -------------------------------------------------------------------------------- /neurips23/sparse/cufe/linscan.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | 5 | from benchmark.algorithms.base import BaseANN 6 | from benchmark.datasets import DATASETS 7 | import pylinscancufe 8 | 9 | # a python wrapper for the linscan algorithm, implemented in rust 10 | # algorithm details: https://arxiv.org/abs/2301.10622 11 | # code: https://github.com/pinecone-io/research-bigann-linscan 12 | 13 | # Build parameters: none 14 | # Query parameters: budget (in ms) for computing all the scores 15 | class LinscanCUFE(BaseANN): 16 | def __init__(self, metric, index_params): 17 | assert metric == "ip" 18 | self.name = "cufe_linscan" 19 | self._index = pylinscancufe.LinscanIndex() 20 | self._budget = np.infty 21 | self.scale = 32767/3.579759 # need to iterate over the dataset to get the maximum value 3.57959 22 | print("Linscan index initialized: " + str(self._index)) 23 | 24 | def fit(self, dataset): # e.g. 
dataset = "sparse-small" 25 | 26 | self.ds = DATASETS[dataset]() 27 | assert self.ds.data_type() == "sparse" 28 | 29 | 30 | N_VEC_LIMIT = 100000 # batch size 31 | it = self.ds.get_dataset_iterator(N_VEC_LIMIT) 32 | for d in it: 33 | for i in range(d.shape[0]): 34 | d1 = d.getrow(i) 35 | self._index.insert(dict(zip(d1.indices, np.round(d1.data*self.scale).astype(int)))) 36 | 37 | print("Index status: " + str(self._index)) 38 | 39 | 40 | def load_index(self, dataset): 41 | return None 42 | 43 | def set_query_arguments(self, query_args): 44 | self._budget = query_args["budget"] 45 | 46 | def query(self, X, k): 47 | """Carry out a batch query for k-NN of query set X.""" 48 | threshold_mult = int(np.round(0.4776719*self.scale)) # The mean of the training data is 0.4776719 and the median of the training data is 0.30324435 49 | nq = X.shape[0] 50 | 51 | # prepare the queries as a list of dicts 52 | self.queries = [] 53 | for i in range(nq): 54 | qc = X.getrow(i) 55 | q = dict(zip(qc.indices, np.round(qc.data*self.scale).astype(int))) 56 | self.queries.append(q) 57 | 58 | res = self._index.retrieve_parallel(self.queries, k, threshold_mult, self._budget) 59 | self.I = np.array(res, dtype='int32') 60 | 61 | def get_results(self): 62 | return self.I 63 | -------------------------------------------------------------------------------- /neurips23/sparse/install_neurips23.sh: -------------------------------------------------------------------------------- 1 | python install.py --neurips23track sparse --algorithm cufe 2 | python install.py --neurips23track sparse --algorithm linscan 3 | python install.py --neurips23track sparse --algorithm nle 4 | python install.py --neurips23track sparse --algorithm pyanns 5 | python install.py --neurips23track sparse --algorithm shnsw 6 | python install.py --neurips23track sparse --algorithm spmat 7 | python install.py --neurips23track sparse --algorithm sustech-whu 8 | -------------------------------------------------------------------------------- /neurips23/sparse/linscan/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update && apt-get install -y curl 4 | 5 | # install rust + build tools 6 | RUN curl https://sh.rustup.rs -sSf | sh -s -- -y 7 | ENV PATH="/root/.cargo/bin:${PATH}" 8 | RUN git clone --single-branch --branch main https://github.com/pinecone-io/research-bigann-linscan 9 | WORKDIR research-bigann-linscan/ 10 | 11 | # install maturin (build tool for rust-python) 12 | RUN pip install maturin 13 | 14 | # build a whl file 15 | RUN maturin build -r 16 | 17 | # pip install the correct wheel (different architectures will produce .whl files with different names) 18 | RUN pip install ./target/wheels/*.whl 19 | 20 | # verify that the build worked 21 | RUN python3 -c 'import pylinscan; print(pylinscan.LinscanIndex());' 22 | 23 | WORKDIR .. 
-------------------------------------------------------------------------------- /neurips23/sparse/linscan/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | linscan: 3 | docker-tag: neurips23-sparse-linscan 4 | module: neurips23.sparse.linscan.linscan 5 | constructor: Linscan 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{}] 11 | query-args: | 12 | [{"budget":1},{"budget":0.5},{"budget":0.4},{"budget":0.3},{"budget":0.25},{"budget":0.2},{"budget":0.15},{"budget":0.1},{"budget":0.075},{"budget":0.05}] 13 | sparse-1M: 14 | linscan: 15 | docker-tag: neurips23-sparse-linscan 16 | module: neurips23.sparse.linscan.linscan 17 | constructor: Linscan 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{}] 23 | query-args: | 24 | [{"budget":0.5},{"budget":1},{"budget":2},{"budget":4},{"budget":5},{"budget":6},{"budget":7},{"budget":8},{"budget":10}] 25 | sparse-full: 26 | linscan: 27 | docker-tag: neurips23-sparse-linscan 28 | module: neurips23.sparse.linscan.linscan 29 | constructor: Linscan 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{}] 35 | query-args: | 36 | [{"budget":5},{"budget":15},{"budget":35},{"budget":50},{"budget":52.5},{"budget":55},{"budget":57.5},{"budget":60},{"budget":90},{"budget":500}] 37 | -------------------------------------------------------------------------------- /neurips23/sparse/linscan/linscan.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | 5 | from benchmark.algorithms.base import BaseANN 6 | from benchmark.datasets import DATASETS 7 | import pylinscan 8 | 9 | # a python wrapper for the linscan algorithm, implemented in rust 10 | # algorithm details: https://arxiv.org/abs/2301.10622 11 | # code: https://github.com/pinecone-io/research-bigann-linscan 12 | 13 | # Build parameters: none 14 | # Query parameters: budget (in ms) for computing all the scores 15 | class Linscan(BaseANN): 16 | def __init__(self, metric, index_params): 17 | assert metric == "ip" 18 | self.name = "linscan" 19 | self._index = pylinscan.LinscanIndex() 20 | self._budget = np.infty 21 | print("Linscan index initialized: " + str(self._index)) 22 | 23 | def fit(self, dataset): # e.g. 
dataset = "sparse-small" 24 | 25 | self.ds = DATASETS[dataset]() 26 | assert self.ds.data_type() == "sparse" 27 | 28 | N_VEC_LIMIT = 100000 # batch size 29 | it = self.ds.get_dataset_iterator(N_VEC_LIMIT) 30 | for d in it: 31 | for i in range(d.shape[0]): 32 | d1 = d.getrow(i) 33 | self._index.insert(dict(zip(d1.indices, d1.data))) 34 | 35 | print("Index status: " + str(self._index)) 36 | 37 | 38 | def load_index(self, dataset): 39 | return None 40 | 41 | def set_query_arguments(self, query_args): 42 | self._budget = query_args["budget"] 43 | 44 | def query(self, X, k): 45 | """Carry out a batch query for k-NN of query set X.""" 46 | nq = X.shape[0] 47 | 48 | # prepare the queries as a list of dicts 49 | self.queries = [] 50 | for i in range(nq): 51 | qc = X.getrow(i) 52 | q = dict(zip(qc.indices, qc.data)) 53 | self.queries.append(q) 54 | 55 | res = self._index.retrieve_parallel(self.queries, k, self._budget) 56 | self.I = np.array(res, dtype='int32') 57 | 58 | def get_results(self): 59 | return self.I 60 | -------------------------------------------------------------------------------- /neurips23/sparse/nle/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | 5 | RUN apt-get install -y curl git build-essential libpcre3-dev cmake libtool automake libatlas3-base libatlas-base-dev libstdc++-12-dev patchelf ninja-build libtbb2 6 | 7 | RUN pip3 install scikit-build 8 | 9 | #RUN apt-get install -y curl git openjdk-11-jdk build-essential libpcre3-dev cmake libtool automake libatlas3-base libatlas-base-dev libstdc++-12-dev patchelf ninja-build libtbb2 10 | 11 | RUN git clone https://github.com/cadurosar/tttt.git /workspace/tttt && cd /workspace/tttt && bash build.sh 12 | 13 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/sparse/operating_points_private_queries_AzureD8lds_v5.csv: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | sparse-full NLE-Full 1314.194166 4 | cufe 97.860465 5 | linscan 95.098871 6 | nle 1312.961060 7 | pyanns 6499.652881 8 | shnsw 5078.449772 9 | sustech-whu 788.168885 10 | -------------------------------------------------------------------------------- /neurips23/sparse/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | sparse-full cufe 104.768194 4 | linscan 92.510615 5 | nle 2358.590429 6 | pyanns 8732.172708 7 | shnsw 7136.927865 8 | -------------------------------------------------------------------------------- /neurips23/sparse/pinecone_smips/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update && apt-get install -y curl 4 | 5 | # download and install the whl file 6 | RUN git clone --branch sparse https://github.com/pinecone-io/bigann.git 7 | RUN pip install ./bigann/*.whl 8 | 9 | # verify that the build worked 10 | RUN python3 -c 'import py_pinecone_smips;' 11 | -------------------------------------------------------------------------------- /neurips23/sparse/pinecone_smips/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone Sparse ANN algorithm 2 | 3 | Our algorithm for the Sparse track is based on our very own research [[1](https://dl.acm.org/doi/10.1145/3609797), 
[2](https://arxiv.org/abs/2309.09013)]. 4 | In particular, we cluster sparse vectors, build an inverted index that is organized using our [novel structure](https://arxiv.org/abs/2309.09013) 5 | and query the index by first solving the top cluster retrieval problem, then finding the top-k vectors within those clusters using an anytime retrieval algorithm over the inverted index. 6 | 7 | We also augment the index above with two additional lightweight components. 8 | First, we use a k-MIP graph (where every vector is connected to k other vectors that maximize inner product with it) 9 | to “expand” the set of retrieved top-k vectors from the last step. 10 | Second, we re-rank the expanded set using a compressed forward index. 11 | In effect, our final solution is a hybrid of IVF- and graph-based methods, 12 | where the IVF stage provides a set of entry nodes into the graph. -------------------------------------------------------------------------------- /neurips23/sparse/pinecone_smips/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-full: 2 | pinecone_smips: 3 | docker-tag: neurips23-sparse-pinecone_smips 4 | module: neurips23.sparse.pinecone_smips.pinecone_smips 5 | constructor: PineconeSMIPS 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"num_threads": 8, "index_path": "data/pinecone/sparse/index/"}] 11 | query-args: | 12 | [ 13 | {"nprobe":8, "top_kprime":26, "ip_budget": 350}, 14 | {"nprobe":8, "top_kprime":28, "ip_budget": 325}, 15 | {"nprobe":8, "top_kprime":30, "ip_budget": 300}, 16 | {"nprobe":8, "top_kprime":30, "ip_budget": 280}, 17 | {"nprobe":8, "top_kprime":30, "ip_budget": 260}, 18 | {"nprobe":8, "top_kprime":30, "ip_budget": 240}, 19 | {"nprobe":8, "top_kprime":30, "ip_budget": 220}, 20 | {"nprobe":8, "top_kprime":30, "ip_budget": 200}, 21 | {"nprobe":8, "top_kprime":32, "ip_budget": 280}, 22 | {"nprobe":8, "top_kprime":34, "ip_budget": 260}] -------------------------------------------------------------------------------- /neurips23/sparse/plot_public_queries_AzureD8lds_v5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/sparse/plot_public_queries_AzureD8lds_v5.png -------------------------------------------------------------------------------- /neurips23/sparse/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install python-is-python3 5 | RUN git clone https://github.com/veaaaab/pyanns.git --branch sparse --depth 1 6 | WORKDIR /home/app/pyanns 7 | RUN pip install -r requirements.txt 8 | RUN bash build.sh 9 | 10 | RUN python3 -c 'import pyanns' 11 | 12 | WORKDIR /home/app 13 | -------------------------------------------------------------------------------- /neurips23/sparse/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | pyanns: 3 | docker-tag: neurips23-sparse-pyanns 4 | module: neurips23.sparse.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{}] 11 | query-args: | 12 | [ 13 | {"budget": 0.1, "ef" : 80} 14 | ] 15 | sparse-1M: 16 | pyanns: 17 | docker-tag: neurips23-sparse-pyanns 18 | module: neurips23.sparse.pyanns.pyanns 19 | constructor: Pyanns 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | 
args: | 24 | [{}] 25 | query-args: | 26 | [ 27 | {"budget": 0.1, "ef" : 80} 28 | ] 29 | 30 | sparse-full: 31 | pyanns: 32 | docker-tag: neurips23-sparse-pyanns 33 | module: neurips23.sparse.pyanns.pyanns 34 | constructor: Pyanns 35 | base-args: ["@metric"] 36 | run-groups: 37 | base: 38 | args: | 39 | [{}] 40 | query-args: | 41 | [ 42 | {"budget": 0.08, "ef" : 50}, 43 | {"budget": 0.08, "ef" : 65}, 44 | {"budget": 0.08, "ef" : 70}, 45 | {"budget": 0.1, "ef" : 50}, 46 | {"budget": 0.1, "ef" : 55}, 47 | {"budget": 0.1, "ef" : 60}, 48 | {"budget": 0.1, "ef" : 65}, 49 | {"budget": 0.1, "ef" : 70}, 50 | {"budget": 0.1, "ef" : 75}, 51 | {"budget": 0.1, "ef" : 80} 52 | ] 53 | -------------------------------------------------------------------------------- /neurips23/sparse/pyanns/pyanns.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import gc 4 | import os 5 | import numpy as np 6 | 7 | from benchmark.algorithms.base import BaseANN 8 | from benchmark.datasets import DATASETS 9 | import pyanns 10 | 11 | class Pyanns(BaseANN): 12 | def __init__(self, metric, index_params): 13 | assert metric == "ip" 14 | self.name = "pyanns" 15 | 16 | def fit(self, dataset): # e.g. dataset = "sparse-small" 17 | 18 | self.ds = DATASETS[dataset]() 19 | assert self.ds.data_type() == "sparse" 20 | 21 | print("start add") 22 | path = 'hnsw_sparse.idx' 23 | 24 | self.searcher = pyanns.SparseGrapSearcher(self.ds.get_dataset_fn(), path) 25 | print("done add") 26 | 27 | def load_index(self, dataset): 28 | return None 29 | 30 | def set_query_arguments(self, query_args): 31 | self._budget = query_args["budget"] 32 | self.ef = query_args["ef"] 33 | self.searcher.set_ef(self.ef) 34 | 35 | def query(self, X, k): 36 | """Carry out a batch query for k-NN of query set X.""" 37 | nq = X.shape[0] 38 | self.res = self.searcher.search_batch(nq, X.indptr, X.indices, X.data, k, self._budget).reshape(-1, k) 39 | 40 | def get_results(self): 41 | return self.res 42 | 43 | def __str__(self): 44 | return f'pyanns_qdrop{self._budget}_ef{self.ef}' 45 | -------------------------------------------------------------------------------- /neurips23/sparse/run.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base_runner import BaseRunner 2 | 3 | class SparseRunner(BaseRunner): 4 | pass -------------------------------------------------------------------------------- /neurips23/sparse/shnsw/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get install -y python-setuptools python-pip 5 | RUN pip3 install pybind11 numpy setuptools 6 | RUN git clone https://github.com/Leslie-Chung/SHNSW.git 7 | 8 | WORKDIR SHNSW 9 | RUN pip3 install . 
10 | 11 | RUN python3 -c 'import sparse_hnswlib' 12 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/sparse/shnsw/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | shnsw: 3 | docker-tag: neurips23-sparse-shnsw 4 | module: neurips23.sparse.shnsw.shnsw 5 | constructor: SparseHNSW 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M": 16, "efConstruction": 200, "buildthreads": 8}] 11 | query-args: 12 | [[10, 20, 40, 70, 75, 80, 85, 90, 100]] 13 | sparse-1M: 14 | shnsw: 15 | docker-tag: neurips23-sparse-shnsw 16 | module: neurips23.sparse.shnsw.shnsw 17 | constructor: SparseHNSW 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"M": 16, "efConstruction": 200, "buildthreads": 8}] 23 | query-args: 24 | [[10, 20, 40, 60, 70, 75, 80, 90, 100]] 25 | sparse-full: 26 | shnsw: 27 | docker-tag: neurips23-sparse-shnsw 28 | module: neurips23.sparse.shnsw.shnsw 29 | constructor: SparseHNSW 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{"M": 16, "efConstruction": 1000, "buildthreads": 8}] 35 | query-args: 36 | [[20, 40, 45, 48, 50, 52, 55, 57, 70, 75, 80, 85, 90]] -------------------------------------------------------------------------------- /neurips23/sparse/shnsw/shnsw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sparse_hnswlib 3 | import numpy as np 4 | from neurips23.sparse.base import BaseSparseANN 5 | from benchmark.datasets import DATASETS, download_accelerated 6 | 7 | 8 | class SparseHNSW(BaseSparseANN): 9 | def __init__(self, metric, method_param): 10 | assert metric == "ip" 11 | self.method_param = method_param 12 | self.name = "sparse_hnswlib" 13 | self.efC = self.method_param["efConstruction"] 14 | self.M = self.method_param["M"] 15 | self.nt = self.method_param["buildthreads"] 16 | 17 | def fit(self, dataset): 18 | print("begin fit") 19 | ds = DATASETS[dataset]() 20 | 21 | p = sparse_hnswlib.Index(space="ip", dim=8) 22 | p.init_index( 23 | max_elements=ds.nb, 24 | csr_path=ds.get_dataset_fn(), 25 | ef_construction=self.efC, 26 | M=self.M, 27 | ) 28 | p.add_items(num_threads=self.nt) 29 | 30 | index_dir = os.path.join(os.getcwd(), "data", "indices", "sparse", "shnsw") 31 | index_path = os.path.join(index_dir, "shnsw-{}-{}-{}".format(dataset, self.efC, self.M)) 32 | if not os.path.exists(index_dir): 33 | os.makedirs(index_dir, mode=0o777, exist_ok=True) 34 | p.save_index(index_path) 35 | self.p = p 36 | 37 | def load_index(self, dataset): 38 | print("begin load") 39 | index_dir = os.path.join(os.getcwd(), "data", "indices", "sparse", "shnsw") 40 | index_path = os.path.join(index_dir, "shnsw-{}-{}-{}".format(dataset, self.efC, self.M)) 41 | if not os.path.exists(index_dir): 42 | return False 43 | if not os.path.exists(index_path): 44 | return False 45 | ds = DATASETS[dataset]() 46 | X = ds.get_dataset() 47 | self.p = sparse_hnswlib.Index(space="ip", dim=8) 48 | print("#########") 49 | self.p.load_index(index_path) 50 | print("!!!!!!!!") 51 | return True 52 | 53 | def set_query_arguments(self, parameters): 54 | print("开始 set") 55 | ef = parameters 56 | self.p.set_ef(ef) 57 | 58 | def query(self, X, topK): 59 | # N, _ = X.shape 60 | self.I, _ = self.p.knn_query(X.indptr, X.indices, X.data, k=topK, num_threads=self.nt) 61 | 62 | def get_results(self): 63 | return self.I 
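SparseHNSW, like the other entries here, is exercised by the harness through the BaseANN lifecycle: construct with the metric and build parameters, fit or load_index, then for each query-args value call set_query_arguments, query, and get_results. A rough standalone sketch of that flow, assuming sparse_hnswlib is installed and the sparse-small dataset has been prepared under data/ (normally this all happens inside the Docker container), with parameter values mirroring the config above:

# Rough sketch of the call sequence the benchmark harness drives for this wrapper.
from benchmark.datasets import DATASETS
from neurips23.sparse.shnsw.shnsw import SparseHNSW

algo = SparseHNSW("ip", {"M": 16, "efConstruction": 200, "buildthreads": 8})
if not algo.load_index("sparse-small"):   # reuse a previously saved index if present
    algo.fit("sparse-small")              # otherwise build it and save under data/indices/

queries = DATASETS["sparse-small"]().get_queries()
for ef in (10, 40, 100):                  # one pass per query-args entry in config.yaml
    algo.set_query_arguments(ef)
    algo.query(queries, 10)               # k = 10 nearest neighbors per query
    ids = algo.get_results()              # (nq, k) array of neighbor ids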
-------------------------------------------------------------------------------- /neurips23/sparse/spmat/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 5 | 6 | RUN pip3 install scipy 7 | 8 | -------------------------------------------------------------------------------- /neurips23/sparse/spmat/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | spmat: 3 | docker-tag: neurips23-sparse-spmat 4 | module: neurips23.sparse.spmat.spmat 5 | constructor: SparseMatMul 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"threads": 8}] 11 | query-args: | 12 | [{"alpha":0.5}, {"alpha":0.6}, {"alpha":0.7}, {"alpha":0.8}, {"alpha":0.9}, {"alpha":0.92}, {"alpha":0.94}, {"alpha":0.96}, {"alpha":0.98}, {"alpha":1.0}] -------------------------------------------------------------------------------- /neurips23/sparse/sustech-whu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y libssl-dev cmake 5 | 6 | WORKDIR /home/app 7 | RUN git clone https://github.com/lizzy-0323/SUSTech-WHU-Sparse.git --recursive 8 | WORKDIR /home/app/SUSTech-WHU-Sparse 9 | RUN cmake -DCMAKE_BUILD_TYPE=Release . && make -j4 10 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/sparse/sustech-whu/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | sustech-whu: 3 | docker-tag: neurips23-sparse-sustech-whu 4 | module: neurips23.sparse.sustech-whu.SUSTech-WHU 5 | constructor: HnswSparse 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M":16,"ef":200}] 11 | query-args: | 12 | [{"ef":38},{"ef":40},{"ef":42},{"ef":48},{"ef":50},{"ef":59},{"ef":65},{"ef":70},{"ef":80},{"ef":82}] 13 | sparse-1M: 14 | sustech-whu: 15 | docker-tag: neurips23-sparse-sustech-whu 16 | module: neurips23.sparse.sustech-whu.SUSTech-WHU 17 | constructor: HnswSparse 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"M":20,"ef":200}] 23 | query-args: | 24 | [{"ef":48},{"ef":50},{"ef":52},{"ef":55},{"ef":58},{"ef":50},{"ef":62},{"ef":65},{"ef":75},{"ef":80}] 25 | sparse-full: 26 | sustech-whu: 27 | docker-tag: neurips23-sparse-sustech-whu 28 | module: neurips23.sparse.sustech-whu.SUSTech-WHU 29 | constructor: HnswSparse 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{"M":20,"ef":1200}] 35 | query-args: | 36 | [{"ef":35},{"ef":40},{"ef":43},{"ef":45},{"ef":48},{"ef":50},{"ef":55},{"ef":65},{"ef":75},{"ef":80}] 37 | -------------------------------------------------------------------------------- /neurips23/sparse/zilliz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install python-is-python3 5 | RUN git clone https://github.com/hhy3/zilliz-bigann.git --branch sparse --depth 1 6 | RUN pip install ./zilliz-bigann/*.whl 7 | 8 | RUN python3 -c 'import sparse_searcher' 9 | 10 | -------------------------------------------------------------------------------- /neurips23/sparse/zilliz/config.yaml: 
-------------------------------------------------------------------------------- 1 | sparse-small: 2 | zilliz: 3 | docker-tag: neurips23-sparse-zilliz 4 | module: neurips23.sparse.zilliz.zilliz 5 | constructor: Zilliz 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{ 11 | "R": 48, 12 | "L": 500 13 | }] 14 | query-args: | 15 | [ 16 | {"budget": 0.1, "ef" : 80} 17 | ] 18 | sparse-1M: 19 | zilliz: 20 | docker-tag: neurips23-sparse-zilliz 21 | module: neurips23.sparse.zilliz.zilliz 22 | constructor: Zilliz 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: | 27 | [{ 28 | "R": 48, 29 | "L": 500 30 | }] 31 | query-args: | 32 | [ 33 | {"budget": 0.1, "ef" : 80} 34 | ] 35 | 36 | sparse-full: 37 | zilliz: 38 | docker-tag: neurips23-sparse-zilliz 39 | module: neurips23.sparse.zilliz.zilliz 40 | constructor: Zilliz 41 | base-args: ["@metric"] 42 | run-groups: 43 | base: 44 | args: | 45 | [{ 46 | "R": 48, 47 | "L": 500 48 | }] 49 | query-args: | 50 | [ 51 | {"budget": 0.11, "ef" : 45}, 52 | {"budget": 0.11, "ef" : 55}, 53 | {"budget": 0.11, "ef" : 65}, 54 | {"budget": 0.11, "ef" : 70}, 55 | {"budget": 0.12, "ef" : 45}, 56 | {"budget": 0.12, "ef" : 50}, 57 | {"budget": 0.12, "ef" : 55}, 58 | {"budget": 0.12, "ef" : 60}, 59 | {"budget": 0.12, "ef" : 65}, 60 | {"budget": 0.12, "ef" : 70} 61 | ] 62 | -------------------------------------------------------------------------------- /neurips23/sparse/zilliz/zilliz.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import gc 4 | import os 5 | import numpy as np 6 | 7 | from benchmark.algorithms.base import BaseANN 8 | from benchmark.datasets import DATASETS 9 | import sparse_searcher 10 | 11 | class Zilliz(BaseANN): 12 | def __init__(self, metric, index_params): 13 | assert metric == "ip" 14 | self.name = "zilliz" 15 | self.R = index_params['R'] 16 | self.L = index_params['L'] 17 | 18 | def fit(self, dataset): # e.g. 
dataset = "sparse-small" 19 | 20 | self.ds = DATASETS[dataset]() 21 | assert self.ds.data_type() == "sparse" 22 | 23 | print("start add") 24 | path = f'zilliz_R{self.R}_L{self.L}.idx' 25 | 26 | self.searcher = sparse_searcher.SparseGrapSearcher(self.ds.get_dataset_fn(), path, self.R, self.L) 27 | print("done add") 28 | 29 | def load_index(self, dataset): 30 | return None 31 | 32 | def set_query_arguments(self, query_args): 33 | self._budget = query_args["budget"] 34 | self.ef = query_args["ef"] 35 | self.searcher.set_ef(self.ef) 36 | 37 | def query(self, X, k): 38 | """Carry out a batch query for k-NN of query set X.""" 39 | nq = X.shape[0] 40 | self.res = self.searcher.search_batch(nq, X.indptr, X.indices, X.data, k, self._budget).reshape(-1, k) 41 | 42 | def get_results(self): 43 | return self.res 44 | 45 | def __str__(self): 46 | return f'zilliz_qdrop{self._budget}_ef{self.ef}' 47 | 48 | -------------------------------------------------------------------------------- /neurips23/streaming/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/streaming/__init__.py -------------------------------------------------------------------------------- /neurips23/streaming/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.typing as npt 3 | from benchmark.algorithms.base import BaseANN 4 | 5 | class BaseStreamingANN(BaseANN): 6 | def track(self): 7 | return "stream" 8 | 9 | def setup(self, dtype, max_pts, ndims) -> None: 10 | ''' 11 | Initialize the data structures for your algorithm 12 | dtype can be 'uint8', 'int8 'or 'float32' 13 | max_pts is an upper bound on non-deleted points that the index must support 14 | ndims is the size of the dataset 15 | ''' 16 | raise NotImplementedError 17 | 18 | def insert(self, X: np.array, ids: npt.NDArray[np.uint32]) -> None: 19 | ''' 20 | Implement this for your algorithm 21 | X is num_vectos * num_dims matrix 22 | ids is num_vectors-sized array which indicates ids for each vector 23 | ''' 24 | raise NotImplementedError 25 | 26 | def delete(self, ids: npt.NDArray[np.uint32]) -> None: 27 | ''' 28 | Implement this for your algorithm 29 | delete the vectors labelled with ids. 
30 | ''' 31 | raise NotImplementedError 32 | 33 | 34 | def fit(self, dataset): 35 | ''' 36 | Do not override this method 37 | ''' 38 | raise NotImplementedError 39 | 40 | def load_index(self, dataset): 41 | """ 42 | Do not override 43 | """ 44 | return False 45 | 46 | def get_index_components(self, dataset): 47 | """ 48 | Does not apply to streaming indices 49 | """ 50 | raise NotImplementedError 51 | 52 | def index_files_to_store(self, dataset): 53 | """ 54 | Does not apply to streaming indices 55 | """ 56 | raise NotImplementedError 57 | -------------------------------------------------------------------------------- /neurips23/streaming/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | ADD "https://github.com/AbdelrahmanMohamed129/DiskANN/tree/farah" latest_commit 10 | RUN git clone https://github.com/AbdelrahmanMohamed129/DiskANN --branch farah 11 | WORKDIR /home/app/DiskANN 12 | RUN git pull 13 | RUN pip3 install virtualenv build 14 | RUN python3 -m build 15 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 16 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/streaming/diskann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/microsoft/DiskANN.git --branch 0.5.0.rc3.post1 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/streaming/hwtl_sdu_anns_stream/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/WPJiang/HWTL_SDU-ANNS-stream 10 | WORKDIR /home/app/HWTL_SDU-ANNS-stream 11 | RUN pip install diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 12 | WORKDIR /home/app 13 | -------------------------------------------------------------------------------- /neurips23/streaming/hwtl_sdu_anns_stream/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | hwtl_sdu_anns_stream: 3 | docker-tag: neurips23-streaming-hwtl_sdu_anns_stream 4 | module: 
neurips23.streaming.hwtl_sdu_anns_stream.hwtl_sdu_anns_stream 5 | constructor: hwtl_sdu_anns_stream 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "insert_threads":16, "consolidate_threads":16}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | 14 | 15 | msturing-30M-clustered: 16 | hwtl_sdu_anns_stream: 17 | docker-tag: neurips23-streaming-hwtl_sdu_anns_stream 18 | module: neurips23.streaming.hwtl_sdu_anns_stream.hwtl_sdu_anns_stream 19 | constructor: hwtl_sdu_anns_stream 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"R":65, "L":70, "insert_threads":16, "consolidate_threads":16}] 25 | query-args: | 26 | [{"Ls":100, "T":16}] 27 | -------------------------------------------------------------------------------- /neurips23/streaming/pinecone/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | WORKDIR /home/app 10 | 11 | 12 | # copy and install the fast rust reranker package and the updated diskann 13 | RUN git clone --branch streaming2 https://github.com/pinecone-io/bigann.git 14 | RUN pip install ./bigann/*.whl 15 | 16 | # verify that the build worked 17 | RUN python3 -c 'import diskannpy;' 18 | RUN python3 -c 'import research_pinecone_reranking;' 19 | -------------------------------------------------------------------------------- /neurips23/streaming/pinecone/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone Streaming ANN algorithm 2 | 3 | Our solution employs a two-stage retrieval strategy. 4 | In the initial phase, we use a variant of the DiskANN index for candidate generation to generate a set of k’ >> k 5 | results through an approximate scoring mechanism over uint8-quantized vectors, 6 | with accelerated SIMD-based distance calculation. 7 | The second-stage reranks the candidates using full-precision scoring to enhance the overall accuracy of retrieval. 8 | It is worth noting that the raw vectors used in the second stage are stored on SSD. 9 | As such, it is important to optimize the number of disk reads invoked by the reranking stage. 
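The config below exposes the usual DiskANN search list size Ls alongside a k_1 parameter, presumably the size of the first-stage candidate set. As a generic illustration of the two-stage pattern described above (approximate scores over quantized vectors, then exact rescoring of a small candidate set) — this is not Pinecone's code, and the data here is synthetic:

# Illustrative retrieve-then-rerank: approximate first stage over quantized vectors,
# exact second stage over a small candidate set.
import numpy as np

def rerank(query, candidates, full_precision_vectors, k):
    """candidates: ids returned by the approximate stage (k' >> k of them)."""
    exact = full_precision_vectors[candidates]         # read from SSD in the real system
    dists = np.linalg.norm(exact - query, axis=1)      # full-precision distances
    order = np.argsort(dists)[:k]
    return candidates[order]

rng = np.random.default_rng(0)
base = rng.random((1000, 64), dtype=np.float32)        # synthetic full-precision vectors
quantized = np.round(base * 255).astype(np.uint8)      # crude stand-in for the uint8 index
q = rng.random(64, dtype=np.float32)
approx = np.linalg.norm(quantized.astype(np.float32) / 255.0 - q, axis=1)
cand = np.argsort(approx)[:30]                         # k' = 30 candidates (cf. k_1 below)
top10 = rerank(q, cand, base, k=10)

Because the second stage reads full-precision vectors from SSD, keeping the candidate set small directly bounds the number of disk reads per query, which is the optimization the README calls out.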
-------------------------------------------------------------------------------- /neurips23/streaming/pinecone/config.yaml: -------------------------------------------------------------------------------- 1 | msturing-30M-clustered: 2 | pinecone: 3 | docker-tag: neurips23-streaming-pinecone 4 | module: neurips23.streaming.pinecone.pinecone 5 | constructor: pinecone 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":100, "insert_threads":8, "consolidate_threads":8}] 11 | query-args: | 12 | [ 13 | {"Ls":300, "k_1":30, "T":8}, 14 | {"Ls":400, "k_1":30, "T":8}, 15 | {"Ls":500, "k_1":30, "T":8}, 16 | {"Ls":520, "k_1":30, "T":8}, 17 | {"Ls":540, "k_1":30, "T":8}, 18 | {"Ls":560, "k_1":30, "T":8} 19 | ] 20 | -------------------------------------------------------------------------------- /neurips23/streaming/puck/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 13 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 14 | ENV PATH /home/app/cmake/bin:$PATH 15 | 16 | #mkl 17 | RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 18 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 19 | 20 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN touch /etc/profile.d/intel.sh 23 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 24 | RUN . /etc/profile.d/intel.sh 25 | 26 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 27 | #RUN git config --global http.sslVerify false 28 | 29 | RUN git clone -b streaming https://github.com/baidu/puck.git 30 | RUN cd puck && . 
/etc/profile.d/intel.sh && python3 setup.py install 31 | RUN python3 -c 'from puck import py_puck_api' 32 | -------------------------------------------------------------------------------- /neurips23/streaming/puck/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | puck: 3 | docker-tag: neurips23-streaming-puck 4 | module: neurips23.streaming.puck.puck 5 | constructor: Puck 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [ 11 | { "index_type": 1, "C":20, "F":20, "FN":10, "N":0, "filter_topk":200} 12 | ] 13 | query-args: | 14 | [ 15 | {"radius_rate":1.00 ,"search_coarse_count":5} 16 | ] 17 | msturing-30M-clustered: 18 | puck: 19 | docker-tag: neurips23-streaming-puck 20 | module: neurips23.streaming.puck.puck 21 | constructor: Puck 22 | base-args: ["@metric"] 23 | run-groups: 24 | base: 25 | args: | 26 | [ 27 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1200}, 28 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1500}, 29 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1800}, 30 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1900}, 31 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2000}, 32 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2100}, 33 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2200}, 34 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2300} 35 | ] 36 | query-args: | 37 | [ 38 | {"radius_rate":1.00 ,"search_coarse_count":200} 39 | ] 40 | 41 | 42 | -------------------------------------------------------------------------------- /neurips23/streaming/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/veaaaab/DiskANN.git --branch bigann23_streaming 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/streaming/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | pyanns: 3 | docker-tag: neurips23-streaming-pyanns 4 | module: neurips23.streaming.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "insert_threads":16, "consolidate_threads":16}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | 14 | msturing-30M-clustered: 15 | pyanns: 16 | docker-tag: neurips23-streaming-pyanns 17 | module: neurips23.streaming.pyanns.pyanns 18 | constructor: Pyanns 19 | base-args: ["@metric"] 20 | run-groups: 21 | base: 22 | args: | 23 | [{"R":32, "L":100, "insert_threads":8, "consolidate_threads":8}] 24 | query-args: | 25 | [{"Ls":300, "T":8}, 26 | {"Ls":400, "T":8}, 27 | {"Ls":500, "T":8}, 28 | {"Ls":600, "T":8} 29 | ] 30 | -------------------------------------------------------------------------------- 
/neurips23/streaming/scann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN pip install scann 6 | 7 | WORKDIR /home/app 8 | -------------------------------------------------------------------------------- /neurips23/streaming/scann/config.yaml: -------------------------------------------------------------------------------- 1 | msturing-30M-clustered: 2 | scann: 3 | docker-tag: neurips23-streaming-scann 4 | module: neurips23.streaming.scann.scann 5 | constructor: Scann 6 | base-args: ["@metric"] 7 | run-groups: 8 | dynamic4M: 9 | args: | 10 | [{ "tree_size": 5000, "leaves_to_search": 700, "reorder": 317}] 11 | query-args: | 12 | [{}] 13 | -------------------------------------------------------------------------------- /preparation/neurips23/sparse_algorithms/basic_sparse_index.py: -------------------------------------------------------------------------------- 1 | from scipy.sparse import csr_matrix 2 | import numpy as np 3 | 4 | # given a vector x, returns another vector with the minimal number of largest elements of x, 5 | # s.t. their sum is at most a times the sum of the elements in x. 6 | # 7 | # The goal is to sparsify the vector further, 8 | # but at the same time try and preserve as much of the original vector as possible. 9 | def largest_elements(x, a): 10 | # Compute the sum of elements of x 11 | x_sum = np.sum(x) 12 | 13 | # Compute the indices and values of the largest elements of x 14 | ind = np.argsort(-x.data) 15 | cs = np.cumsum(x.data[ind] / x_sum) 16 | 17 | n_elements = min(sum(cs < a) + 1, x.nnz) # rounding errors sometimes results in n_elements > x.nnz 18 | 19 | new_ind = x.indices[ind[:n_elements]] 20 | new_data = x.data[ind[:n_elements]] 21 | return csr_matrix((new_data, new_ind, [0, n_elements]), shape=x.shape) 22 | 23 | 24 | # a basic sparse index. 25 | # methods: 26 | # 1. init: from a csr matrix of data. 27 | # 2. query a singe vector, with parameters: 28 | # - k (# of neighbors), 29 | # - alpha (fraction of the sum of the vector to maintain. alpha=1 is exact search). 
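# Illustrative usage, assuming `data_csr` is an n x d scipy.sparse csr_matrix and
# `q_row` is a 1 x d csr_matrix query (both hypothetical here):
#   index = BasicSparseIndex(data_csr)
#   exact = index.query(q_row, k=10)              # alpha=1: exact inner-product search
#   approx = index.query(q_row, k=10, alpha=0.9)  # prune q to its largest elements,
#                                                 # keeping ~90% of its total mass
# Each call returns up to k (row_id, score) pairs.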
30 | class BasicSparseIndex(object): 31 | def __init__(self, data_csr): 32 | self.data_csc = data_csr.tocsc() 33 | 34 | def query(self, q, k, alpha=1): # single query, assumes q is a row vector 35 | if alpha == 1: 36 | q2 = q.transpose() 37 | else: 38 | q2 = largest_elements(q, alpha).transpose() 39 | 40 | # perform (sparse) matrix-vector multiplication 41 | res = self.data_csc.dot(q2) 42 | 43 | if res.nnz <= k: # if there are less than k elements with nonzero score, simply return them 44 | return list(zip(res.indices, res.data)) 45 | 46 | # extract the top k from the res sparse array directly 47 | indices = np.argpartition(res.data, -(k + 1))[-k:] 48 | results = [] 49 | for index in indices: 50 | results.append((res.data[index], index)) 51 | results.sort(reverse=True) 52 | return [(res.indices[b], a) for a, b in results] 53 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==2.6.1 3 | h5py==2.10.0 4 | matplotlib==2.1.0 5 | numpy==1.16.0 6 | pyyaml==5.4 7 | psutil==5.6.6 8 | scipy==1.0.0 9 | scikit-learn==0.19.1 10 | jinja2==2.11.3 11 | pandas 12 | -------------------------------------------------------------------------------- /requirements_py3.10.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==7.1.0 3 | h5py==3.10.0 4 | matplotlib==3.3.4 5 | numpy==1.24.2 6 | pyyaml==6.0 7 | psutil==5.9.4 8 | scipy==1.10.1 9 | scikit-learn 10 | jinja2==3.1.2 11 | pandas==2.0.0 12 | -------------------------------------------------------------------------------- /requirements_py38.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==2.6.1 3 | h5py==2.10.0 4 | matplotlib==3.3.4 5 | numpy==1.19.5 6 | pyyaml==5.4 7 | psutil==5.8.0 8 | scipy==1.5.4 9 | scikit-learn 10 | jinja2==2.11.3 11 | pandas==1.1.5 12 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from benchmark.main import main 2 | from multiprocessing import freeze_support 3 | 4 | if __name__ == "__main__": 5 | freeze_support() 6 | main() 7 | -------------------------------------------------------------------------------- /run_algorithm.py: -------------------------------------------------------------------------------- 1 | from benchmark.runner import run_from_cmdline 2 | 3 | run_from_cmdline() 4 | -------------------------------------------------------------------------------- /setup_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DATASET_HOME=/raid/workspace/dataset 4 | INDEX_HOME=/raid/workspace/anaruse/libcuann/models 5 | 6 | mkdir -p data/indices/t3/CuannsIvfpq 2> /dev/null 7 | 8 | # 9 | cd data 10 | if [ ! -e bigann ]; then 11 | ln -s ${DATASET_HOME}/bigann-1B bigann 12 | fi 13 | if [ ! -e deep1b ]; then 14 | ln -s ${DATASET_HOME}/deep-1B deep1b 15 | fi 16 | if [ ! -e MSSPACEV1B ]; then 17 | ln -s ${DATASET_HOME}/msspacev-1B MSSPACEV1B 18 | fi 19 | if [ ! -e MSTuringANNS ]; then 20 | ln -s ${DATASET_HOME}/msturing-1B MSTuringANNS 21 | fi 22 | if [ ! -e text2image1B ]; then 23 | ln -s ${DATASET_HOME}/text2image-1B text2image1B 24 | fi 25 | 26 | # 27 | cd indices/t3/CuannsIvfpq 28 | if [ ! 
-e bigann-1B.cluster_250000.pq_64.5_bit ]; then 29 | ln -s ${INDEX_HOME}/BIGANN-1B-uint8-1000000000x128.cluster_250000.pq_64.5_bit bigann-1B.cluster_250000.pq_64.5_bit 30 | fi 31 | if [ ! -e deep-1B.cluster_250000.pq_64.5_bit ]; then 32 | ln -s ${INDEX_HOME}/DEEP-1B-float32-1000000000x96.cluster_250000.pq_64.5_bit deep-1B.cluster_250000.pq_64.5_bit 33 | fi 34 | if [ ! -e msspacev-1B.cluster_500000.pq_64.5_bit ]; then 35 | ln -s ${INDEX_HOME}/MS-SPACEV-1B-int8-1000000000x100.cluster_500000.pq_64.5_bit msspacev-1B.cluster_500000.pq_64.5_bit 36 | fi 37 | if [ ! -e msturing-1B.cluster_250000.pq_64.5_bit ]; then 38 | ln -s ${INDEX_HOME}/MS-Turing-ANNS-1B-float32-1000000000x100.cluster_250000.pq_64.5_bit msturing-1B.cluster_250000.pq_64.5_bit 39 | fi 40 | if [ ! -e text2image-1B.cluster_500000.pq_72.8_bit ]; then 41 | ln -s ${INDEX_HOME}/T2I-1B-float32-1000000000x200.cluster_500000.pq_72.8_bit text2image-1B.cluster_500000.pq_72.8_bit 42 | fi 43 | 44 | -------------------------------------------------------------------------------- /tests/tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # You should run this script from the repo top-level directory 4 | 5 | PYTHONPATH="." python tests/recall_tests.py 6 | 7 | --------------------------------------------------------------------------------
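tests/tests.sh above runs tests/recall_tests.py from the repository root. The quantity under test is recall@k: the fraction of each query's true k nearest neighbors that the algorithm actually returned. A minimal standalone version for reference — the benchmark's own metric lives in benchmark/plotting/metrics.py and is more involved (among other things it accounts for ties in the ground-truth distances):

# Minimal recall@k over id arrays; illustrative only, not the repository's implementation.
import numpy as np

def recall_at_k(results, ground_truth, k):
    """results, ground_truth: (nq, >=k) arrays of neighbor ids per query."""
    hits = sum(len(set(res[:k]) & set(gt[:k])) for res, gt in zip(results, ground_truth))
    return hits / (results.shape[0] * k)

gt = np.array([[0, 1, 2], [3, 4, 5]])
res = np.array([[0, 2, 9], [5, 4, 3]])
print(recall_at_k(res, gt, k=3))  # 0.833... (5 of the 6 true neighbors were found)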