├── .dockerignore ├── .github └── workflows │ ├── neurips21.yml │ └── neurips23.yml ├── .gitignore ├── LICENSE ├── README.md ├── algos-2021.yaml ├── benchmark ├── __init__.py ├── algorithms │ ├── base.py │ ├── base_runner.py │ ├── bbann.py │ ├── buddy_t1.py │ ├── cuanns_ivfpq.py │ ├── cuanns_multigpu.py │ ├── definitions.py │ ├── diskann-t2.py │ ├── diskann-v0_3.py │ ├── elastiknn.py │ ├── faiss_inmem.py │ ├── faiss_t1.py │ ├── faiss_t3.py │ ├── gemini.py │ ├── httpann.py │ ├── httpann_example.py │ ├── kota-t2.py │ ├── kst_t1.py │ ├── puck_t1.py │ └── team11.py ├── dataset_io.py ├── datasets.py ├── distances.py ├── main.py ├── plotting │ ├── __init__.py │ ├── eval_range_search.py │ ├── metrics.py │ ├── plot_variants.py │ └── utils.py ├── results.py ├── runner.py ├── sensors │ └── power_capture.py ├── streaming │ ├── __init__.py │ ├── compute_gt.py │ ├── download_gt.py │ └── load_runbook.py └── t3 │ ├── __init__.py │ └── helper.py ├── create_dataset.py ├── data_export.py ├── dataset_preparation ├── FB_ssnpp_dataset.md ├── fb_ssnpp_images │ ├── IoU.png │ ├── distance_histogram.png │ ├── pr_compression.png │ └── result_stats.png ├── make_filtered_groundtruth.py ├── make_groundtruth.py ├── make_sparse_groundtruth.py ├── prepare_bigann.py ├── prepare_fb_ssnpp.py ├── prepare_yfcc100m.py └── sparse_dataset.md ├── eval └── show_operating_points.py ├── install.py ├── install ├── Dockerfile ├── Dockerfile.bbann ├── Dockerfile.diskann ├── Dockerfile.elastiknn ├── Dockerfile.faiss ├── Dockerfile.faissconda ├── Dockerfile.httpann_example ├── Dockerfile.kota ├── Dockerfile.kst_ann_t1 ├── Dockerfile.pqbuddy ├── Dockerfile.puck └── requirements_conda.txt ├── logging.conf ├── neurips21 ├── README.md ├── t1_t2 │ ├── README.md │ └── results │ │ ├── T1 │ │ ├── bigann-1B.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B.png │ │ ├── neurips21 │ │ │ ├── bigann-1B.png │ │ │ ├── deep-1B.png │ │ │ ├── msspacev-1B.png │ │ │ ├── msturing-1B.png │ │ │ ├── ssnpp-1B.png │ │ │ ├── t1.csv │ │ │ └── text2image-1B.png │ │ ├── ssnpp-1B.png │ │ └── text2image-1B.png │ │ └── T2 │ │ ├── bigann-1B-IO.png │ │ ├── bigann-1B.png │ │ ├── deep-1B-IO.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B-IO.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B-IO.png │ │ ├── msturing-1B.png │ │ ├── neurips21 │ │ ├── bigann-1B.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B.png │ │ ├── ssnpp-1B.png │ │ ├── t2.csv │ │ └── text2image-1B.png │ │ ├── ssnpp-1B-IO.png │ │ ├── ssnpp-1B.png │ │ ├── text2image-1B-IO.png │ │ └── text2image-1B.png ├── t3 │ ├── LB_history │ │ ├── Dec.2.2021 │ │ │ ├── LEADERBOARDS.md │ │ │ └── LEADERBOARDS_REJECT_ANOMALIES.md │ │ └── Nov.29.2021 │ │ │ ├── LEADERBOARDS.md │ │ │ └── TASKS_ISSUES_RESOLUTIONS.md │ ├── LEADERBOARDS.md │ ├── LEADERBOARDS_PRIVATE.md │ ├── LEADERBOARDS_PRIVATE_REJECT_ANOMALIES.md │ ├── LEADERBOARDS_PUBLIC.md │ ├── LEADERBOARDS_PUBLIC_REJECT_ANOMALIES.md │ ├── LEADERBOARDS_REJECT_ANOMALIES.md │ ├── RANKING.md │ ├── README.md │ ├── TASKS_ISSUES_RESOLUTIONS.md │ ├── cuanns_ivfpq │ │ ├── Dockerfile │ │ ├── README.md │ │ └── algos.yaml │ ├── cuanns_multigpu │ │ ├── Dockerfile │ │ ├── README.md │ │ └── algos.yaml │ ├── eval_2021 │ │ └── faiss_t3 │ │ │ └── prun.sh │ ├── faiss_t3 │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── algos.yaml │ │ ├── baseline_plots │ │ │ ├── bigann-1B-r-vs-p.png │ │ │ ├── bigann-1B-r-vs-t.png │ │ │ ├── deep-1B-r-vs-p.png │ │ │ ├── deep-1B-r-vs-t.png │ │ │ ├── msspacev-1B-r-vs-p.png │ │ │ ├── msspacev-1B-r-vs-t.png │ │ │ ├── msturing-1B-r-vs-p.png │ │ 
│ ├── msturing-1B-r-vs-t.png │ │ │ ├── text2image-1B-r-vs-p.png │ │ │ └── text2image-1B-r-vs-t.png │ │ ├── cost │ │ │ ├── AdvantechSky6200.pdf │ │ │ ├── GPU.pdf │ │ │ ├── RAM.pdf │ │ │ └── SSD.pdf │ │ └── faiss-gpu_requirements.txt │ └── gemini │ │ ├── .gitignore │ │ ├── README.md │ │ ├── algos.yaml │ │ ├── buildidx │ │ ├── build_index.py │ │ ├── htest.py │ │ ├── run_bin_build_index.sh │ │ └── test.py │ │ ├── cost │ │ ├── AdvantechSky6200.pdf │ │ ├── GPU.pdf │ │ ├── RAM.pdf │ │ └── SSD.pdf │ │ ├── requirements.txt │ │ ├── run_bin_python.sh │ │ └── run_conda_python.sh ├── track1_baseline_faiss │ ├── README.md │ ├── __init__.py │ ├── baseline_faiss.py │ ├── baseline_faiss_filtered.py │ ├── parse_results.py │ ├── plots │ │ ├── bigann-1B.png │ │ ├── deep-1B.png │ │ ├── msspacev-1B.png │ │ ├── msturing-1B.png │ │ ├── ssnpp-1B.png │ │ └── text2image-1B.png │ ├── run_baselines.bash │ └── test_bow_id_selector.py └── track3_baseline_faiss │ ├── README.md │ ├── gpu_baseline_faiss.py │ └── plots │ └── T3_deep-1B.png ├── neurips23 ├── Azure_D8lds_v5_table.md ├── Dockerfile ├── README.md ├── __init__.py ├── common.py ├── ec2_c6i.2xlarge_res.csv ├── ec2_c6i.2xlarge_table.md ├── filter │ ├── base.py │ ├── cufe │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── bow_id_selector.swig │ │ ├── config.yaml │ │ └── faissCUFE.py │ ├── dhq │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── dhq.py │ ├── faiss │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── bow_id_selector.swig │ │ ├── config.yaml │ │ └── faiss.py │ ├── faissplus │ │ ├── Dockerfile │ │ ├── bow_id_selector.swig │ │ ├── config.yaml │ │ └── faiss.py │ ├── fdufilterdiskann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── fdufilterdiskann.py │ ├── hwtl_sdu_anns_filter │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── hwtl_sdu_anns_filter.py │ ├── install_neurips23.sh │ ├── operating_points_private_queries_AzureD8lds_v5.csv │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ ├── parlayivf │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── parlayivf.py │ ├── pinecone │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── pinecone_index.py │ ├── plot_public_queries_AzureD8lds_v5.png │ ├── puck │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── puck.py │ ├── pyanns │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── pyanns.py │ ├── res_private_queries_AzureD8lds_v5.csv │ ├── res_public_queries_AzureD8lds_v5.csv │ ├── run.py │ ├── wm_filter │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── wm_filter.py │ └── zilliz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── zilliz.py ├── leaderboard.md ├── notes │ ├── README.md │ └── streaming │ │ └── hnsw_result │ │ ├── hnsw_result.md │ │ ├── recall_over_steps_10_48_hnsw.png │ │ ├── recall_over_steps_10_48_hnsw_robust_prune.png │ │ └── recall_over_steps_10_48_hnsw_search_based_repair.png ├── ongoing_leaderboard │ ├── Azure_D8lds_v5_table.md │ ├── filter │ │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ │ ├── res_public_queries_AzureD8lds_v5.csv │ │ └── yfcc-10M.png │ ├── leaderboard.md │ ├── ood │ │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ │ ├── res_public_queries_AzureD8lds_v5.csv │ │ └── text2image-10M.png │ ├── sparse │ │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ │ ├── res_public_queries_AzureD8lds_v5.csv │ │ └── sparse-full.png │ └── streaming │ │ └── res_final_runbook_AzureD8lds_v5.csv ├── ood │ ├── base.py │ ├── cufe │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── diskann-in-mem.py │ ├── diskann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── diskann-in-mem.py 
│ ├── epsearch │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── diskann-in-mem-ep-hnsw.py │ ├── hanns │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── hanns.py │ ├── install_neurips23.sh │ ├── mysteryann-dif │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── mysteryann-dif.py │ ├── mysteryann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── mysteryann.py │ ├── ngt │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── module.py │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ ├── pinecone-ood │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── s2_index.py │ ├── plot_public_queries_AzureD8lds_v5.png │ ├── puck-fizz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── puck.py │ ├── puck │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── puck.py │ ├── pyanns │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── pyanns.py │ ├── res_public_queries_AzureD8lds_v5.csv │ ├── run.py │ ├── scann │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── scann.py │ ├── sustech-ood │ │ ├── Dockerfile │ │ ├── SUSTech-OOD.py │ │ └── config.yaml │ ├── vamana │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── vamana.py │ └── zilliz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── zilliz.py ├── runbooks │ ├── clustered_data_gen.py │ ├── clustered_replace_runbook.yaml │ ├── clustered_runbook.yaml │ ├── delete_runbook.yaml │ ├── final_runbook.yaml │ ├── final_runbook_gen.py │ ├── gen_expiration_time_runbook.py │ ├── gen_replace_runbooks.py │ ├── generate_msturing10m_runbooks.py │ ├── msmarco-100M_expirationtime_runbook.yaml │ ├── msturing-10M_slidingwindow_runbook.yaml │ ├── random_replace_runbook.yaml │ ├── simple_replace_runbook.yaml │ ├── simple_runbook.yaml │ ├── wikipedia-1M_expiration_time_replace_delete_runbook.yaml │ ├── wikipedia-1M_expiration_time_replace_only_runbook.yaml │ ├── wikipedia-1M_expiration_time_runbook.yaml │ ├── wikipedia-35M_expiration_time_replace_delete_runbook.yaml │ ├── wikipedia-35M_expiration_time_replace_only_runbook.yaml │ └── wikipedia-35M_expirationtime_runbook.yaml ├── sparse │ ├── base.py │ ├── cufe │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── linscan.py │ ├── install_neurips23.sh │ ├── linscan │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── linscan.py │ ├── nle │ │ ├── Dockerfile │ │ ├── config.yaml │ │ ├── interface.py │ │ └── nle.py │ ├── operating_points_private_queries_AzureD8lds_v5.csv │ ├── operating_points_public_queries_AzureD8lds_v5.txt │ ├── pinecone_smips │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── config.yaml │ │ └── pinecone_smips.py │ ├── plot_public_queries_AzureD8lds_v5.png │ ├── pyanns │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── pyanns.py │ ├── res_private_queries_AzureD8lds_v5.csv │ ├── res_public_queries_AzureD8lds_v5.csv │ ├── run.py │ ├── shnsw │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── shnsw.py │ ├── spmat │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── spmat.py │ ├── sustech-whu │ │ ├── Dockerfile │ │ ├── SUSTech-WHU.py │ │ └── config.yaml │ └── zilliz │ │ ├── Dockerfile │ │ ├── config.yaml │ │ └── zilliz.py └── streaming │ ├── README.md │ ├── __init__.py │ ├── base.py │ ├── cufe │ ├── Dockerfile │ ├── config.yaml │ └── diskann-str.py │ ├── diskann │ ├── Dockerfile │ ├── config.yaml │ └── diskann-str.py │ ├── hwtl_sdu_anns_stream │ ├── Dockerfile │ ├── config.yaml │ └── hwtl_sdu_anns_stream.py │ ├── pinecone │ ├── Dockerfile │ ├── README.md │ ├── config.yaml │ └── pinecone.py │ ├── puck │ ├── Dockerfile │ ├── config.yaml │ └── puck.py │ ├── pyanns │ ├── Dockerfile │ ├── config.yaml │ └── pyanns.py │ ├── res_final_runbook_AzureD8lds_v5.csv │ ├── run.py │ 
└── scann │ ├── Dockerfile │ ├── config.yaml │ └── scann.py ├── plot.py ├── preparation ├── neurips21 │ └── notebooks │ │ ├── check_1B_groundtruth.ipynb │ │ ├── compare_track1_1B_vs_2x500M.ipynb │ │ ├── eval_t2i_results.ipynb │ │ ├── find_suitable_nq.ipynb │ │ └── large_coarse_quantizer.ipynb └── neurips23 │ ├── parse_filtered_results.ipynb │ └── sparse_algorithms │ ├── basic_sparse_index.py │ └── eval_sparse.py ├── requirements.txt ├── requirements_py3.10.txt ├── requirements_py38.txt ├── run.py ├── run_algorithm.py ├── setup_links.sh └── tests ├── recall_tests.py └── tests.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | data 2 | results 3 | *.bvecs 4 | venv 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | *.o 4 | 5 | data/* 6 | *.class 7 | 8 | *.log 9 | 10 | results/* 11 | !results/*.png 12 | 13 | venv 14 | 15 | .idea 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Martin Aumüller 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Big ANN Benchmarks 2 | 3 | 4 | 5 | ## Datasets 6 | 7 | See <http://big-ann-benchmarks.com/> for details on the different datasets. 8 | 9 | ## NeurIPS 2023 competition: Practical Vector Search 10 | 11 | Please see [this readme](./neurips23/README.md) for a guide to the NeurIPS 23 competition. 12 | 13 | ## NeurIPS 2021 competition: Billion-Scale ANN 14 | 15 | Please see [this readme](./neurips21/README.md) for a guide to running billion-scale benchmarks and a summary of the results from the NeurIPS 21 competition. 16 | 17 | # Credits 18 | 19 | This project is a version of [ann-benchmarks](https://github.com/erikbern/ann-benchmarks) by [Erik Bernhardsson](https://erikbern.com/) and contributors targeting evaluation of algorithms and hardware for newer billion-scale datasets and practical variants of nearest neighbor search.
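For orientation, the sketch below chains together the helper scripts used throughout this repository (install.py, create_dataset.py, run.py), using the NeurIPS'21 T3 cuanns_ivfpq submission as the example; the Dockerfile path, definitions file, and dataset name are illustrative only, and each track's README documents the exact invocation for that track.

```
# Build the Docker image for a submission
python3 install.py --dockerfile t3/cuanns_ivfpq/Dockerfile

# Download and prepare a dataset
python3 create_dataset.py --dataset bigann-1B

# Run the submission on that dataset
python3 run.py --t3 --definitions t3/cuanns_ivfpq/algos.yaml --dataset bigann-1B
```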
20 | -------------------------------------------------------------------------------- /benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /benchmark/algorithms/base_runner.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import time 3 | 4 | class BaseRunner(): 5 | def build(algo, dataset): 6 | t0 = time.time() 7 | algo.fit(dataset) 8 | return time.time() - t0 9 | 10 | def run_task(algo, ds, distance, count, run_count, search_type, private_query, runbook=None): 11 | best_search_time = float('inf') 12 | search_times = [] 13 | 14 | if not private_query: 15 | X = ds.get_queries() 16 | else: 17 | X = ds.get_private_queries() 18 | 19 | print(f"Got {X.shape[0]} queries") 20 | 21 | for i in range(run_count): 22 | print('Run %d/%d...' % (i + 1, run_count)) 23 | 24 | start = time.time() 25 | if search_type == "knn": 26 | algo.query(X, count) 27 | total = (time.time() - start) 28 | results = algo.get_results() 29 | assert results.shape[0] == X.shape[0] 30 | elif search_type == "range": 31 | algo.range_query(X, count) 32 | total = (time.time() - start) 33 | results = algo.get_range_results() 34 | else: 35 | raise NotImplementedError(f"Search type {search_type} not available.") 36 | 37 | search_time = total 38 | if search_time < best_search_time: 39 | best_search_time = search_time 40 | best_results = results 41 | 42 | search_times.append( search_time ) 43 | 44 | attrs = { 45 | "best_search_time": best_search_time, 46 | "name": str(algo), 47 | "run_count": run_count, 48 | "distance": distance, 49 | "type": search_type, 50 | "count": int(count), 51 | "search_times": search_times, 52 | "private_queries": private_query, 53 | } 54 | additional = algo.get_additional() 55 | for k in additional: 56 | attrs[k] = additional[k] 57 | return (attrs, best_results) 58 | -------------------------------------------------------------------------------- /benchmark/algorithms/elastiknn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/benchmark/algorithms/elastiknn.py -------------------------------------------------------------------------------- /benchmark/algorithms/faiss_inmem.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | #import sys 3 | #sys.path.append("install/lib-faiss") # noqa 4 | import numpy 5 | import sklearn.preprocessing 6 | import ctypes 7 | import faiss 8 | import os 9 | from benchmark.algorithms.base import BaseANN 10 | from benchmark.datasets import DATASETS 11 | 12 | 13 | class Faiss(BaseANN): 14 | def query(self, X, n): 15 | if self._metric == 'angular': 16 | X /= numpy.linalg.norm(X, axis=1, keepdims=True) 17 | self.res = self.index.search(X.astype(numpy.float32), n) 18 | 19 | def get_results(self): 20 | D, I = self.res 21 | return I 22 | # res = [] 23 | # for i in range(len(D)): 24 | # r = [] 25 | # for l, d in zip(L[i], D[i]): 26 | # if l != -1: 27 | # r.append(l) 28 | # res.append(r) 29 | # return res 30 | 31 | 32 | class FaissIVF(Faiss): 33 | def __init__(self, metric, n_list): 34 | self._n_list = n_list 35 | self._metric = metric 36 | 37 | def index_name(self, name): 38 | return f"data/ivf_{name}_{self._n_list}_{self._metric}" 39 |
40 | def fit(self, dataset): 41 | X = DATASETS[dataset]().get_dataset() # assumes it fits into memory 42 | 43 | if self._metric == 'angular': 44 | X = sklearn.preprocessing.normalize(X, axis=1, norm='l2') 45 | 46 | if X.dtype != numpy.float32: 47 | X = X.astype(numpy.float32) 48 | 49 | self.quantizer = faiss.IndexFlatL2(X.shape[1]) 50 | index = faiss.IndexIVFFlat( 51 | self.quantizer, X.shape[1], self._n_list, faiss.METRIC_L2) 52 | index.train(X) 53 | index.add(X) 54 | faiss.write_index(index, self.index_name(dataset)) 55 | self.index = index 56 | 57 | def load_index(self, dataset): 58 | if not os.path.exists(self.index_name(dataset)): 59 | return False 60 | 61 | self.index = faiss.read_index(self.index_name(dataset)) 62 | return True 63 | 64 | def set_query_arguments(self, n_probe): 65 | faiss.cvar.indexIVF_stats.reset() 66 | self._n_probe = n_probe 67 | self.index.nprobe = self._n_probe 68 | 69 | def get_additional(self): 70 | return {"dist_comps": faiss.cvar.indexIVF_stats.ndis + # noqa 71 | faiss.cvar.indexIVF_stats.nq * self._n_list} 72 | 73 | def __str__(self): 74 | return 'FaissIVF(n_list=%d, n_probe=%d)' % (self._n_list, 75 | self._n_probe) 76 | -------------------------------------------------------------------------------- /benchmark/distances.py: -------------------------------------------------------------------------------- 1 | from scipy.spatial.distance import pdist as scipy_pdist 2 | import itertools 3 | import numpy as np 4 | 5 | def pdist(a, b, metric): 6 | return scipy_pdist([a, b], metric=metric)[0] 7 | 8 | metrics = { 9 | 'euclidean': { 10 | 'distance': lambda a, b: pdist(a, b, "euclidean"), 11 | }, 12 | 'angular': { 13 | 'distance': lambda a, b: pdist(a, b, "cosine"), 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /benchmark/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from benchmark.plotting import * 3 | -------------------------------------------------------------------------------- /benchmark/plotting/plot_variants.py: -------------------------------------------------------------------------------- 1 | from benchmark.plotting.metrics import all_metrics as metrics 2 | 3 | all_plot_variants = { 4 | "recall/time": ("k-nn", "qps"), 5 | "recall/buildtime": ("k-nn", "build"), 6 | "recall/indexsize": ("k-nn", "indexsize"), 7 | "recall/distcomps": ("k-nn", "distcomps"), 8 | "recall/candidates": ("k-nn", "candidates"), 9 | "recall/qpssize": ("k-nn", "queriessize"), 10 | } 11 | -------------------------------------------------------------------------------- /benchmark/streaming/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /benchmark/streaming/download_gt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from benchmark.datasets import DATASETS 5 | from benchmark.dataset_io import download 6 | from benchmark.streaming.load_runbook import load_runbook, get_gt_url 7 | from benchmark.streaming.compute_gt import gt_dir 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser( 12 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 13 | 14 | parser.add_argument( 15 | '--dataset', 16 | choices=DATASETS.keys(), 17 | help=f'Dataset to benchmark on.', 18 | 
required=True) 19 | parser.add_argument( 20 | '--runbook_file', 21 | help='Runbook yaml file path' 22 | ) 23 | args = parser.parse_args() 24 | 25 | ds = DATASETS[args.dataset]() 26 | print(args.runbook_file) 27 | max_pts, runbook = load_runbook(args.dataset, ds.nb, args.runbook_file) 28 | gt_url = get_gt_url(args.dataset, args.runbook_file) 29 | 30 | download_dir = gt_dir(ds, args.runbook_file) 31 | os.makedirs(download_dir, exist_ok=True) 32 | for step, entry in enumerate(runbook): 33 | if entry['operation'] == 'search': 34 | step_filename = 'step' + str(step+1) + '.gt100' 35 | step_url = gt_url + '/' + step_filename 36 | download(step_url, os.path.join(download_dir, step_filename)) 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /benchmark/t3/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | -------------------------------------------------------------------------------- /create_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from benchmark.datasets import DATASETS 3 | 4 | if __name__ == "__main__": 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( 7 | '--dataset', 8 | choices=DATASETS.keys(), 9 | required=True) 10 | parser.add_argument( 11 | '--skip-data', 12 | action='store_true', 13 | help='skip downloading base vectors') 14 | args = parser.parse_args() 15 | ds = DATASETS[args.dataset]() 16 | ds.prepare(True if args.skip_data else False) 17 | -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/IoU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/IoU.png -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/distance_histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/distance_histogram.png -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/pr_compression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/pr_compression.png -------------------------------------------------------------------------------- /dataset_preparation/fb_ssnpp_images/result_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/dataset_preparation/fb_ssnpp_images/result_stats.png -------------------------------------------------------------------------------- /dataset_preparation/prepare_bigann.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Prepare the bigann dataset in the format expected for the 1B ANN competition 4 | 5 | """ 6 | 7 | import sys 8 | 9 | from faiss.contrib import datasets as faiss_datasets 10 | import numpy 
as np 11 | 12 | 13 | # source data is in the native Faiss format 14 | ds = faiss_datasets.DatasetBigANN() 15 | 16 | stage = int(sys.argv[1]) 17 | 18 | outdir = "/scratch/matthijs/bigann_competiton_format/" 19 | 20 | def u8bin_write(x, fname): 21 | assert x.dtype == 'uint8' 22 | f = open(fname, "wb") 23 | n, d = x.shape 24 | np.array([n, d], dtype='uint32').tofile(f) 25 | x.tofile(f) 26 | 27 | def ibin_write(x, fname): 28 | assert x.dtype == 'int32' 29 | f = open(fname, "wb") 30 | n, d = x.shape 31 | np.array([n, d], dtype='uint32').tofile(f) 32 | x.tofile(f) 33 | 34 | 35 | if stage == 1: # convert query format 36 | # xq = ds.get_queries() 37 | xq = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_query.bvecs') 38 | xq = np.ascontiguousarray(xq) 39 | u8bin_write(xq, outdir + "query.public.10K.u8bin") 40 | 41 | elif stage == 2: # sample new queries from train set 42 | secretkey = int(sys.argv[2]) 43 | rs = np.random.RandomState(secretkey) 44 | xt = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_learn.bvecs') 45 | print("size", xt.shape) 46 | selection = rs.choice(len(xt), 10000, replace=False) 47 | u8bin_write(xt[selection], outdir + f"query.private.{secretkey}.10K.u8bin") 48 | 49 | elif stage == 3: # convert 10M subset 50 | 51 | xb = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_base.bvecs') 52 | u8bin_write(xb[:10**7], outdir + "base.10M.u8bin") 53 | 54 | elif stage == 4: # write the 1B vectors... 55 | 56 | xb = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_base.bvecs') 57 | bs = 10**6 58 | f = open(outdir + "base.1B.u8bin", "wb") 59 | np.array(xb.shape, dtype='uint32').tofile(f) 60 | for i in range(1000): 61 | print(i, end="\r", flush=True) 62 | xb[i * bs : (i + 1) * bs].tofile(f) 63 | 64 | elif stage == 5: # convert the training vectors 65 | 66 | xb = faiss_datasets.bvecs_mmap(ds.basedir + 'bigann_learn.bvecs') 67 | bs = 10**6 68 | f = open(outdir + "learn.100M.u8bin", "wb") 69 | np.array(xb.shape, dtype='uint32').tofile(f) 70 | for i in range(100): 71 | print(i, end="\r", flush=True) 72 | xb[i * bs : (i + 1) * bs].tofile(f) 73 | 74 | elif stage == 6: 75 | # convert ground-truth files for public queries 76 | gt = ds.get_groundtruth() 77 | ibin_write(gt, outdir + "GT.public.1B.ibin") 78 | 79 | ds10M = faiss_datasets.DatasetBigANN(10) 80 | gt = ds10M.get_groundtruth() 81 | ibin_write(gt, outdir + "GT.public.10M.ibin") 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /dataset_preparation/prepare_fb_ssnpp.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Prepare the FB SSN++ dataset in the format expected for the 1B ANN competition 4 | 5 | The datafiles have already been produced on the prod side: 6 | 7 | - FB_ssnpp_database.u8bin: the 1B database vectors, deduplicated, already 8 | in correct format 9 | 10 | - 1M_queries_no_bursts_compressed.npy: a little less than 1M query vectors, 11 | selected not to be bursty 12 | 13 | """ 14 | import sys 15 | import numpy as np 16 | 17 | secret_suffix = sys.argv[1] 18 | 19 | basedir = "/checkpoint/matthijs/billion-scale-ann-benchmarks/FB_ssnpp/" 20 | 21 | def u8bin_write(x, fname): 22 | assert x.dtype == 'uint8' 23 | f = open(fname, "wb") 24 | n, d = x.shape 25 | np.array([n, d], dtype='uint32').tofile(f) 26 | x.tofile(f) 27 | 28 | xqall_fp32 = np.load(basedir + "1M_queries_no_bursts_compressed.npy") 29 | xqall = xqall_fp32.astype('uint8') 30 | assert np.all(xqall == xqall_fp32) 31 | u8bin_write( 32 | xqall[:10**5], 33 | basedir +
"FB_ssnpp_public_queries.u8bin" 34 | ) 35 | u8bin_write( 36 | xqall[10**5: 2 * 10**5], 37 | basedir + "FB_ssnpp_heldout_queries_" + secret_suffix + ".u8bin" 38 | ) 39 | 40 | -------------------------------------------------------------------------------- /eval/show_operating_points.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | 4 | if __name__ == "__main__": 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument( 7 | '--algorithm', 8 | required=False) 9 | parser.add_argument( 10 | '--metric', 11 | choices=['qps', 'recall'], 12 | default='recall') 13 | parser.add_argument( 14 | '--threshold', 15 | default=0.9, 16 | help='threshold', 17 | type=float) 18 | parser.add_argument( 19 | '--csv', 20 | metavar='CSV', 21 | help='input csv') 22 | parser.add_argument( 23 | '--dataset', 24 | required=False) 25 | 26 | args = parser.parse_args() 27 | df = pd.read_csv(args.csv) 28 | 29 | if args.algorithm: 30 | df = df[df.algorithm == args.algorithm] 31 | if args.dataset: 32 | df = df[df.dataset == args.dataset] 33 | 34 | if args.metric == "qps": 35 | print(df[(df.qps > args.threshold)].groupby(['dataset', 'algorithm']).max()[['recall/ap']]) 36 | elif args.metric == "recall": 37 | print(df[(df['recall/ap'] > args.threshold)].groupby(['dataset', 'algorithm']).max()[['qps']].sort_values(by=["dataset", "qps"], ascending=False)) 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /install/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | RUN apt-get update && apt-get install -y python3-numpy python3-scipy python3-pip build-essential git axel wget 4 | RUN wget https://aka.ms/downloadazcopy-v10-linux && mv downloadazcopy-v10-linux azcopy.tgz && tar xzf azcopy.tgz --transform 's!^[^/]\+\($\|/\)!azcopy_folder\1!' 5 | RUN cp azcopy_folder/azcopy /usr/bin 6 | 7 | RUN pip3 install -U pip 8 | 9 | WORKDIR /home/app 10 | COPY requirements_py3.10.txt run_algorithm.py ./ 11 | RUN pip3 install -r requirements_py3.10.txt 12 | 13 | ENTRYPOINT ["python3", "-u", "run_algorithm.py"] 14 | -------------------------------------------------------------------------------- /install/Dockerfile.bbann: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev libboost-dev python3 python3-setuptools python3-pip libomp-dev 5 | RUN pip3 install pybind11 numpy 6 | 7 | RUN git clone --single-branch --branch master https://github.com/zilliztech/BBAnn.git 8 | 9 | RUN mkdir -p BBAnn/build 10 | RUN cd BBAnn/build && cmake -DCMAKE_BUILD_TYPE=Release .. 11 | RUN cd BBAnn/build && make -j 12 | RUN cd BBAnn/python && pip install -e . 
13 | RUN python3 -c 'import bbannpy' 14 | -------------------------------------------------------------------------------- /install/Dockerfile.diskann: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 5 | RUN pip3 install pybind11 numpy 6 | 7 | RUN cd /tmp && wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 8 | RUN cd /tmp && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 9 | RUN cd /tmp && rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 10 | RUN cd /tmp && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' 11 | RUN apt-get update 12 | RUN apt-get install -y intel-mkl-64bit-2020.0-088 13 | 14 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so libblas.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 15 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so.3 libblas.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 16 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so liblapack.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 17 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so.3 liblapack.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 18 | 19 | RUN echo "/opt/intel/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 20 | RUN echo "/opt/intel/mkl/lib/intel64" >> /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN echo "MKL_THREADING_LAYER=GNU" >> /etc/environment 23 | 24 | RUN git clone --single-branch --branch python_bindings_diskann https://github.com/microsoft/diskann 25 | RUN mkdir -p diskann/build 26 | RUN cd diskann/build && cmake -DCMAKE_BUILD_TYPE=Release .. 27 | RUN cd diskann/build && make -j 28 | RUN cd diskann/python && pip install -e . 29 | RUN python3 -c 'import diskannpy' 30 | -------------------------------------------------------------------------------- /install/Dockerfile.elastiknn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/install/Dockerfile.elastiknn -------------------------------------------------------------------------------- /install/Dockerfile.faiss: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update && apt-get install -y libopenblas-base libopenblas-dev libpython3-dev swig python3-dev libssl-dev wget 4 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.18.3/cmake-3.18.3-Linux-x86_64.sh && mkdir cmake && sh cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=cmake && rm cmake-3.18.3-Linux-x86_64.sh 5 | RUN git clone https://github.com/facebookresearch/faiss lib-faiss 6 | RUN cd lib-faiss && ../cmake/bin/cmake -DFAISS_OPT_LEVEL=avx2 -DCMAKE_BUILD_TYPE=Release -DFAISS_ENABLE_GPU=OFF -DPython_EXECUTABLE=/usr/bin/python3 -B build . 
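# The next steps compile the Faiss C++ library, install its Python bindings, and confirm that the faiss module imports.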
7 | RUN cd lib-faiss && make -C build -j4 8 | RUN cd lib-faiss && cd build && cd faiss && cd python && python3 setup.py install && cd && rm -rf cmake 9 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 10 | -------------------------------------------------------------------------------- /install/Dockerfile.faissconda: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt update && apt install -y wget 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-1-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-1-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | 9 | RUN conda install -c pytorch faiss-cpu 10 | COPY install/requirements_conda.txt ./ 11 | # conda doesn't like some of our packages, use pip 12 | RUN python3 -m pip install -r requirements_conda.txt 13 | 14 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /install/Dockerfile.httpann_example: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN python3 -m pip install flask==2.0.1 4 | -------------------------------------------------------------------------------- /install/Dockerfile.kota: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 5 | RUN pip3 install pybind11 numpy 6 | 7 | RUN cd /tmp && wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 8 | RUN cd /tmp && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 9 | RUN cd /tmp && rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB 10 | RUN cd /tmp && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' 11 | RUN apt-get update 12 | RUN apt-get install -y intel-mkl-64bit-2020.0-088 13 | 14 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so libblas.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 15 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/libblas.so.3 libblas.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 16 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so liblapack.so-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 17 | RUN update-alternatives --install /usr/lib/x86_64-linux-gnu/liblapack.so.3 liblapack.so.3-x86_64-linux-gnu /opt/intel/mkl/lib/intel64/libmkl_rt.so 150 18 | 19 | RUN echo "/opt/intel/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 20 | RUN echo "/opt/intel/mkl/lib/intel64" >> /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN echo "MKL_THREADING_LAYER=GNU" >> /etc/environment 23 | 24 | RUN git clone --branch python_binding https://github.com/LLLjun/DiskANN_NICS 25 | RUN mkdir -p DiskANN_NICS/build 26 | RUN cd DiskANN_NICS/build && cmake -DCMAKE_BUILD_TYPE=Release .. 27 | RUN cd DiskANN_NICS/build && make -j 28 | RUN cd DiskANN_NICS/python && pip install -e . 
29 | RUN python3 -c 'import diskannpy' 30 | -------------------------------------------------------------------------------- /install/Dockerfile.kst_ann_t1: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt-get update && apt-get install -y libopenblas-base libopenblas-dev libpython3-dev swig python3-dev libssl-dev wget 4 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.18.3/cmake-3.18.3-Linux-x86_64.sh && mkdir cmake && sh cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=cmake && rm cmake-3.18.3-Linux-x86_64.sh 5 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2021.05-Linux-x86_64.sh 6 | RUN bash Anaconda3-2021.05-Linux-x86_64.sh -b 7 | 8 | ENV PATH /root/anaconda3/bin:$PATH 9 | RUN git clone https://github.com/NJU-yasuo/faiss_t faiss 10 | RUN cd faiss && bash build-lib.sh 11 | RUN cd faiss/_build/faiss/python && python3 setup.py install && cd && rm -rf cmake 12 | COPY install/requirements_conda.txt ./ 13 | # conda doesn't like some of our packages, use pip 14 | RUN python3 -m pip install -r requirements_conda.txt 15 | 16 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 17 | -------------------------------------------------------------------------------- /install/Dockerfile.pqbuddy: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN apt update && apt install -y wget 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh 5 | RUN bash Anaconda3-2020.11-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | 9 | RUN conda install -c pytorch faiss-cpu 10 | COPY install/requirements_conda.txt ./ 11 | # conda doesn't like some of our packages, use pip 12 | RUN python3 -m pip install -r requirements_conda.txt 13 | 14 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /install/Dockerfile.puck: -------------------------------------------------------------------------------- 1 | FROM billion-scale-benchmark 2 | 3 | RUN wget https://gips-test-bucket-0-gz.gz.bcebos.com/similar/puck_to_python.tar.gz 4 | RUN tar zxvf puck_to_python.tar.gz 5 | RUN mv lib puck 6 | RUN python3 -c 'from puck import py_puck_api' 7 | -------------------------------------------------------------------------------- /install/requirements_conda.txt: -------------------------------------------------------------------------------- 1 | ansicolors 2 | docker-py 3 | h5py==3.8.0 4 | matplotlib 5 | numpy 6 | pyyaml 7 | psutil 8 | scipy 9 | scikit-learn 10 | jinja2 11 | pandas 12 | -------------------------------------------------------------------------------- /logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,annb 3 | 4 | [handlers] 5 | keys=consoleHandler,fileHandler 6 | 7 | [formatters] 8 | keys=simpleFormatter 9 | 10 | [formatter_simpleFormatter] 11 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s 12 | datefmt= 13 | 14 | [handler_consoleHandler] 15 | class=StreamHandler 16 | level=INFO 17 | formatter=simpleFormatter 18 | args=(sys.stdout,) 19 | 20 | [handler_fileHandler] 21 | class=FileHandler 22 | level=INFO 23 | formatter=simpleFormatter 24 | args=('annb.log','w') 25 | 26 | [logger_root] 27 | level=WARN 28 | handlers=consoleHandler 29 | 30 | [logger_annb] 31 | level=INFO 32 | handlers=consoleHandler,fileHandler 33 | 
qualname=annb 34 | propagate=0 35 | -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/neurips21/text2image-1B.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/neurips21/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T1/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T1/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/bigann-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/bigann-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/deep-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/deep-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msspacev-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msspacev-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msturing-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msturing-1B-IO.png 
-------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/deep-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/neurips21/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/neurips21/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/ssnpp-1B-IO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/ssnpp-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/text2image-1B-IO.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/text2image-1B-IO.png -------------------------------------------------------------------------------- /neurips21/t1_t2/results/T2/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t1_t2/results/T2/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/t3/LEADERBOARDS.md: -------------------------------------------------------------------------------- 1 | 2 | # BigANN Challenge T3 Leaderboards and Winners 3 | 4 | We rank participants based on 4 different but inter-related benchmarks: 5 | * One based on recall/average precision 6 | * One based on throughput 7 | * One based on power consumption 8 | * One based on hardware cost 9 | 10 | We maintain two sets of leaderboards that rank participants on all benchmarks: 11 | * [Leaderboards based on a public query dataset](LEADERBOARDS_PUBLIC.md) to which participants had access during the competition. 12 | * [Leaderboards based on a private query dataset](LEADERBOARDS_PRIVATE.md) on which submissions are currently being evaluated. 13 | 14 | Please consult the main [T3 track README](README.md) for more details about benchmarks and ranking methodology. 15 | 16 | ## Public Dataset Leaderboards And Winners 17 | 18 | The leaderboards and rankings on the public dataset live [here](LEADERBOARDS_PUBLIC.md). 19 | 20 | We would like to congratulate all the winners of this part of the competition, teams from Intel and NVidia: 21 | * Sourabh Dongaonkar (Intel Corporate) 22 | * Mariano Tepper (Intel Labs) 23 | * Yong Wong (NVidia) 24 | * Akira Naruse (NVidia) 25 | * Jingrong Zhang (NVidia) 26 | * Mahesh Doijade (NVidia) 27 | 28 | We are in the process of resolving the remaining issues and tasks. 29 | 30 | Upon completion, we will make the rankings and winners official. 31 | 32 | Please revisit this page again soon for more updates. 33 | 34 | ## Private Dataset Leaderboards Status 35 | 36 | The status of the leaderboards and rankings on the private dataset lives [here](LEADERBOARDS_PRIVATE.md). 37 | 38 | All submissions are currently being evaluated using the private data sets, so the scores (and rankings) could change as evaluation proceeds. 39 | 40 | Please revisit this page again soon for more updates.
41 | 42 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_ivfpq/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.4.2-devel-ubuntu20.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install --no-install-recommends -y build-essential wget git python3-dev python3-pip 5 | RUN pip3 install -U pip 6 | 7 | RUN mkdir /home/soft 8 | RUN cd /home/soft && wget -nv "https://drive.google.com/uc?export=download&id=1IybRBhZPQzMqQ2HRX9KSL7woftO8WZdV" -O pkg-libcuann-0.0.7.tgz && tar xfz pkg-libcuann-0.0.7.tgz 9 | RUN pip3 install /home/soft/libcuann/python/dist/cuann-0.0.7-cp38-cp38-linux_x86_64.whl 10 | RUN rm /home/soft/pkg-libcuann-0.0.7.tgz 11 | RUN rm -rf /home/soft/libcuann/python 12 | 13 | WORKDIR /home/app 14 | COPY requirements_py38.txt run_algorithm.py ./ 15 | RUN pip3 install -r requirements_py38.txt 16 | RUN pip3 install 'Cython>=0.29.*' 'numpy>=1.19.*' cupy-cuda114 17 | 18 | ENV LD_LIBRARY_PATH="/home/soft/libcuann/lib:${LD_LIBRARY_PATH}" 19 | ENV NVIDIA_TF32_OVERRIDE=1 20 | # ENV OMP_NUM_THREADS=32 21 | 22 | ENTRYPOINT ["python3", "run_algorithm.py"] 23 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_ivfpq/README.md: -------------------------------------------------------------------------------- 1 | # T3: Cuanns IVFPQ (Single GPU) 2 | 3 | ## Hardware Configuration And Cost 4 | 5 | |Part |Model |No. |Unit Price |Total Price| 6 | |---------------|-------------------------------------------|----|------------------------------------|-----------| 7 | |System |[NVIDIA DGX A100 640GB] | 1| | | 8 | |Total | | 1| | | 9 | 10 | Details of the system can be found at https://www.nvidia.com/en-us/data-center/dgx-a100/. However, no price information is provided. Therefore, we will not participate in the leaderboards based on hardware cost. 11 | 12 | ## Hardware Access 13 | 14 | SSH access to the system will be provided to competition organizers. 15 | 16 | ## No Source Code Declarations 17 | 18 | This submission requires the following software components where source-code is not available and/or not part of the source-code for this submission: 19 | * NVIDIA docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 20 | * NVIDIA CUDA libraries (including library for fast ANNS) and host drivers 21 | 22 | ## Hardware Setup And Software Installation 23 | 24 | ## Prerequisites 25 | 26 | * Linux Ubuntu 20.04 27 | * CUDA 11.4 or above 28 | * The NVidia docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 29 | * This cloned project repository 30 | 31 | ### Setup and Installation Instructions 32 | 33 | Note that all the subsequent commands must be run in the top-level directory of this repo on your machine. 34 | 35 | First build the cuanns_ivfpq docker container: 36 | ``` 37 | python3 install.py --dockerfile t3/cuanns_ivfpq/Dockerfile 38 | ``` 39 | Setup links to downloaded dataset and pre-built index files. 40 | ``` 41 | ./setup_links.sh 42 | ``` 43 | Otherwise, download datasets for competitions supported by cuanns_ivfpq. 44 | ``` 45 | python3 create_dataset.py --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B|text2image-1B] 46 | ``` 47 | 48 | ## Run Competition Algorithm 49 | 50 | You can run this algorithm on the competition dataset using the run.py script. 
51 | ``` 52 | python3 run.py --t3 --definitions t3/cuanns_ivfpq/algos.yaml --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B|text2image-1B] 53 | ``` 54 | 55 | #### Known Issues 56 | 57 | The program to build the index file from the competition dataset is not yet implemented in this repo. When the program is ready, we will describe how to build the index. 58 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_multigpu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.4.2-devel-ubuntu20.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install --no-install-recommends -y build-essential wget git python3-dev python3-pip libopenblas-dev 5 | RUN pip3 install -U pip 6 | 7 | RUN mkdir /home/soft 8 | RUN cd /home/soft && wget -nv https://github.com/Kitware/CMake/releases/download/v3.21.3/cmake-3.21.3-linux-x86_64.tar.gz && tar xzf cmake-3.21.3-linux-x86_64.tar.gz 9 | RUN cd /home/soft && wget -nv https://github.com/facebookresearch/faiss/archive/refs/tags/v1.7.1.tar.gz -O faiss-1.7.1.tar.gz && tar xzf faiss-1.7.1.tar.gz 10 | RUN cd /home/soft/faiss-1.7.1 \ 11 | && ../cmake-3.21.3-linux-x86_64/bin/cmake -B build\ 12 | -DFAISS_ENABLE_GPU=ON \ 13 | -DFAISS_ENABLE_PYTHON=OFF -DBUILD_TESTING=OFF \ 14 | -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release \ 15 | -DCMAKE_CUDA_ARCHITECTURES="80" \ 16 | -DCMAKE_INSTALL_PREFIX=/home/soft/faiss . \ 17 | && cd build && make -j 8 && make install 18 | 19 | 20 | 21 | WORKDIR /home/app 22 | COPY requirements_py38.txt run_algorithm.py ./ 23 | RUN pip3 install -r requirements_py38.txt 24 | 25 | RUN cd /home/soft/ && wget -nv "https://drive.google.com/uc?export=download&id=1jU4aLrihX6cPPzOB9oRQrrSYZkyUGoQ5" -O pycuann.tar.gz && tar xf pycuann.tar.gz pycuann.so 26 | 27 | ENV LD_LIBRARY_PATH="/home/soft:${LD_LIBRARY_PATH}" 28 | ENV PATH="/home/soft:${PATH}" 29 | ENTRYPOINT ["python3", "run_algorithm.py"] 30 | -------------------------------------------------------------------------------- /neurips21/t3/cuanns_multigpu/README.md: -------------------------------------------------------------------------------- 1 | # T3: Cuanns MultiGPU 2 | 3 | ## Hardware Configuration And Cost 4 | 5 | |Part |Model |No. |Unit Price |Total Price| 6 | |---------------|-------------------------------------------|----|------------------------------------|-----------| 7 | |System |[NVIDIA DGX A100 640GB] | 1| | | 8 | |Total | | 1| | | 9 | 10 | Details of the system can be found at https://www.nvidia.com/en-us/data-center/dgx-a100/. However, no price information is provided. Therefore, we will not participate in the leaderboards based on hardware cost. 11 | 12 | ## Hardware Access 13 | 14 | SSH access to the system will be provided to competition organizers. 
15 | 16 | ## No Source Code Declarations 17 | 18 | This submission requires the following software components whose source code is not available and/or is not part of the source code for this submission: 19 | * NVIDIA docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 20 | * NVIDIA CUDA libraries and host drivers 21 | * The algorithm implementation, which is provided as a library 22 | 23 | ## Hardware Setup And Software Installation 24 | 25 | ### Prerequisites 26 | 27 | * Linux Ubuntu 20.04 28 | * CUDA 11.4 or above 29 | * The NVidia docker container runtime ( https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html ) 30 | * This cloned project repository 31 | 32 | ### Setup and Installation Instructions 33 | 34 | Note that all the subsequent commands must be run in the top-level directory of this repo on your machine. 35 | 36 | First build the docker container: 37 | ``` 38 | python install.py --dockerfile t3/cuanns_multigpu/Dockerfile 39 | ``` 40 | Download datasets for competitions supported by cuanns_multigpu. Note that even if you do not build the index files, you still need to download the datasets. 41 | ``` 42 | python create_dataset.py --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B] 43 | ``` 44 | Download index files. The download instructions will be updated as soon as the location of the index files is determined. 45 | ``` 46 | (to be updated) 47 | ``` 48 | 49 | ## Run Competition Algorithm 50 | 51 | You can run this algorithm on the competition dataset using the run.py script. 52 | ``` 53 | python run.py --t3 --definitions t3/cuanns_multigpu/algos.yaml --dataset [bigann-1B|deep-1B|msturing-1B|msspacev-1B] 54 | ``` 55 | 56 | #### Known Issues 57 | 58 | The program to build the index file from the competition dataset is not yet implemented in this repo. When the program is ready, we will describe how to build the index.
59 | -------------------------------------------------------------------------------- /neurips21/t3/eval_2021/faiss_t3/prun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset deep-1B --nodocker 4 | python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset bigann-1B --nodocker 5 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset text2image-1B --nodocker 6 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset msturing-1B --nodocker 7 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset msspacev-1B --nodocker 8 | #python run.py --t3 --private-query --definitions t3/faiss_t3/algos.yaml --dataset ssnpp-1B --nodocker 9 | 10 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset deep-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 11 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset bigann-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 12 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset text2image-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 13 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset msturing-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 14 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset msspacev-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 15 | #python run.py --definitions t3/faiss_t3/algos.yaml --dataset ssnpp-1B --t3 --private-query --nodocker --power-capture 192.168.99.110:1237:10 16 | -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | #FROM nvidia/cuda:11.0-devel-ubuntu18.04 3 | FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04 4 | 5 | # CONDA 6 | 7 | ENV PATH="/root/miniconda3/bin:${PATH}" 8 | ARG PATH="/root/miniconda3/bin:${PATH}" 9 | 10 | RUN apt-get update 11 | RUN apt-get install -y wget 12 | RUN apt-get install -y build-essential 13 | RUN apt-get install -y git 14 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py38_23.1.0-1-Linux-x86_64.sh 15 | RUN bash Miniconda3-py38_23.1.0-1-Linux-x86_64.sh -b 16 | RUN rm -f Miniconda3-py38_23.1.0-1-Linux-x86_64.sh 17 | RUN conda --version 18 | RUN conda create -n py38 python=3.8 -y 19 | RUN echo "source activate env" > ~/.bashrc 20 | RUN ls /opt/ 21 | ENV PATH /opt/conda/envs/py38/bin:$PATH 22 | RUN conda config --set remote_read_timeout_secs 300 23 | RUN conda install -c pytorch faiss-gpu 24 | #RUN conda install cudatoolkit=11.0 25 | RUN conda --version && which conda && which python && which pip3 26 | 27 | # BIGANN 28 | 29 | RUN pip3 install -U pip 30 | 31 | WORKDIR /home/app 32 | COPY t3/faiss_t3/faiss-gpu_requirements.txt run_algorithm.py ./ 33 | RUN pip3 install -r faiss-gpu_requirements.txt 34 | 35 | ENTRYPOINT ["python3", "run_algorithm.py"] 36 | 37 | ## For the following RUN command to work, we need to initiate docker build 38 | ## with a gpu device request much like what's done with docker eval run. 
39 | # RUN python3 -c 'import faiss; print("gpus=", faiss.get_num_gpus())' 40 | 41 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2)' 42 | -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/bigann-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/deep-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msspacev-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/msturing-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-p.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-p.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-t.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/baseline_plots/text2image-1B-r-vs-t.png -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/AdvantechSky6200.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/AdvantechSky6200.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/GPU.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/GPU.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/RAM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/RAM.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/cost/SSD.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/faiss_t3/cost/SSD.pdf -------------------------------------------------------------------------------- /neurips21/t3/faiss_t3/faiss-gpu_requirements.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==2.6.1 3 | h5py==2.10.0 4 | matplotlib 5 | numpy 6 | pyyaml==5.1 7 | psutil==5.6.6 8 | scipy 9 | scikit-learn 10 | jinja2==2.10.1 11 | pandas 12 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/.gitignore: -------------------------------------------------------------------------------- 1 | 1b 2 | centroids_2m 3 | records_weights 4 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/algos.yaml: -------------------------------------------------------------------------------- 1 | deep-1B: 2 | gemini-t3: 3 | docker-tag: billion-scale-benchmark-faissconda 4 | module: benchmark.algorithms.gemini 5 | constructor: GeminiT3 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [ 11 | "{'nbits': 512, 'qbits':768, 'nlist':2097152, 'nt':83886080, 'num_apuc':3, 'f16':True }" 12 | ] 13 | query-args: | 14 | [ 15 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 5000, 'average_clstr_size_factor': 0.0}", 16 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 2000, 'average_clstr_size_factor': 0.0}", 17 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 18 | "{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 500, 'average_clstr_size_factor': 0.0}", 19 | 
"{'nprobe': 800, 'nprobe_refine': 480, 'hamming_k': 250, 'average_clstr_size_factor': 0.0}", 20 | "{'nprobe': 800, 'nprobe_refine': 400, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 21 | "{'nprobe': 800, 'nprobe_refine': 300, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 22 | "{'nprobe': 700, 'nprobe_refine': 380, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 23 | "{'nprobe': 600, 'nprobe_refine': 280, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}", 24 | "{'nprobe': 500, 'nprobe_refine': 180, 'hamming_k': 1000, 'average_clstr_size_factor': 0.0}" 25 | ] 26 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/buildidx/run_bin_build_index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PATH=/usr/bin:$PATH python3 build_index.py 4 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/buildidx/test.py: -------------------------------------------------------------------------------- 1 | import faiss 2 | import numpy as np 3 | 4 | qd=768 5 | d=512 6 | nlist=5 7 | 8 | nb=1000 9 | db = np.empty((nb, d // 8), dtype='uint8') 10 | 11 | quantizer = faiss.IndexBinaryFlat( d ) 12 | index = faiss.IndexBinaryIVF( quantizer, d, nlist ) 13 | index.train(db) 14 | index.add(db) 15 | 16 | def convert_index_to_cluster_and_ids_lists(index, nbits): 17 | cluster_list = np.empty(index.invlists.nlist, dtype=object) 18 | ids_list = np.empty(index.invlists.nlist, dtype=object) 19 | 20 | zero_count = 0 21 | 22 | for i in range(index.invlists.nlist): 23 | list_sz = index.invlists.list_size(i) 24 | 25 | if list_sz == 0: 26 | zero_count = zero_count + 1 27 | ids = None 28 | else: 29 | ids_ptr = index.invlists.get_ids(i) 30 | ids = np.array(faiss.rev_swig_ptr(ids_ptr, list_sz)).reshape(-1, 1).astype(np.uint32) # GSL requires a 2d arrray for some reason 31 | index.invlists.release_ids(ids_ptr) 32 | #GW index.invlists.release_ids(list_sz, ids_ptr) 33 | ids_list[i] = ids 34 | 35 | codes_ptr = index.invlists.get_codes(i) 36 | codes = np.array(faiss.rev_swig_ptr(codes_ptr, list_sz * nbits // 8)).reshape(list_sz, nbits//8) 37 | index.invlists.release_codes(codes_ptr) 38 | #GW index.invlists.release_codes(list_sz * nbits // 8, codes_ptr) 39 | cluster_list[i] = codes 40 | 41 | print('zero_count =', zero_count) 42 | return cluster_list, ids_list 43 | 44 | cls, ids = convert_index_to_cluster_and_ids_lists(index,d) 45 | print("cls", cls) 46 | print("ids", ids) 47 | 48 | # Querying the index 49 | nq = 10 50 | queries = np.empty((nq, d // 8), dtype='uint8') 51 | print("queries", queries) 52 | k = 1 53 | D, I = index.search(queries, k) 54 | print("di",D,I) 55 | 56 | 57 | quantizer = faiss.downcast_IndexBinary(index.quantizer) 58 | print("Quantizer", type(quantizer)) 59 | centroids = faiss.vector_to_array(quantizer.xb) 60 | print("Centroids", type(centroids), centroids.shape) 61 | centroids = np.reshape(centroids, (quantizer.ntotal, quantizer.d//8)) 62 | print("Centroids", type(centroids), centroids.shape) 63 | print('centroids (binary):', centroids.shape, centroids.dtype) 64 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/AdvantechSky6200.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/AdvantechSky6200.pdf 
-------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/GPU.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/GPU.pdf -------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/RAM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/RAM.pdf -------------------------------------------------------------------------------- /neurips21/t3/gemini/cost/SSD.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/t3/gemini/cost/SSD.pdf -------------------------------------------------------------------------------- /neurips21/t3/gemini/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.16.0 2 | scipy==1.0.0 3 | scikit-learn==0.19.1 4 | faiss==1.5.3 5 | docker==2.6.1 6 | psutil==5.6.6 7 | h5py==2.10.0 8 | ansicolors==1.1.8 9 | tqdm==4.62.2 10 | dataclasses==0.8 11 | pyyaml 12 | matplotlib 13 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/run_bin_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PATH=/usr/bin:$PATH which python3 4 | PATH=/usr/bin:$PATH python3 -c "import numpy;print('numpy',numpy.version.version)" 5 | PATH=/usr/bin:$PATH pip3 show numpy 6 | 7 | PATH=/usr/bin:$PATH python3 -c "import scipy;print('scipy',scipy.version.version)" 8 | PATH=/usr/bin:$PATH pip3 show scipy 9 | 10 | PATH=/usr/bin:$PATH python3 -c "import sklearn;print('sklearn',sklearn.__version__)" 11 | PATH=/usr/bin:$PATH pip3 show sklearn 12 | 13 | PATH=/usr/bin:$PATH python3 -c "import faiss;print('faiss',faiss.__version__)" 14 | PATH=/usr/bin:$PATH pip3 show faiss 15 | 16 | PATH=/usr/bin:$PATH LD_LIBRARY_PATH="./gsl_resources:$HOME/.local/lib/python3.6/site-packages/faiss" PYTHONPATH="./gsl_resources:$HOME/.local/lib/python3.6/site-packages/faiss" python3 run.py --t3 --nodocker --definitions t3/gemini/algos.yaml --dataset deep-1B --runs 1 17 | 18 | #PATH=/usr/bin:$PATH LD_LIBRARY_PATH="/home/silo/BigANN/big-ann-benchmarks/gsl_resources:/home/silo/.local/lib/python3.6/site-packages/faiss" PYTHONPATH="/home/silo/BigANN/big-ann-benchmarks/gsl_resources:/home/silo/.local/lib/python3.6/site-packages/faiss" python3 run.py --t3 --nodocker --definitions t3/gemini/algos.yaml --dataset deep-1B --runs 1 19 | -------------------------------------------------------------------------------- /neurips21/t3/gemini/run_conda_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | 4 | #conda activate bigann-silo-py369 5 | which python3 6 | which pip3 7 | 8 | python3 -c "import numpy;print('numpy',numpy.version.version)" 9 | pip3 show numpy 10 | 11 | python3 -c "import scipy;print('scipy',scipy.version.version)" 12 | pip3 show scipy 13 | 14 | python3 -c "import sklearn;print('sklearn',sklearn.__version__)" 15 | pip3 show sklearn 16 | 17 | python3 -c "import faiss;print('faiss',faiss.__version__)" 18 | pip3 show 
faiss 19 | 20 | LD_LIBRARY_PATH=./gsl_resources PYTHONPATH=./gsl_resources python3 run.py --t3 --nodocker --definitions t3/gemini/algos.yaml --dataset deep-1B --runs 1 21 | -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/__init__.py -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/bigann-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/bigann-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/deep-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/msspacev-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/msspacev-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/msturing-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/msturing-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/ssnpp-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/ssnpp-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/plots/text2image-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track1_baseline_faiss/plots/text2image-1B.png -------------------------------------------------------------------------------- /neurips21/track1_baseline_faiss/test_bow_id_selector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import faiss 3 | import bow_id_selector 4 | 5 | sp = faiss.swig_ptr 6 | 7 | from benchmark.datasets import DATASETS 8 | 9 | ds = DATASETS["yfcc-10M"]() 10 | 11 | print("load dataset + query metadata") 12 | meta_b = ds.get_dataset_metadata() 13 | meta_q = ds.get_queries_metadata() 14 | print("Sort") 15 | meta_b.sort_indices() 16 | 17 | #size_t nb, const int64_t *lims, const int32_t *indices, 18 | # int32_t w1, int32_t w2): 19 | 20 | 21 | print(meta_b.indptr.dtype) 22 | 23 | rs = 
np.random.RandomState(123) 24 | 25 | 26 | 27 | def csr_get_row_indices(m, i): 28 | """ get the non-0 column indices for row i in matrix m """ 29 | return m.indices[m.indptr[i] : m.indptr[i + 1]] 30 | 31 | print("TEST") 32 | 33 | for i in range(500): 34 | j = rs.choice(meta_b.shape[0]) 35 | row = csr_get_row_indices(meta_b, j) 36 | 37 | if len(row) < 3: 38 | continue 39 | 40 | w12 = rs.choice(row, 2) 41 | 42 | cc = rs.choice(3) 43 | if cc == 1: 44 | w12[1] = -1 45 | 46 | if cc == 2: 47 | w12[i % 2] += 1 48 | if w12[i % 2] in row: 49 | continue 50 | 51 | sel = bow_id_selector.IDSelectorBOW( 52 | meta_b.shape[0], sp(meta_b.indptr), 53 | sp(meta_b.indices)) 54 | 55 | sel.set_query_words(int(w12[0]), int(w12[1])) 56 | 57 | if cc == 0 or cc == 1: 58 | assert sel.is_member(int(j)) 59 | else: 60 | assert not sel.is_member(int(j)) 61 | 62 | 63 | def intersect_sorted(a1, a2): 64 | n1, = a1.shape 65 | n2, = a2.shape 66 | res = np.empty(n1 + n2, dtype=a1.dtype) 67 | nres = bow_id_selector.intersect_sorted( 68 | n1, faiss.swig_ptr(a1), 69 | n2, faiss.swig_ptr(a2), 70 | faiss.swig_ptr(res) 71 | ) 72 | return res[:nres] 73 | 74 | 75 | print(intersect_sorted( 76 | np.array([1, 3, 6, 8, 10], dtype='int32'), 77 | np.array([1, 5,8, 10], dtype='int32') 78 | )) -------------------------------------------------------------------------------- /neurips21/track3_baseline_faiss/plots/T3_deep-1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips21/track3_baseline_faiss/plots/T3_deep-1B.png -------------------------------------------------------------------------------- /neurips23/Azure_D8lds_v5_table.md: -------------------------------------------------------------------------------- 1 | | dataset | algorithm | qps | 2 | |-----------------|-----------------------|----------------| 3 | | yfcc-10M | parlayivf | 37670.703774 | 4 | | (filter track) | puck | 19153.425169 | 5 | | | hwtl_sdu_anns_filter | 15188.577106 | 6 | | | wm_filter | 14076.445534 | 7 | | | dhq | 13517.047874 | 8 | | | fdufilterdiskann | 5752.463409 | 9 | | | pyanns | 5335.916507 | 10 | | | faissplus | 3625.027286 | 11 | | | faiss | 3252.682553 | 12 | | | cufe | 2291.031703 | 13 | | text2image-10M | mysteryann | 22555.248017 | 14 | | (OOD track) | pyanns [1] | 22295.584534 | 15 | | | mysteryann-dif | 22491.577263 | 16 | | | sustech-ood | 13772.370641 | 17 | | | puck | 8699.573200 | 18 | | | vamana | 6753.344080 | 19 | | | ngt | 6373.934425 | 20 | | | epsearch | 5876.982706 | 21 | | | diskann | 4132.829728 | 22 | | | cufe | 3561.416286 | 23 | | sparse-full | pyanns [1] | 6499.652881 | 24 | | (sparse track) | shnsw | 5078.449772 | 25 | | | NLE-Full | 1314.194166 | 26 | | | nle | 1312.961060 | 27 | | | sustech-whu [2] | 788.168885 | 28 | | | cufe | 97.860465 | 29 | | | linscan | 95.098871 | 30 | 31 | 32 | [1] The entry was from an author affiliated with Zilliz, a company involved in the organizing team. The conflict was not disclosed by the author, and was discovered post evaluation. 33 | [2] Build time exceeded 12 hours 34 | 35 | Table lists highest QPS measured with at least 90% recall@10. Private queries were used for yfcc-10M and sparse-full and public queries for text2image-10M. 
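As a worked illustration of how such an operating point is derived: for each algorithm, keep only the runs whose recall@10 meets the 90% threshold and report the maximum QPS among them. A minimal sketch (the function name and the numbers below are made up for illustration; this is not the benchmark's own export code):

```python
def best_qps_at_recall(runs, min_recall=0.9):
    """Highest QPS among runs with recall@10 >= min_recall, else None.

    `runs` is a list of (recall_at_10, qps) pairs, e.g. one pair per
    query-args configuration of a single algorithm.
    """
    qualifying = [qps for recall, qps in runs if recall >= min_recall]
    return max(qualifying) if qualifying else None

# Illustrative numbers only, not leaderboard data:
print(best_qps_at_recall([(0.87, 41000.0), (0.92, 37000.0), (0.95, 25000.0)]))
# prints 37000.0 -- the 41000.0 run is discarded because its recall is below 0.9
```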
-------------------------------------------------------------------------------- /neurips23/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:jammy 2 | 3 | RUN apt-get update && apt-get install -y python3-numpy python3-scipy python3-pip build-essential git axel wget 4 | RUN wget https://aka.ms/downloadazcopy-v10-linux && mv downloadazcopy-v10-linux azcopy.tgz && tar xzf azcopy.tgz --transform 's!^[^/]\+\($\|/\)!azcopy_folder\1!' 5 | RUN cp azcopy_folder/azcopy /usr/bin 6 | 7 | RUN pip3 install -U pip 8 | 9 | WORKDIR /home/app 10 | COPY requirements_py3.10.txt run_algorithm.py ./ 11 | RUN pip3 install -r requirements_py3.10.txt 12 | 13 | ENTRYPOINT ["python3", "-u", "run_algorithm.py"] 14 | -------------------------------------------------------------------------------- /neurips23/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/__init__.py -------------------------------------------------------------------------------- /neurips23/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | from neurips23.filter.run import FilterRunner 5 | from neurips23.sparse.run import SparseRunner 6 | from neurips23.ood.run import OODRunner 7 | from neurips23.streaming.run import StreamingRunner 8 | 9 | def docker_tag_base(): 10 | return 'neurips23' 11 | 12 | def basedir(): 13 | return 'neurips23' 14 | 15 | def docker_tag(track, algo): 16 | return docker_tag_base() + '-' + track + '-' + algo 17 | 18 | def dockerfile_path_base(): 19 | return os.path.join('neurips23', 'Dockerfile') 20 | 21 | def track_path(track): 22 | return os.path.join('neurips23', track) 23 | 24 | def dockerfile_path(track, algo): 25 | return os.path.join(track_path(track), algo, 'Dockerfile') 26 | 27 | def yaml_path(track, algo): 28 | return os.path.join(track_path(track), algo, 'config.yaml') 29 | 30 | def get_definitions(track, algo): 31 | return yaml.load(yaml_path(track, algo)) 32 | 33 | RUNNERS = { 34 | "filter": FilterRunner, 35 | "sparse": SparseRunner, 36 | "ood": OODRunner, 37 | "streaming": StreamingRunner 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /neurips23/ec2_c6i.2xlarge_table.md: -------------------------------------------------------------------------------- 1 | | dataset | algorithm | qps | 2 | |-----------------|-----------------------|----------------| 3 | | yfcc-10M | parlayivf | 30744.178014 | 4 | | (filter track) | puck | 17160.390356 | 5 | | | hwtl_sdu_anns_filter | 15433.837498 | 6 | | | wm_filter | 13723.065895 | 7 | | | fdufilterdiskann | 6085.293763 | 8 | | | pyanns | 5260.477613 | 9 | | | faissplus | 3851.283822 | 10 | | | rubignn | 3289.234566 | 11 | | | faiss | 3254.200190 | 12 | | text2image-10M | pyanns [1] | 22476.070400 | 13 | | (OOD track) | mysteryann-dif | 17764.966620 | 14 | | | mysteryann | 17665.716757 | 15 | | | sustech-ood | 11594.134313 | 16 | | | puck-fizz | 8460.214238 | 17 | | | puck | 8167.845988 | 18 | | | vamana | 6322.589569 | 19 | | | cufe | 3414.617622 | 20 | | sparse-full | pyanns [1] | 6280.871386 | 21 | | (sparse track) | shnsw | 4359.145718 | 22 | | | nle | 1297.986119 | 23 | | | sustech-whu [2] | 670.864748 | 24 | | | cufe | 64.665603 | 25 | | | linscan | 63.026394 | 26 | 27 | [1] The entry was from an author affiliated with 
Zilliz, a company involved in the organizing team. The conflict was not disclosed by the author, and was discovered post evaluation. 28 | [2] Build time exceeded 12 hours (13 hours, 34 minutes) 29 | 30 | Table lists highest QPS measured with at least 90% recall@10. Private queries were used for yfcc-10M and sparse-full and public queries for text2image-10M. 31 | -------------------------------------------------------------------------------- /neurips23/filter/base.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base import BaseANN 2 | 3 | class BaseFilterANN(BaseANN): 4 | def filtered_query(self, X, filter, k): 5 | """ 6 | Carry out a batch query for k-NN of query set X with associated filter. 7 | Query X[i] asks for the k-NN in the index that pass all filters in filter[i]. 8 | """ 9 | raise NotImplementedError() 10 | 11 | def track(self): 12 | return "filter" -------------------------------------------------------------------------------- /neurips23/filter/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | 15 | COPY neurips23/filter/cufe/bow_id_selector.swig ./ 16 | 17 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 18 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 19 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 20 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 21 | 22 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /neurips23/filter/cufe/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | cufe: 3 | docker-tag: neurips23-filter-cufe 4 | module: neurips23.filter.cufe.faissCUFE 5 | constructor: faissCUFE 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | cufe: 17 | docker-tag: neurips23-filter-cufe 18 | module: neurips23.filter.cufe.faissCUFE 19 | constructor: faissCUFE 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | cufe: 31 | docker-tag: neurips23-filter-cufe 32 | module: neurips23.filter.cufe.faissCUFE 33 | constructor: faissCUFE 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | cufe: 43 | docker-tag: neurips23-filter-cufe 44 | module: neurips23.filter.cufe.faissCUFE 45 | constructor: faissCUFE 46 | base-args: ["@metric"] 47 | run-groups: 48 | base: 49 |
args: | 50 | [{"indexkey": "IVF4096,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [{"nprobe": 4, "mt_threshold":0.0003}, 56 | {"nprobe": 16, "mt_threshold":0.0003}, 57 | {"nprobe": 4, "mt_threshold":0.0001}, 58 | {"nprobe": 16, "mt_threshold":0.0001}, 59 | {"nprobe": 10, "mt_threshold":0.0001}, 60 | {"nprobe": 8, "mt_threshold": 0.0003}, 61 | {"nprobe": 32, "mt_threshold": 0.00033}, 62 | {"nprobe": 30, "mt_threshold": 0.00033}, 63 | {"nprobe": 12, "mt_threshold": 0.0002}, 64 | {"nprobe": 16, "mt_threshold": 0.00033} 65 | ] 66 | 67 | -------------------------------------------------------------------------------- /neurips23/filter/dhq/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 python3-numpy python3-scipy python3-pip build-essential git libblas-dev liblapack-dev wget libaio-dev libgoogle-perftools-dev clang-format libboost-all-dev libopenblas-dev liblapacke-dev 8 | 9 | 10 | 11 | RUN apt-get update; DEBIAN_FRONTEND=noninteractive apt install intel-mkl python3-setuptools wget python3-matplotlib build-essential checkinstall libssl-dev swig4.0 python3-dev python3-numpy python3-numpy-dev -y 12 | COPY install/requirements_conda.txt ./ 13 | # conda doesn't like some of our packages, use pip 14 | RUN python3 -m pip install -r requirements_conda.txt 15 | 16 | RUN pip3 install -U pip numpy pybind11 tqdm 17 | RUN git clone https://github.com/SDU-L/DHQ.git 18 | RUN chmod -R +777 DHQ/ 19 | RUN cp /home/app/DHQ/faiss/build/faiss/python/libfaiss_python_callbacks.so /lib 20 | RUN cp /home/app/DHQ/faiss/build/faiss/libfaiss.so /lib 21 | RUN cp /home/app/DHQ/faiss/build/faiss/libfaiss_avx2.so /lib 22 | RUN cp /home/app/DHQ/faiss/build/faiss/python/_swigfaiss_avx2.so /lib 23 | RUN cp /home/app/DHQ/faiss/build/faiss/python/_swigfaiss.so /lib 24 | #RUN 25 | RUN cd /home/app/DHQ/faiss/build/faiss/python/ && python3 setup.py install 26 | 27 | RUN cd /home/app 28 | 29 | ENV PYTHONPATH=DHQ/faiss/build/faiss/python/build/lib/ 30 | RUN python3 -c 'import faiss; print(faiss.IDSelectorFilterWise); print(faiss.__version__)' 31 | RUN pip3 install DHQ/DHQ-1.0.3-cp310-cp310-linux_x86_64.whl 32 | RUN python3 -c 'import DHQ' 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /neurips23/filter/dhq/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | dhq: 3 | docker-tag: neurips23-filter-dhq 4 | module: neurips23.filter.dhq.dhq 5 | constructor: DHQINDEX 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"L": 200, "R": 24, "level": 2, "indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [ 13 | {"nprobe": 110, "num": 2100, "ef": 11, "random": 40}, 14 | {"nprobe": 110, "num": 2100, "ef": 13, "random": 35}, 15 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 30}, 16 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 45}, 17 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 42}, 18 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 41}, 19 | {"nprobe": 145, "num": 2100, "ef": 10, "random": 35}, 20 | {"nprobe": 115, "num": 2400, "ef": 
10, "random": 30}, 21 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 36}, 22 | {"nprobe": 155, "num": 2100, "ef": 10, "random": 35} 23 | ] 24 | 25 | yfcc-10M: 26 | dhq: 27 | docker-tag: neurips23-filter-dhq 28 | module: neurips23.filter.dhq.dhq 29 | constructor: DHQINDEX 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{"L": 200, "R": 24, "level": 2}] 35 | query-args: | 36 | [ 37 | {"nprobe": 110, "num": 2100, "ef": 11, "random": 40}, 38 | {"nprobe": 110, "num": 2100, "ef": 13, "random": 35}, 39 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 30}, 40 | {"nprobe": 100, "num": 2400, "ef": 10, "random": 45}, 41 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 42}, 42 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 41}, 43 | {"nprobe": 145, "num": 2100, "ef": 10, "random": 35}, 44 | {"nprobe": 115, "num": 2400, "ef": 10, "random": 30}, 45 | {"nprobe": 140, "num": 2100, "ef": 10, "random": 36}, 46 | {"nprobe": 155, "num": 2100, "ef": 10, "random": 35} 47 | ] 48 | -------------------------------------------------------------------------------- /neurips23/filter/faiss/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | 15 | COPY neurips23/filter/faiss/bow_id_selector.swig ./ 16 | 17 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 18 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 19 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 20 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 21 | 22 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /neurips23/filter/faiss/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | faiss: 3 | docker-tag: neurips23-filter-faiss 4 | module: neurips23.filter.faiss.faiss 5 | constructor: FAISS 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | faiss: 17 | docker-tag: neurips23-filter-faiss 18 | module: neurips23.filter.faiss.faiss 19 | constructor: FAISS 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | faiss: 31 | docker-tag: neurips23-filter-faiss 32 | module: neurips23.filter.faiss.faiss 33 | constructor: FAISS 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | faiss: 43 | docker-tag: neurips23-filter-faiss 44 | module: neurips23.filter.faiss.faiss 45 | constructor: FAISS 46 | 
base-args: ["@metric"] 47 | run-groups: 48 | base: 49 | args: | 50 | [{"indexkey": "IVF16384,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [{"nprobe": 1, "mt_threshold":0.0003}, 56 | {"nprobe": 4, "mt_threshold":0.0003}, 57 | {"nprobe": 16, "mt_threshold":0.0003}, 58 | {"nprobe": 32, "mt_threshold":0.0003}, 59 | {"nprobe": 64, "mt_threshold":0.0003}, 60 | {"nprobe": 96, "mt_threshold":0.0003}, 61 | {"nprobe": 1, "mt_threshold":0.0001}, 62 | {"nprobe": 4, "mt_threshold":0.0001}, 63 | {"nprobe": 16, "mt_threshold":0.0001}, 64 | {"nprobe": 32, "mt_threshold":0.0001}, 65 | {"nprobe": 64, "mt_threshold":0.0001}, 66 | {"nprobe": 96, "mt_threshold":0.0001}, 67 | {"nprobe": 1, "mt_threshold":0.01}, 68 | {"nprobe": 4, "mt_threshold":0.01}, 69 | {"nprobe": 16, "mt_threshold":0.01}, 70 | {"nprobe": 32, "mt_threshold":0.01}, 71 | {"nprobe": 64, "mt_threshold":0.01}, 72 | {"nprobe": 96, "mt_threshold":0.01} 73 | ] 74 | 75 | -------------------------------------------------------------------------------- /neurips23/filter/faissplus/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | 15 | COPY neurips23/filter/faissplus/bow_id_selector.swig ./ 16 | 17 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 18 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 19 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 20 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 21 | 22 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 23 | 24 | -------------------------------------------------------------------------------- /neurips23/filter/faissplus/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | faissplus: 3 | docker-tag: neurips23-filter-faissplus 4 | module: neurips23.filter.faissplus.faiss 5 | constructor: FAISS 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | faissplus: 17 | docker-tag: neurips23-filter-faissplus 18 | module: neurips23.filter.faissplus.faiss 19 | constructor: FAISS 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | faissplus: 31 | docker-tag: neurips23-filter-faissplus 32 | module: neurips23.filter.faissplus.faiss 33 | constructor: FAISS 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | faissplus: 43 | docker-tag: neurips23-filter-faissplus 44 | module: neurips23.filter.faissplus.faiss 45 | constructor: FAISS 46 | 
base-args: ["@metric"] 47 | run-groups: 48 | base: 49 | args: | 50 | [{"indexkey": "IVF11264,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [ 56 | {"nprobe": 34, "mt_threshold": 0.00031}, 57 | {"nprobe": 32, "mt_threshold": 0.0003}, 58 | {"nprobe": 32, "mt_threshold": 0.00031}, 59 | {"nprobe": 34, "mt_threshold": 0.0003}, 60 | {"nprobe": 34, "mt_threshold": 0.00035}, 61 | {"nprobe": 32, "mt_threshold": 0.00033}, 62 | {"nprobe": 30, "mt_threshold": 0.00033}, 63 | {"nprobe": 32, "mt_threshold": 0.00035}, 64 | {"nprobe": 34, "mt_threshold": 0.00033}, 65 | {"nprobe": 40, "mt_threshold": 0.0003} 66 | ] 67 | -------------------------------------------------------------------------------- /neurips23/filter/fdufilterdiskann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | # COPY FilterDiskann /home/app/FilterDiskann 10 | WORKDIR /home/app 11 | RUN git clone --recursive --branch main https://github.com/PUITAR/FduFilterDiskANN.git 12 | WORKDIR /home/app/FduFilterDiskANN/pybindings 13 | 14 | RUN pip3 install virtualenv build 15 | RUN pip3 install pybind11[global] 16 | RUN pip3 install . 17 | WORKDIR /home/app 18 | -------------------------------------------------------------------------------- /neurips23/filter/fdufilterdiskann/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | fdufilterdiskann: 3 | docker-tag: neurips23-filter-fdufilterdiskann 4 | module: neurips23.filter.fdufilterdiskann.fdufilterdiskann 5 | constructor: fdufilterdiskann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":2, "L":10, "buildthreads":16, "alpha":1.2}] 11 | query-args: | 12 | [{"Ls":10, "T":1, "threshold_1":20000, "threshold_2":40000}] 13 | yfcc-10M: 14 | fdufilterdiskann: 15 | docker-tag: neurips23-filter-fdufilterdiskann 16 | module: neurips23.filter.fdufilterdiskann.fdufilterdiskann 17 | constructor: fdufilterdiskann 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":60, "L":80, "buildthreads":16, "alpha":1.0}] 23 | query-args: | 24 | [ 25 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5000}, 26 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5005}, 27 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5010}, 28 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5015}, 29 | {"Ls":11, "T":16, "threshold_1":53500, "threshold_2":5020}, 30 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5025}, 31 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5030}, 32 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5035}, 33 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5040}, 34 | {"Ls":12, "T":16, "threshold_1":53500, "threshold_2":5045} 35 | ] 36 | 37 | -------------------------------------------------------------------------------- /neurips23/filter/hwtl_sdu_anns_filter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install apt-utils 5 | RUN apt update && apt install -y software-properties-common 6 | RUN 
add-apt-repository -y ppa:git-core/ppa 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 python3-numpy python3-scipy python3-pip build-essential git libblas-dev liblapack-dev wget libaio-dev libgoogle-perftools-dev clang-format libboost-all-dev libopenblas-dev liblapacke-dev 8 | 9 | 10 | 11 | RUN apt update && apt install -y wget swig 12 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 13 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 14 | 15 | ENV PATH /root/anaconda3/bin:$PATH 16 | ENV CONDA_PREFIX /root/anaconda3/ 17 | 18 | RUN conda install -c pytorch faiss-cpu 19 | COPY install/requirements_conda.txt ./ 20 | # conda doesn't like some of our packages, use pip 21 | RUN python3 -m pip install -r requirements_conda.txt 22 | WORKDIR /home/app/ 23 | RUN pip3 install -U pip pybind11 numpy 24 | 25 | RUN git clone https://github.com/WPJiang/HWTL_SDU-ANNS-filter.git 26 | RUN cp ./HWTL_SDU-ANNS-filter/bow_id_selector.py ./ 27 | RUN cp ./HWTL_SDU-ANNS-filter/_bow_id_selector.so ./ 28 | 29 | ENV LD_PRELOAD /root/anaconda3/lib/libmkl_core.so:/root/anaconda3/lib/libmkl_sequential.so 30 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /neurips23/filter/hwtl_sdu_anns_filter/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | hwtl_sdu_anns_filter: 3 | docker-tag: neurips23-filter-hwtl_sdu_anns_filter 4 | module: neurips23.filter.hwtl_sdu_anns_filter.hwtl_sdu_anns_filter 5 | constructor: hwtl_sdu_anns_filter 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"default": 1}] 11 | query-args: | 12 | [{"nprobe": 4, "expansion":3, "threshold":3000}, 13 | {"nprobe": 8, "expansion":3, "threshold":3000}, 14 | {"nprobe": 12, "expansion":3, "threshold":3000}, 15 | {"nprobe": 16, "expansion":3, "threshold":3000}, 16 | {"nprobe": 32, "expansion":3, "threshold":3000} 17 | ] 18 | 19 | yfcc-10M: 20 | hwtl_sdu_anns_filter: 21 | docker-tag: neurips23-filter-hwtl_sdu_anns_filter 22 | module: neurips23.filter.hwtl_sdu_anns_filter.hwtl_sdu_anns_filter 23 | constructor: hwtl_sdu_anns_filter 24 | base-args: ["@metric"] 25 | run-groups: 26 | base: 27 | args: | 28 | [{"L": 200, "R": 32, "level": 2, "threads": 16}] 29 | query-args: | 30 | [{"nprobe": 4, "expansion":3, "threshold":3000}, 31 | {"nprobe": 8, "expansion":3, "threshold":3000}, 32 | {"nprobe": 12, "expansion":3, "threshold":3000}, 33 | {"nprobe": 16, "expansion":3, "threshold":3000}, 34 | {"nprobe": 32, "expansion":3, "threshold":3000}, 35 | {"nprobe": 4, "expansion":4, "threshold":3000}, 36 | {"nprobe": 8, "expansion":4, "threshold":3000}, 37 | {"nprobe": 12, "expansion":4, "threshold":3000}, 38 | {"nprobe": 16, "expansion":4, "threshold":3000}, 39 | {"nprobe": 32, "expansion":4, "threshold":3000} 40 | ] 41 | -------------------------------------------------------------------------------- /neurips23/filter/install_neurips23.sh: -------------------------------------------------------------------------------- 1 | python install.py --neurips23track filter --algorithm cufe 2 | python install.py --neurips23track filter --algorithm dhq 3 | python install.py --neurips23track filter --algorithm faiss 4 | python install.py --neurips23track filter 
--algorithm faissplus 5 | python install.py --neurips23track filter --algorithm fdufilterdiskann 6 | python install.py --neurips23track filter --algorithm hwtl_sdu_anns_filter 7 | python install.py --neurips23track filter --algorithm parlayivf 8 | python install.py --neurips23track filter --algorithm puck 9 | python install.py --neurips23track filter --algorithm pyanns 10 | python install.py --neurips23track filter --algorithm wm_filter 11 | -------------------------------------------------------------------------------- /neurips23/filter/operating_points_private_queries_AzureD8lds_v5.csv: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | yfcc-10M cufe 2291.031703 4 | dhq 13517.047874 5 | faiss 3252.682553 6 | faissplus 3625.027286 7 | fdufilterdiskann 5752.463409 8 | hwtl_sdu_anns_filter 15188.577106 9 | parlayivf 37670.703774 10 | puck 19153.425169 11 | pyanns 5335.916507 12 | wm_filter 14076.445534 13 | -------------------------------------------------------------------------------- /neurips23/filter/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | yfcc-10M cufe 2917.132715 4 | dhq 13670.864704 5 | faiss 3032.534357 6 | faissplus 3776.539092 7 | fdufilterdiskann 5679.748583 8 | hwtl_sdu_anns_filter 15059.124141 9 | parlayivf 37902.113726 10 | puck 19193.294823 11 | pyanns 5184.844352 12 | wm_filter 14467.961514 13 | -------------------------------------------------------------------------------- /neurips23/filter/parlayivf/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | # RUN echo "docker caching is so painful pt. 31" 10 | 11 | RUN git clone -b filter https://github.com/cmuparlay/ParlayANN.git && cd ParlayANN && git checkout f7208ba && git submodule update --init --recursive && cd python && pip install pybind11 && bash compile.sh 12 | # WORKDIR /home/app/ParlayANN 13 | # RUN git submodule update --init --recursive 14 | # WORKDIR /home/app/ParlayANN/python 15 | 16 | # RUN pip install pybind11 17 | 18 | # RUN bash compile.sh 19 | 20 | ENV PYTHONPATH=$PYTHONPATH:/home/app/ParlayANN/python 21 | 22 | # ENV PARLAY_NUM_THREADS=8 23 | 24 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/filter/parlayivf/README.md: -------------------------------------------------------------------------------- 1 | # ParlayANN's IVF² 2 | ## Introduction 3 | This submission is from the team at Carnegie Mellon and the University of Maryland responsible for the [ParlayANN library](https://github.com/cmuparlay/ParlayANN), a collection of algorithms for approximate nearest neighbor search implemented with [ParlayLib](https://github.com/cmuparlay/parlaylib), an efficient library for shared memory parallelism. 
4 | 5 | ## Approach 6 | We leverage the fact that filtered search, especially when using 'and' queries, provides beneficial constraints on the set of points needing to be searched, and the fact that IVF indices can be stored in relatively little memory. The name IVF² refers to the fact we treat the filters like an actual inverted file index (in contrast to the 'IVF' indices used by faiss, pgvector, etc. for vector search), and the posting lists of the filters are (above a size threshold) themselves IVF indices. 7 | 8 | More details to come. 9 | -------------------------------------------------------------------------------- /neurips23/filter/pinecone/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | # install MKL support 4 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libmkl-full-dev 5 | 6 | # copy and install the pys2 python package 7 | RUN git clone --branch filter https://github.com/pinecone-io/bigann.git 8 | RUN pip install ./bigann/*.whl 9 | # verify that the build worked 10 | RUN python3 -c 'import pys2;' 11 | 12 | -------------------------------------------------------------------------------- /neurips23/filter/pinecone/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone filter ANN algorithm 2 | 3 | Our algorithm is based on the classical IVF architecture, where the data is first divided into geometrical clusters, 4 | combined with a metadata inverted index: for every metadata tag, we store a list of vectors with that tag. 5 | 6 | Given a query, we first evaluate its level of selectivity (i.e., the count of vectors that pass the filter), 7 | and scan a varying number of clusters for that query. 8 | We efficiently scan only the relevant vectors based on the inverted index, so the number of operations is 9 | O(query selectivity) rather than O(# of vectors in the selected clusters). 10 | The intuition is that for wide queries, the closest vectors are in neighboring clusters, 11 | and for more selective queries there is a need to scan more clusters. 12 | Additionally, we make sure that a minimal number of relevant vectors have been scanned, 13 | to account for queries whose selectivity is less localized. 14 | 15 | To accelerate the search, we pre-compute some of the intersections of the inverted lists (based on their size), 16 | and use AVX for efficient computation of distances. 17 | To optimize the hyperparameters on the public query set, we formalized the problem as a constrained convex 18 | optimization problem, assigning the optimal recall value for each selectivity bucket. 19 | For the most selective queries, it turns out that it is beneficial to simply scan all relevant vectors 20 | (and ignore the geometrical clustering). 
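To make the selectivity-driven probing concrete, here is a minimal sketch under stated assumptions. It is not Pinecone's (closed-source) code: the probing rule, data layout, and function names are illustrative guesses that loosely mirror the `fraction_coefficient`, `fraction_exponent` and `skip_clustering_threshold` knobs in the config.yaml that follows.

```python
# Illustrative sketch only -- not the Pinecone submission.
# inverted_index: dict tag -> sorted numpy array of vector ids
# cluster_of:     (n,) array giving the IVF cluster of each vector
# centroids:      (n_clusters, d) array; xb: (n, d) base vectors
import numpy as np

def matching_ids(tags, inverted_index):
    """Ids passing an AND filter over `tags`; their count is the query's selectivity."""
    ids = inverted_index[tags[0]]
    for t in tags[1:]:
        ids = np.intersect1d(ids, inverted_index[t], assume_unique=True)
    return ids

def filtered_search(q, tags, xb, cluster_of, centroids, inverted_index, k=10,
                    fraction_coefficient=13.0, fraction_exponent=0.65,
                    skip_clustering_threshold=2000):
    ids = matching_ids(tags, inverted_index)
    if len(ids) > skip_clustering_threshold:
        # Guess at the probing rule: the more selective the query (fewer
        # matching ids), the more clusters get scanned.
        nprobe = int(round(fraction_coefficient * (len(xb) / len(ids)) ** fraction_exponent))
        nprobe = min(max(nprobe, 1), len(centroids))
        top = np.argsort(np.linalg.norm(centroids - q, axis=1))[:nprobe]
        ids = ids[np.isin(cluster_of[ids], top)]
    # Exact scan over the surviving candidates only: O(selectivity), not O(cluster sizes).
    dists = np.linalg.norm(xb[ids] - q, axis=1)
    return ids[np.argsort(dists)[:k]]
```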
-------------------------------------------------------------------------------- /neurips23/filter/pinecone/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | pinecone: 3 | docker-tag: neurips23-filter-pinecone 4 | module: neurips23.filter.pinecone.pinecone_index 5 | constructor: PineconeIndex 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "FilterIVFFlatU8", "num_clusters": "128", "precompute_intersection_threshold": "5000"}] 11 | query-args: | 12 | [ 13 | {"fraction_coefficient": "0.3", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000}, 14 | {"fraction_coefficient": "0.7", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000}, 15 | {"fraction_coefficient": "1.0", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000}, 16 | {"fraction_coefficient": "2.0", "fraction_exponent": "0.1", "skip_clustering_threshold": 2000} 17 | ] 18 | yfcc-10M: 19 | pinecone: 20 | docker-tag: neurips23-filter-pinecone 21 | module: neurips23.filter.pinecone.pinecone_index 22 | constructor: PineconeIndex 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: | 27 | [{"indexkey": "FilterIVFFlatU8", "num_clusters": "2048", "precompute_intersection_threshold": "1600"}] 28 | query-args: | 29 | [ 30 | {"fraction_coefficient": "13.0", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 31 | {"fraction_coefficient": "12.0", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 32 | {"fraction_coefficient": "11.5", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 33 | {"fraction_coefficient": "11.0", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 34 | {"fraction_coefficient": "10.5", "fraction_exponent": "0.65", "skip_clustering_threshold": 2000}, 35 | {"fraction_coefficient": "10.5", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 36 | {"fraction_coefficient": "10.0", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 37 | {"fraction_coefficient": "9.5", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 38 | {"fraction_coefficient": "9.0", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000}, 39 | {"fraction_coefficient": "8.5", "fraction_exponent": "0.64", "skip_clustering_threshold": 2000} 40 | ] 41 | -------------------------------------------------------------------------------- /neurips23/filter/plot_public_queries_AzureD8lds_v5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/filter/plot_public_queries_AzureD8lds_v5.png -------------------------------------------------------------------------------- /neurips23/filter/puck/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 13 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 14 | ENV PATH /home/app/cmake/bin:$PATH 15 | 16 | #mkl 17 
| RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 18 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 19 | 20 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN touch /etc/profile.d/intel.sh 23 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 24 | RUN . /etc/profile.d/intel.sh 25 | 26 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 27 | #RUN git config --global http.sslVerify false 28 | 29 | RUN git clone -b filter https://github.com/baidu/puck.git 30 | RUN cd puck && . /etc/profile.d/intel.sh && python3 setup.py install 31 | RUN python3 -c 'from puck import py_puck_api' 32 | -------------------------------------------------------------------------------- /neurips23/filter/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update && apt install -y wget swig 4 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2023.03-0-Linux-x86_64.sh 5 | RUN bash Anaconda3-2023.03-0-Linux-x86_64.sh -b 6 | 7 | ENV PATH /root/anaconda3/bin:$PATH 8 | ENV CONDA_PREFIX /root/anaconda3/ 9 | 10 | RUN conda install -c pytorch faiss-cpu 11 | COPY install/requirements_conda.txt ./ 12 | # conda doesn't like some of our packages, use pip 13 | RUN python3 -m pip install -r requirements_conda.txt 14 | RUN python3 -m pip install pybind11 15 | 16 | COPY neurips23/filter/faiss/bow_id_selector.swig ./ 17 | 18 | RUN swig -c++ -python -I$CONDA_PREFIX/include -Ifaiss bow_id_selector.swig 19 | RUN g++ -shared -O3 -g -fPIC bow_id_selector_wrap.cxx -o _bow_id_selector.so \ 20 | -I $( python -c "import distutils.sysconfig ; print(distutils.sysconfig.get_python_inc())" ) \ 21 | -I $CONDA_PREFIX/include $CONDA_PREFIX/lib/libfaiss_avx2.so -Ifaiss 22 | 23 | RUN git clone https://github.com/veaaaab/uint8_knn.git 24 | WORKDIR /home/app/uint8_knn 25 | RUN bash build.sh 26 | 27 | WORKDIR /home/app 28 | 29 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /neurips23/filter/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | pyanns: 3 | docker-tag: neurips23-filter-pyanns 4 | module: neurips23.filter.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8"}] 11 | query-args: | 12 | [{"nprobe": 1}, 13 | {"nprobe":2}, 14 | {"nprobe":4}] 15 | random-s: 16 | pyanns: 17 | docker-tag: neurips23-filter-pyanns 18 | module: neurips23.filter.pyanns.pyanns 19 | constructor: Pyanns 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"indexkey": "IVF1024,SQ8"}] 25 | query-args: | 26 | [{"nprobe": 1}, 27 | {"nprobe":2}, 28 | {"nprobe":4}] 29 | yfcc-10M-unfiltered: 30 | pyanns: 31 | docker-tag: neurips23-filter-pyanns 32 | module: neurips23.filter.pyanns.pyanns 33 | constructor: Pyanns 34 | base-args: ["@metric"] 35 | run-groups: 36 | base: 37 | args: | 38 | [{"indexkey": "IVF16384,SQ8", "binarysig": true, "threads": 16}] 39 | query-args: | 40 | [{"nprobe": 1}, {"nprobe": 4}, {"nprobe": 16}, {"nprobe": 64}] 41 | yfcc-10M: 42 | pyanns: 43 | 
docker-tag: neurips23-filter-pyanns 44 | module: neurips23.filter.pyanns.pyanns 45 | constructor: Pyanns 46 | base-args: ["@metric"] 47 | run-groups: 48 | base: 49 | args: | 50 | [{"indexkey": "IVF16384,SQ8", 51 | "binarysig": true, 52 | "threads": 16 53 | }] 54 | query-args: | 55 | [ 56 | {"nprobe": 16, "mt_threshold":0.0032}, 57 | {"nprobe": 16, "mt_threshold":0.0035}, 58 | {"nprobe": 32, "mt_threshold":0.001} 59 | ] 60 | -------------------------------------------------------------------------------- /neurips23/filter/run.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base_runner import BaseRunner 2 | import time 3 | 4 | class FilterRunner(BaseRunner): 5 | def run_task(algo, ds, distance, count, run_count, search_type, private_query): 6 | best_search_time = float('inf') 7 | search_times = [] 8 | 9 | if not private_query: 10 | X = ds.get_queries() 11 | else: 12 | X = ds.get_private_queries() 13 | 14 | print(fr"Got {X.shape[0]} queries") 15 | 16 | for i in range(run_count): 17 | print('Run %d/%d...' % (i + 1, run_count)) 18 | 19 | start = time.time() 20 | if search_type == "knn": 21 | algo.query(X, count) 22 | total = (time.time() - start) 23 | results = algo.get_results() 24 | assert results.shape[0] == X.shape[0] 25 | elif search_type == "knn_filtered": 26 | if not private_query: 27 | metadata = ds.get_queries_metadata() 28 | else: 29 | metadata = ds.get_private_queries_metadata() 30 | algo.filtered_query(X, metadata, count) 31 | total = (time.time() - start) 32 | results = algo.get_results() 33 | assert results.shape[0] == X.shape[0] 34 | else: 35 | raise NotImplementedError() 36 | 37 | search_time = total 38 | best_search_time = min(best_search_time, search_time) 39 | search_times.append( search_time ) 40 | 41 | attrs = { 42 | "best_search_time": best_search_time, 43 | "name": str(algo), 44 | "run_count": run_count, 45 | "distance": distance, 46 | "type": search_type, 47 | "count": int(count), 48 | "search_times": search_times, 49 | "private_queries": private_query, 50 | } 51 | additional = algo.get_additional() 52 | for k in additional: 53 | attrs[k] = additional[k] 54 | return (attrs, results) 55 | 56 | -------------------------------------------------------------------------------- /neurips23/filter/wm_filter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update; DEBIAN_FRONTEND=noninteractive apt install intel-mkl python3-setuptools wget python3-matplotlib build-essential checkinstall libssl-dev swig4.0 python3-dev python3-numpy python3-numpy-dev -y 4 | COPY install/requirements_conda.txt ./ 5 | # conda doesn't like some of our packages, use pip 6 | RUN python3 -m pip install -r requirements_conda.txt 7 | 8 | 9 | # CMAKE with good enough version 10 | RUN mkdir /build && wget https://github.com/Kitware/CMake/archive/refs/tags/v3.27.1.tar.gz && mv v3.27.1.tar.gz /build 11 | RUN cd /build; tar -zxvf v3.27.1.tar.gz 12 | RUN cd /build/CMake-3.27.1 && ./bootstrap && make && make install 13 | 14 | 15 | RUN cd / && git clone https://github.com/alemagnani/faiss.git && cd /faiss && git pull && git checkout wm_filter 16 | 17 | RUN cd /faiss && rm -rf ./build 18 | RUN cd /faiss/; cmake -B build /faiss/ -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_PYTHON=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DFAISS_OPT_LEVEL=avx2 -DBLA_VENDOR=Intel10_64_dyn -DBUILD_TESTING=ON -DPython_EXECUTABLE=/usr/bin/python3 
-DMKL_LIBRARIES=/usr/lib/x86_64-linux-gnu/libmkl_rt.so 19 | RUN cd /faiss/; make -C build -j faiss faiss_avx2 swigfaiss swigfaiss_avx2 20 | RUN (cd /faiss/build/faiss/python && python3 setup.py install) 21 | 22 | #RUN pip install tritonclient[all] 23 | ENV PYTHONPATH=/faiss/build/faiss/python/build/lib/ 24 | 25 | RUN python3 -c 'import faiss; print(faiss.IndexFlatL2); print(faiss.__version__)' 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /neurips23/filter/wm_filter/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Submission for Neurips23 Filter track of WM_filter team 3 | This submission leverages the IVF index to run the filter in a fast way. 4 | 5 | More info to come... 6 | 7 | 8 | -------------------------------------------------------------------------------- /neurips23/filter/wm_filter/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | wm_filter: 3 | docker-tag: neurips23-filter-wm_filter 4 | module: neurips23.filter.wm_filter.wm_filter 5 | constructor: FAISS 6 | base-args: [ "@metric" ] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"indexkey": "IVF1024,SQ8", 11 | "threads": 8, 12 | "train_size": 2000000, 13 | "type": "direct" 14 | }] 15 | query-args: | 16 | [ 17 | {"nprobe": 80, "max_codes": 100, "selector_probe_limit": 80}, 18 | {"nprobe": 100, "max_codes": 500, "selector_probe_limit": 100}, 19 | {"nprobe": 120, "max_codes": 1000, "selector_probe_limit": 120}, 20 | {"nprobe": 140, "max_codes": 1800, "selector_probe_limit": 140}, 21 | {"nprobe": 160, "max_codes": 500, "selector_probe_limit": 160}, 22 | {"nprobe": 70, "max_codes": 1000, "selector_probe_limit": 70} 23 | ] 24 | yfcc-10M: 25 | wm_filter: 26 | docker-tag: neurips23-filter-wm_filter 27 | module: neurips23.filter.wm_filter.wm_filter 28 | constructor: FAISS 29 | base-args: [ "@metric" ] 30 | run-groups: 31 | base: 32 | args: | 33 | [{"indexkey": "IVF1024,SQ8", 34 | "threads": 8, 35 | "train_size": 2000000, 36 | "type": "direct" 37 | }] 38 | query-args: | 39 | [ 40 | {"nprobe": 80, "max_codes": 1800, "selector_probe_limit": 80}, 41 | {"nprobe": 100, "max_codes": 1800, "selector_probe_limit": 100}, 42 | {"nprobe": 120, "max_codes": 1800, "selector_probe_limit": 120}, 43 | {"nprobe": 140, "max_codes": 1800, "selector_probe_limit": 140}, 44 | {"nprobe": 160, "max_codes": 1800, "selector_probe_limit": 160}, 45 | {"nprobe": 70, "max_codes": 2100, "selector_probe_limit": 70}, 46 | {"nprobe": 100, "max_codes": 2100, "selector_probe_limit": 100}, 47 | {"nprobe": 130, "max_codes": 2100, "selector_probe_limit": 130}, 48 | {"nprobe": 160, "max_codes": 2100, "selector_probe_limit": 160}, 49 | {"nprobe": 200, "max_codes": 2100, "selector_probe_limit": 200} 50 | ] 51 | -------------------------------------------------------------------------------- /neurips23/filter/zilliz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | # install MKL support 4 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libmkl-full-dev libaio-dev 5 | 6 | RUN git clone https://github.com/hhy3/zilliz-bigann.git --branch filter 7 | RUN pip install ./zilliz-bigann/*.whl 8 | 9 | 10 | -------------------------------------------------------------------------------- /neurips23/filter/zilliz/config.yaml: -------------------------------------------------------------------------------- 1 | random-filter-s: 2 | zilliz: 3 | 
docker-tag: neurips23-filter-zilliz 4 | module: neurips23.filter.zilliz.zilliz 5 | constructor: Zilliz 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R": 12, "L": 200, "threshold": 8000 11 | }] 12 | query-args: | 13 | [ 14 | {"ef": 16}, 15 | {"ef": 18}, 16 | {"ef": 20}, 17 | {"ef": 24}, 18 | {"ef": 30}, 19 | {"ef": 40}, 20 | {"ef": 50}, 21 | {"ef": 70} 22 | ] 23 | 24 | yfcc-10M: 25 | zilliz: 26 | docker-tag: neurips23-filter-zilliz 27 | module: neurips23.filter.zilliz.zilliz 28 | constructor: Zilliz 29 | base-args: ["@metric"] 30 | run-groups: 31 | base: 32 | args: | 33 | [{"R": 12, "L": 100, "threshold": 8000, "threshold2": 10000 34 | }] 35 | query-args: | 36 | [ 37 | {"ef": 16}, 38 | {"ef": 18}, 39 | {"ef": 19}, 40 | {"ef": 20}, 41 | {"ef": 22}, 42 | {"ef": 24}, 43 | {"ef": 26}, 44 | {"ef": 28}, 45 | {"ef": 32}, 46 | {"ef": 36} 47 | ] 48 | -------------------------------------------------------------------------------- /neurips23/leaderboard.md: -------------------------------------------------------------------------------- 1 | ### Leaderboard 2 | 3 | **Note**: this is the leaderboard of the original submissions for the NeurIPS'23 competition (Dec. 2023). To view the ongoing leaderboard, which also includes new results, see [here](ongoing_leaderboard/leaderboard.md). 4 | 5 | This leaderboard is based on the recall@10 vs. throughput benchmark that has become the standard way of evaluating and comparing approximate nearest neighbor algorithms. 6 | The recall of the baselines at this QPS threshold is listed [above](#measuring_your_algorithm). 7 | 8 | For the "Filter", "Out-of-Distribution" and "Sparse" tracks, algorithms were ranked on the QPS they achieve on the track dataset, as long as the recall@10 is at least 90%. 9 | The results files for [Azure D8lds_v5](Azure_D8lds_v5_table.md) and [AWS EC2 c6i.2xlarge](ec2_c6i.2xlarge_table.md) list the maximum QPS measured for each algorithm with at least 90% recall@10. 10 | 11 | For the Streaming track, algorithms were ranked on recall@10, as long as each algorithm completes the runbook within the allotted 1 hour. The leading entry had a recall of 0.9849. 12 | The [result file](streaming/res_final_runbook_AzureD8lds_v5.csv) lists measurements for all streaming algorithms on Azure D8lds_v5. 13 | 14 | QPS vs recall@10 plots for tracks based on public queries on Azure D8lds_v5: 15 | **Filter track** 16 | ![yfcc-10M](filter/plot_public_queries_AzureD8lds_v5.png) 17 | 18 | **OOD track** 19 | ![text2image-10M](ood/plot_public_queries_AzureD8lds_v5.png) 20 | 21 | **Sparse track** 22 | ![sparse-full](sparse/plot_public_queries_AzureD8lds_v5.png) 23 | 24 | More plots to follow. 25 | -------------------------------------------------------------------------------- /neurips23/notes/README.md: -------------------------------------------------------------------------------- 1 | # Additional notes for the NeurIPS'23 challenge 2 | 3 | - [HNSW vs. Vamana streaming comparison](streaming/hnsw_result/hnsw_result.md) (contributed by [ekzhu](https://github.com/ekzhu)) 4 | 5 | -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/hnsw_result.md: -------------------------------------------------------------------------------- 1 | # Final Runbook Results of Vamana and HNSW Implementations 2 | 3 | We look at the recall stability of DiskANN's Vamana and various HNSW implementations 4 | under the streaming workload provided by the final runbook.
5 | It is important to note that Vamana and HNSW are set up with different parameters, 6 | so rather than comparing the absolute recall values, we compare the stability of 7 | recall over the duration of the workload. 8 | 9 | ## Vamana and HNSW with search-based edge repair algorithm. 10 | 11 | Graph ANN indexes that support in-place deletes all need to perform 12 | edge repair to maintain the graph structure. Edge repair is done for each 13 | in-coming neighbor of a deleted point, as the deleted point is removed from 14 | the neighbor's adjacency list. 15 | 16 | The search-based edge repair algorithm is implemented by [`hnswlib`](https://github.com/nmslib/hnswlib) 17 | in a function called [`repairConnectionsForUpdate`](https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/hnswlib/hnswalg.h#L987). The idea is to perform a "re-insert" of the node to be repaired 18 | and update its adjacency lists at all levels. 19 | 20 | ![recall over steps hnsw search-based repair](recall_over_steps_10_48_hnsw_search_based_repair.png) 21 | 22 | ## Vamana and HNSW with Vamana's edge repair algorithm. 23 | 24 | Vamana's edge repair algorithm is different from the previously described search-based edge repair 25 | algorithm. The idea is to connect each in-coming neighbor of a deleted node to the 26 | out-going neighbors of the deleted node, while applying a pruning step to maintain 27 | the maximum degree constraint. In this case, we use HNSW's original pruning 28 | algorithm. It is implemented by `hnswlib` in a function called 29 | [`getNeighborsByHeuristic2`](https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/hnswlib/hnswalg.h#L382C16-L382C16). 30 | 31 | ![recall over step hnsw vamana repair](recall_over_steps_10_48_hnsw.png) 32 | 33 | ## Vamana and HNSW with Vamana's edge repair and robust pruning algorithm. 34 | 35 | Lastly, we replace HNSW's pruning algorithm with Vamana's. Now the HNSW 36 | algorithm is exactly the same as Vamana's, except that it has multiple layers. 37 | We can call this "Multi-layer Vamana".
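For intuition, here is a minimal sketch of the Vamana-style repair described above. It is a paraphrase, not the actual hnswlib or DiskANN code; the dict-of-sets graph representation and the plain nearest-R pruning are simplifying assumptions, and Vamana's robust prune (or HNSW's heuristic) would slot in where `nearest_r` is called.

```python
# Sketch for intuition only -- a paraphrase of the Vamana-style repair,
# not the actual hnswlib or DiskANN code.
# graph: dict mapping node id -> set of out-neighbor ids
# vecs:  (n, d) numpy array of vectors; R: maximum out-degree
import numpy as np

def nearest_r(candidates, u, vecs, R):
    """Simplest possible pruning: keep the R candidates closest to u."""
    cand = np.fromiter(candidates, dtype=int)
    dist = np.linalg.norm(vecs[cand] - vecs[u], axis=1)
    return set(cand[np.argsort(dist)[:R]].tolist())

def repair_vamana_style(graph, vecs, deleted, R):
    """Connect each in-neighbor of `deleted` to the out-neighbors of `deleted`,
    then prune each patched adjacency list back to the degree bound R."""
    out_nbrs = graph[deleted]
    in_nbrs = [u for u, nbrs in graph.items() if deleted in nbrs]
    for u in in_nbrs:
        merged = (graph[u] | out_nbrs) - {deleted, u}
        graph[u] = nearest_r(merged, u, vecs, R)
    del graph[deleted]
```

The search-based variant would instead re-run a graph search from each in-neighbor's vector and rebuild that adjacency list from the search results.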
38 | 39 | ![recall over step hnsw vamana pruning](recall_over_steps_10_48_hnsw_robust_prune.png) -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw.png -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_robust_prune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_robust_prune.png -------------------------------------------------------------------------------- /neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_search_based_repair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/notes/streaming/hnsw_result/recall_over_steps_10_48_hnsw_search_based_repair.png -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/Azure_D8lds_v5_table.md: -------------------------------------------------------------------------------- 1 | | dataset | algorithm | qps | 2 | |----------------|--------------------------|--------------| 3 | | yfcc-10M | pinecone [*] | 85491.542780 | 4 | | (filter track) | zilliz [*] | 84596.419213 | 5 | | | parlayivf | 37902.113726 | 6 | | | puck | 19193.294823 | 7 | | | hwtl_sdu_anns_filter [*] | 15059.124141 | 8 | | | wm_filter | 14467.961514 | 9 | | | dhq | 13670.864704 | 10 | | | fdufilterdiskann | 5679.748583 | 11 | | | pyanns | 5184.844352 | 12 | | | faissplus | 3776.539092 | 13 | | | faiss | 3032.534357 | 14 | | | cufe | 2917.132715 | 15 | | text2image-10M | pinecone-ood [*] | 38087.669026 | 16 | | (OOD track) | zilliz [*] | 33240.822128 | 17 | | | mysteryann | 22555.248017 | 18 | | | pyanns | 22295.584534 | 19 | | | mysteryann-dif | 22491.577263 | 20 | | | sustech-ood | 13772.370641 | 21 | | | puck | 8699.573200 | 22 | | | vamana | 6753.344080 | 23 | | | ngt | 6373.934425 | 24 | | | epsearch | 5876.982706 | 25 | | | diskann | 4132.829728 | 26 | | | cufe | 3561.416286 | 27 | | sparse-full | zilliz [*] | 10749.188262 | 28 | | (sparse track) | pinecone_smips [*] | 10439.909652 | 29 | | | pyanns | 8732.172708 | 30 | | | shnsw | 7136.927865 | 31 | | | nle | 2358.590429 | 32 | | | cufe | 104.768194 | 33 | | | linscan | 92.510615 | 34 | 35 | [*] not open source 36 | 37 | Table lists highest QPS measured with at least 90% recall@10, on the *public* query set. 
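These operating points (here and in the operating_points_*.txt files elsewhere in the tree) are simply the fastest qualifying runs. A minimal sketch of that selection, assuming a pandas DataFrame with `dataset`, `algorithm`, `recall@10` and `qps` columns; the column names and the CSV path in the example are assumptions, not the exact output of the export tooling:

```python
import pandas as pd

def operating_points(df: pd.DataFrame, min_recall: float = 0.9) -> pd.Series:
    """Highest QPS per (dataset, algorithm) among runs with recall@10 >= min_recall."""
    qualified = df[df["recall@10"] >= min_recall]
    return (qualified.groupby(["dataset", "algorithm"])["qps"]
                     .max()
                     .sort_values(ascending=False))

# e.g.: operating_points(pd.read_csv("res_public_queries_AzureD8lds_v5.csv"))
```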
38 | 39 | Last evaluation date: March 1st, 2024 (includes all submissions until March 1st, 2024, AOE) -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/filter/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | yfcc-10M pinecone 85491.542780 [*] 4 | zilliz 84596.419213 [*] 5 | parlayivf 37902.113726 6 | puck 19193.294823 7 | hwtl_sdu_anns_filter 15059.124141 [*] 8 | wm_filter 14467.961514 9 | dhq 13670.864704 10 | fdufilterdiskann 5679.748583 11 | pyanns 5184.844352 12 | faissplus 3776.539092 13 | faiss 3032.534357 14 | cufe 2917.132715 15 | 16 | [*] not open source (binary only) 17 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/filter/yfcc-10M.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ongoing_leaderboard/filter/yfcc-10M.png -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/leaderboard.md: -------------------------------------------------------------------------------- 1 | ## Ongoing Leaderboard 2 | 3 | This leaderboard tracks the performance of submitted algorithms, starting with the NeurIPS'23 entries and including newer submissions. It is evaluated periodically; see details [below](#ongoing-leaderboard-rules). 4 | 5 | It uses the recall@10 vs. throughput benchmark that has become the standard way of evaluating and comparing approximate nearest neighbor algorithms. 6 | 7 | For the "Filter", "Out-of-Distribution" and "Sparse" tracks, algorithms were ranked on the QPS they achieve on the track dataset, as long as the recall@10 is at least 90%. 8 | The results file for [Azure D8lds_v5](Azure_D8lds_v5_table.md) lists the maximum QPS measured for each algorithm with at least 90% recall@10. 9 | 10 | For the Streaming track, algorithms were ranked on recall@10, as long as each algorithm completes the runbook within the allotted 1 hour. The leading entry had a recall of 0.99786, see details [below](#streaming-track). 11 | 12 | To see the original leaderboard of the NeurIPS'23 submissions (Dec. 2023), see [here](../leaderboard.md). 13 | 14 | QPS vs recall@10 plots for tracks based on public queries on Azure D8lds_v5: 15 | ### Filter track 16 | ![yfcc-10M](filter/yfcc-10M.png) 17 | Note: "pinecone", "zilliz" and "hwtl_sdu_anns_filter" are not open source 18 | 19 | ### OOD track 20 | ![text2image-10M](ood/text2image-10M.png) 21 | Note: "pinecone-ood" and "zilliz" are not open source 22 | 23 | ### Sparse track 24 | ![sparse-full](sparse/sparse-full.png) 25 | Note: "pinecone_smips" and "zilliz" are not open source 26 | 27 | ### Streaming track 28 | The [result file](streaming/res_final_runbook_AzureD8lds_v5.csv) lists measurements for all streaming algorithms on Azure D8lds_v5. 29 | 30 | ## Ongoing Leaderboard Rules 31 | 32 | The leaderboard is evaluated periodically, collecting new algorithm submissions. The current update is from March 1st, 2024. 33 | 34 | The rules of the ongoing leaderboard are similar to the original competition, with the following exceptions: 35 | - Open source is encouraged but not enforced. Closed-source entries will be marked as such. In any case, a short description of the algorithm is required.
36 | - We will only evaluate the public query sets in the new leaderboard (on the same VM type: Azure Standard D8lds v5 with 8 vcpus, 16 GiB memory) 37 | - To participate, simply send a PR with the new algorithm (no need for a CMT entry). 38 | 39 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/ood/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | text2image-10M hanns 46033.867231 [*] 4 | scann 42854.013477 5 | pinecone-ood 38087.669026 [*] 6 | zilliz 33240.822128 [*] 7 | mysteryann 22555.248017 8 | mysteryann-dif 22491.577263 9 | pyanns 22295.584534 10 | sustech-ood 13772.370641 11 | puck 8699.573200 12 | vamana 6753.344080 13 | ngt 6373.934425 14 | epsearch 5876.982706 15 | diskann 4132.829728 16 | cufe 3561.416286 17 | 18 | [*] not open source (binary only) 19 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/ood/text2image-10M.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ongoing_leaderboard/ood/text2image-10M.png -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/sparse/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | sparse-full zilliz 10749.188262 [*] 4 | pinecone_smips 10439.909652 [*] 5 | pyanns 8732.172708 6 | shnsw 7136.927865 7 | nle 2358.590429 8 | cufe 104.768194 9 | linscan 92.510615 10 | 11 | [*] not open source (binary only) 12 | -------------------------------------------------------------------------------- /neurips23/ongoing_leaderboard/sparse/sparse-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ongoing_leaderboard/sparse/sparse-full.png -------------------------------------------------------------------------------- /neurips23/ood/base.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base import BaseANN 2 | 3 | class BaseOODANN(BaseANN): 4 | def track(self): 5 | return "ood" -------------------------------------------------------------------------------- /neurips23/ood/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | ADD "https://github.com/AbdelrahmanMohamed129/DiskANN/tree/CUFE_OOD" latest_commit 10 | RUN git clone https://github.com/AbdelrahmanMohamed129/DiskANN --branch CUFE_OOD 11 | 12 | WORKDIR /home/app/DiskANN 13 | RUN pip3 install virtualenv build 14 | RUN python3 -m build 15 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 16 | WORKDIR /home/app 17 | 
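Aside: `BaseOODANN` above is the whole track-specific interface, and each `module`/`constructor` pair in the config.yaml files resolves to a wrapper class like the one sketched below. The `query`/`get_results` names follow the runner code shown earlier; the constructor signature, `fit`, and `set_query_arguments` are assumptions for illustration, and this is not the actual cufe (or any other) submission.

```python
# Illustrative only -- not an actual submission.
import numpy as np
from neurips23.ood.base import BaseOODANN

class ExampleOOD(BaseOODANN):
    """Referenced from config.yaml via `module:` (file path) and `constructor: ExampleOOD`."""

    def __init__(self, metric, index_params):
        self.metric = metric
        self.R = index_params.get("R", 32)      # one entry of `args`
        self.name = f"example-ood(R={self.R})"

    def fit(self, dataset):
        # Build the index from the dataset's base vectors (omitted in this sketch).
        pass

    def set_query_arguments(self, query_args):
        self.Ls = query_args.get("Ls", 50)      # one entry of `query-args`

    def query(self, X, k):
        # The runner times this call, then reads the result ids via get_results().
        self.res = np.zeros((X.shape[0], k), dtype=np.uint32)

    def get_results(self):
        return self.res
```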
-------------------------------------------------------------------------------- /neurips23/ood/cufe/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | cufe: 3 | docker-tag: neurips23-ood-cufe 4 | module: neurips23.ood.cufe.diskann-in-mem 5 | constructor: cufe 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "buildthreads":32}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | text2image-10M: 14 | cufe: 15 | docker-tag: neurips23-ood-cufe 16 | module: neurips23.ood.cufe.diskann-in-mem 17 | constructor: cufe 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":64, "L":500, "buildthreads":32}] 23 | query-args: | 24 | [{"Ls":30, "T":8}, 25 | {"Ls":50, "T":8}, 26 | {"Ls":70, "T":8}, 27 | {"Ls":100, "T":8}] 28 | -------------------------------------------------------------------------------- /neurips23/ood/diskann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/microsoft/DiskANN.git --branch 0.5.0.rc3.post1 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/ood/diskann/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | diskann: 3 | docker-tag: neurips23-ood-diskann 4 | module: neurips23.ood.diskann.diskann-in-mem 5 | constructor: diskann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "buildthreads":32}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | text2image-1M: 14 | diskann: 15 | docker-tag: neurips23-ood-diskann 16 | module: neurips23.ood.diskann.diskann-in-mem 17 | constructor: diskann 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":64, "L":500, "buildthreads":32}] 23 | query-args: | 24 | [{"Ls":30, "T":8}, 25 | {"Ls":50, "T":8}, 26 | {"Ls":70, "T":8}, 27 | {"Ls":100, "T":8}] 28 | text2image-10M: 29 | diskann: 30 | docker-tag: neurips23-ood-diskann 31 | module: neurips23.ood.diskann.diskann-in-mem 32 | constructor: diskann 33 | base-args: ["@metric"] 34 | run-groups: 35 | base: 36 | args: | 37 | [{"R":64, "L":500, "buildthreads":32}] 38 | query-args: | 39 | [{"Ls":30, "T":8}, 40 | {"Ls":50, "T":8}, 41 | {"Ls":70, "T":8}, 42 | {"Ls":100, "T":8}] 43 | -------------------------------------------------------------------------------- /neurips23/ood/epsearch/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | epsearch: 3 | docker-tag: neurips23-ood-epsearch 4 | module: neurips23.ood.epsearch.diskann-in-mem-ep-hnsw 5 | constructor: epdiskann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":30, "L":500, "alpha":1.2, "n_ep_candidates":32, "buildthreads":8, "ep_train":"id", "M":32, "efConstruction":200}] 11 | query-args: | 12 | [{"Ls":50, "T":8, "efSearch":4}] 
13 | text2image-10M: 14 | epsearch: 15 | docker-tag: neurips23-ood-epsearch 16 | module: neurips23.ood.epsearch.diskann-in-mem-ep-hnsw 17 | constructor: epdiskann 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":56, "L":500, "alpha":1.0, "n_ep_candidates":16384, "buildthreads":8, "ep_train":"id", "M":32, "efConstruction":200}] 23 | query-args: | 24 | [{"Ls":100, "T":8, "efSearch":32}, 25 | {"Ls":105, "T":8, "efSearch":32}, 26 | {"Ls":110, "T":8, "efSearch":16}, 27 | {"Ls":110, "T":8, "efSearch":32}, 28 | {"Ls":115, "T":8, "efSearch":16}, 29 | {"Ls":115, "T":8, "efSearch":32}, 30 | {"Ls":120, "T":8, "efSearch":32}, 31 | {"Ls":125, "T":8, "efSearch":32}, 32 | {"Ls":130, "T":8, "efSearch":32}, 33 | {"Ls":140, "T":8, "efSearch":32}] 34 | -------------------------------------------------------------------------------- /neurips23/ood/hanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y sudo software-properties-common python3.10 5 | RUN git clone https://github.com/AndrewHYu/Hanns.git 6 | RUN pip install ./Hanns/*.whl 7 | 8 | WORKDIR /home/app 9 | -------------------------------------------------------------------------------- /neurips23/ood/hanns/README.md: -------------------------------------------------------------------------------- 1 | # Hanns 2 | Our OOD track solution consists of a vamana index, a multi-scale spatial clustering index, and a layout-optimized quantization acceleration index. 3 | The retrieval process goes from coarse to fine. First, the vamana index is used to quickly find the nearest clusters. Then, within these clusters, the quantization-accelerated index is used for fast distance comparisons to identify the coarsely ranked candidates. Finally, SIMD instructions are used to re-rank these candidates, and the final results are returned.
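A rough sketch of that coarse-to-fine flow is below. It is illustrative only: the actual Hanns index is the prebuilt wheel installed in the Dockerfile above, none of these names come from it, and the `reorder` knob in the config.yaml that follows presumably plays the role of `rerank` here.

```python
# Illustrative coarse-to-fine re-ranking for inner-product search;
# not the Hanns implementation.
import numpy as np

def coarse_to_fine(q, candidate_ids, codes_u8, scale, xb, k=10, rerank=150):
    """candidate_ids: int array of ids from the coarse (cluster) stage.
    Score uint8-quantized codes first, keep `rerank` candidates,
    then re-score those against the full-precision vectors."""
    approx = (codes_u8[candidate_ids].astype(np.float32) * scale) @ q
    keep = candidate_ids[np.argsort(-approx)[:rerank]]
    exact = xb[keep] @ q          # exact inner product (SIMD-accelerated in practice)
    return keep[np.argsort(-exact)[:k]]
```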
4 | # Performance 5 | ![ood-track](https://github.com/AndrewHYu/Hanns/blob/main/pic/text2image-10M.png) 6 | -------------------------------------------------------------------------------- /neurips23/ood/hanns/config.yaml: -------------------------------------------------------------------------------- 1 | text2image-10M: 2 | hanns: 3 | docker-tag: neurips23-ood-hanns 4 | module: neurips23.ood.hanns.hanns 5 | constructor: Hanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | tree40k-config0: 9 | args: | 10 | [{"tree_size": 40000, "download": true, "config_id": 0}] 11 | query-args: | 12 | [{"leaves_to_search": 27, "reorder": 111}, 13 | {"leaves_to_search": 27, "reorder": 130}, 14 | {"leaves_to_search": 32, "reorder": 140}, 15 | {"leaves_to_search": 32, "reorder": 150}, 16 | {"leaves_to_search": 34, "reorder": 150}, 17 | {"leaves_to_search": 36, "reorder": 150}, 18 | {"leaves_to_search": 37, "reorder": 145}, 19 | {"leaves_to_search": 38, "reorder": 140}, 20 | {"leaves_to_search": 42, "reorder": 160}, 21 | {"leaves_to_search": 34, "reorder": 155}] 22 | -------------------------------------------------------------------------------- /neurips23/ood/install_neurips23.sh: -------------------------------------------------------------------------------- 1 | python install.py --neurips23track ood --algorithm cufe 2 | python install.py --neurips23track ood --algorithm diskann 3 | python install.py --neurips23track ood --algorithm epsearch 4 | python install.py --neurips23track ood --algorithm mysteryann 5 | python install.py --neurips23track ood --algorithm mysteryann-dif 6 | python install.py --neurips23track ood --algorithm ngt 7 | python install.py --neurips23track ood --algorithm puck 8 | python install.py --neurips23track ood --algorithm puck-fizz 9 | python install.py --neurips23track ood --algorithm pyanns 10 | python install.py --neurips23track ood --algorithm sustech-ood 11 | python install.py --neurips23track ood --algorithm vamana 12 | -------------------------------------------------------------------------------- /neurips23/ood/mysteryann-dif/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | WORKDIR /home/app 9 | RUN git clone --recursive --branch one_g_opt_diff_d_10 https://github.com/matchyc/mysteryann.git 10 | WORKDIR /home/app/mysteryann/pybindings 11 | RUN pip3 install virtualenv build 12 | RUN pip3 install pybind11[global] 13 | RUN pip3 install . 
14 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/ood/mysteryann-dif/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | mysteryann-dif: 3 | docker-tag: neurips23-ood-mysteryann-dif 4 | module: neurips23.ood.mysteryann-dif.mysteryann-dif 5 | constructor: mysteryann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M_pjbp":35, "L_pjpq":500, "NoT":5, "NoP":2, "T":8, "EoP":1}] 11 | query-args: | 12 | [{"L_pq":75, "T":8}, 13 | {"L_pq":78, "T":8}, 14 | {"L_pq":80, "T":8}, 15 | {"L_pq":82, "T":8}, 16 | {"L_pq":83, "T":8}, 17 | {"L_pq":85, "T":8}, 18 | {"L_pq":87, "T":8}, 19 | {"L_pq":89, "T":8}, 20 | {"L_pq":92, "T":8}] 21 | text2image-10M: 22 | mysteryann-dif: 23 | docker-tag: neurips23-ood-mysteryann-dif 24 | module: neurips23.ood.mysteryann-dif.mysteryann-dif 25 | constructor: mysteryann 26 | base-args: ["@metric"] 27 | run-groups: 28 | base: 29 | args: | 30 | [{"M_pjbp":45, "L_pjpq":800, "NoT": 5, "NoP": 3, "T": 8, "EoP": 1}] 31 | query-args: | 32 | [{"L_pq":80, "T":8}, 33 | {"L_pq":83, "T":8}, 34 | {"L_pq":85, "T":8}, 35 | {"L_pq":88, "T":8}, 36 | {"L_pq":90, "T":8}, 37 | {"L_pq":92, "T":8}, 38 | {"L_pq":93, "T":8}, 39 | {"L_pq":95, "T":8}, 40 | {"L_pq":100, "T":8}, 41 | {"L_pq":103, "T":8}, 42 | {"L_pq":107, "T":8}, 43 | {"L_pq":110, "T":8}, 44 | {"L_pq":115, "T":8}, 45 | {"L_pq":120, "T":8}] 46 | 47 | -------------------------------------------------------------------------------- /neurips23/ood/mysteryann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | WORKDIR /home/app 9 | RUN git clone --recursive --branch one_g_opt https://github.com/matchyc/mysteryann.git 10 | WORKDIR /home/app/mysteryann/pybindings 11 | RUN pip3 install virtualenv build 12 | RUN pip3 install pybind11[global] 13 | RUN pip3 install . 
14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/ood/mysteryann/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | mysteryann: 3 | docker-tag: neurips23-ood-mysteryann 4 | module: neurips23.ood.mysteryann.mysteryann 5 | constructor: mysteryann 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M_pjbp":35, "L_pjpq":500, "NoT":5, "NoP":2, "T":8, "EoP":1}] 11 | query-args: | 12 | [{"L_pq":75, "T":8}, 13 | {"L_pq":78, "T":8}, 14 | {"L_pq":80, "T":8}, 15 | {"L_pq":82, "T":8}, 16 | {"L_pq":83, "T":8}, 17 | {"L_pq":85, "T":8}, 18 | {"L_pq":87, "T":8}, 19 | {"L_pq":89, "T":8}, 20 | {"L_pq":92, "T":8}] 21 | text2image-10M: 22 | mysteryann: 23 | docker-tag: neurips23-ood-mysteryann 24 | module: neurips23.ood.mysteryann.mysteryann 25 | constructor: mysteryann 26 | base-args: ["@metric"] 27 | run-groups: 28 | base: 29 | args: | 30 | [{"M_pjbp":35, "L_pjpq":800, "NoT": 5, "NoP": 3, "T": 8, "EoP": 1}] 31 | query-args: | 32 | [{"L_pq":100, "T":8}, 33 | {"L_pq":110, "T":8}, 34 | {"L_pq":113, "T":8}, 35 | {"L_pq":115, "T":8}, 36 | {"L_pq":117, "T":8}, 37 | {"L_pq":118, "T":8}, 38 | {"L_pq":120, "T":8}, 39 | {"L_pq":123, "T":8}, 40 | {"L_pq":125, "T":8}, 41 | {"L_pq":128, "T":8}, 42 | {"L_pq":130, "T":8}, 43 | {"L_pq":133, "T":8}, 44 | {"L_pq":135, "T":8}, 45 | {"L_pq":140, "T":8}, 46 | {"L_pq":145, "T":8}, 47 | {"L_pq":150, "T":8}] 48 | 49 | -------------------------------------------------------------------------------- /neurips23/ood/ngt/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y git cmake liblapack-dev bc 5 | RUN pip3 install wheel pybind11 6 | RUN git clone https://github.com/masajiro/NGT-neurips23.git NGT 7 | RUN cd NGT && git log -n 1 8 | RUN cd NGT && mkdir build && cd build && cmake .. 
9 | RUN cd NGT/build && make -j 8 && make install 10 | RUN ldconfig 11 | RUN cd NGT/python && python3 setup.py bdist_wheel 12 | RUN pip3 install NGT/python/dist/ngt-*-linux_x86_64.whl 13 | -------------------------------------------------------------------------------- /neurips23/ood/ngt/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | ngt: 3 | docker-tag: neurips23-ood-ngt 4 | module: neurips23.ood.ngt.module 5 | constructor: NGT 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"edge": 50, "outdegree": 10, "indegree": 100, 11 | "epsilon": 0.1, "reduction": 0.39}] 12 | # "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-random-50-10-100-0.10-0.39.tgz"}] 13 | query-args: | 14 | [{"epsilon": 1.1}] 15 | text2image-10M: 16 | ngt: 17 | docker-tag: neurips23-ood-ngt 18 | module: neurips23.ood.ngt.module 19 | constructor: NGT 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"edge": 140, "outdegree": 10, "indegree": 175, 25 | "epsilon": 0.11, "reduction": 0.38}] 26 | # "url": "https://public-rlab.east.edge.storage-yahoo.jp/neurips23/indexes/onng-text2image-140-10-180-0.10-0.39.tgz"}] 27 | query-args: | 28 | [{"epsilon": 1.010}, 29 | {"epsilon": 1.014}, 30 | {"epsilon": 1.016}, 31 | {"epsilon": 1.017}, 32 | {"epsilon": 1.018}, 33 | {"epsilon": 1.020}, 34 | {"epsilon": 1.025}] 35 | -------------------------------------------------------------------------------- /neurips23/ood/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | text2image-10M cufe 3561.416286 4 | diskann 4132.829728 5 | epsearch 5876.982706 6 | mysteryann 22555.248017 7 | mysteryann-dif 22491.577263 8 | ngt 6373.934425 9 | puck 8699.573200 10 | pyanns 22295.584534 11 | sustech-ood 13772.370641 12 | vamana 6753.344080 13 | -------------------------------------------------------------------------------- /neurips23/ood/pinecone-ood/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | # copy and install the pys2 python package 10 | RUN git clone --branch ood https://github.com/pinecone-io/bigann.git 11 | RUN pip install ./bigann/*.whl 12 | # verify that the build worked 13 | RUN python3 -c 'import diskannpy;' 14 | RUN python3 -c 'import pys2;' 15 | -------------------------------------------------------------------------------- /neurips23/ood/pinecone-ood/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone OOD ANN algorithm 2 | 3 | Our solution for the OOD track is based on three main components – 4 | an inverted-file (IVF) index on the vector collection using a clustering algorithm tailored for inner-product search, 5 | a k-MIP (max inner product) graph constructed using the co-occurence of vectors as nearest neighbors for a set of 6 | training queries, and, quantization tailored for SIMD-based acceleration for fast scoring and retrieval. 7 | 8 | We perform retrieval in three stages. 
9 | First, we retrieve a small number of candidates from top clusters by scoring quantized vectors. 10 | Next, we use the k-MIP graph to “expand” the set of retrieved candidates by adding their neighbors 11 | in the graph to the candidate set. 12 | Finally, we score all the candidates by computing their distance to the query using a 13 | fine-grained quantized representation of the vectors. 14 | In addition, in order to accelerate the search, we process the queries in a batch to take advantage of cache locality. -------------------------------------------------------------------------------- /neurips23/ood/pinecone-ood/config.yaml: -------------------------------------------------------------------------------- 1 | text2image-1M: 2 | pinecone-ood: 3 | docker-tag: neurips23-ood-pinecone-ood 4 | module: neurips23.ood.pinecone-ood.s2_index 5 | constructor: S2_index 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"index_str":"OODIndex1024[HNSW32]_spillage=1"}] 11 | query-args: | 12 | [{"nprobe":"15", "kfactor":"4"}] 13 | text2image-10M: 14 | pinecone-ood: 15 | docker-tag: neurips23-ood-pinecone-ood 16 | module: neurips23.ood.pinecone-ood.s2_index 17 | constructor: S2_index 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"index_str":"OODIndex32768[HNSW32]_spillage=1"}] 23 | query-args: | 24 | [{"nprobe":"45", "kfactor":"1"}, 25 | {"nprobe":"52", "kfactor":"2"}, 26 | {"nprobe":"53", "kfactor":"2"}, 27 | {"nprobe":"54", "kfactor":"2"}, 28 | {"nprobe":"55", "kfactor":"2"}, 29 | {"nprobe":"60", "kfactor":"2"}, 30 | {"nprobe":"49", "kfactor":"3"}, 31 | {"nprobe":"50", "kfactor":"3"}, 32 | {"nprobe":"51", "kfactor":"3"}, 33 | {"nprobe":"52", "kfactor":"3"} 34 | ] 35 | -------------------------------------------------------------------------------- /neurips23/ood/plot_public_queries_AzureD8lds_v5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/ood/plot_public_queries_AzureD8lds_v5.png -------------------------------------------------------------------------------- /neurips23/ood/puck-fizz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | # COPY cmake-3.22.0-linux-x86_64.sh . 13 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 14 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 15 | ENV PATH /home/app/cmake/bin:$PATH 16 | 17 | #mkl 18 | # COPY l_onemkl_p_2023.2.0.49497_offline.sh . 19 | RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 20 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 21 | 22 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 23 | RUN ldconfig 24 | RUN touch /etc/profile.d/intel.sh 25 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 26 | RUN . 
/etc/profile.d/intel.sh 27 | 28 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 29 | #RUN git config --global http.sslVerify false 30 | 31 | RUN git clone -b ood-try https://github.com/baidu/puck.git 32 | # COPY puck-ood-feature.tar.gz . 33 | # RUN tar zxvf puck-ood-feature.tar.gz 34 | RUN cd puck && . /etc/profile.d/intel.sh && python3 setup.py install 35 | RUN python3 -c 'from puck import py_puck_api' 36 | -------------------------------------------------------------------------------- /neurips23/ood/puck-fizz/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | puck-fizz: 3 | docker-tag: neurips23-ood-puck-fizz 4 | module: neurips23.ood.puck-fizz.puck 5 | constructor: Puck 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":10,"tinker_construction":200}] 10 | query-args: | 11 | [ 12 | {"search_coarse_count":50, "tinker_search_range": 100}, 13 | {"search_coarse_count":50, "tinker_search_range": 200}, 14 | {"search_coarse_count":50, "tinker_search_range": 300} 15 | ] 16 | 17 | 18 | text2image-10M: 19 | puck-fizz: 20 | docker-tag: neurips23-ood-puck-fizz 21 | module: neurips23.ood.puck-fizz.puck 22 | constructor: Puck 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":16,"tinker_construction":200}] 27 | query-args: | 28 | [ 29 | {"search_coarse_count":10, "tinker_search_range": 160}, 30 | {"search_coarse_count":10, "tinker_search_range": 170}, 31 | {"search_coarse_count":10, "tinker_search_range": 180}, 32 | {"search_coarse_count":10, "tinker_search_range": 190} 33 | ] -------------------------------------------------------------------------------- /neurips23/ood/puck/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 13 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 14 | ENV PATH /home/app/cmake/bin:$PATH 15 | 16 | #mkl 17 | RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 18 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 19 | 20 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN touch /etc/profile.d/intel.sh 23 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 24 | RUN . /etc/profile.d/intel.sh 25 | 26 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 27 | #RUN git config --global http.sslVerify false 28 | 29 | RUN git clone -b ood https://github.com/baidu/puck.git 30 | RUN cd puck && . 
/etc/profile.d/intel.sh && python3 setup.py install 31 | RUN python3 -c 'from puck import py_puck_api' 32 | -------------------------------------------------------------------------------- /neurips23/ood/puck/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | puck: 3 | docker-tag: neurips23-ood-puck 4 | module: neurips23.ood.puck.puck 5 | constructor: Puck 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":16,"tinker_construction":200}] 10 | query-args: | 11 | [ 12 | {"search_coarse_count":50, "tinker_search_range": 100}, 13 | {"search_coarse_count":50, "tinker_search_range": 200}, 14 | {"search_coarse_count":50, "tinker_search_range": 300} 15 | ] 16 | 17 | 18 | text2image-10M: 19 | puck: 20 | docker-tag: neurips23-ood-puck 21 | module: neurips23.ood.puck.puck 22 | constructor: Puck 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: [{"index_type":2, "C":1000, "F":1000,"tinker_neighborhood":16,"tinker_construction":200}] 27 | query-args: | 28 | [ 29 | {"search_coarse_count":10, "tinker_search_range": 190}, 30 | {"search_coarse_count":10, "tinker_search_range": 160}, 31 | {"search_coarse_count":10, "tinker_search_range": 165}, 32 | {"search_coarse_count":10, "tinker_search_range": 170}, 33 | {"search_coarse_count":10, "tinker_search_range": 175} 34 | ] 35 | -------------------------------------------------------------------------------- /neurips23/ood/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/veaaaab/DiskANN.git --branch bigann23_ood 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | 16 | RUN apt update 17 | RUN apt install python-is-python3 18 | RUN git clone https://github.com/veaaaab/pyanns --branch master --depth 1 19 | WORKDIR /home/app/pyanns 20 | RUN pip install -r requirements.txt 21 | RUN bash build.sh 22 | WORKDIR /home/app 23 | 24 | RUN python3 -c 'import pyanns' 25 | 26 | WORKDIR /home/app 27 | 28 | -------------------------------------------------------------------------------- /neurips23/ood/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | pyanns: 3 | docker-tag: neurips23-ood-pyanns 4 | module: neurips23.ood.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50}] 11 | query-args: | 12 | [{"ef":30}, {"ef":50}, {"ef":100}] 13 | text2image-10M: 14 | pyanns: 15 | docker-tag: neurips23-ood-pyanns 16 | module: neurips23.ood.pyanns.pyanns 17 | constructor: Pyanns 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":48, "L":500, "buildthreads" : 8}] 23 | query-args: | 24 | [ 25 | {"ef":90}, 26 | {"ef":95}, 27 | {"ef":100}, 28 | {"ef":102}, 29 | {"ef":104}, 30 | {"ef":106}, 31 | {"ef":108}, 32 | {"ef":110}, 33 | {"ef":115}, 34 | {"ef":120}, 35 | 
{"ef":125}, 36 | {"ef":130} 37 | ] 38 | -------------------------------------------------------------------------------- /neurips23/ood/run.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base_runner import BaseRunner 2 | import time 3 | 4 | class OODRunner(BaseRunner): 5 | pass -------------------------------------------------------------------------------- /neurips23/ood/scann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN pip install --no-cache-dir scann==1.3.2 6 | 7 | WORKDIR /home/app 8 | -------------------------------------------------------------------------------- /neurips23/ood/scann/config.yaml: -------------------------------------------------------------------------------- 1 | text2image-10M: 2 | scann: 3 | docker-tag: neurips23-ood-scann 4 | module: neurips23.ood.scann.scann 5 | constructor: Scann 6 | base-args: ["@metric"] 7 | run-groups: 8 | tree40k-config0: 9 | args: | 10 | [{"tree_size": 40000, "download": true, "config_id": 0}] 11 | query-args: | 12 | [{"leaves_to_search": 35, "reorder": 150}, 13 | {"leaves_to_search": 35, "reorder": 155}, 14 | {"leaves_to_search": 36, "reorder": 150}, 15 | {"leaves_to_search": 37, "reorder": 145}, 16 | {"leaves_to_search": 38, "reorder": 140}, 17 | {"leaves_to_search": 34, "reorder": 155}] 18 | tree40k-config1: 19 | args: | 20 | [{"tree_size": 40000, "download": true, "config_id": 1}] 21 | query-args: | 22 | [{"leaves_to_search": 42, "reorder": 160}] 23 | tree40k-config2: 24 | args: | 25 | [{"tree_size": 40000, "download": true, "config_id": 2}] 26 | query-args: | 27 | [{"leaves_to_search": 27, "reorder": 140}] 28 | -------------------------------------------------------------------------------- /neurips23/ood/sustech-ood/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y libssl-dev 5 | RUN wget https://cmake.org/files/v3.23/cmake-3.23.1.tar.gz 6 | RUN tar -zxvf cmake-3.23.1.tar.gz 7 | WORKDIR /home/app/cmake-3.23.1 8 | RUN ./bootstrap --parallel=8 9 | RUN make -j4 10 | RUN make install 11 | WORKDIR /home/app 12 | RUN git clone -b faiss https://github.com/whateveraname/SUSTech-OOD.git --recursive 13 | WORKDIR /home/app/SUSTech-OOD 14 | RUN cmake -DCMAKE_BUILD_TYPE=Release . 
&& make -j4 15 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/ood/sustech-ood/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | sustech-ood: 3 | docker-tag: neurips23-ood-sustech-ood 4 | module: neurips23.ood.sustech-ood.SUSTech-OOD 5 | constructor: IndexGraphOOD 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M":5, "ef":100, "cluster_num":1}] 11 | query-args: | 12 | [{"ef":20, "nprobe":5}] 13 | text2image-10M: 14 | sustech-ood: 15 | docker-tag: neurips23-ood-sustech-ood 16 | module: neurips23.ood.sustech-ood.SUSTech-OOD 17 | constructor: IndexGraphOOD 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"M":20, "ef":1200, "cluster_num":1000}] 23 | query-args: | 24 | [{"ef":95, "nprobe":30}, 25 | {"ef":115, "nprobe":30}, 26 | {"ef":125, "nprobe":30}, 27 | {"ef":130, "nprobe":30}, 28 | {"ef":135, "nprobe":30}, 29 | {"ef":140, "nprobe":30}, 30 | {"ef":145, "nprobe":30}, 31 | {"ef":155, "nprobe":30}, 32 | {"ef":175, "nprobe":30}] 33 | -------------------------------------------------------------------------------- /neurips23/ood/vamana/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | 10 | ARG CACHEBUST=1 11 | RUN git clone -b ood_v2 https://github.com/cmuparlay/ParlayANN.git && cd ParlayANN && git submodule update --init --recursive && cd python && pip install pybind11 && bash compile.sh 12 | # WORKDIR /home/app/ParlayANN 13 | # RUN git submodule update --init --recursive 14 | # WORKDIR /home/app/ParlayANN/python 15 | 16 | # RUN pip install pybind11 17 | 18 | # RUN bash compile.sh 19 | 20 | ENV PYTHONPATH=$PYTHONPATH:/home/app/ParlayANN/python 21 | 22 | # ENV PARLAY_NUM_THREADS=8 23 | 24 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/ood/vamana/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | vamana: 3 | docker-tag: neurips23-ood-vamana 4 | module: neurips23.ood.vamana.vamana 5 | constructor: vamana 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":30, "L":50, "alpha":1.2}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | text2image-10M: 14 | vamana: 15 | docker-tag: neurips23-ood-vamana 16 | module: neurips23.ood.vamana.vamana 17 | constructor: vamana 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":55, "L":500, "alpha":1.0, "two_pass":1, "use_query_data":1, "compress":1}] 23 | query-args: | 24 | [ 25 | {"Ls":70, "T":8}, 26 | {"Ls":80, "T":8}, 27 | {"Ls":90, "T":8}, 28 | {"Ls":95, "T":8}, 29 | {"Ls":100, "T":8}, 30 | {"Ls":105, "T":8}, 31 | {"Ls":110, "T":8}, 32 | {"Ls":120, "T":8}, 33 | {"Ls":125, "T":8}, 34 | {"Ls":150, "T":8}] 35 | vamana-singlepass: 36 | docker-tag: neurips23-ood-vamana 37 | module: neurips23.ood.vamana.vamana 38 | constructor: vamana 39 | base-args: ["@metric"] 40 | run-groups: 41 | base: 42 | args: | 43 | [{"R":64, "L":500}] 44 | query-args: | 45 | [{"Ls":30, "T":8}, 46 | {"Ls":50, "T":8}, 47 | 
{"Ls":70, "T":8}, 48 | {"Ls":100, "T":8}, 49 | {"Ls":113, "T":8}, 50 | {"Ls":125, "T":8}, 51 | {"Ls":150, "T":8}, 52 | {"Ls":175, "T":8}, 53 | {"Ls":200, "T":8}] 54 | 55 | -------------------------------------------------------------------------------- /neurips23/ood/zilliz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 7 | 8 | RUN apt install python-is-python3 9 | RUN git clone https://github.com/hhy3/zilliz-bigann.git --branch ood --depth 1 10 | RUN pip install ./zilliz-bigann/*.whl 11 | 12 | RUN python3 -c 'import ood_searcher' 13 | -------------------------------------------------------------------------------- /neurips23/ood/zilliz/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | zilliz: 3 | docker-tag: neurips23-ood-zilliz 4 | module: neurips23.ood.zilliz.zilliz 5 | constructor: Zilliz 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50}] 11 | query-args: | 12 | [{"ef":30}, {"ef":50}, {"ef":100}] 13 | text2image-10M: 14 | zilliz: 15 | docker-tag: neurips23-ood-zilliz 16 | module: neurips23.ood.zilliz.zilliz 17 | constructor: Zilliz 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"R":48, "L":500, "buildthreads" : 8}] 23 | query-args: | 24 | [ 25 | {"ef":90}, 26 | {"ef":95}, 27 | {"ef":100}, 28 | {"ef":102}, 29 | {"ef":104}, 30 | {"ef":106}, 31 | {"ef":108}, 32 | {"ef":110}, 33 | {"ef":115}, 34 | {"ef":120} 35 | ] 36 | -------------------------------------------------------------------------------- /neurips23/runbooks/generate_msturing10m_runbooks.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | 4 | 5 | dataset_name="msturing-10M" 6 | 7 | data = {dataset_name: {}} 8 | 9 | total_points=10000000 10 | 11 | num_points=0 12 | max_num_points=0 13 | 14 | 15 | max_t=200 16 | # insert 10000000/200 points per step 17 | # start deleting points after 100 steps 18 | 19 | t=1 20 | for i in range(max_t): 21 | if i>=max_t//2: 22 | data[dataset_name][t]={ 23 | 'operation': 'search', 24 | } 25 | t+=1 26 | data[dataset_name][t]={ 27 | 'operation': 'delete', 28 | 'start': (i-max_t//2)*(total_points//max_t), 29 | 'end': (i-max_t//2+1)*(total_points//max_t) 30 | } 31 | t+=1 32 | num_points-=total_points//max_t 33 | data[dataset_name][t]={ 34 | 'operation': 'insert', 35 | 'start': i*(total_points//max_t), 36 | 'end': (i+1)*(total_points//max_t) 37 | } 38 | t+=1 39 | 40 | num_points+=total_points//max_t 41 | max_num_points=max(max_num_points,num_points) 42 | 43 | data[dataset_name]["max_pts"]=max_num_points 44 | 45 | run_book_name=dataset_name+"_"+"slidingwindow_runbook.yaml" 46 | 47 | with open(run_book_name, 'w') as outfile: 48 | yaml.dump(data, outfile, default_flow_style=False) 49 | 50 | 51 | -------------------------------------------------------------------------------- /neurips23/runbooks/simple_replace_runbook.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | max_pts: 10000 3 | 1: 4 | operation: "insert" 5 | start: 0 6 | end: 7500 7 | 2: 8 | operation: 
"search" 9 | 3: 10 | operation: "replace" 11 | tags_start: 0 12 | tags_end: 2500 13 | ids_start: 7500 14 | ids_end: 10000 15 | 4: 16 | operation: "search" 17 | 5: 18 | operation: "replace" 19 | tags_start: 0 20 | tags_end: 2500 21 | ids_start: 0 22 | ids_end: 2500 23 | 6: 24 | operation: "search" 25 | 7: 26 | operation: "delete" 27 | start: 2500 28 | end: 5000 29 | 8: 30 | operation: "search" -------------------------------------------------------------------------------- /neurips23/sparse/base.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base import BaseANN 2 | 3 | class BaseSparseANN(BaseANN): 4 | def track(self): 5 | return "sparse" -------------------------------------------------------------------------------- /neurips23/sparse/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y sudo build-essential git axel wget curl 5 | 6 | 7 | # for python 3.10 8 | RUN sudo apt install software-properties-common -y 9 | RUN sudo add-apt-repository ppa:deadsnakes/ppa 10 | RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata 11 | RUN sudo apt-get -y install python3.10 12 | RUN apt-get -y install python3-numpy python3-scipy python3-pip 13 | 14 | # Get Rust; NOTE: using sh for better compatibility with other base images 15 | RUN curl https://sh.rustup.rs -sSf | sh -s -- -y 16 | 17 | # Add .cargo/bin to PATH 18 | ENV PATH="/root/.cargo/bin:${PATH}" 19 | 20 | # git clone a single branch 21 | RUN git clone --single-branch --branch main https://github.com/MichaelIbrahim-GaTech/research-bigann-linscan.git 22 | WORKDIR research-bigann-linscan/ 23 | 24 | # fix python3 link (required for pyo3) 25 | RUN ln -fs /usr/bin/python3.10 /usr/bin/python3 26 | 27 | # fix pip3 28 | RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 29 | 30 | RUN pip3 install maturin 31 | 32 | # build a whl file 33 | RUN maturin build -r 34 | 35 | # pip install the resulting whl file (regardless of the architecture) 36 | RUN for whl in target/wheels/*.whl; do pip3 install $whl; done 37 | 38 | RUN pip3 install -r requirements.txt 39 | 40 | # verify that the build worked 41 | RUN python3 -c 'import pylinscancufe; print(pylinscancufe.LinscanIndex());' 42 | 43 | WORKDIR .. 
44 | -------------------------------------------------------------------------------- /neurips23/sparse/cufe/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | cufe: 3 | docker-tag: neurips23-sparse-cufe 4 | module: neurips23.sparse.cufe.linscan 5 | constructor: LinscanCUFE 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{}] 11 | query-args: | 12 | [{"budget":1},{"budget":0.5},{"budget":0.4},{"budget":0.3},{"budget":0.25},{"budget":0.2},{"budget":0.15},{"budget":0.1},{"budget":0.075},{"budget":0.05}] 13 | sparse-1M: 14 | cufe: 15 | docker-tag: neurips23-sparse-cufe 16 | module: neurips23.sparse.cufe.linscan 17 | constructor: LinscanCUFE 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{}] 23 | query-args: | 24 | [{"budget":0.5},{"budget":1},{"budget":2},{"budget":4},{"budget":5},{"budget":6},{"budget":7},{"budget":8},{"budget":10}] 25 | sparse-full: 26 | cufe: 27 | docker-tag: neurips23-sparse-cufe 28 | module: neurips23.sparse.cufe.linscan 29 | constructor: LinscanCUFE 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{}] 35 | query-args: | 36 | [{"budget":5},{"budget":15},{"budget":35},{"budget":50},{"budget":52.5},{"budget":55},{"budget":57.5},{"budget":60},{"budget":90},{"budget":500}] 37 | 38 | -------------------------------------------------------------------------------- /neurips23/sparse/cufe/linscan.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | 5 | from benchmark.algorithms.base import BaseANN 6 | from benchmark.datasets import DATASETS 7 | import pylinscancufe 8 | 9 | # a python wrapper for the linscan algorithm, implemented in rust 10 | # algorithm details: https://arxiv.org/abs/2301.10622 11 | # code: https://github.com/pinecone-io/research-bigann-linscan 12 | 13 | # Build parameters: none 14 | # Query parameters: budget (in ms) for computing all the scores 15 | class LinscanCUFE(BaseANN): 16 | def __init__(self, metric, index_params): 17 | assert metric == "ip" 18 | self.name = "cufe_linscan" 19 | self._index = pylinscancufe.LinscanIndex() 20 | self._budget = np.infty 21 | self.scale = 32767/3.579759 # need to iterate over the dataset to get the maximum value 3.57959 22 | print("Linscan index initialized: " + str(self._index)) 23 | 24 | def fit(self, dataset): # e.g. 
dataset = "sparse-small" 25 | 26 | self.ds = DATASETS[dataset]() 27 | assert self.ds.data_type() == "sparse" 28 | 29 | 30 | N_VEC_LIMIT = 100000 # batch size 31 | it = self.ds.get_dataset_iterator(N_VEC_LIMIT) 32 | for d in it: 33 | for i in range(d.shape[0]): 34 | d1 = d.getrow(i) 35 | self._index.insert(dict(zip(d1.indices, np.round(d1.data*self.scale).astype(int)))) 36 | 37 | print("Index status: " + str(self._index)) 38 | 39 | 40 | def load_index(self, dataset): 41 | return None 42 | 43 | def set_query_arguments(self, query_args): 44 | self._budget = query_args["budget"] 45 | 46 | def query(self, X, k): 47 | """Carry out a batch query for k-NN of query set X.""" 48 | threshold_mult = int(np.round(0.4776719*self.scale)) # The mean of the training data is 0.4776719 and the median of the training data is 0.30324435 49 | nq = X.shape[0] 50 | 51 | # prepare the queries as a list of dicts 52 | self.queries = [] 53 | for i in range(nq): 54 | qc = X.getrow(i) 55 | q = dict(zip(qc.indices, np.round(qc.data*self.scale).astype(int))) 56 | self.queries.append(q) 57 | 58 | res = self._index.retrieve_parallel(self.queries, k, threshold_mult, self._budget) 59 | self.I = np.array(res, dtype='int32') 60 | 61 | def get_results(self): 62 | return self.I 63 | -------------------------------------------------------------------------------- /neurips23/sparse/install_neurips23.sh: -------------------------------------------------------------------------------- 1 | python install.py --neurips23track sparse --algorithm cufe 2 | python install.py --neurips23track sparse --algorithm linscan 3 | python install.py --neurips23track sparse --algorithm nle 4 | python install.py --neurips23track sparse --algorithm pyanns 5 | python install.py --neurips23track sparse --algorithm shnsw 6 | python install.py --neurips23track sparse --algorithm spmat 7 | python install.py --neurips23track sparse --algorithm sustech-whu 8 | -------------------------------------------------------------------------------- /neurips23/sparse/linscan/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update && apt-get install -y curl 4 | 5 | # install rust + build tools 6 | RUN curl https://sh.rustup.rs -sSf | sh -s -- -y 7 | ENV PATH="/root/.cargo/bin:${PATH}" 8 | RUN git clone --single-branch --branch main https://github.com/pinecone-io/research-bigann-linscan 9 | WORKDIR research-bigann-linscan/ 10 | 11 | # install maturin (build tool for rust-python) 12 | RUN pip install maturin 13 | 14 | # build a whl file 15 | RUN maturin build -r 16 | 17 | # pip install the correct wheel (different architectures will produce .whl files with different names) 18 | RUN pip install ./target/wheels/*.whl 19 | 20 | # verify that the build worked 21 | RUN python3 -c 'import pylinscan; print(pylinscan.LinscanIndex());' 22 | 23 | WORKDIR .. 
-------------------------------------------------------------------------------- /neurips23/sparse/linscan/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | linscan: 3 | docker-tag: neurips23-sparse-linscan 4 | module: neurips23.sparse.linscan.linscan 5 | constructor: Linscan 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{}] 11 | query-args: | 12 | [{"budget":1},{"budget":0.5},{"budget":0.4},{"budget":0.3},{"budget":0.25},{"budget":0.2},{"budget":0.15},{"budget":0.1},{"budget":0.075},{"budget":0.05}] 13 | sparse-1M: 14 | linscan: 15 | docker-tag: neurips23-sparse-linscan 16 | module: neurips23.sparse.linscan.linscan 17 | constructor: Linscan 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{}] 23 | query-args: | 24 | [{"budget":0.5},{"budget":1},{"budget":2},{"budget":4},{"budget":5},{"budget":6},{"budget":7},{"budget":8},{"budget":10}] 25 | sparse-full: 26 | linscan: 27 | docker-tag: neurips23-sparse-linscan 28 | module: neurips23.sparse.linscan.linscan 29 | constructor: Linscan 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{}] 35 | query-args: | 36 | [{"budget":5},{"budget":15},{"budget":35},{"budget":50},{"budget":52.5},{"budget":55},{"budget":57.5},{"budget":60},{"budget":90},{"budget":500}] 37 | -------------------------------------------------------------------------------- /neurips23/sparse/linscan/linscan.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | 5 | from benchmark.algorithms.base import BaseANN 6 | from benchmark.datasets import DATASETS 7 | import pylinscan 8 | 9 | # a python wrapper for the linscan algorithm, implemented in rust 10 | # algorithm details: https://arxiv.org/abs/2301.10622 11 | # code: https://github.com/pinecone-io/research-bigann-linscan 12 | 13 | # Build parameters: none 14 | # Query parameters: budget (in ms) for computing all the scores 15 | class Linscan(BaseANN): 16 | def __init__(self, metric, index_params): 17 | assert metric == "ip" 18 | self.name = "linscan" 19 | self._index = pylinscan.LinscanIndex() 20 | self._budget = np.infty 21 | print("Linscan index initialized: " + str(self._index)) 22 | 23 | def fit(self, dataset): # e.g. 
dataset = "sparse-small" 24 | 25 | self.ds = DATASETS[dataset]() 26 | assert self.ds.data_type() == "sparse" 27 | 28 | N_VEC_LIMIT = 100000 # batch size 29 | it = self.ds.get_dataset_iterator(N_VEC_LIMIT) 30 | for d in it: 31 | for i in range(d.shape[0]): 32 | d1 = d.getrow(i) 33 | self._index.insert(dict(zip(d1.indices, d1.data))) 34 | 35 | print("Index status: " + str(self._index)) 36 | 37 | 38 | def load_index(self, dataset): 39 | return None 40 | 41 | def set_query_arguments(self, query_args): 42 | self._budget = query_args["budget"] 43 | 44 | def query(self, X, k): 45 | """Carry out a batch query for k-NN of query set X.""" 46 | nq = X.shape[0] 47 | 48 | # prepare the queries as a list of dicts 49 | self.queries = [] 50 | for i in range(nq): 51 | qc = X.getrow(i) 52 | q = dict(zip(qc.indices, qc.data)) 53 | self.queries.append(q) 54 | 55 | res = self._index.retrieve_parallel(self.queries, k, self._budget) 56 | self.I = np.array(res, dtype='int32') 57 | 58 | def get_results(self): 59 | return self.I 60 | -------------------------------------------------------------------------------- /neurips23/sparse/nle/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | 5 | RUN apt-get install -y curl git build-essential libpcre3-dev cmake libtool automake libatlas3-base libatlas-base-dev libstdc++-12-dev patchelf ninja-build libtbb2 6 | 7 | RUN pip3 install scikit-build 8 | 9 | #RUN apt-get install -y curl git openjdk-11-jdk build-essential libpcre3-dev cmake libtool automake libatlas3-base libatlas-base-dev libstdc++-12-dev patchelf ninja-build libtbb2 10 | 11 | RUN git clone https://github.com/cadurosar/tttt.git /workspace/tttt && cd /workspace/tttt && bash build.sh 12 | 13 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/sparse/operating_points_private_queries_AzureD8lds_v5.csv: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | sparse-full NLE-Full 1314.194166 4 | cufe 97.860465 5 | linscan 95.098871 6 | nle 1312.961060 7 | pyanns 6499.652881 8 | shnsw 5078.449772 9 | sustech-whu 788.168885 10 | -------------------------------------------------------------------------------- /neurips23/sparse/operating_points_public_queries_AzureD8lds_v5.txt: -------------------------------------------------------------------------------- 1 | qps 2 | dataset algorithm 3 | sparse-full cufe 104.768194 4 | linscan 92.510615 5 | nle 2358.590429 6 | pyanns 8732.172708 7 | shnsw 7136.927865 8 | -------------------------------------------------------------------------------- /neurips23/sparse/pinecone_smips/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update && apt-get install -y curl 4 | 5 | # download and install the whl file 6 | RUN git clone --branch sparse https://github.com/pinecone-io/bigann.git 7 | RUN pip install ./bigann/*.whl 8 | 9 | # verify that the build worked 10 | RUN python3 -c 'import py_pinecone_smips;' 11 | -------------------------------------------------------------------------------- /neurips23/sparse/pinecone_smips/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone Sparse ANN algorithm 2 | 3 | Our algorithm for the Sparse track is based on our very own research [[1](https://dl.acm.org/doi/10.1145/3609797), 
[2](https://arxiv.org/abs/2309.09013)]. 4 | In particular, we cluster sparse vectors, build an inverted index that is organized using our [novel structure](https://arxiv.org/abs/2309.09013) 5 | and query the index by first solving the top cluster retrieval problem, then finding the top-k vectors within those clusters using an anytime retrieval algorithm over the inverted index. 6 | 7 | We also augment the index above with two additional lightweight components. 8 | First, we use a k-MIP graph (where every vector is connected to k other vectors that maximize inner product with it) 9 | to “expand” the set of retrieved top-k vectors from the last step. 10 | Second, we re-rank the expanded set using a compressed forward index. 11 | In effect, our final solution is a hybrid of IVF- and graph-based methods, 12 | where the IVF stage provides a set of entry nodes into the graph. -------------------------------------------------------------------------------- /neurips23/sparse/pinecone_smips/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-full: 2 | pinecone_smips: 3 | docker-tag: neurips23-sparse-pinecone_smips 4 | module: neurips23.sparse.pinecone_smips.pinecone_smips 5 | constructor: PineconeSMIPS 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"num_threads": 8, "index_path": "data/pinecone/sparse/index/"}] 11 | query-args: | 12 | [ 13 | {"nprobe":8, "top_kprime":26, "ip_budget": 350}, 14 | {"nprobe":8, "top_kprime":28, "ip_budget": 325}, 15 | {"nprobe":8, "top_kprime":30, "ip_budget": 300}, 16 | {"nprobe":8, "top_kprime":30, "ip_budget": 280}, 17 | {"nprobe":8, "top_kprime":30, "ip_budget": 260}, 18 | {"nprobe":8, "top_kprime":30, "ip_budget": 240}, 19 | {"nprobe":8, "top_kprime":30, "ip_budget": 220}, 20 | {"nprobe":8, "top_kprime":30, "ip_budget": 200}, 21 | {"nprobe":8, "top_kprime":32, "ip_budget": 280}, 22 | {"nprobe":8, "top_kprime":34, "ip_budget": 260}] -------------------------------------------------------------------------------- /neurips23/sparse/plot_public_queries_AzureD8lds_v5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/sparse/plot_public_queries_AzureD8lds_v5.png -------------------------------------------------------------------------------- /neurips23/sparse/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install python-is-python3 5 | RUN git clone https://github.com/veaaaab/pyanns.git --branch sparse --depth 1 6 | WORKDIR /home/app/pyanns 7 | RUN pip install -r requirements.txt 8 | RUN bash build.sh 9 | 10 | RUN python3 -c 'import pyanns' 11 | 12 | WORKDIR /home/app 13 | -------------------------------------------------------------------------------- /neurips23/sparse/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | pyanns: 3 | docker-tag: neurips23-sparse-pyanns 4 | module: neurips23.sparse.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{}] 11 | query-args: | 12 | [ 13 | {"budget": 0.1, "ef" : 80} 14 | ] 15 | sparse-1M: 16 | pyanns: 17 | docker-tag: neurips23-sparse-pyanns 18 | module: neurips23.sparse.pyanns.pyanns 19 | constructor: Pyanns 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | 
args: | 24 | [{}] 25 | query-args: | 26 | [ 27 | {"budget": 0.1, "ef" : 80} 28 | ] 29 | 30 | sparse-full: 31 | pyanns: 32 | docker-tag: neurips23-sparse-pyanns 33 | module: neurips23.sparse.pyanns.pyanns 34 | constructor: Pyanns 35 | base-args: ["@metric"] 36 | run-groups: 37 | base: 38 | args: | 39 | [{}] 40 | query-args: | 41 | [ 42 | {"budget": 0.08, "ef" : 50}, 43 | {"budget": 0.08, "ef" : 65}, 44 | {"budget": 0.08, "ef" : 70}, 45 | {"budget": 0.1, "ef" : 50}, 46 | {"budget": 0.1, "ef" : 55}, 47 | {"budget": 0.1, "ef" : 60}, 48 | {"budget": 0.1, "ef" : 65}, 49 | {"budget": 0.1, "ef" : 70}, 50 | {"budget": 0.1, "ef" : 75}, 51 | {"budget": 0.1, "ef" : 80} 52 | ] 53 | -------------------------------------------------------------------------------- /neurips23/sparse/pyanns/pyanns.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import gc 4 | import os 5 | import numpy as np 6 | 7 | from benchmark.algorithms.base import BaseANN 8 | from benchmark.datasets import DATASETS 9 | import pyanns 10 | 11 | class Pyanns(BaseANN): 12 | def __init__(self, metric, index_params): 13 | assert metric == "ip" 14 | self.name = "pyanns" 15 | 16 | def fit(self, dataset): # e.g. dataset = "sparse-small" 17 | 18 | self.ds = DATASETS[dataset]() 19 | assert self.ds.data_type() == "sparse" 20 | 21 | print("start add") 22 | path = 'hnsw_sparse.idx' 23 | 24 | self.searcher = pyanns.SparseGrapSearcher(self.ds.get_dataset_fn(), path) 25 | print("done add") 26 | 27 | def load_index(self, dataset): 28 | return None 29 | 30 | def set_query_arguments(self, query_args): 31 | self._budget = query_args["budget"] 32 | self.ef = query_args["ef"] 33 | self.searcher.set_ef(self.ef) 34 | 35 | def query(self, X, k): 36 | """Carry out a batch query for k-NN of query set X.""" 37 | nq = X.shape[0] 38 | self.res = self.searcher.search_batch(nq, X.indptr, X.indices, X.data, k, self._budget).reshape(-1, k) 39 | 40 | def get_results(self): 41 | return self.res 42 | 43 | def __str__(self): 44 | return f'pyanns_qdrop{self._budget}_ef{self.ef}' 45 | -------------------------------------------------------------------------------- /neurips23/sparse/run.py: -------------------------------------------------------------------------------- 1 | from benchmark.algorithms.base_runner import BaseRunner 2 | 3 | class SparseRunner(BaseRunner): 4 | pass -------------------------------------------------------------------------------- /neurips23/sparse/shnsw/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get install -y python-setuptools python-pip 5 | RUN pip3 install pybind11 numpy setuptools 6 | RUN git clone https://github.com/Leslie-Chung/SHNSW.git 7 | 8 | WORKDIR SHNSW 9 | RUN pip3 install . 
10 | 11 | RUN python3 -c 'import sparse_hnswlib' 12 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/sparse/shnsw/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | shnsw: 3 | docker-tag: neurips23-sparse-shnsw 4 | module: neurips23.sparse.shnsw.shnsw 5 | constructor: SparseHNSW 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M": 16, "efConstruction": 200, "buildthreads": 8}] 11 | query-args: 12 | [[10, 20, 40, 70, 75, 80, 85, 90, 100]] 13 | sparse-1M: 14 | shnsw: 15 | docker-tag: neurips23-sparse-shnsw 16 | module: neurips23.sparse.shnsw.shnsw 17 | constructor: SparseHNSW 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"M": 16, "efConstruction": 200, "buildthreads": 8}] 23 | query-args: 24 | [[10, 20, 40, 60, 70, 75, 80, 90, 100]] 25 | sparse-full: 26 | shnsw: 27 | docker-tag: neurips23-sparse-shnsw 28 | module: neurips23.sparse.shnsw.shnsw 29 | constructor: SparseHNSW 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{"M": 16, "efConstruction": 1000, "buildthreads": 8}] 35 | query-args: 36 | [[20, 40, 45, 48, 50, 52, 55, 57, 70, 75, 80, 85, 90]] -------------------------------------------------------------------------------- /neurips23/sparse/shnsw/shnsw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sparse_hnswlib 3 | import numpy as np 4 | from neurips23.sparse.base import BaseSparseANN 5 | from benchmark.datasets import DATASETS, download_accelerated 6 | 7 | 8 | class SparseHNSW(BaseSparseANN): 9 | def __init__(self, metric, method_param): 10 | assert metric == "ip" 11 | self.method_param = method_param 12 | self.name = "sparse_hnswlib" 13 | self.efC = self.method_param["efConstruction"] 14 | self.M = self.method_param["M"] 15 | self.nt = self.method_param["buildthreads"] 16 | 17 | def fit(self, dataset): 18 | print("begin fit") 19 | ds = DATASETS[dataset]() 20 | 21 | p = sparse_hnswlib.Index(space="ip", dim=8) 22 | p.init_index( 23 | max_elements=ds.nb, 24 | csr_path=ds.get_dataset_fn(), 25 | ef_construction=self.efC, 26 | M=self.M, 27 | ) 28 | p.add_items(num_threads=self.nt) 29 | 30 | index_dir = os.path.join(os.getcwd(), "data", "indices", "sparse", "shnsw") 31 | index_path = os.path.join(index_dir, "shnsw-{}-{}-{}".format(dataset, self.efC, self.M)) 32 | if not os.path.exists(index_dir): 33 | os.makedirs(index_dir, mode=0o777, exist_ok=True) 34 | p.save_index(index_path) 35 | self.p = p 36 | 37 | def load_index(self, dataset): 38 | print("begin load") 39 | index_dir = os.path.join(os.getcwd(), "data", "indices", "sparse", "shnsw") 40 | index_path = os.path.join(index_dir, "shnsw-{}-{}-{}".format(dataset, self.efC, self.M)) 41 | if not os.path.exists(index_dir): 42 | return False 43 | if not os.path.exists(index_path): 44 | return False 45 | ds = DATASETS[dataset]() 46 | X = ds.get_dataset() 47 | self.p = sparse_hnswlib.Index(space="ip", dim=8) 48 | print("#########") 49 | self.p.load_index(index_path) 50 | print("!!!!!!!!") 51 | return True 52 | 53 | def set_query_arguments(self, parameters): 54 | print("开始 set") 55 | ef = parameters 56 | self.p.set_ef(ef) 57 | 58 | def query(self, X, topK): 59 | # N, _ = X.shape 60 | self.I, _ = self.p.knn_query(X.indptr, X.indices, X.data, k=topK, num_threads=self.nt) 61 | 62 | def get_results(self): 63 | return self.I 
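SparseHNSW, like the other entries here, is exercised by the harness through the BaseANN lifecycle: construct with the metric and build parameters, fit or load_index, then for each query-args value call set_query_arguments, query, and get_results. A rough standalone sketch of that flow, assuming sparse_hnswlib is installed and the sparse-small dataset has been prepared under data/ (normally this all happens inside the Docker container), with parameter values mirroring the config above:

# Rough sketch of the call sequence the benchmark harness drives for this wrapper.
from benchmark.datasets import DATASETS
from neurips23.sparse.shnsw.shnsw import SparseHNSW

algo = SparseHNSW("ip", {"M": 16, "efConstruction": 200, "buildthreads": 8})
if not algo.load_index("sparse-small"):   # reuse a previously saved index if present
    algo.fit("sparse-small")              # otherwise build it and save under data/indices/

queries = DATASETS["sparse-small"]().get_queries()
for ef in (10, 40, 100):                  # one pass per query-args entry in config.yaml
    algo.set_query_arguments(ef)
    algo.query(queries, 10)               # k = 10 nearest neighbors per query
    ids = algo.get_results()              # (nq, k) array of neighbor ids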
-------------------------------------------------------------------------------- /neurips23/sparse/spmat/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 5 | 6 | RUN pip3 install scipy 7 | 8 | -------------------------------------------------------------------------------- /neurips23/sparse/spmat/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | spmat: 3 | docker-tag: neurips23-sparse-spmat 4 | module: neurips23.sparse.spmat.spmat 5 | constructor: SparseMatMul 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"threads": 8}] 11 | query-args: | 12 | [{"alpha":0.5}, {"alpha":0.6}, {"alpha":0.7}, {"alpha":0.8}, {"alpha":0.9}, {"alpha":0.92}, {"alpha":0.94}, {"alpha":0.96}, {"alpha":0.98}, {"alpha":1.0}] -------------------------------------------------------------------------------- /neurips23/sparse/sustech-whu/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y libssl-dev cmake 5 | 6 | WORKDIR /home/app 7 | RUN git clone https://github.com/lizzy-0323/SUSTech-WHU-Sparse.git --recursive 8 | WORKDIR /home/app/SUSTech-WHU-Sparse 9 | RUN cmake -DCMAKE_BUILD_TYPE=Release . && make -j4 10 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/sparse/sustech-whu/config.yaml: -------------------------------------------------------------------------------- 1 | sparse-small: 2 | sustech-whu: 3 | docker-tag: neurips23-sparse-sustech-whu 4 | module: neurips23.sparse.sustech-whu.SUSTech-WHU 5 | constructor: HnswSparse 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"M":16,"ef":200}] 11 | query-args: | 12 | [{"ef":38},{"ef":40},{"ef":42},{"ef":48},{"ef":50},{"ef":59},{"ef":65},{"ef":70},{"ef":80},{"ef":82}] 13 | sparse-1M: 14 | sustech-whu: 15 | docker-tag: neurips23-sparse-sustech-whu 16 | module: neurips23.sparse.sustech-whu.SUSTech-WHU 17 | constructor: HnswSparse 18 | base-args: ["@metric"] 19 | run-groups: 20 | base: 21 | args: | 22 | [{"M":20,"ef":200}] 23 | query-args: | 24 | [{"ef":48},{"ef":50},{"ef":52},{"ef":55},{"ef":58},{"ef":50},{"ef":62},{"ef":65},{"ef":75},{"ef":80}] 25 | sparse-full: 26 | sustech-whu: 27 | docker-tag: neurips23-sparse-sustech-whu 28 | module: neurips23.sparse.sustech-whu.SUSTech-WHU 29 | constructor: HnswSparse 30 | base-args: ["@metric"] 31 | run-groups: 32 | base: 33 | args: | 34 | [{"M":20,"ef":1200}] 35 | query-args: | 36 | [{"ef":35},{"ef":40},{"ef":43},{"ef":45},{"ef":48},{"ef":50},{"ef":55},{"ef":65},{"ef":75},{"ef":80}] 37 | -------------------------------------------------------------------------------- /neurips23/sparse/zilliz/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install python-is-python3 5 | RUN git clone https://github.com/hhy3/zilliz-bigann.git --branch sparse --depth 1 6 | RUN pip install ./zilliz-bigann/*.whl 7 | 8 | RUN python3 -c 'import sparse_searcher' 9 | 10 | -------------------------------------------------------------------------------- /neurips23/sparse/zilliz/config.yaml: 
-------------------------------------------------------------------------------- 1 | sparse-small: 2 | zilliz: 3 | docker-tag: neurips23-sparse-zilliz 4 | module: neurips23.sparse.zilliz.zilliz 5 | constructor: Zilliz 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{ 11 | "R": 48, 12 | "L": 500 13 | }] 14 | query-args: | 15 | [ 16 | {"budget": 0.1, "ef" : 80} 17 | ] 18 | sparse-1M: 19 | zilliz: 20 | docker-tag: neurips23-sparse-zilliz 21 | module: neurips23.sparse.zilliz.zilliz 22 | constructor: Zilliz 23 | base-args: ["@metric"] 24 | run-groups: 25 | base: 26 | args: | 27 | [{ 28 | "R": 48, 29 | "L": 500 30 | }] 31 | query-args: | 32 | [ 33 | {"budget": 0.1, "ef" : 80} 34 | ] 35 | 36 | sparse-full: 37 | zilliz: 38 | docker-tag: neurips23-sparse-zilliz 39 | module: neurips23.sparse.zilliz.zilliz 40 | constructor: Zilliz 41 | base-args: ["@metric"] 42 | run-groups: 43 | base: 44 | args: | 45 | [{ 46 | "R": 48, 47 | "L": 500 48 | }] 49 | query-args: | 50 | [ 51 | {"budget": 0.11, "ef" : 45}, 52 | {"budget": 0.11, "ef" : 55}, 53 | {"budget": 0.11, "ef" : 65}, 54 | {"budget": 0.11, "ef" : 70}, 55 | {"budget": 0.12, "ef" : 45}, 56 | {"budget": 0.12, "ef" : 50}, 57 | {"budget": 0.12, "ef" : 55}, 58 | {"budget": 0.12, "ef" : 60}, 59 | {"budget": 0.12, "ef" : 65}, 60 | {"budget": 0.12, "ef" : 70} 61 | ] 62 | -------------------------------------------------------------------------------- /neurips23/sparse/zilliz/zilliz.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import gc 4 | import os 5 | import numpy as np 6 | 7 | from benchmark.algorithms.base import BaseANN 8 | from benchmark.datasets import DATASETS 9 | import sparse_searcher 10 | 11 | class Zilliz(BaseANN): 12 | def __init__(self, metric, index_params): 13 | assert metric == "ip" 14 | self.name = "zilliz" 15 | self.R = index_params['R'] 16 | self.L = index_params['L'] 17 | 18 | def fit(self, dataset): # e.g. 
dataset = "sparse-small" 19 | 20 | self.ds = DATASETS[dataset]() 21 | assert self.ds.data_type() == "sparse" 22 | 23 | print("start add") 24 | path = f'zilliz_R{self.R}_L{self.L}.idx' 25 | 26 | self.searcher = sparse_searcher.SparseGrapSearcher(self.ds.get_dataset_fn(), path, self.R, self.L) 27 | print("done add") 28 | 29 | def load_index(self, dataset): 30 | return None 31 | 32 | def set_query_arguments(self, query_args): 33 | self._budget = query_args["budget"] 34 | self.ef = query_args["ef"] 35 | self.searcher.set_ef(self.ef) 36 | 37 | def query(self, X, k): 38 | """Carry out a batch query for k-NN of query set X.""" 39 | nq = X.shape[0] 40 | self.res = self.searcher.search_batch(nq, X.indptr, X.indices, X.data, k, self._budget).reshape(-1, k) 41 | 42 | def get_results(self): 43 | return self.res 44 | 45 | def __str__(self): 46 | return f'zilliz_qdrop{self._budget}_ef{self.ef}' 47 | 48 | -------------------------------------------------------------------------------- /neurips23/streaming/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harsha-simhadri/big-ann-benchmarks/a71ad47f3179086759a89ec859e23898d46a3727/neurips23/streaming/__init__.py -------------------------------------------------------------------------------- /neurips23/streaming/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.typing as npt 3 | from benchmark.algorithms.base import BaseANN 4 | 5 | class BaseStreamingANN(BaseANN): 6 | def track(self): 7 | return "stream" 8 | 9 | def setup(self, dtype, max_pts, ndims) -> None: 10 | ''' 11 | Initialize the data structures for your algorithm 12 | dtype can be 'uint8', 'int8 'or 'float32' 13 | max_pts is an upper bound on non-deleted points that the index must support 14 | ndims is the size of the dataset 15 | ''' 16 | raise NotImplementedError 17 | 18 | def insert(self, X: np.array, ids: npt.NDArray[np.uint32]) -> None: 19 | ''' 20 | Implement this for your algorithm 21 | X is num_vectos * num_dims matrix 22 | ids is num_vectors-sized array which indicates ids for each vector 23 | ''' 24 | raise NotImplementedError 25 | 26 | def delete(self, ids: npt.NDArray[np.uint32]) -> None: 27 | ''' 28 | Implement this for your algorithm 29 | delete the vectors labelled with ids. 
30 | ''' 31 | raise NotImplementedError 32 | 33 | 34 | def fit(self, dataset): 35 | ''' 36 | Do not override this method 37 | ''' 38 | raise NotImplementedError 39 | 40 | def load_index(self, dataset): 41 | """ 42 | Do not override 43 | """ 44 | return False 45 | 46 | def get_index_components(self, dataset): 47 | """ 48 | Does not apply to streaming indices 49 | """ 50 | raise NotImplementedError 51 | 52 | def index_files_to_store(self, dataset): 53 | """ 54 | Does not apply to streaming indices 55 | """ 56 | raise NotImplementedError 57 | -------------------------------------------------------------------------------- /neurips23/streaming/cufe/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | ADD "https://github.com/AbdelrahmanMohamed129/DiskANN/tree/farah" latest_commit 10 | RUN git clone https://github.com/AbdelrahmanMohamed129/DiskANN --branch farah 11 | WORKDIR /home/app/DiskANN 12 | RUN git pull 13 | RUN pip3 install virtualenv build 14 | RUN python3 -m build 15 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 16 | WORKDIR /home/app -------------------------------------------------------------------------------- /neurips23/streaming/diskann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/microsoft/DiskANN.git --branch 0.5.0.rc3.post1 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/streaming/hwtl_sdu_anns_stream/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/WPJiang/HWTL_SDU-ANNS-stream 10 | WORKDIR /home/app/HWTL_SDU-ANNS-stream 11 | RUN pip install diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 12 | WORKDIR /home/app 13 | -------------------------------------------------------------------------------- /neurips23/streaming/hwtl_sdu_anns_stream/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | hwtl_sdu_anns_stream: 3 | docker-tag: neurips23-streaming-hwtl_sdu_anns_stream 4 | module: 
neurips23.streaming.hwtl_sdu_anns_stream.hwtl_sdu_anns_stream 5 | constructor: hwtl_sdu_anns_stream 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "insert_threads":16, "consolidate_threads":16}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | 14 | 15 | msturing-30M-clustered: 16 | hwtl_sdu_anns_stream: 17 | docker-tag: neurips23-streaming-hwtl_sdu_anns_stream 18 | module: neurips23.streaming.hwtl_sdu_anns_stream.hwtl_sdu_anns_stream 19 | constructor: hwtl_sdu_anns_stream 20 | base-args: ["@metric"] 21 | run-groups: 22 | base: 23 | args: | 24 | [{"R":65, "L":70, "insert_threads":16, "consolidate_threads":16}] 25 | query-args: | 26 | [{"Ls":100, "T":16}] 27 | -------------------------------------------------------------------------------- /neurips23/streaming/pinecone/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | WORKDIR /home/app 10 | 11 | 12 | # copy and install the fast rust reranker package and the updated diskann 13 | RUN git clone --branch streaming2 https://github.com/pinecone-io/bigann.git 14 | RUN pip install ./bigann/*.whl 15 | 16 | # verify that the build worked 17 | RUN python3 -c 'import diskannpy;' 18 | RUN python3 -c 'import research_pinecone_reranking;' 19 | -------------------------------------------------------------------------------- /neurips23/streaming/pinecone/README.md: -------------------------------------------------------------------------------- 1 | # Pinecone Streaming ANN algorithm 2 | 3 | Our solution employs a two-stage retrieval strategy. 4 | In the initial phase, we use a variant of the DiskANN index for candidate generation to generate a set of k’ >> k 5 | results through an approximate scoring mechanism over uint8-quantized vectors, 6 | with accelerated SIMD-based distance calculation. 7 | The second-stage reranks the candidates using full-precision scoring to enhance the overall accuracy of retrieval. 8 | It is worth noting that the raw vectors used in the second stage are stored on SSD. 9 | As such, it is important to optimize the number of disk reads invoked by the reranking stage. 
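The config below exposes the usual DiskANN search list size Ls alongside a k_1 parameter, presumably the size of the first-stage candidate set. As a generic illustration of the two-stage pattern described above (approximate scores over quantized vectors, then exact rescoring of a small candidate set) — this is not Pinecone's code, and the data here is synthetic:

# Illustrative retrieve-then-rerank: approximate first stage over quantized vectors,
# exact second stage over a small candidate set.
import numpy as np

def rerank(query, candidates, full_precision_vectors, k):
    """candidates: ids returned by the approximate stage (k' >> k of them)."""
    exact = full_precision_vectors[candidates]         # read from SSD in the real system
    dists = np.linalg.norm(exact - query, axis=1)      # full-precision distances
    order = np.argsort(dists)[:k]
    return candidates[order]

rng = np.random.default_rng(0)
base = rng.random((1000, 64), dtype=np.float32)        # synthetic full-precision vectors
quantized = np.round(base * 255).astype(np.uint8)      # crude stand-in for the uint8 index
q = rng.random(64, dtype=np.float32)
approx = np.linalg.norm(quantized.astype(np.float32) / 255.0 - q, axis=1)
cand = np.argsort(approx)[:30]                         # k' = 30 candidates (cf. k_1 below)
top10 = rerank(q, cand, base, k=10)

Because the second stage reads full-precision vectors from SSD, keeping the candidate set small directly bounds the number of disk reads per query, which is the optimization the README calls out.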
-------------------------------------------------------------------------------- /neurips23/streaming/pinecone/config.yaml: -------------------------------------------------------------------------------- 1 | msturing-30M-clustered: 2 | pinecone: 3 | docker-tag: neurips23-streaming-pinecone 4 | module: neurips23.streaming.pinecone.pinecone 5 | constructor: pinecone 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":100, "insert_threads":8, "consolidate_threads":8}] 11 | query-args: | 12 | [ 13 | {"Ls":300, "k_1":30, "T":8}, 14 | {"Ls":400, "k_1":30, "T":8}, 15 | {"Ls":500, "k_1":30, "T":8}, 16 | {"Ls":520, "k_1":30, "T":8}, 17 | {"Ls":540, "k_1":30, "T":8}, 18 | {"Ls":560, "k_1":30, "T":8} 19 | ] 20 | -------------------------------------------------------------------------------- /neurips23/streaming/puck/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt-get update 5 | RUN apt-get install -y wget git cmake g++ libaio-dev libgoogle-perftools-dev clang-format libboost-dev python3 python3-setuptools python3-pip 6 | #swig 7 | RUN apt-get update && apt-get install -y swig cmake 8 | RUN pip3 install pybind11 numpy 9 | RUN cat /etc/ld.so.conf 10 | RUN ls /etc/ld.so.conf.d/ 11 | ##cmake 12 | RUN wget https://cmake.org/files/v3.22/cmake-3.22.0-linux-x86_64.sh 13 | RUN mkdir cmake && sh cmake-3.22.0-linux-x86_64.sh --skip-license --prefix=cmake 14 | ENV PATH /home/app/cmake/bin:$PATH 15 | 16 | #mkl 17 | RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/adb8a02c-4ee7-4882-97d6-a524150da358/l_onemkl_p_2023.2.0.49497_offline.sh 18 | RUN sh l_onemkl_p_2023.2.0.49497_offline.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s 19 | 20 | RUN echo "/opt/intel/oneapi/mkl/latest/lib/intel64" > /etc/ld.so.conf.d/mkl.conf 21 | RUN ldconfig 22 | RUN touch /etc/profile.d/intel.sh 23 | RUN echo ". /opt/intel/oneapi/mkl/latest/env/vars.sh" >> /etc/profile.d/intel.sh 24 | RUN . /etc/profile.d/intel.sh 25 | 26 | ENV CMAKE_ARGS "-DMKLROOT=/opt/intel/oneapi/mkl/latest/ -DBLA_VENDOR=Intel10_64lp_seq -DBLA_STATIC=ON" 27 | #RUN git config --global http.sslVerify false 28 | 29 | RUN git clone -b streaming https://github.com/baidu/puck.git 30 | RUN cd puck && . 
/etc/profile.d/intel.sh && python3 setup.py install 31 | RUN python3 -c 'from puck import py_puck_api' 32 | -------------------------------------------------------------------------------- /neurips23/streaming/puck/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | puck: 3 | docker-tag: neurips23-streaming-puck 4 | module: neurips23.streaming.puck.puck 5 | constructor: Puck 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [ 11 | { "index_type": 1, "C":20, "F":20, "FN":10, "N":0, "filter_topk":200} 12 | ] 13 | query-args: | 14 | [ 15 | {"radius_rate":1.00 ,"search_coarse_count":5} 16 | ] 17 | msturing-30M-clustered: 18 | puck: 19 | docker-tag: neurips23-streaming-puck 20 | module: neurips23.streaming.puck.puck 21 | constructor: Puck 22 | base-args: ["@metric"] 23 | run-groups: 24 | base: 25 | args: | 26 | [ 27 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1200}, 28 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1500}, 29 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1800}, 30 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":1900}, 31 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2000}, 32 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2100}, 33 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2200}, 34 | { "index_type": 1, "C":200, "F":200, "FN":8, "N":0, "filter_topk":2300} 35 | ] 36 | query-args: | 37 | [ 38 | {"radius_rate":1.00 ,"search_coarse_count":200} 39 | ] 40 | 41 | 42 | -------------------------------------------------------------------------------- /neurips23/streaming/pyanns/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN add-apt-repository -y ppa:git-core/ppa 6 | RUN apt update 7 | RUN DEBIAN_FRONTEND=noninteractive apt install -y git make cmake g++ libaio-dev libgoogle-perftools-dev libunwind-dev clang-format libboost-dev libboost-program-options-dev libmkl-full-dev libcpprest-dev python3.10 8 | 9 | RUN git clone https://github.com/veaaaab/DiskANN.git --branch bigann23_streaming 10 | WORKDIR /home/app/DiskANN 11 | RUN pip3 install virtualenv build 12 | RUN python3 -m build 13 | RUN pip install dist/diskannpy-0.5.0rc3.post1-cp310-cp310-linux_x86_64.whl 14 | WORKDIR /home/app 15 | -------------------------------------------------------------------------------- /neurips23/streaming/pyanns/config.yaml: -------------------------------------------------------------------------------- 1 | random-xs: 2 | pyanns: 3 | docker-tag: neurips23-streaming-pyanns 4 | module: neurips23.streaming.pyanns.pyanns 5 | constructor: Pyanns 6 | base-args: ["@metric"] 7 | run-groups: 8 | base: 9 | args: | 10 | [{"R":32, "L":50, "insert_threads":16, "consolidate_threads":16}] 11 | query-args: | 12 | [{"Ls":50, "T":8}] 13 | 14 | msturing-30M-clustered: 15 | pyanns: 16 | docker-tag: neurips23-streaming-pyanns 17 | module: neurips23.streaming.pyanns.pyanns 18 | constructor: Pyanns 19 | base-args: ["@metric"] 20 | run-groups: 21 | base: 22 | args: | 23 | [{"R":32, "L":100, "insert_threads":8, "consolidate_threads":8}] 24 | query-args: | 25 | [{"Ls":300, "T":8}, 26 | {"Ls":400, "T":8}, 27 | {"Ls":500, "T":8}, 28 | {"Ls":600, "T":8} 29 | ] 30 | -------------------------------------------------------------------------------- 
/neurips23/streaming/scann/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neurips23 2 | 3 | RUN apt update 4 | RUN apt install -y software-properties-common 5 | RUN pip install scann 6 | 7 | WORKDIR /home/app 8 | -------------------------------------------------------------------------------- /neurips23/streaming/scann/config.yaml: -------------------------------------------------------------------------------- 1 | msturing-30M-clustered: 2 | scann: 3 | docker-tag: neurips23-streaming-scann 4 | module: neurips23.streaming.scann.scann 5 | constructor: Scann 6 | base-args: ["@metric"] 7 | run-groups: 8 | dynamic4M: 9 | args: | 10 | [{ "tree_size": 5000, "leaves_to_search": 700, "reorder": 317}] 11 | query-args: | 12 | [{}] 13 | -------------------------------------------------------------------------------- /preparation/neurips23/sparse_algorithms/basic_sparse_index.py: -------------------------------------------------------------------------------- 1 | from scipy.sparse import csr_matrix 2 | import numpy as np 3 | 4 | # given a vector x, returns another vector with the minimal number of largest elements of x, 5 | # s.t. their sum is at most a times the sum of the elements in x. 6 | # 7 | # The goal is to sparsify the vector further, 8 | # but at the same time try and preserve as much of the original vector as possible. 9 | def largest_elements(x, a): 10 | # Compute the sum of elements of x 11 | x_sum = np.sum(x) 12 | 13 | # Compute the indices and values of the largest elements of x 14 | ind = np.argsort(-x.data) 15 | cs = np.cumsum(x.data[ind] / x_sum) 16 | 17 | n_elements = min(sum(cs < a) + 1, x.nnz) # rounding errors sometimes results in n_elements > x.nnz 18 | 19 | new_ind = x.indices[ind[:n_elements]] 20 | new_data = x.data[ind[:n_elements]] 21 | return csr_matrix((new_data, new_ind, [0, n_elements]), shape=x.shape) 22 | 23 | 24 | # a basic sparse index. 25 | # methods: 26 | # 1. init: from a csr matrix of data. 27 | # 2. query a singe vector, with parameters: 28 | # - k (# of neighbors), 29 | # - alpha (fraction of the sum of the vector to maintain. alpha=1 is exact search). 
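# Illustrative usage, assuming `data_csr` is an n x d scipy.sparse csr_matrix and
# `q_row` is a 1 x d csr_matrix query (both hypothetical here):
#   index = BasicSparseIndex(data_csr)
#   exact = index.query(q_row, k=10)              # alpha=1: exact inner-product search
#   approx = index.query(q_row, k=10, alpha=0.9)  # prune q to its largest elements,
#                                                 # keeping ~90% of its total mass
# Each call returns up to k (row_id, score) pairs.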
30 | class BasicSparseIndex(object): 31 | def __init__(self, data_csr): 32 | self.data_csc = data_csr.tocsc() 33 | 34 | def query(self, q, k, alpha=1): # single query, assumes q is a row vector 35 | if alpha == 1: 36 | q2 = q.transpose() 37 | else: 38 | q2 = largest_elements(q, alpha).transpose() 39 | 40 | # perform (sparse) matrix-vector multiplication 41 | res = self.data_csc.dot(q2) 42 | 43 | if res.nnz <= k: # if there are less than k elements with nonzero score, simply return them 44 | return list(zip(res.indices, res.data)) 45 | 46 | # extract the top k from the res sparse array directly 47 | indices = np.argpartition(res.data, -(k + 1))[-k:] 48 | results = [] 49 | for index in indices: 50 | results.append((res.data[index], index)) 51 | results.sort(reverse=True) 52 | return [(res.indices[b], a) for a, b in results] 53 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==2.6.1 3 | h5py==2.10.0 4 | matplotlib==2.1.0 5 | numpy==1.16.0 6 | pyyaml==5.4 7 | psutil==5.6.6 8 | scipy==1.0.0 9 | scikit-learn==0.19.1 10 | jinja2==2.11.3 11 | pandas 12 | -------------------------------------------------------------------------------- /requirements_py3.10.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==7.1.0 3 | h5py==3.10.0 4 | matplotlib==3.3.4 5 | numpy==1.24.2 6 | pyyaml==6.0 7 | psutil==5.9.4 8 | scipy==1.10.1 9 | scikit-learn 10 | jinja2==3.1.2 11 | pandas==2.0.0 12 | -------------------------------------------------------------------------------- /requirements_py38.txt: -------------------------------------------------------------------------------- 1 | ansicolors==1.1.8 2 | docker==2.6.1 3 | h5py==2.10.0 4 | matplotlib==3.3.4 5 | numpy==1.19.5 6 | pyyaml==5.4 7 | psutil==5.8.0 8 | scipy==1.5.4 9 | scikit-learn 10 | jinja2==2.11.3 11 | pandas==1.1.5 12 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from benchmark.main import main 2 | from multiprocessing import freeze_support 3 | 4 | if __name__ == "__main__": 5 | freeze_support() 6 | main() 7 | -------------------------------------------------------------------------------- /run_algorithm.py: -------------------------------------------------------------------------------- 1 | from benchmark.runner import run_from_cmdline 2 | 3 | run_from_cmdline() 4 | -------------------------------------------------------------------------------- /setup_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DATASET_HOME=/raid/workspace/dataset 4 | INDEX_HOME=/raid/workspace/anaruse/libcuann/models 5 | 6 | mkdir -p data/indices/t3/CuannsIvfpq 2> /dev/null 7 | 8 | # 9 | cd data 10 | if [ ! -e bigann ]; then 11 | ln -s ${DATASET_HOME}/bigann-1B bigann 12 | fi 13 | if [ ! -e deep1b ]; then 14 | ln -s ${DATASET_HOME}/deep-1B deep1b 15 | fi 16 | if [ ! -e MSSPACEV1B ]; then 17 | ln -s ${DATASET_HOME}/msspacev-1B MSSPACEV1B 18 | fi 19 | if [ ! -e MSTuringANNS ]; then 20 | ln -s ${DATASET_HOME}/msturing-1B MSTuringANNS 21 | fi 22 | if [ ! -e text2image1B ]; then 23 | ln -s ${DATASET_HOME}/text2image-1B text2image1B 24 | fi 25 | 26 | # 27 | cd indices/t3/CuannsIvfpq 28 | if [ ! 
-e bigann-1B.cluster_250000.pq_64.5_bit ]; then 29 | ln -s ${INDEX_HOME}/BIGANN-1B-uint8-1000000000x128.cluster_250000.pq_64.5_bit bigann-1B.cluster_250000.pq_64.5_bit 30 | fi 31 | if [ ! -e deep-1B.cluster_250000.pq_64.5_bit ]; then 32 | ln -s ${INDEX_HOME}/DEEP-1B-float32-1000000000x96.cluster_250000.pq_64.5_bit deep-1B.cluster_250000.pq_64.5_bit 33 | fi 34 | if [ ! -e msspacev-1B.cluster_500000.pq_64.5_bit ]; then 35 | ln -s ${INDEX_HOME}/MS-SPACEV-1B-int8-1000000000x100.cluster_500000.pq_64.5_bit msspacev-1B.cluster_500000.pq_64.5_bit 36 | fi 37 | if [ ! -e msturing-1B.cluster_250000.pq_64.5_bit ]; then 38 | ln -s ${INDEX_HOME}/MS-Turing-ANNS-1B-float32-1000000000x100.cluster_250000.pq_64.5_bit msturing-1B.cluster_250000.pq_64.5_bit 39 | fi 40 | if [ ! -e text2image-1B.cluster_500000.pq_72.8_bit ]; then 41 | ln -s ${INDEX_HOME}/T2I-1B-float32-1000000000x200.cluster_500000.pq_72.8_bit text2image-1B.cluster_500000.pq_72.8_bit 42 | fi 43 | 44 | -------------------------------------------------------------------------------- /tests/tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # You should run this script from the repo top-level directory 4 | 5 | PYTHONPATH="." python tests/recall_tests.py 6 | 7 | --------------------------------------------------------------------------------
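tests/tests.sh above runs tests/recall_tests.py from the repository root. The quantity under test is recall@k: the fraction of each query's true k nearest neighbors that the algorithm actually returned. A minimal standalone version for reference — the benchmark's own metric lives in benchmark/plotting/metrics.py and is more involved (among other things it accounts for ties in the ground-truth distances):

# Minimal recall@k over id arrays; illustrative only, not the repository's implementation.
import numpy as np

def recall_at_k(results, ground_truth, k):
    """results, ground_truth: (nq, >=k) arrays of neighbor ids per query."""
    hits = sum(len(set(res[:k]) & set(gt[:k])) for res, gt in zip(results, ground_truth))
    return hits / (results.shape[0] * k)

gt = np.array([[0, 1, 2], [3, 4, 5]])
res = np.array([[0, 2, 9], [5, 4, 3]])
print(recall_at_k(res, gt, k=3))  # 0.833... (5 of the 6 true neighbors were found)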