├── LICENSE.md
├── README.md
├── indices
    └── .gitignore
├── logs
    └── .gitignore
├── requirements.txt
├── script.sh
└── src
    ├── InvertedIndex
        ├── .ipynb_checkpoints
        │   ├── Gather-checkpoint.cpp
        │   ├── Gather-checkpoint.h
        │   ├── Makefile-checkpoint
        │   ├── scoreAgg-checkpoint.cpp
        │   ├── scoreAgg-checkpoint.pyx
        │   └── setup-checkpoint.py
        ├── Gather.cpp
        ├── Gather.h
        ├── Makefile
        ├── build
        │   └── temp.linux-x86_64-3.8
        │   │   ├── Gather.o
        │   │   └── scoreAgg.o
        ├── scoreAgg.cpp
        ├── scoreAgg.cpython-38-x86_64-linux-gnu.so
        ├── scoreAgg.pyx
        └── setup.py
    ├── __pycache__
        ├── BLISS.cpython-310.pyc
        ├── BLISS.cpython-38.pyc
        ├── config.cpython-310.pyc
        ├── config.cpython-38.pyc
        ├── dataPrepare.cpython-310.pyc
        ├── dataPrepare.cpython-38.pyc
        ├── index.cpython-38.pyc
        ├── net.cpython-310.pyc
        ├── net.cpython-38.pyc
        ├── train.cpython-310.pyc
        ├── train.cpython-38.pyc
        ├── utils.cpython-310.pyc
        └── utils.cpython-38.pyc
    ├── config.py
    ├── construct.py
    ├── dataPrepare.py
    ├── index.py
    ├── net.py
    ├── query.py
    ├── train.py
    └── utils.py


/LICENSE.md:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # [BLISS: A Billion scale Index using Iterative Re-partitioning](https://dl.acm.org/doi/abs/10.1145/3534678.3539414)
 2 | 
 3 | **Authors**: [Gaurav Gupta](https://gaurav16gupta.github.io/), [Tharun Medini](https://tharun24.github.io/), [Anshumali Shrivastava](https://www.cs.rice.edu/~as143/), [Alexander J. Smola](https://alex.smola.org/)
 4 | 
 5 | ## Abstract
 6 | 
 7 | Representation learning has transformed the problem of information retrieval into one of finding the approximate set of nearest neighbors in a high dimensional vector space. With limited hardware resources and time-critical queries, the retrieval engines face an inherent tension between latency, accuracy, scalability, compactness, and the ability to load balance in distributed settings. To improve the trade-off, we propose a new algorithm, called BaLanced Index for Scalable Search (BLISS), a highly tunable indexing algorithm with enviably small index sizes, making it easy to scale
 8 | to billions of vectors. It iteratively refines partitions of items by learning the relevant buckets directly from the query-item relevance data. To ensure that the buckets are balanced, BLISS uses the power-of-𝐾 choices strategy. We show that BLISS provides superior load balancing with high probability (and under very benign assumptions). Due to its design, BLISS can be employed for both near-neighbor retrieval (ANN problem) and extreme classification
 9 | (XML problem). For the case of ANN, we train and index 4 datasets with billion vectors each. We compare the recall, inference time, indexing time, and index size for BLISS with the two most popular and well-optimized libraries- Hierarchical Navigable Small World (HNSW) graph and Facebook’s FAISS. BLISS requires 100× lesser RAM than HNSW, making it fit in memory on commodity machines while taking a similar inference time as HNSW for the same recall. Against FAISS-IVF, BLISS achieves similar performance with 3-4× less memory requirement. BLISS is both data and model parallel, making it ideal for distributed implementation for training and inference. For the case of XML, BLISS surpasses the best baselines’ precision while being 5× faster for inference on popular multi-label datasets with half a million classes.
10 | 
11 | ## Prerequisites
12 | Compiler: gcc and g++
13 | ```
14 | pip install -r requirements.txt
15 | cd src/InvertedIndex && make
16 | ```
17 | 
18 | ### Download Datasets
19 | Create a directory named "data" outside the BLISS repo. Create a separate directory for each dataset.
20 | For example, to download the GloVe data:
21 | ```
22 | cd data
23 | mkdir glove && cd glove
24 | wget http://ann-benchmarks.com/glove-100-angular.hdf5
25 | ```
26 | The chosen dataset folder names must match the names used in the BLISS/src/config.py file.
27 | 
28 | The GloVe and SIFT datasets came from [ann-benchmarks](https://github.com/erikbern/ann-benchmarks/). The Billion scale data came from [big-ann-benchmarks](https://big-ann-benchmarks.com/index.html#call). The datasets for extreme classification can be downloaded from the [XMLRepository](http://manikvarma.org/code/Slice/download.html) 
29 | 
30 | Update the BLISS/src/config.py DATASET dictionary after adding any new dataset.
31 | 
32 | ## Run
33 | * Train the iterative model (run this and the following commands from the `src/` directory)
34 | ```
35 | python3 construct.py --index='glove_epc20_K2_B4096_R4'
36 | ```
37 | * Index on the trained model
38 | ```
39 | python3 index.py --index='glove_epc20_K2_B4096_R4'
40 | ```
41 | * Query the index
42 | ```
43 | python3 query.py --index='glove_epc20_K2_B4096_R4' --topm=50
44 | ```
45 | 
46 | 
47 | ## Contributing
48 | We'd love to accept your contributions to this project. Please feel free to open an issue, or submit a pull request as necessary. 
49 | 
50 | ## Acknowledgment
51 | The code is built upon [Tharun24/MACH](https://github.com/Tharun24/MACH).
52 | 
53 | ## Citation
54 | If you find the provided code useful, please cite our work with the following bibtex.
55 | 
56 | ```bibtex
57 | @inproceedings{10.1145/3534678.3539414,
58 | author = {Gupta, Gaurav and Medini, Tharun and Shrivastava, Anshumali and Smola, Alexander J.},
59 | title = {BLISS: A Billion Scale Index Using Iterative Re-Partitioning},
60 | year = {2022},
61 | isbn = {9781450393850},
62 | publisher = {Association for Computing Machinery},
63 | address = {New York, NY, USA},
64 | url = {https://doi.org/10.1145/3534678.3539414},
65 | doi = {10.1145/3534678.3539414},
66 | booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
67 | pages = {486–495},
68 | numpages = {10},
69 | keywords = {classification, search, load-balance, billion-scale, learning-to-index},
70 | location = {Washington DC, USA},
71 | series = {KDD '22}
72 | }
73 | ```
74 | 
75 | TODO:
76 | 1) Multithreading support for score aggregation and re-ranking.
77 | 
78 | 


--------------------------------------------------------------------------------
/indices/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | */
3 | !.gitignore


--------------------------------------------------------------------------------
/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | */
3 | !.gitignore


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu>=2.7.1
2 | cython==0.29.24
3 | matplotlib
4 | tqdm
5 | scikit-learn==1.0.1
6 | numpy
7 | 


--------------------------------------------------------------------------------
/script.sh:
--------------------------------------------------------------------------------
1 | python3 construct.py --index='glove_epc20_K2_B4096_R4'   # step 1: train the iterative model
2 | python3 index.py --index='glove_epc20_K2_B4096_R4'   # step 2: index on the trained model
3 | python3 query.py --index='glove_epc20_K2_B4096_R4' --topm=50   # step 3: query the index


--------------------------------------------------------------------------------
/src/InvertedIndex/.ipynb_checkpoints/Gather-checkpoint.cpp:
--------------------------------------------------------------------------------
  1 | #include <iomanip>
  2 | #include <fstream>
  3 | #include <iostream>
  4 | #include <chrono>
  5 | #include <vector>
  6 | #include <math.h>
  7 | #include <sstream>
  8 | #include <string>
  9 | #include <string.h>
 10 | #include <algorithm>
 11 | #include "Gather.h"
 12 | #include <iostream>
 13 | #include <unordered_map>
 14 | #include <typeinfo>
 15 | 
 16 | 
 17 | using namespace std;
 18 | 
 19 | // k is the nnz (number of bucket ids FC() reads from hashIndex), m is the output size (number of inverted lists), N is the length of FC()'s per-item counter array
 20 | FastIV::FastIV(int k, int m, long N){
 21 |     k_ = k;
 22 |     N_ = N;
 23 |     m_ = m;
 24 |     std::cout << "output dim: " <<m<< '\n';
 25 |     InvIndex = new vector<int>[m_]; // array of m_ vectors, one inverted list per bucket; NOTE(review): no matching delete[] in the visible code
 26 |     std::cout <<k_<< '\n'; // debug echo of the stored parameters (k_, N_, m_)
 27 |     std::cout <<N_<< '\n';
 28 |     std::cout <<m_<< '\n';
 29 | }
 30 | // Appends list[0..L) to inverted list i; L is the bucket size
 31 | void FastIV::createIndex(int i, long* list, int L){
 32 |     for(int l=0; l<L; l++){
 33 |         InvIndex[i].push_back(list[l]); // long is narrowed to int here (InvIndex is declared as std::vector<int>*); i is not range-checked
 34 |     }
 35 | //     std::cout <<InvIndex[i].size()<< '\n';
 36 | }
 37 | 
 38 | 
 39 | // ///////////////////////////////////////////version 1////////////////////////////////////////////////////////////////////
 40 | 
 41 | // long FastIV::FC(long* hashIndex, int threshold, int maxsize, long* candidates){
 42 | // //     std::cout <<k_<< '\n';
 43 | // //     short int counter[N_] = {0}; //init with all 0s
 44 |     
 45 | //     short int* counter = new short int[N_];
 46 | //     chrono::time_point<chrono::high_resolution_clock> t0 = chrono::high_resolution_clock::now();
 47 | //     for(long i = 0; i < N_; i++){
 48 | //         counter[i] = 0;
 49 | //     }
 50 | // //     chrono::time_point<chrono::high_resolution_clock> t1 = chrono::high_resolution_clock::now();
 51 | // //     cout << "d0: "<<chrono::duration_cast<chrono::nanoseconds>(t1-t0).count()/1000000.0 << "msec\n";
 52 |     
 53 | // //     std::cout <<N_<< '\n';
 54 | // //     std::cout <<m_<< '\n';
 55 | //     for(int h=0; h<k_; h++){
 56 | //         for (int i=0; i<InvIndex[hashIndex[h]].size(); i++){
 57 | //             counter[InvIndex[hashIndex[h]][i]]+=1;
 58 | //         }
 59 | //     }
 60 | // //     chrono::time_point<chrono::high_resolution_clock> t2 = chrono::high_resolution_clock::now();
 61 | // //     cout << "d1: "<<chrono::duration_cast<chrono::nanoseconds>(t2-t1).count()/1000000.0 << "msec\n";
 62 |     
 63 | // //     std::cout <<"here1"<< '\n';
 64 | //     long cnt =0;
 65 | //     for(long i = maxsize; i >=0 ; --i){
 66 | //         candidates[i] =0;
 67 | //     }
 68 | // //     chrono::time_point<chrono::high_resolution_clock> t3 = chrono::high_resolution_clock::now();
 69 | // //     cout << "d2: "<<chrono::duration_cast<chrono::nanoseconds>(t3-t2).count()/1000000.0 << "msec\n";
 70 |     
 71 | // //     std::cout <<"here2"<< '\n';
 72 | //     for(long i = 0; i < N_; i++){
 73 | //         if (counter[i]>=threshold && cnt <=maxsize){
 74 | //             candidates[cnt] = i;
 75 | // //             std::cout << "here3 " <<candidates[cnt]<< '\n';
 76 | //             cnt++;
 77 | //         }
 78 | //     }
 79 | //     delete [] counter;
 80 | // //     chrono::time_point<chrono::high_resolution_clock> t4 = chrono::high_resolution_clock::now();
 81 | // //     cout << "d3: "<<chrono::duration_cast<chrono::nanoseconds>(t4-t3).count()/1000000.0 << "msec\n";
 82 | // //     std::cout << cnt<< '\n';
 83 | //     return cnt;
 84 | // }
 85 | 
 86 | // ///////////////////////////////////////////version 3////////////////////////////////////////////////////////////////////
 87 | 
 88 | long FastIV::FC(long* hashIndex, int threshold, int maxsize, long* candidates){
 89 |     // Frequency-count candidate generation over the inverted index.
 90 |     //   hashIndex  : k_ bucket ids; each is used directly as an index into InvIndex (not checked)
 91 |     //   threshold  : number of bucket hits an item needs before it is emitted
 92 |     //   maxsize    : capacity of `candidates`; at most this many ids are written
 93 |     //   candidates : output buffer; slots [0, maxsize) are zeroed, then filled in discovery order
 94 |     // Returns the number of ids written to `candidates`.
 95 |     // Per-item hit counter. It stops growing once an item reaches `threshold`
 96 |     // (threshold + 1 after emission), so a count never exceeds threshold + 1.
 97 |     short int* counter = new short int[N_];
 98 |     for(long i = 0; i < N_; i++){
 99 |         counter[i] = 0;
100 |     }
101 | 
102 |     // Fix: the old loop ran from index maxsize down to 0 and therefore wrote
103 |     // candidates[maxsize], one element past a buffer of maxsize entries.
104 |     for(long i = 0; i < maxsize; i++){
105 |         candidates[i] = 0;
106 |     }
107 | 
108 |     long cnt = 0;
109 |     for(int h=0; h<k_; h++){
110 |         const std::vector<int>& bucket = InvIndex[hashIndex[h]];
111 |         for (size_t i=0; i<bucket.size(); i++){
112 |             const int item = bucket[i];
113 |             if (counter[item]<threshold){
114 |                 counter[item]+=1;
115 |             }
116 |             if (counter[item]==threshold && cnt < maxsize){
117 |                 candidates[cnt] = item;
118 |                 cnt++;
119 |                 counter[item]+=1;   // push past threshold so the item is emitted only once
120 |             }
121 |         }
122 |     }
123 | 
124 |     delete [] counter;
125 |     return cnt;
126 | }
127 | 
128 | 
129 | // ///////////////////////////////////////////version 2////////////////////////////////////////////////////////////////////
130 | 
131 | // void FastIV::FC(long* hashIndex, int threshold, int maxsize, long* candidates){
132 | // //     short int* counter = new short int[N_];
133 | //     std::unordered_map<long, short int> counter;
134 | //     counter.reserve(10000);
135 |     
136 | //     chrono::time_point<chrono::high_resolution_clock> t1 = chrono::high_resolution_clock::now();
137 | //     for(int h=0; h<k_; h++){
138 | //         for (int i=0; i<InvIndex[hashIndex[h]].size(); i++){
139 | //             if (counter.find(InvIndex[hashIndex[h]][i]) == counter.end()){
140 | //                 counter[InvIndex[hashIndex[h]][i]]=0;
141 | //             }
142 | //             counter[InvIndex[hashIndex[h]][i]]+=1;
143 | //         }
144 | //     }
145 | //     chrono::time_point<chrono::high_resolution_clock> t2 = chrono::high_resolution_clock::now();
146 | //     cout << "d1: "<<chrono::duration_cast<chrono::nanoseconds>(t2-t1).count()/1000000.0 << "msec\n";
147 |     
148 | //     long cnt =0;
149 | //     for(long i = maxsize; i >=0 ; --i){
150 | //         candidates[i] =0;
151 | //     }
152 | //     chrono::time_point<chrono::high_resolution_clock> t3 = chrono::high_resolution_clock::now();
153 | //     cout << "d2: "<<chrono::duration_cast<chrono::nanoseconds>(t3-t2).count()/1000000.0 << "msec\n";
154 |     
155 | //     for (auto const& x : counter){
156 | // //     for (auto x : counter)
157 | // //         cout << x.first << " " << x.second << endl;
158 | //         if (x.second>=threshold && cnt <=maxsize){
159 | //             candidates[cnt] = x.first;
160 | //             cnt++;
161 | //         }
162 | //     }
163 | //     chrono::time_point<chrono::high_resolution_clock> t4 = chrono::high_resolution_clock::now();
164 | //     cout << "d3: "<<chrono::duration_cast<chrono::nanoseconds>(t4-t3).count()/1000000.0 << "msec\n";
165 | // //     std::cout << cnt<< ' ';
166 | // }
167 | 
168 | 
169 | 
170 | float FastIV::distComp(float* query, float* train_data, int d, long* candidates, int cansz, float* ip, int n_threads)
171 | {
172 |     // Timing probe for candidate gathering: copies the d consecutive floats of every
173 |     // candidate row (train_data + cand*d) into a scratch buffer and prints the elapsed
174 |     // milliseconds. No inner product is computed yet, so `query`, `ip` and
175 |     // `n_threads` are unused.
176 |     //
177 |     // Fixes over the previous version:
178 |     //   * the scratch buffer is owned by a std::vector (two new[] buffers used to leak per call);
179 |     //   * row offsets use long arithmetic (the old `int cand` truncated ids and `cand*d` could overflow int);
180 |     //   * a value is returned (falling off the end of a non-void function is undefined behaviour).
181 |     // Returns the elapsed copy time in milliseconds.
182 |     (void)query; (void)ip; (void)n_threads;
183 |     std::vector<float> temp(d);
184 | //     # pragma omp parallel num_threads ( n_threads )
185 |     chrono::time_point<chrono::high_resolution_clock> t0 = chrono::high_resolution_clock::now();
186 | 
187 |     for (int i=0; i<cansz; i++){
188 |         const long cand = candidates[i];
189 |         const float* row = train_data + cand*d;
190 |         std::copy(row, row + d, temp.begin());
191 |     }
192 |     chrono::time_point<chrono::high_resolution_clock> t1 = chrono::high_resolution_clock::now();
193 | 
194 |     const double msec = chrono::duration_cast<chrono::nanoseconds>(t1-t0).count()/1000000.0;
195 |     cout<<msec<<endl;
196 |     return static_cast<float>(msec);
197 | }
198 | 


--------------------------------------------------------------------------------
/src/InvertedIndex/.ipynb_checkpoints/Gather-checkpoint.h:
--------------------------------------------------------------------------------
 1 | // void cgather_batch(long*, long*, long*, long*, long*, double*, int, int, int, int, int);
 2 | // void cgather_K(float*, long*, float*, long*, int, int, int, int, int);
 3 |  
 4 | 
 5 | #include <vector>
 6 | 
 7 | class FastIV {
 8 | public:
 9 |     int k_, m_;   // k_: bucket ids consumed per FC() call; m_: number of inverted lists
10 |     long N_;   // length of the per-item counter array allocated inside FC()
11 |     std::vector<int>* InvIndex;   // heap array of m_ vectors; item ids are stored as int
12 |     FastIV(int k, int m, long N);   // stores k, m, N and allocates the m inverted lists
13 |     void createIndex(int i, long* list, int L);   // appends list[0..L) to inverted list i
14 |     long FC(long* hashIndex, int threshold, int maxsize, long* candidates);   // writes ids whose hit count reaches threshold into candidates (at most maxsize); returns how many
15 |     float distComp(float* query, float* train_data, int d, long* candidates, int cansz, float* ip, int n_threads);   // copies each candidate's d floats out of train_data and prints the elapsed time
16 | };
17 | 


--------------------------------------------------------------------------------
/src/InvertedIndex/.ipynb_checkpoints/Makefile-checkpoint:
--------------------------------------------------------------------------------
1 | all: clean  # always rebuilds from scratch: `clean` is a prerequisite of `all`
2 | 	python3 setup.py build_ext --inplace
3 | 
4 | clean:  # removes build output; scoreAgg.cpp is presumably the Cython-generated source, so it is deleted too
5 | 	rm -rf build
6 | 	rm -rf __pycache__
7 | 	rm -rf scoreAgg.cpp
8 | 	rm -rf scoreAgg.*.so 


--------------------------------------------------------------------------------
/src/InvertedIndex/.ipynb_checkpoints/scoreAgg-checkpoint.pyx:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | cimport numpy as np
 3 | import cython
 4 | from cython import boundscheck, wraparound
 5 | import time
 6 | 
 7 | # cdef extern from "Gather.h":
 8 | #     void cgather_batch(long*, long*, long*, long*, long*, double*, int, int, int, int, int) except +
 9 | #     # void cgather_K(float*, long*, float*, long*, int, int, int, int,int) except +
10 | 
11 | # @boundscheck(False)
12 | # @wraparound(False)
13 | # def scoreAgg( np.ndarray[long, ndim=2,  mode="c"] label_buckets, 
14 | #             np.ndarray[long, ndim=2, mode="c"] counts, 
15 | #             np.ndarray[long, ndim=2,  mode="c"] cumCounts,
16 | #             np.ndarray[long, ndim=1,  mode="c"]candidates,
17 | #             np.ndarray[long, ndim=2,  mode="c"] bestbins,  
18 | #             np.ndarray[double, ndim=2,  mode="c"] bestbin_score, int R, int B, int N, int m, int maxsize): 
19 | #     cgather_batch(&label_buckets[0,0], &counts[0,0], &cumCounts[0,0], &candidates[0], &bestbins[0,0], &bestbin_score[0,0], R, B, N, m, maxsize)
20 | 
21 |     
22 | # new code
23 | cdef extern from "Gather.h":
24 |     cdef cppclass FastIV:
25 |         FastIV(int, int, long) except +
26 |         int k_, m_          # field names and types now match the declarations in Gather.h
27 |         long N_
28 |         void createIndex(int, long*, int)
29 |         long FC(long*, int, int, long* )
30 |         float distComp(float*, float*, int, long*, int, float*, int)
31 | 
32 | @boundscheck(False)
33 | @wraparound(False)
34 | cdef class PyFastIV:
35 |     """Python-facing wrapper that owns one heap-allocated C++ FastIV index."""
36 |     cdef FastIV *thisptr
37 |     def __cinit__(self, int k, int m, long N):
38 |         self.thisptr = new FastIV(k, m, N)
39 |     def __dealloc__(self):
40 |         del self.thisptr   # free the C++ object; it used to leak because no __dealloc__ existed
41 |     def createIndex(self, int i, np.ndarray[long, ndim=1,  mode="c"] list, int L):
42 |         self.thisptr.createIndex(i, &list[0], L)   # appends list[0..L) to inverted list i
43 |     def FC(self, np.ndarray[long, ndim=1,  mode="c"] hashIndex, int threshold, int maxsize, np.ndarray[long, ndim=1,  mode="c"] candidates):
44 |         return self.thisptr.FC(&hashIndex[0], threshold, maxsize, &candidates[0])   # fills `candidates` in place, returns the count
45 |     def distComp(self, np.ndarray[float, ndim=1,  mode="c"] query, np.ndarray[float, ndim=2,  mode="c"] train_data, int d, np.ndarray[long, ndim=1,  mode="c"] candidates, int cansz, np.ndarray[float, ndim=1,  mode="c"] ip, int n_threads ):
46 |         return self.thisptr.distComp(& query[0], &train_data[0,0], d, &candidates[0], cansz, &ip[0], n_threads)
47 | 


--------------------------------------------------------------------------------
/src/InvertedIndex/.ipynb_checkpoints/setup-checkpoint.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup, Extension
 2 | from Cython.Build import cythonize
 3 | import numpy
 4 | 
 5 | setup(ext_modules = cythonize(Extension(
 6 |            "scoreAgg",                                # the extension name
 7 |            sources=["scoreAgg.pyx", "Gather.cpp"], # the Cython source and additional C++ source files
 8 |            language="c++",                        # generate and compile C++ code
 9 |            include_dirs=[numpy.get_include()],
10 |            extra_compile_args=["-std=c++11", "-fopenmp", "-fopenmp-simd", "-O3",],
11 |            extra_link_args=["-fopenmp", "-fopenmp-simd"]
12 |       )))


--------------------------------------------------------------------------------
/src/InvertedIndex/Gather.cpp:
--------------------------------------------------------------------------------
  1 | #include <iomanip>
  2 | #include <fstream>
  3 | #include <iostream>
  4 | #include <chrono>
  5 | #include <vector>
  6 | #include <math.h>
  7 | #include <sstream>
  8 | #include <string>
  9 | #include <string.h>
 10 | #include <algorithm>
 11 | #include "Gather.h"
 12 | #include <iostream>
 13 | #include <unordered_map>
 14 | #include <typeinfo>
 15 | 
 16 | 
 17 | using namespace std;
 18 | 
 19 | // k is the nnz, m is the output size, N is the counter size
 20 | FastIV::FastIV(int R, int block, int B, int mf, int topk, int* inv_lookup, int* counts){
 21 |     R_ =R; //k
 22 |     block_ = block; //N
 23 |     B_ = B; //m
 24 |     threshold = mf; //r, threshold
 25 |     topk_ = topk;
 26 |     inv_lookup_ = inv_lookup;
 27 |     counts_ = counts;
 28 |     tempe = 0;
 29 |     std::cout << "params: "<<  R_<<" "<< block_<<" "<<  B_<<" "<<  threshold<<" "<< '\n';
 30 | 
 31 |     counter = new uint8_t[block_]; //can go further down with each unit= log(maxCount)
 32 |     // memset(counter, 0, sizeof(counter));  //tales almost same time 
 33 |     #pragma omp parallel for num_threads(32) // or #pragma omp simd
 34 |     for(long i = 0; i < block_; i++){  
 35 |         counter[i] = 0;
 36 |     }
 37 |     // InvIndex = new vector<int>[m_]; //array of vectors
 38 |     // long* inv_lookup = new long[block*R*Parts]; 
 39 | }
 40 | 
 41 | 
 42 | // ///////////////////////////////////////////vFor IRLI////////////////////////////////////////////////////////////////////
 43 | // candSize = self.fastIv.FC(top_buckets_[i,:,1,:], 60000, candidates)
 44 | 
 45 | void FastIV::FC(int* top_buckets_, int maxsize, int* candidates, int* candSize){  
 46 |     #pragma omp parallel for num_threads(32)
 47 |     for(long i = maxsize; i >=0 ; --i){
 48 |         candidates[i] =0;
 49 |     }
 50 |     // #pragma omp parallel for num_threads(4)
 51 |     // chrono::time_point<chrono::high_resolution_clock> t0 = chrono::high_resolution_clock::now();
 52 | 
 53 |     // uint8_t* counter = new uint8_t[block_]; //can go further down with each unit= log(maxCount)
 54 |     // // memset(counter, 0, sizeof(counter));  //tales almost same time 
 55 |     // #pragma omp parallel for num_threads(32) // or #pragma omp simd
 56 |     // for(long i = 0; i < block_; i++){  // MAJOR TIME TAKING PROCESS----TIP, DONT INIT ENTIRE COUNTER FOR A NEW QUERY, USE FRON PREVIIOUS 0'ed at places used
 57 |     //     counter[i] = 0;
 58 |     // }
 59 | 
 60 |     candSize[0] = 0;
 61 |     
 62 |     // long long int bucket = 0;
 63 |     int bucketsz = 0;
 64 |     int stpt = 0;
 65 |     int r = 0;
 66 |     // int pos,pos2 = 0;
 67 |     long cnt =0;
 68 |     if (R_==1){
 69 |         for(int k =0; k<topk_; k++) {
 70 |             stpt  = counts_[(r)*B_+ top_buckets_[topk_*r+ k]] +(r)*block_;
 71 |             bucketsz = counts_[(r)*B_ + top_buckets_[topk_*r+ k]+1] - counts_[(r)*B_+ top_buckets_[topk_*r+ k]];
 72 |             for (int i=0; i<bucketsz; i++){ 
 73 |                 if (cnt<maxsize){
 74 |                 candidates[cnt] = inv_lookup_[stpt+i];
 75 |                 cnt++;}
 76 |             }
 77 |         }
 78 |         candSize[0] = cnt;
 79 |     }
 80 | 
 81 |     if (R_>1){
 82 |     // chrono::time_point<chrono::high_resolution_clock> t1 = chrono::high_resolution_clock::now();
 83 |     // cout << "d1: "<<chrono::duration_cast<chrono::nanoseconds>(t1-t0).count()/1000000.0 << "msec\n";
 84 |     // #pragma omp parallel for num_threads(topk_) // this one doesn't create issues
 85 |     // cout << "h1 "<<endl;
 86 |         // #pragma omp parallel for num_threads(8)
 87 |     r = 0;
 88 |     for(int k =0; k<topk_; k++) {
 89 |         //////simpler ///////////
 90 |         // bucket = top_buckets_[topk_*r+ k];
 91 |         // pos = (part*R_+ r)*B_;
 92 |         // pos2 = (part*R_+ r)*block_;
 93 |         // stpt  = counts_[pos+ bucket];
 94 |         // bucketsz = counts_[pos + bucket+1] - stpt;
 95 |         // stpt = stpt+pos2;
 96 |         /////////////////////////
 97 |         
 98 |         stpt  = counts_[(r)*B_+ top_buckets_[topk_*r+ k]] +(r)*block_;
 99 |         bucketsz = counts_[(r)*B_ + top_buckets_[topk_*r+ k]+1] - counts_[(r)*B_+ top_buckets_[topk_*r+ k]];
100 |         // #pragma omp simd
101 |         // cout << "h1_2 "<<endl;
102 |         for (int i=0; i<bucketsz; i++){ // just increment in first pass
103 |             // cout<<part<<" "<<stpt+i<<" "<<inv_lookup_[stpt+i]<<endl;
104 |             counter[inv_lookup_[stpt+i]]+=1;
105 |         }
106 |     }
107 |     // chrono::time_point<chrono::high_resolution_clock> t2 = chrono::high_resolution_clock::now();
108 |     // cout << "d2: "<<chrono::duration_cast<chrono::nanoseconds>(t2-t1).count()/1000000.0 << "msec\n";
109 |     long cnt =0;
110 |     for(int r=1; r<R_; r++){
111 |         for(int k =0; k<topk_; k++) {
112 |             stpt  = counts_[(r)*B_+ top_buckets_[topk_*r+ k]] +(r)*block_;
113 |             bucketsz = counts_[(r)*B_ + top_buckets_[topk_*r+ k]+1] - counts_[(r)*B_+ top_buckets_[topk_*r+ k]];
114 |             for (int i=0; i<bucketsz; i++){
115 |                 if(counter[inv_lookup_[stpt+i]]<threshold){
116 |                     counter[inv_lookup_[stpt+i]]+=1;
117 |                 }
118 |                 if(counter[inv_lookup_[stpt+i]]==threshold&& cnt < maxsize){
119 |                     candidates[cnt] = inv_lookup_[stpt+i];
120 |                     // candidates[cnt] = inv_lookup_[stpt+i]+ (part*block_);
121 |                     cnt++;
122 |                     counter[inv_lookup_[stpt+i]]+=1;
123 |                 }
124 |             }
125 |         }
126 |     }
127 |     candSize[0] = cnt;
128 |     //cleanup
129 |     // chrono::time_point<chrono::high_resolution_clock> t3 = chrono::high_resolution_clock::now();
130 |     // cout << "d3: "<<chrono::duration_cast<chrono::nanoseconds>(t3-t2).count()/1000000.0 << "msec\n";
131 |     
132 |     // if (part<Parts_-1){
133 |         // #pragma omp parallel for num_threads(8) collapse(2)
134 |         for(int r=0; r<R_; r++){
135 |             for(int k =0; k<topk_; k++) {
136 |                 stpt  = counts_[(r)*B_+ top_buckets_[topk_*r+ k]] +(r)*block_;
137 |                 bucketsz = counts_[(r)*B_ + top_buckets_[topk_*r+ k]+1] - counts_[(r)*B_+ top_buckets_[topk_*r+ k]];
138 |                 for (int i=0; i<bucketsz; i++){ // just increment in first pass
139 |                     counter[inv_lookup_[stpt+i]]=0;
140 |                 }
141 |             }
142 |         }
143 |     // }    
144 |     // chrono::time_point<chrono::high_resolution_clock> t4 = chrono::high_resolution_clock::now();
145 |     // cout << "d4: "<<chrono::duration_cast<chrono::nanoseconds>(t4-t3).count()/1000000.0 << "msec\n";
146 |     // if (part == Parts_-1) {
147 |     //     break;}
148 |     }
149 |     
150 |     // delete [] counter;
151 |     // return cnts;
152 | }


--------------------------------------------------------------------------------
/src/InvertedIndex/Gather.h:
--------------------------------------------------------------------------------
 1 | // void cgather_batch(long*, long*, long*, long*, long*, double*, int, int, int, int, int);
 2 | // void cgather_K(float*, long*, float*, long*, int, int, int, int, int);
 3 |  
 4 | 
#include <cstdint>
#include <vector>
 6 | 
 7 | class FastIV {
 8 | public:
 9 |     int R_, B_, threshold, topk_;
10 |     int block_;
11 |     int* inv_lookup_;
12 |     int* counts_;
13 |     int tempe;
14 |     uint8_t* counter; 
15 |     // std::vector<int>* InvIndex;
16 | 
17 |     FastIV(int R, int block, int B, int mf, int topk, int* inv_lookup, int* counts);
18 |     // void createIndex(int i, long* list, int L);
19 |     void FC(int* top_buckets_, int maxsize, int* candidates, int* candSize);
20 | };
21 | 


--------------------------------------------------------------------------------
/src/InvertedIndex/Makefile:
--------------------------------------------------------------------------------
# Build the scoreAgg Cython extension in place.
# `all` depends on `clean`, so every build starts from a clean tree.
# Both targets are phony: a stray file named `all` or `clean` must not
# stop the recipes from running.
.PHONY: all clean

all: clean
	python3 setup.py build_ext --inplace

# Remove build artefacts, including the Cython-generated scoreAgg.cpp.
clean:
	rm -rf build
	rm -rf __pycache__
	rm -rf scoreAgg.cpp
	rm -rf scoreAgg.*.so


--------------------------------------------------------------------------------
/src/InvertedIndex/build/temp.linux-x86_64-3.8/Gather.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/InvertedIndex/build/temp.linux-x86_64-3.8/Gather.o


--------------------------------------------------------------------------------
/src/InvertedIndex/build/temp.linux-x86_64-3.8/scoreAgg.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/InvertedIndex/build/temp.linux-x86_64-3.8/scoreAgg.o


--------------------------------------------------------------------------------
/src/InvertedIndex/scoreAgg.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/InvertedIndex/scoreAgg.cpython-38-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/src/InvertedIndex/scoreAgg.pyx:
--------------------------------------------------------------------------------
 1 | cimport numpy as np
 2 | import cython
 3 | from cython import boundscheck, wraparound
 4 | import time
 5 |     
 6 | cdef extern from "Gather.h":
 7 |     cdef cppclass FastIV:
 8 |         FastIV(int, int, int, int ,int, int*, int*) except +
 9 |         int R, B, mf, topk, node
10 |         int block
11 |         int* inv_lookup
12 |         int* counts
13 |         
14 |         # void createIndex(int, long*, int)
15 |         void FC(int*, int, int*, int*)
16 | 
@boundscheck(False)
@wraparound(False)
cdef class PyFastIV:
    """Python wrapper that owns one heap-allocated C++ ``FastIV`` object.

    The C++ object stores raw pointers into ``inv_lookup`` and ``counts``, so
    the wrapper keeps a reference to both arrays for its whole lifetime.
    """
    cdef FastIV *thisptr
    cdef object _inv_lookup   # keeps the buffer behind FastIV::inv_lookup_ alive
    cdef object _counts       # keeps the buffer behind FastIV::counts_ alive
    cdef Py_ssize_t _n_top    # entries FC() reads from top_buckets_ (R * topk)

    def __cinit__(self, int R, int block, int B, int mf, int topk, np.ndarray[int, ndim=1,  mode="c"] inv_lookup, np.ndarray[int, ndim=1,  mode="c"] counts ):
        # Without these references the caller could drop the arrays and leave
        # the C++ object holding dangling pointers.
        self._inv_lookup = inv_lookup
        self._counts = counts
        self._n_top = (<Py_ssize_t>R) * topk if (R > 0 and topk > 0) else 0
        self.thisptr = new FastIV( R, block, B, mf, topk, &inv_lookup[0], &counts[0])

    def __dealloc__(self):
        # The wrapper allocates the C++ object with `new`, so it must free it.
        # thisptr is NULL when the constructor raised, hence the guard.
        if self.thisptr != NULL:
            del self.thisptr
            self.thisptr = NULL

    # def createIndex(self, int i, np.ndarray[long, ndim=1,  mode="c"] list, int L):
    #     self.thisptr.createIndex(i, &list[0], L)
    def FC(self, np.ndarray[int, ndim=1,  mode="c"] top_buckets_, int maxsize, np.ndarray[int, ndim=1,  mode="c"] candidates, np.ndarray[int, ndim=1,  mode="c"] candSize):
        """Fill ``candidates`` with gathered ids and store their count in ``candSize[0]``.

        Returns None. Raises ValueError when a buffer is too small for what
        the C++ routine reads or writes; bounds checking is off in this class,
        so an undersized buffer would otherwise corrupt memory.
        """
        if top_buckets_.shape[0] < self._n_top:
            raise ValueError("top_buckets_ must hold at least R*topk entries")
        if maxsize > candidates.shape[0]:
            raise ValueError("maxsize exceeds the length of candidates")
        if candSize.shape[0] < 1:
            raise ValueError("candSize must hold at least one element")
        self.thisptr.FC(&top_buckets_[0], maxsize,  &candidates[0], &candSize[0])
27 | 
28 | 


--------------------------------------------------------------------------------
/src/InvertedIndex/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup, Extension
 2 | from Cython.Build import cythonize
 3 | import numpy
 4 | 
 5 | setup(ext_modules = cythonize(Extension(
 6 |            "scoreAgg",                                # the extension name
 7 |            sources=["scoreAgg.pyx", "Gather.cpp"], # the Cython source and additional C++ source files
 8 |            language="c++",                        # generate and compile C++ code
 9 |            include_dirs=[numpy.get_include()],
10 |            extra_compile_args=["-std=c++11", "-fopenmp", "-fopenmp-simd", "-O3"],
11 |            extra_link_args=["-fopenmp", "-fopenmp-simd"]
12 |       )))


--------------------------------------------------------------------------------
/src/__pycache__/BLISS.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/BLISS.cpython-310.pyc


--------------------------------------------------------------------------------
/src/__pycache__/BLISS.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/BLISS.cpython-38.pyc


--------------------------------------------------------------------------------
/src/__pycache__/config.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/config.cpython-310.pyc


--------------------------------------------------------------------------------
/src/__pycache__/config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/config.cpython-38.pyc


--------------------------------------------------------------------------------
/src/__pycache__/dataPrepare.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/dataPrepare.cpython-310.pyc


--------------------------------------------------------------------------------
/src/__pycache__/dataPrepare.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/dataPrepare.cpython-38.pyc


--------------------------------------------------------------------------------
/src/__pycache__/index.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/index.cpython-38.pyc


--------------------------------------------------------------------------------
/src/__pycache__/net.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/net.cpython-310.pyc


--------------------------------------------------------------------------------
/src/__pycache__/net.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/net.cpython-38.pyc


--------------------------------------------------------------------------------
/src/__pycache__/train.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/train.cpython-310.pyc


--------------------------------------------------------------------------------
/src/__pycache__/train.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/train.cpython-38.pyc


--------------------------------------------------------------------------------
/src/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/utils.cpython-310.pyc


--------------------------------------------------------------------------------
/src/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gaurav16gupta/BLISS/5fdcffa07daf7b35f13fc13bbe6c1d22d74135c2/src/__pycache__/utils.cpython-38.pyc


--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
 1 | class config:
 2 |     DATASET = {'deep-1b': {'N':10**9, 'd':96, 'metric': 'IP', 'dt':'float32'},
 3 |             'sift-1b':{'N':10**9, 'd':128, 'metric': 'L2', 'dt':'uint8'},
 4 |             'FB_ssnpp-1b': {'N':10**9, 'd':256, 'metric': 'L2', 'dt':'uint8'},
 5 |             'spacev1b':{'N':10**9, 'd':100, 'metric': 'L2', 'dt':'int8'},
 6 |             'glove': {'N':1183514, 'd':100 , 'metric': 'cosine', 'dt':'float32'},           
 7 |             'sift': {'N':1000000, 'd':128 , 'metric': 'L2', 'dt':'float32'},
 8 |             'yandex': {'N':10**9, 'd':200 , 'metric': 'IP', 'dt':'float32'}
 9 |             }  
10 |     
11 | 


--------------------------------------------------------------------------------
/src/construct.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import argparse
 3 | import os, sys
 4 | sys.path.append('../indices/')
 5 | import pdb
 6 | import argparse
 7 | from utils import *
 8 | from train import trainIndex
 9 | from config import config
10 | 
def _str2bool(value):
    """Parse a command-line flag value into a bool.

    ``type=bool`` turns every non-empty string into True, including
    ``"False"``. Here the usual falsy spellings map to False; anything else
    stays True, as it was before.
    """
    return str(value).strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


parser = argparse.ArgumentParser()
parser.add_argument("--index", default='glove_epc20_K2_B4096_R4', type=str)
parser.add_argument("--gpu", default='0', type=str)
parser.add_argument("--memmap", default=False, type=_str2bool)
args = parser.parse_args()

# The index name encodes the run: <dataset>_epc<epochs>_K<K>_B<B>_R<R>
index_fields = args.index.split('_')
datasetName = index_fields[0]
n_epochs = int(index_fields[1].split('epc')[1])
K = int(index_fields[2].split('K')[1])
B = int(index_fields[3].split('B')[1])
R = int(index_fields[4].split('R')[1])
feat_dim =  config.DATASET[datasetName]['d']
N = config.DATASET[datasetName]['N']
metric = config.DATASET[datasetName]['metric']
dtype = config.DATASET[datasetName]['dt']

# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs("../logs/{}".format(datasetName), exist_ok=True)

lookups_loc  = "../indices/{}/".format(datasetName)
train_data_loc = "../../data/{}/".format(datasetName)
model_save_loc = "../indices/{}/".format(datasetName)
batch_size = 5000
hidden_dim = 512
logfile = "../logs/{}/".format(datasetName)
# NOTE(review): --gpu is parsed above but this hard-coded value is what is
# passed to trainIndex; left unchanged because trainIndex is not visible here.
gpu = 0
gpu_usage =0.9
load_epoch = 0
t1 = time.time()
# Train the R repetitions one after another.
for r in range(R):
    trainIndex(lookups_loc, train_data_loc, datasetName, model_save_loc, batch_size, B, feat_dim, hidden_dim, logfile,
                    r, gpu, gpu_usage, load_epoch, K, n_epochs)

print ("Training finished in: ",time.time()-t1, " sec")
45 | 
46 | 


--------------------------------------------------------------------------------
/src/dataPrepare.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import h5py
  3 | from utils import *
  4 | from config import config
  5 | 
  6 | # add SIFT and other data as well
  7 | 
  8 | def getTraindata(dataname):
  9 |     metric = config.DATASET[dataname]['metric']
 10 |     datapath = '../../data/{}/'.format(dataname)
 11 |     trainpath = datapath + 'train.npy'
 12 |     gtpath = datapath + 'groundTruth.npy'
 13 | 
 14 |     if os.path.exists(trainpath) and os.path.exists(gtpath):     #check file size as well   
 15 |         print ("GT already there")
 16 |     else:
 17 |         #load the full data and get fraction
 18 |         fulldata = getFulldata(dataname, datapath)
 19 |         N = fulldata.shape[0]
 20 |         if N>10**6:
 21 |             # pick = np.random.choice(N, np.clip(N//100, 10**4, 10**6), replace=False) # fix seed
 22 |             np.random.seed(0)
 23 |             pick = np.random.choice(N, 10**6, replace=False) # fix seed
 24 |             data_train = fulldata[pick,:]
 25 |         else:
 26 |             data_train = fulldata
 27 |         
 28 |         gt = getTrueNNS(data_train, metric, 100)
 29 |         np.save(gtpath, gt)
 30 |         np.save(trainpath, data_train)
 31 |         del fulldata
 32 | 
 33 | def getFulldata(dataname, datapath):
 34 |     if dataname == 'glove':
 35 |         if os.path.exists(datapath+'fulldata.dat'):
 36 |             dt = config.DATASET[dataname]['dt'] 
 37 |             N = config.DATASET[dataname]['N']
 38 |             d = config.DATASET[dataname]['d']
 39 |             return np.array(np.memmap(datapath+'fulldata.dat', dtype=dt, mode='c', shape=(N,d)))
 40 |         else:
 41 |             data = np.array(h5py.File('../../data/glove/glove-100-angular.hdf5', 'r').get('train'))
 42 |             norms = np.linalg.norm(data,axis=1)
 43 |             savememmap(datapath+'fulldata.dat', data)
 44 |             np.save(datapath+'norms.npy', norms)
 45 |             return data
 46 |     if dataname == 'sift':
 47 |         if os.path.exists(datapath+'fulldata.dat'):
 48 |             dt = config.DATASET[dataname]['dt'] 
 49 |             N = config.DATASET[dataname]['N']
 50 |             d = config.DATASET[dataname]['d']
 51 |             return np.array(np.memmap(datapath+'fulldata.dat', dtype=dt, mode='c', shape=(N,d)))
 52 |         else:
 53 |             data = np.array(h5py.File('../../data/sift/sift-128-euclidean.hdf5', 'r').get('train'))
 54 |             norms = np.linalg.norm(data,axis=1)
 55 |             savememmap(datapath+'fulldata.dat', data)
 56 |             np.save(datapath+'norms.npy', norms)
 57 |             return data
 58 | 
 59 | def getQueries(dataname):
 60 |     datapath = '../../data/{}/'.format(dataname)
 61 |     if dataname == 'glove':
 62 |         if os.path.exists(datapath+'queries.npy') and os.path.exists(datapath+ 'neighbors100.npy'): 
 63 |             queries = np.load(datapath+'queries.npy')
 64 |             neighbors100 = np.load(datapath+ 'neighbors100.npy')
 65 |         else:
 66 |             queries = np.array(h5py.File('../../data/glove/glove-100-angular.hdf5', 'r').get('test'))
 67 |             neighbors100 = np.array(h5py.File('../../data/glove/glove-100-angular.hdf5', 'r').get('neighbors'))
 68 |             np.save(datapath+'queries.npy', queries)
 69 |             np.save(datapath+ 'neighbors100.npy', neighbors100)
 70 |         return [queries, neighbors100]
 71 | 
 72 |     if dataname == 'sift':
 73 |         if os.path.exists(datapath+'queries.npy') and os.path.exists(datapath+ 'neighbors100.npy'): 
 74 |             queries = np.load(datapath+'queries.npy')
 75 |             neighbors100 = np.load(datapath+ 'neighbors100.npy')
 76 |         else:
 77 |             queries = np.array(h5py.File('../../data/sift/sift-128-euclidean.hdf5', 'r').get('test'))
 78 |             neighbors100 = np.array(h5py.File('../../data/sift/sift-128-euclidean.hdf5', 'r').get('neighbors'))
 79 |             np.save(datapath+'queries.npy', queries)
 80 |             np.save(datapath+ 'neighbors100.npy', neighbors100)
 81 |         return [queries, neighbors100]
 82 | 
 83 | # if dataname == 'deep-1b':
 84 | #     import subprocess
 85 | #     import os
 86 | #     yadiskLink = "https://yadi.sk/d/11eDCm7Dsn9GA"
 87 | 
 88 | #     # download base files
 89 | #     for i in range(0,4):
 90 | #         command = 'curl ' + '"https://cloud-api.yandex.net/v1/disk/public/resources/download?public_key=' \
 91 | #                 + yadiskLink + '&path=/base/base_' + str(i).zfill(2) + '"'
 92 |         
 93 | #         process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
 94 | #         (out, err) = process.communicate()
 95 | #         out = out.decode()
 96 | #         wgetLink = out.split(',')[0][:]
 97 | #         wgetCommand = 'wget ' + wgetLink + ' -O base_' + str(i).zfill(2)
 98 | #         os.system(wgetCommand.split('{"href":')[0]+ wgetCommand.split('{"href":')[1])
 99 | 
100 | #         print ("Downloading base chunk " + str(i).zfill(2) + ' ...')
101 | #         #process = subprocess.Popen(wgetCommand, stdin=subprocess.PIPE, shell=True)
102 | #         #process.stdin.write('e')
103 | #         #process.wait()
104 | 
105 | #     #curate
106 | #     #convert
107 | #     #split
108 | #     #get groundtruth
109 | 
110 | #     DATASET = {'name':'deep-1b','N':10**9, 'd':96, 'metric': 'ip', 'dt':'float32', 
111 | #                     'fullpath':'../../data/deep-1b/fulldata.dat', 'trainpath':'../../data/deep-1b/traindata.dat'}
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/src/index.py:
--------------------------------------------------------------------------------
  1 | from config import config
  2 | import tensorflow as tf
  3 | import time
  4 | import numpy as np
  5 | import argparse
  6 | import os
  7 | import pdb
  8 | import sys
  9 | from dataPrepare import *
 10 | from net import MyModule
 11 | 
 12 | parser = argparse.ArgumentParser()
 13 | parser.add_argument("--index", default='glove_epc20_K2_B4096_R4', type=str)
 14 | args = parser.parse_args()
 15 | datasetName = args.index.split('_')[0]  
 16 | n_epochs = int(args.index.split('_')[1].split('epc')[1]) 
 17 | K = int(args.index.split('_')[2].split('K')[1])  
 18 | B = int(args.index.split('_')[3].split('B')[1])
 19 | R = int(args.index.split('_')[4].split('R')[1])
 20 | 
 21 | def Index(B,R,datasetName, load_epoch, K):
 22 |     bucketSort = True
 23 |     # if not gpu=='all':
 24 |     os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 25 | 
 26 |     #########################Tochange
 27 |     model_save_loc = "../indices/{}/".format(datasetName)
 28 |     lookups_loc  = "../indices/{}/".format(datasetName)
 29 |     N = config.DATASET[datasetName]['N'] 
 30 |     train_data_loc = "../../data/{}/".format(datasetName)
 31 |     batch_size = 5000
 32 | 
 33 |     ##########################
 34 |     # N = n_classes
 35 | 
 36 |     Model = MyModule(R)
 37 |     Model.load([model_save_loc+'/r_'+str(r)+'_epoch_'+str(load_epoch)+'.npz' for r in range(R)]) # node 0 for all
 38 |     print ("model loaded")
 39 |     # print (lookups_loc+'epoch_'+str(load_epoch))
 40 | 
 41 |     datapath = train_data_loc + '/fulldata.dat'
 42 |     dataset = tf.data.Dataset.from_tensor_slices(getFulldata(datasetName, datapath).astype(np.float32))
 43 |     dataset = dataset.batch(batch_size = batch_size)
 44 |     iterator = iter(dataset)
 45 |     print("data loaded")
 46 | 
 47 |     top_preds = np.zeros([R, N, K], dtype=np.int32)
 48 | 
 49 |     # p = Pool(n_cores)
 50 |     t1 = time.time()
 51 |     # pdb.set_trace()
 52 |     start_idx = 0
 53 |     while True: # this loops for batches
 54 |         try:
 55 |             # print (start_idx)
 56 |             top_preds[:, start_idx:min(start_idx+batch_size, N)]  = Model(iterator.get_next(), K) # should give top K bucket IDs
 57 |             start_idx += batch_size
 58 |             sys.stdout.write("Inference progress: %d%%   \r" % (start_idx*100/N) )
 59 |             sys.stdout.flush()
 60 |         except:
 61 |             print (start_idx)
 62 |             # pdb.set_trace()
 63 |             assert (start_idx >=N), "batch iterator issue!"
 64 |             break
 65 | 
 66 |     t2 = time.time()
 67 |     print("Inference time: ", t2-t1)
 68 |     #####################################
 69 |     try:
 70 |         #make it parallel
 71 |         for r in range(R):
 72 |             counts = np.zeros(B+1, dtype=np.int32)
 73 |             bucket_order = np.zeros(N, dtype=np.int32)
 74 |             for i in range(N):
 75 |                 bucket = top_preds[r, i, np.argmin(counts[top_preds[r,i]+1])] 
 76 |                 bucket_order[i] = bucket
 77 |                 counts[bucket+1] += 1  
 78 |                         
 79 |             counts = np.cumsum(counts)
 80 |             class_order = np.zeros(N,dtype=np.int32)
 81 |             class_order = np.argsort(bucket_order)
 82 |             # sorting buckets
 83 |             if bucketSort:
 84 |                 for b in range(B):
 85 |                     class_order[counts[b]:counts[b+1]] = np.sort(class_order[counts[b]:counts[b+1]])
 86 |             ###
 87 |             folder_path = lookups_loc+'epoch_'+str(load_epoch)
 88 |             if not os.path.exists(folder_path):
 89 |                 os.makedirs(folder_path)
 90 |             np.save(folder_path+'/class_order_'+str(r)+'.npy', class_order)
 91 |             np.save(folder_path+'/counts_'+str(r)+'.npy', counts)
 92 |             np.save(folder_path+'/bucket_order_'+str(r)+'.npy', bucket_order)
 93 |             print (r)
 94 |     except:
 95 |         print ("check indexing issue", r)
 96 |     t3 = time.time()
 97 |     print("indexed and saved in time: ", t3-t2)
 98 | 
 99 | Index(B, R, datasetName, n_epochs, K)
100 | 


--------------------------------------------------------------------------------
/src/net.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | 
 4 | class MyModule(tf.Module):
 5 |   def __init__(self, R):
 6 |     self.R = R
 7 |     self.W1 = [None for r in range(R)]
 8 |     self.b1 = [None for r in range(R)]
 9 |     self.hidden_layer = [None for r in range(R)]
10 |     self.W2 = [None for r in range(R)]
11 |     self.b2 = [None for r in range(R)]
12 |     self.logits = [None for r in range(R)]
13 |     self.top_buckets = [None for r in range(R)]
14 | 
15 |   def load(self,paths):
16 |     params = [np.load(path) for path in paths]
17 |     self.W1 = [tf.constant(params[r]['W1']) for r in range(self.R)]
18 |     self.b1 = [tf.constant(params[r]['b1']) for r in range(self.R)]
19 |     self.W2 = [tf.constant(params[r]['W2']) for r in range(self.R)]
20 |     self.b2 = [tf.constant(params[r]['b2']) for r in range(self.R)]
21 | 
22 |   @tf.function
23 |   def __call__(self, x, topk):
24 |     for r in range(self.R):
25 |         self.hidden_layer[r] = tf.nn.relu(tf.matmul(x, self.W1[r])+self.b1[r])
26 |         self.logits[r] = tf.matmul(self.hidden_layer[r],self.W2[r])+self.b2[r]
27 |         self.top_buckets[r] = tf.nn.top_k(self.logits[r], k=topk, sorted=False)[1]
28 |     return self.top_buckets


--------------------------------------------------------------------------------
/src/query.py:
--------------------------------------------------------------------------------
  1 | from config import config
  2 | import tensorflow as tf
  3 | import time
  4 | import numpy as np
  5 | import argparse
  6 | import os, sys
  7 | from utils import *
  8 | from multiprocessing import Pool
  9 | import pdb
 10 | sys.path.append('InvertedIndex/')
 11 | import scoreAgg
 12 | from net import MyModule
 13 | from dataPrepare import *
 14 | import argparse
 15 | 
 16 | parser = argparse.ArgumentParser()
 17 | parser.add_argument("--topm", default=10, type=int)
 18 | parser.add_argument("--mf", default=2, type=int)
 19 | parser.add_argument("--gpu", default='0', type=str)
 20 | parser.add_argument("--index", default='deep-1b_epc20_K2_B65536_R4', type=str)
 21 | # parser.add_argument("--CppInf", default=1, type=bool)
 22 | parser.add_argument("--memmap", default=False, type=bool)
 23 | parser.add_argument("--rerank", default=True, type=bool)
 24 | args = parser.parse_args()
 25 | 
 26 | datasetName = args.index.split('_')[0]  
 27 | eval_epoch = int(args.index.split('_')[1].split('epc')[1]) 
 28 | K = int(args.index.split('_')[2].split('K')[1])  
 29 | B = int(args.index.split('_')[3].split('B')[1])
 30 | R = int(args.index.split('_')[4].split('R')[1])
 31 | feat_dim =  config.DATASET[datasetName]['d']
 32 | N = config.DATASET[datasetName]['N'] 
 33 | metric = config.DATASET[datasetName]['metric'] 
 34 | dtype = config.DATASET[datasetName]['dt'] 
 35 | lookups_loc  = "../indices/{}/".format(datasetName) + '/epoch_'+ str(eval_epoch)
 36 | model_loc = "../indices/{}/".format(datasetName)
 37 | data_loc = "../../data/{}/".format(datasetName)
 38 | buffer = 1024*(int(2*R*N*args.topm/(B*args.mf))//1024)
 39 | 
 40 | batch_size = 32
 41 | logfile = '../logs/'+datasetName+'/'+args.index+'query.txt'
 42 | output_loc = logfile[:-3]+'npy'
 43 | 
 44 | if not args.gpu=='all':
 45 |     os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
 46 | 
 47 | ############################## load lookups ################################
 48 | Model = MyModule(R)
 49 | Model.load([model_loc+'/r_'+str(r)+'_epoch_'+str(eval_epoch)+'.npz' for r in range(R)])
 50 | print ("model loaded")
 51 | 
 52 | inv_lookup = np.zeros(R*N, dtype=np.int32)
 53 | counts = np.zeros(R*(B+1), dtype=np.int32)
 54 | for r in range(R):
 55 |     inv_lookup[r*N: (r +1)*N ] = np.load(lookups_loc+'/class_order_'+str(r)+'.npy')# block size 
 56 |     counts[r*(B+1) : (r +1 )*(B+1) ] = np.load(lookups_loc+'/counts_'+str(r)+'.npy')[:B+1] 
 57 | inv_lookup = np.ascontiguousarray(inv_lookup, dtype=np.int32) 
 58 | counts = np.ascontiguousarray(counts, dtype=np.int32)
 59 | 
 60 | fastIv = scoreAgg.PyFastIV(R, N, (B+1), args.mf, args.topm, inv_lookup, counts)
 61 | # fastIv.createIndex() # in future load this directly from a binary file. Saved by C++ code
 62 | print ("Deserialized")
 63 | 
 64 | ################# Data Loader ####################
 65 | [queries, neighbors100] = getQueries(datasetName)
 66 | queries = queries[:1000,:]
 67 | print("queries loaded ", queries.shape)
 68 | queries = tf.data.Dataset.from_tensor_slices(queries)
 69 | queries = queries.batch(batch_size = batch_size)
 70 | iterator = iter(queries)
 71 | 
 72 | if args.rerank:
 73 |     datapath = data_loc +'fulldata.dat'
 74 |     dataset = getFulldata(datasetName, datapath)
 75 |     if metric=="L2":
 76 |         norms= np.load(data_loc +"norms.npy")
 77 |     if metric =="cosine":
 78 |         norms= np.load(data_loc +"norms.npy")
 79 |         dataset = dataset/(norms[:,None])
 80 |     print ("dense vectors loaded")
 81 | # to check these
 82 | count = 0
 83 | score_sum = [0.0,0.0,0.0]
 84 | output = -1* np.ones([10000,10])
 85 | #########################################
 86 | 
 87 | # p = Pool(config.n_cores)
 88 | fw = open(logfile, 'a', encoding='utf-8') # log file
 89 | bthN = 0
 90 | begin_time = time.time()
 91 | 
 92 | # p = Pool(Parts)
 93 | Inf = 0
 94 | RetRank = 0
 95 | 
 96 | while True:
 97 |     try:
 98 |         x_batch = iterator.get_next()
 99 |         x_batch = tf.cast(x_batch, tf.float32)
100 |         t1 = time.time()
101 |         top_buckets_ = Model(x_batch, args.topm) # should give topm bucket IDs, [R,batch_size,topmvals, ]
102 |         top_buckets_ = np.array(top_buckets_)
103 |         # top_buckets_ = np.transpose(top_buckets_, (2,0,1,3)) # bring batch_size (index 2) ahead, [batch_size,R,2,topm]
104 |         top_buckets_ = np.transpose(top_buckets_, (1,0,2)) # bring batch_size (index 1) ahead, [batch_size,R,topm]
105 | 
106 |         len_cands = np.zeros(top_buckets_.shape[0])
107 |         t2 = time.time()
108 |         Inf+= (t2-t1)
109 |         for i in range(top_buckets_.shape[0]):
110 |             candid = np.empty(buffer, dtype='int32') # does this init takes time?
111 |             candSize = np.empty(1, dtype='int32' )
112 |             fastIv.FC(np.ascontiguousarray(top_buckets_[i,:,:], dtype=np.int32).reshape(-1), buffer, candid, candSize)
113 |             candidates = (candid[0: candSize[0]])
114 |             
115 |             # candidates = (process_scores(top_buckets_[i]))
116 |             score_sum[1] += len(candidates)
117 |             if args.rerank:
118 |                 if metric == "IP":
119 |                     dists = np.dot(dataset[candidates],x_batch[i]) # or L2 dist
120 |                 if metric == "L2":
121 |                     dists = 2*np.dot(dataset[candidates],x_batch[i]) -norms[candidates]
122 |                 if metric =="cosine":
123 |                     dists = np.dot(dataset[candidates],x_batch[i]) # or L2 dist
124 |                 if len(dists)<=10:
125 |                     output[bthN*batch_size + i, :len(dists)] = candidates 
126 |                 if len(dists)>10:
127 |                     top_cands = np.argpartition(dists, -10)[-10:]
128 |                     output[bthN*batch_size + i, :10] = candidates[top_cands] 
129 |                     candidates = candidates[top_cands] 
130 |                 
131 |             score_sum[0] += len(np.intersect1d(candidates, neighbors100[bthN*batch_size + i,:10]))/10
132 | 
133 |         t3 = time.time()
134 |         RetRank+= t3-t2
135 |         bthN+=1
136 |         print (bthN)
137 |     except:
138 |         # print (bthN)
139 |         print ( " topm: ", args.topm, " mf: ", args.mf)
140 |         print('overall Recall for',count,'points:',score_sum[0]/((bthN-1)*batch_size + i))
141 |         print('Avg can. size for',count,'points:',score_sum[1]/((bthN-1)*batch_size + i))
142 |         pdb.set_trace()
143 |         print('Inf per point: ',Inf/((bthN-1)*batch_size))
144 |         print('Ret+rank per point: ',RetRank/((bthN-1)*batch_size))
145 |         print('per point to report: ',(Inf/32 + RetRank/4)/((bthN-1)*batch_size))
146 | 
147 |         print (" topm: ", args.topm, " mf: ", args.mf, file=fw)
148 |         print('overall Recall for',count,'points:',score_sum[0]/((bthN-1)*batch_size + i), file=fw)
149 |         print('Avg can. size for',count,'points:',score_sum[1]/((bthN-1)*batch_size + i), file=fw)
150 |         print('Inf per point: ',Inf/((bthN-1)*batch_size), file=fw)
151 |         print('Ret+rank  per point: ',RetRank/((bthN-1)*batch_size), file=fw)
152 |         print('per point to report: ',(Inf/32 + RetRank/4)/((bthN-1)*batch_size), file=fw)
153 |         np.save(output_loc,output)
154 |         break
155 | 
156 | # p.close()
157 | 
158 | 


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import argparse
  3 | import time
  4 | import os
  5 | import numpy as np
  6 | import logging
  7 | import pdb
  8 | from dataPrepare import *
  9 | from utils import *
 10 | 
 11 | def trainIndex(lookups_loc, train_data_loc, datasetName, model_save_loc, batch_size, B, vec_dim, hidden_dim, logfile,
 12 |                     r, gpu, gpu_usage, load_epoch, k2, n_epochs):
 13 | 
 14 |     getTraindata(datasetName) # check if already there, check if it return correct ground truth
 15 | 
 16 |     tf.compat.v1.disable_eager_execution()
 17 |     os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 18 |     # get train data
 19 |     
 20 |     x_train = np.load(train_data_loc+'train.npy')
 21 |     y_train = np.load(train_data_loc+'groundTruth.npy')
 22 |     N = x_train.shape[0]
 23 |     if not os.path.exists(lookups_loc+'epoch_'+str(load_epoch)+'/'):  
 24 |         os.makedirs(lookups_loc+'epoch_'+str(load_epoch)+'/')
 25 |     create_universal_lookups(r, B, N, lookups_loc+'epoch_'+str(load_epoch)+'/')
 26 | 
 27 |     if load_epoch==0:
 28 |         lookup = tf.Variable(np.load(lookups_loc+'epoch_'+str(load_epoch)+'/bucket_order_'+str(r)+'.npy')[:N])
 29 |     else:
 30 |         lookup = tf.Variable(np.load(lookups_loc+'epoch_'+str(load_epoch)+'/bucket_order_'+str(r)+'.npy')[:N])
 31 | 
 32 |     temp = tf.constant(np.arange(batch_size*100)//100)
 33 | 
 34 |     x = tf.compat.v1.placeholder(tf.float32, shape=[batch_size, vec_dim])
 35 |     _y = tf.compat.v1.placeholder(tf.int64, shape=[batch_size*100])
 36 |     y_idxs = tf.stack([temp, tf.gather(lookup, _y)], axis=-1)
 37 |     y_vals = tf.ones_like(y_idxs[:,0], dtype=tf.float32)
 38 |     y = tf.compat.v1.SparseTensor(y_idxs, y_vals, [batch_size, B])
 39 |     y_ = tf.compat.v1.sparse_tensor_to_dense(y, validate_indices=False)
 40 | 
 41 |     ###############
 42 |     if load_epoch>0:
 43 |         params=np.load(model_save_loc+'/r_'+str(r)+'_epoch_'+str(load_epoch)+'.npz')
 44 |         #
 45 |         W1_tmp = tf.compat.v1.placeholder(tf.float32, shape=[vec_dim, hidden_dim])
 46 |         b1_tmp = tf.compat.v1.placeholder(tf.float32, shape=[hidden_dim])
 47 |         W1 = tf.Variable(W1_tmp)
 48 |         b1 = tf.Variable(b1_tmp)
 49 |         # hidden_layer = tf.nn.relu(tf.sparse_tensor_dense_matmul(x,W1)+b1)
 50 |         hidden_layer = tf.nn.relu(tf.matmul(x,W1)+b1)
 51 |         #
 52 |         W2_tmp = tf.compat.v1.placeholder(tf.float32, shape=[hidden_dim, B])
 53 |         b2_tmp = tf.compat.v1.placeholder(tf.float32, shape=[B])
 54 |         W2 = tf.Variable(W2_tmp)
 55 |         b2 = tf.Variable(b2_tmp)
 56 |         logits = tf.matmul(hidden_layer,W2)+b2
 57 |     else:
 58 |         W1 = tf.Variable(tf.compat.v1.truncated_normal([vec_dim, hidden_dim], stddev=0.05, dtype=tf.float32))
 59 |         b1 = tf.Variable(tf.compat.v1.truncated_normal([hidden_dim], stddev=0.05, dtype=tf.float32))
 60 |         # hidden_layer = tf.nn.relu(tf.sparse_tensor_dense_matmul(x,W1)+b1)
 61 |         hidden_layer = tf.nn.relu(tf.matmul(x,W1)+b1)
 62 |         #
 63 |         W2 = tf.Variable(tf.compat.v1.truncated_normal([hidden_dim, B], stddev=0.05, dtype=tf.float32))
 64 |         b2 = tf.Variable(tf.compat.v1.truncated_normal([B], stddev=0.05, dtype=tf.float32))
 65 |         logits = tf.matmul(hidden_layer,W2)+b2
 66 | 
 67 | 
 68 |     top_buckets = tf.nn.top_k(logits, k=k2, sorted=True)[1]
 69 |     loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y_))
 70 |     train_op = tf.compat.v1.train.AdamOptimizer().minimize(loss)
 71 | 
 72 |     sess = tf.compat.v1.Session(config = tf.compat.v1.ConfigProto(
 73 |                             allow_soft_placement=True,
 74 |                             log_device_placement=False,
 75 |                             gpu_options=tf.compat.v1.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=gpu_usage)))
 76 | 
 77 |     if load_epoch==0:
 78 |         sess.run(tf.compat.v1.global_variables_initializer())
 79 |     else:
 80 |         sess.run(tf.compat.v1.global_variables_initializer(),
 81 |             feed_dict = {
 82 |                 W1_tmp:params['W1'],
 83 |                 b1_tmp:params['b1'],
 84 |                 W2_tmp:params['W2'],
 85 |                 b2_tmp:params['b2']})
 86 |         ##
 87 |         del params
 88 | 
 89 |     begin_time = time.time()
 90 |     total_time = 0
 91 |     logging.basicConfig(filename = logfile+'/logs_'+str(r), level=logging.INFO)
 92 |     n_check=1000
 93 | 
 94 | 
 95 |     n_steps_per_epoch = N//batch_size
 96 | 
 97 |     for curr_epoch in range(load_epoch+1,load_epoch+n_epochs+1):
 98 |         count = 0
 99 |         
100 |         for j in range(n_steps_per_epoch):
101 |             start_idx = j*batch_size
102 |             end_idx = start_idx+batch_size
103 |             # pdb.set_trace()
104 |             try:
105 |                 sess.run(train_op, feed_dict={x:x_train[start_idx:end_idx], _y:y_train[start_idx:end_idx].reshape([-1])})
106 |             except:
107 |                 pdb.set_trace() # to handle this exception
108 |             count += 1
109 |             if count%n_check==0:
110 |                 _, train_loss = sess.run([train_op, loss], feed_dict={x:x_train[start_idx:end_idx], _y:y_train[start_idx:end_idx].reshape([-1])})
111 |                 time_diff = time.time()-begin_time
112 |                 total_time += time_diff
113 |                 logging.info('finished '+str(count)+' steps. Time elapsed for last '+str(n_check)+' steps: '+str(time_diff)+' s')
114 |                 logging.info('train_loss: '+str(train_loss))
115 |                 begin_time = time.time()
116 |                 count+=1
117 |         ############################################
118 |         logging.info('###################################')
119 |         logging.info('finished epoch '+str(curr_epoch))
120 |         logging.info('total time elapsed so far: '+str(total_time))
121 |         logging.info('###################################')
122 |         if curr_epoch%5==0:
123 |             params = sess.run([W1,b1,W2,b2])
124 |             np.savez_compressed(model_save_loc+'/r_'+str(r)+'_epoch_'+str(curr_epoch)+'.npz',
125 |                 W1=params[0], 
126 |                 b1=params[1], 
127 |                 W2=params[2], 
128 |                 b2=params[3])
129 |             del params
130 |             #######################################
131 |             begin_time = time.time()
132 |             
133 |             top_preds = np.zeros([N,k2], dtype=int)
134 |             start_idx = 0
135 |             for i in range(x_train.shape[0]//batch_size):
136 |                 top_preds[start_idx:start_idx+batch_size] = sess.run(top_buckets, feed_dict={x:x_train[start_idx:start_idx+batch_size]})
137 |                 start_idx += batch_size
138 |             ##
139 |             # top_preds[start_idx:] = sess.run(top_buckets, feed_dict={x:x_train[start_idx:]})
140 |             ##
141 |             print(time.time()-begin_time)
142 |             ##################################### 
143 |             counts = np.zeros(B+1, dtype=int)
144 |             bucket_order = np.zeros(N, dtype=int)
145 |             for i in range(N):
146 |                 bucket = top_preds[i, np.argmin(counts[top_preds[i]+1])] 
147 |                 bucket_order[i] = bucket
148 |                 counts[bucket+1] += 1
149 |             ###
150 |             nothing = sess.run(tf.compat.v1.assign(lookup,bucket_order))
151 |             ###
152 |             counts = np.cumsum(counts)
153 |             rolling_counts = np.zeros(B, dtype=int)
154 |             class_order = np.zeros(N,dtype=int)
155 |             for i in range(N):
156 |                 temp = bucket_order[i]
157 |                 class_order[counts[temp]+rolling_counts[temp]] = i
158 |                 rolling_counts[temp] += 1
159 |             
160 |             ###
161 |             # folder_path = lookups_loc+'epoch_'+str(curr_epoch)
162 |             # if not os.path.isdir(folder_path):
163 |             #     os.makedirs(folder_path)
164 |             # np.save(folder_path+'/class_order_'+str(r)+'.npy', class_order)
165 |             # np.save(folder_path+'/counts_'+str(r)+'.npy', counts)
166 |             # np.save(folder_path+'/bucket_order_'+str(r)+'.npy', bucket_order)
167 |             ################
168 |             begin_time = time.time()
169 | 
170 | 
171 | 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import tensorflow as tf
  3 | import time
  4 | import numpy as np
  5 | import os, sys
  6 | import pdb
  7 | import math
  8 | import matplotlib.pyplot as plt
  9 | from tqdm import tqdm
 10 | import time
 11 | import numpy as np
 12 | from multiprocessing import Pool
 13 | from sklearn.utils import murmurhash3_32 as mmh3
 14 | 
 15 | 
 16 | def savememmap(path, ar):
 17 |     if path[-4:]!='.dat':
 18 |         path = path +'.dat'
 19 |     shape = ar.shape
 20 |     dtype = ar.dtype
 21 |     fp = np.memmap( path, dtype=dtype, mode='w+', shape=(shape))
 22 |     fp[:]= ar[:]
 23 |     fp.flush()
 24 | 
 25 | def getTrueNNS(x_train, metric, K):
 26 |     os.environ['CUDA_VISIBLE_DEVICES'] = 'cpu'
 27 |     begin_time = time.time()
 28 |     batch_size = 1000
 29 |     output = np.zeros([x_train.shape[0], K], dtype=np.int32) # for upto 2B
 30 | 
 31 |     if metric=='IP':
 32 |         W = x_train.T
 33 |         W = tf.constant(W)
 34 |         for i in range(x_train.shape[0]//batch_size):
 35 |             start_idx = i*batch_size
 36 |             end_idx = start_idx+batch_size
 37 |             x_batch = x_train[start_idx:end_idx]
 38 |             # sim = x_batch@W
 39 |             sim = tf.matmul(x_batch,W)
 40 |             # top_idxs = np.argpartition(sim, -K)[:,-K:]
 41 |             top_idxs = tf.nn.top_k(sim, k=K, sorted=False)[1]
 42 |             output[start_idx:end_idx] = top_idxs
 43 | 
 44 |     elif metric=='L2':
 45 |         W = x_train.T
 46 |         W_norm = np.square(np.linalg.norm(W,axis=0))
 47 |         W = tf.constant(W)
 48 |         for i in range(x_train.shape[0]//batch_size):
 49 |             start_idx = i*batch_size
 50 |             end_idx = start_idx+batch_size
 51 |             x_batch = x_train[start_idx:end_idx]
 52 |             # sim = 2*x_batch@W - W_norm
 53 |             sim = 2*tf.matmul(x_batch,W)- W_norm
 54 |             # top_idxs = np.argpartition(sim, -K)[:,-K:]
 55 |             top_idxs = tf.nn.top_k(sim, k=K, sorted=False)[1]
 56 |             output[start_idx:end_idx] = top_idxs
 57 | 
 58 |     # elif metric=='cosine':
 59 |     #     x_train = x_train/(np.linalg.norm(x_train,axis=1)[:,None])
 60 |     #     W = x_train.T
 61 |     #     for i in range(x_train.shape[0]//batch_size):
 62 |     #         start_idx = i*batch_size
 63 |     #         end_idx = start_idx+batch_size
 64 |     #         x_batch = x_train[start_idx:end_idx]
 65 |     #         sim = x_batch@W # tf this
 66 |     #         top_idxs = np.argpartition(sim, -K)[:,-K:] # use tf.nn.topk It uses mul cores
 67 |     #         output[start_idx:end_idx] = top_idxs
 68 |     
 69 |     elif metric=='cosine': #todo
 70 |         x_train = x_train/(np.linalg.norm(x_train,axis=1)[:,None])
 71 |         W = tf.constant(x_train.T)
 72 |         for i in range(x_train.shape[0]//batch_size):
 73 |             start_idx = i*batch_size
 74 |             end_idx = start_idx+batch_size
 75 |             x_batch = x_train[start_idx:end_idx]
 76 |             sim = tf.matmul(x_batch,W)
 77 |             top_idxs = tf.nn.top_k(sim, k=K, sorted=False)[1]
 78 |             output[start_idx:end_idx] = top_idxs
 79 | 
 80 |     print(time.time()-begin_time)
 81 |     return output
 82 | 
 83 | 
 84 | def create_universal_lookups(r, B, n_classes, lookups_loc):
 85 |     c_o = lookups_loc+'class_order_'+str(r)+'.npy'
 86 |     ct = lookups_loc+'counts_'+str(r)+'.npy'
 87 |     b_o = lookups_loc+'bucket_order_'+str(r)+'.npy'
 88 |     if os.path.exists(c_o) and os.path.exists(ct) and os.path.exists(b_o):
 89 |         print ('init lookups exists')
 90 |     else:
 91 |         counts = np.zeros(B+1, dtype=int)
 92 |         bucket_order = np.zeros(n_classes, dtype=int)
 93 |         for i in range(n_classes):
 94 |             bucket = mmh3(i,seed=r)%B
 95 |             bucket_order[i] = bucket
 96 |             counts[bucket+1] += 1
 97 |         counts = np.cumsum(counts)
 98 |         rolling_counts = np.zeros(B, dtype=int)
 99 |         class_order = np.zeros(n_classes,dtype=int)
100 |         for i in range(n_classes):
101 |             temp = bucket_order[i]
102 |             class_order[counts[temp]+rolling_counts[temp]] = i
103 |             rolling_counts[temp] += 1
104 |         
105 |         np.save(c_o, class_order)
106 |         np.save(ct,counts)
107 |         np.save(b_o, bucket_order)
108 | 
def process_scores(inp, inv_lookup=None, counts=None, mf=None, min_candidates=10):
    """Collect candidate ids from the selected buckets by vote counting.

    Args:
        inp: array of shape ``[R, 2, topk]``; ``inp[r, 1, k]`` is the k-th
            selected bucket id of repetition r (``inp[r, 0, k]`` holds the
            matching score, which is not used).
        inv_lookup: 2-D array ``[R, N]``; row r lists ids grouped by bucket.
        counts: 2-D array ``[R, B+1]`` of bucket offsets into ``inv_lookup[r]``.
        mf: minimum number of repetitions an id must appear in.
        min_candidates: the threshold is relaxed one step at a time until at
            least this many ids qualify.

    ``inv_lookup``, ``counts`` and ``mf`` default to the module-level names
    ``inv_lookup``, ``counts`` and ``args.mf`` that the function used to read
    implicitly; a ``NameError`` is raised when neither source provides them.

    Returns:
        1-D array of candidate ids in first-seen order.  When fewer than
        ``min_candidates`` ids exist at all, every id seen is returned
        (previously this case never terminated).
    """
    module_globals = globals()
    try:
        if inv_lookup is None:
            inv_lookup = module_globals['inv_lookup']
        if counts is None:
            counts = module_globals['counts']
        if mf is None:
            mf = module_globals['args'].mf
    except KeyError as missing:
        raise NameError("process_scores needs {} (pass it explicitly)".format(missing)) from None

    R = inp.shape[0]
    topk = inp.shape[2]
    # Number of selected buckets each id occurs in.
    freqs = {}
    for r in range(R):
        for k in range(topk):
            bucket = int(inp[r,1,k])
            for key in inv_lookup[r,counts[r,bucket]:counts[r,bucket+1]]:
                freqs[key] = freqs.get(key, 0) + 1

    threshold = mf
    while True:
        candidates = np.array([key for key in freqs if freqs[key]>=threshold])
        # At threshold <= 1 every id already qualifies, so relaxing further
        # cannot add anything.
        if len(candidates)>=min_candidates or threshold<=1:
            return candidates
        threshold -= 1


--------------------------------------------------------------------------------