├── .github ├── bbqvec.png ├── bbqvec.svg ├── gopher-cook.svg └── workflows │ ├── go.yml │ └── rust.yml ├── .gitignore ├── LICENSE ├── README.md ├── backend.go ├── backend_disk.go ├── backend_memory.go ├── backend_quantized_memory.go ├── cmd └── run-ann-benchmark │ └── main.go ├── counting_bitmap.go ├── disk_test.go ├── emperical_test.go ├── errors.go ├── go.mod ├── go.sum ├── integration_test.go ├── micro_test.go ├── quantization.go ├── quantization_f16.go ├── quantization_f16_test.go ├── result.go ├── rust ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── benches │ ├── main_benchmark.rs │ ├── memory_store.rs │ └── micro.rs ├── cmd │ └── test-recall │ │ ├── .gitignore │ │ ├── Cargo.lock │ │ ├── Cargo.toml │ │ └── src │ │ └── main.rs ├── src │ ├── backend.rs │ ├── backend_disk.rs │ ├── backend_memory.rs │ ├── bitmaps.rs │ ├── counting_bitmap.rs │ ├── helpers.rs │ ├── lib.rs │ ├── quantization.rs │ ├── result.rs │ ├── spaces │ │ ├── LICENSE │ │ ├── mod.rs │ │ ├── simple.rs │ │ ├── simple_avx.rs │ │ ├── simple_neon.rs │ │ └── simple_sse.rs │ ├── unaligned_f32.rs │ ├── vector.rs │ ├── vector_file.rs │ └── vector_store.rs └── tests │ ├── basic_test.rs │ └── search.rs ├── speed_test.go ├── vector.go └── vector_store.go /.github/bbqvec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barakmich/bbqvec/ffc10c72562dca295c39f15e9be7661776f00c0f/.github/bbqvec.png -------------------------------------------------------------------------------- /.github/gopher-cook.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 
76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Go 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v4 21 | with: 22 | go-version: '1.22' 23 | 24 | - name: Build 25 | run: go build -v ./... 26 | 27 | - name: Test 28 | run: go test -v ./... 
29 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Build 20 | run: cd rust && cargo build --release --verbose 21 | - name: Run tests 22 | run: cd rust && cargo test --release --verbose 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.test 2 | *.exe 3 | *.pt 4 | *.bgn 5 | *.pprof 6 | perf.data 7 | perf.data.old 8 | /build 9 | /datasets 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024, Daxe, Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![BBQvec Logo](.github/bbqvec.png) 2 | 3 | ![Status](https://img.shields.io/badge/status-beta-blue) 4 | [![license](https://img.shields.io/github/license/barakmich/bbqvec)](LICENSE) 5 | [![GoDoc](https://godoc.org/github.com/barakmich/bbqvec?status.svg)](https://godoc.org/github.com/barakmich/bbqvec) 6 | [![Crates.io](https://img.shields.io/crates/v/bbqvec)](https://crates.io/crates/bbqvec) 7 | [![Go CI](https://github.com/barakmich/bbqvec/actions/workflows/go.yml/badge.svg)](https://github.com/barakmich/bbqvec/actions/workflows/go.yml) 8 | [![Rust CI](https://github.com/barakmich/bbqvec/actions/workflows/rust.yml/badge.svg)](https://github.com/barakmich/bbqvec/actions/workflows/rust.yml) 9 | 10 | BBQvec is an open-source, embedded vector database index for Go and Rust, providing approximate K-nearest-neighbors (aKNN). 
11 | 12 | # Getting Started 13 | 14 | ## Go 15 | 16 | ```go 17 | package main 18 | 19 | import ( 20 | "fmt" 21 | 22 | bbq "github.com/barakmich/bbqvec" 23 | ) 24 | 25 | func main() { 26 | // Declare store parameters 27 | dimensions := 200 28 | nBasis := 10 29 | 30 | // Initialize the store 31 | backend := bbq.NewMemoryBackend(dimensions) 32 | datastore, _ := bbq.NewVectorStore(backend, nBasis) 33 | 34 | // Create some test data, 100K random vectors 35 | vecs := bbq.NewRandVectorSet(100_000, dimensions, nil) 36 | datastore.AddVectorsWithOffset(0, vecs) 37 | /* 38 | Equivalent to: 39 | for i, v := range vecs { 40 | datastore.AddVector(bbq.ID(i), v) 41 | } 42 | */ 43 | 44 | // Run a query 45 | targetVec := bbq.NewRandVector(dimensions, nil) 46 | results, _ := datastore.FindNearest(targetVec, 10, 1000, 1) 47 | 48 | // Inspect the results 49 | top := results.ToSlice()[0] 50 | vec, _ := backend.GetVector(top.ID) 51 | fmt.Println(top.ID, vec, top.Similarity) 52 | } 53 | ``` 54 | 55 | ## Rust 56 | 57 | ```rust 58 | use bbqvec::IndexIDIterator; 59 | 60 | fn main() -> Result<()> { 61 | // Declare store parameters 62 | let dimensions = 200; 63 | let n_basis = 10; 64 | 65 | // Initialize the store 66 | let mem = bbqvec::MemoryBackend::new(dimensions, n_basis)?; 67 | let mut store = bbqvec::VectorStore::new(mem)?; 68 | 69 | // Create some test data, 100K random vectors 70 | let vecs = bbqvec::create_vector_set(dimensions, 100000); 71 | store.add_vector_iter(vecs.enumerate_ids())?; 72 | 73 | // Run a query 74 | let target = bbqvec::create_random_vector(dimensions); 75 | let results = store.find_nearest(&target, 10, 1000, 1)?; 76 | 77 | // Inspect the results 78 | for res in results.iter_results() { 79 | println!("{} {}", res.id, res.similarity) 80 | } 81 | } 82 | 83 | ``` 84 | 85 | # TODOs 86 | 87 | We're still early; Go is the more tried-and-true and suits the beta use-case, but Rust is a good deal faster. We welcome contributions. 
// VectorBackend is the storage contract the index relies on: it persists
// vectors keyed by ID and scores a stored vector against a query vector.
type VectorBackend interface {
	// PutVector stores v under id.
	PutVector(id ID, v Vector) error
	// ComputeSimilarity returns the similarity between targetVector and the
	// vector stored under targetID. The backends in this package report
	// ErrIDNotFound when nothing is stored under targetID.
	ComputeSimilarity(targetVector Vector, targetID ID) (float32, error)
	// Info describes the backend: its dimensionality, its quantization name,
	// and whether it already holds index data.
	Info() BackendInfo
	// Exists reports whether a vector is stored under id.
	Exists(id ID) bool
	// Close releases whatever resources the backend holds.
	Close() error
}
54 | }) 55 | return rs, err 56 | } 57 | -------------------------------------------------------------------------------- /backend_disk.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "encoding/hex" 7 | "encoding/json" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "io/fs" 12 | "math" 13 | "math/rand" 14 | "os" 15 | "path/filepath" 16 | 17 | "github.com/RoaringBitmap/roaring" 18 | "github.com/barakmich/mmap-go" 19 | ) 20 | 21 | const defaultVecsPerFile = 200000 22 | 23 | type DiskBackend[V any] struct { 24 | dir string 25 | metadata diskMetadata 26 | quantization Quantization[V] 27 | vectorPages map[int]mmap.MMap 28 | vectorFiles map[int]*os.File 29 | token uint64 30 | } 31 | 32 | type diskMetadata struct { 33 | Dimensions int `json:"dimensions"` 34 | Quantization string `json:"quantization"` 35 | VecsPerFile int `json:"vecs_per_file"` 36 | VecFiles []int `json:"vec_files"` 37 | } 38 | 39 | var _ IndexBackend = &DiskBackend[Vector]{} 40 | 41 | func NewDiskBackend[V any](directory string, dimensions int, quantization Quantization[V]) (*DiskBackend[V], error) { 42 | token := rand.Uint64() 43 | if token == 0 { 44 | token = 1 45 | } 46 | be := &DiskBackend[V]{ 47 | dir: directory, 48 | metadata: diskMetadata{ 49 | Dimensions: dimensions, 50 | Quantization: quantization.Name(), 51 | VecsPerFile: defaultVecsPerFile, 52 | }, 53 | quantization: quantization, 54 | token: token, 55 | vectorPages: make(map[int]mmap.MMap), 56 | vectorFiles: make(map[int]*os.File), 57 | } 58 | err := be.openFiles() 59 | if err != nil { 60 | return nil, err 61 | } 62 | return be, nil 63 | } 64 | 65 | func (d *DiskBackend[V]) Close() error { 66 | err := d.Sync() 67 | if err != nil { 68 | return err 69 | } 70 | for _, mm := range d.vectorPages { 71 | err := mm.Unmap() 72 | if err != nil { 73 | return err 74 | } 75 | } 76 | for _, f := range d.vectorFiles { 77 | err := f.Close() 78 | if err != nil { 79 
| return err 80 | } 81 | } 82 | return d.saveMetadata() 83 | } 84 | 85 | func (d *DiskBackend[V]) Sync() error { 86 | for _, mm := range d.vectorPages { 87 | err := mm.FlushAsync() 88 | if err != nil { 89 | return err 90 | } 91 | } 92 | return nil 93 | } 94 | 95 | func (d *DiskBackend[V]) openFiles() error { 96 | _, err := os.Stat(d.dir) 97 | if errors.Is(err, fs.ErrNotExist) { 98 | return d.createNew() 99 | } else if err != nil { 100 | return err 101 | } 102 | 103 | _, err = os.Stat(filepath.Join(d.dir, "metadata.json")) 104 | if errors.Is(err, fs.ErrNotExist) { 105 | return d.createNew() 106 | } else if err != nil { 107 | return err 108 | } 109 | 110 | f, err := os.Open(filepath.Join(d.dir, "metadata.json")) 111 | if err != nil { 112 | return err 113 | } 114 | defer f.Close() 115 | err = json.NewDecoder(f).Decode(&d.metadata) 116 | if err != nil { 117 | return err 118 | } 119 | 120 | for _, k := range d.metadata.VecFiles { 121 | f, err := os.OpenFile(mkPageFilepath(d.dir, k), os.O_RDWR, 0755) 122 | if err != nil { 123 | return err 124 | } 125 | d.vectorFiles[k] = f 126 | mm, err := mmap.Map(f, mmap.RDWR, 0) 127 | if err != nil { 128 | return err 129 | } 130 | d.vectorPages[k] = mm 131 | } 132 | return nil 133 | } 134 | 135 | func (d *DiskBackend[V]) createNew() error { 136 | err := os.MkdirAll(d.dir, 0755) 137 | if err != nil { 138 | return err 139 | } 140 | return d.saveMetadata() 141 | } 142 | 143 | func (d *DiskBackend[V]) saveMetadata() error { 144 | f, err := os.Create(filepath.Join(d.dir, "metadata.json")) 145 | if err != nil { 146 | return err 147 | } 148 | defer f.Close() 149 | err = json.NewEncoder(f).Encode(d.metadata) 150 | if err != nil { 151 | return err 152 | } 153 | return nil 154 | } 155 | 156 | func (d *DiskBackend[V]) PutVector(id ID, v Vector) error { 157 | var err error 158 | key := int(id) / d.metadata.VecsPerFile 159 | off := int(id) % d.metadata.VecsPerFile 160 | page, ok := d.vectorPages[key] 161 | if !ok { 162 | page, err = 
d.createPage(key) 163 | if err != nil { 164 | return err 165 | } 166 | } 167 | size := d.quantization.LowerSize(d.metadata.Dimensions) 168 | l, err := d.quantization.Lower(v) 169 | if err != nil { 170 | return err 171 | } 172 | slice := page[off*size : (off+1)*size] 173 | return d.quantization.Marshal(slice, l) 174 | } 175 | 176 | func (d *DiskBackend[V]) createPage(key int) (mmap.MMap, error) { 177 | f, err := os.Create(mkPageFilepath(d.dir, key)) 178 | if err != nil { 179 | return nil, err 180 | } 181 | vecsize := d.quantization.LowerSize(d.metadata.Dimensions) 182 | err = f.Truncate(int64(vecsize * d.metadata.VecsPerFile)) 183 | if err != nil { 184 | return nil, err 185 | } 186 | d.vectorFiles[key] = f 187 | mm, err := mmap.Map(f, mmap.RDWR, 0) 188 | if err != nil { 189 | return nil, err 190 | } 191 | d.vectorPages[key] = mm 192 | d.metadata.VecFiles = append(d.metadata.VecFiles, key) 193 | err = d.saveMetadata() 194 | if err != nil { 195 | return nil, err 196 | } 197 | return mm, nil 198 | } 199 | 200 | func (d *DiskBackend[V]) ComputeSimilarity(targetVector Vector, targetID ID) (float32, error) { 201 | v, err := d.quantization.Lower(targetVector) 202 | if err != nil { 203 | return 0, err 204 | } 205 | target, err := d.GetVector(targetID) 206 | if err != nil { 207 | return 0, err 208 | } 209 | return d.quantization.Similarity(target, v), nil 210 | } 211 | 212 | func (d *DiskBackend[V]) Info() BackendInfo { 213 | exists := true 214 | if _, err := os.Stat(filepath.Join(d.dir, "bases")); errors.Is(err, os.ErrNotExist) { 215 | exists = false 216 | } 217 | 218 | return BackendInfo{ 219 | HasIndexData: exists, 220 | Dimensions: d.metadata.Dimensions, 221 | Quantization: d.quantization.Name(), 222 | } 223 | } 224 | 225 | func (d *DiskBackend[V]) Exists(id ID) bool { 226 | key := int(id) / d.metadata.VecsPerFile 227 | off := int(id) % d.metadata.VecsPerFile 228 | page, ok := d.vectorPages[key] 229 | if !ok { 230 | return false 231 | } 232 | size := 
d.quantization.LowerSize(d.metadata.Dimensions) 233 | slice := page[off*size : (off+1)*size] 234 | for _, x := range slice { 235 | if x != 0x0 { 236 | return true 237 | } 238 | } 239 | return false 240 | } 241 | 242 | func (d *DiskBackend[V]) GetVector(id ID) (v V, err error) { 243 | key := int(id) / d.metadata.VecsPerFile 244 | off := int(id) % d.metadata.VecsPerFile 245 | page, ok := d.vectorPages[key] 246 | if !ok { 247 | err = ErrIDNotFound 248 | return 249 | } 250 | size := d.quantization.LowerSize(d.metadata.Dimensions) 251 | slice := page[off*size : (off+1)*size] 252 | return d.quantization.Unmarshal(slice) 253 | } 254 | 255 | func (d *DiskBackend[V]) SaveBases(bases []Basis, token uint64) (uint64, error) { 256 | if token == d.token { 257 | return d.token, nil 258 | } 259 | nbuf := make([]byte, 4) 260 | buf := bytes.NewBuffer(nil) 261 | for _, b := range bases { 262 | for _, v := range b { 263 | for _, s := range v { 264 | binary.LittleEndian.PutUint32(nbuf, math.Float32bits(s)) 265 | buf.Write(nbuf) 266 | } 267 | } 268 | } 269 | f, err := os.Create(filepath.Join(d.dir, "bases")) 270 | if err != nil { 271 | return 0, err 272 | } 273 | defer f.Close() 274 | _, err = io.Copy(f, buf) 275 | return d.token, err 276 | } 277 | 278 | func (d *DiskBackend[V]) LoadBases() ([]Basis, error) { 279 | f, err := os.Open(filepath.Join(d.dir, "bases")) 280 | if err != nil { 281 | return nil, err 282 | } 283 | var out []Basis 284 | var basis Basis 285 | var vec Vector 286 | buf := make([]byte, 4) 287 | for { 288 | _, err = f.Read(buf) 289 | if errors.Is(err, io.EOF) { 290 | break 291 | } 292 | entry := math.Float32frombits(binary.LittleEndian.Uint32(buf)) 293 | vec = append(vec, entry) 294 | if len(vec) == d.metadata.Dimensions { 295 | basis = append(basis, vec) 296 | vec = nil 297 | if len(basis) == d.metadata.Dimensions { 298 | out = append(out, basis) 299 | basis = nil 300 | } 301 | } 302 | } 303 | return out, nil 304 | } 305 | 306 | func (d *DiskBackend[V]) 
SaveBitmap(basis int, index int, bitmap *roaring.Bitmap) error { 307 | path := mkBmapFilepath(d.dir, basis, index) 308 | f, err := os.Create(path) 309 | if err != nil { 310 | return err 311 | } 312 | defer f.Close() 313 | _, err = bitmap.WriteTo(f) 314 | return err 315 | } 316 | 317 | func (d *DiskBackend[V]) LoadBitmap(basis int, index int) (*roaring.Bitmap, error) { 318 | f, err := os.Open(mkBmapFilepath(d.dir, basis, index)) 319 | if err != nil { 320 | if errors.Is(err, os.ErrNotExist) { 321 | return nil, nil 322 | } 323 | return nil, err 324 | } 325 | bm := roaring.NewBitmap() 326 | _, err = bm.ReadFrom(f) 327 | return bm, err 328 | } 329 | 330 | func mkPageFilepath(basedir string, key int) string { 331 | buf := make([]byte, 8) 332 | binary.BigEndian.PutUint64(buf, uint64(key)) 333 | indexStr := hex.EncodeToString(buf) 334 | return filepath.Join(basedir, fmt.Sprintf("%s.vec", indexStr)) 335 | } 336 | 337 | func mkBmapFilepath(basedir string, basis int, index int) string { 338 | buf := make([]byte, 4) 339 | binary.BigEndian.PutUint16(buf, uint16(basis)) 340 | basisStr := hex.EncodeToString(buf[:2]) 341 | binary.BigEndian.PutUint32(buf, uint32(index)) 342 | indexStr := hex.EncodeToString(buf[:4]) 343 | return filepath.Join(basedir, fmt.Sprintf("%s-%s.bmap", basisStr, indexStr)) 344 | } 345 | -------------------------------------------------------------------------------- /backend_memory.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "errors" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | type MemoryBackend struct { 10 | vecs []Vector 11 | rng *rand.Rand 12 | dim int 13 | } 14 | 15 | var _ scannableBackend = &MemoryBackend{} 16 | var _ VectorGetter[Vector] = &MemoryBackend{} 17 | 18 | func NewMemoryBackend(dimensions int) *MemoryBackend { 19 | return &MemoryBackend{ 20 | rng: rand.New(rand.NewSource(time.Now().UnixMicro())), 21 | dim: dimensions, 22 | } 23 | } 24 | 25 | func (mem *MemoryBackend) 
Close() error { 26 | return nil 27 | } 28 | 29 | func (mem *MemoryBackend) PutVector(id ID, vector Vector) error { 30 | if len(vector) != mem.dim { 31 | return errors.New("MemoryBackend: vector dimension doesn't match") 32 | } 33 | 34 | if int(id) < len(mem.vecs) { 35 | mem.vecs[int(id)] = vector 36 | } else if int(id) == len(mem.vecs) { 37 | mem.vecs = append(mem.vecs, vector) 38 | } else { 39 | mem.grow(int(id)) 40 | mem.vecs[int(id)] = vector 41 | } 42 | return nil 43 | } 44 | 45 | func (mem *MemoryBackend) grow(to int) { 46 | diff := (to - len(mem.vecs)) + 1 47 | mem.vecs = append(mem.vecs, make([]Vector, diff)...) 48 | } 49 | 50 | func (mem *MemoryBackend) ComputeSimilarity(vector Vector, targetID ID) (float32, error) { 51 | target, err := mem.GetVector(targetID) 52 | if err != nil { 53 | return 0, err 54 | } 55 | return target.CosineSimilarity(vector), nil 56 | } 57 | 58 | func (mem *MemoryBackend) Info() BackendInfo { 59 | return BackendInfo{ 60 | HasIndexData: false, 61 | Dimensions: mem.dim, 62 | } 63 | } 64 | 65 | func (mem *MemoryBackend) GetVector(id ID) (Vector, error) { 66 | if int(id) > len(mem.vecs)-1 { 67 | return nil, ErrIDNotFound 68 | } 69 | if mem.vecs[int(id)] == nil { 70 | return nil, ErrIDNotFound 71 | } 72 | return mem.vecs[int(id)], nil 73 | } 74 | 75 | func (mem *MemoryBackend) Exists(id ID) bool { 76 | i := int(id) 77 | if len(mem.vecs) <= i { 78 | return false 79 | } 80 | return mem.vecs[i] != nil 81 | } 82 | 83 | func (mem *MemoryBackend) ForEachVector(cb func(ID) error) error { 84 | for i, v := range mem.vecs { 85 | if v == nil { 86 | continue 87 | } 88 | err := cb(ID(i)) 89 | if err != nil { 90 | return err 91 | } 92 | } 93 | return nil 94 | } 95 | -------------------------------------------------------------------------------- /backend_quantized_memory.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "errors" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | type 
QuantizedMemoryBackend[V any, Q Quantization[V]] struct { 10 | vecs []*V 11 | rng *rand.Rand 12 | dim int 13 | quantization Q 14 | } 15 | 16 | var _ scannableBackend = &QuantizedMemoryBackend[Vector, NoQuantization]{} 17 | var _ VectorGetter[Vector] = &QuantizedMemoryBackend[Vector, NoQuantization]{} 18 | 19 | func NewQuantizedMemoryBackend[V any, Q Quantization[V]](dimensions int, quantization Q) *QuantizedMemoryBackend[V, Q] { 20 | return &QuantizedMemoryBackend[V, Q]{ 21 | rng: rand.New(rand.NewSource(time.Now().UnixMicro())), 22 | dim: dimensions, 23 | quantization: quantization, 24 | } 25 | } 26 | 27 | func (q *QuantizedMemoryBackend[V, Q]) Close() error { 28 | return nil 29 | } 30 | 31 | func (q *QuantizedMemoryBackend[V, Q]) PutVector(id ID, vector Vector) error { 32 | if len(vector) != q.dim { 33 | return errors.New("QuantizedMemoryBackend: vector dimension doesn't match") 34 | } 35 | 36 | v, err := q.quantization.Lower(vector) 37 | if err != nil { 38 | return err 39 | } 40 | 41 | if int(id) < len(q.vecs) { 42 | q.vecs[int(id)] = &v 43 | } else if int(id) == len(q.vecs) { 44 | q.vecs = append(q.vecs, &v) 45 | } else { 46 | q.grow(int(id)) 47 | q.vecs[int(id)] = &v 48 | } 49 | return nil 50 | } 51 | 52 | func (q *QuantizedMemoryBackend[V, Q]) grow(to int) { 53 | diff := (to - len(q.vecs)) + 1 54 | q.vecs = append(q.vecs, make([]*V, diff)...) 
55 | } 56 | 57 | func (q *QuantizedMemoryBackend[V, Q]) ComputeSimilarity(vector Vector, targetID ID) (float32, error) { 58 | v, err := q.quantization.Lower(vector) 59 | if err != nil { 60 | return 0, err 61 | } 62 | target, err := q.GetVector(targetID) 63 | if err != nil { 64 | return 0, err 65 | } 66 | return q.quantization.Similarity(target, v), nil 67 | } 68 | 69 | func (q *QuantizedMemoryBackend[V, Q]) Info() BackendInfo { 70 | return BackendInfo{ 71 | HasIndexData: false, 72 | Dimensions: q.dim, 73 | } 74 | } 75 | 76 | func (q *QuantizedMemoryBackend[V, Q]) Exists(id ID) bool { 77 | i := int(id) 78 | if len(q.vecs) <= i { 79 | return false 80 | } 81 | return q.vecs[i] != nil 82 | } 83 | 84 | func (q *QuantizedMemoryBackend[V, Q]) GetVector(id ID) (v V, err error) { 85 | if int(id) > len(q.vecs)-1 { 86 | err = ErrIDNotFound 87 | return 88 | } 89 | if q.vecs[int(id)] == nil { 90 | err = ErrIDNotFound 91 | return 92 | } 93 | return *q.vecs[int(id)], nil 94 | } 95 | 96 | func (q *QuantizedMemoryBackend[V, Q]) ForEachVector(cb func(ID) error) error { 97 | for i, v := range q.vecs { 98 | if v == nil { 99 | continue 100 | } 101 | err := cb(ID(i)) 102 | if err != nil { 103 | return err 104 | } 105 | } 106 | return nil 107 | } 108 | -------------------------------------------------------------------------------- /cmd/run-ann-benchmark/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "math/rand" 11 | "os" 12 | "path/filepath" 13 | "runtime/pprof" 14 | "strconv" 15 | "sync" 16 | "sync/atomic" 17 | "time" 18 | 19 | bbq "github.com/barakmich/bbqvec" 20 | ) 21 | 22 | var ( 23 | k = flag.Int("k", 10, "K top results") 24 | path = flag.String("path", "", "Path to CSVs") 25 | bases = flag.Int("bases", 20, "Basis sets") 26 | spill = flag.Int("spill", 10, "Spill") 27 | searchK = flag.Int("searchk", 10000, "Search K") 28 | 
parallelism = flag.Int("parallel", 20, "Parallel queries") 29 | cpuprofile = flag.String("cpuprof", "", "CPU Profile file") 30 | ) 31 | 32 | func main() { 33 | flag.Parse() 34 | if *path == "" { 35 | log.Fatal("Path is required") 36 | } 37 | if *cpuprofile != "" { 38 | f, err := os.Create(*cpuprofile) 39 | if err != nil { 40 | log.Fatal("could not create CPU profile: ", err) 41 | } 42 | defer f.Close() // error handling omitted for example 43 | if err := pprof.StartCPUProfile(f); err != nil { 44 | log.Fatal("could not start CPU profile: ", err) 45 | } 46 | defer pprof.StopCPUProfile() 47 | } 48 | trainf, err := os.Open(filepath.Join(*path, "train.csv")) 49 | if err != nil { 50 | log.Fatal(err) 51 | } 52 | defer trainf.Close() 53 | testf, err := os.Open(filepath.Join(*path, "test.csv")) 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | defer testf.Close() 58 | log.Println("Loading Train") 59 | train := loadVecs(trainf) 60 | log.Println("Train has", len(train)) 61 | log.Println("Loading Test") 62 | test := loadVecs(testf) 63 | log.Println("Test has", len(test)) 64 | 65 | log.Println("Loading true neighbors") 66 | 67 | neighborf, err := os.Open(filepath.Join(*path, "neighbors.csv")) 68 | if err != nil { 69 | log.Fatal(err) 70 | } 71 | defer neighborf.Close() 72 | trueres := loadRes(neighborf) 73 | 74 | // Now the fun begins 75 | dim := len(train[0]) 76 | log.Println("Loading into memory") 77 | be := bbq.NewMemoryBackend(dim) 78 | //be := bbq.NewQuantizedMemoryBackend(dim, bbq.Float16Quantization{}) 79 | store, err := bbq.NewVectorStore(be, *bases) 80 | if err != nil { 81 | log.Fatal(err) 82 | } 83 | 84 | start := time.Now() 85 | store.AddVectorsWithOffset(0, train) 86 | store.SetLogger(log.Printf) 87 | log.Printf("Built store in %v", time.Since(start)) 88 | 89 | for i := 0; i < 10; i++ { 90 | spot := rand.Intn(len(trueres)) 91 | fts, _ := bbq.FullTableScanSearch(be, test[spot], 100) 92 | ftsrec := fts.ComputeRecall(trueres[spot], 100) 93 | if ftsrec < 0.98 { 94 | 
log.Fatal("Error") 95 | } 96 | } 97 | log.Printf("FullTableScan data spot check done") 98 | res := make([]*bbq.ResultSet, len(test)) 99 | var finished atomic.Uint32 100 | var wg sync.WaitGroup 101 | ch := make(chan pair) 102 | for i := 0; i < *parallelism; i++ { 103 | go func() { 104 | for p := range ch { 105 | res[p.id], err = store.FindNearest(p.vec, *k, *searchK, *spill) 106 | v := finished.Add(1) 107 | if v%1000 == 0 { 108 | log.Printf("Search finished %d", v) 109 | } 110 | } 111 | wg.Done() 112 | }() 113 | wg.Add(1) 114 | } 115 | start = time.Now() 116 | for i, v := range test { 117 | ch <- pair{i, v} 118 | } 119 | close(ch) 120 | wg.Wait() 121 | delta := time.Since(start) 122 | qps := float64(len(test)) / delta.Seconds() 123 | totalrecall := 0.0 // ...what if this is a dream? 124 | for i := range res { 125 | totalrecall += res[i].ComputeRecall(trueres[i], 10) 126 | } 127 | recall := totalrecall / float64(len(res)) 128 | fmt.Printf("%0.4f,%0.4f", recall, qps) 129 | } 130 | 131 | func loadVecs(f *os.File) []bbq.Vector { 132 | c := csv.NewReader(f) 133 | c.ReuseRecord = true 134 | out := make([]bbq.Vector, 0, 100000) 135 | for { 136 | rec, err := c.Read() 137 | if errors.Is(err, io.EOF) { 138 | break 139 | } 140 | if err != nil { 141 | log.Fatal(err) 142 | } 143 | v := make([]float32, len(rec)) 144 | for i, st := range rec { 145 | x, err := strconv.ParseFloat(st, 32) 146 | if err != nil { 147 | log.Fatal(err) 148 | } 149 | v[i] = float32(x) 150 | } 151 | out = append(out, v) 152 | } 153 | return out 154 | } 155 | 156 | func loadRes(f *os.File) []*bbq.ResultSet { 157 | var out []*bbq.ResultSet 158 | c := csv.NewReader(f) 159 | c.ReuseRecord = true 160 | for { 161 | rec, err := c.Read() 162 | if errors.Is(err, io.EOF) { 163 | break 164 | } 165 | if err != nil { 166 | log.Fatal(err) 167 | } 168 | rs := bbq.NewResultSet(100) 169 | for i, st := range rec { 170 | x, err := strconv.Atoi(st) 171 | if err != nil { 172 | log.Fatal(err) 173 | } 174 | 
rs.AddResult(bbq.ID(x), float32(150-i)) 175 | } 176 | out = append(out, rs) 177 | } 178 | return out 179 | } 180 | 181 | type pair struct { 182 | id int 183 | vec bbq.Vector 184 | } 185 | -------------------------------------------------------------------------------- /counting_bitmap.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/RoaringBitmap/roaring" 7 | ) 8 | 9 | type CountingBitmap struct { 10 | bms []*roaring.Bitmap 11 | } 12 | 13 | func NewCountingBitmap(maxCount int) *CountingBitmap { 14 | return &CountingBitmap{ 15 | bms: make([]*roaring.Bitmap, maxCount), 16 | } 17 | } 18 | 19 | func (c *CountingBitmap) cardinalities() []int { 20 | cards := make([]int, len(c.bms)) 21 | for i, it := range c.bms { 22 | if it == nil { 23 | cards[i] = 0 24 | } else { 25 | cards[i] = int(it.GetCardinality()) 26 | } 27 | } 28 | return cards 29 | } 30 | 31 | func (c *CountingBitmap) String() string { 32 | return fmt.Sprint(c.cardinalities()) 33 | } 34 | 35 | func (c *CountingBitmap) Or(in *roaring.Bitmap) { 36 | cur := in 37 | for i := 0; i < len(c.bms); i++ { 38 | if c.bms[i] == nil { 39 | c.bms[i] = roaring.NewBitmap() 40 | } 41 | c.bms[i].Xor(cur) 42 | cur.AndNot(c.bms[i]) 43 | c.bms[i].Or(cur) 44 | if cur.GetCardinality() == 0 { 45 | break 46 | } 47 | } 48 | } 49 | 50 | // TopK may return more things than intended 51 | func (c *CountingBitmap) TopK(k int) *roaring.Bitmap { 52 | for i := len(c.bms) - 1; i >= 0; i-- { 53 | if c.bms[i] == nil { 54 | continue 55 | } 56 | if i != 0 && int(c.bms[i].GetCardinality()) < k { 57 | continue 58 | } 59 | return c.bms[i] 60 | } 61 | return nil 62 | } 63 | -------------------------------------------------------------------------------- /disk_test.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestDiskBackend(t *testing.T) { 8 | 
testDiskBackendQuantization(t, NoQuantization{}) 9 | } 10 | 11 | func TestDiskBackendF16(t *testing.T) { 12 | testDiskBackendQuantization(t, Float16Quantization{}) 13 | } 14 | 15 | func testDiskBackendQuantization[L any](t *testing.T, q Quantization[L]) { 16 | vecs := NewRandVectorSet(*nVectors, *dim, nil) 17 | 18 | mem := NewMemoryBackend(*dim) 19 | 20 | dir := t.TempDir() 21 | t.Log("TempDir:", dir) 22 | be, err := NewDiskBackend(dir, *dim, q) 23 | if err != nil { 24 | t.Fatal(err) 25 | } 26 | store, err := NewVectorStore(be, *nBasis) 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | 31 | for i, v := range vecs { 32 | err := mem.PutVector(ID(i), v) 33 | if err != nil { 34 | t.Fatal("error mem put", err) 35 | } 36 | err = store.AddVector(ID(i), v) 37 | if err != nil { 38 | t.Fatal("error store put", err) 39 | } 40 | if i%10000 == 0 { 41 | t.Log("Wrote", i) 42 | } 43 | } 44 | err = store.Sync() 45 | if err != nil { 46 | t.Fatal(err) 47 | } 48 | 49 | err = store.Close() 50 | if err != nil { 51 | t.Fatal(err) 52 | } 53 | 54 | t.Log("Reopening") 55 | // Reopen 56 | 57 | be, err = NewDiskBackend(dir, *dim, q) 58 | if err != nil { 59 | t.Fatal("Couldn't open disk backend", err) 60 | } 61 | store, err = NewVectorStore(be, *nBasis) 62 | if err != nil { 63 | t.Fatal("Couldn't open vector store", err) 64 | } 65 | 66 | targetvecs := NewRandVectorSet(*testvecs, *dim, nil) 67 | for _, v := range targetvecs { 68 | fts, err := FullTableScanSearch(mem, v, 20) 69 | fts.Len() 70 | if err != nil { 71 | t.Fatal(err) 72 | } 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /emperical_test.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | ) 7 | 8 | func TestEmpericalCountBitmapConstant(t *testing.T) { 9 | vecs := NewRandVectorSet(*nVectors, *dim, nil) 10 | 11 | be := NewMemoryBackend(*dim) 12 | store, err := NewVectorStore(be, *nBasis) 
13 | if err != nil { 14 | t.Fatal(err) 15 | } 16 | 17 | for i, v := range vecs { 18 | store.AddVector(ID(i), v) 19 | } 20 | 21 | count := 0 22 | n := 0 23 | for _, basisbms := range store.bms { 24 | for _, bm := range basisbms { 25 | count += int(bm.GetCardinality()) 26 | n += 1 27 | } 28 | } 29 | t.Logf("Expected avg bitmap count: %0.2f", float64(len(vecs))/float64(2**dim)) 30 | t.Logf("Average bitmap count: %0.2f", float64(count)/float64(n)) 31 | // now we get into the weeds 32 | buf := make([]float32, store.dimensions) 33 | maxes := make([]int, 1) 34 | target := NewRandVector(*dim, nil) 35 | counts := NewCountingBitmap(*nBasis) 36 | for i, basis := range store.bases { 37 | store.findIndexesForBasis(target, basis, buf, maxes) 38 | for _, m := range maxes { 39 | if v, ok := store.bms[i][m]; ok { 40 | counts.Or(v) 41 | } 42 | } 43 | printPredicted(i+1, t) 44 | t.Logf("got %#v", counts.cardinalities()) 45 | } 46 | } 47 | 48 | const k = 0.83 49 | 50 | func printPredicted(i int, t *testing.T) { 51 | f := make([]float64, i) 52 | for j := 0; j < i; j++ { 53 | f[j] = (math.Pow(float64(i), (k*float64(j))+1.0) * float64(*nVectors)) / math.Pow(float64(2**dim), float64(j+1)) 54 | } 55 | strs := make([]int, i) 56 | for i, g := range f { 57 | strs[i] = int(g) 58 | } 59 | t.Logf("exp %#v", strs) 60 | } 61 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import "errors" 4 | 5 | var ( 6 | ErrAlreadyBuilt = errors.New("Already built the index") 7 | ErrIDNotFound = errors.New("ID not found") 8 | ) 9 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/barakmich/bbqvec 2 | 3 | go 1.22.3 4 | 5 | require ( 6 | github.com/RoaringBitmap/roaring v1.9.3 7 | github.com/barakmich/mmap-go 
v0.0.0-20240708014031-bf5d4a307f6e 8 | github.com/kelindar/bitmap v1.5.2 9 | github.com/viterin/vek v0.4.2 10 | github.com/x448/float16 v0.8.4 11 | ) 12 | 13 | require ( 14 | github.com/bits-and-blooms/bitset v1.13.0 // indirect 15 | github.com/chewxy/math32 v1.10.1 // indirect 16 | github.com/kelindar/simd v1.1.2 // indirect 17 | github.com/klauspost/cpuid/v2 v2.2.7 // indirect 18 | github.com/mschoch/smat v0.2.0 // indirect 19 | github.com/viterin/partial v1.1.0 // indirect 20 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect 21 | golang.org/x/sys v0.20.0 // indirect 22 | ) 23 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/RoaringBitmap/roaring v1.9.3 h1:t4EbC5qQwnisr5PrP9nt0IRhRTb9gMUgQF4t4S2OByM= 2 | github.com/RoaringBitmap/roaring v1.9.3/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= 3 | github.com/barakmich/mmap-go v0.0.0-20240708014031-bf5d4a307f6e h1:IKlgXkbV1ppIoCGl5o+aUSjwrMpfNMNDL0eX2hHCbsw= 4 | github.com/barakmich/mmap-go v0.0.0-20240708014031-bf5d4a307f6e/go.mod h1:QuO3A7CjHPLyaUVpqDTZznW6WhzCxm4SGthyaWRrd5s= 5 | github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= 6 | github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJRUA0wFAVE= 7 | github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= 8 | github.com/chewxy/math32 v1.10.1 h1:LFpeY0SLJXeaiej/eIp2L40VYfscTvKh/FSEZ68uMkU= 9 | github.com/chewxy/math32 v1.10.1/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs= 10 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 12 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/kelindar/bitmap v1.5.2 
h1:XwX7CTvJtetQZ64zrOkApoZZHBJRkjE23NfqUALA/HE= 14 | github.com/kelindar/bitmap v1.5.2/go.mod h1:j3qZjxH9s4OtvsnFTP2bmPkjqil9Y2xQlxPYHexasEA= 15 | github.com/kelindar/simd v1.1.2 h1:KduKb+M9cMY2HIH8S/cdJyD+5n5EGgq+Aeeleos55To= 16 | github.com/kelindar/simd v1.1.2/go.mod h1:inq4DFudC7W8L5fhxoeZflLRNpWSs0GNx6MlWFvuvr0= 17 | github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= 18 | github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 19 | github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= 20 | github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= 21 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 22 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 23 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 24 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 25 | github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= 26 | github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 27 | github.com/viterin/partial v1.1.0 h1:iH1l1xqBlapXsYzADS1dcbizg3iQUKTU1rbwkHv/80E= 28 | github.com/viterin/partial v1.1.0/go.mod h1:oKGAo7/wylWkJTLrWX8n+f4aDPtQMQ6VG4dd2qur5QA= 29 | github.com/viterin/vek v0.4.2 h1:Vyv04UjQT6gcjEFX82AS9ocgNbAJqsHviheIBdPlv5U= 30 | github.com/viterin/vek v0.4.2/go.mod h1:A4JRAe8OvbhdzBL5ofzjBS0J29FyUrf95tQogvtHHUc= 31 | github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= 32 | github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= 33 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= 34 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= 35 | 
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 36 | golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= 37 | golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 38 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 39 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 40 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 41 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 42 | -------------------------------------------------------------------------------- /integration_test.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestBasic(t *testing.T) { 8 | dim := 256 9 | nBasis := 20 10 | k := 20 11 | searchk := 200 12 | 13 | vecs := NewRandVectorSet(100000, dim, nil) 14 | 15 | be := NewMemoryBackend(dim) 16 | store, err := NewVectorStore(be, nBasis, WithPrespill(2)) 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | 21 | for i, v := range vecs { 22 | store.AddVector(ID(i), v) 23 | } 24 | 25 | store.SetLogger(t.Logf) 26 | 27 | target := NewRandVector(dim, nil) 28 | indexNearest, err := store.FindNearest(target, k, searchk, 4) 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | t.Log(indexNearest) 33 | ftsNearest, err := FullTableScanSearch(be, target, k) 34 | t.Log(ftsNearest) 35 | recall := indexNearest.ComputeRecall(ftsNearest, k) 36 | t.Log("Recall: ", recall) 37 | } 38 | -------------------------------------------------------------------------------- /micro_test.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | "github.com/RoaringBitmap/roaring" 8 | "github.com/kelindar/bitmap" 9 | "github.com/viterin/vek/vek32" 10 
| ) 11 | 12 | func BenchmarkMicroDot(b *testing.B) { 13 | v := NewRandVector(100, nil) 14 | n := NewRandVector(100, nil) 15 | for i := 0; i < b.N; i++ { 16 | vek32.Dot(v, n) 17 | } 18 | } 19 | 20 | func BenchmarkMicroRoaring(b *testing.B) { 21 | x := roaring.NewBitmap() 22 | y := roaring.NewBitmap() 23 | for range 20000 { 24 | x.AddInt(rand.Intn(2000000)) 25 | y.AddInt(rand.Intn(2000000)) 26 | } 27 | b.ResetTimer() 28 | for i := 0; i < b.N; i++ { 29 | roaring.Or(x, y) 30 | } 31 | } 32 | 33 | func BenchmarkMicroBitmap(b *testing.B) { 34 | var x bitmap.Bitmap 35 | var y bitmap.Bitmap 36 | for range 20000 { 37 | x.Set(uint32(rand.Intn(2000000))) 38 | y.Set(uint32(rand.Intn(2000000))) 39 | } 40 | b.ResetTimer() 41 | for i := 0; i < b.N; i++ { 42 | var z bitmap.Bitmap 43 | z.Or(x) 44 | z.Or(y) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /quantization.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "encoding/binary" 5 | "math" 6 | 7 | "github.com/viterin/vek/vek32" 8 | ) 9 | 10 | type Quantization[L any] interface { 11 | Similarity(x, y L) float32 12 | Lower(v Vector) (L, error) 13 | Marshal(to []byte, lower L) error 14 | Unmarshal(data []byte) (L, error) 15 | Name() string 16 | LowerSize(dim int) int 17 | } 18 | 19 | var _ Quantization[Vector] = NoQuantization{} 20 | 21 | type NoQuantization struct{} 22 | 23 | func (q NoQuantization) Similarity(x, y Vector) float32 { 24 | return vek32.CosineSimilarity(x, y) 25 | } 26 | 27 | func (q NoQuantization) Lower(v Vector) (Vector, error) { 28 | return v, nil 29 | } 30 | 31 | func (q NoQuantization) Marshal(to []byte, lower Vector) error { 32 | for i, n := range lower { 33 | u := math.Float32bits(n) 34 | binary.LittleEndian.PutUint32(to[i*4:], u) 35 | } 36 | return nil 37 | } 38 | 39 | func (q NoQuantization) Unmarshal(data []byte) (Vector, error) { 40 | out := make([]float32, len(data)>>2) 41 | for i 
:= 0; i < len(data); i += 4 { 42 | bits := binary.LittleEndian.Uint32(data[i:]) 43 | out[i>>2] = math.Float32frombits(bits) 44 | } 45 | return out, nil 46 | } 47 | 48 | func (q NoQuantization) Name() string { 49 | return "none" 50 | } 51 | 52 | func (q NoQuantization) LowerSize(dim int) int { 53 | return 4 * dim 54 | } 55 | -------------------------------------------------------------------------------- /quantization_f16.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "encoding/binary" 5 | 6 | "github.com/viterin/vek/vek32" 7 | "github.com/x448/float16" 8 | ) 9 | 10 | type float16Vec []float16.Float16 11 | 12 | var _ Quantization[float16Vec] = Float16Quantization{} 13 | 14 | type Float16Quantization struct { 15 | bufx, bufy Vector 16 | } 17 | 18 | func (q Float16Quantization) Similarity(x, y float16Vec) float32 { 19 | if q.bufx == nil { 20 | q.bufx = make(Vector, len(x)) 21 | q.bufy = make(Vector, len(x)) 22 | } 23 | for i := range x { 24 | q.bufx[i] = x[i].Float32() 25 | q.bufy[i] = y[i].Float32() 26 | } 27 | return vek32.CosineSimilarity(q.bufx, q.bufy) 28 | } 29 | 30 | func (q Float16Quantization) Lower(v Vector) (float16Vec, error) { 31 | out := make(float16Vec, len(v)) 32 | for i, x := range v { 33 | out[i] = float16.Fromfloat32(x) 34 | } 35 | return out, nil 36 | } 37 | 38 | func (q Float16Quantization) Marshal(to []byte, lower float16Vec) error { 39 | for i, n := range lower { 40 | u := n.Bits() 41 | binary.LittleEndian.PutUint16(to[i*2:], u) 42 | } 43 | return nil 44 | } 45 | 46 | func (q Float16Quantization) Unmarshal(data []byte) (float16Vec, error) { 47 | out := make(float16Vec, len(data)>>1) 48 | for i := 0; i < len(data); i += 4 { 49 | bits := binary.LittleEndian.Uint16(data[i:]) 50 | out[i>>1] = float16.Frombits(bits) 51 | } 52 | return out, nil 53 | } 54 | 55 | func (q Float16Quantization) Name() string { 56 | return "float16" 57 | } 58 | 59 | func (q Float16Quantization) 
LowerSize(dim int) int { 60 | return 2 * dim 61 | } 62 | -------------------------------------------------------------------------------- /quantization_f16_test.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import "testing" 4 | 5 | func TestFloat16Quantization(t *testing.T) { 6 | vecs := NewRandVectorSet(1000, *dim, nil) 7 | mem := NewMemoryBackend(*dim) 8 | quant := NewQuantizedMemoryBackend(*dim, Float16Quantization{}) 9 | for i, v := range vecs { 10 | mem.PutVector(ID(i), v) 11 | quant.PutVector(ID(i), v) 12 | } 13 | target := NewRandVector(*dim, nil) 14 | memrs, err := FullTableScanSearch(mem, target, 20) 15 | if err != nil { 16 | t.Fatal(err) 17 | } 18 | qrs, err := FullTableScanSearch(quant, target, 20) 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | recall := memrs.ComputeRecall(qrs, 10) 23 | t.Logf("Recall %0.4f\n", recall) 24 | t.Logf("\n%s\n%s", memrs, qrs) 25 | } 26 | 27 | func TestFloat16Backend(t *testing.T) { 28 | vecs := NewRandVectorSet(1000, *dim, nil) 29 | quant := NewQuantizedMemoryBackend(*dim, Float16Quantization{}) 30 | store, err := NewVectorStore(quant, *nBasis, WithPrespill(2)) 31 | if err != nil { 32 | t.Fatal(err) 33 | } 34 | err = store.AddVectorsWithOffset(0, vecs) 35 | if err != nil { 36 | t.Fatal(err) 37 | } 38 | 39 | target := NewRandVector(*dim, nil) 40 | qrs, err := FullTableScanSearch(quant, target, 20) 41 | if err != nil { 42 | t.Fatal(err) 43 | } 44 | rs, err := store.FindNearest(target, 20, 20000, 2) 45 | if err != nil { 46 | t.Fatal(err) 47 | } 48 | recall := rs.ComputeRecall(qrs, 10) 49 | t.Logf("Recall %0.4f\n", recall) 50 | t.Logf("\n%s\n%s", rs, qrs) 51 | } 52 | -------------------------------------------------------------------------------- /result.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | type Result struct { 9 | Similarity float32 10 | ID ID 11 | 
} 12 | 13 | func (r Result) String() string { 14 | return fmt.Sprintf("(%d %0.4f)", r.ID, r.Similarity) 15 | } 16 | 17 | type ResultSet struct { 18 | inner sync.Mutex 19 | sims []float32 20 | ids []ID 21 | k int 22 | valid int 23 | } 24 | 25 | func NewResultSet(topK int) *ResultSet { 26 | return &ResultSet{ 27 | k: topK, 28 | sims: make([]float32, topK), 29 | ids: make([]ID, topK), 30 | valid: 0, 31 | } 32 | } 33 | 34 | func (rs *ResultSet) Len() int { 35 | return len(rs.sims) 36 | } 37 | 38 | func (rs *ResultSet) ComputeRecall(baseline *ResultSet, at int) float64 { 39 | found := 0 40 | for _, v := range baseline.ids[:at] { 41 | for _, w := range rs.ids[:at] { 42 | if v == w { 43 | found += 1 44 | } 45 | } 46 | } 47 | return float64(found) / float64(at) 48 | } 49 | 50 | func (rs *ResultSet) String() string { 51 | return fmt.Sprint(rs.ToSlice()) 52 | } 53 | 54 | func (rs *ResultSet) AddResult(id ID, sim float32) bool { 55 | // Do a quick check... 56 | if rs.valid == rs.k { 57 | // Bail if the last one beats us 58 | last := rs.sims[len(rs.sims)-1] 59 | if last > sim { 60 | return false 61 | } 62 | } 63 | rs.inner.Lock() 64 | defer rs.inner.Unlock() 65 | insert := 0 66 | found := false 67 | for insert != rs.k { 68 | // If we're building it out, then the new insertion point is at the end. 
69 | if rs.valid <= insert { 70 | rs.valid += 1 71 | found = true 72 | break 73 | } 74 | if rs.ids[insert] == id { 75 | return true 76 | } 77 | if rs.sims[insert] < sim { 78 | found = true 79 | break 80 | } 81 | insert++ 82 | } 83 | if !found { 84 | return false 85 | } 86 | copy(rs.sims[insert+1:], rs.sims[insert:]) 87 | rs.sims[insert] = sim 88 | copy(rs.ids[insert+1:], rs.ids[insert:]) 89 | rs.ids[insert] = id 90 | return true 91 | } 92 | 93 | func (rs *ResultSet) ToSlice() []*Result { 94 | out := make([]*Result, rs.valid) 95 | for i := range out { 96 | out[i] = &Result{ 97 | Similarity: rs.sims[i], 98 | ID: rs.ids[i], 99 | } 100 | } 101 | return out 102 | } 103 | -------------------------------------------------------------------------------- /rust/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /rust/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.22.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "1.0.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 19 | 20 | [[package]] 21 | name = "ahash" 22 | version = "0.8.11" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" 25 | dependencies = [ 26 | "cfg-if", 27 | "getrandom", 28 | "once_cell", 29 | "version_check", 30 | "zerocopy 0.7.34", 31 | ] 32 | 33 | [[package]] 34 | name = "aho-corasick" 35 | version = "1.1.3" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 38 | dependencies = [ 39 | "memchr", 40 | ] 41 | 42 | [[package]] 43 | name = "anes" 44 | version = "0.1.6" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 47 | 48 | [[package]] 49 | name = "anstyle" 50 | version = "1.0.7" 51 | source = "registry+https://github.com/rust-lang/crates.io-index" 52 | checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" 53 | 54 | [[package]] 55 | name = "anyhow" 56 | version = "1.0.86" 57 | source = "registry+https://github.com/rust-lang/crates.io-index" 58 | checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" 59 | 60 | [[package]] 61 | name = "argminmax" 62 | version = "0.6.2" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 64 | checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" 65 | 
dependencies = [ 66 | "num-traits", 67 | ] 68 | 69 | [[package]] 70 | name = "arrayvec" 71 | version = "0.7.4" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" 74 | 75 | [[package]] 76 | name = "autocfg" 77 | version = "1.3.0" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 80 | 81 | [[package]] 82 | name = "backtrace" 83 | version = "0.3.72" 84 | source = "registry+https://github.com/rust-lang/crates.io-index" 85 | checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" 86 | dependencies = [ 87 | "addr2line", 88 | "cc", 89 | "cfg-if", 90 | "libc", 91 | "miniz_oxide", 92 | "object", 93 | "rustc-demangle", 94 | ] 95 | 96 | [[package]] 97 | name = "bbqvec" 98 | version = "0.0.9" 99 | dependencies = [ 100 | "anyhow", 101 | "argminmax", 102 | "bitvec", 103 | "bytemuck", 104 | "byteorder", 105 | "criterion", 106 | "croaring", 107 | "half", 108 | "memmap2", 109 | "pprof", 110 | "rand", 111 | "rayon", 112 | "roaring", 113 | "serde", 114 | "serde_json", 115 | "thiserror", 116 | ] 117 | 118 | [[package]] 119 | name = "bitflags" 120 | version = "1.3.2" 121 | source = "registry+https://github.com/rust-lang/crates.io-index" 122 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 123 | 124 | [[package]] 125 | name = "bitflags" 126 | version = "2.5.0" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" 129 | 130 | [[package]] 131 | name = "bitvec" 132 | version = "1.0.1" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" 135 | dependencies = [ 136 | "funty", 137 | "radium", 138 | "serde", 139 | "tap", 140 | "wyz", 141 | ] 
142 | 143 | [[package]] 144 | name = "bumpalo" 145 | version = "3.16.0" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" 148 | 149 | [[package]] 150 | name = "bytemuck" 151 | version = "1.16.0" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" 154 | dependencies = [ 155 | "bytemuck_derive", 156 | ] 157 | 158 | [[package]] 159 | name = "bytemuck_derive" 160 | version = "1.6.0" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" 163 | dependencies = [ 164 | "proc-macro2", 165 | "quote", 166 | "syn", 167 | ] 168 | 169 | [[package]] 170 | name = "byteorder" 171 | version = "1.5.0" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 174 | 175 | [[package]] 176 | name = "cast" 177 | version = "0.3.0" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 180 | 181 | [[package]] 182 | name = "cc" 183 | version = "1.0.98" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" 186 | 187 | [[package]] 188 | name = "cfg-if" 189 | version = "1.0.0" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 192 | 193 | [[package]] 194 | name = "ciborium" 195 | version = "0.2.2" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 198 | dependencies = [ 199 | "ciborium-io", 200 | 
"ciborium-ll", 201 | "serde", 202 | ] 203 | 204 | [[package]] 205 | name = "ciborium-io" 206 | version = "0.2.2" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 209 | 210 | [[package]] 211 | name = "ciborium-ll" 212 | version = "0.2.2" 213 | source = "registry+https://github.com/rust-lang/crates.io-index" 214 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 215 | dependencies = [ 216 | "ciborium-io", 217 | "half", 218 | ] 219 | 220 | [[package]] 221 | name = "clap" 222 | version = "4.5.4" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" 225 | dependencies = [ 226 | "clap_builder", 227 | ] 228 | 229 | [[package]] 230 | name = "clap_builder" 231 | version = "4.5.2" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" 234 | dependencies = [ 235 | "anstyle", 236 | "clap_lex", 237 | ] 238 | 239 | [[package]] 240 | name = "clap_lex" 241 | version = "0.7.0" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" 244 | 245 | [[package]] 246 | name = "cpp_demangle" 247 | version = "0.4.3" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "7e8227005286ec39567949b33df9896bcadfa6051bccca2488129f108ca23119" 250 | dependencies = [ 251 | "cfg-if", 252 | ] 253 | 254 | [[package]] 255 | name = "criterion" 256 | version = "0.5.1" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 259 | dependencies = [ 260 | "anes", 261 | "cast", 262 | "ciborium", 263 | "clap", 264 | "criterion-plot", 265 | "is-terminal", 
266 | "itertools", 267 | "num-traits", 268 | "once_cell", 269 | "oorandom", 270 | "plotters", 271 | "rayon", 272 | "regex", 273 | "serde", 274 | "serde_derive", 275 | "serde_json", 276 | "tinytemplate", 277 | "walkdir", 278 | ] 279 | 280 | [[package]] 281 | name = "criterion-plot" 282 | version = "0.5.0" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 285 | dependencies = [ 286 | "cast", 287 | "itertools", 288 | ] 289 | 290 | [[package]] 291 | name = "croaring" 292 | version = "1.1.0" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "611eaefca84c93e431ad82dfb848f6e05a99e25148384f45a3852b0fbe1c8086" 295 | dependencies = [ 296 | "byteorder", 297 | "croaring-sys", 298 | ] 299 | 300 | [[package]] 301 | name = "croaring-sys" 302 | version = "2.0.0" 303 | source = "registry+https://github.com/rust-lang/crates.io-index" 304 | checksum = "ab5260027c04c33d67f405589d9c26e1e991fe062fb165f3094c9836e6c3b17f" 305 | dependencies = [ 306 | "cc", 307 | ] 308 | 309 | [[package]] 310 | name = "crossbeam-deque" 311 | version = "0.8.5" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 314 | dependencies = [ 315 | "crossbeam-epoch", 316 | "crossbeam-utils", 317 | ] 318 | 319 | [[package]] 320 | name = "crossbeam-epoch" 321 | version = "0.9.18" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 324 | dependencies = [ 325 | "crossbeam-utils", 326 | ] 327 | 328 | [[package]] 329 | name = "crossbeam-utils" 330 | version = "0.8.20" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 333 | 334 | [[package]] 335 | name = "crunchy" 336 | version 
= "0.2.2" 337 | source = "registry+https://github.com/rust-lang/crates.io-index" 338 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 339 | 340 | [[package]] 341 | name = "debugid" 342 | version = "0.8.0" 343 | source = "registry+https://github.com/rust-lang/crates.io-index" 344 | checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" 345 | dependencies = [ 346 | "uuid", 347 | ] 348 | 349 | [[package]] 350 | name = "either" 351 | version = "1.12.0" 352 | source = "registry+https://github.com/rust-lang/crates.io-index" 353 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" 354 | 355 | [[package]] 356 | name = "equivalent" 357 | version = "1.0.1" 358 | source = "registry+https://github.com/rust-lang/crates.io-index" 359 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 360 | 361 | [[package]] 362 | name = "errno" 363 | version = "0.3.9" 364 | source = "registry+https://github.com/rust-lang/crates.io-index" 365 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 366 | dependencies = [ 367 | "libc", 368 | "windows-sys", 369 | ] 370 | 371 | [[package]] 372 | name = "fastrand" 373 | version = "2.1.0" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" 376 | 377 | [[package]] 378 | name = "findshlibs" 379 | version = "0.10.2" 380 | source = "registry+https://github.com/rust-lang/crates.io-index" 381 | checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" 382 | dependencies = [ 383 | "cc", 384 | "lazy_static", 385 | "libc", 386 | "winapi", 387 | ] 388 | 389 | [[package]] 390 | name = "funty" 391 | version = "2.0.0" 392 | source = "registry+https://github.com/rust-lang/crates.io-index" 393 | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" 394 | 395 | [[package]] 396 | name = 
"getrandom" 397 | version = "0.2.15" 398 | source = "registry+https://github.com/rust-lang/crates.io-index" 399 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 400 | dependencies = [ 401 | "cfg-if", 402 | "libc", 403 | "wasi", 404 | ] 405 | 406 | [[package]] 407 | name = "gimli" 408 | version = "0.29.0" 409 | source = "registry+https://github.com/rust-lang/crates.io-index" 410 | checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" 411 | 412 | [[package]] 413 | name = "half" 414 | version = "2.4.1" 415 | source = "registry+https://github.com/rust-lang/crates.io-index" 416 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" 417 | dependencies = [ 418 | "cfg-if", 419 | "crunchy", 420 | "zerocopy 0.6.6", 421 | ] 422 | 423 | [[package]] 424 | name = "hashbrown" 425 | version = "0.14.5" 426 | source = "registry+https://github.com/rust-lang/crates.io-index" 427 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 428 | 429 | [[package]] 430 | name = "hermit-abi" 431 | version = "0.3.9" 432 | source = "registry+https://github.com/rust-lang/crates.io-index" 433 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 434 | 435 | [[package]] 436 | name = "indexmap" 437 | version = "2.2.6" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" 440 | dependencies = [ 441 | "equivalent", 442 | "hashbrown", 443 | ] 444 | 445 | [[package]] 446 | name = "inferno" 447 | version = "0.11.19" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" 450 | dependencies = [ 451 | "ahash", 452 | "indexmap", 453 | "is-terminal", 454 | "itoa", 455 | "log", 456 | "num-format", 457 | "once_cell", 458 | "quick-xml", 459 | "rgb", 460 | "str_stack", 461 | ] 462 | 463 | 
[[package]] 464 | name = "is-terminal" 465 | version = "0.4.12" 466 | source = "registry+https://github.com/rust-lang/crates.io-index" 467 | checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" 468 | dependencies = [ 469 | "hermit-abi", 470 | "libc", 471 | "windows-sys", 472 | ] 473 | 474 | [[package]] 475 | name = "itertools" 476 | version = "0.10.5" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 479 | dependencies = [ 480 | "either", 481 | ] 482 | 483 | [[package]] 484 | name = "itoa" 485 | version = "1.0.11" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 488 | 489 | [[package]] 490 | name = "js-sys" 491 | version = "0.3.69" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" 494 | dependencies = [ 495 | "wasm-bindgen", 496 | ] 497 | 498 | [[package]] 499 | name = "lazy_static" 500 | version = "1.4.0" 501 | source = "registry+https://github.com/rust-lang/crates.io-index" 502 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 503 | 504 | [[package]] 505 | name = "libc" 506 | version = "0.2.155" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" 509 | 510 | [[package]] 511 | name = "linux-raw-sys" 512 | version = "0.4.14" 513 | source = "registry+https://github.com/rust-lang/crates.io-index" 514 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 515 | 516 | [[package]] 517 | name = "lock_api" 518 | version = "0.4.12" 519 | source = "registry+https://github.com/rust-lang/crates.io-index" 520 | checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" 
521 | dependencies = [ 522 | "autocfg", 523 | "scopeguard", 524 | ] 525 | 526 | [[package]] 527 | name = "log" 528 | version = "0.4.21" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 531 | 532 | [[package]] 533 | name = "memchr" 534 | version = "2.7.2" 535 | source = "registry+https://github.com/rust-lang/crates.io-index" 536 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 537 | 538 | [[package]] 539 | name = "memmap2" 540 | version = "0.9.5" 541 | source = "registry+https://github.com/rust-lang/crates.io-index" 542 | checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" 543 | dependencies = [ 544 | "libc", 545 | ] 546 | 547 | [[package]] 548 | name = "miniz_oxide" 549 | version = "0.7.3" 550 | source = "registry+https://github.com/rust-lang/crates.io-index" 551 | checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" 552 | dependencies = [ 553 | "adler", 554 | ] 555 | 556 | [[package]] 557 | name = "nix" 558 | version = "0.26.4" 559 | source = "registry+https://github.com/rust-lang/crates.io-index" 560 | checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" 561 | dependencies = [ 562 | "bitflags 1.3.2", 563 | "cfg-if", 564 | "libc", 565 | ] 566 | 567 | [[package]] 568 | name = "num-format" 569 | version = "0.4.4" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" 572 | dependencies = [ 573 | "arrayvec", 574 | "itoa", 575 | ] 576 | 577 | [[package]] 578 | name = "num-traits" 579 | version = "0.2.19" 580 | source = "registry+https://github.com/rust-lang/crates.io-index" 581 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 582 | dependencies = [ 583 | "autocfg", 584 | ] 585 | 586 | [[package]] 587 | name = "object" 588 | 
version = "0.35.0" 589 | source = "registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" 591 | dependencies = [ 592 | "memchr", 593 | ] 594 | 595 | [[package]] 596 | name = "once_cell" 597 | version = "1.19.0" 598 | source = "registry+https://github.com/rust-lang/crates.io-index" 599 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 600 | 601 | [[package]] 602 | name = "oorandom" 603 | version = "11.1.3" 604 | source = "registry+https://github.com/rust-lang/crates.io-index" 605 | checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" 606 | 607 | [[package]] 608 | name = "parking_lot" 609 | version = "0.12.3" 610 | source = "registry+https://github.com/rust-lang/crates.io-index" 611 | checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" 612 | dependencies = [ 613 | "lock_api", 614 | "parking_lot_core", 615 | ] 616 | 617 | [[package]] 618 | name = "parking_lot_core" 619 | version = "0.9.10" 620 | source = "registry+https://github.com/rust-lang/crates.io-index" 621 | checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" 622 | dependencies = [ 623 | "cfg-if", 624 | "libc", 625 | "redox_syscall", 626 | "smallvec", 627 | "windows-targets", 628 | ] 629 | 630 | [[package]] 631 | name = "plotters" 632 | version = "0.3.6" 633 | source = "registry+https://github.com/rust-lang/crates.io-index" 634 | checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" 635 | dependencies = [ 636 | "num-traits", 637 | "plotters-backend", 638 | "plotters-svg", 639 | "wasm-bindgen", 640 | "web-sys", 641 | ] 642 | 643 | [[package]] 644 | name = "plotters-backend" 645 | version = "0.3.6" 646 | source = "registry+https://github.com/rust-lang/crates.io-index" 647 | checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" 648 | 649 | [[package]] 650 | name = "plotters-svg" 
651 | version = "0.3.6" 652 | source = "registry+https://github.com/rust-lang/crates.io-index" 653 | checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" 654 | dependencies = [ 655 | "plotters-backend", 656 | ] 657 | 658 | [[package]] 659 | name = "pprof" 660 | version = "0.13.0" 661 | source = "registry+https://github.com/rust-lang/crates.io-index" 662 | checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb" 663 | dependencies = [ 664 | "backtrace", 665 | "cfg-if", 666 | "criterion", 667 | "findshlibs", 668 | "inferno", 669 | "libc", 670 | "log", 671 | "nix", 672 | "once_cell", 673 | "parking_lot", 674 | "protobuf", 675 | "protobuf-codegen-pure", 676 | "smallvec", 677 | "symbolic-demangle", 678 | "tempfile", 679 | "thiserror", 680 | ] 681 | 682 | [[package]] 683 | name = "ppv-lite86" 684 | version = "0.2.17" 685 | source = "registry+https://github.com/rust-lang/crates.io-index" 686 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 687 | 688 | [[package]] 689 | name = "proc-macro2" 690 | version = "1.0.84" 691 | source = "registry+https://github.com/rust-lang/crates.io-index" 692 | checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" 693 | dependencies = [ 694 | "unicode-ident", 695 | ] 696 | 697 | [[package]] 698 | name = "protobuf" 699 | version = "2.28.0" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" 702 | 703 | [[package]] 704 | name = "protobuf-codegen" 705 | version = "2.28.0" 706 | source = "registry+https://github.com/rust-lang/crates.io-index" 707 | checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6" 708 | dependencies = [ 709 | "protobuf", 710 | ] 711 | 712 | [[package]] 713 | name = "protobuf-codegen-pure" 714 | version = "2.28.0" 715 | source = "registry+https://github.com/rust-lang/crates.io-index" 716 | checksum = 
"95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865" 717 | dependencies = [ 718 | "protobuf", 719 | "protobuf-codegen", 720 | ] 721 | 722 | [[package]] 723 | name = "quick-xml" 724 | version = "0.26.0" 725 | source = "registry+https://github.com/rust-lang/crates.io-index" 726 | checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" 727 | dependencies = [ 728 | "memchr", 729 | ] 730 | 731 | [[package]] 732 | name = "quote" 733 | version = "1.0.36" 734 | source = "registry+https://github.com/rust-lang/crates.io-index" 735 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 736 | dependencies = [ 737 | "proc-macro2", 738 | ] 739 | 740 | [[package]] 741 | name = "radium" 742 | version = "0.7.0" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" 745 | 746 | [[package]] 747 | name = "rand" 748 | version = "0.8.5" 749 | source = "registry+https://github.com/rust-lang/crates.io-index" 750 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 751 | dependencies = [ 752 | "libc", 753 | "rand_chacha", 754 | "rand_core", 755 | ] 756 | 757 | [[package]] 758 | name = "rand_chacha" 759 | version = "0.3.1" 760 | source = "registry+https://github.com/rust-lang/crates.io-index" 761 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 762 | dependencies = [ 763 | "ppv-lite86", 764 | "rand_core", 765 | ] 766 | 767 | [[package]] 768 | name = "rand_core" 769 | version = "0.6.4" 770 | source = "registry+https://github.com/rust-lang/crates.io-index" 771 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 772 | dependencies = [ 773 | "getrandom", 774 | ] 775 | 776 | [[package]] 777 | name = "rayon" 778 | version = "1.10.0" 779 | source = "registry+https://github.com/rust-lang/crates.io-index" 780 | checksum = 
"b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 781 | dependencies = [ 782 | "either", 783 | "rayon-core", 784 | ] 785 | 786 | [[package]] 787 | name = "rayon-core" 788 | version = "1.12.1" 789 | source = "registry+https://github.com/rust-lang/crates.io-index" 790 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 791 | dependencies = [ 792 | "crossbeam-deque", 793 | "crossbeam-utils", 794 | ] 795 | 796 | [[package]] 797 | name = "redox_syscall" 798 | version = "0.5.1" 799 | source = "registry+https://github.com/rust-lang/crates.io-index" 800 | checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" 801 | dependencies = [ 802 | "bitflags 2.5.0", 803 | ] 804 | 805 | [[package]] 806 | name = "regex" 807 | version = "1.10.4" 808 | source = "registry+https://github.com/rust-lang/crates.io-index" 809 | checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" 810 | dependencies = [ 811 | "aho-corasick", 812 | "memchr", 813 | "regex-automata", 814 | "regex-syntax", 815 | ] 816 | 817 | [[package]] 818 | name = "regex-automata" 819 | version = "0.4.6" 820 | source = "registry+https://github.com/rust-lang/crates.io-index" 821 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 822 | dependencies = [ 823 | "aho-corasick", 824 | "memchr", 825 | "regex-syntax", 826 | ] 827 | 828 | [[package]] 829 | name = "regex-syntax" 830 | version = "0.8.3" 831 | source = "registry+https://github.com/rust-lang/crates.io-index" 832 | checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" 833 | 834 | [[package]] 835 | name = "rgb" 836 | version = "0.8.37" 837 | source = "registry+https://github.com/rust-lang/crates.io-index" 838 | checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" 839 | dependencies = [ 840 | "bytemuck", 841 | ] 842 | 843 | [[package]] 844 | name = "roaring" 845 | version = "0.10.4" 846 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 847 | checksum = "b26f4c25a604fcb3a1bcd96dd6ba37c93840de95de8198d94c0d571a74a804d1" 848 | dependencies = [ 849 | "bytemuck", 850 | "byteorder", 851 | "serde", 852 | ] 853 | 854 | [[package]] 855 | name = "rustc-demangle" 856 | version = "0.1.24" 857 | source = "registry+https://github.com/rust-lang/crates.io-index" 858 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" 859 | 860 | [[package]] 861 | name = "rustix" 862 | version = "0.38.34" 863 | source = "registry+https://github.com/rust-lang/crates.io-index" 864 | checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" 865 | dependencies = [ 866 | "bitflags 2.5.0", 867 | "errno", 868 | "libc", 869 | "linux-raw-sys", 870 | "windows-sys", 871 | ] 872 | 873 | [[package]] 874 | name = "ryu" 875 | version = "1.0.18" 876 | source = "registry+https://github.com/rust-lang/crates.io-index" 877 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" 878 | 879 | [[package]] 880 | name = "same-file" 881 | version = "1.0.6" 882 | source = "registry+https://github.com/rust-lang/crates.io-index" 883 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 884 | dependencies = [ 885 | "winapi-util", 886 | ] 887 | 888 | [[package]] 889 | name = "scopeguard" 890 | version = "1.2.0" 891 | source = "registry+https://github.com/rust-lang/crates.io-index" 892 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 893 | 894 | [[package]] 895 | name = "serde" 896 | version = "1.0.203" 897 | source = "registry+https://github.com/rust-lang/crates.io-index" 898 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" 899 | dependencies = [ 900 | "serde_derive", 901 | ] 902 | 903 | [[package]] 904 | name = "serde_derive" 905 | version = "1.0.203" 906 | source = "registry+https://github.com/rust-lang/crates.io-index" 907 | checksum = 
"500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" 908 | dependencies = [ 909 | "proc-macro2", 910 | "quote", 911 | "syn", 912 | ] 913 | 914 | [[package]] 915 | name = "serde_json" 916 | version = "1.0.128" 917 | source = "registry+https://github.com/rust-lang/crates.io-index" 918 | checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" 919 | dependencies = [ 920 | "itoa", 921 | "memchr", 922 | "ryu", 923 | "serde", 924 | ] 925 | 926 | [[package]] 927 | name = "smallvec" 928 | version = "1.13.2" 929 | source = "registry+https://github.com/rust-lang/crates.io-index" 930 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" 931 | 932 | [[package]] 933 | name = "stable_deref_trait" 934 | version = "1.2.0" 935 | source = "registry+https://github.com/rust-lang/crates.io-index" 936 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" 937 | 938 | [[package]] 939 | name = "str_stack" 940 | version = "0.1.0" 941 | source = "registry+https://github.com/rust-lang/crates.io-index" 942 | checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" 943 | 944 | [[package]] 945 | name = "symbolic-common" 946 | version = "12.9.1" 947 | source = "registry+https://github.com/rust-lang/crates.io-index" 948 | checksum = "89d2aef0f60f62e38c472334148758afbd570ed78d20be622692e5ebfec3734f" 949 | dependencies = [ 950 | "debugid", 951 | "memmap2", 952 | "stable_deref_trait", 953 | "uuid", 954 | ] 955 | 956 | [[package]] 957 | name = "symbolic-demangle" 958 | version = "12.9.1" 959 | source = "registry+https://github.com/rust-lang/crates.io-index" 960 | checksum = "1719d1292eac816cdd3fdad12b22315624b7ce6a7bacb267a3a27fccfd286b48" 961 | dependencies = [ 962 | "cpp_demangle", 963 | "rustc-demangle", 964 | "symbolic-common", 965 | ] 966 | 967 | [[package]] 968 | name = "syn" 969 | version = "2.0.66" 970 | source = "registry+https://github.com/rust-lang/crates.io-index" 971 | checksum = 
"c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" 972 | dependencies = [ 973 | "proc-macro2", 974 | "quote", 975 | "unicode-ident", 976 | ] 977 | 978 | [[package]] 979 | name = "tap" 980 | version = "1.0.1" 981 | source = "registry+https://github.com/rust-lang/crates.io-index" 982 | checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" 983 | 984 | [[package]] 985 | name = "tempfile" 986 | version = "3.10.1" 987 | source = "registry+https://github.com/rust-lang/crates.io-index" 988 | checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" 989 | dependencies = [ 990 | "cfg-if", 991 | "fastrand", 992 | "rustix", 993 | "windows-sys", 994 | ] 995 | 996 | [[package]] 997 | name = "thiserror" 998 | version = "1.0.61" 999 | source = "registry+https://github.com/rust-lang/crates.io-index" 1000 | checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" 1001 | dependencies = [ 1002 | "thiserror-impl", 1003 | ] 1004 | 1005 | [[package]] 1006 | name = "thiserror-impl" 1007 | version = "1.0.61" 1008 | source = "registry+https://github.com/rust-lang/crates.io-index" 1009 | checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" 1010 | dependencies = [ 1011 | "proc-macro2", 1012 | "quote", 1013 | "syn", 1014 | ] 1015 | 1016 | [[package]] 1017 | name = "tinytemplate" 1018 | version = "1.2.1" 1019 | source = "registry+https://github.com/rust-lang/crates.io-index" 1020 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 1021 | dependencies = [ 1022 | "serde", 1023 | "serde_json", 1024 | ] 1025 | 1026 | [[package]] 1027 | name = "unicode-ident" 1028 | version = "1.0.12" 1029 | source = "registry+https://github.com/rust-lang/crates.io-index" 1030 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 1031 | 1032 | [[package]] 1033 | name = "uuid" 1034 | version = "1.8.0" 1035 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1036 | checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" 1037 | 1038 | [[package]] 1039 | name = "version_check" 1040 | version = "0.9.4" 1041 | source = "registry+https://github.com/rust-lang/crates.io-index" 1042 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1043 | 1044 | [[package]] 1045 | name = "walkdir" 1046 | version = "2.5.0" 1047 | source = "registry+https://github.com/rust-lang/crates.io-index" 1048 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1049 | dependencies = [ 1050 | "same-file", 1051 | "winapi-util", 1052 | ] 1053 | 1054 | [[package]] 1055 | name = "wasi" 1056 | version = "0.11.0+wasi-snapshot-preview1" 1057 | source = "registry+https://github.com/rust-lang/crates.io-index" 1058 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1059 | 1060 | [[package]] 1061 | name = "wasm-bindgen" 1062 | version = "0.2.92" 1063 | source = "registry+https://github.com/rust-lang/crates.io-index" 1064 | checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" 1065 | dependencies = [ 1066 | "cfg-if", 1067 | "wasm-bindgen-macro", 1068 | ] 1069 | 1070 | [[package]] 1071 | name = "wasm-bindgen-backend" 1072 | version = "0.2.92" 1073 | source = "registry+https://github.com/rust-lang/crates.io-index" 1074 | checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" 1075 | dependencies = [ 1076 | "bumpalo", 1077 | "log", 1078 | "once_cell", 1079 | "proc-macro2", 1080 | "quote", 1081 | "syn", 1082 | "wasm-bindgen-shared", 1083 | ] 1084 | 1085 | [[package]] 1086 | name = "wasm-bindgen-macro" 1087 | version = "0.2.92" 1088 | source = "registry+https://github.com/rust-lang/crates.io-index" 1089 | checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" 1090 | dependencies = [ 1091 | "quote", 1092 | "wasm-bindgen-macro-support", 1093 | ] 
1094 | 1095 | [[package]] 1096 | name = "wasm-bindgen-macro-support" 1097 | version = "0.2.92" 1098 | source = "registry+https://github.com/rust-lang/crates.io-index" 1099 | checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" 1100 | dependencies = [ 1101 | "proc-macro2", 1102 | "quote", 1103 | "syn", 1104 | "wasm-bindgen-backend", 1105 | "wasm-bindgen-shared", 1106 | ] 1107 | 1108 | [[package]] 1109 | name = "wasm-bindgen-shared" 1110 | version = "0.2.92" 1111 | source = "registry+https://github.com/rust-lang/crates.io-index" 1112 | checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" 1113 | 1114 | [[package]] 1115 | name = "web-sys" 1116 | version = "0.3.69" 1117 | source = "registry+https://github.com/rust-lang/crates.io-index" 1118 | checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" 1119 | dependencies = [ 1120 | "js-sys", 1121 | "wasm-bindgen", 1122 | ] 1123 | 1124 | [[package]] 1125 | name = "winapi" 1126 | version = "0.3.9" 1127 | source = "registry+https://github.com/rust-lang/crates.io-index" 1128 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1129 | dependencies = [ 1130 | "winapi-i686-pc-windows-gnu", 1131 | "winapi-x86_64-pc-windows-gnu", 1132 | ] 1133 | 1134 | [[package]] 1135 | name = "winapi-i686-pc-windows-gnu" 1136 | version = "0.4.0" 1137 | source = "registry+https://github.com/rust-lang/crates.io-index" 1138 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1139 | 1140 | [[package]] 1141 | name = "winapi-util" 1142 | version = "0.1.8" 1143 | source = "registry+https://github.com/rust-lang/crates.io-index" 1144 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 1145 | dependencies = [ 1146 | "windows-sys", 1147 | ] 1148 | 1149 | [[package]] 1150 | name = "winapi-x86_64-pc-windows-gnu" 1151 | version = "0.4.0" 1152 | source = "registry+https://github.com/rust-lang/crates.io-index" 
1153 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1154 | 1155 | [[package]] 1156 | name = "windows-sys" 1157 | version = "0.52.0" 1158 | source = "registry+https://github.com/rust-lang/crates.io-index" 1159 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1160 | dependencies = [ 1161 | "windows-targets", 1162 | ] 1163 | 1164 | [[package]] 1165 | name = "windows-targets" 1166 | version = "0.52.5" 1167 | source = "registry+https://github.com/rust-lang/crates.io-index" 1168 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 1169 | dependencies = [ 1170 | "windows_aarch64_gnullvm", 1171 | "windows_aarch64_msvc", 1172 | "windows_i686_gnu", 1173 | "windows_i686_gnullvm", 1174 | "windows_i686_msvc", 1175 | "windows_x86_64_gnu", 1176 | "windows_x86_64_gnullvm", 1177 | "windows_x86_64_msvc", 1178 | ] 1179 | 1180 | [[package]] 1181 | name = "windows_aarch64_gnullvm" 1182 | version = "0.52.5" 1183 | source = "registry+https://github.com/rust-lang/crates.io-index" 1184 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 1185 | 1186 | [[package]] 1187 | name = "windows_aarch64_msvc" 1188 | version = "0.52.5" 1189 | source = "registry+https://github.com/rust-lang/crates.io-index" 1190 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 1191 | 1192 | [[package]] 1193 | name = "windows_i686_gnu" 1194 | version = "0.52.5" 1195 | source = "registry+https://github.com/rust-lang/crates.io-index" 1196 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 1197 | 1198 | [[package]] 1199 | name = "windows_i686_gnullvm" 1200 | version = "0.52.5" 1201 | source = "registry+https://github.com/rust-lang/crates.io-index" 1202 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 1203 | 1204 | [[package]] 1205 | name = "windows_i686_msvc" 1206 | version = "0.52.5" 1207 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1208 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 1209 | 1210 | [[package]] 1211 | name = "windows_x86_64_gnu" 1212 | version = "0.52.5" 1213 | source = "registry+https://github.com/rust-lang/crates.io-index" 1214 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 1215 | 1216 | [[package]] 1217 | name = "windows_x86_64_gnullvm" 1218 | version = "0.52.5" 1219 | source = "registry+https://github.com/rust-lang/crates.io-index" 1220 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 1221 | 1222 | [[package]] 1223 | name = "windows_x86_64_msvc" 1224 | version = "0.52.5" 1225 | source = "registry+https://github.com/rust-lang/crates.io-index" 1226 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 1227 | 1228 | [[package]] 1229 | name = "wyz" 1230 | version = "0.5.1" 1231 | source = "registry+https://github.com/rust-lang/crates.io-index" 1232 | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" 1233 | dependencies = [ 1234 | "tap", 1235 | ] 1236 | 1237 | [[package]] 1238 | name = "zerocopy" 1239 | version = "0.6.6" 1240 | source = "registry+https://github.com/rust-lang/crates.io-index" 1241 | checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6" 1242 | dependencies = [ 1243 | "byteorder", 1244 | "zerocopy-derive 0.6.6", 1245 | ] 1246 | 1247 | [[package]] 1248 | name = "zerocopy" 1249 | version = "0.7.34" 1250 | source = "registry+https://github.com/rust-lang/crates.io-index" 1251 | checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" 1252 | dependencies = [ 1253 | "zerocopy-derive 0.7.34", 1254 | ] 1255 | 1256 | [[package]] 1257 | name = "zerocopy-derive" 1258 | version = "0.6.6" 1259 | source = "registry+https://github.com/rust-lang/crates.io-index" 1260 | checksum = 
"125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" 1261 | dependencies = [ 1262 | "proc-macro2", 1263 | "quote", 1264 | "syn", 1265 | ] 1266 | 1267 | [[package]] 1268 | name = "zerocopy-derive" 1269 | version = "0.7.34" 1270 | source = "registry+https://github.com/rust-lang/crates.io-index" 1271 | checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" 1272 | dependencies = [ 1273 | "proc-macro2", 1274 | "quote", 1275 | "syn", 1276 | ] 1277 | -------------------------------------------------------------------------------- /rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bbqvec" 3 | version = "0.0.10" 4 | edition = "2021" 5 | repository = "https://github.com/barakmich/bbqvec" 6 | authors = ["Barak Michener "] 7 | readme = "../README.md" 8 | license = "Apache-2.0" 9 | keywords = ["vector", "database", "aknn", "search", "nearest-neighbor"] 10 | categories = ["algorithms", "data-structures", "database-implementations"] 11 | description = "Scalable, embeddable, vector storage for approximate K-nearest-neighbors (AKNN)" 12 | 13 | #[workspace] 14 | #members = ["cmd/test-recall"] 15 | 16 | [dependencies] 17 | anyhow = "1.0.86" 18 | argminmax = {version = "0.6.2", default-features = false, features = ["float"]} 19 | bitvec = {version = "1", features = ["serde"]} 20 | bytemuck = {version = "1.16.0", features = ["derive", "extern_crate_alloc"]} 21 | byteorder = "1.5.0" 22 | croaring = "1.1.0" 23 | half = {version = "2.4.1", features = ["std", "zerocopy"]} 24 | memmap2 = "0.9.5" 25 | rand = "0.8.5" 26 | rayon = "1.10.0" 27 | roaring = {version = "0.10.4", features = ["serde"]} 28 | serde = {version = "1.0.203", features = ["derive"]} 29 | serde_json = "1.0.128" 30 | thiserror = "1.0.61" 31 | 32 | [dev-dependencies] 33 | criterion = "0.5.1" 34 | pprof = {version = "0.13.0", features = ["flamegraph", "protobuf-codec", "protobuf", "criterion"]} 35 | 36 | [[bench]] 37 
| name = "main_benchmark" 38 | harness = false 39 | 40 | [profile.bench] 41 | debug = true 42 | -------------------------------------------------------------------------------- /rust/benches/main_benchmark.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | mod micro; 3 | use micro::criterion_benchmark_micro; 4 | mod memory_store; 5 | use memory_store::criterion_benchmark_memory_store; 6 | use pprof::criterion::{Output, PProfProfiler}; 7 | 8 | criterion_group! { 9 | name = memory; 10 | config = Criterion::default().with_profiler(PProfProfiler::new(1000, Output::Protobuf)); 11 | targets = criterion_benchmark_memory_store 12 | } 13 | criterion_group!(micro, criterion_benchmark_micro); 14 | criterion_main!(micro, memory); 15 | -------------------------------------------------------------------------------- /rust/benches/memory_store.rs: -------------------------------------------------------------------------------- 1 | use bbqvec::IndexIDIterator; 2 | use criterion::{BenchmarkId, Criterion}; 3 | 4 | pub fn criterion_benchmark_memory_store(c: &mut Criterion) { 5 | let data = bbqvec::create_vector_set(256, 1000000); 6 | println!("Made vecs"); 7 | let mem = bbqvec::MemoryBackend::new(256, 30).unwrap(); 8 | let mut store = bbqvec::VectorStore::new_croaring_bitmap(mem).unwrap(); 9 | println!("Made store"); 10 | store.add_vector_iter(data.enumerate_ids()).unwrap(); 11 | println!("itered"); 12 | println!("built"); 13 | c.bench_with_input(BenchmarkId::new("find_nearest", "store"), &store, |b, s| { 14 | b.iter(|| { 15 | let target = bbqvec::create_random_vector(256); 16 | s.find_nearest(&target, 20, 1000, 4).unwrap(); 17 | }) 18 | }); 19 | } 20 | -------------------------------------------------------------------------------- /rust/benches/micro.rs: -------------------------------------------------------------------------------- 1 | use bbqvec::Bitmap; 2 | use 
bitvec::prelude::*; 3 | use std::ops::BitOr; 4 | 5 | use criterion::{black_box, Criterion}; 6 | use rand::Rng; 7 | 8 | pub fn criterion_benchmark_micro(c: &mut Criterion) { 9 | c.bench_function("create_random_vector_100", |b| { 10 | b.iter(|| bbqvec::create_random_vector(100)) 11 | }); 12 | c.bench_function("normalize_100", |b| { 13 | let mut vec = bbqvec::create_random_vector(100); 14 | b.iter(|| bbqvec::vector::normalize(&mut vec)); 15 | }); 16 | c.bench_function("dot_product_100", |b| { 17 | let vec = bbqvec::create_random_vector(100); 18 | let normal = bbqvec::create_random_vector(100); 19 | b.iter(|| bbqvec::vector::dot_product(&vec, &normal)); 20 | }); 21 | c.bench_function("roaring", |b| { 22 | let mut x = roaring::RoaringBitmap::new(); 23 | let mut y = roaring::RoaringBitmap::new(); 24 | for _ in 0..20000 { 25 | x.insert(rand::thread_rng().gen_range(0..2000000)); 26 | y.insert(rand::thread_rng().gen_range(0..2000000)); 27 | } 28 | b.iter(|| { 29 | black_box((&x).bitor(&y)); 30 | }); 31 | }); 32 | c.bench_function("bitmap", |b| { 33 | let mut x = BitVec::::new(); 34 | let mut y = BitVec::new(); 35 | for _ in 0..20000 { 36 | x.add(rand::thread_rng().gen_range(0..2000000)); 37 | y.add(rand::thread_rng().gen_range(0..2000000)); 38 | } 39 | b.iter(|| { 40 | black_box({ 41 | let mut z = BitVec::new(); 42 | z = z.bitor(&x); 43 | z = z.bitor(&y); 44 | z 45 | }); 46 | }); 47 | }); 48 | } 49 | -------------------------------------------------------------------------------- /rust/cmd/test-recall/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /rust/cmd/test-recall/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "anyhow" 7 | version = "1.0.86" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" 10 | 11 | [[package]] 12 | name = "argminmax" 13 | version = "0.6.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa" 16 | dependencies = [ 17 | "num-traits", 18 | ] 19 | 20 | [[package]] 21 | name = "argv" 22 | version = "0.1.11" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "5ec90225cd9dc43f6b1c9892603293f3767520a3e8440edec1f7d2a47b88c678" 25 | 26 | [[package]] 27 | name = "autocfg" 28 | version = "1.3.0" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 31 | 32 | [[package]] 33 | name = "bbqvec" 34 | version = "0.1.0" 35 | dependencies = [ 36 | "anyhow", 37 | "argminmax", 38 | "bitvec", 39 | "bytemuck", 40 | "byteorder", 41 | "croaring", 42 | "half", 43 | "memmap2", 44 | "rand", 45 | "rayon", 46 | "roaring", 47 | "serde", 48 | "thiserror", 49 | ] 50 | 51 | [[package]] 52 | name = "bitvec" 53 | version = "1.0.1" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" 56 | dependencies = [ 57 | "funty", 58 | "radium", 59 | "serde", 60 | "tap", 61 | "wyz", 62 | ] 63 | 64 | [[package]] 65 | name = "bytemuck" 66 | version = "1.16.0" 67 | source = "registry+https://github.com/rust-lang/crates.io-index" 68 | checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" 69 | dependencies = [ 70 | "bytemuck_derive", 71 | ] 72 | 73 | [[package]] 74 | name = "bytemuck_derive" 75 | version = "1.7.0" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = 
"1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" 78 | dependencies = [ 79 | "proc-macro2", 80 | "quote", 81 | "syn 2.0.66", 82 | ] 83 | 84 | [[package]] 85 | name = "byteorder" 86 | version = "1.5.0" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 89 | 90 | [[package]] 91 | name = "cc" 92 | version = "1.0.98" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" 95 | 96 | [[package]] 97 | name = "cfg-if" 98 | version = "1.0.0" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 101 | 102 | [[package]] 103 | name = "croaring" 104 | version = "1.1.0" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "611eaefca84c93e431ad82dfb848f6e05a99e25148384f45a3852b0fbe1c8086" 107 | dependencies = [ 108 | "byteorder", 109 | "croaring-sys", 110 | ] 111 | 112 | [[package]] 113 | name = "croaring-sys" 114 | version = "2.0.0" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "ab5260027c04c33d67f405589d9c26e1e991fe062fb165f3094c9836e6c3b17f" 117 | dependencies = [ 118 | "cc", 119 | ] 120 | 121 | [[package]] 122 | name = "crossbeam-deque" 123 | version = "0.8.5" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 126 | dependencies = [ 127 | "crossbeam-epoch", 128 | "crossbeam-utils", 129 | ] 130 | 131 | [[package]] 132 | name = "crossbeam-epoch" 133 | version = "0.9.18" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 136 | dependencies = [ 137 | "crossbeam-utils", 138 | ] 139 | 140 | 
[[package]] 141 | name = "crossbeam-utils" 142 | version = "0.8.20" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 145 | 146 | [[package]] 147 | name = "crunchy" 148 | version = "0.2.2" 149 | source = "registry+https://github.com/rust-lang/crates.io-index" 150 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 151 | 152 | [[package]] 153 | name = "either" 154 | version = "1.12.0" 155 | source = "registry+https://github.com/rust-lang/crates.io-index" 156 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" 157 | 158 | [[package]] 159 | name = "funty" 160 | version = "2.0.0" 161 | source = "registry+https://github.com/rust-lang/crates.io-index" 162 | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" 163 | 164 | [[package]] 165 | name = "getrandom" 166 | version = "0.2.15" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 169 | dependencies = [ 170 | "cfg-if", 171 | "libc", 172 | "wasi", 173 | ] 174 | 175 | [[package]] 176 | name = "gflags" 177 | version = "0.3.12" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "8331d16d5d69b23e753a00b3aaeee0c33b3bab7b93044afa48d2e6e6c28d621a" 180 | dependencies = [ 181 | "argv", 182 | "gflags-impl", 183 | "inventory", 184 | "ref-cast", 185 | ] 186 | 187 | [[package]] 188 | name = "gflags-impl" 189 | version = "0.3.12" 190 | source = "registry+https://github.com/rust-lang/crates.io-index" 191 | checksum = "1c746dc576d32875419faf6928ea7f74027f4c34aeee5bd9b540fb37b4448561" 192 | dependencies = [ 193 | "proc-macro2", 194 | "quote", 195 | "syn 1.0.109", 196 | ] 197 | 198 | [[package]] 199 | name = "half" 200 | version = "2.4.1" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | 
checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" 203 | dependencies = [ 204 | "cfg-if", 205 | "crunchy", 206 | "zerocopy", 207 | ] 208 | 209 | [[package]] 210 | name = "inventory" 211 | version = "0.3.15" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" 214 | 215 | [[package]] 216 | name = "libc" 217 | version = "0.2.155" 218 | source = "registry+https://github.com/rust-lang/crates.io-index" 219 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" 220 | 221 | [[package]] 222 | name = "memmap2" 223 | version = "0.9.5" 224 | source = "registry+https://github.com/rust-lang/crates.io-index" 225 | checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" 226 | dependencies = [ 227 | "libc", 228 | ] 229 | 230 | [[package]] 231 | name = "num-traits" 232 | version = "0.2.19" 233 | source = "registry+https://github.com/rust-lang/crates.io-index" 234 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 235 | dependencies = [ 236 | "autocfg", 237 | ] 238 | 239 | [[package]] 240 | name = "ppv-lite86" 241 | version = "0.2.17" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 244 | 245 | [[package]] 246 | name = "proc-macro2" 247 | version = "1.0.85" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" 250 | dependencies = [ 251 | "unicode-ident", 252 | ] 253 | 254 | [[package]] 255 | name = "quote" 256 | version = "1.0.36" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 259 | dependencies = [ 260 | "proc-macro2", 261 | ] 262 | 263 | [[package]] 264 | name = 
"radium" 265 | version = "0.7.0" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" 268 | 269 | [[package]] 270 | name = "rand" 271 | version = "0.8.5" 272 | source = "registry+https://github.com/rust-lang/crates.io-index" 273 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 274 | dependencies = [ 275 | "libc", 276 | "rand_chacha", 277 | "rand_core", 278 | ] 279 | 280 | [[package]] 281 | name = "rand_chacha" 282 | version = "0.3.1" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 285 | dependencies = [ 286 | "ppv-lite86", 287 | "rand_core", 288 | ] 289 | 290 | [[package]] 291 | name = "rand_core" 292 | version = "0.6.4" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 295 | dependencies = [ 296 | "getrandom", 297 | ] 298 | 299 | [[package]] 300 | name = "rayon" 301 | version = "1.10.0" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 304 | dependencies = [ 305 | "either", 306 | "rayon-core", 307 | ] 308 | 309 | [[package]] 310 | name = "rayon-core" 311 | version = "1.12.1" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 314 | dependencies = [ 315 | "crossbeam-deque", 316 | "crossbeam-utils", 317 | ] 318 | 319 | [[package]] 320 | name = "ref-cast" 321 | version = "1.0.23" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" 324 | dependencies = [ 325 | "ref-cast-impl", 326 | ] 327 | 328 | [[package]] 329 | name = 
"ref-cast-impl" 330 | version = "1.0.23" 331 | source = "registry+https://github.com/rust-lang/crates.io-index" 332 | checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" 333 | dependencies = [ 334 | "proc-macro2", 335 | "quote", 336 | "syn 2.0.66", 337 | ] 338 | 339 | [[package]] 340 | name = "roaring" 341 | version = "0.10.4" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "b26f4c25a604fcb3a1bcd96dd6ba37c93840de95de8198d94c0d571a74a804d1" 344 | dependencies = [ 345 | "bytemuck", 346 | "byteorder", 347 | "serde", 348 | ] 349 | 350 | [[package]] 351 | name = "serde" 352 | version = "1.0.203" 353 | source = "registry+https://github.com/rust-lang/crates.io-index" 354 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" 355 | dependencies = [ 356 | "serde_derive", 357 | ] 358 | 359 | [[package]] 360 | name = "serde_derive" 361 | version = "1.0.203" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" 364 | dependencies = [ 365 | "proc-macro2", 366 | "quote", 367 | "syn 2.0.66", 368 | ] 369 | 370 | [[package]] 371 | name = "syn" 372 | version = "1.0.109" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 375 | dependencies = [ 376 | "proc-macro2", 377 | "quote", 378 | "unicode-ident", 379 | ] 380 | 381 | [[package]] 382 | name = "syn" 383 | version = "2.0.66" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" 386 | dependencies = [ 387 | "proc-macro2", 388 | "quote", 389 | "unicode-ident", 390 | ] 391 | 392 | [[package]] 393 | name = "tap" 394 | version = "1.0.1" 395 | source = "registry+https://github.com/rust-lang/crates.io-index" 396 | checksum = 
"55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" 397 | 398 | [[package]] 399 | name = "test-recall" 400 | version = "0.1.0" 401 | dependencies = [ 402 | "anyhow", 403 | "bbqvec", 404 | "gflags", 405 | ] 406 | 407 | [[package]] 408 | name = "thiserror" 409 | version = "1.0.61" 410 | source = "registry+https://github.com/rust-lang/crates.io-index" 411 | checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" 412 | dependencies = [ 413 | "thiserror-impl", 414 | ] 415 | 416 | [[package]] 417 | name = "thiserror-impl" 418 | version = "1.0.61" 419 | source = "registry+https://github.com/rust-lang/crates.io-index" 420 | checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" 421 | dependencies = [ 422 | "proc-macro2", 423 | "quote", 424 | "syn 2.0.66", 425 | ] 426 | 427 | [[package]] 428 | name = "unicode-ident" 429 | version = "1.0.12" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 432 | 433 | [[package]] 434 | name = "wasi" 435 | version = "0.11.0+wasi-snapshot-preview1" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 438 | 439 | [[package]] 440 | name = "wyz" 441 | version = "0.5.1" 442 | source = "registry+https://github.com/rust-lang/crates.io-index" 443 | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" 444 | dependencies = [ 445 | "tap", 446 | ] 447 | 448 | [[package]] 449 | name = "zerocopy" 450 | version = "0.6.6" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6" 453 | dependencies = [ 454 | "byteorder", 455 | "zerocopy-derive", 456 | ] 457 | 458 | [[package]] 459 | name = "zerocopy-derive" 460 | version = "0.6.6" 461 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 462 | checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" 463 | dependencies = [ 464 | "proc-macro2", 465 | "quote", 466 | "syn 2.0.66", 467 | ] 468 | -------------------------------------------------------------------------------- /rust/cmd/test-recall/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "test-recall" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | gflags = "0.3.12" 10 | bbqvec = {path = "../../"} 11 | anyhow = "1.0.86" 12 | -------------------------------------------------------------------------------- /rust/cmd/test-recall/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::time::{Duration, Instant}; 2 | 3 | use anyhow::Result; 4 | use bbqvec::{ 5 | backend::VectorBackend, Bitmap, IndexIDIterator, MemoryBackend, ResultSet, Vector, VectorStore, 6 | }; 7 | 8 | gflags::define! { 9 | -v, --vectors: usize = 100000 10 | } 11 | 12 | gflags::define! { 13 | -q, --queries: usize = 1000 14 | } 15 | 16 | gflags::define! { 17 | -d, --dimensions: usize = 256 18 | } 19 | 20 | gflags::define! { 21 | -b, --bases: usize = 30 22 | } 23 | 24 | gflags::define! { 25 | -k, --search-k: usize = 1000 26 | } 27 | 28 | gflags::define! 
{ 29 | -s, --spill: usize = 16 30 | } 31 | 32 | enum Mode { 33 | SingleRun, 34 | Matrix, 35 | } 36 | 37 | fn main() -> Result<()> { 38 | let args = gflags::parse(); 39 | let mode = if args.is_empty() { 40 | Mode::SingleRun 41 | } else { 42 | match args[0] { 43 | "run" => Mode::SingleRun, 44 | "matrix" => Mode::Matrix, 45 | _ => Mode::SingleRun, 46 | } 47 | }; 48 | match mode { 49 | Mode::SingleRun => single_run_main(), 50 | Mode::Matrix => matrix_main(), 51 | } 52 | } 53 | 54 | fn single_run_main() -> Result<()> { 55 | let store = make_store()?; 56 | let tests = bbqvec::create_vector_set(DIMENSIONS.flag, QUERIES.flag); 57 | let mut fts_results = Vec::with_capacity(tests.len()); 58 | for t in tests.iter() { 59 | fts_results.push(store.full_table_scan(t, 20)?); 60 | } 61 | let (results, took) = run_test(&tests, &store, SEARCH_K.flag, SPILL.flag)?; 62 | print_result_line(&fts_results, &results, SEARCH_K.flag, SPILL.flag, took)?; 63 | Ok(()) 64 | } 65 | 66 | fn matrix_main() -> Result<()> { 67 | let store = make_store()?; 68 | let tests = bbqvec::create_vector_set(DIMENSIONS.flag, QUERIES.flag); 69 | let mut fts_results = Vec::with_capacity(tests.len()); 70 | for t in tests.iter() { 71 | fts_results.push(store.full_table_scan(t, 20)?); 72 | } 73 | for spill in [1, 4, 8, 16] { 74 | for searchk in [100, 500, 1000, 2000, 5000, 10000, 20000] { 75 | if DIMENSIONS.flag < spill { 76 | continue; 77 | } 78 | let (results, took) = run_test(&tests, &store, searchk, spill)?; 79 | print_result_line(&fts_results, &results, searchk, spill, took)?; 80 | } 81 | } 82 | Ok(()) 83 | } 84 | 85 | fn make_store() -> Result> { 86 | let data = bbqvec::create_vector_set(DIMENSIONS.flag, VECTORS.flag); 87 | println!("Made vectors"); 88 | let mem = bbqvec::MemoryBackend::new(DIMENSIONS.flag, BASES.flag)?; 89 | let mut store = bbqvec::VectorStore::new(mem)?; 90 | store.add_vector_iter(data.enumerate_ids())?; 91 | println!("Added vectors"); 92 | Ok(store) 93 | } 94 | 95 | fn run_test( 96 | tests: 
&Vec, 97 | store: &bbqvec::VectorStore, 98 | search_k: usize, 99 | spill: usize, 100 | ) -> Result<(Vec, Duration)> { 101 | let mut out = Vec::with_capacity(tests.len()); 102 | let start = Instant::now(); 103 | for v in tests { 104 | let res = store.find_nearest(v, 20, search_k, spill)?; 105 | out.push(res); 106 | } 107 | let took = Instant::now().duration_since(start); 108 | Ok((out, took)) 109 | } 110 | 111 | fn print_result_line( 112 | fts: &[ResultSet], 113 | real: &[ResultSet], 114 | search_k: usize, 115 | spill: usize, 116 | took: Duration, 117 | ) -> Result<()> { 118 | let mut acc = [0.0; 4]; 119 | let mut checked = 0; 120 | for (f, r) in fts.iter().zip(real.iter()) { 121 | acc[0] += f.compute_recall(r, 1); 122 | acc[1] += f.compute_recall(r, 5); 123 | acc[2] += f.compute_recall(r, 10); 124 | acc[3] += f.compute_recall(r, 20); 125 | checked += r.checked; 126 | } 127 | acc.iter_mut().for_each(|v| *v *= 100.0 / fts.len() as f64); 128 | let per = took.as_millis() as f64 / real.len() as f64; 129 | let avg_check = checked / real.len(); 130 | println!( 131 | "searchk {:<6} / spill {:<4} ({:8.4}ms, {:10} checked) {:5.2}@1 {:5.2}@5 {:5.2}@10 {:5.2}@20", 132 | search_k, spill, per, avg_check, acc[0], acc[1], acc[2], acc[3] 133 | ); 134 | Ok(()) 135 | } 136 | -------------------------------------------------------------------------------- /rust/src/backend.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | 3 | use crate::{Basis, Bitmap, ResultSet, Vector, ID}; 4 | 5 | pub struct BackendInfo { 6 | pub quantization: String, 7 | pub has_index_data: bool, 8 | pub dimensions: usize, 9 | pub n_basis: usize, 10 | pub vector_count: usize, 11 | } 12 | 13 | pub trait VectorBackend { 14 | fn put_vector(&mut self, id: ID, v: &Vector) -> Result<()>; 15 | fn compute_similarity(&self, target: &Vector, target_id: ID) -> Result; 16 | fn info(&self) -> BackendInfo; 17 | fn iter_vector_ids(&self) -> impl Iterator; 18 | fn 
vector_exists(&self, id: ID) -> bool; 19 | fn close(self) -> Result<()>; 20 | 21 | fn find_nearest(&self, target: &Vector, k: usize) -> Result { 22 | let mut set = ResultSet::new(k); 23 | for id in self.iter_vector_ids() { 24 | let sim = self.compute_similarity(target, id)?; 25 | set.add_result(id, sim); 26 | } 27 | Ok(set) 28 | } 29 | 30 | fn load_bases(&self) -> Result>>; 31 | fn load_bitmap(&mut self, basis: usize, index: i32) -> Result>; 32 | 33 | fn save_bases(&mut self, _bases: &[Basis]) -> Result<()> { 34 | Ok(()) 35 | } 36 | 37 | fn save_bitmap(&mut self, _basis: usize, _index: usize, _bitmap: &impl Bitmap) -> Result<()> { 38 | Ok(()) 39 | } 40 | 41 | fn sync(&self) -> Result<()> { 42 | Ok(()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /rust/src/backend_disk.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use std::{collections::HashMap, path::PathBuf}; 3 | 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use crate::{ 7 | quantization::Quantization, vector_file::VectorFile, Basis, Bitmap, Vector, VectorBackend, ID, 8 | }; 9 | 10 | #[derive(Default)] 11 | pub struct DiskBackend { 12 | dir: PathBuf, 13 | metadata: DiskMetadata, 14 | vector_files: HashMap>, 15 | token: u64, 16 | } 17 | 18 | #[derive(Serialize, Deserialize, Default)] 19 | pub(crate) struct DiskMetadata { 20 | pub dimensions: usize, 21 | pub quantization: String, 22 | pub vecs_per_file: usize, 23 | pub vec_files: Vec, 24 | } 25 | 26 | const DEFAULT_VECS_PER_FILE: usize = 200_000; 27 | 28 | impl DiskBackend { 29 | pub fn open(path: PathBuf, dimensions: usize) -> Result { 30 | let mut token: u64 = rand::random(); 31 | if token == 0 { 32 | token = 1; 33 | } 34 | let mut s = Self { 35 | dir: path, 36 | metadata: DiskMetadata { 37 | dimensions, 38 | quantization: Q::name().into(), 39 | vecs_per_file: DEFAULT_VECS_PER_FILE, 40 | vec_files: Vec::new(), 41 | }, 42 | token, 43 | 
..Default::default() 44 | }; 45 | s.open_files()?; 46 | Ok(s) 47 | } 48 | 49 | fn open_files(&mut self) -> Result<()> { 50 | let metadata_path = self.dir.join("metadata.json"); 51 | if !metadata_path.exists() { 52 | return self.create_new(); 53 | } 54 | let metadata_contents = std::fs::read_to_string(&metadata_path)?; 55 | let metadata: DiskMetadata = serde_json::from_str(&metadata_contents)?; 56 | self.metadata = metadata; 57 | for vf in self.metadata.vec_files.iter() { 58 | let vector_file = VectorFile::::create_or_open( 59 | self.make_pagefile_path(vf), 60 | self.metadata.dimensions, 61 | self.metadata.vecs_per_file, 62 | )?; 63 | self.vector_files.insert(*vf, vector_file); 64 | } 65 | Ok(()) 66 | } 67 | 68 | fn create_new(&mut self) -> Result<()> { 69 | std::fs::create_dir_all(self.dir.clone())?; 70 | self.save_metadata() 71 | } 72 | 73 | fn save_metadata(&self) -> Result<()> { 74 | let metadata_path = self.dir.join("metadata.json"); 75 | Ok(serde_json::to_writer( 76 | &std::fs::File::create(metadata_path)?, 77 | &self.metadata, 78 | )?) 
79 | } 80 | 81 | fn make_pagefile_path(&self, key: &usize) -> PathBuf { 82 | self.dir.join(format!("{:x}.vec", key)) 83 | } 84 | } 85 | 86 | impl VectorBackend for DiskBackend { 87 | fn put_vector(&mut self, id: ID, v: &Vector) -> Result<()> { 88 | todo!() 89 | } 90 | 91 | fn compute_similarity(&self, target: &Vector, target_id: ID) -> Result { 92 | todo!() 93 | } 94 | 95 | fn info(&self) -> crate::backend::BackendInfo { 96 | todo!() 97 | } 98 | 99 | fn iter_vector_ids(&self) -> impl Iterator { 100 | 0..(self.metadata.vecs_per_file * self.metadata.vec_files.len()) as ID 101 | } 102 | 103 | fn vector_exists(&self, id: ID) -> bool { 104 | todo!() 105 | } 106 | 107 | fn close(self) -> Result<()> { 108 | todo!() 109 | } 110 | 111 | fn load_bases(&self) -> Result>> { 112 | todo!() 113 | } 114 | 115 | fn load_bitmap(&mut self, basis: usize, index: i32) -> Result> { 116 | todo!() 117 | } 118 | 119 | fn save_bases(&mut self, _bases: &[Basis]) -> Result<()> { 120 | Ok(()) 121 | } 122 | 123 | fn save_bitmap(&mut self, _basis: usize, _index: usize, _bitmap: &impl Bitmap) -> Result<()> { 124 | Ok(()) 125 | } 126 | 127 | fn sync(&self) -> Result<()> { 128 | for v in self.vector_files.values() { 129 | v.flush()? 
130 | } 131 | self.save_metadata() 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /rust/src/backend_memory.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | cmp::min, 3 | sync::{Arc, Mutex}, 4 | }; 5 | 6 | use anyhow::{anyhow, Result}; 7 | use rand::RngCore; 8 | 9 | use crate::{ 10 | backend::{BackendInfo, VectorBackend}, 11 | quantization::Quantization, 12 | Vector, ID, 13 | }; 14 | 15 | pub struct QuantizedMemoryBackend { 16 | vecs: Vec>, 17 | dimensions: usize, 18 | n_basis: usize, 19 | rng: Option>>>, 20 | } 21 | 22 | pub type MemoryBackend = QuantizedMemoryBackend; 23 | 24 | impl QuantizedMemoryBackend { 25 | pub fn new(dimensions: usize, n_basis: usize) -> Result { 26 | Ok(Self { 27 | vecs: Vec::new(), 28 | dimensions, 29 | n_basis, 30 | rng: None, 31 | }) 32 | } 33 | 34 | pub fn set_rng(&mut self, rng: Box) { 35 | self.rng = Some(Arc::new(Mutex::new(rng))); 36 | } 37 | } 38 | 39 | impl VectorBackend for QuantizedMemoryBackend { 40 | fn put_vector(&mut self, id: crate::ID, v: &Vector) -> Result<()> { 41 | if v.len() != self.dimensions { 42 | return Err(anyhow!("dimensions don't match")); 43 | } 44 | let uid = id as usize; 45 | if self.vecs.len() <= uid { 46 | if self.vecs.capacity() == uid { 47 | self.vecs.reserve(min(self.vecs.capacity(), 1024 * 1024)) 48 | } 49 | self.vecs.resize(uid + 1, None); 50 | } 51 | let mut insert = v.clone(); 52 | crate::vector::normalize(&mut insert); 53 | let l = Q::lower(insert)?; 54 | self.vecs[uid] = Some(l); 55 | Ok(()) 56 | } 57 | 58 | fn compute_similarity(&self, target: &Vector, target_id: crate::ID) -> Result { 59 | // Make sure it's normalized! 
60 | let v = self.vecs[target_id as usize] 61 | .as_ref() 62 | .ok_or(anyhow!("No vector present"))?; 63 | Q::compare(target, v) 64 | } 65 | 66 | fn info(&self) -> crate::backend::BackendInfo { 67 | BackendInfo { 68 | has_index_data: false, 69 | dimensions: self.dimensions, 70 | n_basis: self.n_basis, 71 | vector_count: self.vecs.len(), 72 | quantization: Q::name().into(), 73 | } 74 | } 75 | 76 | fn iter_vector_ids(&self) -> impl Iterator { 77 | self.vecs 78 | .iter() 79 | .enumerate() 80 | .filter(|(_, v)| v.is_some()) 81 | .map(|(k, _)| k as ID) 82 | } 83 | 84 | fn vector_exists(&self, id: ID) -> bool { 85 | let v = self.vecs.get(id as usize); 86 | match v { 87 | Some(x) => x.is_some(), 88 | None => false, 89 | } 90 | } 91 | 92 | fn close(self) -> Result<()> { 93 | todo!() 94 | } 95 | 96 | fn load_bases(&self) -> Result>> { 97 | Ok(None) 98 | } 99 | 100 | fn load_bitmap(&mut self, _basis: usize, _index: i32) -> Result> { 101 | Ok(None) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /rust/src/bitmaps.rs: -------------------------------------------------------------------------------- 1 | use crate::ID; 2 | use std::ops::{BitOrAssign, BitXorAssign, SubAssign}; 3 | 4 | pub use bitvec::prelude::BitVec; 5 | pub use croaring::Bitmap as CRoaringBitmap; 6 | pub use roaring::RoaringBitmap; 7 | 8 | pub trait Bitmap: std::fmt::Debug + Default + Clone + Send { 9 | fn new() -> Self; 10 | fn count(&self) -> usize; 11 | fn is_empty(&self) -> bool; 12 | fn add(&mut self, id: ID); 13 | fn iter_elems(&self) -> impl Iterator; 14 | fn and_not(&mut self, rhs: &Self); 15 | fn or(&mut self, rhs: &Self); 16 | fn xor(&mut self, rhs: &Self); 17 | fn estimate_size(&self) -> usize; 18 | } 19 | 20 | impl Bitmap for roaring::RoaringBitmap { 21 | fn new() -> Self { 22 | roaring::RoaringBitmap::new() 23 | } 24 | 25 | fn is_empty(&self) -> bool { 26 | self.is_empty() 27 | } 28 | 29 | fn count(&self) -> usize { 30 | self.len() as usize 31 | 
} 32 | 33 | fn add(&mut self, id: ID) { 34 | self.insert(id as u32); 35 | } 36 | 37 | fn iter_elems(&self) -> impl Iterator { 38 | self.iter().map(|x| x as ID) 39 | } 40 | fn and_not(&mut self, rhs: &Self) { 41 | self.sub_assign(rhs) 42 | } 43 | fn or(&mut self, rhs: &Self) { 44 | self.bitor_assign(rhs) 45 | } 46 | fn xor(&mut self, rhs: &Self) { 47 | self.bitxor_assign(rhs) 48 | } 49 | fn estimate_size(&self) -> usize { 50 | self.serialized_size() 51 | } 52 | } 53 | 54 | impl Bitmap for bitvec::prelude::BitVec { 55 | fn new() -> Self { 56 | bitvec::prelude::BitVec::new() 57 | } 58 | 59 | fn count(&self) -> usize { 60 | self.count_ones() 61 | } 62 | 63 | fn is_empty(&self) -> bool { 64 | self.is_empty() 65 | } 66 | 67 | fn add(&mut self, id: ID) { 68 | if self.len() <= id as usize { 69 | self.resize((id + 1) as usize, false) 70 | } 71 | self.set(id as usize, true) 72 | } 73 | 74 | fn iter_elems(&self) -> impl Iterator { 75 | self.iter_ones().map(|x| x as ID) 76 | } 77 | 78 | #[inline] 79 | fn and_not(&mut self, rhs: &Self) { 80 | for elem in self.as_raw_mut_slice().iter_mut().zip(rhs.as_raw_slice()) { 81 | *elem.0 &= !elem.1 82 | } 83 | } 84 | 85 | #[inline] 86 | fn or(&mut self, rhs: &Self) { 87 | if self.len() < rhs.len() { 88 | self.resize(rhs.len(), false) 89 | } 90 | self.bitor_assign(rhs) 91 | } 92 | 93 | #[inline] 94 | fn xor(&mut self, rhs: &Self) { 95 | if self.len() < rhs.len() { 96 | self.resize(rhs.len(), false) 97 | } 98 | self.bitxor_assign(rhs) 99 | } 100 | 101 | fn estimate_size(&self) -> usize { 102 | std::mem::size_of_val(self.as_raw_slice()) 103 | } 104 | } 105 | 106 | impl Bitmap for croaring::Bitmap { 107 | fn new() -> Self { 108 | croaring::Bitmap::new() 109 | } 110 | 111 | fn is_empty(&self) -> bool { 112 | self.is_empty() 113 | } 114 | 115 | fn count(&self) -> usize { 116 | self.cardinality() as usize 117 | } 118 | 119 | fn add(&mut self, id: ID) { 120 | self.add(id as u32) 121 | } 122 | 123 | fn iter_elems(&self) -> impl Iterator { 124 | 
self.iter().map(|x| x as ID) 125 | } 126 | 127 | fn and_not(&mut self, rhs: &Self) { 128 | self.andnot_inplace(rhs) 129 | } 130 | 131 | fn or(&mut self, rhs: &Self) { 132 | self.or_inplace(rhs) 133 | } 134 | 135 | fn xor(&mut self, rhs: &Self) { 136 | self.xor_inplace(rhs) 137 | } 138 | 139 | fn estimate_size(&self) -> usize { 140 | self.get_serialized_size_in_bytes::() 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /rust/src/counting_bitmap.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use crate::Bitmap; 4 | 5 | #[derive(Default)] 6 | pub struct CountingBitmap { 7 | bitmaps: Vec, 8 | } 9 | 10 | impl Display for CountingBitmap { 11 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 12 | f.write_fmt(format_args!("{:?}", self.cardinalities())) 13 | } 14 | } 15 | 16 | impl CountingBitmap { 17 | pub fn new(size: usize) -> Self { 18 | Self { 19 | bitmaps: vec![B::new(); size], 20 | } 21 | } 22 | 23 | pub fn or(&mut self, rhs: B) { 24 | let mut cur = rhs; 25 | for i in 0..self.bitmaps.len() { 26 | self.bitmaps[i].xor(&cur); 27 | cur.and_not(&self.bitmaps[i]); 28 | self.bitmaps[i].or(&cur); 29 | if cur.is_empty() { 30 | break; 31 | } 32 | } 33 | } 34 | 35 | pub fn cardinalities(&self) -> Vec { 36 | self.bitmaps.iter().map(|b| b.count()).collect::>() 37 | } 38 | 39 | pub fn top_k(&self, search_k: usize) -> Option<&B> { 40 | self.bitmaps.iter().rev().find(|x| x.count() >= search_k) 41 | } 42 | } 43 | 44 | #[cfg(test)] 45 | mod test { 46 | use bitvec::prelude::*; 47 | 48 | use super::*; 49 | 50 | #[test] 51 | fn finds_count() { 52 | let mut cbm = CountingBitmap::::new(3); 53 | let bm_a = bitvec![usize, Lsb0; 0, 0, 1]; 54 | let bm_b = bitvec![usize, Lsb0; 0, 1, 1]; 55 | let bm_c = bitvec![usize, Lsb0; 1, 1, 1]; 56 | cbm.or(bm_a); 57 | cbm.or(bm_b); 58 | cbm.or(bm_c); 59 | let v: Vec = 
cbm.top_k(1).unwrap().iter_elems().collect(); 60 | assert_eq!(v.len(), 1); 61 | assert_eq!(v[0], 2); 62 | assert_eq!(cbm.top_k(10), None); 63 | assert_eq!(cbm.top_k(1).unwrap().count(), 1); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /rust/src/helpers.rs: -------------------------------------------------------------------------------- 1 | use rand::Rng; 2 | 3 | use crate::{Vector, ID}; 4 | 5 | pub fn create_random_vector(dimensions: usize) -> Vector { 6 | let mut out = Vec::new(); 7 | for _ in 0..dimensions { 8 | out.push(rand::thread_rng().gen_range(-1.0..1.0)) 9 | } 10 | crate::vector::normalize(&mut out); 11 | out 12 | } 13 | 14 | pub fn create_vector_set(dimensions: usize, count: usize) -> Vec { 15 | std::iter::repeat_with(|| create_random_vector(dimensions)) 16 | .take(count) 17 | .collect() 18 | } 19 | 20 | pub trait IndexIDIterator { 21 | fn enumerate_ids(&self) -> impl Iterator; 22 | } 23 | 24 | impl IndexIDIterator for Vec { 25 | fn enumerate_ids(&self) -> impl Iterator { 26 | self.iter().enumerate().map(|(i, v)| (i as ID, v)) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /rust/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod counting_bitmap; 2 | pub(crate) mod quantization; 3 | pub use quantization::BF16Quantization; 4 | pub use quantization::NoQuantization; 5 | 6 | pub mod result; 7 | pub use result::ResultSet; 8 | 9 | pub(crate) mod spaces; 10 | pub(crate) mod unaligned_f32; 11 | 12 | pub mod backend; 13 | pub use backend::VectorBackend; 14 | 15 | pub(crate) mod backend_memory; 16 | pub use backend_memory::MemoryBackend; 17 | pub use backend_memory::QuantizedMemoryBackend; 18 | 19 | //pub(crate) mod vector_file; 20 | //pub(crate) mod backend_disk; 21 | //pub use backend_disk::DiskBackend; 22 | 23 | pub mod vector; 24 | 25 | pub(crate) mod vector_store; 26 | pub use 
vector_store::VectorStore; 27 | 28 | mod helpers; 29 | pub use helpers::*; 30 | 31 | pub type Vector = Vec; 32 | pub type ID = u64; 33 | pub type Basis = Vec; 34 | 35 | pub mod bitmaps; 36 | pub use bitmaps::*; 37 | -------------------------------------------------------------------------------- /rust/src/quantization.rs: -------------------------------------------------------------------------------- 1 | use crate::{vector::distance, Vector}; 2 | use anyhow::Result; 3 | use half::{bf16, vec::HalfFloatVecExt}; 4 | 5 | pub trait Quantization: Default { 6 | type Lower: Clone; 7 | fn similarity(x: &Self::Lower, y: &Self::Lower) -> Result; 8 | fn compare(x: &Vector, y: &Self::Lower) -> Result; 9 | fn lower(vec: Vector) -> Result; 10 | fn vector_size(dimensions: usize) -> usize; 11 | fn marshal(v: &Self::Lower, array: &mut [u8]) -> Result<()>; 12 | fn unmarshal(array: &[u8]) -> Result; 13 | fn name() -> &'static str; 14 | } 15 | 16 | #[derive(Default)] 17 | pub struct NoQuantization {} 18 | 19 | impl Quantization for NoQuantization { 20 | type Lower = Vector; 21 | 22 | fn similarity(x: &Self::Lower, y: &Self::Lower) -> Result { 23 | Ok(distance(x, y)) 24 | } 25 | 26 | fn compare(x: &Vector, y: &Self::Lower) -> Result { 27 | Ok(distance(x, y)) 28 | } 29 | 30 | fn lower(vec: Vector) -> Result { 31 | Ok(vec) 32 | } 33 | 34 | fn name() -> &'static str { 35 | "none" 36 | } 37 | 38 | fn vector_size(dimensions: usize) -> usize { 39 | 4 * dimensions 40 | } 41 | 42 | fn marshal(v: &Self::Lower, array: &mut [u8]) -> Result<()> { 43 | for (i, f) in v.iter().enumerate() { 44 | let bytes = f.to_le_bytes(); 45 | let _ = &array[i * 4..i * 4 + 4].copy_from_slice(&bytes); 46 | } 47 | Ok(()) 48 | } 49 | 50 | fn unmarshal(array: &[u8]) -> Result { 51 | let mut vec = Vec::new(); 52 | for i in (0..array.len()).step_by(4) { 53 | let bytes = &array[i..i + 4]; 54 | let f: f32 = f32::from_le_bytes(bytes.try_into().unwrap()); 55 | vec.push(f); 56 | } 57 | Ok(vec) 58 | } 59 | } 60 | 61 | 
#[derive(Default)] 62 | pub struct BF16Quantization {} 63 | 64 | impl Quantization for BF16Quantization { 65 | type Lower = Vec; 66 | 67 | fn similarity(x: &Self::Lower, y: &Self::Lower) -> Result { 68 | let fx = x.iter().map(|v| v.to_f32()).collect(); 69 | let fy = y.iter().map(|v| v.to_f32()).collect(); 70 | Ok(distance(&fx, &fy)) 71 | } 72 | 73 | fn compare(x: &Vector, y: &Self::Lower) -> Result { 74 | let fy = y.iter().map(|v| v.to_f32()).collect(); 75 | Ok(distance(x, &fy)) 76 | } 77 | 78 | fn lower(vec: Vector) -> Result { 79 | Ok(Vec::from_f32_slice(vec.as_slice())) 80 | } 81 | 82 | fn name() -> &'static str { 83 | "bf16" 84 | } 85 | 86 | fn vector_size(dimensions: usize) -> usize { 87 | 2 * dimensions 88 | } 89 | 90 | fn marshal(v: &Self::Lower, array: &mut [u8]) -> Result<()> { 91 | for (i, f) in v.iter().enumerate() { 92 | let bytes = f.to_le_bytes(); 93 | let _ = &array[i * 2..i * 2 + 2].copy_from_slice(&bytes); 94 | } 95 | Ok(()) 96 | } 97 | 98 | fn unmarshal(array: &[u8]) -> Result { 99 | let mut vec = Vec::new(); 100 | for i in (0..array.len()).step_by(2) { 101 | let bytes = &array[i..i + 2]; 102 | let f: bf16 = bf16::from_le_bytes(bytes.try_into().unwrap()); 103 | vec.push(f); 104 | } 105 | Ok(vec) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /rust/src/result.rs: -------------------------------------------------------------------------------- 1 | use crate::ID; 2 | 3 | #[derive(Debug)] 4 | pub struct SearchResult { 5 | pub similarity: f32, 6 | pub id: ID, 7 | } 8 | 9 | #[derive(Debug)] 10 | pub struct ResultSet { 11 | sims: Vec, 12 | ids: Vec, 13 | k: usize, 14 | pub checked: usize, 15 | } 16 | 17 | impl ResultSet { 18 | pub fn new(k: usize) -> Self { 19 | Self { 20 | sims: Vec::with_capacity(k), 21 | ids: Vec::with_capacity(k), 22 | k, 23 | checked: 0, 24 | } 25 | } 26 | 27 | pub fn len(&self) -> usize { 28 | self.sims.len() 29 | } 30 | 31 | pub fn is_empty(&self) -> bool { 32 | 
self.sims.is_empty() 33 | } 34 | 35 | pub fn compute_recall(&self, baseline: &ResultSet, at: usize) -> f64 { 36 | let mut found = 0; 37 | for x in baseline.ids.iter().take(at) { 38 | for y in self.ids.iter().take(at) { 39 | if x == y { 40 | found += 1; 41 | } 42 | } 43 | } 44 | found as f64 / at as f64 45 | } 46 | 47 | pub fn add_result(&mut self, id: ID, similarity: f32) { 48 | self.checked += 1; 49 | if self.sims.len() == self.k { 50 | let last = self.sims.last().unwrap_or(&f32::MIN); 51 | if *last > similarity { 52 | return; 53 | } 54 | } 55 | let mut insert: usize = 0; 56 | let mut found: bool = false; 57 | for (self_id, self_sim) in self.ids.iter().zip(self.sims.iter()) { 58 | if id == *self_id { 59 | // Found ourselves 60 | return; 61 | } 62 | if *self_sim < similarity { 63 | found = true; 64 | break; 65 | } 66 | insert += 1; 67 | } 68 | if !found { 69 | // Last chance -- we're not big enough yet, so you can join the club. 70 | if self.sims.len() < self.k { 71 | self.sims.push(similarity); 72 | self.ids.push(id); 73 | } 74 | return; 75 | } 76 | self.ids.insert(insert, id); 77 | self.ids.truncate(self.k); 78 | self.sims.insert(insert, similarity); 79 | self.sims.truncate(self.k); 80 | } 81 | 82 | pub fn iter_results(&self) -> impl Iterator + '_ { 83 | self.sims 84 | .iter() 85 | .zip(self.ids.iter()) 86 | .map(|(sim, id)| SearchResult { 87 | similarity: *sim, 88 | id: *id, 89 | }) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /rust/src/spaces/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2024 Meilisearch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /rust/src/spaces/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod simple; 2 | 3 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 4 | mod simple_sse; 5 | 6 | #[cfg(target_arch = "x86_64")] 7 | mod simple_avx; 8 | 9 | #[cfg(target_arch = "aarch64")] 10 | mod simple_neon; 11 | -------------------------------------------------------------------------------- /rust/src/spaces/simple.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_arch = "x86_64")] 2 | use super::simple_avx::*; 3 | #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] 4 | use super::simple_neon::*; 5 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 6 | use super::simple_sse::*; 7 | use crate::unaligned_f32::UnalignedF32Slice; 8 | 9 | #[cfg(target_arch = "x86_64")] 10 | const MIN_DIM_SIZE_AVX: usize = 32; 11 | 12 | #[cfg(any( 13 | target_arch = "x86", 14 | target_arch = "x86_64", 15 | all(target_arch = "aarch64", target_feature = "neon") 16 | ))] 17 | const MIN_DIM_SIZE_SIMD: usize = 16; 18 | 19 | #[allow(dead_code)] 20 | 
pub fn euclidean_distance(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { 21 | #[cfg(target_arch = "x86_64")] 22 | { 23 | if is_x86_feature_detected!("avx") 24 | && is_x86_feature_detected!("fma") 25 | && u.len() >= MIN_DIM_SIZE_AVX 26 | { 27 | return unsafe { euclid_similarity_avx(u, v) }; 28 | } 29 | } 30 | 31 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 32 | { 33 | if is_x86_feature_detected!("sse") && u.len() >= MIN_DIM_SIZE_SIMD { 34 | return unsafe { euclid_similarity_sse(u, v) }; 35 | } 36 | } 37 | 38 | #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] 39 | { 40 | if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { 41 | return unsafe { euclid_similarity_neon(u, v) }; 42 | } 43 | } 44 | 45 | euclidean_distance_non_optimized(u, v) 46 | } 47 | 48 | // Don't use dot-product: avoid catastrophic cancellation in 49 | // https://github.com/spotify/annoy/issues/314. 50 | pub fn euclidean_distance_non_optimized(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { 51 | u.iter().zip(v.iter()).map(|(u, v)| (u - v) * (u - v)).sum() 52 | } 53 | 54 | pub fn dot_product(u: &UnalignedF32Slice, v: &UnalignedF32Slice) -> f32 { 55 | #[cfg(target_arch = "x86_64")] 56 | { 57 | if is_x86_feature_detected!("avx") 58 | && is_x86_feature_detected!("fma") 59 | && u.len() >= MIN_DIM_SIZE_AVX 60 | { 61 | return unsafe { dot_similarity_avx(u, v) }; 62 | } 63 | } 64 | 65 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] 66 | { 67 | if is_x86_feature_detected!("sse") && u.len() >= MIN_DIM_SIZE_SIMD { 68 | return unsafe { dot_similarity_sse(u, v) }; 69 | } 70 | } 71 | 72 | #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] 73 | { 74 | if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { 75 | return unsafe { dot_similarity_neon(u, v) }; 76 | } 77 | } 78 | 79 | dot_product_non_optimized(u, v) 80 | } 81 | 82 | pub fn dot_product_non_optimized(u: &UnalignedF32Slice, v: 
&UnalignedF32Slice) -> f32 { 83 | u.iter().zip(v.iter()).map(|(a, b)| a * b).sum() 84 | } 85 | -------------------------------------------------------------------------------- /rust/src/spaces/simple_avx.rs: -------------------------------------------------------------------------------- 1 | use std::arch::x86_64::*; 2 | use std::ptr::read_unaligned; 3 | 4 | use crate::unaligned_f32::UnalignedF32Slice; 5 | 6 | #[target_feature(enable = "avx")] 7 | #[target_feature(enable = "fma")] 8 | unsafe fn hsum256_ps_avx(x: __m256) -> f32 { 9 | let x128: __m128 = _mm_add_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); 10 | let x64: __m128 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); 11 | let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); 12 | _mm_cvtss_f32(x32) 13 | } 14 | 15 | #[target_feature(enable = "avx")] 16 | #[target_feature(enable = "fma")] 17 | pub(crate) unsafe fn euclid_similarity_avx(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { 18 | // It is safe to load unaligned floats from a pointer. 
19 | // 20 | 21 | let n = v1.len(); 22 | let m = n - (n % 32); 23 | let mut ptr1 = v1.as_ptr() as *const f32; 24 | let mut ptr2 = v2.as_ptr() as *const f32; 25 | let mut sum256_1: __m256 = _mm256_setzero_ps(); 26 | let mut sum256_2: __m256 = _mm256_setzero_ps(); 27 | let mut sum256_3: __m256 = _mm256_setzero_ps(); 28 | let mut sum256_4: __m256 = _mm256_setzero_ps(); 29 | let mut i: usize = 0; 30 | while i < m { 31 | let sub256_1: __m256 = 32 | _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(0)), _mm256_loadu_ps(ptr2.add(0))); 33 | sum256_1 = _mm256_fmadd_ps(sub256_1, sub256_1, sum256_1); 34 | 35 | let sub256_2: __m256 = 36 | _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(8)), _mm256_loadu_ps(ptr2.add(8))); 37 | sum256_2 = _mm256_fmadd_ps(sub256_2, sub256_2, sum256_2); 38 | 39 | let sub256_3: __m256 = 40 | _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(16)), _mm256_loadu_ps(ptr2.add(16))); 41 | sum256_3 = _mm256_fmadd_ps(sub256_3, sub256_3, sum256_3); 42 | 43 | let sub256_4: __m256 = 44 | _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(24)), _mm256_loadu_ps(ptr2.add(24))); 45 | sum256_4 = _mm256_fmadd_ps(sub256_4, sub256_4, sum256_4); 46 | 47 | ptr1 = ptr1.add(32); 48 | ptr2 = ptr2.add(32); 49 | i += 32; 50 | } 51 | 52 | let mut result = hsum256_ps_avx(sum256_1) 53 | + hsum256_ps_avx(sum256_2) 54 | + hsum256_ps_avx(sum256_3) 55 | + hsum256_ps_avx(sum256_4); 56 | for i in 0..n - m { 57 | let a = read_unaligned(ptr1.add(i)); 58 | let b = read_unaligned(ptr2.add(i)); 59 | result += (a - b).powi(2); 60 | } 61 | result 62 | } 63 | 64 | #[target_feature(enable = "avx")] 65 | #[target_feature(enable = "fma")] 66 | pub(crate) unsafe fn dot_similarity_avx(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { 67 | // It is safe to load unaligned floats from a pointer. 
68 | // 69 | 70 | let n = v1.len(); 71 | let m = n - (n % 32); 72 | let mut ptr1 = v1.as_ptr() as *const f32; 73 | let mut ptr2 = v2.as_ptr() as *const f32; 74 | let mut sum256_1: __m256 = _mm256_setzero_ps(); 75 | let mut sum256_2: __m256 = _mm256_setzero_ps(); 76 | let mut sum256_3: __m256 = _mm256_setzero_ps(); 77 | let mut sum256_4: __m256 = _mm256_setzero_ps(); 78 | let mut i: usize = 0; 79 | while i < m { 80 | sum256_1 = _mm256_fmadd_ps(_mm256_loadu_ps(ptr1), _mm256_loadu_ps(ptr2), sum256_1); 81 | sum256_2 = _mm256_fmadd_ps( 82 | _mm256_loadu_ps(ptr1.add(8)), 83 | _mm256_loadu_ps(ptr2.add(8)), 84 | sum256_2, 85 | ); 86 | sum256_3 = _mm256_fmadd_ps( 87 | _mm256_loadu_ps(ptr1.add(16)), 88 | _mm256_loadu_ps(ptr2.add(16)), 89 | sum256_3, 90 | ); 91 | sum256_4 = _mm256_fmadd_ps( 92 | _mm256_loadu_ps(ptr1.add(24)), 93 | _mm256_loadu_ps(ptr2.add(24)), 94 | sum256_4, 95 | ); 96 | 97 | ptr1 = ptr1.add(32); 98 | ptr2 = ptr2.add(32); 99 | i += 32; 100 | } 101 | 102 | let mut result = hsum256_ps_avx(sum256_1) 103 | + hsum256_ps_avx(sum256_2) 104 | + hsum256_ps_avx(sum256_3) 105 | + hsum256_ps_avx(sum256_4); 106 | 107 | for i in 0..n - m { 108 | let a = read_unaligned(ptr1.add(i)); 109 | let b = read_unaligned(ptr2.add(i)); 110 | result += a * b; 111 | } 112 | result 113 | } 114 | 115 | #[cfg(test)] 116 | mod tests { 117 | #[test] 118 | fn test_spaces_avx() { 119 | use super::*; 120 | use crate::spaces::simple::*; 121 | 122 | if is_x86_feature_detected!("avx") && is_x86_feature_detected!("fma") { 123 | let v1: Vec = vec![ 124 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 125 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 126 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 127 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 128 | 26., 27., 28., 29., 30., 31., 129 | ]; 130 | let v2: Vec = vec![ 131 | 40., 41., 42., 43., 44., 45., 46., 
47., 48., 49., 50., 51., 52., 53., 54., 55., 132 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 133 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 134 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 135 | 56., 57., 58., 59., 60., 61., 136 | ]; 137 | 138 | let v1 = UnalignedF32Slice::from_slice(&v1[..]); 139 | let v2 = UnalignedF32Slice::from_slice(&v2[..]); 140 | 141 | let euclid_simd = unsafe { euclid_similarity_avx(v1, v2) }; 142 | let euclid = euclidean_distance_non_optimized(v1, v2); 143 | assert_eq!(euclid_simd, euclid); 144 | 145 | let dot_simd = unsafe { dot_similarity_avx(v1, v2) }; 146 | let dot = dot_product_non_optimized(v1, v2); 147 | assert_eq!(dot_simd, dot); 148 | 149 | // let cosine_simd = unsafe { cosine_preprocess_avx(v1.clone()) }; 150 | // let cosine = cosine_preprocess(v1); 151 | // assert_eq!(cosine_simd, cosine); 152 | } else { 153 | println!("avx test skipped"); 154 | } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /rust/src/spaces/simple_neon.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_feature = "neon")] 2 | use std::arch::aarch64::*; 3 | use std::ptr::read_unaligned; 4 | 5 | use crate::unaligned_f32::UnalignedF32Slice; 6 | 7 | #[cfg(target_feature = "neon")] 8 | pub(crate) unsafe fn euclid_similarity_neon(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { 9 | // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types 10 | // from potentially unaligned memory locations safely. 
11 | // https://github.com/meilisearch/arroy/pull/13 12 | 13 | let n = v1.len(); 14 | let m = n - (n % 16); 15 | let mut ptr1 = v1.as_ptr() as *const f32; 16 | let mut ptr2 = v2.as_ptr() as *const f32; 17 | let mut sum1 = vdupq_n_f32(0.); 18 | let mut sum2 = vdupq_n_f32(0.); 19 | let mut sum3 = vdupq_n_f32(0.); 20 | let mut sum4 = vdupq_n_f32(0.); 21 | 22 | let mut i: usize = 0; 23 | while i < m { 24 | let sub1 = vsubq_f32(unaligned_float32x4_t(ptr1), unaligned_float32x4_t(ptr2)); 25 | sum1 = vfmaq_f32(sum1, sub1, sub1); 26 | 27 | let sub2 = vsubq_f32( 28 | unaligned_float32x4_t(ptr1.add(4)), 29 | unaligned_float32x4_t(ptr2.add(4)), 30 | ); 31 | sum2 = vfmaq_f32(sum2, sub2, sub2); 32 | 33 | let sub3 = vsubq_f32( 34 | unaligned_float32x4_t(ptr1.add(8)), 35 | unaligned_float32x4_t(ptr2.add(8)), 36 | ); 37 | sum3 = vfmaq_f32(sum3, sub3, sub3); 38 | 39 | let sub4 = vsubq_f32( 40 | unaligned_float32x4_t(ptr1.add(12)), 41 | unaligned_float32x4_t(ptr2.add(12)), 42 | ); 43 | sum4 = vfmaq_f32(sum4, sub4, sub4); 44 | 45 | ptr1 = ptr1.add(16); 46 | ptr2 = ptr2.add(16); 47 | i += 16; 48 | } 49 | let mut result = vaddvq_f32(sum1) + vaddvq_f32(sum2) + vaddvq_f32(sum3) + vaddvq_f32(sum4); 50 | for i in 0..n - m { 51 | let a = read_unaligned(ptr1.add(i)); 52 | let b = read_unaligned(ptr2.add(i)); 53 | result += (a - b).powi(2); 54 | } 55 | result 56 | } 57 | 58 | #[cfg(target_feature = "neon")] 59 | pub(crate) unsafe fn dot_similarity_neon(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { 60 | // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types 61 | // from potentially unaligned memory locations safely. 
62 | // https://github.com/meilisearch/arroy/pull/13 63 | 64 | let n = v1.len(); 65 | let m = n - (n % 16); 66 | let mut ptr1 = v1.as_ptr() as *const f32; 67 | let mut ptr2 = v2.as_ptr() as *const f32; 68 | let mut sum1 = vdupq_n_f32(0.); 69 | let mut sum2 = vdupq_n_f32(0.); 70 | let mut sum3 = vdupq_n_f32(0.); 71 | let mut sum4 = vdupq_n_f32(0.); 72 | 73 | let mut i: usize = 0; 74 | while i < m { 75 | sum1 = vfmaq_f32( 76 | sum1, 77 | unaligned_float32x4_t(ptr1), 78 | unaligned_float32x4_t(ptr2), 79 | ); 80 | sum2 = vfmaq_f32( 81 | sum2, 82 | unaligned_float32x4_t(ptr1.add(4)), 83 | unaligned_float32x4_t(ptr2.add(4)), 84 | ); 85 | sum3 = vfmaq_f32( 86 | sum3, 87 | unaligned_float32x4_t(ptr1.add(8)), 88 | unaligned_float32x4_t(ptr2.add(8)), 89 | ); 90 | sum4 = vfmaq_f32( 91 | sum4, 92 | unaligned_float32x4_t(ptr1.add(12)), 93 | unaligned_float32x4_t(ptr2.add(12)), 94 | ); 95 | ptr1 = ptr1.add(16); 96 | ptr2 = ptr2.add(16); 97 | i += 16; 98 | } 99 | let mut result = vaddvq_f32(sum1) + vaddvq_f32(sum2) + vaddvq_f32(sum3) + vaddvq_f32(sum4); 100 | for i in 0..n - m { 101 | let a = read_unaligned(ptr1.add(i)); 102 | let b = read_unaligned(ptr2.add(i)); 103 | result += a * b; 104 | } 105 | result 106 | } 107 | 108 | /// Reads 4xf32 in a stack-located array aligned on a f32 and reads a `float32x4_t` from it. 
109 | unsafe fn unaligned_float32x4_t(ptr: *const f32) -> float32x4_t { 110 | vld1q_f32(read_unaligned(ptr as *const [f32; 4]).as_ptr()) 111 | } 112 | 113 | #[cfg(test)] 114 | mod tests { 115 | #[cfg(target_feature = "neon")] 116 | #[test] 117 | fn test_spaces_neon() { 118 | use super::*; 119 | use crate::spaces::simple::*; 120 | 121 | if std::arch::is_aarch64_feature_detected!("neon") { 122 | let v1: Vec = vec![ 123 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 124 | 26., 27., 28., 29., 30., 31., 125 | ]; 126 | let v2: Vec = vec![ 127 | 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55., 128 | 56., 57., 58., 59., 60., 61., 129 | ]; 130 | 131 | let v1 = UnalignedF32Slice::from_slice(&v1[..]); 132 | let v2 = UnalignedF32Slice::from_slice(&v2[..]); 133 | 134 | let euclid_simd = unsafe { euclid_similarity_neon(v1, v2) }; 135 | let euclid = euclidean_distance_non_optimized(v1, v2); 136 | assert_eq!(euclid_simd, euclid); 137 | 138 | let dot_simd = unsafe { dot_similarity_neon(v1, v2) }; 139 | let dot = dot_product_non_optimized(v1, v2); 140 | assert_eq!(dot_simd, dot); 141 | 142 | // let cosine_simd = unsafe { cosine_preprocess_neon(v1.clone()) }; 143 | // let cosine = cosine_preprocess(v1); 144 | // assert_eq!(cosine_simd, cosine); 145 | } else { 146 | println!("neon test skipped"); 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /rust/src/spaces/simple_sse.rs: -------------------------------------------------------------------------------- 1 | #[cfg(target_arch = "x86")] 2 | use std::arch::x86::*; 3 | #[cfg(target_arch = "x86_64")] 4 | use std::arch::x86_64::*; 5 | use std::ptr::read_unaligned; 6 | 7 | use crate::unaligned_f32::UnalignedF32Slice; 8 | 9 | #[target_feature(enable = "sse")] 10 | unsafe fn hsum128_ps_sse(x: __m128) -> f32 { 11 | let x64: __m128 = _mm_add_ps(x, _mm_movehl_ps(x, x)); 12 | let x32: __m128 = _mm_add_ss(x64, 
_mm_shuffle_ps(x64, x64, 0x55)); 13 | _mm_cvtss_f32(x32) 14 | } 15 | 16 | #[allow(dead_code)] 17 | #[target_feature(enable = "sse")] 18 | pub(crate) unsafe fn euclid_similarity_sse(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { 19 | // It is safe to load unaligned floats from a pointer. 20 | // 21 | 22 | let n = v1.len(); 23 | let m = n - (n % 16); 24 | let mut ptr1 = v1.as_ptr() as *const f32; 25 | let mut ptr2 = v2.as_ptr() as *const f32; 26 | let mut sum128_1: __m128 = _mm_setzero_ps(); 27 | let mut sum128_2: __m128 = _mm_setzero_ps(); 28 | let mut sum128_3: __m128 = _mm_setzero_ps(); 29 | let mut sum128_4: __m128 = _mm_setzero_ps(); 30 | let mut i: usize = 0; 31 | while i < m { 32 | let sub128_1 = _mm_sub_ps(_mm_loadu_ps(ptr1), _mm_loadu_ps(ptr2)); 33 | sum128_1 = _mm_add_ps(_mm_mul_ps(sub128_1, sub128_1), sum128_1); 34 | 35 | let sub128_2 = _mm_sub_ps(_mm_loadu_ps(ptr1.add(4)), _mm_loadu_ps(ptr2.add(4))); 36 | sum128_2 = _mm_add_ps(_mm_mul_ps(sub128_2, sub128_2), sum128_2); 37 | 38 | let sub128_3 = _mm_sub_ps(_mm_loadu_ps(ptr1.add(8)), _mm_loadu_ps(ptr2.add(8))); 39 | sum128_3 = _mm_add_ps(_mm_mul_ps(sub128_3, sub128_3), sum128_3); 40 | 41 | let sub128_4 = _mm_sub_ps(_mm_loadu_ps(ptr1.add(12)), _mm_loadu_ps(ptr2.add(12))); 42 | sum128_4 = _mm_add_ps(_mm_mul_ps(sub128_4, sub128_4), sum128_4); 43 | 44 | ptr1 = ptr1.add(16); 45 | ptr2 = ptr2.add(16); 46 | i += 16; 47 | } 48 | 49 | let mut result = hsum128_ps_sse(sum128_1) 50 | + hsum128_ps_sse(sum128_2) 51 | + hsum128_ps_sse(sum128_3) 52 | + hsum128_ps_sse(sum128_4); 53 | for i in 0..n - m { 54 | let a = read_unaligned(ptr1.add(i)); 55 | let b = read_unaligned(ptr2.add(i)); 56 | result += (a - b).powi(2); 57 | } 58 | result 59 | } 60 | 61 | #[target_feature(enable = "sse")] 62 | pub(crate) unsafe fn dot_similarity_sse(v1: &UnalignedF32Slice, v2: &UnalignedF32Slice) -> f32 { 63 | // It is safe to load unaligned floats from a pointer. 
64 | // 65 | 66 | let n = v1.len(); 67 | let m = n - (n % 16); 68 | let mut ptr1 = v1.as_ptr() as *const f32; 69 | let mut ptr2 = v2.as_ptr() as *const f32; 70 | let mut sum128_1: __m128 = _mm_setzero_ps(); 71 | let mut sum128_2: __m128 = _mm_setzero_ps(); 72 | let mut sum128_3: __m128 = _mm_setzero_ps(); 73 | let mut sum128_4: __m128 = _mm_setzero_ps(); 74 | 75 | let mut i: usize = 0; 76 | while i < m { 77 | sum128_1 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(ptr1), _mm_loadu_ps(ptr2)), sum128_1); 78 | 79 | sum128_2 = _mm_add_ps( 80 | _mm_mul_ps(_mm_loadu_ps(ptr1.add(4)), _mm_loadu_ps(ptr2.add(4))), 81 | sum128_2, 82 | ); 83 | 84 | sum128_3 = _mm_add_ps( 85 | _mm_mul_ps(_mm_loadu_ps(ptr1.add(8)), _mm_loadu_ps(ptr2.add(8))), 86 | sum128_3, 87 | ); 88 | 89 | sum128_4 = _mm_add_ps( 90 | _mm_mul_ps(_mm_loadu_ps(ptr1.add(12)), _mm_loadu_ps(ptr2.add(12))), 91 | sum128_4, 92 | ); 93 | 94 | ptr1 = ptr1.add(16); 95 | ptr2 = ptr2.add(16); 96 | i += 16; 97 | } 98 | 99 | let mut result = hsum128_ps_sse(sum128_1) 100 | + hsum128_ps_sse(sum128_2) 101 | + hsum128_ps_sse(sum128_3) 102 | + hsum128_ps_sse(sum128_4); 103 | for i in 0..n - m { 104 | let a = read_unaligned(ptr1.add(i)); 105 | let b = read_unaligned(ptr2.add(i)); 106 | result += a * b; 107 | } 108 | result 109 | } 110 | 111 | #[cfg(test)] 112 | mod tests { 113 | #[test] 114 | fn test_spaces_sse() { 115 | use super::*; 116 | use crate::spaces::simple::*; 117 | 118 | if is_x86_feature_detected!("sse") { 119 | let v1: Vec = vec![ 120 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 121 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 122 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 123 | 26., 27., 28., 29., 30., 31., 124 | ]; 125 | let v2: Vec = vec![ 126 | 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55., 127 | 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 128 | 
10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 129 | 56., 57., 58., 59., 60., 61., 130 | ]; 131 | 132 | let v1 = UnalignedF32Slice::from_slice(&v1[..]); 133 | let v2 = UnalignedF32Slice::from_slice(&v2[..]); 134 | 135 | let euclid_simd = unsafe { euclid_similarity_sse(v1, v2) }; 136 | let euclid = euclidean_distance_non_optimized(v1, v2); 137 | assert_eq!(euclid_simd, euclid); 138 | 139 | let dot_simd = unsafe { dot_similarity_sse(v1, v2) }; 140 | let dot = dot_product_non_optimized(v1, v2); 141 | assert_eq!(dot_simd, dot); 142 | 143 | // let cosine_simd = unsafe { cosine_preprocess_sse(v1.clone()) }; 144 | // let cosine = cosine_preprocess(v1); 145 | // assert_eq!(cosine_simd, cosine); 146 | } else { 147 | println!("sse test skipped"); 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /rust/src/unaligned_f32.rs: -------------------------------------------------------------------------------- 1 | //! All the credit to Meilisearch, who did the deep dive here. 2 | //! Their MIT License is also in the `spaces/` directory, and this crate is similarly open-source. 3 | use std::{ 4 | borrow::Borrow, 5 | fmt, 6 | mem::{size_of, transmute}, 7 | }; 8 | 9 | use bytemuck::cast_slice; 10 | use byteorder::ByteOrder; 11 | 12 | /// A wrapper struct that is used to read unaligned floats directly from memory. 13 | #[repr(transparent)] 14 | pub struct UnalignedF32Slice([u8]); 15 | 16 | impl UnalignedF32Slice { 17 | /// Creates an unaligned slice of f32 wrapper from a slice of bytes. 18 | pub fn from_bytes(bytes: &[u8]) -> anyhow::Result<&Self> { 19 | if bytes.len() % size_of::() == 0 { 20 | Ok(unsafe { transmute(bytes) }) 21 | } else { 22 | Err(anyhow::anyhow!("Byte size mismatch to f32")) 23 | } 24 | } 25 | 26 | /// Creates an unaligned slice of f32 wrapper from a slice of f32. 27 | /// The slice is already known to be of the right length. 
28 | pub fn from_slice(slice: &[f32]) -> &Self { 29 | Self::from_bytes(cast_slice(slice)).unwrap() 30 | } 31 | 32 | /// Returns the original raw slice of bytes. 33 | pub fn as_bytes(&self) -> &[u8] { 34 | &self.0 35 | } 36 | 37 | /// Return the number of f32 that fits into this slice. 38 | pub fn len(&self) -> usize { 39 | self.0.len() / size_of::() 40 | } 41 | 42 | /// Returns wether it is empty or not. 43 | pub fn is_empty(&self) -> bool { 44 | self.0.is_empty() 45 | } 46 | 47 | /// Returns an iterator of f32 that are read from the slice. 48 | /// The f32 are copied in memory and are therefore, aligned. 49 | #[allow(clippy::needless_lifetimes)] 50 | pub fn iter<'a>(&'a self) -> impl Iterator + 'a { 51 | self.0 52 | .chunks_exact(size_of::()) 53 | .map(byteorder::NativeEndian::read_f32) 54 | } 55 | 56 | /// Returns the raw pointer to the start of this slice. 57 | pub fn as_ptr(&self) -> *const u8 { 58 | self.0.as_ptr() 59 | } 60 | } 61 | 62 | impl<'a> From<&'a crate::Vector> for &'a UnalignedF32Slice { 63 | fn from(value: &'a Vec) -> Self { 64 | UnalignedF32Slice::from_slice(value.as_slice()) 65 | } 66 | } 67 | 68 | impl ToOwned for UnalignedF32Slice { 69 | type Owned = Vec; 70 | 71 | fn to_owned(&self) -> Self::Owned { 72 | bytemuck::pod_collect_to_vec(&self.0) 73 | } 74 | } 75 | 76 | impl Borrow for Vec { 77 | fn borrow(&self) -> &UnalignedF32Slice { 78 | UnalignedF32Slice::from_slice(&self[..]) 79 | } 80 | } 81 | 82 | impl fmt::Debug for UnalignedF32Slice { 83 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 84 | struct SmallF32(f32); 85 | impl fmt::Debug for SmallF32 { 86 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 87 | f.write_fmt(format_args!("{:.4?}", self.0)) 88 | } 89 | } 90 | 91 | let mut list = f.debug_list(); 92 | self.iter().for_each(|float| { 93 | list.entry(&SmallF32(float)); 94 | }); 95 | list.finish() 96 | } 97 | } 98 | -------------------------------------------------------------------------------- 
/rust/src/vector.rs: -------------------------------------------------------------------------------- 1 | use crate::Vector; 2 | 3 | #[inline(always)] 4 | pub fn normalize(vec: &mut Vector) { 5 | let s = crate::unaligned_f32::UnalignedF32Slice::from_slice(vec.as_slice()); 6 | let norm = crate::spaces::simple::dot_product(s, s).sqrt(); 7 | vec.iter_mut().for_each(|v| *v /= norm); 8 | } 9 | 10 | #[inline(always)] 11 | pub fn dot_product(vec: &Vector, other: &Vector) -> f32 { 12 | crate::spaces::simple::dot_product(vec.into(), other.into()) 13 | } 14 | 15 | #[inline(always)] 16 | pub fn subtract_into(vec: &mut Vector, other: &Vector) { 17 | for (v, o) in vec.iter_mut().zip(other.iter()) { 18 | *v -= o; 19 | } 20 | } 21 | 22 | #[inline(always)] 23 | pub fn distance(vec: &Vector, other: &Vector) -> f32 { 24 | vec.iter() 25 | .zip(other.iter()) 26 | .fold(0.0, |acc, (a, b)| acc + ((a - b) * (a - b))) 27 | .sqrt() 28 | } 29 | -------------------------------------------------------------------------------- /rust/src/vector_file.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Result}; 2 | use memmap2::MmapMut; 3 | use std::path::PathBuf; 4 | 5 | use crate::quantization::Quantization; 6 | 7 | pub struct VectorFile { 8 | dimensions: usize, 9 | vec_size: usize, 10 | mmap: MmapMut, 11 | max_vecs: usize, 12 | quantization: std::marker::PhantomData, 13 | } 14 | 15 | impl VectorFile { 16 | pub fn create_or_open(path: PathBuf, dimensions: usize, max_vecs: usize) -> Result { 17 | let file = match std::fs::OpenOptions::new() 18 | .read(true) 19 | .write(true) 20 | .create(true) 21 | .truncate(true) 22 | .open(&path) 23 | { 24 | Ok(f) => f, 25 | Err(e) => panic!("Failed to open or create file: {}", e), 26 | }; 27 | let vec_size = Q::vector_size(dimensions); 28 | 29 | let file_size = max_vecs * vec_size; 30 | if file.metadata().unwrap().len() == 0 { 31 | file.set_len(file_size as u64).unwrap(); 32 | file.sync_data()?; 33 | } 
34 | let mmap = unsafe { MmapMut::map_mut(&file)? }; 35 | Ok(Self { 36 | dimensions, 37 | vec_size, 38 | mmap, 39 | max_vecs, 40 | quantization: Default::default(), 41 | }) 42 | } 43 | 44 | pub fn flush(&self) -> Result<()> { 45 | Ok(self.mmap.flush_async()?) 46 | } 47 | 48 | pub fn write_at(&mut self, offset: usize, vec: &Q::Lower) -> Result<()> { 49 | if offset >= self.max_vecs { 50 | return Err(anyhow!("Offset outside file bounds")); 51 | } 52 | let slice = &mut self.mmap[offset * self.vec_size..(offset + 1) * self.vec_size]; 53 | Q::marshal(vec, slice) 54 | } 55 | 56 | pub fn read_at(&self, offset: usize) -> Result { 57 | if offset >= self.max_vecs { 58 | return Err(anyhow!("Offset outside file bounds")); 59 | } 60 | let slice = &self.mmap[offset * self.vec_size..(offset + 1) * self.vec_size]; 61 | Q::unmarshal(slice) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /rust/src/vector_store.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Result}; 2 | use argminmax::ArgMinMax; 3 | use std::{borrow::BorrowMut, collections::HashMap}; 4 | 5 | use crate::{ 6 | backend::VectorBackend, 7 | counting_bitmap::CountingBitmap, 8 | create_random_vector, 9 | vector::{dot_product, normalize}, 10 | Basis, Bitmap, ResultSet, Vector, ID, 11 | }; 12 | 13 | pub struct VectorStore { 14 | backend: E, 15 | dimensions: usize, 16 | bases: Vec, 17 | // If we ever have more than INT_MAX_32 dimensions, I quit. 
18 | bitmaps: Vec>, 19 | } 20 | 21 | impl VectorStore { 22 | pub fn new(backend: E) -> Result { 23 | VectorStore::new_vector_store(backend) 24 | } 25 | 26 | pub fn new_croaring_bitmap(backend: E) -> Result { 27 | VectorStore::new_vector_store(backend) 28 | } 29 | } 30 | 31 | impl VectorStore { 32 | pub fn new_roaring_bitmap(backend: E) -> Result { 33 | VectorStore::new_vector_store(backend) 34 | } 35 | } 36 | 37 | impl VectorStore { 38 | pub fn new_bitvec_bitmap(backend: E) -> Result { 39 | VectorStore::new_vector_store(backend) 40 | } 41 | } 42 | 43 | impl VectorStore { 44 | pub fn new_vector_store(mut backend: E) -> Result { 45 | let info = backend.info(); 46 | let bases = match backend.load_bases()? { 47 | Some(b) => b, 48 | None => make_basis(info.n_basis, info.dimensions)?, 49 | }; 50 | let bitmaps = load_all_bitmaps(backend.borrow_mut())?; 51 | let out = Self { 52 | backend, 53 | dimensions: info.dimensions, 54 | bases, 55 | bitmaps, 56 | }; 57 | Ok(out) 58 | } 59 | 60 | #[inline(always)] 61 | pub fn add_vector(&mut self, id: ID, vector: &Vector) -> Result<()> { 62 | self.add_vector_iter(vec![(id, vector)].into_iter()) 63 | } 64 | 65 | pub fn add_vector_iter<'a>( 66 | &mut self, 67 | iter: impl Iterator, 68 | ) -> Result<()> { 69 | for (id, vec) in iter { 70 | self.backend.put_vector(id, vec)?; 71 | self.add_to_bitmaps(id, vec)?; 72 | } 73 | Ok(()) 74 | } 75 | 76 | pub fn find_nearest( 77 | &self, 78 | target: &Vector, 79 | k: usize, 80 | search_k: usize, 81 | spill: usize, 82 | ) -> Result { 83 | let sp = if spill >= self.dimensions { 84 | self.dimensions - 1 85 | } else { 86 | spill 87 | }; 88 | self.find_nearest_internal(target, k, search_k, sp) 89 | } 90 | 91 | #[inline(always)] 92 | fn find_nearest_internal( 93 | &self, 94 | target: &Vector, 95 | k: usize, 96 | search_k: usize, 97 | spill: usize, 98 | ) -> Result { 99 | let mut rs = ResultSet::new(k); 100 | let mut bs = CountingBitmap::::new(self.bases.len()); 101 | let mut proj: Vec = 
Vec::with_capacity(self.dimensions); 102 | for (i, basis) in self.bases.iter().enumerate() { 103 | let mut spill_into = B::new(); 104 | proj.clear(); 105 | for b in basis { 106 | proj.push(dot_product(target, b)) 107 | } 108 | for _s in 0..(spill + 1) { 109 | let face_idx = find_face_idx(&proj); 110 | if let Some(bm) = self.bitmaps[i].get(&face_idx) { 111 | spill_into.or(bm); 112 | }; 113 | proj[(face_idx.unsigned_abs() - 1) as usize] = 0.0; 114 | } 115 | bs.or(spill_into); 116 | } 117 | let elems = bs 118 | .top_k(search_k) 119 | .ok_or(anyhow!("Didn't find a counting layer?"))?; 120 | for id in elems.iter_elems() { 121 | let sim = self.backend.compute_similarity(target, id)?; 122 | rs.add_result(id, sim); 123 | } 124 | Ok(rs) 125 | } 126 | 127 | #[allow(unused)] 128 | fn add_to_bitmaps(&mut self, id: ID, vec: &Vector) -> Result<()> { 129 | let mut proj = Vec::with_capacity(self.dimensions); 130 | for (bi, basis) in self.bases.iter().enumerate() { 131 | proj.clear(); 132 | for b in basis { 133 | proj.push(dot_product(vec, b)); 134 | } 135 | let face_idx = find_face_idx(&proj); 136 | self.bitmaps[bi].entry(face_idx).or_default().add(id); 137 | } 138 | Ok(()) 139 | } 140 | 141 | pub fn full_table_scan(&self, vec: &Vector, k: usize) -> Result { 142 | self.backend.find_nearest(vec, k) 143 | } 144 | } 145 | 146 | fn make_basis(n_basis: usize, dimensions: usize) -> Result> { 147 | let mut bases = Vec::::with_capacity(n_basis); 148 | for _n in 0..n_basis { 149 | let mut basis = Basis::with_capacity(dimensions); 150 | for _ in 0..dimensions { 151 | basis.push(create_random_vector(dimensions)); 152 | } 153 | let out = orthonormalize(basis, 1); 154 | bases.push(out); 155 | } 156 | Ok(bases) 157 | } 158 | 159 | #[allow(unused)] 160 | fn print_basis(basis: &Basis) { 161 | for i in 0..basis.len() { 162 | for j in 0..basis.len() { 163 | print!("{:+.4} ", dot_product(&basis[i], &basis[j])); 164 | } 165 | println!(); 166 | } 167 | } 168 | 169 | fn orthonormalize(mut basis: Basis, 
rounds: usize) -> Basis { 170 | let dim = basis[0].len(); 171 | for _ in 0..rounds { 172 | for i in 0..basis.len() { 173 | normalize(&mut basis[i]); 174 | for j in i + 1..basis.len() { 175 | let dot = dot_product(&basis[i], &basis[j]); 176 | for k in 0..dim { 177 | basis[j][k] -= dot * basis[i][k]; 178 | } 179 | normalize(&mut basis[j]); 180 | } 181 | } 182 | } 183 | basis 184 | } 185 | 186 | fn load_all_bitmaps(be: &mut impl VectorBackend) -> Result>> { 187 | let info = be.info(); 188 | let mut out = Vec::with_capacity(info.n_basis); 189 | for i in 0..info.n_basis { 190 | let mut hm = HashMap::::new(); 191 | for x in 0..info.dimensions { 192 | let index = x as i32; 193 | let bit = be.load_bitmap::(i, index)?; 194 | if let Some(bitmap) = bit { 195 | hm.insert(index, bitmap); 196 | } else { 197 | hm.insert(index, B::default()); 198 | } 199 | let bit = be.load_bitmap::(i, -index)?; 200 | if let Some(bitmap) = bit { 201 | hm.insert(-index, bitmap); 202 | } else { 203 | hm.insert(-index, B::default()); 204 | } 205 | } 206 | out.push(hm) 207 | } 208 | Ok(out) 209 | } 210 | 211 | #[inline(always)] 212 | fn find_face_idx(projection: &Vector) -> i32 { 213 | let (min_idx, max_idx) = projection.argminmax(); 214 | let idx = if projection[max_idx].abs() >= projection[min_idx].abs() { 215 | max_idx as i32 216 | } else { 217 | min_idx as i32 218 | }; 219 | if projection[idx as usize] > 0.0 { 220 | idx + 1 221 | } else { 222 | -(idx + 1) 223 | } 224 | } 225 | 226 | #[cfg(test)] 227 | mod test { 228 | use super::*; 229 | use crate::{IndexIDIterator, MemoryBackend}; 230 | 231 | fn vecs() -> Vec { 232 | vec![ 233 | vec![1.0, 0.0], 234 | vec![0.0, 1.0], 235 | vec![1.0, 1.0], 236 | vec![-1.0, 0.0], 237 | ] 238 | } 239 | 240 | #[test] 241 | fn test_create_basis() { 242 | let mut mem = MemoryBackend::new(2, 1).unwrap(); 243 | mem.set_rng(Box::new(rand::rngs::mock::StepRng::new(0, 3))); 244 | for (i, v) in vecs().enumerate_ids() { 245 | mem.put_vector(i, v).unwrap(); 246 | } 247 | let 
basis_set: Vec = make_basis(1, 2).unwrap(); 248 | assert_eq!(basis_set.len(), 1); 249 | assert_eq!(basis_set[0].len(), 2); 250 | } 251 | 252 | #[test] 253 | fn test_make_bitmaps() { 254 | //let mem = MemoryBackend::new(2, 2); 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /rust/tests/basic_test.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use bbqvec::{self, backend::VectorBackend, IndexIDIterator}; 3 | 4 | #[test] 5 | fn creates_a_vector() { 6 | let v = bbqvec::create_random_vector(20); 7 | assert_eq!(v.len(), 20); 8 | } 9 | 10 | #[test] 11 | fn full_table_scan() -> Result<()> { 12 | let vecs = bbqvec::create_vector_set(20, 200); 13 | let mut mem = bbqvec::MemoryBackend::new(20, 3)?; 14 | for (id, v) in vecs.enumerate_ids() { 15 | mem.put_vector(id, v)?; 16 | } 17 | let target = bbqvec::create_random_vector(256); 18 | let _ = mem.find_nearest(&target, 20)?; 19 | Ok(()) 20 | } 21 | 22 | #[test] 23 | fn built_index() -> Result<()> { 24 | let vecs = bbqvec::create_vector_set(20, 2000); 25 | let mem = bbqvec::MemoryBackend::new(20, 10)?; 26 | let mut store = bbqvec::VectorStore::new_croaring_bitmap(mem)?; 27 | store.add_vector_iter(vecs.enumerate_ids())?; 28 | Ok(()) 29 | } 30 | 31 | #[test] 32 | fn built_quantized_index() -> Result<()> { 33 | let vecs = bbqvec::create_vector_set(20, 2000); 34 | let mem = bbqvec::QuantizedMemoryBackend::::new(20, 10)?; 35 | let mut store = bbqvec::VectorStore::new_croaring_bitmap(mem)?; 36 | store.add_vector_iter(vecs.enumerate_ids())?; 37 | Ok(()) 38 | } 39 | 40 | #[test] 41 | fn built_big_index() -> Result<()> { 42 | let vecs = bbqvec::create_vector_set(256, 1000); 43 | let mem = bbqvec::MemoryBackend::new(256, 10)?; 44 | let mut store = bbqvec::VectorStore::new_croaring_bitmap(mem)?; 45 | store.add_vector_iter(vecs.enumerate_ids())?; 46 | Ok(()) 47 | } 48 | 
-------------------------------------------------------------------------------- /rust/tests/search.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use bbqvec::{self, IndexIDIterator}; 3 | 4 | #[test] 5 | fn search_index() -> Result<()> { 6 | let data = bbqvec::create_vector_set(10, 100000); 7 | let mem = bbqvec::MemoryBackend::new(10, 10)?; 8 | let mut store = bbqvec::VectorStore::new_croaring_bitmap(mem)?; 9 | println!("Made store"); 10 | store.add_vector_iter(data.enumerate_ids())?; 11 | println!("itered"); 12 | println!("built"); 13 | for _i in 0..1000 { 14 | let target = bbqvec::create_random_vector(10); 15 | store.find_nearest(&target, 20, 1000, 1)?; 16 | } 17 | Ok(()) 18 | } 19 | -------------------------------------------------------------------------------- /speed_test.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "testing" 7 | ) 8 | 9 | var ( 10 | nVectors = flag.Int("nvectors", 100000, "Number of vectors to generate") 11 | testvecs = flag.Int("testvectors", 1000, "Number of vectors to generate") 12 | dim = flag.Int("dim", 256, "Dimension of generated vectors") 13 | nBasis = flag.Int("bases", 30, "Number of basis sets") 14 | searchk = flag.Int("searchk", 1000, "SearchK") 15 | spill = flag.Int("spill", 16, "Spill") 16 | disk = flag.Bool("disk", false, "Run tests against disk") 17 | ) 18 | 19 | func BenchmarkMemoryStore(b *testing.B) { 20 | vecs := NewRandVectorSet(*nVectors, *dim, nil) 21 | 22 | be := NewMemoryBackend(*dim) 23 | store, err := NewVectorStore(be, *nBasis) 24 | if err != nil { 25 | b.Fatal(err) 26 | } 27 | 28 | for i, v := range vecs { 29 | store.AddVector(ID(i), v) 30 | } 31 | 32 | b.Run("Internal", func(b *testing.B) { 33 | for i := 0; i < b.N; i++ { 34 | v := NewRandVector(*dim, nil) 35 | store.FindNearest(v, 20, *searchk, *spill) 36 | } 37 | }) 38 | } 39 | 40 | func 
BenchmarkParameters(b *testing.B) { 41 | //First, build the thing 42 | vecs := NewRandVectorSet(*nVectors, *dim, nil) 43 | 44 | mem := NewMemoryBackend(*dim) 45 | 46 | var be VectorBackend 47 | if *disk { 48 | dir := b.TempDir() 49 | b.Log("TempDir:", dir) 50 | var err error 51 | be, err = NewDiskBackend(dir, *dim, NoQuantization{}) 52 | if err != nil { 53 | b.Fatal(err) 54 | } 55 | } else { 56 | be = NewMemoryBackend(*dim) 57 | } 58 | 59 | store, err := NewVectorStore(be, *nBasis) 60 | if err != nil { 61 | b.Fatal(err) 62 | } 63 | 64 | for i, v := range vecs { 65 | mem.PutVector(ID(i), v) 66 | store.AddVector(ID(i), v) 67 | } 68 | 69 | targetvecs := NewRandVectorSet(*testvecs, *dim, nil) 70 | res := make([]*ResultSet, *testvecs) 71 | for i, v := range targetvecs { 72 | res[i], err = FullTableScanSearch(mem, v, 20) 73 | if err != nil { 74 | b.Fatal(err) 75 | } 76 | } 77 | for _, searchk := range []int{100, 1000, 10000, 20000} { 78 | for _, spill := range []int{1, 4, 16, 64} { 79 | b.Run(fmt.Sprintf("sk%d_sp%d", searchk, spill), func(b *testing.B) { 80 | benchQuality(b, searchk, spill, store, targetvecs, res) 81 | }) 82 | } 83 | } 84 | } 85 | 86 | func benchQuality(b *testing.B, searchk, spill int, store *VectorStore, vecs []Vector, res []*ResultSet) { 87 | b.ResetTimer() 88 | //b.ReportAllocs() 89 | recalls := make([]float64, 4) 90 | ats := []int{1, 5, 10, 20} 91 | for i := 0; i < b.N; i++ { 92 | b.StartTimer() 93 | v := vecs[i%len(vecs)] 94 | aknn, err := store.FindNearest(v, 20, searchk, spill) 95 | if err != nil { 96 | b.Fatal(err) 97 | } 98 | b.StopTimer() 99 | recalls[0] += aknn.ComputeRecall(res[i%len(vecs)], 1) 100 | recalls[1] += aknn.ComputeRecall(res[i%len(vecs)], 5) 101 | recalls[2] += aknn.ComputeRecall(res[i%len(vecs)], 10) 102 | recalls[3] += aknn.ComputeRecall(res[i%len(vecs)], 20) 103 | } 104 | for i, total := range recalls { 105 | b.ReportMetric(total/float64(b.N), fmt.Sprintf("recall@%02d", ats[i])) 106 | } 107 | } 108 | 
-------------------------------------------------------------------------------- /vector.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "math/rand" 5 | 6 | "github.com/viterin/vek/vek32" 7 | ) 8 | 9 | type ID uint64 10 | 11 | type Basis []Vector 12 | 13 | type Vector []float32 14 | 15 | func (v Vector) Clone() Vector { 16 | out := make([]float32, len(v)) 17 | copy(out, v) 18 | return out 19 | } 20 | 21 | func (v Vector) Normalize() { 22 | factor := vek32.Norm(v) 23 | vek32.DivNumber_Inplace(v, factor) 24 | } 25 | 26 | func (v Vector) Dimensions() int { 27 | return len(v) 28 | } 29 | 30 | func (v Vector) CosineSimilarity(other Vector) float32 { 31 | return vek32.CosineSimilarity(v, other) 32 | } 33 | 34 | func NewRandVector(dim int, rng *rand.Rand) Vector { 35 | out := make([]float32, dim) 36 | for i := 0; i < dim; i++ { 37 | if rng != nil { 38 | out[i] = float32(rng.NormFloat64()) 39 | } else { 40 | out[i] = float32(rand.NormFloat64()) 41 | } 42 | } 43 | factor := vek32.Norm(out) 44 | vek32.DivNumber_Inplace(out, factor) 45 | return out 46 | } 47 | 48 | func NewRandVectorSet(n int, dim int, rng *rand.Rand) []Vector { 49 | out := make([]Vector, n) 50 | for i := 0; i < n; i++ { 51 | out[i] = NewRandVector(dim, rng) 52 | out[i].Normalize() 53 | } 54 | return out 55 | } 56 | -------------------------------------------------------------------------------- /vector_store.go: -------------------------------------------------------------------------------- 1 | package bbq 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strings" 7 | "sync" 8 | 9 | "github.com/RoaringBitmap/roaring" 10 | "github.com/viterin/vek/vek32" 11 | ) 12 | 13 | const defaultMaxSampling = 10000 14 | 15 | type PrintfFunc func(string, ...any) 16 | 17 | type VectorStore struct { 18 | logger PrintfFunc 19 | backend VectorBackend 20 | dimensions int 21 | nbasis int 22 | bases []Basis 23 | bms []map[int]*roaring.Bitmap 24 | preSpill int 25 
| lastSaveToken uint64 26 | } 27 | 28 | type VectorStoreOption func(vs *VectorStore) error 29 | 30 | func WithPrespill(prespill int) VectorStoreOption { 31 | return func(vs *VectorStore) error { 32 | if prespill <= 0 { 33 | prespill = 1 34 | } else if prespill > vs.dimensions { 35 | prespill = vs.dimensions 36 | } 37 | vs.preSpill = prespill 38 | return nil 39 | } 40 | } 41 | 42 | func NewVectorStore(backend VectorBackend, nBasis int, opts ...VectorStoreOption) (*VectorStore, error) { 43 | info := backend.Info() 44 | v := &VectorStore{ 45 | dimensions: info.Dimensions, 46 | nbasis: nBasis, 47 | backend: backend, 48 | bases: make([]Basis, nBasis), 49 | bms: make([]map[int]*roaring.Bitmap, nBasis), 50 | preSpill: 1, 51 | } 52 | for _, o := range opts { 53 | err := o(v) 54 | if err != nil { 55 | return nil, err 56 | } 57 | } 58 | if info.HasIndexData { 59 | err := v.loadFromBackend() 60 | return v, err 61 | } 62 | err := v.makeBasis() 63 | if err != nil { 64 | return nil, err 65 | } 66 | err = v.Sync() 67 | if err != nil { 68 | return nil, err 69 | } 70 | return v, nil 71 | } 72 | 73 | func (vs *VectorStore) Close() error { 74 | err := vs.Sync() 75 | if err != nil { 76 | return err 77 | } 78 | return vs.backend.Close() 79 | } 80 | 81 | func (vs *VectorStore) SetLogger(printf PrintfFunc) { 82 | vs.logger = printf 83 | } 84 | 85 | func (vs *VectorStore) log(s string, a ...any) { 86 | if vs.logger != nil { 87 | vs.logger(s, a...) 
88 | } 89 | } 90 | 91 | func (vs *VectorStore) AddVector(id ID, v Vector) error { 92 | if vs.backend.Exists(id) { 93 | vs.removeFromBitmaps(id) 94 | } 95 | err := vs.backend.PutVector(id, v) 96 | if err != nil { 97 | return err 98 | } 99 | return vs.addToBitmaps([]ID{id}, []Vector{v}) 100 | } 101 | 102 | func (vs *VectorStore) AddVectorsWithOffset(offset ID, vecs []Vector) error { 103 | ids := make([]ID, len(vecs)) 104 | for i, v := range vecs { 105 | id := offset + ID(i) 106 | ids[i] = id 107 | if vs.backend.Exists(id) { 108 | vs.removeFromBitmaps(id) 109 | } 110 | vs.backend.PutVector(id, v) 111 | } 112 | return vs.addToBitmaps(ids, vecs) 113 | } 114 | 115 | func (vs *VectorStore) AddVectorsWithIDs(ids []ID, vecs []Vector) error { 116 | for i, v := range vecs { 117 | id := ids[i] 118 | if vs.backend.Exists(id) { 119 | vs.removeFromBitmaps(id) 120 | } 121 | vs.backend.PutVector(id, v) 122 | } 123 | return vs.addToBitmaps(ids, vecs) 124 | } 125 | 126 | func (vs *VectorStore) FindNearest(vector Vector, k int, searchk int, spill int) (*ResultSet, error) { 127 | if spill < 0 { 128 | spill = 0 129 | } else if spill >= vs.dimensions { 130 | spill = vs.dimensions - 1 131 | } 132 | return vs.findNearestInternal(vector, k, searchk, spill) 133 | } 134 | 135 | func (vs *VectorStore) findNearestInternal(vector Vector, k int, searchk int, spill int) (*ResultSet, error) { 136 | counts := NewCountingBitmap(vs.nbasis) 137 | buf := make([]float32, vs.dimensions) 138 | maxes := make([]int, spill+1) 139 | for i, basis := range vs.bases { 140 | spillClone := roaring.New() 141 | vs.findIndexesForBasis(vector, basis, buf, maxes) 142 | for _, m := range maxes { 143 | if v, ok := vs.bms[i][m]; ok { 144 | spillClone.Or(v) 145 | } 146 | } 147 | counts.Or(spillClone) 148 | } 149 | elems := counts.TopK(searchk) 150 | //vs.log("Actual searchK is: %s", counts.String()) 151 | // Rerank within the reduced set 152 | rs := NewResultSet(k) 153 | var err error 154 | 155 | elems.Iterate(func(x 
uint32) bool { 156 | // things that take closures should really return error, so that it can abort... 157 | var sim float32 158 | sim, err = vs.backend.ComputeSimilarity(vector, ID(x)) 159 | if err != nil { 160 | return false 161 | } 162 | // On err, this will be the zero value of sum (but that's ok, we're going down) 163 | rs.AddResult(ID(x), sim) 164 | return true 165 | }) 166 | return rs, err 167 | } 168 | 169 | func (vs *VectorStore) findIndexesForBasis(target Vector, basis Basis, buf []float32, maxes []int) { 170 | for x, b := range basis { 171 | dot := vek32.Dot(b, target) 172 | buf[x] = dot 173 | } 174 | for i := 0; i < len(maxes); i++ { 175 | big := vek32.ArgMax(buf) 176 | small := vek32.ArgMin(buf) 177 | idx := 0 178 | if math.Abs(float64(buf[big])) >= math.Abs(float64(buf[small])) { 179 | idx = big 180 | } else { 181 | idx = small 182 | } 183 | if buf[idx] > 0.0 { 184 | maxes[i] = idx + 1 185 | } else { 186 | maxes[i] = -(idx + 1) 187 | } 188 | buf[idx] = 0.0 189 | } 190 | } 191 | 192 | func (vs *VectorStore) Sync() error { 193 | be, ok := vs.backend.(IndexBackend) 194 | if !ok { 195 | return nil 196 | } 197 | var err error 198 | vs.lastSaveToken, err = be.SaveBases(vs.bases, vs.lastSaveToken) 199 | if err != nil { 200 | return err 201 | } 202 | for b, dimmap := range vs.bms { 203 | for i, v := range dimmap { 204 | err = be.SaveBitmap(b, i, v) 205 | if err != nil { 206 | return err 207 | } 208 | } 209 | } 210 | return be.Sync() 211 | } 212 | 213 | func (vs *VectorStore) makeBasis() error { 214 | vs.log("Making basis set") 215 | for n := 0; n < vs.nbasis; n++ { 216 | basis := make(Basis, vs.dimensions) 217 | for i := range vs.dimensions { 218 | basis[i] = NewRandVector(vs.dimensions, nil) 219 | } 220 | for range 10 { 221 | orthonormalize(basis) 222 | } 223 | vs.log("Completed basis %d", n) 224 | vs.bases[n] = basis 225 | } 226 | vs.log("Completed basis set generation") 227 | return nil 228 | } 229 | 230 | // Use Modified Gram-Schmidt 
(https://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process) 231 | // to turn our random vectors into an orthonormal basis. 232 | func orthonormalize(basis Basis) { 233 | buf := make([]float32, len(basis[0])) 234 | cur := basis[0] 235 | for i := 1; i < len(basis); i++ { 236 | for j := i; j < len(basis); j++ { 237 | dot := vek32.Dot(basis[j], cur) 238 | vek32.MulNumber_Into(buf, cur, dot) 239 | vek32.Sub_Inplace(basis[j], buf) 240 | basis[j].Normalize() 241 | } 242 | cur = basis[i] 243 | } 244 | } 245 | 246 | func debugPrintBasis(basis Basis) { 247 | for i := 0; i < len(basis); i++ { 248 | sim := make([]any, len(basis)) 249 | for j := 0; j < len(basis); j++ { 250 | sim[j] = vek32.CosineSimilarity(basis[i], basis[j]) 251 | } 252 | pattern := strings.Repeat("%+.15f ", len(basis)) 253 | fmt.Printf(pattern+"\n", sim...) 254 | } 255 | } 256 | 257 | func (vs *VectorStore) removeFromBitmaps(id ID) { 258 | for _, m := range vs.bms { 259 | if m == nil { 260 | continue 261 | } 262 | for _, bm := range m { 263 | bm.Remove(uint32(id)) 264 | } 265 | } 266 | } 267 | 268 | func (vs *VectorStore) addToBitmaps(ids []ID, vectors []Vector) error { 269 | var wg sync.WaitGroup 270 | for n, basis := range vs.bases { 271 | wg.Add(1) 272 | go func(n int, basis Basis, wg *sync.WaitGroup) { 273 | if v := vs.bms[n]; v == nil { 274 | vs.bms[n] = make(map[int]*roaring.Bitmap) 275 | } 276 | buf := make([]float32, vs.dimensions) 277 | maxes := make([]int, vs.preSpill) 278 | for i := range vectors { 279 | id := ids[i] 280 | v := vectors[i] 281 | vs.findIndexesForBasis(v, basis, buf, maxes) 282 | for _, m := range maxes { 283 | if _, ok := vs.bms[n][m]; !ok { 284 | vs.bms[n][m] = roaring.NewBitmap() 285 | } 286 | vs.bms[n][m].Add(uint32(id)) 287 | } 288 | } 289 | wg.Done() 290 | }(n, basis, &wg) 291 | } 292 | wg.Wait() 293 | return nil 294 | } 295 | 296 | func (vs *VectorStore) loadFromBackend() error { 297 | var err error 298 | be, ok := vs.backend.(IndexBackend) 299 | if !ok { 300 | 
panic("Backend suggested that it could store indexes, but it cannot") 301 | } 302 | vs.bases, err = be.LoadBases() 303 | if err != nil { 304 | return err 305 | } 306 | for b := 0; b < vs.nbasis; b++ { 307 | dimmap := make(map[int]*roaring.Bitmap) 308 | for i := 1; i <= vs.dimensions; i++ { 309 | bm, err := be.LoadBitmap(b, i) 310 | if err != nil { 311 | return err 312 | } 313 | dimmap[i] = bm 314 | bm, err = be.LoadBitmap(b, -i) 315 | if err != nil { 316 | return err 317 | } 318 | dimmap[-i] = bm 319 | } 320 | } 321 | return nil 322 | } 323 | --------------------------------------------------------------------------------