├── .github
├── bbqvec.png
├── bbqvec.svg
├── gopher-cook.svg
└── workflows
│ ├── go.yml
│ └── rust.yml
├── .gitignore
├── LICENSE
├── README.md
├── backend.go
├── backend_disk.go
├── backend_memory.go
├── backend_quantized_memory.go
├── cmd
└── run-ann-benchmark
│ └── main.go
├── counting_bitmap.go
├── disk_test.go
├── emperical_test.go
├── errors.go
├── go.mod
├── go.sum
├── integration_test.go
├── micro_test.go
├── quantization.go
├── quantization_f16.go
├── quantization_f16_test.go
├── result.go
├── rust
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── benches
│ ├── main_benchmark.rs
│ ├── memory_store.rs
│ └── micro.rs
├── cmd
│ └── test-recall
│ │ ├── .gitignore
│ │ ├── Cargo.lock
│ │ ├── Cargo.toml
│ │ └── src
│ │ └── main.rs
├── src
│ ├── backend.rs
│ ├── backend_disk.rs
│ ├── backend_memory.rs
│ ├── bitmaps.rs
│ ├── counting_bitmap.rs
│ ├── helpers.rs
│ ├── lib.rs
│ ├── quantization.rs
│ ├── result.rs
│ ├── spaces
│ │ ├── LICENSE
│ │ ├── mod.rs
│ │ ├── simple.rs
│ │ ├── simple_avx.rs
│ │ ├── simple_neon.rs
│ │ └── simple_sse.rs
│ ├── unaligned_f32.rs
│ ├── vector.rs
│ ├── vector_file.rs
│ └── vector_store.rs
└── tests
│ ├── basic_test.rs
│ └── search.rs
├── speed_test.go
├── vector.go
└── vector_store.go
/.github/bbqvec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/barakmich/bbqvec/ffc10c72562dca295c39f15e9be7661776f00c0f/.github/bbqvec.png
--------------------------------------------------------------------------------
/.github/gopher-cook.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
124 |
--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | # This workflow will build a golang project
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go
3 |
4 | name: Go
5 |
6 | on:
7 | push:
8 | branches: [ "main" ]
9 | pull_request:
10 | branches: [ "main" ]
11 |
12 | jobs:
13 |
14 | build:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v4
18 |
19 | - name: Set up Go
20 | uses: actions/setup-go@v4
21 | with:
22 | go-version: '1.22'
23 |
24 | - name: Build
25 | run: go build -v ./...
26 |
27 | - name: Test
28 | run: go test -v ./...
29 |
--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
1 | name: Rust
2 |
3 | on:
4 | push:
5 | branches: [ "main" ]
6 | pull_request:
7 | branches: [ "main" ]
8 |
9 | env:
10 | CARGO_TERM_COLOR: always
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 |
17 | steps:
18 | - uses: actions/checkout@v4
19 | - name: Build
20 | run: cd rust && cargo build --release --verbose
21 | - name: Run tests
22 | run: cd rust && cargo test --release --verbose
23 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.test
2 | *.exe
3 | *.pt
4 | *.bgn
5 | *.pprof
6 | perf.data
7 | perf.data.old
8 | /build
9 | /datasets
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2024, Daxe, Inc.
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | 
4 | [License](LICENSE)
5 | [GoDoc](https://godoc.org/github.com/barakmich/bbqvec)
6 | [Crates.io](https://crates.io/crates/bbqvec)
7 | [Go CI](https://github.com/barakmich/bbqvec/actions/workflows/go.yml)
8 | [Rust CI](https://github.com/barakmich/bbqvec/actions/workflows/rust.yml)
9 |
10 | BBQvec is an open-source, embedded vector database index for Go and Rust, providing approximate K-nearest-neighbors (aKNN).
11 |
12 | # Getting Started
13 |
14 | ## Go
15 |
16 | ```go
17 | package main
18 |
19 | import (
20 | "fmt"
21 |
22 | bbq "github.com/barakmich/bbqvec"
23 | )
24 |
25 | func main() {
26 | // Declare store parameters
27 | dimensions := 200
28 | nBasis := 10
29 |
30 | // Initialize the store
31 | backend := bbq.NewMemoryBackend(dimensions)
32 | datastore, _ := bbq.NewVectorStore(backend, nBasis)
33 |
34 | // Create some test data, 100K random vectors
35 | vecs := bbq.NewRandVectorSet(100_000, dimensions, nil)
36 | datastore.AddVectorsWithOffset(0, vecs)
37 | /*
38 | Equivalent to:
39 | for i, v := range vecs {
40 | datastore.AddVector(bbq.ID(i), v)
41 | }
42 | */
43 |
44 | // Run a query
45 | targetVec := bbq.NewRandVector(dimensions, nil)
46 | results, _ := datastore.FindNearest(targetVec, 10, 1000, 1)
47 |
48 | // Inspect the results
49 | top := results.ToSlice()[0]
50 | vec, _ := backend.GetVector(top.ID)
51 | fmt.Println(top.ID, vec, top.Similarity)
52 | }
53 | ```
54 |
55 | ## Rust
56 |
57 | ```rust
58 | use bbqvec::IndexIDIterator;
59 |
60 | fn main() -> Result<()> {
61 | // Declare store parameters
62 | let dimensions = 200;
63 | let n_basis = 10;
64 |
65 | // Initialize the store
66 | let mem = bbqvec::MemoryBackend::new(dimensions, n_basis)?;
67 | let mut store = bbqvec::VectorStore::new(mem)?;
68 |
69 | // Create some test data, 100K random vectors
70 | let vecs = bbqvec::create_vector_set(dimensions, 100000);
71 | store.add_vector_iter(vecs.enumerate_ids())?;
72 |
73 | // Run a query
74 | let target = bbqvec::create_random_vector(dimensions);
75 | let results = store.find_nearest(&target, 10, 1000, 1)?;
76 |
77 | // Inspect the results
78 | for res in results.iter_results() {
79 | println!("{} {}", res.id, res.similarity)
80 | }
81 | }
82 |
83 | ```
84 |
85 | # TODOs
86 |
87 | We're still early; Go is the more tried-and-true and suits the beta use-case, but Rust is a good deal faster. We welcome contributions.
88 |
89 | ## Go
90 | - [ ] More benchmarks
91 | - [ ] New Quantizations
92 | - [ ] Hamming Distance (single-bit vectors)
93 | - [ ] Novel quantizations
94 | ## Rust
95 | - [ ] Finish disk backend to match Go (in progress, shortly)
96 | - [ ] New Quantizations
97 |
98 |
99 | ### Acknowledgements
100 | Thank you to MariaLetta for the [free-gophers-pack](https://github.com/MariaLetta/free-gophers-pack) and to [rustacean.net](https://rustacean.net) for the CC0 logo characters.
101 |
--------------------------------------------------------------------------------
/backend.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "errors"
5 |
6 | "github.com/RoaringBitmap/roaring"
7 | )
8 |
9 | type VectorBackend interface {
10 | PutVector(id ID, v Vector) error
11 | ComputeSimilarity(targetVector Vector, targetID ID) (float32, error)
12 | Info() BackendInfo
13 | Exists(id ID) bool
14 | Close() error
15 | }
16 |
17 | type scannableBackend interface {
18 | VectorBackend
19 | ForEachVector(func(ID) error) error
20 | }
21 |
22 | type VectorGetter[T any] interface {
23 | GetVector(id ID) (T, error)
24 | }
25 |
26 | type IndexBackend interface {
27 | SaveBases(bases []Basis, token uint64) (uint64, error)
28 | LoadBases() ([]Basis, error)
29 |
30 | SaveBitmap(basis int, index int, bitmap *roaring.Bitmap) error
31 | LoadBitmap(basis, index int) (*roaring.Bitmap, error)
32 | Sync() error
33 | }
34 |
// BackendInfo is the self-description a backend returns from Info.
type BackendInfo struct {
	// HasIndexData is true when the backend already holds saved index data.
	HasIndexData bool
	// Dimensions is the length of the vectors the backend stores.
	Dimensions int
	// Quantization is the name of the quantization in use; the in-memory
	// backends leave it empty.
	Quantization string
}
40 |
41 | func FullTableScanSearch(be VectorBackend, target Vector, k int) (*ResultSet, error) {
42 | rs := NewResultSet(k)
43 | b, ok := be.(scannableBackend)
44 | if !ok {
45 | return nil, errors.New("Backend is incompatible")
46 | }
47 | err := b.ForEachVector(func(id ID) error {
48 | sim, err := b.ComputeSimilarity(target, id)
49 | if err != nil {
50 | return err
51 | }
52 | rs.AddResult(id, sim)
53 | return nil
54 | })
55 | return rs, err
56 | }
57 |
--------------------------------------------------------------------------------
/backend_disk.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "bytes"
5 | "encoding/binary"
6 | "encoding/hex"
7 | "encoding/json"
8 | "errors"
9 | "fmt"
10 | "io"
11 | "io/fs"
12 | "math"
13 | "math/rand"
14 | "os"
15 | "path/filepath"
16 |
17 | "github.com/RoaringBitmap/roaring"
18 | "github.com/barakmich/mmap-go"
19 | )
20 |
21 | const defaultVecsPerFile = 200000
22 |
23 | type DiskBackend[V any] struct {
24 | dir string
25 | metadata diskMetadata
26 | quantization Quantization[V]
27 | vectorPages map[int]mmap.MMap
28 | vectorFiles map[int]*os.File
29 | token uint64
30 | }
31 |
32 | type diskMetadata struct {
33 | Dimensions int `json:"dimensions"`
34 | Quantization string `json:"quantization"`
35 | VecsPerFile int `json:"vecs_per_file"`
36 | VecFiles []int `json:"vec_files"`
37 | }
38 |
39 | var _ IndexBackend = &DiskBackend[Vector]{}
40 |
41 | func NewDiskBackend[V any](directory string, dimensions int, quantization Quantization[V]) (*DiskBackend[V], error) {
42 | token := rand.Uint64()
43 | if token == 0 {
44 | token = 1
45 | }
46 | be := &DiskBackend[V]{
47 | dir: directory,
48 | metadata: diskMetadata{
49 | Dimensions: dimensions,
50 | Quantization: quantization.Name(),
51 | VecsPerFile: defaultVecsPerFile,
52 | },
53 | quantization: quantization,
54 | token: token,
55 | vectorPages: make(map[int]mmap.MMap),
56 | vectorFiles: make(map[int]*os.File),
57 | }
58 | err := be.openFiles()
59 | if err != nil {
60 | return nil, err
61 | }
62 | return be, nil
63 | }
64 |
65 | func (d *DiskBackend[V]) Close() error {
66 | err := d.Sync()
67 | if err != nil {
68 | return err
69 | }
70 | for _, mm := range d.vectorPages {
71 | err := mm.Unmap()
72 | if err != nil {
73 | return err
74 | }
75 | }
76 | for _, f := range d.vectorFiles {
77 | err := f.Close()
78 | if err != nil {
79 | return err
80 | }
81 | }
82 | return d.saveMetadata()
83 | }
84 |
85 | func (d *DiskBackend[V]) Sync() error {
86 | for _, mm := range d.vectorPages {
87 | err := mm.FlushAsync()
88 | if err != nil {
89 | return err
90 | }
91 | }
92 | return nil
93 | }
94 |
95 | func (d *DiskBackend[V]) openFiles() error {
96 | _, err := os.Stat(d.dir)
97 | if errors.Is(err, fs.ErrNotExist) {
98 | return d.createNew()
99 | } else if err != nil {
100 | return err
101 | }
102 |
103 | _, err = os.Stat(filepath.Join(d.dir, "metadata.json"))
104 | if errors.Is(err, fs.ErrNotExist) {
105 | return d.createNew()
106 | } else if err != nil {
107 | return err
108 | }
109 |
110 | f, err := os.Open(filepath.Join(d.dir, "metadata.json"))
111 | if err != nil {
112 | return err
113 | }
114 | defer f.Close()
115 | err = json.NewDecoder(f).Decode(&d.metadata)
116 | if err != nil {
117 | return err
118 | }
119 |
120 | for _, k := range d.metadata.VecFiles {
121 | f, err := os.OpenFile(mkPageFilepath(d.dir, k), os.O_RDWR, 0755)
122 | if err != nil {
123 | return err
124 | }
125 | d.vectorFiles[k] = f
126 | mm, err := mmap.Map(f, mmap.RDWR, 0)
127 | if err != nil {
128 | return err
129 | }
130 | d.vectorPages[k] = mm
131 | }
132 | return nil
133 | }
134 |
135 | func (d *DiskBackend[V]) createNew() error {
136 | err := os.MkdirAll(d.dir, 0755)
137 | if err != nil {
138 | return err
139 | }
140 | return d.saveMetadata()
141 | }
142 |
143 | func (d *DiskBackend[V]) saveMetadata() error {
144 | f, err := os.Create(filepath.Join(d.dir, "metadata.json"))
145 | if err != nil {
146 | return err
147 | }
148 | defer f.Close()
149 | err = json.NewEncoder(f).Encode(d.metadata)
150 | if err != nil {
151 | return err
152 | }
153 | return nil
154 | }
155 |
156 | func (d *DiskBackend[V]) PutVector(id ID, v Vector) error {
157 | var err error
158 | key := int(id) / d.metadata.VecsPerFile
159 | off := int(id) % d.metadata.VecsPerFile
160 | page, ok := d.vectorPages[key]
161 | if !ok {
162 | page, err = d.createPage(key)
163 | if err != nil {
164 | return err
165 | }
166 | }
167 | size := d.quantization.LowerSize(d.metadata.Dimensions)
168 | l, err := d.quantization.Lower(v)
169 | if err != nil {
170 | return err
171 | }
172 | slice := page[off*size : (off+1)*size]
173 | return d.quantization.Marshal(slice, l)
174 | }
175 |
176 | func (d *DiskBackend[V]) createPage(key int) (mmap.MMap, error) {
177 | f, err := os.Create(mkPageFilepath(d.dir, key))
178 | if err != nil {
179 | return nil, err
180 | }
181 | vecsize := d.quantization.LowerSize(d.metadata.Dimensions)
182 | err = f.Truncate(int64(vecsize * d.metadata.VecsPerFile))
183 | if err != nil {
184 | return nil, err
185 | }
186 | d.vectorFiles[key] = f
187 | mm, err := mmap.Map(f, mmap.RDWR, 0)
188 | if err != nil {
189 | return nil, err
190 | }
191 | d.vectorPages[key] = mm
192 | d.metadata.VecFiles = append(d.metadata.VecFiles, key)
193 | err = d.saveMetadata()
194 | if err != nil {
195 | return nil, err
196 | }
197 | return mm, nil
198 | }
199 |
200 | func (d *DiskBackend[V]) ComputeSimilarity(targetVector Vector, targetID ID) (float32, error) {
201 | v, err := d.quantization.Lower(targetVector)
202 | if err != nil {
203 | return 0, err
204 | }
205 | target, err := d.GetVector(targetID)
206 | if err != nil {
207 | return 0, err
208 | }
209 | return d.quantization.Similarity(target, v), nil
210 | }
211 |
212 | func (d *DiskBackend[V]) Info() BackendInfo {
213 | exists := true
214 | if _, err := os.Stat(filepath.Join(d.dir, "bases")); errors.Is(err, os.ErrNotExist) {
215 | exists = false
216 | }
217 |
218 | return BackendInfo{
219 | HasIndexData: exists,
220 | Dimensions: d.metadata.Dimensions,
221 | Quantization: d.quantization.Name(),
222 | }
223 | }
224 |
225 | func (d *DiskBackend[V]) Exists(id ID) bool {
226 | key := int(id) / d.metadata.VecsPerFile
227 | off := int(id) % d.metadata.VecsPerFile
228 | page, ok := d.vectorPages[key]
229 | if !ok {
230 | return false
231 | }
232 | size := d.quantization.LowerSize(d.metadata.Dimensions)
233 | slice := page[off*size : (off+1)*size]
234 | for _, x := range slice {
235 | if x != 0x0 {
236 | return true
237 | }
238 | }
239 | return false
240 | }
241 |
242 | func (d *DiskBackend[V]) GetVector(id ID) (v V, err error) {
243 | key := int(id) / d.metadata.VecsPerFile
244 | off := int(id) % d.metadata.VecsPerFile
245 | page, ok := d.vectorPages[key]
246 | if !ok {
247 | err = ErrIDNotFound
248 | return
249 | }
250 | size := d.quantization.LowerSize(d.metadata.Dimensions)
251 | slice := page[off*size : (off+1)*size]
252 | return d.quantization.Unmarshal(slice)
253 | }
254 |
255 | func (d *DiskBackend[V]) SaveBases(bases []Basis, token uint64) (uint64, error) {
256 | if token == d.token {
257 | return d.token, nil
258 | }
259 | nbuf := make([]byte, 4)
260 | buf := bytes.NewBuffer(nil)
261 | for _, b := range bases {
262 | for _, v := range b {
263 | for _, s := range v {
264 | binary.LittleEndian.PutUint32(nbuf, math.Float32bits(s))
265 | buf.Write(nbuf)
266 | }
267 | }
268 | }
269 | f, err := os.Create(filepath.Join(d.dir, "bases"))
270 | if err != nil {
271 | return 0, err
272 | }
273 | defer f.Close()
274 | _, err = io.Copy(f, buf)
275 | return d.token, err
276 | }
277 |
278 | func (d *DiskBackend[V]) LoadBases() ([]Basis, error) {
279 | f, err := os.Open(filepath.Join(d.dir, "bases"))
280 | if err != nil {
281 | return nil, err
282 | }
283 | var out []Basis
284 | var basis Basis
285 | var vec Vector
286 | buf := make([]byte, 4)
287 | for {
288 | _, err = f.Read(buf)
289 | if errors.Is(err, io.EOF) {
290 | break
291 | }
292 | entry := math.Float32frombits(binary.LittleEndian.Uint32(buf))
293 | vec = append(vec, entry)
294 | if len(vec) == d.metadata.Dimensions {
295 | basis = append(basis, vec)
296 | vec = nil
297 | if len(basis) == d.metadata.Dimensions {
298 | out = append(out, basis)
299 | basis = nil
300 | }
301 | }
302 | }
303 | return out, nil
304 | }
305 |
306 | func (d *DiskBackend[V]) SaveBitmap(basis int, index int, bitmap *roaring.Bitmap) error {
307 | path := mkBmapFilepath(d.dir, basis, index)
308 | f, err := os.Create(path)
309 | if err != nil {
310 | return err
311 | }
312 | defer f.Close()
313 | _, err = bitmap.WriteTo(f)
314 | return err
315 | }
316 |
317 | func (d *DiskBackend[V]) LoadBitmap(basis int, index int) (*roaring.Bitmap, error) {
318 | f, err := os.Open(mkBmapFilepath(d.dir, basis, index))
319 | if err != nil {
320 | if errors.Is(err, os.ErrNotExist) {
321 | return nil, nil
322 | }
323 | return nil, err
324 | }
325 | bm := roaring.NewBitmap()
326 | _, err = bm.ReadFrom(f)
327 | return bm, err
328 | }
329 |
330 | func mkPageFilepath(basedir string, key int) string {
331 | buf := make([]byte, 8)
332 | binary.BigEndian.PutUint64(buf, uint64(key))
333 | indexStr := hex.EncodeToString(buf)
334 | return filepath.Join(basedir, fmt.Sprintf("%s.vec", indexStr))
335 | }
336 |
337 | func mkBmapFilepath(basedir string, basis int, index int) string {
338 | buf := make([]byte, 4)
339 | binary.BigEndian.PutUint16(buf, uint16(basis))
340 | basisStr := hex.EncodeToString(buf[:2])
341 | binary.BigEndian.PutUint32(buf, uint32(index))
342 | indexStr := hex.EncodeToString(buf[:4])
343 | return filepath.Join(basedir, fmt.Sprintf("%s-%s.bmap", basisStr, indexStr))
344 | }
345 |
--------------------------------------------------------------------------------
/backend_memory.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "errors"
5 | "math/rand"
6 | "time"
7 | )
8 |
9 | type MemoryBackend struct {
10 | vecs []Vector
11 | rng *rand.Rand
12 | dim int
13 | }
14 |
15 | var _ scannableBackend = &MemoryBackend{}
16 | var _ VectorGetter[Vector] = &MemoryBackend{}
17 |
18 | func NewMemoryBackend(dimensions int) *MemoryBackend {
19 | return &MemoryBackend{
20 | rng: rand.New(rand.NewSource(time.Now().UnixMicro())),
21 | dim: dimensions,
22 | }
23 | }
24 |
25 | func (mem *MemoryBackend) Close() error {
26 | return nil
27 | }
28 |
29 | func (mem *MemoryBackend) PutVector(id ID, vector Vector) error {
30 | if len(vector) != mem.dim {
31 | return errors.New("MemoryBackend: vector dimension doesn't match")
32 | }
33 |
34 | if int(id) < len(mem.vecs) {
35 | mem.vecs[int(id)] = vector
36 | } else if int(id) == len(mem.vecs) {
37 | mem.vecs = append(mem.vecs, vector)
38 | } else {
39 | mem.grow(int(id))
40 | mem.vecs[int(id)] = vector
41 | }
42 | return nil
43 | }
44 |
45 | func (mem *MemoryBackend) grow(to int) {
46 | diff := (to - len(mem.vecs)) + 1
47 | mem.vecs = append(mem.vecs, make([]Vector, diff)...)
48 | }
49 |
50 | func (mem *MemoryBackend) ComputeSimilarity(vector Vector, targetID ID) (float32, error) {
51 | target, err := mem.GetVector(targetID)
52 | if err != nil {
53 | return 0, err
54 | }
55 | return target.CosineSimilarity(vector), nil
56 | }
57 |
58 | func (mem *MemoryBackend) Info() BackendInfo {
59 | return BackendInfo{
60 | HasIndexData: false,
61 | Dimensions: mem.dim,
62 | }
63 | }
64 |
65 | func (mem *MemoryBackend) GetVector(id ID) (Vector, error) {
66 | if int(id) > len(mem.vecs)-1 {
67 | return nil, ErrIDNotFound
68 | }
69 | if mem.vecs[int(id)] == nil {
70 | return nil, ErrIDNotFound
71 | }
72 | return mem.vecs[int(id)], nil
73 | }
74 |
75 | func (mem *MemoryBackend) Exists(id ID) bool {
76 | i := int(id)
77 | if len(mem.vecs) <= i {
78 | return false
79 | }
80 | return mem.vecs[i] != nil
81 | }
82 |
83 | func (mem *MemoryBackend) ForEachVector(cb func(ID) error) error {
84 | for i, v := range mem.vecs {
85 | if v == nil {
86 | continue
87 | }
88 | err := cb(ID(i))
89 | if err != nil {
90 | return err
91 | }
92 | }
93 | return nil
94 | }
95 |
--------------------------------------------------------------------------------
/backend_quantized_memory.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "errors"
5 | "math/rand"
6 | "time"
7 | )
8 |
9 | type QuantizedMemoryBackend[V any, Q Quantization[V]] struct {
10 | vecs []*V
11 | rng *rand.Rand
12 | dim int
13 | quantization Q
14 | }
15 |
16 | var _ scannableBackend = &QuantizedMemoryBackend[Vector, NoQuantization]{}
17 | var _ VectorGetter[Vector] = &QuantizedMemoryBackend[Vector, NoQuantization]{}
18 |
19 | func NewQuantizedMemoryBackend[V any, Q Quantization[V]](dimensions int, quantization Q) *QuantizedMemoryBackend[V, Q] {
20 | return &QuantizedMemoryBackend[V, Q]{
21 | rng: rand.New(rand.NewSource(time.Now().UnixMicro())),
22 | dim: dimensions,
23 | quantization: quantization,
24 | }
25 | }
26 |
27 | func (q *QuantizedMemoryBackend[V, Q]) Close() error {
28 | return nil
29 | }
30 |
31 | func (q *QuantizedMemoryBackend[V, Q]) PutVector(id ID, vector Vector) error {
32 | if len(vector) != q.dim {
33 | return errors.New("QuantizedMemoryBackend: vector dimension doesn't match")
34 | }
35 |
36 | v, err := q.quantization.Lower(vector)
37 | if err != nil {
38 | return err
39 | }
40 |
41 | if int(id) < len(q.vecs) {
42 | q.vecs[int(id)] = &v
43 | } else if int(id) == len(q.vecs) {
44 | q.vecs = append(q.vecs, &v)
45 | } else {
46 | q.grow(int(id))
47 | q.vecs[int(id)] = &v
48 | }
49 | return nil
50 | }
51 |
52 | func (q *QuantizedMemoryBackend[V, Q]) grow(to int) {
53 | diff := (to - len(q.vecs)) + 1
54 | q.vecs = append(q.vecs, make([]*V, diff)...)
55 | }
56 |
57 | func (q *QuantizedMemoryBackend[V, Q]) ComputeSimilarity(vector Vector, targetID ID) (float32, error) {
58 | v, err := q.quantization.Lower(vector)
59 | if err != nil {
60 | return 0, err
61 | }
62 | target, err := q.GetVector(targetID)
63 | if err != nil {
64 | return 0, err
65 | }
66 | return q.quantization.Similarity(target, v), nil
67 | }
68 |
69 | func (q *QuantizedMemoryBackend[V, Q]) Info() BackendInfo {
70 | return BackendInfo{
71 | HasIndexData: false,
72 | Dimensions: q.dim,
73 | }
74 | }
75 |
76 | func (q *QuantizedMemoryBackend[V, Q]) Exists(id ID) bool {
77 | i := int(id)
78 | if len(q.vecs) <= i {
79 | return false
80 | }
81 | return q.vecs[i] != nil
82 | }
83 |
84 | func (q *QuantizedMemoryBackend[V, Q]) GetVector(id ID) (v V, err error) {
85 | if int(id) > len(q.vecs)-1 {
86 | err = ErrIDNotFound
87 | return
88 | }
89 | if q.vecs[int(id)] == nil {
90 | err = ErrIDNotFound
91 | return
92 | }
93 | return *q.vecs[int(id)], nil
94 | }
95 |
96 | func (q *QuantizedMemoryBackend[V, Q]) ForEachVector(cb func(ID) error) error {
97 | for i, v := range q.vecs {
98 | if v == nil {
99 | continue
100 | }
101 | err := cb(ID(i))
102 | if err != nil {
103 | return err
104 | }
105 | }
106 | return nil
107 | }
108 |
--------------------------------------------------------------------------------
/cmd/run-ann-benchmark/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/csv"
5 | "errors"
6 | "flag"
7 | "fmt"
8 | "io"
9 | "log"
10 | "math/rand"
11 | "os"
12 | "path/filepath"
13 | "runtime/pprof"
14 | "strconv"
15 | "sync"
16 | "sync/atomic"
17 | "time"
18 |
19 | bbq "github.com/barakmich/bbqvec"
20 | )
21 |
// Command-line flags controlling the benchmark run.
var (
	// k, searchK and spill are passed straight to VectorStore.FindNearest.
	k = flag.Int("k", 10, "K top results")
	// path is the directory holding train.csv, test.csv and neighbors.csv.
	path = flag.String("path", "", "Path to CSVs")
	// bases is handed to NewVectorStore.
	bases       = flag.Int("bases", 20, "Basis sets")
	spill       = flag.Int("spill", 10, "Spill")
	searchK     = flag.Int("searchk", 10000, "Search K")
	// parallelism is the number of worker goroutines issuing queries.
	parallelism = flag.Int("parallel", 20, "Parallel queries")
	// cpuprofile, when non-empty, is the file a CPU profile is written to.
	cpuprofile = flag.String("cpuprof", "", "CPU Profile file")
)
31 |
32 | func main() {
33 | flag.Parse()
34 | if *path == "" {
35 | log.Fatal("Path is required")
36 | }
37 | if *cpuprofile != "" {
38 | f, err := os.Create(*cpuprofile)
39 | if err != nil {
40 | log.Fatal("could not create CPU profile: ", err)
41 | }
42 | defer f.Close() // error handling omitted for example
43 | if err := pprof.StartCPUProfile(f); err != nil {
44 | log.Fatal("could not start CPU profile: ", err)
45 | }
46 | defer pprof.StopCPUProfile()
47 | }
48 | trainf, err := os.Open(filepath.Join(*path, "train.csv"))
49 | if err != nil {
50 | log.Fatal(err)
51 | }
52 | defer trainf.Close()
53 | testf, err := os.Open(filepath.Join(*path, "test.csv"))
54 | if err != nil {
55 | log.Fatal(err)
56 | }
57 | defer testf.Close()
58 | log.Println("Loading Train")
59 | train := loadVecs(trainf)
60 | log.Println("Train has", len(train))
61 | log.Println("Loading Test")
62 | test := loadVecs(testf)
63 | log.Println("Test has", len(test))
64 |
65 | log.Println("Loading true neighbors")
66 |
67 | neighborf, err := os.Open(filepath.Join(*path, "neighbors.csv"))
68 | if err != nil {
69 | log.Fatal(err)
70 | }
71 | defer neighborf.Close()
72 | trueres := loadRes(neighborf)
73 |
74 | // Now the fun begins
75 | dim := len(train[0])
76 | log.Println("Loading into memory")
77 | be := bbq.NewMemoryBackend(dim)
78 | //be := bbq.NewQuantizedMemoryBackend(dim, bbq.Float16Quantization{})
79 | store, err := bbq.NewVectorStore(be, *bases)
80 | if err != nil {
81 | log.Fatal(err)
82 | }
83 |
84 | start := time.Now()
85 | store.AddVectorsWithOffset(0, train)
86 | store.SetLogger(log.Printf)
87 | log.Printf("Built store in %v", time.Since(start))
88 |
89 | for i := 0; i < 10; i++ {
90 | spot := rand.Intn(len(trueres))
91 | fts, _ := bbq.FullTableScanSearch(be, test[spot], 100)
92 | ftsrec := fts.ComputeRecall(trueres[spot], 100)
93 | if ftsrec < 0.98 {
94 | log.Fatal("Error")
95 | }
96 | }
97 | log.Printf("FullTableScan data spot check done")
98 | res := make([]*bbq.ResultSet, len(test))
99 | var finished atomic.Uint32
100 | var wg sync.WaitGroup
101 | ch := make(chan pair)
102 | for i := 0; i < *parallelism; i++ {
103 | go func() {
104 | for p := range ch {
105 | res[p.id], err = store.FindNearest(p.vec, *k, *searchK, *spill)
106 | v := finished.Add(1)
107 | if v%1000 == 0 {
108 | log.Printf("Search finished %d", v)
109 | }
110 | }
111 | wg.Done()
112 | }()
113 | wg.Add(1)
114 | }
115 | start = time.Now()
116 | for i, v := range test {
117 | ch <- pair{i, v}
118 | }
119 | close(ch)
120 | wg.Wait()
121 | delta := time.Since(start)
122 | qps := float64(len(test)) / delta.Seconds()
123 | totalrecall := 0.0 // ...what if this is a dream?
124 | for i := range res {
125 | totalrecall += res[i].ComputeRecall(trueres[i], 10)
126 | }
127 | recall := totalrecall / float64(len(res))
128 | fmt.Printf("%0.4f,%0.4f", recall, qps)
129 | }
130 |
131 | func loadVecs(f *os.File) []bbq.Vector {
132 | c := csv.NewReader(f)
133 | c.ReuseRecord = true
134 | out := make([]bbq.Vector, 0, 100000)
135 | for {
136 | rec, err := c.Read()
137 | if errors.Is(err, io.EOF) {
138 | break
139 | }
140 | if err != nil {
141 | log.Fatal(err)
142 | }
143 | v := make([]float32, len(rec))
144 | for i, st := range rec {
145 | x, err := strconv.ParseFloat(st, 32)
146 | if err != nil {
147 | log.Fatal(err)
148 | }
149 | v[i] = float32(x)
150 | }
151 | out = append(out, v)
152 | }
153 | return out
154 | }
155 |
156 | func loadRes(f *os.File) []*bbq.ResultSet {
157 | var out []*bbq.ResultSet
158 | c := csv.NewReader(f)
159 | c.ReuseRecord = true
160 | for {
161 | rec, err := c.Read()
162 | if errors.Is(err, io.EOF) {
163 | break
164 | }
165 | if err != nil {
166 | log.Fatal(err)
167 | }
168 | rs := bbq.NewResultSet(100)
169 | for i, st := range rec {
170 | x, err := strconv.Atoi(st)
171 | if err != nil {
172 | log.Fatal(err)
173 | }
174 | rs.AddResult(bbq.ID(x), float32(150-i))
175 | }
176 | out = append(out, rs)
177 | }
178 | return out
179 | }
180 |
// pair is one unit of work for the query workers: a test vector and the
// index in the results slice where its answer is stored.
type pair struct {
	id  int
	vec bbq.Vector
}
185 |
--------------------------------------------------------------------------------
/counting_bitmap.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/RoaringBitmap/roaring"
7 | )
8 |
// CountingBitmap is a stack of bitmaps used to count how many times each
// value has been added via Or: level i collects the values that have been
// added at least i+1 times, up to the number of levels.
type CountingBitmap struct {
	// bms holds the levels; entries stay nil until first needed.
	bms []*roaring.Bitmap
}
12 |
13 | func NewCountingBitmap(maxCount int) *CountingBitmap {
14 | return &CountingBitmap{
15 | bms: make([]*roaring.Bitmap, maxCount),
16 | }
17 | }
18 |
19 | func (c *CountingBitmap) cardinalities() []int {
20 | cards := make([]int, len(c.bms))
21 | for i, it := range c.bms {
22 | if it == nil {
23 | cards[i] = 0
24 | } else {
25 | cards[i] = int(it.GetCardinality())
26 | }
27 | }
28 | return cards
29 | }
30 |
// String renders the per-level cardinalities, e.g. for debug logging.
func (c *CountingBitmap) String() string {
	return fmt.Sprint(c.cardinalities())
}
34 |
35 | func (c *CountingBitmap) Or(in *roaring.Bitmap) {
36 | cur := in
37 | for i := 0; i < len(c.bms); i++ {
38 | if c.bms[i] == nil {
39 | c.bms[i] = roaring.NewBitmap()
40 | }
41 | c.bms[i].Xor(cur)
42 | cur.AndNot(c.bms[i])
43 | c.bms[i].Or(cur)
44 | if cur.GetCardinality() == 0 {
45 | break
46 | }
47 | }
48 | }
49 |
50 | // TopK may return more things than intended
51 | func (c *CountingBitmap) TopK(k int) *roaring.Bitmap {
52 | for i := len(c.bms) - 1; i >= 0; i-- {
53 | if c.bms[i] == nil {
54 | continue
55 | }
56 | if i != 0 && int(c.bms[i].GetCardinality()) < k {
57 | continue
58 | }
59 | return c.bms[i]
60 | }
61 | return nil
62 | }
63 |
--------------------------------------------------------------------------------
/disk_test.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "testing"
5 | )
6 |
// TestDiskBackend exercises the disk backend with unquantized vectors.
func TestDiskBackend(t *testing.T) {
	testDiskBackendQuantization(t, NoQuantization{})
}

// TestDiskBackendF16 exercises the disk backend with float16 quantization.
func TestDiskBackendF16(t *testing.T) {
	testDiskBackendQuantization(t, Float16Quantization{})
}
14 |
15 | func testDiskBackendQuantization[L any](t *testing.T, q Quantization[L]) {
16 | vecs := NewRandVectorSet(*nVectors, *dim, nil)
17 |
18 | mem := NewMemoryBackend(*dim)
19 |
20 | dir := t.TempDir()
21 | t.Log("TempDir:", dir)
22 | be, err := NewDiskBackend(dir, *dim, q)
23 | if err != nil {
24 | t.Fatal(err)
25 | }
26 | store, err := NewVectorStore(be, *nBasis)
27 | if err != nil {
28 | t.Fatal(err)
29 | }
30 |
31 | for i, v := range vecs {
32 | err := mem.PutVector(ID(i), v)
33 | if err != nil {
34 | t.Fatal("error mem put", err)
35 | }
36 | err = store.AddVector(ID(i), v)
37 | if err != nil {
38 | t.Fatal("error store put", err)
39 | }
40 | if i%10000 == 0 {
41 | t.Log("Wrote", i)
42 | }
43 | }
44 | err = store.Sync()
45 | if err != nil {
46 | t.Fatal(err)
47 | }
48 |
49 | err = store.Close()
50 | if err != nil {
51 | t.Fatal(err)
52 | }
53 |
54 | t.Log("Reopening")
55 | // Reopen
56 |
57 | be, err = NewDiskBackend(dir, *dim, q)
58 | if err != nil {
59 | t.Fatal("Couldn't open disk backend", err)
60 | }
61 | store, err = NewVectorStore(be, *nBasis)
62 | if err != nil {
63 | t.Fatal("Couldn't open vector store", err)
64 | }
65 |
66 | targetvecs := NewRandVectorSet(*testvecs, *dim, nil)
67 | for _, v := range targetvecs {
68 | fts, err := FullTableScanSearch(mem, v, 20)
69 | fts.Len()
70 | if err != nil {
71 | t.Fatal(err)
72 | }
73 | }
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/emperical_test.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "math"
5 | "testing"
6 | )
7 |
8 | func TestEmpericalCountBitmapConstant(t *testing.T) {
9 | vecs := NewRandVectorSet(*nVectors, *dim, nil)
10 |
11 | be := NewMemoryBackend(*dim)
12 | store, err := NewVectorStore(be, *nBasis)
13 | if err != nil {
14 | t.Fatal(err)
15 | }
16 |
17 | for i, v := range vecs {
18 | store.AddVector(ID(i), v)
19 | }
20 |
21 | count := 0
22 | n := 0
23 | for _, basisbms := range store.bms {
24 | for _, bm := range basisbms {
25 | count += int(bm.GetCardinality())
26 | n += 1
27 | }
28 | }
29 | t.Logf("Expected avg bitmap count: %0.2f", float64(len(vecs))/float64(2**dim))
30 | t.Logf("Average bitmap count: %0.2f", float64(count)/float64(n))
31 | // now we get into the weeds
32 | buf := make([]float32, store.dimensions)
33 | maxes := make([]int, 1)
34 | target := NewRandVector(*dim, nil)
35 | counts := NewCountingBitmap(*nBasis)
36 | for i, basis := range store.bases {
37 | store.findIndexesForBasis(target, basis, buf, maxes)
38 | for _, m := range maxes {
39 | if v, ok := store.bms[i][m]; ok {
40 | counts.Or(v)
41 | }
42 | }
43 | printPredicted(i+1, t)
44 | t.Logf("got %#v", counts.cardinalities())
45 | }
46 | }
47 |
48 | const k = 0.83
49 |
50 | func printPredicted(i int, t *testing.T) {
51 | f := make([]float64, i)
52 | for j := 0; j < i; j++ {
53 | f[j] = (math.Pow(float64(i), (k*float64(j))+1.0) * float64(*nVectors)) / math.Pow(float64(2**dim), float64(j+1))
54 | }
55 | strs := make([]int, i)
56 | for i, g := range f {
57 | strs[i] = int(g)
58 | }
59 | t.Logf("exp %#v", strs)
60 | }
61 |
--------------------------------------------------------------------------------
/errors.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import "errors"
4 |
// Sentinel errors returned by the package.
var (
	ErrAlreadyBuilt = errors.New("Already built the index")
	// ErrIDNotFound is returned by the backends' GetVector when no vector is
	// stored under the requested ID.
	ErrIDNotFound = errors.New("ID not found")
)
9 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/barakmich/bbqvec
2 |
3 | go 1.22.3
4 |
5 | require (
6 | github.com/RoaringBitmap/roaring v1.9.3
7 | github.com/barakmich/mmap-go v0.0.0-20240708014031-bf5d4a307f6e
8 | github.com/kelindar/bitmap v1.5.2
9 | github.com/viterin/vek v0.4.2
10 | github.com/x448/float16 v0.8.4
11 | )
12 |
13 | require (
14 | github.com/bits-and-blooms/bitset v1.13.0 // indirect
15 | github.com/chewxy/math32 v1.10.1 // indirect
16 | github.com/kelindar/simd v1.1.2 // indirect
17 | github.com/klauspost/cpuid/v2 v2.2.7 // indirect
18 | github.com/mschoch/smat v0.2.0 // indirect
19 | github.com/viterin/partial v1.1.0 // indirect
20 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
21 | golang.org/x/sys v0.20.0 // indirect
22 | )
23 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/RoaringBitmap/roaring v1.9.3 h1:t4EbC5qQwnisr5PrP9nt0IRhRTb9gMUgQF4t4S2OByM=
2 | github.com/RoaringBitmap/roaring v1.9.3/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
3 | github.com/barakmich/mmap-go v0.0.0-20240708014031-bf5d4a307f6e h1:IKlgXkbV1ppIoCGl5o+aUSjwrMpfNMNDL0eX2hHCbsw=
4 | github.com/barakmich/mmap-go v0.0.0-20240708014031-bf5d4a307f6e/go.mod h1:QuO3A7CjHPLyaUVpqDTZznW6WhzCxm4SGthyaWRrd5s=
5 | github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
6 | github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJRUA0wFAVE=
7 | github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
8 | github.com/chewxy/math32 v1.10.1 h1:LFpeY0SLJXeaiej/eIp2L40VYfscTvKh/FSEZ68uMkU=
9 | github.com/chewxy/math32 v1.10.1/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
10 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
11 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
12 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
13 | github.com/kelindar/bitmap v1.5.2 h1:XwX7CTvJtetQZ64zrOkApoZZHBJRkjE23NfqUALA/HE=
14 | github.com/kelindar/bitmap v1.5.2/go.mod h1:j3qZjxH9s4OtvsnFTP2bmPkjqil9Y2xQlxPYHexasEA=
15 | github.com/kelindar/simd v1.1.2 h1:KduKb+M9cMY2HIH8S/cdJyD+5n5EGgq+Aeeleos55To=
16 | github.com/kelindar/simd v1.1.2/go.mod h1:inq4DFudC7W8L5fhxoeZflLRNpWSs0GNx6MlWFvuvr0=
17 | github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
18 | github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
19 | github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
20 | github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
21 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
22 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
23 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
24 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
25 | github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8=
26 | github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
27 | github.com/viterin/partial v1.1.0 h1:iH1l1xqBlapXsYzADS1dcbizg3iQUKTU1rbwkHv/80E=
28 | github.com/viterin/partial v1.1.0/go.mod h1:oKGAo7/wylWkJTLrWX8n+f4aDPtQMQ6VG4dd2qur5QA=
29 | github.com/viterin/vek v0.4.2 h1:Vyv04UjQT6gcjEFX82AS9ocgNbAJqsHviheIBdPlv5U=
30 | github.com/viterin/vek v0.4.2/go.mod h1:A4JRAe8OvbhdzBL5ofzjBS0J29FyUrf95tQogvtHHUc=
31 | github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
32 | github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
33 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM=
34 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc=
35 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
36 | golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
37 | golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
38 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
39 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
40 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
41 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
42 |
--------------------------------------------------------------------------------
/integration_test.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | func TestBasic(t *testing.T) {
8 | dim := 256
9 | nBasis := 20
10 | k := 20
11 | searchk := 200
12 |
13 | vecs := NewRandVectorSet(100000, dim, nil)
14 |
15 | be := NewMemoryBackend(dim)
16 | store, err := NewVectorStore(be, nBasis, WithPrespill(2))
17 | if err != nil {
18 | t.Fatal(err)
19 | }
20 |
21 | for i, v := range vecs {
22 | store.AddVector(ID(i), v)
23 | }
24 |
25 | store.SetLogger(t.Logf)
26 |
27 | target := NewRandVector(dim, nil)
28 | indexNearest, err := store.FindNearest(target, k, searchk, 4)
29 | if err != nil {
30 | t.Fatal(err)
31 | }
32 | t.Log(indexNearest)
33 | ftsNearest, err := FullTableScanSearch(be, target, k)
34 | t.Log(ftsNearest)
35 | recall := indexNearest.ComputeRecall(ftsNearest, k)
36 | t.Log("Recall: ", recall)
37 | }
38 |
--------------------------------------------------------------------------------
/micro_test.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "math/rand"
5 | "testing"
6 |
7 | "github.com/RoaringBitmap/roaring"
8 | "github.com/kelindar/bitmap"
9 | "github.com/viterin/vek/vek32"
10 | )
11 |
12 | func BenchmarkMicroDot(b *testing.B) {
13 | v := NewRandVector(100, nil)
14 | n := NewRandVector(100, nil)
15 | for i := 0; i < b.N; i++ {
16 | vek32.Dot(v, n)
17 | }
18 | }
19 |
20 | func BenchmarkMicroRoaring(b *testing.B) {
21 | x := roaring.NewBitmap()
22 | y := roaring.NewBitmap()
23 | for range 20000 {
24 | x.AddInt(rand.Intn(2000000))
25 | y.AddInt(rand.Intn(2000000))
26 | }
27 | b.ResetTimer()
28 | for i := 0; i < b.N; i++ {
29 | roaring.Or(x, y)
30 | }
31 | }
32 |
33 | func BenchmarkMicroBitmap(b *testing.B) {
34 | var x bitmap.Bitmap
35 | var y bitmap.Bitmap
36 | for range 20000 {
37 | x.Set(uint32(rand.Intn(2000000)))
38 | y.Set(uint32(rand.Intn(2000000)))
39 | }
40 | b.ResetTimer()
41 | for i := 0; i < b.N; i++ {
42 | var z bitmap.Bitmap
43 | z.Or(x)
44 | z.Or(y)
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/quantization.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "encoding/binary"
5 | "math"
6 |
7 | "github.com/viterin/vek/vek32"
8 | )
9 |
// Quantization describes how full-precision vectors are converted to a
// lowered representation L, compared, and serialized.
type Quantization[L any] interface {
	// Similarity scores two lowered vectors; the implementations in this
	// package return their cosine similarity.
	Similarity(x, y L) float32
	// Lower converts a full-precision vector into its lowered form.
	Lower(v Vector) (L, error)
	// Marshal writes the lowered vector into the caller-provided buffer.
	Marshal(to []byte, lower L) error
	// Unmarshal decodes a lowered vector from data.
	Unmarshal(data []byte) (L, error)
	// Name returns a short identifier for the quantization scheme.
	Name() string
	// LowerSize returns the number of bytes a lowered vector of the given
	// dimensionality occupies when marshaled.
	LowerSize(dim int) int
}
18 |
// Compile-time check that NoQuantization implements Quantization[Vector].
var _ Quantization[Vector] = NoQuantization{}

// NoQuantization is the identity quantization: vectors are kept as float32.
type NoQuantization struct{}
22 |
// Similarity returns the cosine similarity of x and y.
func (q NoQuantization) Similarity(x, y Vector) float32 {
	return vek32.CosineSimilarity(x, y)
}

// Lower returns v unchanged (the same slice, not a copy).
func (q NoQuantization) Lower(v Vector) (Vector, error) {
	return v, nil
}
30 |
31 | func (q NoQuantization) Marshal(to []byte, lower Vector) error {
32 | for i, n := range lower {
33 | u := math.Float32bits(n)
34 | binary.LittleEndian.PutUint32(to[i*4:], u)
35 | }
36 | return nil
37 | }
38 |
39 | func (q NoQuantization) Unmarshal(data []byte) (Vector, error) {
40 | out := make([]float32, len(data)>>2)
41 | for i := 0; i < len(data); i += 4 {
42 | bits := binary.LittleEndian.Uint32(data[i:])
43 | out[i>>2] = math.Float32frombits(bits)
44 | }
45 | return out, nil
46 | }
47 |
// Name identifies this quantization scheme as "none".
func (q NoQuantization) Name() string {
	return "none"
}

// LowerSize returns the marshaled size in bytes: four bytes per dimension.
func (q NoQuantization) LowerSize(dim int) int {
	return 4 * dim
}
55 |
--------------------------------------------------------------------------------
/quantization_f16.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "encoding/binary"
5 |
6 | "github.com/viterin/vek/vek32"
7 | "github.com/x448/float16"
8 | )
9 |
// float16Vec is a vector whose components are stored as half-precision floats.
type float16Vec []float16.Float16

// Compile-time check that Float16Quantization implements the interface.
var _ Quantization[float16Vec] = Float16Quantization{}

// Float16Quantization lowers vectors to half precision.
type Float16Quantization struct {
	// bufx and bufy are scratch buffers for widening vectors back to float32.
	bufx, bufy Vector
}
17 |
18 | func (q Float16Quantization) Similarity(x, y float16Vec) float32 {
19 | if q.bufx == nil {
20 | q.bufx = make(Vector, len(x))
21 | q.bufy = make(Vector, len(x))
22 | }
23 | for i := range x {
24 | q.bufx[i] = x[i].Float32()
25 | q.bufy[i] = y[i].Float32()
26 | }
27 | return vek32.CosineSimilarity(q.bufx, q.bufy)
28 | }
29 |
30 | func (q Float16Quantization) Lower(v Vector) (float16Vec, error) {
31 | out := make(float16Vec, len(v))
32 | for i, x := range v {
33 | out[i] = float16.Fromfloat32(x)
34 | }
35 | return out, nil
36 | }
37 |
38 | func (q Float16Quantization) Marshal(to []byte, lower float16Vec) error {
39 | for i, n := range lower {
40 | u := n.Bits()
41 | binary.LittleEndian.PutUint16(to[i*2:], u)
42 | }
43 | return nil
44 | }
45 |
46 | func (q Float16Quantization) Unmarshal(data []byte) (float16Vec, error) {
47 | out := make(float16Vec, len(data)>>1)
48 | for i := 0; i < len(data); i += 4 {
49 | bits := binary.LittleEndian.Uint16(data[i:])
50 | out[i>>1] = float16.Frombits(bits)
51 | }
52 | return out, nil
53 | }
54 |
// Name identifies this quantization scheme as "float16".
func (q Float16Quantization) Name() string {
	return "float16"
}

// LowerSize returns the marshaled size in bytes: two bytes per dimension.
func (q Float16Quantization) LowerSize(dim int) int {
	return 2 * dim
}
62 |
--------------------------------------------------------------------------------
/quantization_f16_test.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import "testing"
4 |
5 | func TestFloat16Quantization(t *testing.T) {
6 | vecs := NewRandVectorSet(1000, *dim, nil)
7 | mem := NewMemoryBackend(*dim)
8 | quant := NewQuantizedMemoryBackend(*dim, Float16Quantization{})
9 | for i, v := range vecs {
10 | mem.PutVector(ID(i), v)
11 | quant.PutVector(ID(i), v)
12 | }
13 | target := NewRandVector(*dim, nil)
14 | memrs, err := FullTableScanSearch(mem, target, 20)
15 | if err != nil {
16 | t.Fatal(err)
17 | }
18 | qrs, err := FullTableScanSearch(quant, target, 20)
19 | if err != nil {
20 | t.Fatal(err)
21 | }
22 | recall := memrs.ComputeRecall(qrs, 10)
23 | t.Logf("Recall %0.4f\n", recall)
24 | t.Logf("\n%s\n%s", memrs, qrs)
25 | }
26 |
27 | func TestFloat16Backend(t *testing.T) {
28 | vecs := NewRandVectorSet(1000, *dim, nil)
29 | quant := NewQuantizedMemoryBackend(*dim, Float16Quantization{})
30 | store, err := NewVectorStore(quant, *nBasis, WithPrespill(2))
31 | if err != nil {
32 | t.Fatal(err)
33 | }
34 | err = store.AddVectorsWithOffset(0, vecs)
35 | if err != nil {
36 | t.Fatal(err)
37 | }
38 |
39 | target := NewRandVector(*dim, nil)
40 | qrs, err := FullTableScanSearch(quant, target, 20)
41 | if err != nil {
42 | t.Fatal(err)
43 | }
44 | rs, err := store.FindNearest(target, 20, 20000, 2)
45 | if err != nil {
46 | t.Fatal(err)
47 | }
48 | recall := rs.ComputeRecall(qrs, 10)
49 | t.Logf("Recall %0.4f\n", recall)
50 | t.Logf("\n%s\n%s", rs, qrs)
51 | }
52 |
--------------------------------------------------------------------------------
/result.go:
--------------------------------------------------------------------------------
1 | package bbq
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 | )
7 |
// Result is a single search hit: a vector ID and its similarity score.
type Result struct {
	Similarity float32
	ID         ID
}

// String formats the result as "(ID similarity)" with four decimal places.
func (r Result) String() string {
	return fmt.Sprintf("(%d %0.4f)", r.ID, r.Similarity)
}
16 |
// ResultSet is a bounded, similarity-ordered collection of search hits.
type ResultSet struct {
	// inner guards mutation in AddResult.
	inner sync.Mutex
	// sims and ids are parallel slices of length k; AddResult keeps them
	// ordered by descending similarity.
	sims []float32
	ids  []ID
	// k is the maximum number of results kept.
	k int
	// valid is the number of populated entries, as reported by ToSlice.
	valid int
}
24 |
25 | func NewResultSet(topK int) *ResultSet {
26 | return &ResultSet{
27 | k: topK,
28 | sims: make([]float32, topK),
29 | ids: make([]ID, topK),
30 | valid: 0,
31 | }
32 | }
33 |
// Len returns the length of the similarity slice, which NewResultSet sizes
// to topK. It is the capacity of the set, not the number of results added
// so far.
func (rs *ResultSet) Len() int {
	return len(rs.sims)
}
37 |
38 | func (rs *ResultSet) ComputeRecall(baseline *ResultSet, at int) float64 {
39 | found := 0
40 | for _, v := range baseline.ids[:at] {
41 | for _, w := range rs.ids[:at] {
42 | if v == w {
43 | found += 1
44 | }
45 | }
46 | }
47 | return float64(found) / float64(at)
48 | }
49 |
// String renders the populated results in order, as produced by ToSlice.
func (rs *ResultSet) String() string {
	return fmt.Sprint(rs.ToSlice())
}
53 |
54 | func (rs *ResultSet) AddResult(id ID, sim float32) bool {
55 | // Do a quick check...
56 | if rs.valid == rs.k {
57 | // Bail if the last one beats us
58 | last := rs.sims[len(rs.sims)-1]
59 | if last > sim {
60 | return false
61 | }
62 | }
63 | rs.inner.Lock()
64 | defer rs.inner.Unlock()
65 | insert := 0
66 | found := false
67 | for insert != rs.k {
68 | // If we're building it out, then the new insertion point is at the end.
69 | if rs.valid <= insert {
70 | rs.valid += 1
71 | found = true
72 | break
73 | }
74 | if rs.ids[insert] == id {
75 | return true
76 | }
77 | if rs.sims[insert] < sim {
78 | found = true
79 | break
80 | }
81 | insert++
82 | }
83 | if !found {
84 | return false
85 | }
86 | copy(rs.sims[insert+1:], rs.sims[insert:])
87 | rs.sims[insert] = sim
88 | copy(rs.ids[insert+1:], rs.ids[insert:])
89 | rs.ids[insert] = id
90 | return true
91 | }
92 |
93 | func (rs *ResultSet) ToSlice() []*Result {
94 | out := make([]*Result, rs.valid)
95 | for i := range out {
96 | out[i] = &Result{
97 | Similarity: rs.sims[i],
98 | ID: rs.ids[i],
99 | }
100 | }
101 | return out
102 | }
103 |
--------------------------------------------------------------------------------
/rust/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 |
--------------------------------------------------------------------------------
/rust/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "addr2line"
7 | version = "0.22.0"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678"
10 | dependencies = [
11 | "gimli",
12 | ]
13 |
14 | [[package]]
15 | name = "adler"
16 | version = "1.0.2"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
19 |
20 | [[package]]
21 | name = "ahash"
22 | version = "0.8.11"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
25 | dependencies = [
26 | "cfg-if",
27 | "getrandom",
28 | "once_cell",
29 | "version_check",
30 | "zerocopy 0.7.34",
31 | ]
32 |
33 | [[package]]
34 | name = "aho-corasick"
35 | version = "1.1.3"
36 | source = "registry+https://github.com/rust-lang/crates.io-index"
37 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
38 | dependencies = [
39 | "memchr",
40 | ]
41 |
42 | [[package]]
43 | name = "anes"
44 | version = "0.1.6"
45 | source = "registry+https://github.com/rust-lang/crates.io-index"
46 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
47 |
48 | [[package]]
49 | name = "anstyle"
50 | version = "1.0.7"
51 | source = "registry+https://github.com/rust-lang/crates.io-index"
52 | checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
53 |
54 | [[package]]
55 | name = "anyhow"
56 | version = "1.0.86"
57 | source = "registry+https://github.com/rust-lang/crates.io-index"
58 | checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
59 |
60 | [[package]]
61 | name = "argminmax"
62 | version = "0.6.2"
63 | source = "registry+https://github.com/rust-lang/crates.io-index"
64 | checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa"
65 | dependencies = [
66 | "num-traits",
67 | ]
68 |
69 | [[package]]
70 | name = "arrayvec"
71 | version = "0.7.4"
72 | source = "registry+https://github.com/rust-lang/crates.io-index"
73 | checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
74 |
75 | [[package]]
76 | name = "autocfg"
77 | version = "1.3.0"
78 | source = "registry+https://github.com/rust-lang/crates.io-index"
79 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
80 |
81 | [[package]]
82 | name = "backtrace"
83 | version = "0.3.72"
84 | source = "registry+https://github.com/rust-lang/crates.io-index"
85 | checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11"
86 | dependencies = [
87 | "addr2line",
88 | "cc",
89 | "cfg-if",
90 | "libc",
91 | "miniz_oxide",
92 | "object",
93 | "rustc-demangle",
94 | ]
95 |
96 | [[package]]
97 | name = "bbqvec"
98 | version = "0.0.9"
99 | dependencies = [
100 | "anyhow",
101 | "argminmax",
102 | "bitvec",
103 | "bytemuck",
104 | "byteorder",
105 | "criterion",
106 | "croaring",
107 | "half",
108 | "memmap2",
109 | "pprof",
110 | "rand",
111 | "rayon",
112 | "roaring",
113 | "serde",
114 | "serde_json",
115 | "thiserror",
116 | ]
117 |
118 | [[package]]
119 | name = "bitflags"
120 | version = "1.3.2"
121 | source = "registry+https://github.com/rust-lang/crates.io-index"
122 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
123 |
124 | [[package]]
125 | name = "bitflags"
126 | version = "2.5.0"
127 | source = "registry+https://github.com/rust-lang/crates.io-index"
128 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
129 |
130 | [[package]]
131 | name = "bitvec"
132 | version = "1.0.1"
133 | source = "registry+https://github.com/rust-lang/crates.io-index"
134 | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
135 | dependencies = [
136 | "funty",
137 | "radium",
138 | "serde",
139 | "tap",
140 | "wyz",
141 | ]
142 |
143 | [[package]]
144 | name = "bumpalo"
145 | version = "3.16.0"
146 | source = "registry+https://github.com/rust-lang/crates.io-index"
147 | checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
148 |
149 | [[package]]
150 | name = "bytemuck"
151 | version = "1.16.0"
152 | source = "registry+https://github.com/rust-lang/crates.io-index"
153 | checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5"
154 | dependencies = [
155 | "bytemuck_derive",
156 | ]
157 |
158 | [[package]]
159 | name = "bytemuck_derive"
160 | version = "1.6.0"
161 | source = "registry+https://github.com/rust-lang/crates.io-index"
162 | checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60"
163 | dependencies = [
164 | "proc-macro2",
165 | "quote",
166 | "syn",
167 | ]
168 |
169 | [[package]]
170 | name = "byteorder"
171 | version = "1.5.0"
172 | source = "registry+https://github.com/rust-lang/crates.io-index"
173 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
174 |
175 | [[package]]
176 | name = "cast"
177 | version = "0.3.0"
178 | source = "registry+https://github.com/rust-lang/crates.io-index"
179 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
180 |
181 | [[package]]
182 | name = "cc"
183 | version = "1.0.98"
184 | source = "registry+https://github.com/rust-lang/crates.io-index"
185 | checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
186 |
187 | [[package]]
188 | name = "cfg-if"
189 | version = "1.0.0"
190 | source = "registry+https://github.com/rust-lang/crates.io-index"
191 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
192 |
193 | [[package]]
194 | name = "ciborium"
195 | version = "0.2.2"
196 | source = "registry+https://github.com/rust-lang/crates.io-index"
197 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
198 | dependencies = [
199 | "ciborium-io",
200 | "ciborium-ll",
201 | "serde",
202 | ]
203 |
204 | [[package]]
205 | name = "ciborium-io"
206 | version = "0.2.2"
207 | source = "registry+https://github.com/rust-lang/crates.io-index"
208 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
209 |
210 | [[package]]
211 | name = "ciborium-ll"
212 | version = "0.2.2"
213 | source = "registry+https://github.com/rust-lang/crates.io-index"
214 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
215 | dependencies = [
216 | "ciborium-io",
217 | "half",
218 | ]
219 |
220 | [[package]]
221 | name = "clap"
222 | version = "4.5.4"
223 | source = "registry+https://github.com/rust-lang/crates.io-index"
224 | checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0"
225 | dependencies = [
226 | "clap_builder",
227 | ]
228 |
229 | [[package]]
230 | name = "clap_builder"
231 | version = "4.5.2"
232 | source = "registry+https://github.com/rust-lang/crates.io-index"
233 | checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4"
234 | dependencies = [
235 | "anstyle",
236 | "clap_lex",
237 | ]
238 |
239 | [[package]]
240 | name = "clap_lex"
241 | version = "0.7.0"
242 | source = "registry+https://github.com/rust-lang/crates.io-index"
243 | checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
244 |
245 | [[package]]
246 | name = "cpp_demangle"
247 | version = "0.4.3"
248 | source = "registry+https://github.com/rust-lang/crates.io-index"
249 | checksum = "7e8227005286ec39567949b33df9896bcadfa6051bccca2488129f108ca23119"
250 | dependencies = [
251 | "cfg-if",
252 | ]
253 |
254 | [[package]]
255 | name = "criterion"
256 | version = "0.5.1"
257 | source = "registry+https://github.com/rust-lang/crates.io-index"
258 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
259 | dependencies = [
260 | "anes",
261 | "cast",
262 | "ciborium",
263 | "clap",
264 | "criterion-plot",
265 | "is-terminal",
266 | "itertools",
267 | "num-traits",
268 | "once_cell",
269 | "oorandom",
270 | "plotters",
271 | "rayon",
272 | "regex",
273 | "serde",
274 | "serde_derive",
275 | "serde_json",
276 | "tinytemplate",
277 | "walkdir",
278 | ]
279 |
280 | [[package]]
281 | name = "criterion-plot"
282 | version = "0.5.0"
283 | source = "registry+https://github.com/rust-lang/crates.io-index"
284 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
285 | dependencies = [
286 | "cast",
287 | "itertools",
288 | ]
289 |
290 | [[package]]
291 | name = "croaring"
292 | version = "1.1.0"
293 | source = "registry+https://github.com/rust-lang/crates.io-index"
294 | checksum = "611eaefca84c93e431ad82dfb848f6e05a99e25148384f45a3852b0fbe1c8086"
295 | dependencies = [
296 | "byteorder",
297 | "croaring-sys",
298 | ]
299 |
300 | [[package]]
301 | name = "croaring-sys"
302 | version = "2.0.0"
303 | source = "registry+https://github.com/rust-lang/crates.io-index"
304 | checksum = "ab5260027c04c33d67f405589d9c26e1e991fe062fb165f3094c9836e6c3b17f"
305 | dependencies = [
306 | "cc",
307 | ]
308 |
309 | [[package]]
310 | name = "crossbeam-deque"
311 | version = "0.8.5"
312 | source = "registry+https://github.com/rust-lang/crates.io-index"
313 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
314 | dependencies = [
315 | "crossbeam-epoch",
316 | "crossbeam-utils",
317 | ]
318 |
319 | [[package]]
320 | name = "crossbeam-epoch"
321 | version = "0.9.18"
322 | source = "registry+https://github.com/rust-lang/crates.io-index"
323 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
324 | dependencies = [
325 | "crossbeam-utils",
326 | ]
327 |
328 | [[package]]
329 | name = "crossbeam-utils"
330 | version = "0.8.20"
331 | source = "registry+https://github.com/rust-lang/crates.io-index"
332 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
333 |
334 | [[package]]
335 | name = "crunchy"
336 | version = "0.2.2"
337 | source = "registry+https://github.com/rust-lang/crates.io-index"
338 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
339 |
340 | [[package]]
341 | name = "debugid"
342 | version = "0.8.0"
343 | source = "registry+https://github.com/rust-lang/crates.io-index"
344 | checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
345 | dependencies = [
346 | "uuid",
347 | ]
348 |
349 | [[package]]
350 | name = "either"
351 | version = "1.12.0"
352 | source = "registry+https://github.com/rust-lang/crates.io-index"
353 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
354 |
355 | [[package]]
356 | name = "equivalent"
357 | version = "1.0.1"
358 | source = "registry+https://github.com/rust-lang/crates.io-index"
359 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
360 |
361 | [[package]]
362 | name = "errno"
363 | version = "0.3.9"
364 | source = "registry+https://github.com/rust-lang/crates.io-index"
365 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
366 | dependencies = [
367 | "libc",
368 | "windows-sys",
369 | ]
370 |
371 | [[package]]
372 | name = "fastrand"
373 | version = "2.1.0"
374 | source = "registry+https://github.com/rust-lang/crates.io-index"
375 | checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
376 |
377 | [[package]]
378 | name = "findshlibs"
379 | version = "0.10.2"
380 | source = "registry+https://github.com/rust-lang/crates.io-index"
381 | checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
382 | dependencies = [
383 | "cc",
384 | "lazy_static",
385 | "libc",
386 | "winapi",
387 | ]
388 |
389 | [[package]]
390 | name = "funty"
391 | version = "2.0.0"
392 | source = "registry+https://github.com/rust-lang/crates.io-index"
393 | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
394 |
395 | [[package]]
396 | name = "getrandom"
397 | version = "0.2.15"
398 | source = "registry+https://github.com/rust-lang/crates.io-index"
399 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
400 | dependencies = [
401 | "cfg-if",
402 | "libc",
403 | "wasi",
404 | ]
405 |
406 | [[package]]
407 | name = "gimli"
408 | version = "0.29.0"
409 | source = "registry+https://github.com/rust-lang/crates.io-index"
410 | checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
411 |
412 | [[package]]
413 | name = "half"
414 | version = "2.4.1"
415 | source = "registry+https://github.com/rust-lang/crates.io-index"
416 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
417 | dependencies = [
418 | "cfg-if",
419 | "crunchy",
420 | "zerocopy 0.6.6",
421 | ]
422 |
423 | [[package]]
424 | name = "hashbrown"
425 | version = "0.14.5"
426 | source = "registry+https://github.com/rust-lang/crates.io-index"
427 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
428 |
429 | [[package]]
430 | name = "hermit-abi"
431 | version = "0.3.9"
432 | source = "registry+https://github.com/rust-lang/crates.io-index"
433 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
434 |
435 | [[package]]
436 | name = "indexmap"
437 | version = "2.2.6"
438 | source = "registry+https://github.com/rust-lang/crates.io-index"
439 | checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
440 | dependencies = [
441 | "equivalent",
442 | "hashbrown",
443 | ]
444 |
445 | [[package]]
446 | name = "inferno"
447 | version = "0.11.19"
448 | source = "registry+https://github.com/rust-lang/crates.io-index"
449 | checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9"
450 | dependencies = [
451 | "ahash",
452 | "indexmap",
453 | "is-terminal",
454 | "itoa",
455 | "log",
456 | "num-format",
457 | "once_cell",
458 | "quick-xml",
459 | "rgb",
460 | "str_stack",
461 | ]
462 |
463 | [[package]]
464 | name = "is-terminal"
465 | version = "0.4.12"
466 | source = "registry+https://github.com/rust-lang/crates.io-index"
467 | checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
468 | dependencies = [
469 | "hermit-abi",
470 | "libc",
471 | "windows-sys",
472 | ]
473 |
474 | [[package]]
475 | name = "itertools"
476 | version = "0.10.5"
477 | source = "registry+https://github.com/rust-lang/crates.io-index"
478 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
479 | dependencies = [
480 | "either",
481 | ]
482 |
483 | [[package]]
484 | name = "itoa"
485 | version = "1.0.11"
486 | source = "registry+https://github.com/rust-lang/crates.io-index"
487 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
488 |
489 | [[package]]
490 | name = "js-sys"
491 | version = "0.3.69"
492 | source = "registry+https://github.com/rust-lang/crates.io-index"
493 | checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
494 | dependencies = [
495 | "wasm-bindgen",
496 | ]
497 |
498 | [[package]]
499 | name = "lazy_static"
500 | version = "1.4.0"
501 | source = "registry+https://github.com/rust-lang/crates.io-index"
502 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
503 |
504 | [[package]]
505 | name = "libc"
506 | version = "0.2.155"
507 | source = "registry+https://github.com/rust-lang/crates.io-index"
508 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
509 |
510 | [[package]]
511 | name = "linux-raw-sys"
512 | version = "0.4.14"
513 | source = "registry+https://github.com/rust-lang/crates.io-index"
514 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
515 |
516 | [[package]]
517 | name = "lock_api"
518 | version = "0.4.12"
519 | source = "registry+https://github.com/rust-lang/crates.io-index"
520 | checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
521 | dependencies = [
522 | "autocfg",
523 | "scopeguard",
524 | ]
525 |
526 | [[package]]
527 | name = "log"
528 | version = "0.4.21"
529 | source = "registry+https://github.com/rust-lang/crates.io-index"
530 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
531 |
532 | [[package]]
533 | name = "memchr"
534 | version = "2.7.2"
535 | source = "registry+https://github.com/rust-lang/crates.io-index"
536 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
537 |
538 | [[package]]
539 | name = "memmap2"
540 | version = "0.9.5"
541 | source = "registry+https://github.com/rust-lang/crates.io-index"
542 | checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
543 | dependencies = [
544 | "libc",
545 | ]
546 |
547 | [[package]]
548 | name = "miniz_oxide"
549 | version = "0.7.3"
550 | source = "registry+https://github.com/rust-lang/crates.io-index"
551 | checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae"
552 | dependencies = [
553 | "adler",
554 | ]
555 |
556 | [[package]]
557 | name = "nix"
558 | version = "0.26.4"
559 | source = "registry+https://github.com/rust-lang/crates.io-index"
560 | checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
561 | dependencies = [
562 | "bitflags 1.3.2",
563 | "cfg-if",
564 | "libc",
565 | ]
566 |
567 | [[package]]
568 | name = "num-format"
569 | version = "0.4.4"
570 | source = "registry+https://github.com/rust-lang/crates.io-index"
571 | checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
572 | dependencies = [
573 | "arrayvec",
574 | "itoa",
575 | ]
576 |
577 | [[package]]
578 | name = "num-traits"
579 | version = "0.2.19"
580 | source = "registry+https://github.com/rust-lang/crates.io-index"
581 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
582 | dependencies = [
583 | "autocfg",
584 | ]
585 |
586 | [[package]]
587 | name = "object"
588 | version = "0.35.0"
589 | source = "registry+https://github.com/rust-lang/crates.io-index"
590 | checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
591 | dependencies = [
592 | "memchr",
593 | ]
594 |
595 | [[package]]
596 | name = "once_cell"
597 | version = "1.19.0"
598 | source = "registry+https://github.com/rust-lang/crates.io-index"
599 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
600 |
601 | [[package]]
602 | name = "oorandom"
603 | version = "11.1.3"
604 | source = "registry+https://github.com/rust-lang/crates.io-index"
605 | checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
606 |
607 | [[package]]
608 | name = "parking_lot"
609 | version = "0.12.3"
610 | source = "registry+https://github.com/rust-lang/crates.io-index"
611 | checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
612 | dependencies = [
613 | "lock_api",
614 | "parking_lot_core",
615 | ]
616 |
617 | [[package]]
618 | name = "parking_lot_core"
619 | version = "0.9.10"
620 | source = "registry+https://github.com/rust-lang/crates.io-index"
621 | checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
622 | dependencies = [
623 | "cfg-if",
624 | "libc",
625 | "redox_syscall",
626 | "smallvec",
627 | "windows-targets",
628 | ]
629 |
630 | [[package]]
631 | name = "plotters"
632 | version = "0.3.6"
633 | source = "registry+https://github.com/rust-lang/crates.io-index"
634 | checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3"
635 | dependencies = [
636 | "num-traits",
637 | "plotters-backend",
638 | "plotters-svg",
639 | "wasm-bindgen",
640 | "web-sys",
641 | ]
642 |
643 | [[package]]
644 | name = "plotters-backend"
645 | version = "0.3.6"
646 | source = "registry+https://github.com/rust-lang/crates.io-index"
647 | checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7"
648 |
649 | [[package]]
650 | name = "plotters-svg"
651 | version = "0.3.6"
652 | source = "registry+https://github.com/rust-lang/crates.io-index"
653 | checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705"
654 | dependencies = [
655 | "plotters-backend",
656 | ]
657 |
658 | [[package]]
659 | name = "pprof"
660 | version = "0.13.0"
661 | source = "registry+https://github.com/rust-lang/crates.io-index"
662 | checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb"
663 | dependencies = [
664 | "backtrace",
665 | "cfg-if",
666 | "criterion",
667 | "findshlibs",
668 | "inferno",
669 | "libc",
670 | "log",
671 | "nix",
672 | "once_cell",
673 | "parking_lot",
674 | "protobuf",
675 | "protobuf-codegen-pure",
676 | "smallvec",
677 | "symbolic-demangle",
678 | "tempfile",
679 | "thiserror",
680 | ]
681 |
682 | [[package]]
683 | name = "ppv-lite86"
684 | version = "0.2.17"
685 | source = "registry+https://github.com/rust-lang/crates.io-index"
686 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
687 |
688 | [[package]]
689 | name = "proc-macro2"
690 | version = "1.0.84"
691 | source = "registry+https://github.com/rust-lang/crates.io-index"
692 | checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6"
693 | dependencies = [
694 | "unicode-ident",
695 | ]
696 |
697 | [[package]]
698 | name = "protobuf"
699 | version = "2.28.0"
700 | source = "registry+https://github.com/rust-lang/crates.io-index"
701 | checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
702 |
703 | [[package]]
704 | name = "protobuf-codegen"
705 | version = "2.28.0"
706 | source = "registry+https://github.com/rust-lang/crates.io-index"
707 | checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6"
708 | dependencies = [
709 | "protobuf",
710 | ]
711 |
712 | [[package]]
713 | name = "protobuf-codegen-pure"
714 | version = "2.28.0"
715 | source = "registry+https://github.com/rust-lang/crates.io-index"
716 | checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865"
717 | dependencies = [
718 | "protobuf",
719 | "protobuf-codegen",
720 | ]
721 |
722 | [[package]]
723 | name = "quick-xml"
724 | version = "0.26.0"
725 | source = "registry+https://github.com/rust-lang/crates.io-index"
726 | checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
727 | dependencies = [
728 | "memchr",
729 | ]
730 |
731 | [[package]]
732 | name = "quote"
733 | version = "1.0.36"
734 | source = "registry+https://github.com/rust-lang/crates.io-index"
735 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
736 | dependencies = [
737 | "proc-macro2",
738 | ]
739 |
740 | [[package]]
741 | name = "radium"
742 | version = "0.7.0"
743 | source = "registry+https://github.com/rust-lang/crates.io-index"
744 | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
745 |
746 | [[package]]
747 | name = "rand"
748 | version = "0.8.5"
749 | source = "registry+https://github.com/rust-lang/crates.io-index"
750 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
751 | dependencies = [
752 | "libc",
753 | "rand_chacha",
754 | "rand_core",
755 | ]
756 |
757 | [[package]]
758 | name = "rand_chacha"
759 | version = "0.3.1"
760 | source = "registry+https://github.com/rust-lang/crates.io-index"
761 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
762 | dependencies = [
763 | "ppv-lite86",
764 | "rand_core",
765 | ]
766 |
767 | [[package]]
768 | name = "rand_core"
769 | version = "0.6.4"
770 | source = "registry+https://github.com/rust-lang/crates.io-index"
771 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
772 | dependencies = [
773 | "getrandom",
774 | ]
775 |
776 | [[package]]
777 | name = "rayon"
778 | version = "1.10.0"
779 | source = "registry+https://github.com/rust-lang/crates.io-index"
780 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
781 | dependencies = [
782 | "either",
783 | "rayon-core",
784 | ]
785 |
786 | [[package]]
787 | name = "rayon-core"
788 | version = "1.12.1"
789 | source = "registry+https://github.com/rust-lang/crates.io-index"
790 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
791 | dependencies = [
792 | "crossbeam-deque",
793 | "crossbeam-utils",
794 | ]
795 |
796 | [[package]]
797 | name = "redox_syscall"
798 | version = "0.5.1"
799 | source = "registry+https://github.com/rust-lang/crates.io-index"
800 | checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
801 | dependencies = [
802 | "bitflags 2.5.0",
803 | ]
804 |
805 | [[package]]
806 | name = "regex"
807 | version = "1.10.4"
808 | source = "registry+https://github.com/rust-lang/crates.io-index"
809 | checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
810 | dependencies = [
811 | "aho-corasick",
812 | "memchr",
813 | "regex-automata",
814 | "regex-syntax",
815 | ]
816 |
817 | [[package]]
818 | name = "regex-automata"
819 | version = "0.4.6"
820 | source = "registry+https://github.com/rust-lang/crates.io-index"
821 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
822 | dependencies = [
823 | "aho-corasick",
824 | "memchr",
825 | "regex-syntax",
826 | ]
827 |
828 | [[package]]
829 | name = "regex-syntax"
830 | version = "0.8.3"
831 | source = "registry+https://github.com/rust-lang/crates.io-index"
832 | checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
833 |
834 | [[package]]
835 | name = "rgb"
836 | version = "0.8.37"
837 | source = "registry+https://github.com/rust-lang/crates.io-index"
838 | checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8"
839 | dependencies = [
840 | "bytemuck",
841 | ]
842 |
843 | [[package]]
844 | name = "roaring"
845 | version = "0.10.4"
846 | source = "registry+https://github.com/rust-lang/crates.io-index"
847 | checksum = "b26f4c25a604fcb3a1bcd96dd6ba37c93840de95de8198d94c0d571a74a804d1"
848 | dependencies = [
849 | "bytemuck",
850 | "byteorder",
851 | "serde",
852 | ]
853 |
854 | [[package]]
855 | name = "rustc-demangle"
856 | version = "0.1.24"
857 | source = "registry+https://github.com/rust-lang/crates.io-index"
858 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
859 |
860 | [[package]]
861 | name = "rustix"
862 | version = "0.38.34"
863 | source = "registry+https://github.com/rust-lang/crates.io-index"
864 | checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
865 | dependencies = [
866 | "bitflags 2.5.0",
867 | "errno",
868 | "libc",
869 | "linux-raw-sys",
870 | "windows-sys",
871 | ]
872 |
873 | [[package]]
874 | name = "ryu"
875 | version = "1.0.18"
876 | source = "registry+https://github.com/rust-lang/crates.io-index"
877 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
878 |
879 | [[package]]
880 | name = "same-file"
881 | version = "1.0.6"
882 | source = "registry+https://github.com/rust-lang/crates.io-index"
883 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
884 | dependencies = [
885 | "winapi-util",
886 | ]
887 |
888 | [[package]]
889 | name = "scopeguard"
890 | version = "1.2.0"
891 | source = "registry+https://github.com/rust-lang/crates.io-index"
892 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
893 |
894 | [[package]]
895 | name = "serde"
896 | version = "1.0.203"
897 | source = "registry+https://github.com/rust-lang/crates.io-index"
898 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
899 | dependencies = [
900 | "serde_derive",
901 | ]
902 |
903 | [[package]]
904 | name = "serde_derive"
905 | version = "1.0.203"
906 | source = "registry+https://github.com/rust-lang/crates.io-index"
907 | checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
908 | dependencies = [
909 | "proc-macro2",
910 | "quote",
911 | "syn",
912 | ]
913 |
914 | [[package]]
915 | name = "serde_json"
916 | version = "1.0.128"
917 | source = "registry+https://github.com/rust-lang/crates.io-index"
918 | checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8"
919 | dependencies = [
920 | "itoa",
921 | "memchr",
922 | "ryu",
923 | "serde",
924 | ]
925 |
926 | [[package]]
927 | name = "smallvec"
928 | version = "1.13.2"
929 | source = "registry+https://github.com/rust-lang/crates.io-index"
930 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
931 |
932 | [[package]]
933 | name = "stable_deref_trait"
934 | version = "1.2.0"
935 | source = "registry+https://github.com/rust-lang/crates.io-index"
936 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
937 |
938 | [[package]]
939 | name = "str_stack"
940 | version = "0.1.0"
941 | source = "registry+https://github.com/rust-lang/crates.io-index"
942 | checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
943 |
944 | [[package]]
945 | name = "symbolic-common"
946 | version = "12.9.1"
947 | source = "registry+https://github.com/rust-lang/crates.io-index"
948 | checksum = "89d2aef0f60f62e38c472334148758afbd570ed78d20be622692e5ebfec3734f"
949 | dependencies = [
950 | "debugid",
951 | "memmap2",
952 | "stable_deref_trait",
953 | "uuid",
954 | ]
955 |
956 | [[package]]
957 | name = "symbolic-demangle"
958 | version = "12.9.1"
959 | source = "registry+https://github.com/rust-lang/crates.io-index"
960 | checksum = "1719d1292eac816cdd3fdad12b22315624b7ce6a7bacb267a3a27fccfd286b48"
961 | dependencies = [
962 | "cpp_demangle",
963 | "rustc-demangle",
964 | "symbolic-common",
965 | ]
966 |
967 | [[package]]
968 | name = "syn"
969 | version = "2.0.66"
970 | source = "registry+https://github.com/rust-lang/crates.io-index"
971 | checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
972 | dependencies = [
973 | "proc-macro2",
974 | "quote",
975 | "unicode-ident",
976 | ]
977 |
978 | [[package]]
979 | name = "tap"
980 | version = "1.0.1"
981 | source = "registry+https://github.com/rust-lang/crates.io-index"
982 | checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
983 |
984 | [[package]]
985 | name = "tempfile"
986 | version = "3.10.1"
987 | source = "registry+https://github.com/rust-lang/crates.io-index"
988 | checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
989 | dependencies = [
990 | "cfg-if",
991 | "fastrand",
992 | "rustix",
993 | "windows-sys",
994 | ]
995 |
996 | [[package]]
997 | name = "thiserror"
998 | version = "1.0.61"
999 | source = "registry+https://github.com/rust-lang/crates.io-index"
1000 | checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
1001 | dependencies = [
1002 | "thiserror-impl",
1003 | ]
1004 |
1005 | [[package]]
1006 | name = "thiserror-impl"
1007 | version = "1.0.61"
1008 | source = "registry+https://github.com/rust-lang/crates.io-index"
1009 | checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
1010 | dependencies = [
1011 | "proc-macro2",
1012 | "quote",
1013 | "syn",
1014 | ]
1015 |
1016 | [[package]]
1017 | name = "tinytemplate"
1018 | version = "1.2.1"
1019 | source = "registry+https://github.com/rust-lang/crates.io-index"
1020 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
1021 | dependencies = [
1022 | "serde",
1023 | "serde_json",
1024 | ]
1025 |
1026 | [[package]]
1027 | name = "unicode-ident"
1028 | version = "1.0.12"
1029 | source = "registry+https://github.com/rust-lang/crates.io-index"
1030 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
1031 |
1032 | [[package]]
1033 | name = "uuid"
1034 | version = "1.8.0"
1035 | source = "registry+https://github.com/rust-lang/crates.io-index"
1036 | checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0"
1037 |
1038 | [[package]]
1039 | name = "version_check"
1040 | version = "0.9.4"
1041 | source = "registry+https://github.com/rust-lang/crates.io-index"
1042 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
1043 |
1044 | [[package]]
1045 | name = "walkdir"
1046 | version = "2.5.0"
1047 | source = "registry+https://github.com/rust-lang/crates.io-index"
1048 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
1049 | dependencies = [
1050 | "same-file",
1051 | "winapi-util",
1052 | ]
1053 |
1054 | [[package]]
1055 | name = "wasi"
1056 | version = "0.11.0+wasi-snapshot-preview1"
1057 | source = "registry+https://github.com/rust-lang/crates.io-index"
1058 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
1059 |
1060 | [[package]]
1061 | name = "wasm-bindgen"
1062 | version = "0.2.92"
1063 | source = "registry+https://github.com/rust-lang/crates.io-index"
1064 | checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
1065 | dependencies = [
1066 | "cfg-if",
1067 | "wasm-bindgen-macro",
1068 | ]
1069 |
1070 | [[package]]
1071 | name = "wasm-bindgen-backend"
1072 | version = "0.2.92"
1073 | source = "registry+https://github.com/rust-lang/crates.io-index"
1074 | checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
1075 | dependencies = [
1076 | "bumpalo",
1077 | "log",
1078 | "once_cell",
1079 | "proc-macro2",
1080 | "quote",
1081 | "syn",
1082 | "wasm-bindgen-shared",
1083 | ]
1084 |
1085 | [[package]]
1086 | name = "wasm-bindgen-macro"
1087 | version = "0.2.92"
1088 | source = "registry+https://github.com/rust-lang/crates.io-index"
1089 | checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
1090 | dependencies = [
1091 | "quote",
1092 | "wasm-bindgen-macro-support",
1093 | ]
1094 |
1095 | [[package]]
1096 | name = "wasm-bindgen-macro-support"
1097 | version = "0.2.92"
1098 | source = "registry+https://github.com/rust-lang/crates.io-index"
1099 | checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
1100 | dependencies = [
1101 | "proc-macro2",
1102 | "quote",
1103 | "syn",
1104 | "wasm-bindgen-backend",
1105 | "wasm-bindgen-shared",
1106 | ]
1107 |
1108 | [[package]]
1109 | name = "wasm-bindgen-shared"
1110 | version = "0.2.92"
1111 | source = "registry+https://github.com/rust-lang/crates.io-index"
1112 | checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
1113 |
1114 | [[package]]
1115 | name = "web-sys"
1116 | version = "0.3.69"
1117 | source = "registry+https://github.com/rust-lang/crates.io-index"
1118 | checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef"
1119 | dependencies = [
1120 | "js-sys",
1121 | "wasm-bindgen",
1122 | ]
1123 |
1124 | [[package]]
1125 | name = "winapi"
1126 | version = "0.3.9"
1127 | source = "registry+https://github.com/rust-lang/crates.io-index"
1128 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
1129 | dependencies = [
1130 | "winapi-i686-pc-windows-gnu",
1131 | "winapi-x86_64-pc-windows-gnu",
1132 | ]
1133 |
1134 | [[package]]
1135 | name = "winapi-i686-pc-windows-gnu"
1136 | version = "0.4.0"
1137 | source = "registry+https://github.com/rust-lang/crates.io-index"
1138 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
1139 |
1140 | [[package]]
1141 | name = "winapi-util"
1142 | version = "0.1.8"
1143 | source = "registry+https://github.com/rust-lang/crates.io-index"
1144 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
1145 | dependencies = [
1146 | "windows-sys",
1147 | ]
1148 |
1149 | [[package]]
1150 | name = "winapi-x86_64-pc-windows-gnu"
1151 | version = "0.4.0"
1152 | source = "registry+https://github.com/rust-lang/crates.io-index"
1153 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
1154 |
1155 | [[package]]
1156 | name = "windows-sys"
1157 | version = "0.52.0"
1158 | source = "registry+https://github.com/rust-lang/crates.io-index"
1159 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
1160 | dependencies = [
1161 | "windows-targets",
1162 | ]
1163 |
1164 | [[package]]
1165 | name = "windows-targets"
1166 | version = "0.52.5"
1167 | source = "registry+https://github.com/rust-lang/crates.io-index"
1168 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
1169 | dependencies = [
1170 | "windows_aarch64_gnullvm",
1171 | "windows_aarch64_msvc",
1172 | "windows_i686_gnu",
1173 | "windows_i686_gnullvm",
1174 | "windows_i686_msvc",
1175 | "windows_x86_64_gnu",
1176 | "windows_x86_64_gnullvm",
1177 | "windows_x86_64_msvc",
1178 | ]
1179 |
1180 | [[package]]
1181 | name = "windows_aarch64_gnullvm"
1182 | version = "0.52.5"
1183 | source = "registry+https://github.com/rust-lang/crates.io-index"
1184 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
1185 |
1186 | [[package]]
1187 | name = "windows_aarch64_msvc"
1188 | version = "0.52.5"
1189 | source = "registry+https://github.com/rust-lang/crates.io-index"
1190 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
1191 |
1192 | [[package]]
1193 | name = "windows_i686_gnu"
1194 | version = "0.52.5"
1195 | source = "registry+https://github.com/rust-lang/crates.io-index"
1196 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
1197 |
1198 | [[package]]
1199 | name = "windows_i686_gnullvm"
1200 | version = "0.52.5"
1201 | source = "registry+https://github.com/rust-lang/crates.io-index"
1202 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
1203 |
1204 | [[package]]
1205 | name = "windows_i686_msvc"
1206 | version = "0.52.5"
1207 | source = "registry+https://github.com/rust-lang/crates.io-index"
1208 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
1209 |
1210 | [[package]]
1211 | name = "windows_x86_64_gnu"
1212 | version = "0.52.5"
1213 | source = "registry+https://github.com/rust-lang/crates.io-index"
1214 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
1215 |
1216 | [[package]]
1217 | name = "windows_x86_64_gnullvm"
1218 | version = "0.52.5"
1219 | source = "registry+https://github.com/rust-lang/crates.io-index"
1220 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
1221 |
1222 | [[package]]
1223 | name = "windows_x86_64_msvc"
1224 | version = "0.52.5"
1225 | source = "registry+https://github.com/rust-lang/crates.io-index"
1226 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
1227 |
1228 | [[package]]
1229 | name = "wyz"
1230 | version = "0.5.1"
1231 | source = "registry+https://github.com/rust-lang/crates.io-index"
1232 | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
1233 | dependencies = [
1234 | "tap",
1235 | ]
1236 |
1237 | [[package]]
1238 | name = "zerocopy"
1239 | version = "0.6.6"
1240 | source = "registry+https://github.com/rust-lang/crates.io-index"
1241 | checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6"
1242 | dependencies = [
1243 | "byteorder",
1244 | "zerocopy-derive 0.6.6",
1245 | ]
1246 |
1247 | [[package]]
1248 | name = "zerocopy"
1249 | version = "0.7.34"
1250 | source = "registry+https://github.com/rust-lang/crates.io-index"
1251 | checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087"
1252 | dependencies = [
1253 | "zerocopy-derive 0.7.34",
1254 | ]
1255 |
1256 | [[package]]
1257 | name = "zerocopy-derive"
1258 | version = "0.6.6"
1259 | source = "registry+https://github.com/rust-lang/crates.io-index"
1260 | checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91"
1261 | dependencies = [
1262 | "proc-macro2",
1263 | "quote",
1264 | "syn",
1265 | ]
1266 |
1267 | [[package]]
1268 | name = "zerocopy-derive"
1269 | version = "0.7.34"
1270 | source = "registry+https://github.com/rust-lang/crates.io-index"
1271 | checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
1272 | dependencies = [
1273 | "proc-macro2",
1274 | "quote",
1275 | "syn",
1276 | ]
1277 |
--------------------------------------------------------------------------------
/rust/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "bbqvec"
3 | version = "0.0.10"
4 | edition = "2021"
5 | repository = "https://github.com/barakmich/bbqvec"
6 | authors = ["Barak Michener "]
7 | readme = "../README.md"
8 | license = "Apache-2.0"
9 | keywords = ["vector", "database", "aknn", "search", "nearest-neighbor"]
10 | categories = ["algorithms", "data-structures", "database-implementations"]
11 | description = "Scalable, embeddable, vector storage for approximate K-nearest-neighbors (AKNN)"
12 |
13 | #[workspace]
14 | #members = ["cmd/test-recall"]
15 |
16 | [dependencies]
17 | anyhow = "1.0.86"
18 | argminmax = {version = "0.6.2", default-features = false, features = ["float"]}
19 | bitvec = {version = "1", features = ["serde"]}
20 | bytemuck = {version = "1.16.0", features = ["derive", "extern_crate_alloc"]}
21 | byteorder = "1.5.0"
22 | croaring = "1.1.0"
23 | half = {version = "2.4.1", features = ["std", "zerocopy"]}
24 | memmap2 = "0.9.5"
25 | rand = "0.8.5"
26 | rayon = "1.10.0"
27 | roaring = {version = "0.10.4", features = ["serde"]}
28 | serde = {version = "1.0.203", features = ["derive"]}
29 | serde_json = "1.0.128"
30 | thiserror = "1.0.61"
31 |
32 | [dev-dependencies]
33 | criterion = "0.5.1"
34 | pprof = {version = "0.13.0", features = ["flamegraph", "protobuf-codec", "protobuf", "criterion"]}
35 |
36 | [[bench]]
37 | name = "main_benchmark"
38 | harness = false
39 |
40 | [profile.bench]
41 | debug = true
42 |
--------------------------------------------------------------------------------
/rust/benches/main_benchmark.rs:
--------------------------------------------------------------------------------
1 | use criterion::{criterion_group, criterion_main, Criterion};
2 | mod micro;
3 | use micro::criterion_benchmark_micro;
4 | mod memory_store;
5 | use memory_store::criterion_benchmark_memory_store;
6 | use pprof::criterion::{Output, PProfProfiler};
7 |
8 | criterion_group! {
9 | name = memory;
10 | config = Criterion::default().with_profiler(PProfProfiler::new(1000, Output::Protobuf));
11 | targets = criterion_benchmark_memory_store
12 | }
13 | criterion_group!(micro, criterion_benchmark_micro);
14 | criterion_main!(micro, memory);
15 |
--------------------------------------------------------------------------------
/rust/benches/memory_store.rs:
--------------------------------------------------------------------------------
1 | use bbqvec::IndexIDIterator;
2 | use criterion::{BenchmarkId, Criterion};
3 |
4 | pub fn criterion_benchmark_memory_store(c: &mut Criterion) {
5 | let data = bbqvec::create_vector_set(256, 1000000);
6 | println!("Made vecs");
7 | let mem = bbqvec::MemoryBackend::new(256, 30).unwrap();
8 | let mut store = bbqvec::VectorStore::new_croaring_bitmap(mem).unwrap();
9 | println!("Made store");
10 | store.add_vector_iter(data.enumerate_ids()).unwrap();
11 | println!("itered");
12 | println!("built");
13 | c.bench_with_input(BenchmarkId::new("find_nearest", "store"), &store, |b, s| {
14 | b.iter(|| {
15 | let target = bbqvec::create_random_vector(256);
16 | s.find_nearest(&target, 20, 1000, 4).unwrap();
17 | })
18 | });
19 | }
20 |
--------------------------------------------------------------------------------
/rust/benches/micro.rs:
--------------------------------------------------------------------------------
1 | use bbqvec::Bitmap;
2 | use bitvec::prelude::*;
3 | use std::ops::BitOr;
4 |
5 | use criterion::{black_box, Criterion};
6 | use rand::Rng;
7 |
8 | pub fn criterion_benchmark_micro(c: &mut Criterion) {
9 | c.bench_function("create_random_vector_100", |b| {
10 | b.iter(|| bbqvec::create_random_vector(100))
11 | });
12 | c.bench_function("normalize_100", |b| {
13 | let mut vec = bbqvec::create_random_vector(100);
14 | b.iter(|| bbqvec::vector::normalize(&mut vec));
15 | });
16 | c.bench_function("dot_product_100", |b| {
17 | let vec = bbqvec::create_random_vector(100);
18 | let normal = bbqvec::create_random_vector(100);
19 | b.iter(|| bbqvec::vector::dot_product(&vec, &normal));
20 | });
21 | c.bench_function("roaring", |b| {
22 | let mut x = roaring::RoaringBitmap::new();
23 | let mut y = roaring::RoaringBitmap::new();
24 | for _ in 0..20000 {
25 | x.insert(rand::thread_rng().gen_range(0..2000000));
26 | y.insert(rand::thread_rng().gen_range(0..2000000));
27 | }
28 | b.iter(|| {
29 | black_box((&x).bitor(&y));
30 | });
31 | });
32 | c.bench_function("bitmap", |b| {
33 | let mut x = BitVec::::new();
34 | let mut y = BitVec::new();
35 | for _ in 0..20000 {
36 | x.add(rand::thread_rng().gen_range(0..2000000));
37 | y.add(rand::thread_rng().gen_range(0..2000000));
38 | }
39 | b.iter(|| {
40 | black_box({
41 | let mut z = BitVec::new();
42 | z = z.bitor(&x);
43 | z = z.bitor(&y);
44 | z
45 | });
46 | });
47 | });
48 | }
49 |
--------------------------------------------------------------------------------
/rust/cmd/test-recall/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 |
--------------------------------------------------------------------------------
/rust/cmd/test-recall/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "anyhow"
7 | version = "1.0.86"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
10 |
11 | [[package]]
12 | name = "argminmax"
13 | version = "0.6.2"
14 | source = "registry+https://github.com/rust-lang/crates.io-index"
15 | checksum = "52424b59d69d69d5056d508b260553afd91c57e21849579cd1f50ee8b8b88eaa"
16 | dependencies = [
17 | "num-traits",
18 | ]
19 |
20 | [[package]]
21 | name = "argv"
22 | version = "0.1.11"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | checksum = "5ec90225cd9dc43f6b1c9892603293f3767520a3e8440edec1f7d2a47b88c678"
25 |
26 | [[package]]
27 | name = "autocfg"
28 | version = "1.3.0"
29 | source = "registry+https://github.com/rust-lang/crates.io-index"
30 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
31 |
32 | [[package]]
33 | name = "bbqvec"
34 | version = "0.1.0"
35 | dependencies = [
36 | "anyhow",
37 | "argminmax",
38 | "bitvec",
39 | "bytemuck",
40 | "byteorder",
41 | "croaring",
42 | "half",
43 | "memmap2",
44 | "rand",
45 | "rayon",
46 | "roaring",
47 | "serde",
48 | "thiserror",
49 | ]
50 |
51 | [[package]]
52 | name = "bitvec"
53 | version = "1.0.1"
54 | source = "registry+https://github.com/rust-lang/crates.io-index"
55 | checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
56 | dependencies = [
57 | "funty",
58 | "radium",
59 | "serde",
60 | "tap",
61 | "wyz",
62 | ]
63 |
64 | [[package]]
65 | name = "bytemuck"
66 | version = "1.16.0"
67 | source = "registry+https://github.com/rust-lang/crates.io-index"
68 | checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5"
69 | dependencies = [
70 | "bytemuck_derive",
71 | ]
72 |
73 | [[package]]
74 | name = "bytemuck_derive"
75 | version = "1.7.0"
76 | source = "registry+https://github.com/rust-lang/crates.io-index"
77 | checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b"
78 | dependencies = [
79 | "proc-macro2",
80 | "quote",
81 | "syn 2.0.66",
82 | ]
83 |
84 | [[package]]
85 | name = "byteorder"
86 | version = "1.5.0"
87 | source = "registry+https://github.com/rust-lang/crates.io-index"
88 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
89 |
90 | [[package]]
91 | name = "cc"
92 | version = "1.0.98"
93 | source = "registry+https://github.com/rust-lang/crates.io-index"
94 | checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
95 |
96 | [[package]]
97 | name = "cfg-if"
98 | version = "1.0.0"
99 | source = "registry+https://github.com/rust-lang/crates.io-index"
100 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
101 |
102 | [[package]]
103 | name = "croaring"
104 | version = "1.1.0"
105 | source = "registry+https://github.com/rust-lang/crates.io-index"
106 | checksum = "611eaefca84c93e431ad82dfb848f6e05a99e25148384f45a3852b0fbe1c8086"
107 | dependencies = [
108 | "byteorder",
109 | "croaring-sys",
110 | ]
111 |
112 | [[package]]
113 | name = "croaring-sys"
114 | version = "2.0.0"
115 | source = "registry+https://github.com/rust-lang/crates.io-index"
116 | checksum = "ab5260027c04c33d67f405589d9c26e1e991fe062fb165f3094c9836e6c3b17f"
117 | dependencies = [
118 | "cc",
119 | ]
120 |
121 | [[package]]
122 | name = "crossbeam-deque"
123 | version = "0.8.5"
124 | source = "registry+https://github.com/rust-lang/crates.io-index"
125 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
126 | dependencies = [
127 | "crossbeam-epoch",
128 | "crossbeam-utils",
129 | ]
130 |
131 | [[package]]
132 | name = "crossbeam-epoch"
133 | version = "0.9.18"
134 | source = "registry+https://github.com/rust-lang/crates.io-index"
135 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
136 | dependencies = [
137 | "crossbeam-utils",
138 | ]
139 |
140 | [[package]]
141 | name = "crossbeam-utils"
142 | version = "0.8.20"
143 | source = "registry+https://github.com/rust-lang/crates.io-index"
144 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
145 |
146 | [[package]]
147 | name = "crunchy"
148 | version = "0.2.2"
149 | source = "registry+https://github.com/rust-lang/crates.io-index"
150 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
151 |
152 | [[package]]
153 | name = "either"
154 | version = "1.12.0"
155 | source = "registry+https://github.com/rust-lang/crates.io-index"
156 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
157 |
158 | [[package]]
159 | name = "funty"
160 | version = "2.0.0"
161 | source = "registry+https://github.com/rust-lang/crates.io-index"
162 | checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
163 |
164 | [[package]]
165 | name = "getrandom"
166 | version = "0.2.15"
167 | source = "registry+https://github.com/rust-lang/crates.io-index"
168 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
169 | dependencies = [
170 | "cfg-if",
171 | "libc",
172 | "wasi",
173 | ]
174 |
175 | [[package]]
176 | name = "gflags"
177 | version = "0.3.12"
178 | source = "registry+https://github.com/rust-lang/crates.io-index"
179 | checksum = "8331d16d5d69b23e753a00b3aaeee0c33b3bab7b93044afa48d2e6e6c28d621a"
180 | dependencies = [
181 | "argv",
182 | "gflags-impl",
183 | "inventory",
184 | "ref-cast",
185 | ]
186 |
187 | [[package]]
188 | name = "gflags-impl"
189 | version = "0.3.12"
190 | source = "registry+https://github.com/rust-lang/crates.io-index"
191 | checksum = "1c746dc576d32875419faf6928ea7f74027f4c34aeee5bd9b540fb37b4448561"
192 | dependencies = [
193 | "proc-macro2",
194 | "quote",
195 | "syn 1.0.109",
196 | ]
197 |
198 | [[package]]
199 | name = "half"
200 | version = "2.4.1"
201 | source = "registry+https://github.com/rust-lang/crates.io-index"
202 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
203 | dependencies = [
204 | "cfg-if",
205 | "crunchy",
206 | "zerocopy",
207 | ]
208 |
209 | [[package]]
210 | name = "inventory"
211 | version = "0.3.15"
212 | source = "registry+https://github.com/rust-lang/crates.io-index"
213 | checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767"
214 |
215 | [[package]]
216 | name = "libc"
217 | version = "0.2.155"
218 | source = "registry+https://github.com/rust-lang/crates.io-index"
219 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
220 |
221 | [[package]]
222 | name = "memmap2"
223 | version = "0.9.5"
224 | source = "registry+https://github.com/rust-lang/crates.io-index"
225 | checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
226 | dependencies = [
227 | "libc",
228 | ]
229 |
230 | [[package]]
231 | name = "num-traits"
232 | version = "0.2.19"
233 | source = "registry+https://github.com/rust-lang/crates.io-index"
234 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
235 | dependencies = [
236 | "autocfg",
237 | ]
238 |
239 | [[package]]
240 | name = "ppv-lite86"
241 | version = "0.2.17"
242 | source = "registry+https://github.com/rust-lang/crates.io-index"
243 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
244 |
245 | [[package]]
246 | name = "proc-macro2"
247 | version = "1.0.85"
248 | source = "registry+https://github.com/rust-lang/crates.io-index"
249 | checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23"
250 | dependencies = [
251 | "unicode-ident",
252 | ]
253 |
254 | [[package]]
255 | name = "quote"
256 | version = "1.0.36"
257 | source = "registry+https://github.com/rust-lang/crates.io-index"
258 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
259 | dependencies = [
260 | "proc-macro2",
261 | ]
262 |
263 | [[package]]
264 | name = "radium"
265 | version = "0.7.0"
266 | source = "registry+https://github.com/rust-lang/crates.io-index"
267 | checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
268 |
269 | [[package]]
270 | name = "rand"
271 | version = "0.8.5"
272 | source = "registry+https://github.com/rust-lang/crates.io-index"
273 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
274 | dependencies = [
275 | "libc",
276 | "rand_chacha",
277 | "rand_core",
278 | ]
279 |
280 | [[package]]
281 | name = "rand_chacha"
282 | version = "0.3.1"
283 | source = "registry+https://github.com/rust-lang/crates.io-index"
284 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
285 | dependencies = [
286 | "ppv-lite86",
287 | "rand_core",
288 | ]
289 |
290 | [[package]]
291 | name = "rand_core"
292 | version = "0.6.4"
293 | source = "registry+https://github.com/rust-lang/crates.io-index"
294 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
295 | dependencies = [
296 | "getrandom",
297 | ]
298 |
299 | [[package]]
300 | name = "rayon"
301 | version = "1.10.0"
302 | source = "registry+https://github.com/rust-lang/crates.io-index"
303 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
304 | dependencies = [
305 | "either",
306 | "rayon-core",
307 | ]
308 |
309 | [[package]]
310 | name = "rayon-core"
311 | version = "1.12.1"
312 | source = "registry+https://github.com/rust-lang/crates.io-index"
313 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
314 | dependencies = [
315 | "crossbeam-deque",
316 | "crossbeam-utils",
317 | ]
318 |
319 | [[package]]
320 | name = "ref-cast"
321 | version = "1.0.23"
322 | source = "registry+https://github.com/rust-lang/crates.io-index"
323 | checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931"
324 | dependencies = [
325 | "ref-cast-impl",
326 | ]
327 |
328 | [[package]]
329 | name = "ref-cast-impl"
330 | version = "1.0.23"
331 | source = "registry+https://github.com/rust-lang/crates.io-index"
332 | checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6"
333 | dependencies = [
334 | "proc-macro2",
335 | "quote",
336 | "syn 2.0.66",
337 | ]
338 |
339 | [[package]]
340 | name = "roaring"
341 | version = "0.10.4"
342 | source = "registry+https://github.com/rust-lang/crates.io-index"
343 | checksum = "b26f4c25a604fcb3a1bcd96dd6ba37c93840de95de8198d94c0d571a74a804d1"
344 | dependencies = [
345 | "bytemuck",
346 | "byteorder",
347 | "serde",
348 | ]
349 |
350 | [[package]]
351 | name = "serde"
352 | version = "1.0.203"
353 | source = "registry+https://github.com/rust-lang/crates.io-index"
354 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
355 | dependencies = [
356 | "serde_derive",
357 | ]
358 |
359 | [[package]]
360 | name = "serde_derive"
361 | version = "1.0.203"
362 | source = "registry+https://github.com/rust-lang/crates.io-index"
363 | checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
364 | dependencies = [
365 | "proc-macro2",
366 | "quote",
367 | "syn 2.0.66",
368 | ]
369 |
370 | [[package]]
371 | name = "syn"
372 | version = "1.0.109"
373 | source = "registry+https://github.com/rust-lang/crates.io-index"
374 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
375 | dependencies = [
376 | "proc-macro2",
377 | "quote",
378 | "unicode-ident",
379 | ]
380 |
381 | [[package]]
382 | name = "syn"
383 | version = "2.0.66"
384 | source = "registry+https://github.com/rust-lang/crates.io-index"
385 | checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
386 | dependencies = [
387 | "proc-macro2",
388 | "quote",
389 | "unicode-ident",
390 | ]
391 |
392 | [[package]]
393 | name = "tap"
394 | version = "1.0.1"
395 | source = "registry+https://github.com/rust-lang/crates.io-index"
396 | checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
397 |
398 | [[package]]
399 | name = "test-recall"
400 | version = "0.1.0"
401 | dependencies = [
402 | "anyhow",
403 | "bbqvec",
404 | "gflags",
405 | ]
406 |
407 | [[package]]
408 | name = "thiserror"
409 | version = "1.0.61"
410 | source = "registry+https://github.com/rust-lang/crates.io-index"
411 | checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
412 | dependencies = [
413 | "thiserror-impl",
414 | ]
415 |
416 | [[package]]
417 | name = "thiserror-impl"
418 | version = "1.0.61"
419 | source = "registry+https://github.com/rust-lang/crates.io-index"
420 | checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
421 | dependencies = [
422 | "proc-macro2",
423 | "quote",
424 | "syn 2.0.66",
425 | ]
426 |
427 | [[package]]
428 | name = "unicode-ident"
429 | version = "1.0.12"
430 | source = "registry+https://github.com/rust-lang/crates.io-index"
431 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
432 |
433 | [[package]]
434 | name = "wasi"
435 | version = "0.11.0+wasi-snapshot-preview1"
436 | source = "registry+https://github.com/rust-lang/crates.io-index"
437 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
438 |
439 | [[package]]
440 | name = "wyz"
441 | version = "0.5.1"
442 | source = "registry+https://github.com/rust-lang/crates.io-index"
443 | checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
444 | dependencies = [
445 | "tap",
446 | ]
447 |
448 | [[package]]
449 | name = "zerocopy"
450 | version = "0.6.6"
451 | source = "registry+https://github.com/rust-lang/crates.io-index"
452 | checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6"
453 | dependencies = [
454 | "byteorder",
455 | "zerocopy-derive",
456 | ]
457 |
458 | [[package]]
459 | name = "zerocopy-derive"
460 | version = "0.6.6"
461 | source = "registry+https://github.com/rust-lang/crates.io-index"
462 | checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91"
463 | dependencies = [
464 | "proc-macro2",
465 | "quote",
466 | "syn 2.0.66",
467 | ]
468 |
--------------------------------------------------------------------------------
/rust/cmd/test-recall/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "test-recall"
3 | version = "0.1.0"
4 | edition = "2021"
5 |
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7 |
8 | [dependencies]
9 | gflags = "0.3.12"
10 | bbqvec = {path = "../../"}
11 | anyhow = "1.0.86"
12 |
--------------------------------------------------------------------------------
/rust/cmd/test-recall/src/main.rs:
--------------------------------------------------------------------------------
1 | use std::time::{Duration, Instant};
2 |
3 | use anyhow::Result;
4 | use bbqvec::{
5 | backend::VectorBackend, Bitmap, IndexIDIterator, MemoryBackend, ResultSet, Vector, VectorStore,
6 | };
7 |
8 | gflags::define! {
9 | -v, --vectors: usize = 100000
10 | }
11 |
12 | gflags::define! {
13 | -q, --queries: usize = 1000
14 | }
15 |
16 | gflags::define! {
17 | -d, --dimensions: usize = 256
18 | }
19 |
20 | gflags::define! {
21 | -b, --bases: usize = 30
22 | }
23 |
24 | gflags::define! {
25 | -k, --search-k: usize = 1000
26 | }
27 |
28 | gflags::define! {
29 | -s, --spill: usize = 16
30 | }
31 |
/// Which experiment the binary runs, chosen from the first positional
/// command-line argument.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
    /// One measurement using the `--search-k` and `--spill` flag values.
    SingleRun,
    /// A sweep over a fixed grid of `spill` and `search_k` values.
    Matrix,
}
36 |
37 | fn main() -> Result<()> {
38 | let args = gflags::parse();
39 | let mode = if args.is_empty() {
40 | Mode::SingleRun
41 | } else {
42 | match args[0] {
43 | "run" => Mode::SingleRun,
44 | "matrix" => Mode::Matrix,
45 | _ => Mode::SingleRun,
46 | }
47 | };
48 | match mode {
49 | Mode::SingleRun => single_run_main(),
50 | Mode::Matrix => matrix_main(),
51 | }
52 | }
53 |
54 | fn single_run_main() -> Result<()> {
55 | let store = make_store()?;
56 | let tests = bbqvec::create_vector_set(DIMENSIONS.flag, QUERIES.flag);
57 | let mut fts_results = Vec::with_capacity(tests.len());
58 | for t in tests.iter() {
59 | fts_results.push(store.full_table_scan(t, 20)?);
60 | }
61 | let (results, took) = run_test(&tests, &store, SEARCH_K.flag, SPILL.flag)?;
62 | print_result_line(&fts_results, &results, SEARCH_K.flag, SPILL.flag, took)?;
63 | Ok(())
64 | }
65 |
66 | fn matrix_main() -> Result<()> {
67 | let store = make_store()?;
68 | let tests = bbqvec::create_vector_set(DIMENSIONS.flag, QUERIES.flag);
69 | let mut fts_results = Vec::with_capacity(tests.len());
70 | for t in tests.iter() {
71 | fts_results.push(store.full_table_scan(t, 20)?);
72 | }
73 | for spill in [1, 4, 8, 16] {
74 | for searchk in [100, 500, 1000, 2000, 5000, 10000, 20000] {
75 | if DIMENSIONS.flag < spill {
76 | continue;
77 | }
78 | let (results, took) = run_test(&tests, &store, searchk, spill)?;
79 | print_result_line(&fts_results, &results, searchk, spill, took)?;
80 | }
81 | }
82 | Ok(())
83 | }
84 |
85 | fn make_store() -> Result> {
86 | let data = bbqvec::create_vector_set(DIMENSIONS.flag, VECTORS.flag);
87 | println!("Made vectors");
88 | let mem = bbqvec::MemoryBackend::new(DIMENSIONS.flag, BASES.flag)?;
89 | let mut store = bbqvec::VectorStore::new(mem)?;
90 | store.add_vector_iter(data.enumerate_ids())?;
91 | println!("Added vectors");
92 | Ok(store)
93 | }
94 |
95 | fn run_test(
96 | tests: &Vec,
97 | store: &bbqvec::VectorStore,
98 | search_k: usize,
99 | spill: usize,
100 | ) -> Result<(Vec, Duration)> {
101 | let mut out = Vec::with_capacity(tests.len());
102 | let start = Instant::now();
103 | for v in tests {
104 | let res = store.find_nearest(v, 20, search_k, spill)?;
105 | out.push(res);
106 | }
107 | let took = Instant::now().duration_since(start);
108 | Ok((out, took))
109 | }
110 |
111 | fn print_result_line(
112 | fts: &[ResultSet],
113 | real: &[ResultSet],
114 | search_k: usize,
115 | spill: usize,
116 | took: Duration,
117 | ) -> Result<()> {
118 | let mut acc = [0.0; 4];
119 | let mut checked = 0;
120 | for (f, r) in fts.iter().zip(real.iter()) {
121 | acc[0] += f.compute_recall(r, 1);
122 | acc[1] += f.compute_recall(r, 5);
123 | acc[2] += f.compute_recall(r, 10);
124 | acc[3] += f.compute_recall(r, 20);
125 | checked += r.checked;
126 | }
127 | acc.iter_mut().for_each(|v| *v *= 100.0 / fts.len() as f64);
128 | let per = took.as_millis() as f64 / real.len() as f64;
129 | let avg_check = checked / real.len();
130 | println!(
131 | "searchk {:<6} / spill {:<4} ({:8.4}ms, {:10} checked) {:5.2}@1 {:5.2}@5 {:5.2}@10 {:5.2}@20",
132 | search_k, spill, per, avg_check, acc[0], acc[1], acc[2], acc[3]
133 | );
134 | Ok(())
135 | }
136 |
--------------------------------------------------------------------------------
/rust/src/backend.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 |
3 | use crate::{Basis, Bitmap, ResultSet, Vector, ID};
4 |
/// Descriptive metadata reported by a vector backend through
/// `VectorBackend::info`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BackendInfo {
    /// Identifier of the quantization in use.
    /// NOTE(review): exact values are not visible here — confirm with the backends.
    pub quantization: String,
    /// Whether the backend holds index data.
    /// NOTE(review): presumably persisted bases/bitmaps — confirm.
    pub has_index_data: bool,
    /// Number of dimensions per vector.
    pub dimensions: usize,
    /// Number of bases configured for the backend.
    pub n_basis: usize,
    /// Number of vectors the backend reports.
    pub vector_count: usize,
}
12 |
13 | pub trait VectorBackend {
14 | fn put_vector(&mut self, id: ID, v: &Vector) -> Result<()>;
15 | fn compute_similarity(&self, target: &Vector, target_id: ID) -> Result;
16 | fn info(&self) -> BackendInfo;
17 | fn iter_vector_ids(&self) -> impl Iterator- ;
18 | fn vector_exists(&self, id: ID) -> bool;
19 | fn close(self) -> Result<()>;
20 |
21 | fn find_nearest(&self, target: &Vector, k: usize) -> Result {
22 | let mut set = ResultSet::new(k);
23 | for id in self.iter_vector_ids() {
24 | let sim = self.compute_similarity(target, id)?;
25 | set.add_result(id, sim);
26 | }
27 | Ok(set)
28 | }
29 |
30 | fn load_bases(&self) -> Result