├── .github └── workflows │ ├── tag-release.yml │ └── unit-tests.yaml ├── .gitignore ├── .mvn ├── jvm.config └── wrapper │ ├── MavenWrapperDownloader.java │ └── maven-wrapper.properties ├── CHANGELOG.md ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── UPGRADING.md ├── benchmarks-jmh ├── README.md ├── pom.xml ├── scripts │ └── test_node_setup.sh └── src │ └── main │ ├── java │ └── io │ │ └── github │ │ └── jbellis │ │ └── jvector │ │ └── bench │ │ ├── IndexConstructionWithRandomSetBenchmark.java │ │ ├── IndexConstructionWithStaticSetBenchmark.java │ │ ├── PQBenchmark.java │ │ ├── RandomVectorsBenchmark.java │ │ └── StaticSetVectorsBenchmark.java │ └── resources │ └── log4j2.xml ├── jvector-base ├── pom.xml └── src │ └── main │ └── java │ └── io │ └── github │ └── jbellis │ └── jvector │ ├── annotations │ ├── Experimental.java │ └── VisibleForTesting.java │ ├── disk │ ├── BufferedRandomAccessWriter.java │ ├── ByteBufferReader.java │ ├── RandomAccessReader.java │ ├── RandomAccessWriter.java │ ├── ReaderSupplier.java │ ├── ReaderSupplierFactory.java │ ├── SimpleMappedReader.java │ └── SimpleReader.java │ ├── exceptions │ └── ThreadInterruptedException.java │ ├── graph │ ├── ConcurrentNeighborMap.java │ ├── GraphIndex.java │ ├── GraphIndexBuilder.java │ ├── GraphSearcher.java │ ├── ListRandomAccessVectorValues.java │ ├── MapRandomAccessVectorValues.java │ ├── NodeArray.java │ ├── NodeQueue.java │ ├── NodesIterator.java │ ├── NodesUnsorted.java │ ├── OnHeapGraphIndex.java │ ├── RandomAccessVectorValues.java │ ├── ScoreTracker.java │ ├── SearchResult.java │ ├── disk │ │ ├── CommonHeader.java │ │ ├── Header.java │ │ ├── OnDiskGraphIndex.java │ │ ├── OnDiskGraphIndexWriter.java │ │ ├── OrdinalMapper.java │ │ └── feature │ │ │ ├── Feature.java │ │ │ ├── FeatureId.java │ │ │ ├── FeatureSource.java │ │ │ ├── FusedADC.java │ │ │ ├── InlineVectors.java │ │ │ ├── NVQ.java │ │ │ ├── SeparatedFeature.java │ │ │ ├── SeparatedNVQ.java │ │ │ └── SeparatedVectors.java │ ├── diversity │ 
│ ├── DiversityProvider.java │ │ └── VamanaDiversityProvider.java │ └── similarity │ │ ├── BuildScoreProvider.java │ │ ├── CachingVectorValues.java │ │ ├── DefaultSearchScoreProvider.java │ │ ├── ScoreFunction.java │ │ └── SearchScoreProvider.java │ ├── quantization │ ├── BQVectors.java │ ├── BinaryQuantization.java │ ├── CompressedVectors.java │ ├── FusedADCPQDecoder.java │ ├── ImmutableBQVectors.java │ ├── ImmutablePQVectors.java │ ├── KMeansPlusPlusClusterer.java │ ├── MutableBQVectors.java │ ├── MutableCompressedVectors.java │ ├── MutablePQVectors.java │ ├── NVQScorer.java │ ├── NVQVectors.java │ ├── NVQuantization.java │ ├── PQDecoder.java │ ├── PQVectors.java │ ├── ProductQuantization.java │ └── VectorCompressor.java │ ├── util │ ├── AbstractLongHeap.java │ ├── Accountable.java │ ├── ArrayUtil.java │ ├── AtomicFixedBitSet.java │ ├── BitSet.java │ ├── BitUtil.java │ ├── Bits.java │ ├── BoundedLongHeap.java │ ├── Constants.java │ ├── DenseIntMap.java │ ├── DocIdSetIterator.java │ ├── ExceptionUtils.java │ ├── ExplicitThreadLocal.java │ ├── FixedBitSet.java │ ├── GrowableBitSet.java │ ├── GrowableLongHeap.java │ ├── IntMap.java │ ├── MathUtil.java │ ├── NumericUtils.java │ ├── PhysicalCoreExecutor.java │ ├── RamUsageEstimator.java │ ├── SparseBits.java │ ├── SparseFixedBitSet.java │ ├── SparseIntMap.java │ └── ThreadSafeGrowableBitSet.java │ └── vector │ ├── ArrayByteSequence.java │ ├── ArraySliceByteSequence.java │ ├── ArrayVectorFloat.java │ ├── ArrayVectorProvider.java │ ├── DefaultVectorUtilSupport.java │ ├── DefaultVectorizationProvider.java │ ├── Matrix.java │ ├── VectorSimilarityFunction.java │ ├── VectorUtil.java │ ├── VectorUtilSupport.java │ ├── VectorizationProvider.java │ └── types │ ├── ByteSequence.java │ ├── VectorFloat.java │ └── VectorTypeSupport.java ├── jvector-examples ├── README.md ├── pom.xml ├── src │ └── main │ │ └── java │ │ └── io │ │ └── github │ │ └── jbellis │ │ └── jvector │ │ └── example │ │ ├── Bench.java │ │ ├── Bench2D.java │ │ 
├── BenchYAML.java │ │ ├── DistancesNVQ.java │ │ ├── Grid.java │ │ ├── HelloVectorWorld.java │ │ ├── IPCService.java │ │ ├── SiftSmall.java │ │ ├── benchmarks │ │ ├── AbstractQueryBenchmark.java │ │ ├── AccuracyBenchmark.java │ │ ├── BenchmarkTablePrinter.java │ │ ├── CountBenchmark.java │ │ ├── ExecutionTimeBenchmark.java │ │ ├── LatencyBenchmark.java │ │ ├── Metric.java │ │ ├── QueryBenchmark.java │ │ ├── QueryExecutor.java │ │ ├── QueryTester.java │ │ └── ThroughputBenchmark.java │ │ ├── util │ │ ├── AccuracyMetrics.java │ │ ├── CompressorParameters.java │ │ ├── DataSet.java │ │ ├── DataSetCreator.java │ │ ├── DataSetLoader.java │ │ ├── Deep1BLoader.java │ │ ├── DownloadHelper.java │ │ ├── Hdf5Loader.java │ │ ├── MMapRandomAccessVectorValues.java │ │ ├── MMapReader.java │ │ ├── MultiFileDatasource.java │ │ ├── SiftLoader.java │ │ └── UpdatableRandomAccessVectorValues.java │ │ └── yaml │ │ ├── CommonParameters.java │ │ ├── Compression.java │ │ ├── ConstructionParameters.java │ │ ├── DatasetCollection.java │ │ ├── MultiConfig.java │ │ └── SearchParameters.java └── yaml-configs │ ├── ada002-100k.yml │ ├── datasets.yml │ └── default.yml ├── jvector-multirelease ├── pom.xml └── src │ └── assembly │ ├── mrjar.xml │ └── sourcesjar.xml ├── jvector-native ├── pom.xml └── src │ ├── main │ ├── c │ │ ├── jextract_vector_simd.sh │ │ ├── jvector_simd.c │ │ ├── jvector_simd.h │ │ └── jvector_simd_check.c │ └── java │ │ └── io │ │ └── github │ │ └── jbellis │ │ └── jvector │ │ ├── disk │ │ └── MemorySegmentReader.java │ │ └── vector │ │ ├── MemorySegmentByteSequence.java │ │ ├── MemorySegmentVectorFloat.java │ │ ├── MemorySegmentVectorProvider.java │ │ ├── NativeVectorUtilSupport.java │ │ ├── NativeVectorizationProvider.java │ │ ├── VectorSimdOps.java │ │ └── cnative │ │ ├── LibraryLoader.java │ │ └── NativeSimdOps.java │ └── test │ └── java │ └── io │ └── github │ └── jbellis │ └── jvector │ └── disk │ └── MemorySegmentReaderTest.java ├── jvector-tests ├── pom.xml ├── 
resources │ ├── version0.odgi │ └── version0.pq └── src │ ├── main │ └── assembly │ │ └── test-jar-with-dependencies.xml │ └── test │ └── java │ └── io │ └── github │ └── jbellis │ └── jvector │ ├── LuceneTestCase.java │ ├── TestUtil.java │ ├── graph │ ├── GraphIndexBuilderTest.java │ ├── MockVectorValues.java │ ├── Test2DThreshold.java │ ├── TestDeletions.java │ ├── TestNeighbors.java │ ├── TestNodeArray.java │ ├── TestNodeQueue.java │ ├── TestVectorGraph.java │ └── disk │ │ └── TestOnDiskGraphIndex.java │ ├── microbench │ ├── GraphBuildBench.java │ ├── GraphIndexBench.java │ └── SimilarityBench.java │ ├── quantization │ ├── TestADCGraphIndex.java │ ├── TestBinaryQuantization.java │ ├── TestCompressedVectors.java │ └── TestProductQuantization.java │ ├── util │ ├── BaseBitSetTestCase.java │ ├── TestAtomicFixedBitSet.java │ ├── TestFixedBitSet.java │ ├── TestIntMap.java │ ├── TestLongHeap.java │ └── TestThreadSafeGrowableBitSet.java │ └── vector │ ├── TestArraySliceByteSequence.java │ ├── TestMatrixUtil.java │ └── TestVectorizationProvider.java ├── jvector-twenty ├── pom.xml └── src │ └── main │ └── java │ └── io │ └── github │ └── jbellis │ └── jvector │ └── vector │ ├── PanamaVectorUtilSupport.java │ ├── PanamaVectorizationProvider.java │ └── SimdOps.java ├── mvnw ├── mvnw.cmd ├── plot_output.py ├── pom.xml ├── rat-excludes.txt ├── siftsmall ├── siftsmall_base.fvecs ├── siftsmall_groundtruth.ivecs └── siftsmall_query.fvecs └── update_changelog.sh /.github/workflows/tag-release.yml: -------------------------------------------------------------------------------- 1 | # @author Madhavan Sridharan 2 | name: Prepare new tag & changelog PR 3 | 4 | # runs on 5 | # * manually triggered 6 | on: 7 | workflow_dispatch: 8 | inputs: 9 | tag_version: 10 | description: 'Tag version to release (e.g. X.Y.Z-beta.M)' 11 | required: true 12 | new_revision: 13 | description: 'New development version to update in pom.xml (e.g. 
X.Y.Z-beta.N-SNAPSHOT)' 14 | required: true 15 | 16 | # global env vars, available in all jobs and steps 17 | env: 18 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 19 | 20 | jobs: 21 | new_tag_and_changelog: 22 | runs-on: ubuntu-latest 23 | permissions: 24 | contents: write 25 | pull-requests: write 26 | 27 | steps: 28 | - name: Checkout repo 29 | uses: actions/checkout@v4 30 | with: 31 | token: ${{ secrets.GITHUB_TOKEN }} 32 | 33 | - name: Git config 34 | run: | 35 | git config user.name "GitHub Actions" 36 | git config user.email "actions@github.com" 37 | 38 | - name: Update release version in pom.xml 39 | run: | 40 | sed -i 's|<revision>.*</revision>|<revision>${{ github.event.inputs.tag_version }}</revision>|' ./pom.xml 41 | git add ./pom.xml 42 | git commit -m "chore (release): Start release version ${{ github.event.inputs.tag_version }}" 43 | 44 | # Note: the tag version will be pushed right away at this step prior to changelog pr merging 45 | - name: Create and push tag 46 | run: | 47 | git tag -a "${{ github.event.inputs.tag_version }}" -m "Release tag version ${{ github.event.inputs.tag_version }}" 48 | git push origin "${{ github.event.inputs.tag_version }}" 49 | 50 | - name: Generate changelog 51 | continue-on-error: true 52 | run: ./update_changelog.sh 53 | 54 | - name: Update next development revision in pom.xml 55 | run: | 56 | sed -i 's|<revision>.*</revision>|<revision>${{ github.event.inputs.new_revision }}</revision>|' ./pom.xml 57 | git add ./pom.xml 58 | git commit -m "chore (release): Start development of ${{ github.event.inputs.new_revision }}" 59 | 60 | - name: Create pull request 61 | uses: peter-evans/create-pull-request@v7 62 | env: 63 | GITHUB_TOKEN: 64 | with: 65 | token: ${{ secrets.GITHUB_TOKEN }} 66 | branch: "release/bump-tag-version-and-update-changelog" 67 | branch-suffix: "short-commit-hash" 68 | base: "main" 69 | title: "chore(release): Bump tag version and update changelog" 70 | commit-message: "chore(release): Bump tag version and update changelog" 71 | body: | 72 | This pull request does the following as part 
of the release process, 73 | - bumps the tag version to ${{ github.event.inputs.tag_version }} 74 | - updates changelog 75 | - bumps the revision in pom.xml to ${{ github.event.inputs.new_revision }} 76 | Please review and merge. 77 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | .mvn/wrapper/maven-wrapper.jar 3 | .java-version 4 | 5 | ### Bench caches 6 | pq_cache/ 7 | 8 | ### JVM crashes 9 | hs_err_pid* 10 | replay_pid* 11 | 12 | ### IntelliJ IDEA ### 13 | .idea 14 | *.iml 15 | 16 | ### VS Code ### 17 | .vscode/ 18 | 19 | ### Mac OS ### 20 | .DS_Store 21 | 22 | **/.flattened-pom.xml 23 | 24 | fvec/ 25 | hdf5/ 26 | 27 | ### Native libraries/objects 28 | *.so 29 | *.o 30 | 31 | ### asdf (or other version managers) 32 | .tool-versions 33 | 34 | ### aider 35 | .aider* 36 | 37 | # JMH generated files 38 | dependency-reduced-pom.xml 39 | -------------------------------------------------------------------------------- /.mvn/jvm.config: -------------------------------------------------------------------------------- 1 | -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn 2 | -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=error 3 | -Dorg.slf4j.simpleLogger.dateTimeFormat=HH:mm:ss,SSS 4 | -Dorg.slf4j.simpleLogger.showDateTime=true 5 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip 2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [4.0.0-beta.5](https://github.com/datastax/jvector/tree/4.0.0-beta.5) (2025-05-23) 4 | 5 | [Full Changelog](https://github.com/datastax/jvector/compare/4.0.0-beta.4...4.0.0-beta.5) 6 | 7 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | This product includes software developed for Apache Lucene by 2 | The Apache Software Foundation (http://www.apache.org/). 3 | -------------------------------------------------------------------------------- /benchmarks-jmh/README.md: -------------------------------------------------------------------------------- 1 | # JMH Benchmarks 2 | Microbenchmarks for JVector. While `Bench.java` focuses on recall, the JMH benchmarks 3 | mainly target scalability and latency. 4 | 5 | ## Building and running the benchmark 6 | 7 | 1. Build the project, then run the benchmark jar: 8 | ```shell 9 | mvn clean install -DskipTests=true 10 | java --enable-native-access=ALL-UNNAMED \ 11 | --add-modules=jdk.incubator.vector \ 12 | -XX:+HeapDumpOnOutOfMemoryError \ 13 | -Xmx14G -Djvector.experimental.enable_native_vectorization=true \ 14 | -jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.2-SNAPSHOT.jar 15 | ``` 16 | 17 | You can pass additional JMH arguments on the command line. 
For example, to run the benchmarks with 4 forks, 5 warmup iterations, 5 measurement iterations, 2 threads, and 10 seconds of warmup time per iteration, use the following command: 18 | ```shell 19 | java --enable-native-access=ALL-UNNAMED \ 20 | --add-modules=jdk.incubator.vector \ 21 | -XX:+HeapDumpOnOutOfMemoryError \ 22 | -Xmx14G -Djvector.experimental.enable_native_vectorization=true \ 23 | -jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.2-SNAPSHOT.jar \ 24 | -f 4 -wi 5 -i 5 -t 2 -w 10s 25 | ``` 26 | 27 | Common JMH options you can set in the benchmark configuration or on the command line: 28 | - `-f <num>` - Number of forks 29 | - `-wi <num>` - Number of warmup iterations 30 | - `-i <num>` - Number of measurement iterations 31 | - `-w