├── .cargo └── config.toml ├── .dockerignore ├── .github ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml ├── actions │ ├── install-packages │ │ └── action.yml │ ├── install-pgrx │ │ └── action.yml │ ├── install-pgvector │ │ └── action.yml │ └── install-postgres │ │ └── action.yml └── workflows │ ├── code_checks.yml │ ├── deb-packager.yaml │ ├── pgrx_test.yaml │ └── shellcheck.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── Cargo.toml ├── DEVELOPMENT.md ├── LICENSE ├── Makefile ├── NOTICE ├── README.md ├── pgvectorscale ├── .gitignore ├── Cargo.toml ├── benches │ ├── distance.rs │ └── lsr.rs ├── pgvectorscale_derive │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── sql │ ├── timescale_vector--0.0.1--0.0.2.sql │ ├── vectorscale--0.0.2--0.2.0.sql │ ├── vectorscale--0.0.2--0.3.0.sql │ ├── vectorscale--0.0.2--0.4.0.sql │ ├── vectorscale--0.0.2--0.5.0.sql │ ├── vectorscale--0.0.2--0.5.1.sql │ ├── vectorscale--0.0.2--0.6.0.sql │ ├── vectorscale--0.0.2--0.7.0.sql │ ├── vectorscale--0.0.2--0.7.1.sql │ ├── vectorscale--0.1.0--0.7.1.sql │ ├── vectorscale--0.2.0--0.3.0.sql │ ├── vectorscale--0.2.0--0.4.0.sql │ ├── vectorscale--0.2.0--0.5.0.sql │ ├── vectorscale--0.2.0--0.5.1.sql │ ├── vectorscale--0.2.0--0.6.0.sql │ ├── vectorscale--0.2.0--0.7.0.sql │ ├── vectorscale--0.2.0--0.7.1.sql │ ├── vectorscale--0.3.0--0.4.0.sql │ ├── vectorscale--0.3.0--0.5.0.sql │ ├── vectorscale--0.3.0--0.5.1.sql │ ├── vectorscale--0.3.0--0.6.0.sql │ ├── vectorscale--0.3.0--0.7.0.sql │ ├── vectorscale--0.3.0--0.7.1.sql │ ├── vectorscale--0.4.0--0.5.0.sql │ ├── vectorscale--0.4.0--0.5.1.sql │ ├── vectorscale--0.4.0--0.6.0.sql │ ├── vectorscale--0.4.0--0.7.0.sql │ ├── vectorscale--0.4.0--0.7.1.sql │ ├── vectorscale--0.5.0--0.5.1.sql │ ├── vectorscale--0.5.0--0.6.0.sql │ ├── vectorscale--0.5.0--0.7.0.sql │ ├── vectorscale--0.5.0--0.7.1.sql │ ├── vectorscale--0.5.1--0.6.0.sql │ ├── vectorscale--0.5.1--0.7.0.sql 
│ ├── vectorscale--0.5.1--0.7.1.sql │ ├── vectorscale--0.6.0--0.7.0.sql │ ├── vectorscale--0.6.0--0.7.1.sql │ └── vectorscale--0.7.0--0.7.1.sql ├── src │ ├── access_method │ │ ├── build.rs │ │ ├── cost_estimate.rs │ │ ├── debugging.rs │ │ ├── distance │ │ │ ├── distance_aarch64.rs │ │ │ ├── distance_x86.rs │ │ │ └── mod.rs │ │ ├── graph │ │ │ ├── mod.rs │ │ │ ├── neighbor_store.rs │ │ │ ├── neighbor_with_distance.rs │ │ │ └── start_nodes.rs │ │ ├── guc.rs │ │ ├── labels │ │ │ ├── filtering_tests.rs │ │ │ └── mod.rs │ │ ├── meta_page.rs │ │ ├── mod.rs │ │ ├── node.rs │ │ ├── options.rs │ │ ├── pg_vector.rs │ │ ├── plain │ │ │ ├── mod.rs │ │ │ ├── node.rs │ │ │ ├── storage.rs │ │ │ └── tests.rs │ │ ├── sbq │ │ │ ├── cache.rs │ │ │ ├── mod.rs │ │ │ ├── node.rs │ │ │ ├── quantize.rs │ │ │ ├── storage.rs │ │ │ └── tests.rs │ │ ├── scan.rs │ │ ├── stats.rs │ │ ├── storage.rs │ │ ├── storage_common.rs │ │ ├── upgrade_test.rs │ │ └── vacuum.rs │ ├── bin │ │ └── pgrx_embed.rs │ ├── lib.rs │ └── util │ │ ├── buffer.rs │ │ ├── chain.rs │ │ ├── mod.rs │ │ ├── page.rs │ │ ├── ports.rs │ │ ├── table_slot.rs │ │ └── tape.rs └── vectorscale.control └── scripts └── package-deb.sh /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_os="macos")'] 2 | # Postgres symbols won't be available until runtime 3 | rustflags = ["-Clink-arg=-Wl,-undefined,dynamic_lookup"] 4 | 5 | [target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'] 6 | rustflags = ["-Ctarget-feature=+avx2,+fma"] 7 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | pgvectorscale/target 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @timescale/ts-vector 2 | 
-------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | You find the Timescale Code of Conduct at . 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug report 3 | description: Is something not working? Let's fix it together! 4 | title: "[Bug]: " 5 | labels: ["bug", "triage", "community", "pgvectorscale"] 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Thanks for taking the time to fill out this bug report! 11 | 12 | - type: textarea 13 | id: what-happened 14 | attributes: 15 | label: What happened? 16 | description: | 17 | Tell us what happened and also what you would have expected to 18 | happen instead. 19 | placeholder: "Describe the bug" 20 | validations: 21 | required: true 22 | 23 | - type: input 24 | id: pgvectorscale-extension-version 25 | attributes: 26 | label: pgvectorscale extension affected 27 | description: | 28 | Let us know what version of the pgvectorscale postgres extension you 29 | are running. 30 | placeholder: "0.4.0" 31 | validations: 32 | required: false 33 | 34 | - type: input 35 | id: postgresql-version 36 | attributes: 37 | label: PostgreSQL version used 38 | description: Let us know what version of PostgreSQL you are running. 39 | placeholder: "14.1" 40 | validations: 41 | required: true 42 | 43 | - type: input 44 | id: os 45 | attributes: 46 | label: What operating system did you use? 47 | description: | 48 | Please provide OS, version, and architecture. For example: 49 | Windows 10 x64, Ubuntu 21.04 x64, Mac OS X 10.5 ARM, Rasperry 50 | Pi i386, etc. 
51 | placeholder: "Ubuntu 21.04 x64" 52 | validations: 53 | required: true 54 | 55 | - type: dropdown 56 | id: installation 57 | attributes: 58 | label: What installation method did you use? 59 | multiple: true 60 | options: 61 | - Docker 62 | - Source 63 | - Pypi 64 | - Other (Please specify in the description of the bug) 65 | - Not applicable 66 | validations: 67 | required: true 68 | 69 | - type: dropdown 70 | id: platform 71 | attributes: 72 | label: What platform did you run on? 73 | multiple: true 74 | options: 75 | - Amazon Web Services (AWS) 76 | - Google Cloud Platform (GCP) 77 | - Managed Service for TimescaleDB (MST/Aiven) 78 | - Microsoft Azure Cloud 79 | - On prem/Self-hosted 80 | - Timescale Cloud 81 | - Other 82 | - Not applicable 83 | validations: 84 | required: true 85 | 86 | - type: textarea 87 | id: logs 88 | attributes: 89 | label: Relevant log output and stack trace 90 | description: | 91 | Please copy and paste any relevant log output or a stack 92 | trace. This will be automatically formatted into code, so no 93 | need for backticks. 94 | render: bash 95 | 96 | - type: textarea 97 | id: reproduce 98 | attributes: 99 | label: How can we reproduce the bug? 100 | description: | 101 | Please try to provide step-by-step instructions how to 102 | reproduce the issue. If possible, provide scripts that we can 103 | run to trigger the bug. 104 | render: bash 105 | validations: 106 | required: true 107 | - type: dropdown 108 | id: work-on-it 109 | attributes: 110 | label: Are you going to work on the bugfix? 111 | description: No pressure at all, but we’re ready to help you navigate the contribution process. 112 | options: 113 | - "🦸 Yes , I will submit a PR soon!" 114 | - "🆘 No, could someone else please work on the bugfix?" 
115 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: pgai discord 4 | url: https://discord.com/channels/1246241636019605616/1246243698111676447 5 | about: Get free help from the pgai community 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💡 Feature request 3 | description: Suggest a new feature 4 | title: "[Feature]: <Feature name>" 5 | labels: ["feature-request", "community", "pgvectorscale"] 6 | body: 7 | - type: markdown 8 | id: info 9 | attributes: 10 | value: | 11 | Only use this template to suggest a new feature that doesn't already 12 | exist, or enhancements to existing features. For bugs, use the bug 13 | report template. 14 | 15 | - type: textarea 16 | id: what 17 | attributes: 18 | label: What problem does the new feature solve? 19 | description: | 20 | Describe the problem and why it is important to solve. Did you consider 21 | alternative solutions, perhaps outside the pgvectorscale? Why is it 22 | better to add the feature to pgvectorscale? 23 | validations: 24 | required: true 25 | 26 | - type: textarea 27 | id: how 28 | attributes: 29 | label: What does the feature do? 30 | description: | 31 | Give a high-level overview of what the feature does and how it would 32 | work. 33 | validations: 34 | required: true 35 | 36 | - type: textarea 37 | id: implementation 38 | attributes: 39 | label: Implementation challenges 40 | description: | 41 | If you have ideas of how to implement the feature, and any particularly 42 | challenging issues to overcome, then provide them here. 
43 | validations: 44 | required: false 45 | - type: dropdown 46 | id: work-on-it 47 | attributes: 48 | label: Are you going to work on this feature? 49 | description: No pressure at all, but we’re ready to help you navigate the contribution process. 50 | options: 51 | - "🦸 Yes , I will submit a PR soon!" 52 | - "🆘 No, could someone else please consider working on it?" 53 | -------------------------------------------------------------------------------- /.github/actions/install-packages/action.yml: -------------------------------------------------------------------------------- 1 | name: "Install linux packages" 2 | 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Install Linux Dependencies 7 | shell: bash 8 | run: | 9 | sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" -- 18 10 | sudo apt-get update 11 | sudo apt-get install -y flex bison lcov systemd-coredump gdb libblas-dev libipc-run-perl libtest-most-perl clang-18 llvm-18 llvm-18-dev llvm-18-runtime llvm-18-tools libopenblas-dev 12 | -------------------------------------------------------------------------------- /.github/actions/install-pgrx/action.yml: -------------------------------------------------------------------------------- 1 | name: "Setup pgrx" 2 | description: "Installs cargo-pgrx" 3 | 4 | inputs: 5 | pg-install-dir: 6 | default: postgresql 7 | vector-dir: 8 | default: . 
9 | grcov-version: 10 | description: "grcov version" 11 | default: 0.8.18 12 | pgrx-version: 13 | description: "pgrx version" 14 | # Default version currently isn't unused 15 | # default: 0.9.8 16 | 17 | runs: 18 | using: "composite" 19 | steps: 20 | - name: 21 | id: pg-config 22 | shell: bash 23 | run: | 24 | export PATH=${{ inputs.pg-install-dir }}/bin:$PATH 25 | MAJOR_VERSION=`pg_config --version | sed 's/^PostgreSQL \([0-9]\+\).*/\1/g'` 26 | BINDIR=`pg_config --bindir` 27 | echo "version=$MAJOR_VERSION" >> $GITHUB_OUTPUT 28 | echo "bindir=$BINDIR" >> $GITHUB_OUTPUT 29 | 30 | - name: 31 | id: rustc 32 | shell: bash 33 | working-directory: ${{ inputs.vector-dir }} 34 | # rustup show active-toolchain installs the toolchain specified via rust-toolchain.yaml if it is not installed 35 | run: | 36 | cd pgvectorscale 37 | RUST_VERSION=`rustup show active-toolchain -v | grep rustc | cut -d' ' -f2` 38 | echo "version=$RUST_VERSION" >> $GITHUB_OUTPUT 39 | 40 | - name: Cache cargo-pgrx installation ${{ inputs.pgrx-version }} 41 | id: cache-cargo-pgrx 42 | uses: actions/cache@v4 43 | with: 44 | path: ~/.cargo/bin/cargo-pgrx 45 | key: ${{ runner.arch }}-${{runner.os}}-cargo-pgrx-${{ inputs.pgrx-version }}-pg${{ steps.pg-config.outputs.version }}-${{ steps.rustc.outputs.version }}-v2 46 | 47 | - name: Install cargo-pgrx ${{ inputs.pgrx-version }} 48 | if: steps.cache-cargo-pgrx.outputs.cache-hit != 'true' 49 | shell: bash 50 | run: | 51 | cargo install cargo-pgrx --version ${{ inputs.pgrx-version }} --force 52 | 53 | - name: Configure pgrx 54 | shell: bash 55 | run: | 56 | export PATH=${{ steps.pg-config.outputs.bindir }}:$PATH 57 | export PG_CONFIG=`which pg_config` 58 | cargo pgrx init --pg${{ steps.pg-config.outputs.version }}=$PG_CONFIG 59 | cat $HOME/.pgrx/config.toml 60 | 61 | - name: Cache cargo-grcov installation ${{ inputs.grcov-version }} 62 | id: cache-cargo-grcov 63 | uses: actions/cache@v4 64 | with: 65 | path: ~/.cargo/bin/grcov 66 | key: ${{ runner.arch 
}}-${{runner.os}}-cargo-grcov-${{ inputs.grcov-version }}-${{ steps.rustc.outputs.version }} 67 | 68 | - name: Install grcov 69 | if: steps.cache-cargo-grcov.outputs.cache-hit != 'true' 70 | id: install-cargo-grcov 71 | shell: bash 72 | run: cargo install grcov --version ${{ inputs.grcov-version }} --force 73 | -------------------------------------------------------------------------------- /.github/actions/install-pgvector/action.yml: -------------------------------------------------------------------------------- 1 | name: "Install pgvector " 2 | description: "Builds and installs Pgvector" 3 | 4 | inputs: 5 | pgvector-version: 6 | required: true 7 | description: "pgvector version" 8 | pgvector-src-dir: 9 | default: pgvectorbuild 10 | pg-install-dir: 11 | default: postgresql 12 | 13 | runs: 14 | using: "composite" 15 | steps: 16 | 17 | - name: Build Pgvector ${{ inputs.pgvector-version }} 18 | shell: bash 19 | env: 20 | pg_build_args: --enable-debug --enable-cassert 21 | llvm_config: llvm-config-18 22 | CC: gcc 23 | CXX: g++ 24 | run: | 25 | export PATH=${{ inputs.pg-install-dir }}/bin:$PATH 26 | mkdir -p ${{ inputs.pgvector-src-dir }} 27 | cd ${{ inputs.pgvector-src-dir }} 28 | git clone --branch v${{ inputs.pgvector-version }} https://github.com/pgvector/pgvector.git 29 | cd pgvector 30 | make -j$(nproc) 31 | 32 | - name: Install pgvector ${{ inputs.pgvector-version }} 33 | shell: bash 34 | run: | 35 | export PATH=${{ inputs.pg-install-dir }}/bin:$PATH 36 | make -C ${{ inputs.pgvector-src-dir }}/pgvector install 37 | -------------------------------------------------------------------------------- /.github/actions/install-postgres/action.yml: -------------------------------------------------------------------------------- 1 | name: "Setup PostgreSQL" 2 | description: "Builds and installs PostgreSQL" 3 | 4 | inputs: 5 | pg-version: 6 | required: true 7 | description: "PostgreSQL version" 8 | pg-src-dir: 9 | default: pgbuild 10 | pg-install-dir: 11 | default: 
postgresql 12 | 13 | runs: 14 | using: "composite" 15 | steps: 16 | - name: Cache PostgreSQL ${{ inputs.pg-version }} 17 | id: cache-postgresql 18 | uses: actions/cache@v4 19 | with: 20 | path: ${{ inputs.pg-src-dir }} 21 | key: ${{ runner.arch }}-${{ runner.os }}-postgresql-${{ inputs.pg-version }}-v2 22 | 23 | - name: Build PostgreSQL 24 | if: steps.cache-postgresql.outputs.cache-hit != 'true' 25 | shell: bash 26 | env: 27 | pg_build_args: --enable-debug --enable-cassert 28 | llvm_config: llvm-config-18 29 | CC: gcc 30 | CXX: g++ 31 | run: | 32 | wget -q -O postgresql.tar.bz2 https://ftp.postgresql.org/pub/source/v${{ inputs.pg-version }}/postgresql-${{ inputs.pg-version }}.tar.bz2 33 | mkdir -p ${{ inputs.pg-src-dir }} 34 | tar --extract --file postgresql.tar.bz2 --directory ${{ inputs.pg-src-dir }} --strip-components 1 35 | cd ${{ inputs.pg-src-dir }} 36 | PG_INSTALL_DIR=`readlink -f ${{ inputs.pg-install-dir }}` 37 | echo ">>>>>>> $PG_INSTALL_DIR" 38 | ./configure --prefix=${PG_INSTALL_DIR} ${pg_build_args} --with-llvm LLVM_CONFIG=${llvm_config} --with-openssl --without-readline --without-zlib --without-libxml 39 | make -j$(nproc) 40 | 41 | - name: Install PostgreSQL ${{ inputs.pg-version }} 42 | shell: bash 43 | run: | 44 | make -C ${{ inputs.pg-src-dir }} install 45 | -------------------------------------------------------------------------------- /.github/workflows/code_checks.yml: -------------------------------------------------------------------------------- 1 | name: Run code checks 2 | on: [push, pull_request, workflow_dispatch] 3 | permissions: 4 | contents: read 5 | 6 | jobs: 7 | fmt: 8 | runs-on: ubuntu-latest 9 | 10 | container: 11 | image: timescaledev/rust-builder:ubuntu-1.65 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v4 16 | 17 | - name: Verify formatting 18 | run: cd pgvectorscale && cargo fmt --check 19 | 20 | - name: Check formatting failure 21 | if: failure() 22 | run: | 23 | echo "cargo version is" 24 | cargo 
--version 25 | 26 | 27 | -------------------------------------------------------------------------------- /.github/workflows/deb-packager.yaml: -------------------------------------------------------------------------------- 1 | name: Deb packager 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | tag: 6 | description: 'Tag' 7 | required: true 8 | default: '' 9 | TAG_GIT_REF: 10 | description: 'Tag git Ref (leave empty for same as Tag)' 11 | required: false 12 | default: '' 13 | 14 | jobs: 15 | packager: 16 | runs-on: ${{ matrix.platform.runs_on }} 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | pg: 22 | - major: 13 23 | minor: 16 24 | - major: 14 25 | minor: 13 26 | - major: 15 27 | minor: 7 28 | - major: 16 29 | minor: 3 30 | - major: 17 31 | minor: 0 32 | platform: 33 | - type: amd64 34 | runs_on: ubuntu-latest 35 | - type: arm64 36 | runs_on: cloud-image-runner-arm64 37 | 38 | env: 39 | PG_SRC_DIR: pgbuild 40 | PG_INSTALL_DIR: postgresql 41 | MAKE_JOBS: 6 42 | PG_CONFIG_PATH: postgresql/bin/pg_config 43 | TAG: ${{ github.event.inputs.tag }} 44 | TAG_DIR: pgvectorscale 45 | TAG_GIT_REF: ${{ github.event.inputs.TAG_GIT_REF == '' && github.event.inputs.tag || github.event.inputs.TAG_GIT_REF}} 46 | 47 | steps: 48 | - name: Install package_cloud 49 | run: | 50 | sudo apt-get -qq update 51 | sudo apt-get -qq install gcc g++ make ruby-all-dev 52 | sudo gem install rake --no-doc 53 | sudo gem install rainbow -v 2.2.2 --no-doc 54 | sudo gem install package_cloud --no-doc 55 | 56 | - name: Checkout pgvectorscale 57 | uses: actions/checkout@v4 58 | 59 | - name: Install Linux Packages 60 | uses: ./.github/actions/install-packages 61 | 62 | - name: Install Deb builder specific packages 63 | run: | 64 | sudo apt-get install dpkg-dev debhelper build-essential 65 | 66 | - name: Install PostgreSQL ${{ matrix.pg.major }} 67 | uses: ./.github/actions/install-postgres 68 | with: 69 | pg-version: ${{ matrix.pg.major }}.${{ matrix.pg.minor }} 70 | pg-src-dir: ~/${{ 
env.PG_SRC_DIR }} 71 | pg-install-dir: ~/${{ env.PG_INSTALL_DIR }} 72 | 73 | - name: Checkout ${{ env.TAG }} 74 | uses: actions/checkout@v4 75 | with: 76 | repository: timescale/pgvectorscale 77 | ref: '${{ env.TAG_GIT_REF }}' 78 | path: ${{ env.TAG_DIR }} 79 | 80 | - name: Install pgrx 81 | uses: ./.github/actions/install-pgrx 82 | with: 83 | pg-install-dir: ~/${{ env.PG_INSTALL_DIR }} 84 | pgrx-version: 0.12.9 85 | 86 | - name: Build Deb 87 | id: debbuild 88 | run: | 89 | export PATH=~/${{ env.PG_INSTALL_DIR }}/bin:$PATH 90 | (cd ${{ env.TAG_DIR }} && make package) 91 | bash scripts/package-deb.sh "${{ env.TAG }}" "${PWD}/${{ env.TAG_DIR }}" "$RUNNER_OS" "${{ matrix.pg.major }}" 92 | 93 | # Use a GH artifact, then we can make use of the (quite limited) GH API https://docs.github.com/en/rest/actions/artifacts 94 | # The artifact will have a TTL of 90 days 95 | - name: Upload deb as Artifact 96 | uses: actions/upload-artifact@v4 97 | with: 98 | name: pgvectorscale-${{ env.TAG }}-pg${{ matrix.pg.major }}-${{ matrix.platform.type }} 99 | path: pkgdump/pgvectorscale-*${{ env.TAG }}*.deb 100 | 101 | - name: Upload to packagecloud 102 | env: 103 | PACKAGECLOUD_TOKEN: ${{ secrets.IO_PACKAGECLOUD_TOKEN }} 104 | run: | 105 | source /etc/os-release 106 | package_cloud push timescale/timescaledb/$ID/$VERSION_CODENAME pkgdump/pgvectorscale-*${{ env.TAG }}*.deb 107 | -------------------------------------------------------------------------------- /.github/workflows/pgrx_test.yaml: -------------------------------------------------------------------------------- 1 | name: Run PGRX tests 2 | on: [push, pull_request, workflow_dispatch] 3 | permissions: 4 | contents: read 5 | 6 | jobs: 7 | tester: 8 | runs-on: ${{ matrix.platform.runs_on }} 9 | 10 | strategy: 11 | fail-fast: false 12 | matrix: 13 | pgvector: 14 | - version: 0.7.4 15 | pg: 16 | - major: 13 17 | minor: 16 18 | - major: 14 19 | minor: 13 20 | - major: 15 21 | minor: 7 22 | - major: 16 23 | minor: 3 24 | - major: 17 25 
| minor: 0 26 | platform: 27 | - type: amd64 28 | runs_on: ubuntu-22.04 29 | - type: arm64 30 | runs_on: cloud-image-runner-arm64 31 | 32 | env: 33 | PG_SRC_DIR: pgbuild 34 | PG_INSTALL_DIR: postgresql 35 | MAKE_JOBS: 6 36 | PG_CONFIG_PATH: postgresql/bin/pg_config 37 | TAG: ${{ github.event.inputs.tag }} 38 | TAG_DIR: pgvectorscale 39 | TAG_GIT_REF: ${{ github.event.inputs.TAG_GIT_REF == '' && github.event.inputs.tag || github.event.inputs.TAG_GIT_REF}} 40 | 41 | steps: 42 | - name: Checkout pgvectorscale 43 | uses: actions/checkout@v4 44 | 45 | - name: Install Linux Packages 46 | uses: ./.github/actions/install-packages 47 | 48 | - name: Install PostgreSQL ${{ matrix.pg.major }} 49 | uses: ./.github/actions/install-postgres 50 | with: 51 | pg-version: ${{ matrix.pg.major }}.${{ matrix.pg.minor }} 52 | pg-src-dir: ~/${{ env.PG_SRC_DIR }} 53 | pg-install-dir: ~/${{ env.PG_INSTALL_DIR }} 54 | 55 | - name: Install pgvector ${{ matrix.pgvector.version }} 56 | uses: ./.github/actions/install-pgvector 57 | with: 58 | pgvector-version: ${{ matrix.pgvector.version }} 59 | pg-install-dir: ~/${{ env.PG_INSTALL_DIR }} 60 | 61 | - name: Install pgrx 62 | uses: ./.github/actions/install-pgrx 63 | with: 64 | pg-install-dir: ~/${{ env.PG_INSTALL_DIR }} 65 | pgrx-version: 0.12.9 66 | 67 | - name: Run Clippy 68 | id: clippy 69 | run: | 70 | cd pgvectorscale 71 | cargo clippy --all-targets --no-default-features --features 'pg_test pg${{ matrix.pg.major }}' 72 | 73 | - name: Run tests 74 | id: runtests 75 | run: | 76 | cd pgvectorscale 77 | cargo pgrx test -- pg${{ matrix.pg.major }} 78 | -------------------------------------------------------------------------------- /.github/workflows/shellcheck.yaml: -------------------------------------------------------------------------------- 1 | # Test our shell scripts for bugs 2 | name: Shellcheck 3 | on: 4 | pull_request: 5 | paths: 6 | - '**.sh' 7 | - .github/workflows/shellcheck.yaml 8 | push: 9 | paths: 10 | - '**.sh' 11 | - 
.github/workflows/shellcheck.yaml 12 | jobs: 13 | shellcheck: 14 | name: Shellcheck 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Install Dependencies 19 | run: | 20 | sudo apt-get update 21 | sudo apt-get install shellcheck 22 | 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | 26 | - name: Run shellcheck 27 | run: | 28 | make shellcheck && exit 0 || exit 1 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | .idea 17 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/doublify/pre-commit-rust 3 | rev: v1.0 4 | hooks: 5 | - id: fmt 6 | args: ["--all", "--"] 7 | - id: cargo-check 8 | args: ["--"] 9 | - id: clippy 10 | args: ["--all-targets", "--", "-D", "warnings"] 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to pgvectorscale 2 | 3 | We appreciate any help the community can provide to make pgvectorscale better! 4 | 5 | You can help in different ways: 6 | 7 | * Open an [issue](https://github.com/timescale/pgvectorscale/issues) with a 8 | bug report, build issue, feature request, suggestion, etc. 
9 | 10 | * Fork this repository and submit a pull request 11 | 12 | For any particular improvement you want to make, it can be beneficial to 13 | begin discussion on the GitHub issues page. This is the best place to 14 | discuss your proposed improvement (and its implementation) with the core 15 | development team. 16 | 17 | Before we accept any code contributions, pgvectorscale contributors need to 18 | sign the [Contributor License Agreement](https://cla-assistant.io/timescale/pgvectorscale) (CLA). By signing a CLA, we can 19 | ensure that the community is free and confident in its ability to use your 20 | contributions. 21 | 22 | ## Development 23 | 24 | Please follow our DEVELOPMENT doc for [instructions how to develop and test](https://github.com/timescale/pgvectorscale/blob/main/DEVELOPMENT.md). 25 | 26 | ## Code review workflow 27 | 28 | * Sign the [Contributor License Agreement](https://cla-assistant.io/timescale/pgvectorscale) (CLA) if you're a new contributor. 29 | 30 | * Develop on your local branch: 31 | 32 | * Fork the repository and create a local feature branch to do work on, 33 | ideally on one thing at a time. Don't mix bug fixes with unrelated 34 | feature enhancements or stylistical changes. 35 | 36 | * Hack away. Add tests for non-trivial changes. 37 | 38 | * Run the [test suite](#testing) and make sure everything passes. 39 | 40 | * When committing, be sure to write good commit messages according to [these 41 | seven rules](https://chris.beams.io/posts/git-commit/#seven-rules). Doing 42 | `git commit` prints a message if any of the rules is violated. 43 | Stylistically, 44 | we use commit message titles in the imperative tense, e.g., `Add 45 | merge-append query optimization for time aggregate`. In the case of 46 | non-trivial changes, include a longer description in the commit message 47 | body explaining and detailing the changes. 
That is, a commit message 48 | should have a short title, followed by a empty line, and then 49 | followed by the longer description. 50 | 51 | * When committing, link which GitHub issue of [this 52 | repository](https://github.com/timescale/pgvectorscale/issues) is fixed or 53 | closed by the commit with a [linking keyword recognised by 54 | GitHub](https://docs.github.com/en/github/managing-your-work-on-github/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword). 55 | For example, if the commit fixes bug 123, add a line at the end of the 56 | commit message with `Fixes #123`, if the commit implements feature 57 | request 321, add a line at the end of the commit message `Closes #321`. 58 | This will be recognized by GitHub. It will close the corresponding issue 59 | and place a hyperlink under the number. 60 | 61 | * Push your changes to an upstream branch: 62 | 63 | * Make sure that each commit in the pull request will represent a 64 | logical change to the code, will compile, and will pass tests. 65 | 66 | * Make sure that the pull request message contains all important 67 | information from the commit messages including which issues are 68 | fixed and closed. If a pull request contains one commit only, then 69 | repeating the commit message is preferred, which is done automatically 70 | by GitHub when it creates the pull request. 71 | 72 | * Rebase your local feature branch against main (`git fetch origin`, 73 | then `git rebase origin/main`) to make sure you're 74 | submitting your changes on top of the newest version of our code. 75 | 76 | * When finalizing your PR (i.e., it has been approved for merging), 77 | aim for the fewest number of commits that 78 | make sense. That is, squash any "fix up" commits into the commit they 79 | fix rather than keep them separate. Each commit should represent a 80 | clean, logical change and include a descriptive commit message. 
81 | 82 | * Push your commit to your upstream feature branch: `git push -u <yourfork> my-feature-branch` 83 | 84 | * Create and manage pull request: 85 | 86 | * [Create a pull request using GitHub](https://help.github.com/articles/creating-a-pull-request). 87 | If you know a core developer well suited to reviewing your pull 88 | request, either mention them (preferably by GitHub name) in the PR's 89 | body or [assign them as a reviewer](https://help.github.com/articles/assigning-issues-and-pull-requests-to-other-github-users/). 90 | 91 | * Address feedback by amending your commit(s). If your change contains 92 | multiple commits, address each piece of feedback by amending that 93 | commit to which the particular feedback is aimed. 94 | 95 | * The PR is marked as accepted when the reviewer thinks it's ready to be 96 | merged. Most new contributors aren't allowed to merge themselves; in 97 | that case, we'll do it for you. 98 | 99 | ## Testing 100 | 101 | Every non-trivial change to the code base should be accompanied by a 102 | relevant addition to or modification of the test suite. 103 | 104 | Please check that the full test suite (including your test additions 105 | or changes) passes successfully on your local machine **before you 106 | open a pull request**. 107 | 108 | See our [testing](https://github.com/timescale/pgvectorscale/blob/main/DEVELOPMENT.md#testing) 109 | instructions for help with how to test. 
110 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["pgvectorscale"] 3 | resolver = "2" 4 | 5 | [profile.dev] 6 | panic = "unwind" 7 | 8 | [profile.release] 9 | panic = "unwind" 10 | opt-level = 3 11 | lto = "fat" 12 | codegen-units = 1 13 | #debug = true 14 | -------------------------------------------------------------------------------- /DEVELOPMENT.md: -------------------------------------------------------------------------------- 1 | # Setup your pgvectorscale developer environment 2 | 3 | You build pgvectorscale from source, then integrate the extension into each database in your PostgreSQL environment. 4 | 5 | ## pgvectorscale prerequisites 6 | 7 | To create a pgvectorscale developer environment, you need the following on your local machine: 8 | 9 | * [PostgreSQL v16](https://docs.timescale.com/self-hosted/latest/install/installation-linux/#install-and-configure-timescaledb-on-postgresql) 10 | * [pgvector](https://github.com/pgvector/pgvector/blob/master/README.md) 11 | * Development packages: 12 | ``` 13 | sudo apt-get install make gcc pkg-config clang postgresql-server-dev-16 libssl-dev 14 | ``` 15 | 16 | * [Rust][rust-language]: 17 | ```shell 18 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 19 | ``` 20 | 21 | ## Build and install pgvectorscale on your database 22 | 23 | 1. In Terminal, clone this repository and switch to the extension subdirectory: 24 | 25 | ```shell 26 | git clone https://github.com/timescale/pgvectorscale && \ 27 | cd pgvectorscale/pgvectorscale 28 | ``` 29 | 30 | 1. 
Install [Cargo-pgrx][cargo-pgrx]: 31 | 32 | ```bash 33 | cargo install --locked cargo-pgrx --version $(cargo metadata --format-version 1 | jq -r '.packages[] | select(.name == "pgrx") | .version') 34 | ``` 35 | You must reinstall cargo-pgrx whenever you update Rust, as cargo-pgrx must be 36 | built with the same compiler as pgvectorscale. 37 | 38 | 1. Initialize the pgrx development environment: 39 | 40 | ```bash 41 | cargo pgrx init --pg16 pg_config 42 | ``` 43 | 44 | 1. Build pgvectorscale: 45 | 46 | ```shell 47 | cargo pgrx install --release 48 | ``` 49 | 50 | If the destination folder requires elevated permissions, use the `--sudo` 51 | flag: 52 | 53 | ```shell 54 | cargo pgrx install --sudo --release 55 | ``` 56 | 57 | 1. Connect to the database: 58 | 59 | ```bash 60 | psql -d "postgres://<username>:<password>@<host>:<port>/<database-name>" 61 | ``` 62 | 63 | 1. Add pgvectorscale to your database: 64 | 65 | ```postgresql 66 | CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE; 67 | ``` 68 | 69 | # Troubleshooting 70 | 71 | ## MacOS: fatal error: 'stdio.h' file not found 72 | 73 | Make sure you have the command line tools: 74 | 75 | ```bash 76 | xcode-select --install 77 | ``` 78 | 79 | If the error persists, try explicitly setting the `SDKROOT` environment 80 | variable: 81 | 82 | ```bash 83 | export SDKROOT=$(xcrun --sdk macosx --show-sdk-path) 84 | ``` 85 | 86 | Make sure the variable was set: 87 | 88 | 89 | ```bash 90 | $ env | grep SDKROOT 91 | SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk 92 | ``` 93 | 94 | ## Permission denied while installing the extension artifacts 95 | 96 | If the PostgreSQL directory where the extension needs to be installed requires 97 | elevated permissions (for example, when using MacOS PostgreSQL that installs in the 98 | `/Applications` folder, or when installing from a package manager that uses a system 99 | directory), you need to use the pgrx `--sudo` flag: 100 | 101 | ```bash 102 | cargo pgrx install --sudo --release 103
| ``` 104 | 105 | [pgvector]: https://github.com/pgvector/pgvector/blob/master/README.md 106 | [rust-language]: https://www.rust-lang.org/ 107 | [cargo-pgrx]: https://lib.rs/crates/cargo-pgrx 108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The PostgreSQL License 2 | 3 | Permission to use, copy, modify, and distribute this software and its 4 | documentation for any purpose, without fee, and without a written agreement 5 | is hereby granted, provided that the above copyright notice and this paragraph 6 | and the following two paragraphs appear in all copies. 7 | 8 | IN NO EVENT SHALL TIMESCALE BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, 9 | SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING 10 | OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF Timescale HAS 11 | BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | 13 | TIMESCALE SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 14 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 15 | THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND TIMESCALE HAS NO 16 | OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR 17 | MODIFICATIONS. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash 2 | ROOTDIR = $(realpath .) 3 | RUST_SRCDIR =$(ROOTDIR)/pgvectorscale 4 | 5 | PG_CONFIG = $(shell which pg_config) 6 | EXTENSION=vectorscale 7 | 8 | PG_VERSION = $(shell ${PG_CONFIG} --version | awk -F'[ \.]' '{print $$2}') 9 | ##TODO error out if this is not PG14??? 
10 | PGRX_HOME?= ${HOME}/.pgrx 11 | PGRX_VERSION=0.9.8 12 | VECTOR_VERSION?=$(shell sed -n 's/^[[:space:]]*version[[:space:]]*=[[:space:]]*"\(.*\)"/\1/p' pgvectorscale/Cargo.toml) 13 | PG_DATA=${PGRX_HOME}/data-${PG_VERSION} 14 | 15 | PG_PKGLIBDIR=$(shell ${PG_CONFIG} --pkglibdir) 16 | PG_SHARELIBDIR=$(shell ${PG_CONFIG} --sharedir) 17 | $(info pg_pkglib = $(PG_PKGLIBDIR) and pg_sharelib = $(PG_SHARELIBDIR) ) 18 | 19 | MODULE_big = $(EXTENSION) 20 | PGXS := $(shell $(PG_CONFIG) --pgxs) 21 | 22 | include $(PGXS) 23 | PG_REGRESS='$(top_builddir)/src/test/regress/pg_regress' 24 | PG_REGRESS_OPTS_EXTRA=--create-role=superuser,tsdbadmin,test_role_1 --launcher=./test/runner.sh 25 | export TEST_OUTPUT_DIR:=$(ROOTDIR)/test_output 26 | export PG_ABS_SRCDIR:=$(ROOTDIR)/test 27 | export TEST_DBNAME:=regression 28 | 29 | ### default collation settings on Cloud is C.UTF-8 30 | PG_DEFAULT_REGRESS_LOCALE=$(shell uname | grep -q 'Darwin' && echo 'en_US.UTF-8' || echo 'C.UTF-8') 31 | PG_REGRESS_LOCALE?=$(PG_DEFAULT_REGRESS_LOCALE) 32 | PG_REGRESS_ENV=CONFDIR='$(CURDIR)/test' TESTDIR='$(CURDIR)' LC_COLLATE=$(PG_REGRESS_LOCALE) LC_CTYPE=$(PG_REGRESS_LOCALE) 33 | 34 | #ifdef PGHOST 35 | #USE_EXISTING_INSTANCE=0 36 | #endif 37 | 38 | ifdef USE_EXISTING_INSTANCE 39 | $(info Use existing instance) 40 | INSTANCE_OPTS= 41 | else 42 | $(info Use temp instance) 43 | INSTANCE_OPTS=--temp-instance=$(ROOTDIR)/test_instance --temp-config=$(ROOTDIR)/test/postgres.conf 44 | endif 45 | 46 | .PHONY: format 47 | format: 48 | cd $(RUST_SRCDIR)/src && rustfmt --edition 2021 *.rs 49 | 50 | .PHONY: build 51 | build: 52 | cd $(RUST_SRCDIR) && cargo build --features pg${PG_VERSION} $(EXTRA_RUST_ARGS) 53 | 54 | .PHONY: install-pgrx 55 | install-pgrx: 56 | cargo install cargo-pgrx --version ${PGRX_VERSION} 57 | 58 | .PHONY: init-pgrx 59 | init-pgrx: $(PG_DATA) 60 | 61 | $(PG_DATA): 62 | cd $(RUST_SRCDIR) && cargo pgrx init --pg${PG_VERSION}=${PG_CONFIG} 63 | 64 | .PHONY: install-debug 65 | ###pgxs.mk has a 
rule for install.So we need a different rule name 66 | install-debug: init-pgrx 67 | cd $(RUST_SRCDIR) && cargo pgrx install --features pg${PG_VERSION} 68 | 69 | .PHONY: install-release 70 | install-release: init-pgrx 71 | cd $(RUST_SRCDIR) && cargo pgrx install --release --features pg${PG_VERSION} 72 | 73 | 74 | .PHONY: package 75 | package: init-pgrx 76 | cd $(RUST_SRCDIR) && cargo pgrx package --features pg${PG_VERSION} 77 | 78 | .PHONY: shellcheck 79 | shellcheck: 80 | find . -name '*.sh' -exec shellcheck '{}' + 81 | 82 | .PHONY: shfmt 83 | shfmt: 84 | shfmt -w -i 4 test scripts 85 | 86 | 87 | .PHONY: release rust test prove install clean 88 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | pgvectorscale by Timescale (TM) 2 | 3 | Copyright (c) 2023-2024 Timescale, Inc. All Rights Reserved. 4 | 5 | Licensed under the PostgreSQL License (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | https://github.com/timescale/pgvectorscale/blob/main/LICENSE 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | -------------------------------------------------------------------------------- /pgvectorscale/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea/ 3 | /target 4 | *.iml 5 | **/*.rs.bk 6 | Cargo.lock 7 | -------------------------------------------------------------------------------- /pgvectorscale/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vectorscale" 3 | version = "0.7.1-dev" 4 | edition = "2021" 5 | 6 | [lib] 7 | crate-type = ["cdylib", "rlib"] 8 | 9 | [[bin]] 10 | name = "pgrx_embed_vectorscale" 11 | path = "./src/bin/pgrx_embed.rs" 12 | 13 | [features] 14 | default = ["pg17"] 15 | pg13 = ["pgrx/pg13", "pgrx-tests/pg13"] 16 | pg14 = ["pgrx/pg14", "pgrx-tests/pg14"] 17 | pg15 = ["pgrx/pg15", "pgrx-tests/pg15"] 18 | pg16 = ["pgrx/pg16", "pgrx-tests/pg16"] 19 | pg17 = ["pgrx/pg17", "pgrx-tests/pg17"] 20 | pg_test = [] 21 | 22 | [lints.rust] 23 | unexpected_cfgs = { level = "allow", check-cfg = [ 24 | 'cfg(pgrx_embed)', 25 | 'cfg(pg12)', 26 | ] } 27 | 28 | [dependencies] 29 | memoffset = "0.9.0" 30 | pgrx = "=0.12.9" 31 | rkyv = { version = "0.7.43", features = ["validation"] } 32 | simdeez = { version = "1.0.8" } 33 | rand = { version = "0.8", features = ["small_rng"] } 34 | pgvectorscale_derive = { path = "pgvectorscale_derive" } 35 | semver = "1.0.24" 36 | serial_test = "3.2.0" 37 | once_cell = "1.20.1" 38 | 39 | [dev-dependencies] 40 | pgrx-tests = "=0.12.9" 41 | pgrx-pg-config = "=0.12.9" 42 | criterion = "0.5.1" 43 | tempfile = "3.15.0" 44 | 45 | [[bench]] 46 | name = "distance" 47 | harness = false 48 | 49 | [[bench]] 50 | name = "lsr" 51 | harness = false 52 | -------------------------------------------------------------------------------- /pgvectorscale/benches/lsr.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | cmp::{Ordering, Reverse}, 3 | 
collections::BinaryHeap, 4 | }; 5 | 6 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 7 | use rand::Rng; 8 | 9 | pub struct ListSearchNeighbor { 10 | pub index_pointer: u64, 11 | distance: f32, 12 | visited: bool, 13 | _private_data: u64, 14 | } 15 | 16 | impl PartialOrd for ListSearchNeighbor { 17 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 18 | Some(self.cmp(other)) 19 | } 20 | } 21 | 22 | impl PartialEq for ListSearchNeighbor { 23 | fn eq(&self, other: &Self) -> bool { 24 | self.index_pointer == other.index_pointer 25 | } 26 | } 27 | 28 | impl Eq for ListSearchNeighbor {} 29 | 30 | impl Ord for ListSearchNeighbor { 31 | fn cmp(&self, other: &Self) -> Ordering { 32 | self.distance.partial_cmp(&other.distance).unwrap() 33 | } 34 | } 35 | 36 | pub struct ListSearchResult { 37 | candidate_storage: Vec<ListSearchNeighbor>, //plain storage 38 | best_candidate: Vec<usize>, //pos in candidate storage, sorted by distance 39 | } 40 | 41 | impl ListSearchResult { 42 | pub fn get_lsn_by_idx(&self, idx: usize) -> &ListSearchNeighbor { 43 | &self.candidate_storage[idx] 44 | } 45 | 46 | pub fn insert_neighbor(&mut self, n: ListSearchNeighbor) { 47 | //insert while preserving sort order. 48 | let idx = self 49 | .best_candidate 50 | .partition_point(|x| self.candidate_storage[*x] < n); 51 | self.candidate_storage.push(n); 52 | let pos = self.candidate_storage.len() - 1; 53 | self.best_candidate.insert(idx, pos) 54 | } 55 | 56 | fn visit_closest(&mut self, pos_limit: usize) -> Option<usize> { 57 | //OPT: should we optimize this not to do a linear search each time? 
58 | let neighbor_position = self 59 | .best_candidate 60 | .iter() 61 | .position(|n| !self.candidate_storage[*n].visited); 62 | match neighbor_position { 63 | Some(pos) => { 64 | if pos > pos_limit { 65 | return None; 66 | } 67 | let n = &mut self.candidate_storage[self.best_candidate[pos]]; 68 | n.visited = true; 69 | Some(self.best_candidate[pos]) 70 | } 71 | None => None, 72 | } 73 | } 74 | } 75 | 76 | pub struct ListSearchResultMinHeap { 77 | candidates: BinaryHeap<Reverse<ListSearchNeighbor>>, 78 | visited: Vec<ListSearchNeighbor>, 79 | } 80 | 81 | impl ListSearchResultMinHeap { 82 | pub fn insert_neighbor(&mut self, n: ListSearchNeighbor) { 83 | //insert while preserving sort order. 84 | // self.candidate_storage.push(n); 85 | // let pos = self.candidate_storage.len() - 1; 86 | self.candidates.push(Reverse(n)); 87 | 88 | /*let idx = self 89 | .best_candidate 90 | .partition_point(|x| self.candidate_storage[*x].distance < n.distance); 91 | self.candidate_storage.push(n); 92 | let pos = self.candidate_storage.len() - 1; 93 | self.best_candidate.insert(idx, pos)*/ 94 | } 95 | 96 | fn visit_closest(&mut self, pos_limit: usize) -> Option<&ListSearchNeighbor> { 97 | //OPT: should we optimize this not to do a linear search each time? 
98 | if self.candidates.is_empty() { 99 | panic!("no candidates left"); 100 | //return None; 101 | } 102 | 103 | if self.visited.len() > pos_limit { 104 | let node_at_pos = &self.visited[pos_limit - 1]; 105 | let head = self.candidates.peek().unwrap(); 106 | if head.0.distance >= node_at_pos.distance { 107 | return None; 108 | } 109 | } 110 | 111 | let head = self.candidates.pop().unwrap(); 112 | let idx = self 113 | .visited 114 | .partition_point(|x| x.distance < head.0.distance); 115 | self.visited.insert(idx, head.0); 116 | Some(&self.visited[idx]) 117 | } 118 | } 119 | 120 | fn run_lsr_min_heap(lsr: &mut ListSearchResultMinHeap) { 121 | let item = lsr.visit_closest(100000000); 122 | let lsn = item.unwrap(); 123 | 124 | let mut rng = rand::thread_rng(); 125 | let delta: f64 = rng.gen(); // generates a float between 0 and 1 126 | let distance = lsn.distance + ((delta * 5.0) as f32); 127 | 128 | for _ in 0..20 { 129 | lsr.insert_neighbor(ListSearchNeighbor { 130 | index_pointer: 0, 131 | distance, 132 | visited: false, 133 | _private_data: 2, 134 | }) 135 | } 136 | } 137 | 138 | fn run_lsr(lsr: &mut ListSearchResult) { 139 | let item_idx = lsr.visit_closest(1000000); 140 | let lsn = lsr.get_lsn_by_idx(item_idx.unwrap()); 141 | 142 | let mut rng = rand::thread_rng(); 143 | let delta: f64 = rng.gen(); // generates a float between 0 and 1 144 | let distance = lsn.distance + ((delta * 5.0) as f32); 145 | 146 | for _ in 0..20 { 147 | lsr.insert_neighbor(ListSearchNeighbor { 148 | index_pointer: 0, 149 | distance, 150 | visited: false, 151 | _private_data: 2, 152 | }) 153 | } 154 | } 155 | 156 | pub fn benchmark_lsr(c: &mut Criterion) { 157 | let mut lsr = ListSearchResult { 158 | candidate_storage: Vec::new(), 159 | best_candidate: Vec::new(), 160 | }; 161 | 162 | lsr.insert_neighbor(ListSearchNeighbor { 163 | index_pointer: 0, 164 | distance: 100.0, 165 | visited: false, 166 | _private_data: 1, 167 | }); 168 | 169 | c.bench_function("lsr OG", |b| b.iter(|| 
run_lsr(black_box(&mut lsr)))); 170 | } 171 | 172 | pub fn benchmark_lsr_min_heap(c: &mut Criterion) { 173 | let mut lsr = ListSearchResultMinHeap { 174 | candidates: BinaryHeap::new(), 175 | visited: Vec::new(), 176 | }; 177 | 178 | lsr.insert_neighbor(ListSearchNeighbor { 179 | index_pointer: 0, 180 | distance: 100.0, 181 | visited: false, 182 | _private_data: 1, 183 | }); 184 | 185 | c.bench_function("lsr min heap", |b| { 186 | b.iter(|| run_lsr_min_heap(black_box(&mut lsr))) 187 | }); 188 | } 189 | 190 | criterion_group!(benches_lsr, benchmark_lsr, benchmark_lsr_min_heap); 191 | 192 | criterion_main!(benches_lsr); 193 | /* 194 | fn fibonacci(n: u64) -> u64 { 195 | match n { 196 | 0 => 1, 197 | 1 => 1, 198 | n => fibonacci(n - 1) + fibonacci(n - 2), 199 | } 200 | } 201 | pub fn criterion_benchmark(c: &mut Criterion) { 202 | c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20)))); 203 | } 204 | 205 | criterion_group!(benches, criterion_benchmark); 206 | criterion_main!(benches);*/ 207 | -------------------------------------------------------------------------------- /pgvectorscale/pgvectorscale_derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pgvectorscale_derive" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [lib] 7 | proc-macro = true 8 | 9 | [dependencies] 10 | syn = "1.0" 11 | quote = "1.0" -------------------------------------------------------------------------------- /pgvectorscale/pgvectorscale_derive/src/lib.rs: -------------------------------------------------------------------------------- 1 | use proc_macro::TokenStream; 2 | use quote::{format_ident, quote}; 3 | 4 | #[proc_macro_derive(Readable)] 5 | pub fn readable_macro_derive(input: TokenStream) -> TokenStream { 6 | // Construct a representation of Rust code as a syntax tree 7 | // that we can manipulate 8 | let ast = syn::parse(input).unwrap(); 9 | 10 | // Build the trait implementation 11 | 
impl_readable_macro(&ast) 12 | } 13 | 14 | #[proc_macro_derive(Writeable)] 15 | pub fn writeable_macro_derive(input: TokenStream) -> TokenStream { 16 | let ast = syn::parse(input).unwrap(); 17 | impl_writeable_macro(&ast) 18 | } 19 | 20 | fn impl_readable_macro(ast: &syn::DeriveInput) -> TokenStream { 21 | let name = &ast.ident; 22 | let readable_name = format_ident!("Readable{}", name); 23 | let archived_name = format_ident!("Archived{}", name); 24 | let gen = quote! { 25 | pub struct #readable_name<'a> { 26 | _rb: ReadableBuffer<'a>, 27 | } 28 | 29 | impl<'a> #readable_name<'a> { 30 | pub fn with_readable_buffer(rb: ReadableBuffer<'a>) -> Self { 31 | Self { _rb: rb } 32 | } 33 | 34 | pub fn get_archived_node(&self) -> &'a #archived_name { 35 | // checking the code here is expensive during build, so skip it. 36 | // TODO: should we check the data during queries? 37 | //rkyv::check_archived_root::<Node>(self._rb.get_data_slice()).unwrap() 38 | unsafe { rkyv::archived_root::<#name>(self._rb.get_data_slice()) } 39 | } 40 | 41 | pub fn get_owned_page(self) -> crate::util::page::ReadablePage<'a> { 42 | self._rb.get_owned_page() 43 | } 44 | } 45 | 46 | impl ReadableNode for #name { 47 | type Node<'a> = #readable_name<'a>; 48 | unsafe fn read<'a, 'b, S: crate::access_method::stats::StatsNodeRead>(index: &'a PgRelation, index_pointer: ItemPointer, stats: &'b mut S) -> Self::Node<'a> { 49 | let rb = index_pointer.read_bytes(index); 50 | stats.record_read(); 51 | #readable_name::with_readable_buffer(rb) 52 | } 53 | } 54 | }; 55 | gen.into() 56 | } 57 | 58 | fn impl_writeable_macro(ast: &syn::DeriveInput) -> TokenStream { 59 | let name = &ast.ident; 60 | let writeable_name = format_ident!("Writable{}", name); 61 | let archived_name = format_ident!("Archived{}", name); 62 | let gen = quote! 
{ 63 | 64 | pub struct #writeable_name<'a> { 65 | wb: WritableBuffer<'a>, 66 | } 67 | 68 | impl #archived_name { 69 | pub fn with_data(data: &mut [u8]) -> std::pin::Pin<&mut #archived_name> { 70 | let pinned_bytes = std::pin::Pin::new(data); 71 | unsafe { rkyv::archived_root_mut::<#name>(pinned_bytes) } 72 | } 73 | } 74 | 75 | impl<'a> #writeable_name<'a> { 76 | pub fn get_archived_node(&mut self) -> std::pin::Pin<&'a mut #archived_name> { 77 | #archived_name::with_data(self.wb.get_data_slice()) 78 | } 79 | 80 | pub fn commit(self) { 81 | self.wb.commit() 82 | } 83 | } 84 | 85 | impl WriteableNode for #name { 86 | type Node<'a> = #writeable_name<'a>; 87 | unsafe fn modify<'a, 'b, S: crate::access_method::stats::StatsNodeModify>(index: &'a PgRelation, index_pointer: ItemPointer, stats: &'b mut S) -> Self::Node<'a> { 88 | let wb = index_pointer.modify_bytes(index); 89 | stats.record_modify(); 90 | #writeable_name { wb: wb } 91 | } 92 | 93 | fn write<S: crate::access_method::stats::StatsNodeWrite>(&self, tape: &mut crate::util::tape::Tape, stats: &mut S) -> ItemPointer { 94 | //TODO 256 probably too small 95 | let bytes = self.serialize_to_vec(); 96 | stats.record_write(); 97 | unsafe { tape.write(&bytes) } 98 | } 99 | 100 | fn serialize_to_vec(&self) -> rkyv::util::AlignedVec { 101 | //TODO 256 probably too small 102 | rkyv::to_bytes::<_, 256>(self).unwrap() 103 | } 104 | } 105 | }; 106 | gen.into() 107 | } 108 | -------------------------------------------------------------------------------- /pgvectorscale/sql/timescale_vector--0.0.1--0.0.2.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timescale/pgvectorscale/af45212be3e17d8d689b2dcabd7560023a8092a6/pgvectorscale/sql/timescale_vector--0.0.1--0.0.2.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.2.0.sql: 
-------------------------------------------------------------------------------- 1 | /* 2 | This file is auto generated by pgrx. 3 | 4 | The ordering of items is not stable, it is driven by a dependency graph. 5 | */ 6 | 7 | --rename index access method 8 | 9 | UPDATE pg_catalog.pg_am SET amname = 'diskann' WHERE amname = 'tsv'; 10 | ALTER FUNCTION tsv_amhandler(internal) RENAME TO diskann_amhandler; 11 | 12 | -- src/access_method/mod.rs:48 13 | -- pgvectorscale::access_method::amhandler 14 | 15 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.2.0', 'amhandler_wrapper'; 16 | 17 | DO $$ 18 | DECLARE 19 | c int; 20 | BEGIN 21 | SELECT count(*) 22 | INTO c 23 | FROM pg_catalog.pg_am a 24 | WHERE a.amname = 'diskann'; 25 | 26 | IF c = 0 THEN 27 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 28 | END IF; 29 | END; 30 | $$; 31 | 32 | 33 | 34 | 35 | -- src/access_method/mod.rs:91 36 | 37 | DO $$ 38 | DECLARE 39 | c int; 40 | BEGIN 41 | SELECT count(*) 42 | INTO c 43 | FROM pg_catalog.pg_opclass c 44 | WHERE c.opcname = 'vector_cosine_ops' 45 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann'); 46 | 47 | IF c = 0 THEN 48 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 49 | FOR TYPE vector USING diskann AS 50 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops; 51 | END IF; 52 | END; 53 | $$; 54 | -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.3.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.2.0--0.3.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.4.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.3.0--0.4.0.sql 
-------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.5.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.4.0--0.5.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.5.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.0--0.5.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.6.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.1--0.6.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.7.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.6.0--0.7.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.0.2--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.1.0--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.2.0--0.3.0.sql: -------------------------------------------------------------------------------- 1 | /* 2 | This file is auto generated by pgrx. 3 | 4 | The ordering of items is not stable, it is driven by a dependency graph. 
5 | */ 6 | 7 | -- src/access_method/mod.rs:44 8 | -- vectorscale::access_method::amhandler 9 | 10 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.3.0', 'amhandler_wrapper'; 11 | 12 | DO $$ 13 | DECLARE 14 | c int; 15 | BEGIN 16 | SELECT count(*) 17 | INTO c 18 | FROM pg_catalog.pg_am a 19 | WHERE a.amname = 'diskann'; 20 | 21 | IF c = 0 THEN 22 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 23 | END IF; 24 | END; 25 | $$; 26 | 27 | 28 | 29 | 30 | -- src/access_method/mod.rs:89 31 | 32 | DO $$ 33 | DECLARE 34 | c int; 35 | BEGIN 36 | SELECT count(*) 37 | INTO c 38 | FROM pg_catalog.pg_opclass c 39 | WHERE c.opcname = 'vector_cosine_ops' 40 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann'); 41 | 42 | IF c = 0 THEN 43 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 44 | FOR TYPE vector USING diskann AS 45 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops; 46 | END IF; 47 | END; 48 | $$; 49 | 50 | 51 | -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.2.0--0.4.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.3.0--0.4.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.2.0--0.5.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.4.0--0.5.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.2.0--0.5.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.0--0.5.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.2.0--0.6.0.sql: 
-------------------------------------------------------------------------------- 1 | vectorscale--0.5.1--0.6.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.2.0--0.7.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.6.0--0.7.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.2.0--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.3.0--0.4.0.sql: -------------------------------------------------------------------------------- 1 | /* <begin connected objects> */ 2 | /* 3 | This file is auto generated by pgrx. 4 | 5 | The ordering of items is not stable, it is driven by a dependency graph. 6 | */ 7 | /* </end connected objects> */ 8 | 9 | /* <begin connected objects> */ 10 | -- src/access_method/mod.rs:26 11 | -- vectorscale::access_method::amhandler 12 | 13 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.4.0', 'amhandler_wrapper'; 14 | 15 | DO $$ 16 | DECLARE 17 | c int; 18 | BEGIN 19 | SELECT count(*) 20 | INTO c 21 | FROM pg_catalog.pg_am a 22 | WHERE a.amname = 'diskann'; 23 | 24 | IF c = 0 THEN 25 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 26 | END IF; 27 | END; 28 | $$; 29 | /* </end connected objects> */ 30 | 31 | /* <begin connected objects> */ 32 | -- src/access_method/mod.rs:89 33 | -- requires: 34 | -- amhandler 35 | 36 | 37 | DO $$ 38 | DECLARE 39 | c int; 40 | BEGIN 41 | SELECT count(*) 42 | INTO c 43 | FROM pg_catalog.pg_opclass c 44 | WHERE c.opcname = 'vector_cosine_ops' 45 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am 
WHERE am.amname = 'diskann'); 46 | 47 | IF c = 0 THEN 48 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 49 | FOR TYPE vector USING diskann AS 50 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops; 51 | END IF; 52 | END; 53 | $$; 54 | /* </end connected objects> */ 55 | 56 | -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.3.0--0.5.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.4.0--0.5.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.3.0--0.5.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.0--0.5.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.3.0--0.6.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.1--0.6.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.3.0--0.7.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.6.0--0.7.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.3.0--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.4.0--0.5.0.sql: -------------------------------------------------------------------------------- 1 | /* <begin connected objects> */ 2 | /* 3 | This file is auto generated by pgrx. 4 | 5 | The ordering of items is not stable, it is driven by a dependency graph. 
6 | */ 7 | /* </end connected objects> */ 8 | 9 | /* <begin connected objects> */ 10 | -- src/access_method/mod.rs:29 11 | -- vectorscale::access_method::amhandler 12 | 13 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.5.0', 'amhandler_wrapper'; 14 | 15 | DO $$ 16 | DECLARE 17 | c int; 18 | BEGIN 19 | SELECT count(*) 20 | INTO c 21 | FROM pg_catalog.pg_am a 22 | WHERE a.amname = 'diskann'; 23 | 24 | IF c = 0 THEN 25 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 26 | END IF; 27 | END; 28 | $$; 29 | /* </end connected objects> */ 30 | 31 | /* <begin connected objects> */ 32 | -- src/access_method/distance.rs:42 33 | -- vectorscale::access_method::distance::distance_type_cosine 34 | CREATE FUNCTION "distance_type_cosine"() RETURNS smallint /* i16 */ 35 | IMMUTABLE STRICT PARALLEL SAFE 36 | LANGUAGE c /* Rust */ 37 | AS '$libdir/vectorscale-0.5.0', 'distance_type_cosine_wrapper'; 38 | /* </end connected objects> */ 39 | 40 | /* <begin connected objects> */ 41 | -- src/access_method/distance.rs:47 42 | -- vectorscale::access_method::distance::distance_type_l2 43 | CREATE FUNCTION "distance_type_l2"() RETURNS smallint /* i16 */ 44 | IMMUTABLE STRICT PARALLEL SAFE 45 | LANGUAGE c /* Rust */ 46 | AS '$libdir/vectorscale-0.5.0', 'distance_type_l2_wrapper'; 47 | /* </end connected objects> */ 48 | 49 | /* <begin connected objects> */ 50 | -- src/access_method/mod.rs:163 51 | -- requires: 52 | -- amhandler 53 | -- distance_type_cosine 54 | -- distance_type_l2 55 | 56 | 57 | DO $$ 58 | DECLARE 59 | have_cos_ops int; 60 | have_l2_ops int; 61 | BEGIN 62 | -- Has cosine operator class been installed previously? 
63 | SELECT count(*) 64 | INTO have_cos_ops 65 | FROM pg_catalog.pg_opclass c 66 | WHERE c.opcname = 'vector_cosine_ops' 67 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 68 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 69 | 70 | -- Has L2 operator class been installed previously? 71 | SELECT count(*) 72 | INTO have_l2_ops 73 | FROM pg_catalog.pg_opclass c 74 | WHERE c.opcname = 'vector_l2_ops' 75 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 76 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 77 | 78 | IF have_cos_ops = 0 THEN 79 | -- Fresh install from scratch 80 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 81 | FOR TYPE vector USING diskann AS 82 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, 83 | FUNCTION 1 distance_type_cosine(); 84 | 85 | CREATE OPERATOR CLASS vector_l2_ops 86 | FOR TYPE vector USING diskann AS 87 | OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, 88 | FUNCTION 1 distance_type_l2(); 89 | ELSIF have_l2_ops = 0 THEN 90 | -- Upgrade to add L2 distance support and update cosine opclass to 91 | -- include the distance_type_cosine function 92 | INSERT INTO pg_amproc (oid, amprocfamily, amproclefttype, amprocrighttype, amprocnum, amproc) 93 | SELECT (select (max(oid)::int + 1)::oid from pg_amproc), c.opcfamily, c.opcintype, c.opcintype, 1, '@extschema@.distance_type_l2'::regproc 94 | FROM pg_opclass c, pg_am a 95 | WHERE a.oid = c.opcmethod AND c.opcname = 'vector_cosine_ops' AND a.amname = 'diskann'; 96 | 97 | CREATE OPERATOR CLASS vector_l2_ops 98 | FOR TYPE vector USING diskann AS 99 | OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, 100 | FUNCTION 1 distance_type_l2(); 101 | END IF; 102 | END; 103 | $$; 104 | /* </end connected objects> */ 105 | 106 | -------------------------------------------------------------------------------- 
/pgvectorscale/sql/vectorscale--0.4.0--0.5.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.0--0.5.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.4.0--0.6.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.1--0.6.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.4.0--0.7.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.6.0--0.7.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.4.0--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.5.0--0.5.1.sql: -------------------------------------------------------------------------------- 1 | /* <begin connected objects> */ 2 | /* 3 | This file is auto generated by pgrx. 4 | 5 | The ordering of items is not stable, it is driven by a dependency graph. 
6 | */ 7 | /* </end connected objects> */ 8 | 9 | /* <begin connected objects> */ 10 | -- pgvectorscale/src/access_method/mod.rs:29 11 | -- vectorscale::access_method::amhandler 12 | 13 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.5.1', 'amhandler_wrapper'; 14 | 15 | DO $$ 16 | DECLARE 17 | c int; 18 | BEGIN 19 | SELECT count(*) 20 | INTO c 21 | FROM pg_catalog.pg_am a 22 | WHERE a.amname = 'diskann'; 23 | 24 | IF c = 0 THEN 25 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 26 | END IF; 27 | END; 28 | $$; 29 | /* </end connected objects> */ 30 | 31 | /* <begin connected objects> */ 32 | -- pgvectorscale/src/access_method/distance.rs:42 33 | -- vectorscale::access_method::distance::distance_type_cosine 34 | CREATE OR REPLACE FUNCTION "distance_type_cosine"() RETURNS smallint /* i16 */ 35 | IMMUTABLE STRICT PARALLEL SAFE 36 | LANGUAGE c /* Rust */ 37 | AS '$libdir/vectorscale-0.5.1', 'distance_type_cosine_wrapper'; 38 | /* </end connected objects> */ 39 | 40 | /* <begin connected objects> */ 41 | -- pgvectorscale/src/access_method/distance.rs:47 42 | -- vectorscale::access_method::distance::distance_type_l2 43 | CREATE OR REPLACE FUNCTION "distance_type_l2"() RETURNS smallint /* i16 */ 44 | IMMUTABLE STRICT PARALLEL SAFE 45 | LANGUAGE c /* Rust */ 46 | AS '$libdir/vectorscale-0.5.1', 'distance_type_l2_wrapper'; 47 | /* </end connected objects> */ 48 | 49 | /* <begin connected objects> */ 50 | -- pgvectorscale/src/access_method/mod.rs:163 51 | -- requires: 52 | -- amhandler 53 | -- distance_type_cosine 54 | -- distance_type_l2 55 | 56 | 57 | DO $$ 58 | DECLARE 59 | have_cos_ops int; 60 | have_l2_ops int; 61 | BEGIN 62 | -- Has cosine operator class been installed previously? 
63 | SELECT count(*) 64 | INTO have_cos_ops 65 | FROM pg_catalog.pg_opclass c 66 | WHERE c.opcname = 'vector_cosine_ops' 67 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 68 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 69 | 70 | -- Has L2 operator class been installed previously? 71 | SELECT count(*) 72 | INTO have_l2_ops 73 | FROM pg_catalog.pg_opclass c 74 | WHERE c.opcname = 'vector_l2_ops' 75 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 76 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 77 | 78 | IF have_cos_ops = 0 THEN 79 | -- Fresh install from scratch 80 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 81 | FOR TYPE vector USING diskann AS 82 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, 83 | FUNCTION 1 distance_type_cosine(); 84 | 85 | CREATE OPERATOR CLASS vector_l2_ops 86 | FOR TYPE vector USING diskann AS 87 | OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, 88 | FUNCTION 1 distance_type_l2(); 89 | ELSIF have_l2_ops = 0 THEN 90 | -- Upgrade to add L2 distance support and update cosine opclass to 91 | -- include the distance_type_cosine function 92 | INSERT INTO pg_amproc (oid, amprocfamily, amproclefttype, amprocrighttype, amprocnum, amproc) 93 | SELECT (select (max(oid)::int + 1)::oid from pg_amproc), c.opcfamily, c.opcintype, c.opcintype, 1, '@extschema@.distance_type_l2'::regproc 94 | FROM pg_opclass c, pg_am a 95 | WHERE a.oid = c.opcmethod AND c.opcname = 'vector_cosine_ops' AND a.amname = 'diskann'; 96 | 97 | CREATE OPERATOR CLASS vector_l2_ops 98 | FOR TYPE vector USING diskann AS 99 | OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, 100 | FUNCTION 1 distance_type_l2(); 101 | END IF; 102 | END; 103 | $$; 104 | /* </end connected objects> */ 105 | 106 | -------------------------------------------------------------------------------- 
/pgvectorscale/sql/vectorscale--0.5.0--0.6.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.5.1--0.6.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.5.0--0.7.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.6.0--0.7.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.5.0--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.5.1--0.6.0.sql: -------------------------------------------------------------------------------- 1 | /* <begin connected objects> */ 2 | /* 3 | This file is auto generated by pgrx. 4 | 5 | The ordering of items is not stable, it is driven by a dependency graph. 
6 | */ 7 | /* </end connected objects> */ 8 | 9 | /* <begin connected objects> */ 10 | -- pgvectorscale/src/access_method/mod.rs:33 11 | -- vectorscale::access_method::amhandler 12 | 13 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.6.0', 'amhandler_wrapper'; 14 | 15 | DO $$ 16 | DECLARE 17 | c int; 18 | BEGIN 19 | SELECT count(*) 20 | INTO c 21 | FROM pg_catalog.pg_am a 22 | WHERE a.amname = 'diskann'; 23 | 24 | IF c = 0 THEN 25 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 26 | END IF; 27 | END; 28 | $$; 29 | /* </end connected objects> */ 30 | 31 | /* <begin connected objects> */ 32 | -- pgvectorscale/src/access_method/distance.rs:47 33 | -- vectorscale::access_method::distance::distance_type_cosine 34 | CREATE OR REPLACE FUNCTION "distance_type_cosine"() RETURNS smallint /* i16 */ 35 | IMMUTABLE STRICT PARALLEL SAFE 36 | LANGUAGE c /* Rust */ 37 | AS '$libdir/vectorscale-0.6.0', 'distance_type_cosine_wrapper'; 38 | /* </end connected objects> */ 39 | 40 | /* <begin connected objects> */ 41 | -- pgvectorscale/src/access_method/distance.rs:57 42 | -- vectorscale::access_method::distance::distance_type_inner_product 43 | CREATE OR REPLACE FUNCTION "distance_type_inner_product"() RETURNS smallint /* i16 */ 44 | IMMUTABLE STRICT PARALLEL SAFE 45 | LANGUAGE c /* Rust */ 46 | AS '$libdir/vectorscale-0.6.0', 'distance_type_inner_product_wrapper'; 47 | /* </end connected objects> */ 48 | 49 | /* <begin connected objects> */ 50 | -- pgvectorscale/src/access_method/distance.rs:52 51 | -- vectorscale::access_method::distance::distance_type_l2 52 | CREATE OR REPLACE FUNCTION "distance_type_l2"() RETURNS smallint /* i16 */ 53 | IMMUTABLE STRICT PARALLEL SAFE 54 | LANGUAGE c /* Rust */ 55 | AS '$libdir/vectorscale-0.6.0', 'distance_type_l2_wrapper'; 56 | /* </end connected objects> */ 57 | 58 | /* <begin connected objects> */ 59 | -- 
pgvectorscale/src/access_method/mod.rs:167 60 | -- requires: 61 | -- amhandler 62 | -- distance_type_cosine 63 | -- distance_type_l2 64 | -- distance_type_inner_product 65 | 66 | 67 | DO $$ 68 | DECLARE 69 | have_cos_ops int; 70 | have_l2_ops int; 71 | have_ip_ops int; 72 | BEGIN 73 | -- Has cosine operator class been installed previously? 74 | SELECT count(*) 75 | INTO have_cos_ops 76 | FROM pg_catalog.pg_opclass c 77 | WHERE c.opcname = 'vector_cosine_ops' 78 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 79 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 80 | 81 | -- Has L2 operator class been installed previously? 82 | SELECT count(*) 83 | INTO have_l2_ops 84 | FROM pg_catalog.pg_opclass c 85 | WHERE c.opcname = 'vector_l2_ops' 86 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 87 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 88 | 89 | -- Has inner product operator class been installed previously? 90 | SELECT count(*) 91 | INTO have_ip_ops 92 | FROM pg_catalog.pg_opclass c 93 | WHERE c.opcname = 'vector_ip_ops' 94 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 95 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 96 | 97 | IF have_cos_ops = 0 THEN 98 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 99 | FOR TYPE vector USING diskann AS 100 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, 101 | FUNCTION 1 distance_type_cosine(); 102 | ELSIF have_l2_ops = 0 THEN 103 | -- Upgrade from 0.4.0 to 0.5.0. Update cosine opclass to include 104 | -- the distance_type_cosine function. 
105 | INSERT INTO pg_amproc (oid, amprocfamily, amproclefttype, amprocrighttype, amprocnum, amproc) 106 | SELECT (select (max(oid)::int + 1)::oid from pg_amproc), c.opcfamily, c.opcintype, c.opcintype, 1, '@extschema@.distance_type_l2'::regproc 107 | FROM pg_opclass c, pg_am a 108 | WHERE a.oid = c.opcmethod AND c.opcname = 'vector_cosine_ops' AND a.amname = 'diskann'; 109 | END IF; 110 | 111 | IF have_l2_ops = 0 THEN 112 | CREATE OPERATOR CLASS vector_l2_ops 113 | FOR TYPE vector USING diskann AS 114 | OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, 115 | FUNCTION 1 distance_type_l2(); 116 | END IF; 117 | 118 | IF have_ip_ops = 0 THEN 119 | CREATE OPERATOR CLASS vector_ip_ops 120 | FOR TYPE vector USING diskann AS 121 | OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, 122 | FUNCTION 1 distance_type_inner_product(); 123 | END IF; 124 | END; 125 | $$; 126 | /* </end connected objects> */ 127 | 128 | -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.5.1--0.7.0.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.6.0--0.7.0.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.5.1--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.6.0--0.7.0.sql: -------------------------------------------------------------------------------- 1 | /* <begin connected objects> */ 2 | /* 3 | This file is auto generated by pgrx. 4 | 5 | The ordering of items is not stable, it is driven by a dependency graph. 
6 | */ 7 | /* </end connected objects> */ 8 | 9 | /* <begin connected objects> */ 10 | -- pgvectorscale/src/access_method/mod.rs:38 11 | -- vectorscale::access_method::amhandler 12 | 13 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.7.0', 'amhandler_wrapper'; 14 | 15 | DO $$ 16 | DECLARE 17 | c int; 18 | BEGIN 19 | SELECT count(*) 20 | INTO c 21 | FROM pg_catalog.pg_am a 22 | WHERE a.amname = 'diskann'; 23 | 24 | IF c = 0 THEN 25 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 26 | END IF; 27 | END; 28 | $$; 29 | /* </end connected objects> */ 30 | 31 | /* <begin connected objects> */ 32 | -- pgvectorscale/src/access_method/distance.rs:47 33 | -- vectorscale::access_method::distance::distance_type_cosine 34 | CREATE OR REPLACE FUNCTION "distance_type_cosine"() RETURNS smallint /* i16 */ 35 | IMMUTABLE STRICT PARALLEL SAFE 36 | LANGUAGE c /* Rust */ 37 | AS '$libdir/vectorscale-0.7.0', 'distance_type_cosine_wrapper'; 38 | /* </end connected objects> */ 39 | 40 | /* <begin connected objects> */ 41 | -- pgvectorscale/src/access_method/distance.rs:57 42 | -- vectorscale::access_method::distance::distance_type_inner_product 43 | CREATE OR REPLACE FUNCTION "distance_type_inner_product"() RETURNS smallint /* i16 */ 44 | IMMUTABLE STRICT PARALLEL SAFE 45 | LANGUAGE c /* Rust */ 46 | AS '$libdir/vectorscale-0.7.0', 'distance_type_inner_product_wrapper'; 47 | /* </end connected objects> */ 48 | 49 | /* <begin connected objects> */ 50 | -- pgvectorscale/src/access_method/distance.rs:52 51 | -- vectorscale::access_method::distance::distance_type_l2 52 | CREATE OR REPLACE FUNCTION "distance_type_l2"() RETURNS smallint /* i16 */ 53 | IMMUTABLE STRICT PARALLEL SAFE 54 | LANGUAGE c /* Rust */ 55 | AS '$libdir/vectorscale-0.7.0', 'distance_type_l2_wrapper'; 56 | /* </end connected objects> */ 57 | 58 | /* <begin connected objects> */ 59 | -- 
pgvectorscale/src/access_method/mod.rs:290 60 | -- vectorscale::access_method::smallint_array_overlap 61 | CREATE FUNCTION "smallint_array_overlap"( 62 | "left" smallint[], /* pgrx::datum::array::Array<i16> */ 63 | "right" smallint[] /* pgrx::datum::array::Array<i16> */ 64 | ) RETURNS bool /* bool */ 65 | IMMUTABLE STRICT PARALLEL SAFE 66 | LANGUAGE c /* Rust */ 67 | AS '$libdir/vectorscale-0.7.0', 'smallint_array_overlap_wrapper'; 68 | /* </end connected objects> */ 69 | 70 | /* <begin connected objects> */ 71 | -- pgvectorscale/src/access_method/mod.rs:172 72 | -- requires: 73 | -- amhandler 74 | -- distance_type_cosine 75 | -- distance_type_l2 76 | -- distance_type_inner_product 77 | -- smallint_array_overlap 78 | 79 | 80 | DO $$ 81 | DECLARE 82 | have_cos_ops int; 83 | have_l2_ops int; 84 | have_ip_ops int; 85 | have_label_ops int; 86 | BEGIN 87 | -- Has cosine operator class been installed previously? 88 | SELECT count(*) 89 | INTO have_cos_ops 90 | FROM pg_catalog.pg_opclass c 91 | WHERE c.opcname = 'vector_cosine_ops' 92 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 93 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 94 | 95 | -- Has L2 operator class been installed previously? 96 | SELECT count(*) 97 | INTO have_l2_ops 98 | FROM pg_catalog.pg_opclass c 99 | WHERE c.opcname = 'vector_l2_ops' 100 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 101 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 102 | 103 | -- Has inner product operator class been installed previously? 
104 | SELECT count(*) 105 | INTO have_ip_ops 106 | FROM pg_catalog.pg_opclass c 107 | WHERE c.opcname = 'vector_ip_ops' 108 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 109 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 110 | 111 | -- Has label-filtering support been installed previously? 112 | SELECT count(*) 113 | INTO have_label_ops 114 | FROM pg_catalog.pg_opclass c 115 | WHERE c.opcname = 'vector_smallint_label_ops' 116 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 117 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 118 | 119 | IF have_cos_ops = 0 THEN 120 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 121 | FOR TYPE vector USING diskann AS 122 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, 123 | FUNCTION 1 distance_type_cosine(); 124 | ELSIF have_l2_ops = 0 THEN 125 | -- Upgrade from 0.4.0 to 0.5.0. Update cosine opclass to include 126 | -- the distance_type_cosine function. 
127 | INSERT INTO pg_amproc (oid, amprocfamily, amproclefttype, amprocrighttype, amprocnum, amproc) 128 | SELECT (select (max(oid)::int + 1)::oid from pg_amproc), c.opcfamily, c.opcintype, c.opcintype, 1, '@extschema@.distance_type_l2'::regproc 129 | FROM pg_opclass c, pg_am a 130 | WHERE a.oid = c.opcmethod AND c.opcname = 'vector_cosine_ops' AND a.amname = 'diskann'; 131 | END IF; 132 | 133 | IF have_l2_ops = 0 THEN 134 | CREATE OPERATOR CLASS vector_l2_ops 135 | FOR TYPE vector USING diskann AS 136 | OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, 137 | FUNCTION 1 distance_type_l2(); 138 | END IF; 139 | 140 | IF have_ip_ops = 0 THEN 141 | CREATE OPERATOR CLASS vector_ip_ops 142 | FOR TYPE vector USING diskann AS 143 | OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, 144 | FUNCTION 1 distance_type_inner_product(); 145 | END IF; 146 | 147 | -- First, check if the && operator exists for smallint[] 148 | IF NOT EXISTS ( 149 | SELECT 1 FROM pg_operator 150 | WHERE oprname = '&&' 151 | AND oprleft = 'smallint[]'::regtype 152 | AND oprright = 'smallint[]'::regtype 153 | ) THEN 154 | -- Create the && operator for smallint[] 155 | CREATE OPERATOR && ( 156 | LEFTARG = smallint[], 157 | RIGHTARG = smallint[], 158 | PROCEDURE = smallint_array_overlap, 159 | COMMUTATOR = &&, 160 | RESTRICT = contsel, 161 | JOIN = contjoinsel 162 | ); 163 | 164 | -- Register the operator with the system catalogs for proper selectivity estimation 165 | -- This is done by adding entries to pg_amop for the array_ops operator class 166 | EXECUTE format( 167 | 'ALTER OPERATOR FAMILY array_ops USING btree ADD OPERATOR 3 && (smallint[], smallint[]) FOR SEARCH' 168 | ); 169 | END IF; 170 | 171 | IF have_label_ops = 0 THEN 172 | CREATE OPERATOR CLASS vector_smallint_label_ops 173 | DEFAULT FOR TYPE smallint[] USING diskann AS 174 | OPERATOR 1 &&; 175 | END IF; 176 | END; 177 | $$; 178 | /* </end connected objects> */ 179 | 180 | 
-------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.6.0--0.7.1.sql: -------------------------------------------------------------------------------- 1 | vectorscale--0.7.0--0.7.1.sql -------------------------------------------------------------------------------- /pgvectorscale/sql/vectorscale--0.7.0--0.7.1.sql: -------------------------------------------------------------------------------- 1 | /* <begin connected objects> */ 2 | /* 3 | This file is auto generated by pgrx. 4 | 5 | The ordering of items is not stable, it is driven by a dependency graph. 6 | */ 7 | /* </end connected objects> */ 8 | 9 | /* <begin connected objects> */ 10 | -- pgvectorscale/src/access_method/mod.rs:38 11 | -- vectorscale::access_method::amhandler 12 | 13 | CREATE OR REPLACE FUNCTION diskann_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/vectorscale-0.7.1', 'amhandler_wrapper'; 14 | 15 | DO $$ 16 | DECLARE 17 | c int; 18 | BEGIN 19 | SELECT count(*) 20 | INTO c 21 | FROM pg_catalog.pg_am a 22 | WHERE a.amname = 'diskann'; 23 | 24 | IF c = 0 THEN 25 | CREATE ACCESS METHOD diskann TYPE INDEX HANDLER diskann_amhandler; 26 | END IF; 27 | END; 28 | $$; 29 | /* </end connected objects> */ 30 | 31 | /* <begin connected objects> */ 32 | -- pgvectorscale/src/access_method/distance.rs:47 33 | -- vectorscale::access_method::distance::distance_type_cosine 34 | CREATE OR REPLACE FUNCTION "distance_type_cosine"() RETURNS smallint /* i16 */ 35 | IMMUTABLE STRICT PARALLEL SAFE 36 | LANGUAGE c /* Rust */ 37 | AS '$libdir/vectorscale-0.7.1', 'distance_type_cosine_wrapper'; 38 | /* </end connected objects> */ 39 | 40 | /* <begin connected objects> */ 41 | -- pgvectorscale/src/access_method/distance.rs:57 42 | -- vectorscale::access_method::distance::distance_type_inner_product 43 | CREATE OR REPLACE FUNCTION "distance_type_inner_product"() RETURNS smallint /* i16 */ 44 | 
IMMUTABLE STRICT PARALLEL SAFE 45 | LANGUAGE c /* Rust */ 46 | AS '$libdir/vectorscale-0.7.1', 'distance_type_inner_product_wrapper'; 47 | /* </end connected objects> */ 48 | 49 | /* <begin connected objects> */ 50 | -- pgvectorscale/src/access_method/distance.rs:52 51 | -- vectorscale::access_method::distance::distance_type_l2 52 | CREATE OR REPLACE FUNCTION "distance_type_l2"() RETURNS smallint /* i16 */ 53 | IMMUTABLE STRICT PARALLEL SAFE 54 | LANGUAGE c /* Rust */ 55 | AS '$libdir/vectorscale-0.7.1', 'distance_type_l2_wrapper'; 56 | /* </end connected objects> */ 57 | 58 | /* <begin connected objects> */ 59 | -- pgvectorscale/src/access_method/mod.rs:290 60 | -- vectorscale::access_method::smallint_array_overlap 61 | CREATE OR REPLACE FUNCTION "smallint_array_overlap"( 62 | "left" smallint[], /* pgrx::datum::array::Array<i16> */ 63 | "right" smallint[] /* pgrx::datum::array::Array<i16> */ 64 | ) RETURNS bool /* bool */ 65 | IMMUTABLE STRICT PARALLEL SAFE 66 | LANGUAGE c /* Rust */ 67 | AS '$libdir/vectorscale-0.7.1', 'smallint_array_overlap_wrapper'; 68 | /* </end connected objects> */ 69 | 70 | /* <begin connected objects> */ 71 | -- pgvectorscale/src/access_method/mod.rs:172 72 | -- requires: 73 | -- amhandler 74 | -- distance_type_cosine 75 | -- distance_type_l2 76 | -- distance_type_inner_product 77 | -- smallint_array_overlap 78 | 79 | 80 | DO $$ 81 | DECLARE 82 | have_cos_ops int; 83 | have_l2_ops int; 84 | have_ip_ops int; 85 | have_label_ops int; 86 | BEGIN 87 | -- Has cosine operator class been installed previously? 88 | SELECT count(*) 89 | INTO have_cos_ops 90 | FROM pg_catalog.pg_opclass c 91 | WHERE c.opcname = 'vector_cosine_ops' 92 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 93 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 94 | 95 | -- Has L2 operator class been installed previously? 
96 | SELECT count(*) 97 | INTO have_l2_ops 98 | FROM pg_catalog.pg_opclass c 99 | WHERE c.opcname = 'vector_l2_ops' 100 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 101 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 102 | 103 | -- Has inner product operator class been installed previously? 104 | SELECT count(*) 105 | INTO have_ip_ops 106 | FROM pg_catalog.pg_opclass c 107 | WHERE c.opcname = 'vector_ip_ops' 108 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 109 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 110 | 111 | -- Has label-filtering support been installed previously? 112 | SELECT count(*) 113 | INTO have_label_ops 114 | FROM pg_catalog.pg_opclass c 115 | WHERE c.opcname = 'vector_smallint_label_ops' 116 | AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'diskann') 117 | AND c.opcnamespace = (SELECT oid FROM pg_catalog.pg_namespace where nspname='@extschema@'); 118 | 119 | IF have_cos_ops = 0 THEN 120 | CREATE OPERATOR CLASS vector_cosine_ops DEFAULT 121 | FOR TYPE vector USING diskann AS 122 | OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops, 123 | FUNCTION 1 distance_type_cosine(); 124 | ELSIF have_l2_ops = 0 THEN 125 | -- Upgrade from 0.4.0 to 0.5.0. Update cosine opclass to include 126 | -- the distance_type_cosine function. 
127 | INSERT INTO pg_amproc (oid, amprocfamily, amproclefttype, amprocrighttype, amprocnum, amproc) 128 | SELECT (select (max(oid)::int + 1)::oid from pg_amproc), c.opcfamily, c.opcintype, c.opcintype, 1, '@extschema@.distance_type_l2'::regproc 129 | FROM pg_opclass c, pg_am a 130 | WHERE a.oid = c.opcmethod AND c.opcname = 'vector_cosine_ops' AND a.amname = 'diskann'; 131 | END IF; 132 | 133 | IF have_l2_ops = 0 THEN 134 | CREATE OPERATOR CLASS vector_l2_ops 135 | FOR TYPE vector USING diskann AS 136 | OPERATOR 1 <-> (vector, vector) FOR ORDER BY float_ops, 137 | FUNCTION 1 distance_type_l2(); 138 | END IF; 139 | 140 | IF have_ip_ops = 0 THEN 141 | CREATE OPERATOR CLASS vector_ip_ops 142 | FOR TYPE vector USING diskann AS 143 | OPERATOR 1 <#> (vector, vector) FOR ORDER BY float_ops, 144 | FUNCTION 1 distance_type_inner_product(); 145 | END IF; 146 | 147 | -- First, check if the && operator exists for smallint[] 148 | IF NOT EXISTS ( 149 | SELECT 1 FROM pg_operator 150 | WHERE oprname = '&&' 151 | AND oprleft = 'smallint[]'::regtype 152 | AND oprright = 'smallint[]'::regtype 153 | ) THEN 154 | -- Create the && operator for smallint[] 155 | CREATE OPERATOR && ( 156 | LEFTARG = smallint[], 157 | RIGHTARG = smallint[], 158 | PROCEDURE = smallint_array_overlap, 159 | COMMUTATOR = &&, 160 | RESTRICT = contsel, 161 | JOIN = contjoinsel 162 | ); 163 | 164 | -- Register the operator with the system catalogs for proper selectivity estimation 165 | -- This is done by adding entries to pg_amop for the array_ops operator class 166 | EXECUTE format( 167 | 'ALTER OPERATOR FAMILY array_ops USING btree ADD OPERATOR 3 && (smallint[], smallint[]) FOR SEARCH' 168 | ); 169 | END IF; 170 | 171 | IF have_label_ops = 0 THEN 172 | CREATE OPERATOR CLASS vector_smallint_label_ops 173 | DEFAULT FOR TYPE smallint[] USING diskann AS 174 | OPERATOR 1 &&; 175 | END IF; 176 | END; 177 | $$; 178 | /* </end connected objects> */ 179 | 180 | 
-------------------------------------------------------------------------------- /pgvectorscale/src/access_method/cost_estimate.rs: -------------------------------------------------------------------------------- 1 | use pgrx::*; 2 | 3 | /// cost estimate function loosely based on how ivfflat does things 4 | #[pg_guard(immutable, parallel_safe)] 5 | #[allow(clippy::too_many_arguments)] 6 | pub unsafe extern "C" fn amcostestimate( 7 | root: *mut pg_sys::PlannerInfo, 8 | path: *mut pg_sys::IndexPath, 9 | loop_count: f64, 10 | index_startup_cost: *mut pg_sys::Cost, 11 | index_total_cost: *mut pg_sys::Cost, 12 | index_selectivity: *mut pg_sys::Selectivity, 13 | index_correlation: *mut f64, 14 | index_pages: *mut f64, 15 | ) { 16 | if (*path).indexorderbys.is_null() { 17 | //can't use index without order bys 18 | *index_startup_cost = f64::MAX; 19 | *index_total_cost = f64::MAX; 20 | *index_selectivity = 0.; 21 | *index_correlation = 0.; 22 | *index_pages = 0.; 23 | return; 24 | } 25 | let path_ref = path.as_ref().expect("path argument is NULL"); 26 | /*let indexinfo = path_ref 27 | .indexinfo 28 | .as_ref() 29 | .expect("indexinfo in path is NULL"); 30 | let index_relation = unsafe { 31 | PgRelation::with_lock( 32 | indexinfo.indexoid, 33 | pg_sys::AccessShareLock as pg_sys::LOCKMODE, 34 | ) 35 | }; 36 | let heap_relation = index_relation 37 | .heap_relation() 38 | .expect("failed to get heap relation for index");*/ 39 | 40 | let total_index_tuples = (*path_ref.indexinfo).tuples; 41 | 42 | let mut generic_costs = pg_sys::GenericCosts { 43 | numIndexTuples: total_index_tuples / 100., //TODO need better estimate 44 | ..Default::default() 45 | }; 46 | 47 | pg_sys::genericcostestimate(root, path, loop_count, &mut generic_costs); 48 | 49 | //TODO probably have to adjust costs more here 50 | 51 | *index_startup_cost = generic_costs.indexTotalCost; 52 | *index_total_cost = generic_costs.indexTotalCost; 53 | *index_selectivity = generic_costs.indexSelectivity; 54 | 
*index_correlation = generic_costs.indexCorrelation; 55 | *index_pages = generic_costs.numIndexPages; 56 | //pg_sys::cpu_index_tuple_cost; 57 | } 58 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/debugging.rs: -------------------------------------------------------------------------------- 1 | //! Debugging methods 2 | 3 | use std::collections::HashMap; 4 | 5 | use pgrx::PgRelation; 6 | 7 | use crate::util::ItemPointer; 8 | 9 | use super::{node::ReadableNode, plain::node::PlainNode, stats::GreedySearchStats}; 10 | 11 | #[allow(dead_code)] 12 | pub fn print_graph_from_disk(index: &PgRelation, init_id: ItemPointer) { 13 | let mut map = HashMap::<ItemPointer, Vec<f32>>::new(); 14 | let mut sb = String::new(); 15 | unsafe { 16 | print_graph_from_disk_visitor(index, init_id, &mut map, &mut sb); 17 | } 18 | panic!("{}", sb.as_str()) 19 | } 20 | 21 | unsafe fn print_graph_from_disk_visitor( 22 | index: &PgRelation, 23 | index_pointer: ItemPointer, 24 | map: &mut HashMap<ItemPointer, Vec<f32>>, 25 | sb: &mut String, 26 | ) { 27 | let mut stats = GreedySearchStats::new(); 28 | let data_node = PlainNode::read(index, index_pointer, &mut stats); 29 | let node = data_node.get_archived_node(); 30 | let v = node.vector.as_slice(); 31 | let copy: Vec<f32> = v.to_vec(); 32 | let name = format!("node {:?}", &copy); 33 | 34 | map.insert(index_pointer, copy); 35 | 36 | for neighbor_pointer in node.iter_neighbors() { 37 | let p = neighbor_pointer; 38 | if !map.contains_key(&p) { 39 | print_graph_from_disk_visitor(index, p, map, sb); 40 | } 41 | } 42 | sb.push_str(&name); 43 | sb.push('\n'); 44 | 45 | for neighbor_pointer in node.iter_neighbors() { 46 | let neighbor = map.get(&neighbor_pointer).unwrap(); 47 | sb.push_str(&format!("->{:?}\n", neighbor)) 48 | } 49 | sb.push('\n') 50 | } 51 | -------------------------------------------------------------------------------- 
/pgvectorscale/src/access_method/distance/distance_aarch64.rs: -------------------------------------------------------------------------------- 1 | //! Calculate the distance by vector arithmetic optimized for aarch64 neon intrinsics 2 | 3 | use core::arch::aarch64::{self, *}; 4 | use std::ops; 5 | 6 | #[cfg(not(target_feature = "neon"))] 7 | #[cfg(not(doc))] 8 | compile_error!( 9 | "On arm, the neon feature must be enabled. Set RUSTFLAGS=\"-C target-feature=+neon\"" 10 | ); 11 | 12 | // Naming and impl done to match simdeez and the options used in the distance_l2_simd_body and 13 | // distance_cosine_simd_body macros 14 | struct S(float32x4_t); 15 | 16 | impl S { 17 | const VF32_WIDTH: usize = 4; // 128bit register for ARM NEON 18 | 19 | unsafe fn setzero_ps() -> S { 20 | let zero: f32 = 0.0; 21 | S(aarch64::vld1q_dup_f32(&zero)) 22 | } 23 | 24 | unsafe fn loadu_ps(a: &f32) -> S { 25 | S(aarch64::vld1q_f32(a)) 26 | } 27 | 28 | unsafe fn horizontal_add_ps(a: S) -> f32 { 29 | aarch64::vaddvq_f32(a.0) 30 | } 31 | 32 | unsafe fn fmadd_ps(a: S, b: S, c: S) -> S { 33 | S(aarch64::vfmaq_f32(c.0, a.0, b.0)) 34 | } 35 | } 36 | 37 | impl ops::Add<S> for S { 38 | type Output = S; 39 | 40 | fn add(self, rhs: S) -> Self::Output { 41 | unsafe { S(aarch64::vaddq_f32(self.0, rhs.0)) } 42 | } 43 | } 44 | 45 | impl ops::Sub<S> for S { 46 | type Output = S; 47 | 48 | fn sub(self, rhs: S) -> Self::Output { 49 | unsafe { S(aarch64::vsubq_f32(self.0, rhs.0)) } 50 | } 51 | } 52 | 53 | impl ops::Mul<S> for S { 54 | type Output = S; 55 | 56 | fn mul(self, rhs: S) -> Self::Output { 57 | unsafe { S(aarch64::vmulq_f32(self.0, rhs.0)) } 58 | } 59 | } 60 | 61 | pub unsafe fn distance_l2_aarch64_neon(x: &[f32], y: &[f32]) -> f32 { 62 | super::distance_l2_simd_body!(x, y) 63 | } 64 | 65 | pub unsafe fn distance_cosine_aarch64_neon(x: &[f32], y: &[f32]) -> f32 { 66 | (1.0 - super::inner_product_simd_body!(x, y)).max(0.0) 67 | } 68 | 69 | pub unsafe fn inner_product_aarch64_neon(x: &[f32], y: 
&[f32]) -> f32 { 70 | super::inner_product_simd_body!(x, y) 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | #[test] 76 | fn distances_equal() { 77 | let r: Vec<f32> = (0..2000).map(|v| v as f32 + 1.0).collect(); 78 | let l: Vec<f32> = (0..2000).map(|v| v as f32 + 2.0).collect(); 79 | 80 | let r_size = r.iter().map(|v| v * v).sum::<f32>().sqrt(); 81 | let l_size = l.iter().map(|v| v * v).sum::<f32>().sqrt(); 82 | 83 | let r: Vec<f32> = r.iter().map(|v| v / r_size).collect(); 84 | let l: Vec<f32> = l.iter().map(|v| v / l_size).collect(); 85 | 86 | assert!( 87 | (unsafe { super::distance_cosine_aarch64_neon(&r, &l) } 88 | - super::super::distance_cosine_unoptimized(&r, &l)) 89 | .abs() 90 | < 0.000001 91 | ); 92 | assert!( 93 | (unsafe { super::distance_l2_aarch64_neon(&r, &l) } 94 | - super::super::distance_l2_unoptimized(&r, &l)) 95 | .abs() 96 | < 0.000001 97 | ); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/distance/distance_x86.rs: -------------------------------------------------------------------------------- 1 | //! Calculate the distance by vector arithmetic optimized for x86 2 | 3 | use simdeez::avx2::*; 4 | use simdeez::scalar::*; 5 | use simdeez::sse2::*; 6 | use simdeez::sse41::*; 7 | 8 | #[cfg(not(target_feature = "avx2"))] 9 | #[cfg(not(doc))] 10 | compile_error!( 11 | "On x86, the AVX2 feature must be enabled. Set RUSTFLAGS=\"-C target-feature=+avx2,+fma\"" 12 | ); 13 | 14 | //note: without fmadd, the performance degrades pretty badly. Benchmark before disabling 15 | #[cfg(not(target_feature = "fma"))] 16 | #[cfg(not(doc))] 17 | compile_error!( 18 | "On x86, the fma feature must be enabled. 
Set RUSTFLAGS=\"-C target-feature=+avx2,+fma\"" 19 | ); 20 | 21 | simdeez::simd_runtime_generate!( 22 | pub fn distance_l2_x86(x: &[f32], y: &[f32]) -> f32 { 23 | super::distance_l2_simd_body!(x, y) 24 | } 25 | ); 26 | 27 | simdeez::simd_runtime_generate!( 28 | pub fn inner_product_x86(x: &[f32], y: &[f32]) -> f32 { 29 | super::inner_product_simd_body!(x, y) 30 | } 31 | ); 32 | 33 | /// Calculate the cosine distance between two normal vectors 34 | pub unsafe fn distance_cosine_x86_avx2(x: &[f32], y: &[f32]) -> f32 { 35 | (1.0 - inner_product_x86_avx2(x, y)).max(0.0) 36 | } 37 | 38 | #[cfg(test)] 39 | mod tests { 40 | #[test] 41 | fn distances_equal() { 42 | let r: Vec<f32> = (0..2000).map(|v| v as f32 + 1.0).collect(); 43 | let l: Vec<f32> = (0..2000).map(|v| v as f32 + 2.0).collect(); 44 | 45 | let r_size = r.iter().map(|v| v * v).sum::<f32>().sqrt(); 46 | let l_size = l.iter().map(|v| v * v).sum::<f32>().sqrt(); 47 | 48 | let r: Vec<f32> = r.iter().map(|v| v / r_size).collect(); 49 | let l: Vec<f32> = l.iter().map(|v| v / l_size).collect(); 50 | 51 | assert!( 52 | (unsafe { super::distance_cosine_x86_avx2(&r, &l) } 53 | - super::super::distance_cosine_unoptimized(&r, &l)) 54 | .abs() 55 | < 0.000001 56 | ); 57 | assert!( 58 | (unsafe { super::distance_l2_x86_avx2(&r, &l) } 59 | - super::super::distance_l2_unoptimized(&r, &l)) 60 | .abs() 61 | < 0.000001 62 | ); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/graph/neighbor_store.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use crate::util::{IndexPointer, ItemPointer}; 4 | 5 | use crate::access_method::graph::neighbor_with_distance::*; 6 | use crate::access_method::labels::LabelSet; 7 | use crate::access_method::meta_page::MetaPage; 8 | use crate::access_method::stats::{StatsDistanceComparison, StatsNodeModify, StatsNodeRead}; 9 | use 
crate::access_method::storage::Storage; 10 | 11 | /// A builderGraph is a graph that keep the neighbors in-memory in the neighbor_map below 12 | /// The idea is that during the index build, you don't want to update the actual Postgres 13 | /// pages every time you change the neighbors. Instead you change the neighbors in memory 14 | /// until the build is done. Afterwards, calling the `write` method, will write out all 15 | /// the neighbors to the right pages. 16 | #[derive(Default)] 17 | pub struct BuilderNeighborCache { 18 | //maps node's pointer to the representation on disk 19 | //use a btree to provide ordering on the item pointers in iter(). 20 | //this ensures the write in finalize_node_at_end_of_build() is ordered, not random. 21 | neighbor_map: BTreeMap<ItemPointer, (Option<LabelSet>, Vec<NeighborWithDistance>)>, 22 | } 23 | 24 | impl BuilderNeighborCache { 25 | pub fn iter( 26 | &self, 27 | ) -> impl Iterator< 28 | Item = ( 29 | &ItemPointer, 30 | (Option<&LabelSet>, &Vec<NeighborWithDistance>), 31 | ), 32 | > { 33 | self.neighbor_map 34 | .iter() 35 | .map(|(k, (v1, v2))| (k, (v1.as_ref(), v2))) 36 | } 37 | 38 | pub fn get_neighbors(&self, neighbors_of: ItemPointer) -> Vec<IndexPointer> { 39 | let neighbors = self.neighbor_map.get(&neighbors_of); 40 | match neighbors { 41 | Some((_, n)) => n 42 | .iter() 43 | .map(|n| n.get_index_pointer_to_neighbor()) 44 | .collect(), 45 | None => vec![], 46 | } 47 | } 48 | 49 | pub fn get_neighbors_with_full_vector_distances( 50 | &self, 51 | neighbors_of: ItemPointer, 52 | result: &mut Vec<NeighborWithDistance>, 53 | ) { 54 | let neighbors = self.neighbor_map.get(&neighbors_of); 55 | if let Some((_, n)) = neighbors { 56 | for nwd in n { 57 | result.push(nwd.clone()); 58 | } 59 | } 60 | } 61 | 62 | pub fn set_neighbors( 63 | &mut self, 64 | neighbors_of: ItemPointer, 65 | labels: Option<LabelSet>, 66 | new_neighbors: Vec<NeighborWithDistance>, 67 | ) { 68 | self.neighbor_map 69 | .insert(neighbors_of, (labels, 
new_neighbors)); 70 | } 71 | 72 | pub fn max_neighbors(&self, meta_page: &MetaPage) -> usize { 73 | meta_page.get_max_neighbors_during_build() 74 | } 75 | } 76 | 77 | pub enum GraphNeighborStore { 78 | Builder(BuilderNeighborCache), 79 | Disk, 80 | } 81 | 82 | impl GraphNeighborStore { 83 | pub fn get_neighbors_with_full_vector_distances< 84 | S: Storage, 85 | T: StatsNodeRead + StatsDistanceComparison, 86 | >( 87 | &self, 88 | neighbors_of: ItemPointer, 89 | storage: &S, 90 | result: &mut Vec<NeighborWithDistance>, 91 | stats: &mut T, 92 | ) { 93 | match self { 94 | GraphNeighborStore::Builder(b) => { 95 | b.get_neighbors_with_full_vector_distances(neighbors_of, result) 96 | } 97 | GraphNeighborStore::Disk => { 98 | storage.get_neighbors_with_distances_from_disk(neighbors_of, result, stats) 99 | } 100 | }; 101 | } 102 | 103 | pub fn set_neighbors<S: Storage, T: StatsNodeModify + StatsNodeRead>( 104 | &mut self, 105 | storage: &S, 106 | meta_page: &MetaPage, 107 | neighbors_of: ItemPointer, 108 | labels: Option<LabelSet>, 109 | new_neighbors: Vec<NeighborWithDistance>, 110 | stats: &mut T, 111 | ) { 112 | match self { 113 | GraphNeighborStore::Builder(b) => b.set_neighbors(neighbors_of, labels, new_neighbors), 114 | GraphNeighborStore::Disk => storage.set_neighbors_on_disk( 115 | meta_page, 116 | neighbors_of, 117 | new_neighbors.as_slice(), 118 | stats, 119 | ), 120 | } 121 | } 122 | 123 | pub fn max_neighbors(&self, meta_page: &MetaPage) -> usize { 124 | match self { 125 | GraphNeighborStore::Builder(b) => b.max_neighbors(meta_page), 126 | GraphNeighborStore::Disk => meta_page.get_num_neighbors() as _, 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/graph/neighbor_with_distance.rs: -------------------------------------------------------------------------------- 1 | use std::{cell::OnceCell, cmp::Ordering}; 2 | 3 | use crate::util::{IndexPointer, ItemPointer}; 4 | 5 | 
use crate::access_method::labels::LabelSet;

//TODO is this right?
pub type Distance = f32;

/// A distance plus a lazily computed tie-break value.
///
/// When both distances are exactly 0.0, ordering falls back to the "ip
/// distance" between the two index pointers involved, computed on first use
/// and memoized in a OnceCell (see get_distance_tie_break()).
#[derive(Clone, Debug)]
pub struct DistanceWithTieBreak {
    distance: Distance,
    from: IndexPointer,
    to: IndexPointer,
    // Lazily initialized from from.ip_distance(to).
    distance_tie_break: OnceCell<usize>,
}

impl DistanceWithTieBreak {
    /// Distance between two index nodes; must be non-NaN and non-negative.
    pub fn new(distance: Distance, from: IndexPointer, to: IndexPointer) -> Self {
        assert!(!distance.is_nan());
        assert!(distance >= 0.0);
        DistanceWithTieBreak {
            distance,
            from,
            to,
            distance_tie_break: OnceCell::new(),
        }
    }

    /// Distance from the query to an index node; tie-break is pre-set to 0.
    pub fn with_query(distance: Distance, to: IndexPointer) -> Self {
        //this is the distance from the query to an index node.
        //make the distance_tie_break = 0
        let distance_tie_break = OnceCell::new();
        //explicitly set the distance_tie_break to 0 to avoid the cost of computing it
        distance_tie_break.set(0).unwrap();
        DistanceWithTieBreak {
            distance,
            from: to,
            to,
            distance_tie_break,
        }
    }

    // Memoized tie-break value, computed from the pointer pair on first use.
    fn get_distance_tie_break(&self) -> usize {
        *self
            .distance_tie_break
            .get_or_init(|| self.from.ip_distance(self.to))
    }

    pub fn get_distance(&self) -> Distance {
        self.distance
    }

    /// Ratio self/divisor in f64. When the divisor's distance is (near) zero,
    /// falls back to the ratio of tie-break values if self is also (near)
    /// zero, and to f64::MAX otherwise.
    pub fn get_factor(&self, divisor: &Self) -> f64 {
        if divisor.get_distance() < 0.0 + f32::EPSILON {
            if self.get_distance() < 0.0 + f32::EPSILON {
                self.get_distance_tie_break() as f64 / divisor.get_distance_tie_break() as f64
            } else {
                f64::MAX
            }
        } else {
            self.get_distance() as f64 / divisor.get_distance() as f64
        }
    }
}

impl PartialOrd for DistanceWithTieBreak {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for DistanceWithTieBreak {
    fn cmp(&self, other: &Self) -> Ordering {
        // Only when both distances are exactly 0.0 is the tie-break used;
        // otherwise total_cmp provides a total order over the floats.
        if self.distance == 0.0 && other.distance == 0.0 {
            return self
                .get_distance_tie_break()
                .cmp(&other.get_distance_tie_break());
        }
        self.distance.total_cmp(&other.distance)
    }
}

impl PartialEq for DistanceWithTieBreak {
    fn eq(&self, other: &Self) -> bool {
        if self.distance == 0.0 && other.distance == 0.0 {
            return self.get_distance_tie_break() == other.get_distance_tie_break();
        }
        self.distance == other.distance
    }
}

//promise that PartialEq is reflexive
impl Eq for DistanceWithTieBreak {}

/// A neighbor candidate: its index pointer, distance (with tie-break), and
/// optional label set.
#[derive(Clone, Debug)]
pub struct NeighborWithDistance {
    index_pointer: IndexPointer,
    distance: DistanceWithTieBreak,
    labels: Option<LabelSet>,
}

impl NeighborWithDistance {
    pub fn new(
        neighbor_index_pointer: ItemPointer,
        distance: DistanceWithTieBreak,
        labels: Option<LabelSet>,
    ) -> Self {
        Self {
            index_pointer: neighbor_index_pointer,
            distance,
            labels,
        }
    }

    pub fn get_index_pointer_to_neighbor(&self) -> ItemPointer {
        self.index_pointer
    }

    pub fn get_distance_with_tie_break(&self) -> &DistanceWithTieBreak {
        &self.distance
    }

    pub fn get_labels(&self) -> Option<&LabelSet> {
        self.labels.as_ref()
    }
}

impl PartialOrd for NeighborWithDistance {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for NeighborWithDistance {
    // NOTE(review): Ord compares by distance while PartialEq compares by
    // index_pointer, so eq() and cmp() == Equal can disagree -- confirm that
    // callers do not rely on their consistency.
    fn cmp(&self, other: &Self) -> Ordering {
        self.distance.cmp(&other.distance)
    }
}

impl PartialEq for NeighborWithDistance {
    fn eq(&self, other: &Self) -> bool {
        self.index_pointer == other.index_pointer
    }
}

//promise that PartialEq is reflexive
impl Eq for NeighborWithDistance {}

impl std::hash::Hash for NeighborWithDistance {
    fn hash<H:
std::hash::Hasher>(&self, state: &mut H) {
        self.index_pointer.hash(state);
    }
}
--------------------------------------------------------------------------------
/pgvectorscale/src/access_method/graph/start_nodes.rs:
--------------------------------------------------------------------------------
use pgvectorscale_derive::{Readable, Writeable};
use rkyv::{Archive, Deserialize, Serialize};

use crate::access_method::labels::{Label, LabelSet};
use crate::access_method::node::{ReadableNode, WriteableNode};
use crate::util::{ItemPointer, ReadableBuffer, WritableBuffer};
use pgrx::PgRelation;
use std::collections::BTreeMap;

use crate::access_method::labels::LabelSetView;

/// Start nodes for the graph. For unlabeled vectorsets, this is a single node. For
/// labeled vectorsets, this is a map of labels to nodes.
#[derive(Clone, Debug, PartialEq, Eq, Archive, Deserialize, Serialize, Readable, Writeable)]
#[archive(check_bytes)]
pub struct StartNodes {
    /// Default starting node for the graph.
    default_node: ItemPointer,
    /// Labeled starting nodes for the graph
    labeled_nodes: BTreeMap<Label, ItemPointer>,
}

impl StartNodes {
    /// Start-node set with only a default node and no labeled entries.
    pub fn new(default_node: ItemPointer) -> Self {
        Self {
            default_node,
            labeled_nodes: BTreeMap::new(),
        }
    }

    /// Insert or replace the start node for `label`; returns the previous one.
    pub fn upsert(&mut self, label: Label, node: ItemPointer) -> Option<ItemPointer> {
        self.labeled_nodes.insert(label, node)
    }

    /// The default (unlabeled) start node.
    pub fn default_node(&self) -> ItemPointer {
        self.default_node
    }

    /// Start nodes for a node carrying `labels`; the default node when no
    /// labels are given. Labels without a start node are skipped.
    pub fn get_for_node(&self, labels: Option<&LabelSet>) -> Vec<ItemPointer> {
        match labels {
            Some(labels) => labels
                .iter()
                .filter_map(|label| self.labeled_nodes.get(label).copied())
                .collect(),
            None => vec![self.default_node],
        }
    }

    /// Whether `label` has a start node.
    pub fn contains(&self, label: Label) -> bool {
        self.labeled_nodes.contains_key(&label)
    }

    /// Whether every given label has a start node (trivially true for None).
    pub fn contains_all(&self, labels: Option<&LabelSet>) -> bool {
        labels.map_or(true, |labels| {
            labels
                .iter()
                .all(|label| self.labeled_nodes.contains_key(label))
        })
    }

    /// Start node for a single label, if one exists.
    pub fn node_for_label(&self, label: Label) -> Option<ItemPointer> {
        self.labeled_nodes.get(&label).copied()
    }

    /// Start nodes for a label set; the default node when the set is empty.
    pub fn node_for_labels(&self, labels: &LabelSet) -> Vec<ItemPointer> {
        if labels.is_empty() {
            vec![self.default_node]
        } else {
            labels
                .iter()
                .filter_map(|label| self.labeled_nodes.get(label).copied())
                .collect()
        }
    }

    /// All start nodes tagged with their label (None for the default node).
    pub fn get_all_labeled_nodes(&self) -> Vec<(Option<Label>, ItemPointer)> {
        let mut nodes = vec![(None, self.default_node)];
        nodes.extend(
            self.labeled_nodes
                .iter()
                .map(|(label, node)| (Some(*label), *node)),
        );
        nodes
    }

    /// All start nodes: the default one followed by every labeled one.
    pub fn get_all_nodes(&self) -> Vec<ItemPointer> {
        let mut nodes = vec![self.default_node];
        nodes.extend(self.labeled_nodes.values().copied());
        nodes
} 93 | } 94 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/guc.rs: -------------------------------------------------------------------------------- 1 | use pgrx::*; 2 | 3 | pub static TSV_QUERY_SEARCH_LIST_SIZE: GucSetting<i32> = GucSetting::<i32>::new(100); 4 | pub static TSV_RESORT_SIZE: GucSetting<i32> = GucSetting::<i32>::new(50); 5 | 6 | pub fn init() { 7 | GucRegistry::define_int_guc( 8 | "diskann.query_search_list_size", 9 | "The size of the search list used in queries", 10 | "Higher value increases recall at the cost of speed.", 11 | &TSV_QUERY_SEARCH_LIST_SIZE, 12 | 1, 13 | 10000, 14 | GucContext::Userset, 15 | GucFlags::default(), 16 | ); 17 | 18 | GucRegistry::define_int_guc( 19 | "diskann.query_rescore", 20 | "The number of elements rescored (0 to disable rescoring)", 21 | "Rescoring takes the query_rescore number of elements that have the smallest approximate distance, rescores them with the exact distance, returning the closest ones with the exact distance.", 22 | &TSV_RESORT_SIZE, 23 | 1, 24 | 1000, 25 | GucContext::Userset, 26 | GucFlags::default(), 27 | ); 28 | } 29 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/node.rs: -------------------------------------------------------------------------------- 1 | use pgrx::PgRelation; 2 | use rkyv::AlignedVec; 3 | 4 | use crate::util::{tape::Tape, ItemPointer}; 5 | 6 | use super::stats::{StatsNodeModify, StatsNodeRead, StatsNodeWrite}; 7 | 8 | pub trait ReadableNode { 9 | type Node<'a>; 10 | unsafe fn read<'a, S: StatsNodeRead>( 11 | index: &'a PgRelation, 12 | index_pointer: ItemPointer, 13 | stats: &mut S, 14 | ) -> Self::Node<'a>; 15 | } 16 | 17 | pub trait WriteableNode { 18 | type Node<'a>; 19 | 20 | unsafe fn modify<'a, S: StatsNodeModify>( 21 | index: &'a PgRelation, 22 | index_pointer: ItemPointer, 23 | stats: &mut S, 24 | ) -> Self::Node<'a>; 25 | 26 | fn 
write<S: StatsNodeWrite>(&self, tape: &mut Tape, stats: &mut S) -> ItemPointer;

    fn serialize_to_vec(&self) -> AlignedVec;
}
--------------------------------------------------------------------------------
/pgvectorscale/src/access_method/pg_vector.rs:
--------------------------------------------------------------------------------
use pgrx::*;

use crate::access_method::distance::DistanceType;

use super::{distance::preprocess_cosine, meta_page};

//Ported from pg_vector code
#[repr(C)]
#[derive(Debug)]
pub struct PgVectorInternal {
    vl_len_: i32, /* varlena header (do not touch directly!) */
    pub dim: i16, /* number of dimensions */
    unused: i16,
    pub x: pg_sys::__IncompleteArrayField<std::os::raw::c_float>,
}

impl PgVectorInternal {
    /// View the flexible array member as a `&[f32]` of length `dim`.
    pub fn to_slice(&self) -> &[f32] {
        let dim = self.dim;
        let raw_slice = unsafe { self.x.as_slice(dim as _) };
        raw_slice
    }
}

/// A detoasted pgvector value, possibly held in two forms: one truncated to
/// the indexed dimension count and one with the full dimension count. The
/// needs_pfree flags record which pointer owns its allocation; the two
/// Options may alias the same allocation (see from_datum).
#[derive(Debug)]
pub struct PgVector {
    index_distance: Option<*mut PgVectorInternal>,
    index_distance_needs_pfree: bool,
    full_distance: Option<*mut PgVectorInternal>,
    full_distance_needs_pfree: bool,
}

impl Drop for PgVector {
    // pfree only the pointers flagged as owning. When both Options alias a
    // single allocation, only one flag is set, which avoids a double free.
    fn drop(&mut self) {
        if self.index_distance_needs_pfree {
            unsafe {
                if self.index_distance.is_some() {
                    pg_sys::pfree(self.index_distance.unwrap().cast());
                }
            }
        }
        if self.full_distance_needs_pfree {
            unsafe {
                if self.full_distance.is_some() {
                    pg_sys::pfree(self.full_distance.unwrap().cast());
                }
            }
        }
    }
}

impl PgVector {
    /// # Safety
    ///
    /// TODO
    pub unsafe fn from_pg_parts(
        datum_parts: *mut pg_sys::Datum,
        isnull_parts: *mut bool,
        index: usize,
        meta_page: &meta_page::MetaPage,
        index_distance: bool,
        full_distance: bool,
    ) -> Option<PgVector> {
        // Slices of length index + 1 are just long enough to read element
        // `index` of the datum/isnull arrays.
        let isnulls = std::slice::from_raw_parts(isnull_parts, index + 1);
        if isnulls[index] {
            return None;
        }
        let datums = std::slice::from_raw_parts(datum_parts, index + 1);
        Some(Self::from_datum(
            datums[index],
            meta_page,
            index_distance,
            full_distance,
        ))
    }

    // Detoast (copy) the datum, optionally truncate it to the indexed
    // dimension count, and normalize it in place for cosine distance.
    unsafe fn create_inner(
        datum: pg_sys::Datum,
        meta_page: &meta_page::MetaPage,
        is_index_distance: bool,
    ) -> *mut PgVectorInternal {
        //TODO: we are using a copy here to avoid lifetime issues and because in some cases we have to
        //modify the datum in preprocess_cosine. We should find a way to avoid the copy if the vector is
        //normalized and preprocess_cosine is a noop;
        let detoasted = pg_sys::pg_detoast_datum_copy(datum.cast_mut_ptr());
        let is_copy = !std::ptr::eq(
            detoasted.cast::<PgVectorInternal>(),
            datum.cast_mut_ptr::<PgVectorInternal>(),
        );

        /* if is_copy ever changes, need to change needs_pfree */
        assert!(is_copy, "Datum should be a copy");
        let casted = detoasted.cast::<PgVectorInternal>();

        // When fewer dimensions are indexed than stored, truncate the copy by
        // shrinking its dim field in place.
        if is_index_distance
            && meta_page.get_num_dimensions() != meta_page.get_num_dimensions_to_index()
        {
            assert!((*casted).dim > meta_page.get_num_dimensions_to_index() as _);
            (*casted).dim = meta_page.get_num_dimensions_to_index() as _;
        }

        let dim = (*casted).dim;
        let raw_slice = unsafe { (*casted).x.as_mut_slice(dim as _) };

        // Cosine distance assumes normalized vectors; normalize in place.
        if meta_page.get_distance_type() == DistanceType::Cosine {
            preprocess_cosine(raw_slice);
        }
        casted
    }

    /// # Safety
    ///
    /// TODO
    pub unsafe fn from_datum(
        datum: pg_sys::Datum,
        meta_page: &meta_page::MetaPage,
        index_distance: bool,
        full_distance: bool,
    ) -> PgVector {
        if meta_page.get_num_dimensions() == meta_page.get_num_dimensions_to_index() {
            /* optimization if the num dimensions are the same */
            // Both forms alias one allocation; only the index_distance side
            // is flagged needs_pfree, so Drop frees it exactly once.
            let inner = Self::create_inner(datum, meta_page, true);
            return PgVector {
                index_distance: Some(inner),
                index_distance_needs_pfree: true,
                full_distance: Some(inner),
                full_distance_needs_pfree: false,
            };
        }

        let idx = if index_distance {
            Some(Self::create_inner(datum, meta_page, true))
        } else {
            None
        };

        let full = if full_distance {
            Some(Self::create_inner(datum, meta_page, false))
        } else {
            None
        };

        PgVector {
            index_distance: idx,
            index_distance_needs_pfree: true,
            full_distance: full,
            full_distance_needs_pfree: true,
        }
    }

    /// Slice truncated to the indexed dimensions.
    /// Panics if the index form was not requested at construction.
    pub fn to_index_slice(&self) -> &[f32] {
        unsafe { (*self.index_distance.unwrap()).to_slice() }
    }

    /// Full-dimension slice.
    /// Panics if the full form was not requested at construction.
    pub fn to_full_slice(&self) -> &[f32] {
        unsafe { (*self.full_distance.unwrap()).to_slice() }
    }
}
--------------------------------------------------------------------------------
/pgvectorscale/src/access_method/plain/mod.rs:
--------------------------------------------------------------------------------
pub mod node;
pub mod storage;
mod tests;

use super::{
    distance::DistanceFn,
    labels::LabeledVector,
    stats::{StatsDistanceComparison, StatsNodeRead},
    storage::{NodeDistanceMeasure, Storage},
};
use node::{PlainNode, ReadablePlainNode};
use storage::PlainStorage;

use crate::access_method::node::ReadableNode;
use crate::util::IndexPointer;

pub enum PlainDistanceMeasure {
    Full(LabeledVector),
}

impl PlainDistanceMeasure {
    /// Apply `distance_fn` to query/vector and record the comparison in `stats`.
    pub fn calculate_distance<S: StatsDistanceComparison>(
        distance_fn: DistanceFn,
        query: &[f32],
        vector: &[f32],
        stats: &mut S,
    ) -> f32 {
        assert!(!vector.is_empty());
        assert!(vector.len() == query.len());
        stats.record_full_distance_comparison();
        (distance_fn)(query, vector)
    }
}
/* This is only applicable to plain, so keep here not in storage_common */
/// Distance measure anchored at one node already read from the index; used to
/// compute full-vector distances from that node to other nodes.
pub struct IndexFullDistanceMeasure<'a> {
    readable_node: ReadablePlainNode<'a>,
    storage: &'a PlainStorage<'a>,
}

impl<'a> IndexFullDistanceMeasure<'a> {
    /// # Safety
    ///
    /// The caller must ensure that:
    /// 1. The index_pointer is valid and points to a properly initialized PlainNode
    /// 2. The storage reference remains valid for the lifetime 'a
    /// 3. The node at index_pointer is not modified while this IndexFullDistanceMeasure exists
    pub unsafe fn with_index_pointer<T: StatsNodeRead>(
        storage: &'a PlainStorage<'a>,
        index_pointer: IndexPointer,
        stats: &mut T,
    ) -> Self {
        let anchor = unsafe { PlainNode::read(storage.index, index_pointer, stats) };
        Self {
            readable_node: anchor,
            storage,
        }
    }

    /// # Safety
    ///
    /// The caller must ensure that:
    /// 1. The readable_node is valid and points to a properly initialized PlainNode
    /// 2. The storage reference remains valid for the lifetime 'a
    /// 3. The node at readable_node is not modified while this IndexFullDistanceMeasure exists
    pub unsafe fn with_readable_node(
        storage: &'a PlainStorage<'a>,
        readable_node: ReadablePlainNode<'a>,
    ) -> Self {
        Self {
            readable_node,
            storage,
        }
    }
}

impl NodeDistanceMeasure for IndexFullDistanceMeasure<'_> {
    /// Read the node at `index_pointer` and compute its full-vector distance
    /// to the anchor node using the storage's configured distance function.
    unsafe fn get_distance<T: StatsNodeRead + StatsDistanceComparison>(
        &self,
        index_pointer: IndexPointer,
        stats: &mut T,
    ) -> f32 {
        let other = PlainNode::read(self.storage.index, index_pointer, stats);
        let other_node = other.get_archived_node();
        let anchor_node = self.readable_node.get_archived_node();
        assert!(!other_node.vector.is_empty());
        assert!(other_node.vector.len() == anchor_node.vector.len());
        let distance_fn = self.storage.get_distance_function();
        distance_fn(other_node.vector.as_slice(), anchor_node.vector.as_slice())
    }
}
--------------------------------------------------------------------------------
/pgvectorscale/src/access_method/plain/node.rs:
--------------------------------------------------------------------------------
use std::pin::Pin;

use pgrx::pg_sys::{InvalidBlockNumber, InvalidOffsetNumber};
use pgrx::*;
use pgvectorscale_derive::{Readable, Writeable};
use rkyv::vec::ArchivedVec;
use rkyv::{Archive, Deserialize, Serialize};

use crate::access_method::graph::neighbor_with_distance::NeighborWithDistance;
use crate::access_method::meta_page::MetaPage;
use crate::access_method::node::{ReadableNode, WriteableNode};
use crate::access_method::storage::{ArchivedData, NodeVacuum};
use crate::util::{ArchivedItemPointer, HeapPointer, ItemPointer, ReadableBuffer, WritableBuffer};

#[derive(Archive, Deserialize, Serialize, Readable, Writeable)]
#[archive(check_bytes)]
pub struct PlainNode {
    pub vector: Vec<f32>,
    pub pq_vector: Vec<u8>,
neighbor_index_pointers: Vec<ItemPointer>,
    pub heap_item_pointer: HeapPointer,
}

impl PlainNode {
    // Builds a node whose serialized size does not depend on how many
    // neighbors are currently set (fixed-length arrays; see comments below).
    fn new_internal(
        vector: Vec<f32>,
        pq_vector: Vec<u8>,
        heap_item_pointer: ItemPointer,
        meta_page: &MetaPage,
    ) -> Self {
        let num_neighbors = meta_page.get_num_neighbors();
        Self {
            vector,
            // always use vectors of num_clusters in length because we never want the serialized size of a Node to change
            pq_vector,
            // always use vectors of num_neighbors in length because we never want the serialized size of a Node to change
            neighbor_index_pointers: (0..num_neighbors)
                .map(|_| ItemPointer::new(InvalidBlockNumber, InvalidOffsetNumber))
                .collect(),
            heap_item_pointer,
        }
    }

    /// Node storing the full vector and an empty quantized (pq) vector.
    pub fn new_for_full_vector(
        vector: Vec<f32>,
        heap_item_pointer: ItemPointer,
        meta_page: &MetaPage,
    ) -> Self {
        let pq_vector = Vec::with_capacity(0);
        Self::new_internal(vector, pq_vector, heap_item_pointer, meta_page)
    }
}

/// contains helpers for mutate-in-place. See struct_mutable_refs in test_alloc.rs in rkyv
impl ArchivedPlainNode {
    // A node counts as deleted when its heap pointer offset is invalid.
    pub fn is_deleted(&self) -> bool {
        self.heap_item_pointer.offset == InvalidOffsetNumber
    }

    pub fn delete(self: Pin<&mut Self>) {
        //TODO: actually optimize the deletes by removing index tuples. For now just mark it.
        let mut heap_pointer = unsafe { self.map_unchecked_mut(|s| &mut s.heap_item_pointer) };
        heap_pointer.offset = InvalidOffsetNumber;
        heap_pointer.block_number = InvalidBlockNumber;
    }

    // Pinned mutable access to the neighbor pointer array for in-place edits.
    pub fn neighbor_index_pointer(
        self: Pin<&mut Self>,
    ) -> Pin<&mut ArchivedVec<ArchivedItemPointer>> {
        unsafe { self.map_unchecked_mut(|s| &mut s.neighbor_index_pointers) }
    }

    // The neighbor list is terminated by the first InvalidBlockNumber entry,
    // or it occupies the full fixed-length array.
    pub fn num_neighbors(&self) -> usize {
        self.neighbor_index_pointers
            .iter()
            .position(|f| f.block_number == InvalidBlockNumber)
            .unwrap_or(self.neighbor_index_pointers.len())
    }

    pub fn iter_neighbors(&self) -> impl Iterator<Item = ItemPointer> + '_ {
        self.neighbor_index_pointers
            .iter()
            .take(self.num_neighbors())
            .map(|ip| ip.deserialize_item_pointer())
    }

    // Overwrite the neighbor list in place and (if shorter than the fixed
    // capacity) write an invalid-pointer end marker after the last entry.
    pub fn set_neighbors(
        mut self: Pin<&mut Self>,
        neighbors: &[NeighborWithDistance],
        meta_page: &MetaPage,
    ) {
        for (i, new_neighbor) in neighbors.iter().enumerate() {
            let mut a_index_pointer = self.as_mut().neighbor_index_pointer().index_pin(i);
            //TODO hate that we have to set each field like this
            a_index_pointer.block_number =
                new_neighbor.get_index_pointer_to_neighbor().block_number;
            a_index_pointer.offset = new_neighbor.get_index_pointer_to_neighbor().offset;
        }
        //set the marker that the list ended
        if neighbors.len() < meta_page.get_num_neighbors() as _ {
            let mut past_last_index_pointers =
                self.neighbor_index_pointer().index_pin(neighbors.len());
            past_last_index_pointers.block_number = InvalidBlockNumber;
            past_last_index_pointers.offset = InvalidOffsetNumber;
        }
    }
}

impl ArchivedData for ArchivedPlainNode {
    fn get_index_pointer_to_neighbors(&self) -> Vec<ItemPointer> {
        self.iter_neighbors().collect()
    }

    fn is_deleted(&self) -> bool {
        self.heap_item_pointer.offset == InvalidOffsetNumber
    }

    fn get_heap_item_pointer(&self) -> HeapPointer {
        self.heap_item_pointer.deserialize_item_pointer()
    }
}

impl NodeVacuum for ArchivedPlainNode {
    // Reinterpret a raw page buffer as a pinned archived node for vacuum.
    fn with_data(data: &mut [u8]) -> Pin<&mut Self> {
        ArchivedPlainNode::with_data(data)
    }

    fn delete(self: Pin<&mut Self>) {
        //TODO: actually optimize the deletes by removing index tuples. For now just mark it.
        let mut heap_pointer = unsafe { self.map_unchecked_mut(|s| &mut s.heap_item_pointer) };
        heap_pointer.offset = InvalidOffsetNumber;
        heap_pointer.block_number = InvalidBlockNumber;
    }
}
--------------------------------------------------------------------------------
/pgvectorscale/src/access_method/plain/tests.rs:
--------------------------------------------------------------------------------
#[cfg(any(test, feature = "pg_test"))]
#[pgrx::pg_schema]
mod tests {

    use pgrx::*;

    use crate::access_method::distance::DistanceType;

    #[pg_test]
    unsafe fn test_plain_storage_index_creation_many_neighbors() -> spi::Result<()> {
        crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold(
            DistanceType::Cosine,
            "num_neighbors=38, storage_layout = plain",
            "plain_many_neighbors",
            1536,
        )?;
        Ok(())
    }

    #[pg_test]
    unsafe fn test_plain_storage_index_creation_few_neighbors() -> spi::Result<()> {
        //a test with few neighbors tests the case that nodes share a page, which has caused deadlocks in the past.
23 | crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( 24 | DistanceType::Cosine, 25 | "num_neighbors=10, storage_layout = plain", 26 | "plain_few_neighbors", 27 | 1536, 28 | )?; 29 | Ok(()) 30 | } 31 | 32 | #[test] 33 | fn test_plain_storage_delete_vacuum_plain() { 34 | crate::access_method::vacuum::tests::test_delete_vacuum_plain_scaffold( 35 | "num_neighbors = 38, storage_layout = plain", 36 | ); 37 | } 38 | 39 | #[test] 40 | fn test_plain_storage_delete_vacuum_full() { 41 | crate::access_method::vacuum::tests::test_delete_vacuum_full_scaffold( 42 | "num_neighbors = 38, storage_layout = plain", 43 | ); 44 | } 45 | 46 | #[test] 47 | fn test_plain_storage_update_with_null() { 48 | crate::access_method::vacuum::tests::test_update_with_null_scaffold( 49 | "num_neighbors = 38, storage_layout = plain", 50 | ); 51 | } 52 | 53 | #[pg_test] 54 | unsafe fn test_plain_storage_empty_table_insert() -> spi::Result<()> { 55 | crate::access_method::build::tests::test_empty_table_insert_scaffold( 56 | "num_neighbors=38, storage_layout = plain", 57 | ) 58 | } 59 | 60 | #[pg_test] 61 | unsafe fn test_plain_storage_insert_empty_insert() -> spi::Result<()> { 62 | crate::access_method::build::tests::test_insert_empty_insert_scaffold( 63 | "num_neighbors=38, storage_layout = plain", 64 | ) 65 | } 66 | 67 | #[pg_test] 68 | unsafe fn test_plain_storage_num_dimensions_cosine() -> spi::Result<()> { 69 | crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( 70 | DistanceType::Cosine, 71 | "num_neighbors=38, storage_layout = plain, num_dimensions=768", 72 | "plain_num_dimensions", 73 | 3072, 74 | )?; 75 | Ok(()) 76 | } 77 | 78 | #[pg_test] 79 | unsafe fn test_plain_storage_num_dimensions_l2() -> spi::Result<()> { 80 | crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( 81 | DistanceType::L2, 82 | "num_neighbors=38, storage_layout = plain, num_dimensions=768", 83 | "plain_num_dimensions", 84 | 3072, 85 | )?; 86 
| Ok(()) 87 | } 88 | 89 | #[pg_test] 90 | #[should_panic] 91 | unsafe fn test_plain_storage_num_dimensions_ip() -> spi::Result<()> { 92 | // Should panic because combination of inner product and plain storage 93 | // is not supported. 94 | crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( 95 | DistanceType::InnerProduct, 96 | "num_neighbors=38, storage_layout = plain, num_dimensions=768", 97 | "plain_num_dimensions", 98 | 3072, 99 | )?; 100 | Ok(()) 101 | } 102 | 103 | #[pg_test] 104 | unsafe fn test_plain_storage_index_updates_cosine() -> spi::Result<()> { 105 | crate::access_method::build::tests::test_index_updates( 106 | DistanceType::Cosine, 107 | "storage_layout = plain, num_neighbors=30", 108 | 50, 109 | "plain", 110 | )?; 111 | Ok(()) 112 | } 113 | 114 | #[pg_test] 115 | unsafe fn test_plain_storage_index_updates_l2() -> spi::Result<()> { 116 | crate::access_method::build::tests::test_index_updates( 117 | DistanceType::L2, 118 | "storage_layout = plain, num_neighbors=30", 119 | 50, 120 | "plain", 121 | )?; 122 | Ok(()) 123 | } 124 | 125 | #[pg_test] 126 | #[should_panic] 127 | unsafe fn test_plain_storage_index_updates_ip() -> spi::Result<()> { 128 | // Should panic because combination of inner product and plain storage 129 | // is not supported. 
130 | crate::access_method::build::tests::test_index_updates( 131 | DistanceType::InnerProduct, 132 | "storage_layout = plain, num_neighbors=30", 133 | 50, 134 | "plain", 135 | )?; 136 | Ok(()) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/sbq/cache.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{ 4 | access_method::stats::StatsNodeRead, 5 | util::{IndexPointer, ItemPointer}, 6 | }; 7 | 8 | use super::{SbqSpeedupStorage, SbqVectorElement}; 9 | 10 | pub struct QuantizedVectorCache { 11 | quantized_vector_map: HashMap<ItemPointer, Vec<SbqVectorElement>>, 12 | } 13 | 14 | /* should be a LRU cache for quantized vector. For now cheat and never evict 15 | TODO: implement LRU cache 16 | */ 17 | impl QuantizedVectorCache { 18 | pub fn new(capacity: usize) -> Self { 19 | Self { 20 | quantized_vector_map: HashMap::with_capacity(capacity), 21 | } 22 | } 23 | 24 | pub fn get<S: StatsNodeRead>( 25 | &mut self, 26 | index_pointer: IndexPointer, 27 | storage: &SbqSpeedupStorage, 28 | stats: &mut S, 29 | ) -> &[SbqVectorElement] { 30 | let vec = self 31 | .quantized_vector_map 32 | .entry(index_pointer) 33 | .or_insert_with(|| { 34 | storage.get_quantized_vector_from_index_pointer(index_pointer, stats) 35 | }); 36 | vec.as_slice() 37 | } 38 | 39 | /* Ensure that all these elements are in the cache. If the capacity isn't big enough throw an error. 
40 | must_get must succeed on all the elements after this call prior to another get or preload call */ 41 | 42 | pub fn preload<I: Iterator<Item = IndexPointer>, S: StatsNodeRead>( 43 | &mut self, 44 | index_pointers: I, 45 | storage: &SbqSpeedupStorage, 46 | stats: &mut S, 47 | ) { 48 | for index_pointer in index_pointers { 49 | self.get(index_pointer, storage, stats); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/sbq/mod.rs: -------------------------------------------------------------------------------- 1 | mod cache; 2 | pub mod node; 3 | pub mod quantize; 4 | pub mod storage; 5 | mod tests; 6 | 7 | use super::{ 8 | distance::distance_xor_optimized, 9 | graph::neighbor_store::GraphNeighborStore, 10 | labels::LabeledVector, 11 | stats::{StatsDistanceComparison, StatsNodeRead, StatsNodeWrite}, 12 | storage::NodeDistanceMeasure, 13 | }; 14 | 15 | use quantize::SbqQuantizer; 16 | 17 | use pgrx::PgRelation; 18 | use rkyv::{Archive, Deserialize, Serialize}; 19 | use storage::SbqSpeedupStorage; 20 | 21 | use super::meta_page::MetaPage; 22 | use crate::access_method::node::{ReadableNode, WriteableNode}; 23 | use crate::util::{ 24 | chain::{ChainItemReader, ChainTapeWriter}, 25 | page::{PageType, ReadablePage}, 26 | tape::Tape, 27 | IndexPointer, ItemPointer, ReadableBuffer, WritableBuffer, 28 | }; 29 | use pgvectorscale_derive::{Readable, Writeable}; 30 | 31 | pub type SbqVectorElement = u64; 32 | const BITS_STORE_TYPE_SIZE: usize = 64; 33 | 34 | #[derive(Archive, Deserialize, Serialize, Readable, Writeable)] 35 | #[archive(check_bytes)] 36 | #[repr(C)] 37 | pub struct SbqMeansV1 { 38 | count: u64, 39 | means: Vec<f32>, 40 | m2: Vec<f32>, 41 | } 42 | 43 | impl SbqMeansV1 { 44 | pub unsafe fn load<S: StatsNodeRead>( 45 | index: &PgRelation, 46 | mut quantizer: SbqQuantizer, 47 | qip: ItemPointer, 48 | stats: &mut S, 49 | ) -> SbqQuantizer { 50 | assert!(quantizer.use_mean); 51 
    /// Persist this quantizer's training state (sample count, per-dimension
    /// means, and M2 sums) as a single V1 node in the index, returning the
    /// pointer to the written item.
    ///
    /// NOTE(review): the tape here is opened with `PageType::SbqMeans`,
    /// while the read path dispatches `SbqMeansV1` nodes on
    /// `PageType::SbqMeansV1` — confirm this writer is legacy/unused or
    /// that the page type is intentional.
    pub unsafe fn store<S: StatsNodeWrite>(
        index: &PgRelation,
        quantizer: &SbqQuantizer,
        stats: &mut S,
    ) -> ItemPointer {
        let mut tape = Tape::new(index, PageType::SbqMeans);
        let node = SbqMeansV1 {
            count: quantizer.count,
            means: quantizer.mean.to_vec(),
            m2: quantizer.m2.to_vec(),
        };
        let ptr = node.write(&mut tape, stats);
        tape.close();
        ptr
    }
}

/// Current on-disk representation of the quantizer's training state,
/// serialized with rkyv and written via the chained tape (see
/// `SbqMeans::store` / `SbqMeans::load`).
#[derive(Archive, Deserialize, Serialize)]
#[archive(check_bytes)]
#[repr(C)]
pub struct SbqMeans {
    // Number of training samples accumulated.
    count: u64,
    // Per-dimension running means.
    means: Vec<f32>,
    // Per-dimension running sums of squared deviations (M2).
    m2: Vec<f32>,
}
121 | } 122 | 123 | pub unsafe fn store<S: StatsNodeWrite>( 124 | index: &PgRelation, 125 | quantizer: &SbqQuantizer, 126 | stats: &mut S, 127 | ) -> ItemPointer { 128 | let bq = SbqMeans { 129 | count: quantizer.count, 130 | means: quantizer.mean.clone(), 131 | m2: quantizer.m2.clone(), 132 | }; 133 | let mut tape = ChainTapeWriter::new(index, PageType::SbqMeans, stats); 134 | let buf = rkyv::to_bytes::<_, 1024>(&bq).unwrap(); 135 | tape.write(&buf) 136 | } 137 | } 138 | 139 | pub struct SbqSearchDistanceMeasure { 140 | quantized_vector: Vec<SbqVectorElement>, 141 | query: LabeledVector, 142 | } 143 | 144 | impl SbqSearchDistanceMeasure { 145 | pub fn new(quantizer: &SbqQuantizer, query: LabeledVector) -> SbqSearchDistanceMeasure { 146 | SbqSearchDistanceMeasure { 147 | quantized_vector: quantizer.quantize(query.vec().to_index_slice()), 148 | query, 149 | } 150 | } 151 | 152 | pub fn calculate_bq_distance<S: StatsDistanceComparison>( 153 | &self, 154 | bq_vector: &[SbqVectorElement], 155 | gns: &GraphNeighborStore, 156 | stats: &mut S, 157 | ) -> f32 { 158 | assert!(!bq_vector.is_empty()); 159 | stats.record_quantized_distance_comparison(); 160 | let (a, b) = match gns { 161 | GraphNeighborStore::Disk => { 162 | debug_assert!( 163 | self.quantized_vector.len() == bq_vector.len(), 164 | "self.quantized_vector.len()={} bq_vector.len()={}", 165 | self.quantized_vector.len(), 166 | bq_vector.len() 167 | ); 168 | (self.quantized_vector.as_slice(), bq_vector) 169 | } 170 | GraphNeighborStore::Builder(_b) => { 171 | debug_assert!( 172 | self.quantized_vector.len() == bq_vector.len(), 173 | "self.quantized_vector.len()={} bq_vector.len()={}", 174 | self.quantized_vector.len(), 175 | bq_vector.len() 176 | ); 177 | (self.quantized_vector.as_slice(), bq_vector) 178 | } 179 | }; 180 | 181 | let count_ones = distance_xor_optimized(a, b); 182 | //dot product is LOWER the more xors that lead to 1 becaues that means a negative times a positive = negative component 183 | //but the 
distance is 1 - dot product, so the more count_ones the higher the distance. 184 | // one other check for distance(a,a), xor=0, count_ones=0, distance=0 185 | count_ones as f32 186 | } 187 | } 188 | 189 | pub struct SbqNodeDistanceMeasure<'a> { 190 | vec: Vec<SbqVectorElement>, 191 | storage: &'a SbqSpeedupStorage<'a>, 192 | } 193 | 194 | impl<'a> SbqNodeDistanceMeasure<'a> { 195 | pub unsafe fn with_index_pointer<T: StatsNodeRead>( 196 | storage: &'a SbqSpeedupStorage<'a>, 197 | index_pointer: IndexPointer, 198 | stats: &mut T, 199 | ) -> Self { 200 | let cache = &mut storage.qv_cache.borrow_mut(); 201 | let vec = cache.get(index_pointer, storage, stats); 202 | Self { 203 | vec: vec.to_vec(), 204 | storage, 205 | } 206 | } 207 | } 208 | 209 | impl NodeDistanceMeasure for SbqNodeDistanceMeasure<'_> { 210 | unsafe fn get_distance<T: StatsNodeRead + StatsDistanceComparison>( 211 | &self, 212 | index_pointer: IndexPointer, 213 | stats: &mut T, 214 | ) -> f32 { 215 | let cache = &mut self.storage.qv_cache.borrow_mut(); 216 | let vec1 = cache.get(index_pointer, self.storage, stats); 217 | distance_xor_optimized(vec1, self.vec.as_slice()) as f32 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/sbq/quantize.rs: -------------------------------------------------------------------------------- 1 | use crate::access_method::meta_page::MetaPage; 2 | 3 | use super::{SbqVectorElement, BITS_STORE_TYPE_SIZE}; 4 | 5 | #[derive(Clone)] 6 | pub struct SbqQuantizer { 7 | pub use_mean: bool, 8 | training: bool, 9 | pub count: u64, 10 | pub mean: Vec<f32>, 11 | pub m2: Vec<f32>, 12 | pub num_bits_per_dimension: u8, 13 | } 14 | 15 | impl SbqQuantizer { 16 | pub fn new(meta_page: &MetaPage) -> SbqQuantizer { 17 | Self { 18 | use_mean: true, 19 | training: false, 20 | count: 0, 21 | mean: vec![], 22 | m2: vec![], 23 | num_bits_per_dimension: meta_page.get_bq_num_bits_per_dimension(), 24 | } 25 | } 26 | 
    /// Restore training state captured by a previous `start_training` /
    /// `add_sample` run: sample count, per-dimension running means, and
    /// per-dimension sums of squared deviations (M2).
    pub fn load(&mut self, count: u64, mean: Vec<f32>, m2: Vec<f32>) {
        self.count = count;
        self.mean = mean;
        self.m2 = m2
    }

    /// Number of `SbqVectorElement` words needed to hold a quantized vector
    /// of `full_vector_size` dimensions at this quantizer's
    /// bits-per-dimension setting.
    pub fn quantized_size(&self, full_vector_size: usize) -> usize {
        Self::quantized_size_internal(full_vector_size, self.num_bits_per_dimension)
    }

    /// Word count for `full_vector_size * num_bits_per_dimension` bits,
    /// rounded up to whole 64-bit storage words.
    pub fn quantized_size_internal(full_vector_size: usize, num_bits_per_dimension: u8) -> usize {
        let num_bits = full_vector_size * num_bits_per_dimension as usize;

        if num_bits % BITS_STORE_TYPE_SIZE == 0 {
            num_bits / BITS_STORE_TYPE_SIZE
        } else {
            (num_bits / BITS_STORE_TYPE_SIZE) + 1
        }
    }

    /// Same as `quantized_size_internal`, but expressed in bytes.
    pub fn quantized_size_bytes(num_dimensions: usize, num_bits_per_dimension: u8) -> usize {
        Self::quantized_size_internal(num_dimensions, num_bits_per_dimension)
            * std::mem::size_of::<SbqVectorElement>()
    }

    /// Quantize `full_vector` into a packed bit vector.
    ///
    /// With `use_mean` and 1 bit/dimension: bit i is set iff the value
    /// exceeds the trained mean for dimension i. With more bits per
    /// dimension: the value's z-score selects how many of the dimension's
    /// bits to set, unary-style from the left. Without `use_mean`: bit i is
    /// simply the sign test `v > 0`. Must not be called mid-training.
    pub fn quantize(&self, full_vector: &[f32]) -> Vec<SbqVectorElement> {
        assert!(!self.training);
        if self.use_mean {
            let mut res_vector = vec![0; self.quantized_size(full_vector.len())];

            if self.num_bits_per_dimension == 1 {
                for (i, &v) in full_vector.iter().enumerate() {
                    if v > self.mean[i] {
                        res_vector[i / BITS_STORE_TYPE_SIZE] |= 1 << (i % BITS_STORE_TYPE_SIZE);
                    }
                }
            } else {
                for (i, &v) in full_vector.iter().enumerate() {
                    let mean = self.mean[i];
                    let variance = self.m2[i] / self.count as f32;
                    let std_dev = variance.sqrt();
                    let ranges = self.num_bits_per_dimension + 1;

                    // NOTE(review): if variance is 0, std_dev is 0 and
                    // v_z_score becomes NaN/inf; NaN fails `index < 1.0` and
                    // `floor() as usize` saturates NaN to 0, yielding all
                    // zeros for that dimension — confirm this is intended.
                    let v_z_score = (v - mean) / std_dev;
                    let index = (v_z_score + 2.0) / (4.0 / ranges as f32); //we consider z scores between -2 and 2 and divide them into {ranges} ranges

                    let bit_position = i * self.num_bits_per_dimension as usize;
                    if index < 1.0 {
                        //all zeros
                    } else {
                        let count_ones =
                            (index.floor() as usize).min(self.num_bits_per_dimension as usize);
                        //fill in count_ones bits from the left
                        // ex count_ones=1: 100
                        // ex count_ones=2: 110
                        // ex count_ones=3: 111
                        for j in 0..count_ones {
                            res_vector[(bit_position + j) / BITS_STORE_TYPE_SIZE] |=
                                1 << ((bit_position + j) % BITS_STORE_TYPE_SIZE);
                        }
                    }
                }
            }
            res_vector
        } else {
            let mut res_vector = vec![0; self.quantized_size(full_vector.len())];

            for (i, &v) in full_vector.iter().enumerate() {
                if v > 0.0 {
                    res_vector[i / BITS_STORE_TYPE_SIZE] |= 1 << (i % BITS_STORE_TYPE_SIZE);
                }
            }

            res_vector
        }
    }

    /// Begin a training pass: reset the sample count and zero the running
    /// means (and M2 sums when more than one bit per dimension is used),
    /// sized to the number of indexed dimensions from the meta page.
    pub fn start_training(&mut self, meta_page: &MetaPage) {
        self.training = true;
        if self.use_mean {
            self.count = 0;
            self.mean = vec![0.0; meta_page.get_num_dimensions_to_index() as _];
            if self.num_bits_per_dimension > 1 {
                self.m2 = vec![0.0; meta_page.get_num_dimensions_to_index() as _];
            }
        }
    }

    /// Fold one training vector into the running statistics.
    ///
    /// Uses Welford's online algorithm: `delta` is computed against the
    /// pre-update mean, `delta2` against the post-update mean, and their
    /// product accumulates into M2 (so variance = m2 / count).
    pub fn add_sample(&mut self, sample: &[f32]) {
        if self.use_mean {
            self.count += 1;
            assert!(self.mean.len() == sample.len());

            if self.num_bits_per_dimension > 1 {
                assert!(self.m2.len() == sample.len());
                // delta: deviation from the mean *before* this sample.
                let delta: Vec<_> = self
                    .mean
                    .iter()
                    .zip(sample.iter())
                    .map(|(m, s)| s - *m)
                    .collect();

                // Incremental mean update: m += (s - m) / n.
                self.mean
                    .iter_mut()
                    .zip(sample.iter())
                    .for_each(|(m, s)| *m += (s - *m) / self.count as f32);

                // delta2: deviation from the mean *after* the update.
                let delta2 = self.mean.iter().zip(sample.iter()).map(|(m, s)| s - *m);

                self.m2
                    .iter_mut()
                    .zip(delta.iter())
                    .zip(delta2)
                    .for_each(|((m2, d), d2)| *m2 += d * d2);
            } else {
                // Single-bit quantization only needs the running mean.
                self.mean
                    .iter_mut()
                    .zip(sample.iter())
                    .for_each(|(m, s)| *m += (s - *m) / self.count as f32);
            }
        }
    }

    /// End the training pass; `quantize` may be called afterwards.
    pub fn finish_training(&mut self) {
        self.training = false;
    }
158 | ) -> Vec<SbqVectorElement> { 159 | self.quantize(full_vector) 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/sbq/tests.rs: -------------------------------------------------------------------------------- 1 | #[cfg(any(test, feature = "pg_test"))] 2 | #[pgrx::pg_schema] 3 | mod tests { 4 | use pgrx::*; 5 | 6 | use crate::access_method::distance::DistanceType; 7 | 8 | #[pg_test] 9 | unsafe fn test_bq_compressed_index_creation_default_neighbors() -> spi::Result<()> { 10 | crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( 11 | DistanceType::Cosine, 12 | "storage_layout = memory_optimized", 13 | "bq_compressed_default_neighbors", 14 | 1536, 15 | )?; 16 | Ok(()) 17 | } 18 | 19 | #[pg_test] 20 | unsafe fn test_bq_compressed_storage_index_creation_few_neighbors() -> spi::Result<()> { 21 | //a test with few neighbors tests the case that nodes share a page, which has caused deadlocks in the past. 
22 | crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( 23 | DistanceType::Cosine, 24 | "num_neighbors=10, storage_layout = memory_optimized", 25 | "bq_compressed_few_neighbors", 26 | 1536, 27 | )?; 28 | Ok(()) 29 | } 30 | 31 | #[test] 32 | fn test_bq_compressed_storage_delete_vacuum_plain() { 33 | crate::access_method::vacuum::tests::test_delete_vacuum_plain_scaffold( 34 | "num_neighbors = 10, storage_layout = memory_optimized", 35 | ); 36 | } 37 | 38 | #[test] 39 | fn test_bq_compressed_storage_delete_vacuum_full() { 40 | crate::access_method::vacuum::tests::test_delete_vacuum_full_scaffold( 41 | "num_neighbors = 38, storage_layout = memory_optimized", 42 | ); 43 | } 44 | 45 | #[test] 46 | fn test_bq_compressed_storage_update_with_null() { 47 | crate::access_method::vacuum::tests::test_update_with_null_scaffold( 48 | "num_neighbors = 38, storage_layout = memory_optimized", 49 | ); 50 | } 51 | #[pg_test] 52 | unsafe fn test_bq_compressed_storage_empty_table_insert() -> spi::Result<()> { 53 | crate::access_method::build::tests::test_empty_table_insert_scaffold( 54 | "num_neighbors=38, storage_layout = memory_optimized", 55 | ) 56 | } 57 | 58 | #[pg_test] 59 | unsafe fn test_bq_compressed_storage_insert_empty_insert() -> spi::Result<()> { 60 | crate::access_method::build::tests::test_insert_empty_insert_scaffold( 61 | "num_neighbors=38, storage_layout = memory_optimized", 62 | ) 63 | } 64 | 65 | #[pg_test] 66 | unsafe fn test_bq_compressed_storage_index_creation_num_dimensions() -> spi::Result<()> { 67 | crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( 68 | DistanceType::Cosine, 69 | "storage_layout = memory_optimized, num_dimensions=768", 70 | "bq_compressed_num_dimensions", 71 | 3072, 72 | )?; 73 | Ok(()) 74 | } 75 | 76 | #[pg_test] 77 | unsafe fn test_bq_compressed_storage_index_updates_cosine() -> spi::Result<()> { 78 | crate::access_method::build::tests::test_index_updates( 79 | DistanceType::Cosine, 80 
| "storage_layout = memory_optimized, num_neighbors=10", 81 | 300, 82 | "bq_compressed", 83 | )?; 84 | Ok(()) 85 | } 86 | 87 | #[pg_test] 88 | unsafe fn test_bq_compressed_storage_index_updates_l2() -> spi::Result<()> { 89 | crate::access_method::build::tests::test_index_updates( 90 | DistanceType::L2, 91 | "storage_layout = memory_optimized, num_neighbors=10", 92 | 300, 93 | "bq_compressed", 94 | )?; 95 | Ok(()) 96 | } 97 | 98 | #[pg_test] 99 | unsafe fn test_bq_compressed_storage_index_updates_ip() -> spi::Result<()> { 100 | crate::access_method::build::tests::test_index_updates( 101 | DistanceType::InnerProduct, 102 | "storage_layout = memory_optimized, num_neighbors=10", 103 | 300, 104 | "bq_compressed", 105 | )?; 106 | Ok(()) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/stats.rs: -------------------------------------------------------------------------------- 1 | use std::time::Instant; 2 | 3 | pub trait StatsNodeRead { 4 | fn record_read(&mut self); 5 | } 6 | 7 | pub trait StatsHeapNodeRead { 8 | fn record_heap_read(&mut self); 9 | } 10 | 11 | pub trait StatsNodeModify { 12 | fn record_modify(&mut self); 13 | } 14 | 15 | pub trait StatsNodeWrite { 16 | fn record_write(&mut self); 17 | } 18 | 19 | pub trait StatsDistanceComparison { 20 | fn record_full_distance_comparison(&mut self); 21 | fn record_quantized_distance_comparison(&mut self); 22 | } 23 | 24 | pub trait StatsNodeVisit { 25 | fn record_visit(&mut self); 26 | fn record_candidate(&mut self); 27 | } 28 | 29 | #[derive(Debug)] 30 | pub struct PruneNeighborStats { 31 | pub calls: usize, 32 | pub distance_comparisons: usize, 33 | pub node_reads: usize, 34 | pub node_modify: usize, 35 | pub num_neighbors_before_prune: usize, 36 | pub num_neighbors_after_prune: usize, 37 | } 38 | 39 | impl Default for PruneNeighborStats { 40 | fn default() -> Self { 41 | Self::new() 42 | } 43 | } 44 | 45 | impl PruneNeighborStats { 
46 | pub fn new() -> Self { 47 | PruneNeighborStats { 48 | calls: 0, 49 | distance_comparisons: 0, 50 | node_reads: 0, 51 | node_modify: 0, 52 | num_neighbors_before_prune: 0, 53 | num_neighbors_after_prune: 0, 54 | } 55 | } 56 | } 57 | 58 | impl StatsDistanceComparison for PruneNeighborStats { 59 | fn record_full_distance_comparison(&mut self) { 60 | self.distance_comparisons += 1; 61 | } 62 | 63 | fn record_quantized_distance_comparison(&mut self) { 64 | self.distance_comparisons += 1; 65 | } 66 | } 67 | 68 | impl StatsNodeRead for PruneNeighborStats { 69 | fn record_read(&mut self) { 70 | self.node_reads += 1; 71 | } 72 | } 73 | 74 | impl StatsNodeModify for PruneNeighborStats { 75 | fn record_modify(&mut self) { 76 | self.node_modify += 1; 77 | } 78 | } 79 | 80 | #[derive(Debug)] 81 | pub struct GreedySearchStats { 82 | calls: usize, 83 | full_distance_comparisons: usize, 84 | node_reads: usize, 85 | node_heap_reads: usize, 86 | quantized_distance_comparisons: usize, 87 | visited_nodes: usize, 88 | candidate_nodes: usize, 89 | } 90 | 91 | impl Default for GreedySearchStats { 92 | fn default() -> Self { 93 | Self::new() 94 | } 95 | } 96 | 97 | impl GreedySearchStats { 98 | pub fn new() -> Self { 99 | GreedySearchStats { 100 | calls: 0, 101 | full_distance_comparisons: 0, 102 | node_reads: 0, 103 | node_heap_reads: 0, 104 | quantized_distance_comparisons: 0, 105 | visited_nodes: 0, 106 | candidate_nodes: 0, 107 | } 108 | } 109 | 110 | pub fn combine(&mut self, other: &Self) { 111 | self.calls += other.calls; 112 | self.full_distance_comparisons += other.full_distance_comparisons; 113 | self.node_reads += other.node_reads; 114 | self.node_heap_reads += other.node_heap_reads; 115 | self.quantized_distance_comparisons += other.quantized_distance_comparisons; 116 | } 117 | 118 | pub fn get_calls(&self) -> usize { 119 | self.calls 120 | } 121 | 122 | pub fn get_node_reads(&self) -> usize { 123 | self.node_reads 124 | } 125 | 126 | pub fn get_node_heap_reads(&self) -> 
usize { 127 | self.node_heap_reads 128 | } 129 | 130 | pub fn get_total_distance_comparisons(&self) -> usize { 131 | self.full_distance_comparisons + self.quantized_distance_comparisons 132 | } 133 | 134 | pub fn get_quantized_distance_comparisons(&self) -> usize { 135 | self.quantized_distance_comparisons 136 | } 137 | 138 | pub fn get_visited_nodes(&self) -> usize { 139 | self.visited_nodes 140 | } 141 | 142 | pub fn get_candidate_nodes(&self) -> usize { 143 | self.candidate_nodes 144 | } 145 | 146 | pub fn get_full_distance_comparisons(&self) -> usize { 147 | self.full_distance_comparisons 148 | } 149 | 150 | pub fn record_call(&mut self) { 151 | self.calls += 1; 152 | } 153 | } 154 | 155 | impl StatsNodeRead for GreedySearchStats { 156 | fn record_read(&mut self) { 157 | self.node_reads += 1; 158 | } 159 | } 160 | 161 | impl StatsHeapNodeRead for GreedySearchStats { 162 | fn record_heap_read(&mut self) { 163 | self.node_heap_reads += 1; 164 | } 165 | } 166 | 167 | impl StatsDistanceComparison for GreedySearchStats { 168 | fn record_full_distance_comparison(&mut self) { 169 | self.full_distance_comparisons += 1; 170 | } 171 | 172 | fn record_quantized_distance_comparison(&mut self) { 173 | self.quantized_distance_comparisons += 1; 174 | } 175 | } 176 | 177 | impl StatsNodeVisit for GreedySearchStats { 178 | fn record_visit(&mut self) { 179 | self.visited_nodes += 1; 180 | } 181 | 182 | fn record_candidate(&mut self) { 183 | self.candidate_nodes += 1; 184 | } 185 | } 186 | 187 | #[derive(Debug)] 188 | pub struct QuantizerStats { 189 | pub node_reads: usize, 190 | pub node_writes: usize, 191 | } 192 | 193 | impl Default for QuantizerStats { 194 | fn default() -> Self { 195 | Self::new() 196 | } 197 | } 198 | 199 | impl QuantizerStats { 200 | pub fn new() -> Self { 201 | QuantizerStats { 202 | node_reads: 0, 203 | node_writes: 0, 204 | } 205 | } 206 | } 207 | 208 | impl StatsNodeRead for QuantizerStats { 209 | fn record_read(&mut self) { 210 | self.node_reads += 1; 
211 | } 212 | } 213 | 214 | impl StatsNodeWrite for QuantizerStats { 215 | fn record_write(&mut self) { 216 | self.node_writes += 1; 217 | } 218 | } 219 | 220 | #[derive(Debug)] 221 | pub struct InsertStats { 222 | pub prune_neighbor_stats: PruneNeighborStats, 223 | pub greedy_search_stats: GreedySearchStats, 224 | pub quantizer_stats: QuantizerStats, 225 | pub node_reads: usize, 226 | pub node_modify: usize, 227 | pub node_writes: usize, 228 | } 229 | 230 | impl Default for InsertStats { 231 | fn default() -> Self { 232 | Self::new() 233 | } 234 | } 235 | 236 | impl InsertStats { 237 | pub fn new() -> Self { 238 | InsertStats { 239 | prune_neighbor_stats: PruneNeighborStats::new(), 240 | greedy_search_stats: GreedySearchStats::new(), 241 | quantizer_stats: QuantizerStats::new(), 242 | node_reads: 0, 243 | node_modify: 0, 244 | node_writes: 0, 245 | } 246 | } 247 | } 248 | 249 | impl StatsNodeRead for InsertStats { 250 | fn record_read(&mut self) { 251 | self.node_reads += 1; 252 | } 253 | } 254 | 255 | impl StatsNodeModify for InsertStats { 256 | fn record_modify(&mut self) { 257 | self.node_modify += 1; 258 | } 259 | } 260 | 261 | impl StatsNodeWrite for InsertStats { 262 | fn record_write(&mut self) { 263 | self.node_writes += 1; 264 | } 265 | } 266 | 267 | pub struct WriteStats { 268 | pub started: Instant, 269 | pub num_nodes: usize, 270 | pub nodes_read: usize, 271 | pub nodes_modified: usize, 272 | pub nodes_written: usize, 273 | pub prune_stats: PruneNeighborStats, 274 | pub num_neighbors: usize, 275 | } 276 | 277 | impl Default for WriteStats { 278 | fn default() -> Self { 279 | Self::new() 280 | } 281 | } 282 | 283 | impl WriteStats { 284 | pub fn new() -> Self { 285 | Self { 286 | started: Instant::now(), 287 | num_nodes: 0, 288 | prune_stats: PruneNeighborStats::new(), 289 | num_neighbors: 0, 290 | nodes_read: 0, 291 | nodes_modified: 0, 292 | nodes_written: 0, 293 | } 294 | } 295 | } 296 | 297 | impl StatsNodeRead for WriteStats { 298 | fn 
record_read(&mut self) { 299 | self.nodes_read += 1; 300 | } 301 | } 302 | 303 | impl StatsNodeModify for WriteStats { 304 | fn record_modify(&mut self) { 305 | self.nodes_modified += 1; 306 | } 307 | } 308 | 309 | impl StatsNodeWrite for WriteStats { 310 | fn record_write(&mut self) { 311 | self.nodes_written += 1; 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/storage.rs: -------------------------------------------------------------------------------- 1 | use std::pin::Pin; 2 | 3 | use pgrx::{pg_sys, PgBox}; 4 | 5 | use crate::util::{page::PageType, tape::Tape, HeapPointer, IndexPointer, ItemPointer}; 6 | 7 | use super::{ 8 | distance::DistanceFn, 9 | graph::neighbor_store::GraphNeighborStore, 10 | graph::neighbor_with_distance::NeighborWithDistance, 11 | graph::{ListSearchNeighbor, ListSearchResult}, 12 | labels::{LabelSet, LabeledVector}, 13 | meta_page::MetaPage, 14 | stats::{ 15 | GreedySearchStats, StatsDistanceComparison, StatsHeapNodeRead, StatsNodeModify, 16 | StatsNodeRead, StatsNodeWrite, WriteStats, 17 | }, 18 | }; 19 | 20 | /// NodeDistanceMeasure keeps the state to make distance comparisons between two nodes. 21 | pub trait NodeDistanceMeasure { 22 | unsafe fn get_distance<S: StatsNodeRead + StatsDistanceComparison>( 23 | &self, 24 | index_pointer: IndexPointer, 25 | stats: &mut S, 26 | ) -> f32; 27 | } 28 | pub trait ArchivedData { 29 | fn is_deleted(&self) -> bool; 30 | fn get_heap_item_pointer(&self) -> HeapPointer; 31 | fn get_index_pointer_to_neighbors(&self) -> Vec<ItemPointer>; 32 | } 33 | 34 | pub trait NodeVacuum: ArchivedData { 35 | fn with_data(data: &mut [u8]) -> Pin<&mut Self>; 36 | fn delete(self: Pin<&mut Self>); 37 | } 38 | 39 | pub trait Storage { 40 | /// A QueryDistanceMeasure keeps the state to make distance comparison between a query given at initialization and a node. 
41 | type QueryDistanceMeasure; 42 | /// A NodeDistanceMeasure keeps the state to make distance comparison between a node given at initialization and another node. 43 | type NodeDistanceMeasure<'a>: NodeDistanceMeasure 44 | where 45 | Self: 'a; 46 | type ArchivedType<'b>: ArchivedData 47 | where 48 | Self: 'b; 49 | type LSNPrivateData; 50 | 51 | fn page_type() -> PageType; 52 | 53 | fn create_node<S: StatsNodeWrite>( 54 | &self, 55 | full_vector: &[f32], 56 | labels: Option<LabelSet>, 57 | heap_pointer: HeapPointer, 58 | meta_page: &MetaPage, 59 | tape: &mut Tape, 60 | stats: &mut S, 61 | ) -> ItemPointer; 62 | 63 | fn start_training(&mut self, meta_page: &MetaPage); 64 | fn add_sample(&mut self, sample: &[f32]); 65 | fn finish_training(&mut self, meta_page: &mut MetaPage, stats: &mut WriteStats); 66 | 67 | fn finalize_node_at_end_of_build<S: StatsNodeRead + StatsNodeModify>( 68 | &mut self, 69 | meta: &MetaPage, 70 | index_pointer: IndexPointer, 71 | neighbors: &[NeighborWithDistance], 72 | stats: &mut S, 73 | ); 74 | 75 | unsafe fn get_node_distance_measure<'a, S: StatsNodeRead>( 76 | &'a self, 77 | index_pointer: IndexPointer, 78 | stats: &mut S, 79 | ) -> Self::NodeDistanceMeasure<'a>; 80 | 81 | fn get_query_distance_measure(&self, query: LabeledVector) -> Self::QueryDistanceMeasure; 82 | 83 | fn get_full_distance_for_resort<S: StatsHeapNodeRead + StatsDistanceComparison>( 84 | &self, 85 | scan: &PgBox<pg_sys::IndexScanDescData>, 86 | query: &Self::QueryDistanceMeasure, 87 | index_pointer: IndexPointer, 88 | heap_pointer: HeapPointer, 89 | meta_page: &MetaPage, 90 | stats: &mut S, 91 | ) -> Option<f32>; 92 | 93 | fn visit_lsn( 94 | &self, 95 | lsr: &mut ListSearchResult<Self::QueryDistanceMeasure, Self::LSNPrivateData>, 96 | lsn_idx: usize, 97 | gns: &GraphNeighborStore, 98 | no_filter: bool, 99 | ) where 100 | Self: Sized; 101 | 102 | /// Create a ListSearchNeighbor for the start node of the search. If start node 103 | /// already processed (e.g. 
because multiple labels use it), return None. 104 | fn create_lsn_for_start_node( 105 | &self, 106 | lsr: &mut ListSearchResult<Self::QueryDistanceMeasure, Self::LSNPrivateData>, 107 | index_pointer: ItemPointer, 108 | gns: &GraphNeighborStore, 109 | ) -> Option<ListSearchNeighbor<Self::LSNPrivateData>> 110 | where 111 | Self: Sized; 112 | 113 | fn return_lsn( 114 | &self, 115 | lsn: &ListSearchNeighbor<Self::LSNPrivateData>, 116 | stats: &mut GreedySearchStats, 117 | ) -> HeapPointer 118 | where 119 | Self: Sized; 120 | 121 | fn get_neighbors_with_distances_from_disk<S: StatsNodeRead + StatsDistanceComparison>( 122 | &self, 123 | neighbors_of: ItemPointer, 124 | result: &mut Vec<NeighborWithDistance>, 125 | stats: &mut S, 126 | ); 127 | 128 | fn set_neighbors_on_disk<S: StatsNodeModify + StatsNodeRead>( 129 | &self, 130 | meta: &MetaPage, 131 | index_pointer: IndexPointer, 132 | neighbors: &[NeighborWithDistance], 133 | stats: &mut S, 134 | ); 135 | 136 | fn get_distance_function(&self) -> DistanceFn; 137 | 138 | fn get_labels<S: StatsNodeRead>( 139 | &self, 140 | index_pointer: IndexPointer, 141 | stats: &mut S, 142 | ) -> Option<LabelSet>; 143 | } 144 | 145 | #[derive(PartialEq, Debug)] 146 | pub enum StorageType { 147 | Plain = 0, 148 | // R.I.P. SbqSpeedup = 1, 149 | SbqCompression = 2, 150 | } 151 | 152 | pub const DEFAULT_STORAGE_TYPE_STR: &str = "memory_optimized"; 153 | 154 | impl StorageType { 155 | pub fn from_u8(value: u8) -> Self { 156 | match value { 157 | 0 => StorageType::Plain, 158 | 2 => StorageType::SbqCompression, 159 | _ => panic!("Invalid storage type"), 160 | } 161 | } 162 | 163 | pub fn from_str(value: &str) -> Self { 164 | match value.to_lowercase().as_str() { 165 | "plain" => StorageType::Plain, 166 | "bq_compression" | "memory_optimized" => StorageType::SbqCompression, 167 | _ => panic!("Invalid storage type. 
Must be either 'plain' or 'memory_optimized'"), 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/storage_common.rs: -------------------------------------------------------------------------------- 1 | use pgrx::{pg_sys::AttrNumber, PgRelation}; 2 | 3 | pub fn get_num_index_attributes(index: &PgRelation) -> usize { 4 | let natts = unsafe { (*index.rd_index).indnatts as usize }; 5 | assert!(natts <= 2); 6 | natts 7 | } 8 | 9 | pub fn get_index_vector_attribute(index: &PgRelation) -> AttrNumber { 10 | unsafe { 11 | let a = index.rd_index; 12 | let natts = (*a).indnatts; 13 | assert!(natts <= 2); 14 | (*a).indkey.values.as_slice(natts as _)[0] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /pgvectorscale/src/access_method/upgrade_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | #[pgrx::pg_schema] 3 | pub mod tests { 4 | use pgrx::*; 5 | use serial_test::serial; 6 | use std::{fs, path::Path, process::Stdio}; 7 | 8 | fn copy_dir_all(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> std::io::Result<()> { 9 | fs::create_dir_all(&dst)?; 10 | for entry in fs::read_dir(src)? 
{ 11 | let entry = entry?; 12 | let ty = entry.file_type()?; 13 | if ty.is_dir() { 14 | if entry.file_name() == "target" { 15 | continue; 16 | } 17 | copy_dir_all(entry.path(), dst.as_ref().join(entry.file_name()))?; 18 | } else { 19 | fs::copy(entry.path(), dst.as_ref().join(entry.file_name()))?; 20 | } 21 | } 22 | Ok(()) 23 | } 24 | 25 | fn test_upgrade_base( 26 | version: &str, 27 | pgrx_version: &str, 28 | subdirname: &str, 29 | extname: &str, 30 | amname: &str, 31 | ) { 32 | if cfg!(feature = "pg17") 33 | && semver::Version::parse(version).unwrap() < semver::Version::parse("0.4.0").unwrap() 34 | { 35 | // PG17 was not supported before 0.4.0 36 | return; 37 | } 38 | pgrx_tests::run_test( 39 | "test_delete_mock_fn", 40 | None, 41 | crate::pg_test::postgresql_conf_options(), 42 | ) 43 | .unwrap(); 44 | 45 | let (mut client, _) = pgrx_tests::client().unwrap(); 46 | 47 | client 48 | .execute( 49 | &"DROP EXTENSION IF EXISTS vectorscale CASCADE;".to_string(), 50 | &[], 51 | ) 52 | .unwrap(); 53 | 54 | let current_file = file!(); 55 | 56 | // Convert the file path to an absolute path 57 | let current_dir = std::env::current_dir().unwrap(); 58 | let absolute_path_full = std::path::Path::new(¤t_dir).join(current_file); 59 | let mut absolute_path = None; 60 | for ancestor in absolute_path_full.ancestors() { 61 | if std::fs::exists(ancestor.join(".git")).unwrap() { 62 | absolute_path = Some(ancestor.to_path_buf()); 63 | } 64 | } 65 | let absolute_path = absolute_path.expect("Couldn't find root directory"); 66 | 67 | let temp_dir = tempfile::tempdir().unwrap(); 68 | let temp_path = temp_dir.path(); 69 | 70 | copy_dir_all(absolute_path.clone(), temp_dir.path()).unwrap(); 71 | 72 | let pgrx = pgrx_pg_config::Pgrx::from_config().unwrap(); 73 | let pg_version = pg_sys::get_pg_major_version_num(); 74 | let pg_config = pgrx.get(&format!("pg{}", pg_version)).unwrap(); 75 | 76 | let res = std::process::Command::new("git") 77 | .current_dir(temp_path) 78 | .arg("checkout") 79 | 
.arg("-f") 80 | .arg(version) 81 | .output() 82 | .unwrap(); 83 | assert!( 84 | res.status.success(), 85 | "failed: {:?} {:?} {:?}", 86 | res, 87 | absolute_path, 88 | temp_dir.path() 89 | ); 90 | 91 | let pgrx_str = format!("={pgrx_version}"); 92 | let pgrx_dir = format!("pgrx-{pgrx_version}"); 93 | 94 | let res = std::process::Command::new("cargo") 95 | .current_dir(temp_path.join(subdirname)) 96 | .args([ 97 | "install", 98 | "cargo-pgrx", 99 | "--version", 100 | pgrx_str.as_str(), 101 | "--force", 102 | "--root", 103 | temp_path.join(pgrx_dir.as_str()).to_str().unwrap(), 104 | "cargo-pgrx", 105 | ]) 106 | .stdout(Stdio::inherit()) 107 | .stderr(Stdio::piped()) 108 | .output() 109 | .unwrap(); 110 | 111 | assert!(res.status.success(), "failed: {:?}", res); 112 | 113 | let res = std::process::Command::new( 114 | temp_path 115 | .join(pgrx_dir.as_str()) 116 | .join("bin/cargo-pgrx") 117 | .to_str() 118 | .unwrap(), 119 | ) 120 | .current_dir(temp_path.join(subdirname)) 121 | .env( 122 | "CARGO_TARGET_DIR", 123 | temp_path.join(subdirname).join("target"), 124 | ) 125 | .env("CARGO_PKG_VERSION", version) 126 | .arg("pgrx") 127 | .arg("install") 128 | .arg("--test") 129 | .arg("--pg-config") 130 | .arg(pg_config.path().unwrap()) 131 | .stdout(Stdio::inherit()) 132 | .stderr(Stdio::inherit()) 133 | .output() 134 | .unwrap(); 135 | assert!(res.status.success(), "failed: {:?}", res); 136 | 137 | client 138 | .execute( 139 | &format!("CREATE EXTENSION {extname} VERSION '{}' CASCADE;", version), 140 | &[], 141 | ) 142 | .unwrap(); 143 | 144 | let suffix = (1..=253) 145 | .map(|i| format!("{}", i)) 146 | .collect::<Vec<String>>() 147 | .join(", "); 148 | 149 | client 150 | .batch_execute(&format!( 151 | "CREATE TABLE test(embedding vector(256)); 152 | 153 | select setseed(0.5); 154 | -- generate 300 vectors 155 | INSERT INTO test(embedding) 156 | SELECT 157 | * 158 | FROM ( 159 | SELECT 160 | ('[ 0 , ' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS 
embedding 161 | FROM 162 | generate_series(1, 255 * 300) i 163 | GROUP BY 164 | i % 300) g; 165 | 166 | INSERT INTO test(embedding) VALUES ('[1,2,3,{suffix}]'), ('[4,5,6,{suffix}]'), ('[7,8,10,{suffix}]'); 167 | 168 | CREATE INDEX idxtest 169 | ON test 170 | USING {amname}(embedding); 171 | " 172 | )) 173 | .unwrap(); 174 | 175 | client.execute("set enable_seqscan = 0;", &[]).unwrap(); 176 | let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0); 177 | assert_eq!(cnt, 303, "count before upgrade"); 178 | 179 | if extname == "timescale_vector" { 180 | client 181 | .execute( 182 | &"UPDATE pg_extension SET extname='vectorscale' WHERE extname = 'timescale_vector';".to_string(), 183 | &[], 184 | ) 185 | .unwrap(); 186 | } 187 | 188 | //reinstall myself 189 | let res = std::process::Command::new("cargo") 190 | .arg("pgrx") 191 | .arg("install") 192 | .arg("--test") 193 | .arg("--pg-config") 194 | .arg(pg_config.path().unwrap()) 195 | .stdout(Stdio::inherit()) 196 | .stderr(Stdio::piped()) 197 | .output() 198 | .unwrap(); 199 | assert!(res.status.success(), "failed: {:?}", res); 200 | 201 | //need to recreate the client to avoid double load of GUC. Look into this later. 
202 | let (mut client, _) = pgrx_tests::client().unwrap(); 203 | client 204 | .execute( 205 | &format!( 206 | "ALTER EXTENSION vectorscale UPDATE TO '{}'", 207 | env!("CARGO_PKG_VERSION") 208 | ), 209 | &[], 210 | ) 211 | .unwrap(); 212 | 213 | // Recreate client to pick up system catalog changes 214 | let (mut client, _) = pgrx_tests::client().unwrap(); 215 | 216 | client.execute("set enable_seqscan = 0;", &[]).unwrap(); 217 | let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0); 218 | assert_eq!(cnt, 303, "count after upgrade"); 219 | 220 | client.execute("DROP INDEX idxtest;", &[]).unwrap(); 221 | client 222 | .execute( 223 | "CREATE INDEX idxtest_cosine ON test USING diskann(embedding vector_cosine_ops);", 224 | &[], 225 | ) 226 | .unwrap(); 227 | client 228 | .execute( 229 | "CREATE INDEX idxtest_l2 ON test USING diskann(embedding vector_l2_ops);", 230 | &[], 231 | ) 232 | .unwrap(); 233 | client 234 | .execute( 235 | "CREATE INDEX idxtest_ip ON test USING diskann(embedding vector_ip_ops);", 236 | &[], 237 | ) 238 | .unwrap(); 239 | } 240 | 241 | #[ignore] 242 | #[serial] 243 | #[test] 244 | fn test_upgrade_from_0_0_2() { 245 | test_upgrade_base( 246 | "0.0.2", 247 | "0.11.1", 248 | "timescale_vector", 249 | "timescale_vector", 250 | "tsv", 251 | ); 252 | } 253 | 254 | #[ignore] 255 | #[serial] 256 | #[test] 257 | fn test_upgrade_from_0_2_0() { 258 | test_upgrade_base("0.2.0", "0.11.4", "pgvectorscale", "vectorscale", "diskann"); 259 | } 260 | 261 | #[ignore] 262 | #[serial] 263 | #[test] 264 | fn test_upgrade_from_0_3_0() { 265 | test_upgrade_base("0.3.0", "0.11.4", "pgvectorscale", "vectorscale", "diskann"); 266 | } 267 | 268 | #[ignore] 269 | #[serial] 270 | #[test] 271 | fn test_upgrade_from_0_4_0() { 272 | test_upgrade_base("0.4.0", "0.12.5", "pgvectorscale", "vectorscale", "diskann"); 273 | } 274 | 275 | #[ignore] 276 | #[serial] 277 | 
#[test] 278 | fn test_upgrade_from_0_5_0() { 279 | test_upgrade_base("0.5.0", "0.12.5", "pgvectorscale", "vectorscale", "diskann"); 280 | } 281 | 282 | #[ignore] 283 | #[serial] 284 | #[test] 285 | fn test_upgrade_from_0_5_1() { 286 | test_upgrade_base("0.5.1", "0.12.5", "pgvectorscale", "vectorscale", "diskann"); 287 | } 288 | 289 | #[ignore] 290 | #[serial] 291 | #[test] 292 | fn test_upgrade_from_0_6_0() { 293 | test_upgrade_base("0.6.0", "0.12.5", "pgvectorscale", "vectorscale", "diskann"); 294 | } 295 | 296 | #[ignore] 297 | #[serial] 298 | #[test] 299 | fn test_upgrade_from_0_7_0() { 300 | test_upgrade_base("0.7.0", "0.12.9", "pgvectorscale", "vectorscale", "diskann"); 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /pgvectorscale/src/bin/pgrx_embed.rs: -------------------------------------------------------------------------------- 1 | ::pgrx::pgrx_embed!(); 2 | -------------------------------------------------------------------------------- /pgvectorscale/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(unexpected_cfgs)] 2 | use pgrx::prelude::*; 3 | 4 | pgrx::pg_module_magic!(); 5 | 6 | pub mod access_method; 7 | mod util; 8 | 9 | #[allow(non_snake_case)] 10 | #[pg_guard] 11 | pub unsafe extern "C" fn _PG_init() { 12 | access_method::distance::init(); 13 | access_method::options::init(); 14 | access_method::guc::init(); 15 | } 16 | 17 | #[allow(non_snake_case)] 18 | #[pg_guard] 19 | pub extern "C" fn _PG_fini() { 20 | // noop 21 | } 22 | 23 | /// This module is required by `cargo pgrx test` invocations. 24 | /// It must be visible at the root of your extension crate. 
25 | #[cfg(test)] 26 | pub mod pg_test { 27 | pub fn setup(_options: Vec<&str>) { 28 | //let (mut client, _) = pgrx_tests::client().unwrap(); 29 | 30 | // perform one-off initialization when the pg_test framework starts 31 | } 32 | 33 | pub fn postgresql_conf_options() -> Vec<&'static str> { 34 | // return any postgresql.conf settings that are required for your tests 35 | vec![] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /pgvectorscale/src/util/buffer.rs: -------------------------------------------------------------------------------- 1 | //! A buffer is a Postgres abstraction to identify a slot in memory, almost always shared memory. 2 | //! Under the hood, it is just an index into the shared memory array. 3 | //! To use the slot, certain pins and locks need to be taken 4 | //! See src/backend/storage/buffer/README in the Postgres src. 5 | 6 | use std::ops::Deref; 7 | 8 | use pgrx::*; 9 | 10 | use pgrx::pg_sys::{ 11 | BlockNumber, Buffer, BufferGetBlockNumber, ForkNumber, InvalidBlockNumber, ReadBufferMode, 12 | }; 13 | 14 | pub struct LockRelationForExtension<'a> { 15 | relation: &'a PgRelation, 16 | } 17 | 18 | impl<'a> LockRelationForExtension<'a> { 19 | pub fn new(index: &'a PgRelation) -> Self { 20 | unsafe { 21 | pg_sys::LockRelationForExtension( 22 | index.as_ptr(), 23 | pg_sys::ExclusiveLock as pg_sys::LOCKMODE, 24 | ) 25 | } 26 | Self { relation: index } 27 | } 28 | } 29 | 30 | impl Drop for LockRelationForExtension<'_> { 31 | /// drop both unlock and unpins the buffer. 32 | fn drop(&mut self) { 33 | unsafe { 34 | // Only unlock while in a transaction state. Should not be unlocking during abort or commit. 35 | // During abort, the system will unlock stuff itself. During commit, the release should have already happened. 
36 | if pgrx::pg_sys::IsTransactionState() { 37 | pg_sys::UnlockRelationForExtension( 38 | self.relation.as_ptr(), 39 | pg_sys::ExclusiveLock as pg_sys::LOCKMODE, 40 | ); 41 | } 42 | } 43 | } 44 | } 45 | 46 | /// LockedBufferExclusive is an RAII-guarded buffer that 47 | /// has been locked for exclusive access. 48 | /// 49 | /// It is probably not a good idea to hold on to this too long. 50 | pub struct LockedBufferExclusive<'a> { 51 | _relation: &'a PgRelation, 52 | buffer: Buffer, 53 | } 54 | 55 | impl<'a> LockedBufferExclusive<'a> { 56 | /// new return an allocated buffer for a new block in a relation. 57 | /// The block is obtained by extending the relation. 58 | /// 59 | /// The returned block will be pinned and locked in exclusive mode 60 | /// 61 | /// Safety: safe because it locks the relation for extension. 62 | pub fn new(index: &'a PgRelation) -> Self { 63 | //ReadBufferExtended requires the caller to ensure that only one backend extends the relation at one time. 64 | let _lock = LockRelationForExtension::new(index); 65 | //should really be using ExtendBufferedRel but it's not in pgrx so go thru the read path with InvalidBlockNumber 66 | unsafe { Self::read_unchecked(index, InvalidBlockNumber) } 67 | } 68 | 69 | /// Safety: Safe because it checks the block number doesn't overflow. 
    /// Safety: Safe because it checks the block number doesn't overflow.
    /// ReadBufferExtended will throw an error if the block number is out of range
    /// for the relation.
    pub fn read(index: &'a PgRelation, block: BlockNumber) -> Self {
        unsafe { Self::read_unchecked(index, block) }
    }

    /// Safety: unsafe because the block number is not verified against the
    /// relation; the caller must ensure the block exists.
    unsafe fn read_unchecked(index: &'a PgRelation, block: BlockNumber) -> Self {
        let fork_number = ForkNumber::MAIN_FORKNUM;

        let buf = pg_sys::ReadBufferExtended(
            index.as_ptr(),
            fork_number,
            block,
            ReadBufferMode::RBM_NORMAL,
            std::ptr::null_mut(),
        );

        // The pin is taken by ReadBufferExtended; the exclusive lock taken here
        // (and the pin) are released in this type's Drop impl.
        pg_sys::LockBuffer(buf, pg_sys::BUFFER_LOCK_EXCLUSIVE as i32);
        LockedBufferExclusive {
            _relation: index,
            buffer: buf,
        }
    }

    /// Get an exclusive lock for cleanup (vacuum) operations.
    /// Obtaining this lock is more restrictive. It will only be obtained once the pin
    /// count is 1. Refer to the PG code for `LockBufferForCleanup` for more info
    pub unsafe fn read_for_cleanup(index: &'a PgRelation, block: BlockNumber) -> Self {
        let fork_number = ForkNumber::MAIN_FORKNUM;

        let buf = pg_sys::ReadBufferExtended(
            index.as_ptr(),
            fork_number,
            block,
            ReadBufferMode::RBM_NORMAL,
            std::ptr::null_mut(),
        );

        // Blocks until this backend holds the only pin on the buffer.
        pg_sys::LockBufferForCleanup(buf);
        LockedBufferExclusive {
            _relation: index,
            buffer: buf,
        }
    }

    /// Block number of the pinned buffer within the relation's main fork.
    pub fn get_block_number(&self) -> BlockNumber {
        unsafe { BufferGetBlockNumber(self.buffer) }
    }
125 | if pgrx::pg_sys::IsTransactionState() { 126 | pg_sys::UnlockReleaseBuffer(self.buffer); 127 | } 128 | } 129 | } 130 | } 131 | 132 | impl Deref for LockedBufferExclusive<'_> { 133 | type Target = Buffer; 134 | fn deref(&self) -> &Self::Target { 135 | &self.buffer 136 | } 137 | } 138 | 139 | /// LockedBufferShare is an RAII-guarded buffer that 140 | /// has been locked for share access. 141 | /// 142 | /// This lock uses a LWLock so it really should not be held for too long. 143 | pub struct LockedBufferShare<'a> { 144 | _relation: &'a PgRelation, 145 | buffer: Buffer, 146 | } 147 | 148 | impl<'a> LockedBufferShare<'a> { 149 | /// read return buffer for the given blockNumber in a relation. 150 | /// 151 | /// The returned block will be pinned and locked in share mode 152 | /// 153 | /// Safety: Safe because it checks the block number doesn't overflow. ReadBufferExtended will throw an error if the block number is out of range for the relation 154 | pub fn read(index: &'a PgRelation, block: BlockNumber) -> Self { 155 | let fork_number = ForkNumber::MAIN_FORKNUM; 156 | 157 | unsafe { 158 | let buf = pg_sys::ReadBufferExtended( 159 | index.as_ptr(), 160 | fork_number, 161 | block, 162 | ReadBufferMode::RBM_NORMAL, 163 | std::ptr::null_mut(), 164 | ); 165 | 166 | pg_sys::LockBuffer(buf, pg_sys::BUFFER_LOCK_SHARE as i32); 167 | LockedBufferShare { 168 | _relation: index, 169 | buffer: buf, 170 | } 171 | } 172 | } 173 | } 174 | 175 | impl Drop for LockedBufferShare<'_> { 176 | /// drop both unlock and unpins the buffer. 177 | fn drop(&mut self) { 178 | unsafe { 179 | // Only unlock while in a transaction state. Should not be unlocking during abort or commit. 180 | // During abort, the system will unlock stuff itself. During commit, the release should have already happened. 
181 | if pgrx::pg_sys::IsTransactionState() { 182 | pg_sys::UnlockReleaseBuffer(self.buffer); 183 | } 184 | } 185 | } 186 | } 187 | 188 | impl Deref for LockedBufferShare<'_> { 189 | type Target = Buffer; 190 | fn deref(&self) -> &Self::Target { 191 | &self.buffer 192 | } 193 | } 194 | 195 | /// PinnerBuffer is an RAII-guarded buffer that 196 | /// has been pinned but not locked. 197 | /// 198 | /// It is probably not a good idea to hold on to this too long except during an index scan. 199 | /// Does not use a LWLock. Note a pinned buffer is valid whether or not the relation that read it 200 | /// is still open. 201 | pub struct PinnedBufferShare { 202 | buffer: Buffer, 203 | } 204 | 205 | impl PinnedBufferShare { 206 | /// read return buffer for the given blockNumber in a relation. 207 | /// 208 | /// The returned block will be pinned 209 | /// 210 | /// Safety: Safe because it checks the block number doesn't overflow. ReadBufferExtended will throw an error if the block number is out of range for the relation 211 | pub fn read(index: &PgRelation, block: BlockNumber) -> Self { 212 | let fork_number = ForkNumber::MAIN_FORKNUM; 213 | 214 | unsafe { 215 | let buf = pg_sys::ReadBufferExtended( 216 | index.as_ptr(), 217 | fork_number, 218 | block, 219 | ReadBufferMode::RBM_NORMAL, 220 | std::ptr::null_mut(), 221 | ); 222 | PinnedBufferShare { buffer: buf } 223 | } 224 | } 225 | } 226 | 227 | impl Drop for PinnedBufferShare { 228 | /// drop both unlock and unpins the buffer. 229 | fn drop(&mut self) { 230 | unsafe { 231 | // Only unlock while in a transaction state. Should not be unlocking during abort or commit. 232 | // During abort, the system will unlock stuff itself. During commit, the release should have already happened. 
233 | if pgrx::pg_sys::IsTransactionState() { 234 | pg_sys::ReleaseBuffer(self.buffer); 235 | } 236 | } 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /pgvectorscale/src/util/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod buffer; 2 | pub mod chain; 3 | pub mod page; 4 | pub mod ports; 5 | pub mod table_slot; 6 | pub mod tape; 7 | 8 | use pgrx::PgRelation; 9 | use rkyv::{Archive, Deserialize, Serialize}; 10 | 11 | use self::{ 12 | page::{ReadablePage, WritablePage}, 13 | ports::{PageGetItem, PageGetItemId}, 14 | }; 15 | 16 | #[derive(Archive, Deserialize, Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)] 17 | #[archive(check_bytes)] 18 | #[repr(C)] // Added this so we can compute size via sizeof 19 | pub struct ItemPointer { 20 | pub block_number: pgrx::pg_sys::BlockNumber, 21 | pub offset: pgrx::pg_sys::OffsetNumber, 22 | } 23 | 24 | impl PartialOrd for ItemPointer { 25 | fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { 26 | Some(self.cmp(other)) 27 | } 28 | } 29 | 30 | impl Ord for ItemPointer { 31 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 32 | self.block_number 33 | .cmp(&other.block_number) 34 | .then_with(|| self.offset.cmp(&other.offset)) 35 | } 36 | } 37 | 38 | impl ArchivedItemPointer { 39 | pub fn deserialize_item_pointer(&self) -> ItemPointer { 40 | self.deserialize(&mut rkyv::Infallible).unwrap() 41 | } 42 | } 43 | 44 | impl PartialEq for ArchivedItemPointer { 45 | fn eq(&self, other: &Self) -> bool { 46 | self.block_number == other.block_number && self.offset == other.offset 47 | } 48 | } 49 | 50 | impl Eq for ArchivedItemPointer {} 51 | 52 | impl PartialOrd for ArchivedItemPointer { 53 | fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { 54 | Some(self.cmp(other)) 55 | } 56 | } 57 | 58 | impl Ord for ArchivedItemPointer { 59 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 60 | 
self.block_number 61 | .cmp(&other.block_number) 62 | .then_with(|| self.offset.cmp(&other.offset)) 63 | } 64 | } 65 | 66 | pub struct ReadableBuffer<'a> { 67 | _page: ReadablePage<'a>, 68 | len: usize, 69 | ptr: *const u8, 70 | } 71 | 72 | impl<'a> ReadableBuffer<'a> { 73 | pub fn get_data_slice(&self) -> &'a [u8] { 74 | unsafe { std::slice::from_raw_parts(self.ptr, self.len) } 75 | } 76 | 77 | pub fn get_owned_page(self) -> ReadablePage<'a> { 78 | self._page 79 | } 80 | 81 | pub fn len(&self) -> usize { 82 | self.len 83 | } 84 | 85 | pub fn advance(&mut self, len: usize) { 86 | assert!(self.len >= len); 87 | self.ptr = unsafe { self.ptr.add(len) }; 88 | self.len -= len; 89 | } 90 | } 91 | 92 | pub struct WritableBuffer<'a> { 93 | _page: WritablePage<'a>, 94 | len: usize, 95 | ptr: *mut u8, 96 | } 97 | 98 | impl<'a> WritableBuffer<'a> { 99 | pub fn get_data_slice(&mut self) -> &'a mut [u8] { 100 | unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) } 101 | } 102 | 103 | pub fn commit(self) { 104 | self._page.commit(); 105 | } 106 | } 107 | 108 | impl ItemPointer { 109 | pub fn new( 110 | block_number: pgrx::pg_sys::BlockNumber, 111 | offset: pgrx::pg_sys::OffsetNumber, 112 | ) -> Self { 113 | Self { 114 | block_number, 115 | offset, 116 | } 117 | } 118 | 119 | pub fn new_invalid() -> Self { 120 | Self { 121 | block_number: pgrx::pg_sys::InvalidBlockNumber, 122 | offset: pgrx::pg_sys::InvalidOffsetNumber, 123 | } 124 | } 125 | 126 | pub fn is_valid(&self) -> bool { 127 | self.block_number != pgrx::pg_sys::InvalidBlockNumber 128 | && self.offset != pgrx::pg_sys::InvalidOffsetNumber 129 | } 130 | 131 | pub unsafe fn with_page(page: &page::WritablePage, offset: pgrx::pg_sys::OffsetNumber) -> Self { 132 | Self { 133 | block_number: pgrx::pg_sys::BufferGetBlockNumber(**(page.get_buffer())), 134 | offset, 135 | } 136 | } 137 | 138 | pub unsafe fn with_item_pointer_data(ctid: pgrx::pg_sys::ItemPointerData) -> Self { 139 | let ip = 
pgrx::itemptr::item_pointer_get_block_number(&ctid); 140 | let off = pgrx::itemptr::item_pointer_get_offset_number(&ctid); 141 | Self::new(ip, off) 142 | } 143 | 144 | pub fn to_item_pointer_data(self, ctid: &mut pgrx::pg_sys::ItemPointerData) { 145 | pgrx::itemptr::item_pointer_set_all(ctid, self.block_number, self.offset) 146 | } 147 | 148 | pub unsafe fn read_bytes(self, index: &PgRelation) -> ReadableBuffer { 149 | let page = ReadablePage::read(index, self.block_number); 150 | page.get_item_unchecked(self.offset) 151 | } 152 | 153 | pub unsafe fn modify_bytes(self, index: &PgRelation) -> WritableBuffer { 154 | let page = WritablePage::modify(index, self.block_number); 155 | let item_id = PageGetItemId(*page, self.offset); 156 | let item = PageGetItem(*page, item_id) as *mut u8; 157 | let len = (*item_id).lp_len(); 158 | WritableBuffer { 159 | _page: page, 160 | ptr: item, 161 | len: len as _, 162 | } 163 | } 164 | 165 | pub fn ip_distance(self, other: Self) -> usize { 166 | let block_diff = self.block_number as isize - other.block_number as isize; 167 | let offset_diff = self.offset as isize - other.offset as isize; 168 | debug_assert!(offset_diff < pgrx::pg_sys::MaxOffsetNumber as _); 169 | (block_diff * (pgrx::pg_sys::MaxOffsetNumber as isize) + offset_diff).unsigned_abs() 170 | } 171 | } 172 | 173 | pub type IndexPointer = ItemPointer; 174 | pub type HeapPointer = ItemPointer; 175 | -------------------------------------------------------------------------------- /pgvectorscale/src/util/page.rs: -------------------------------------------------------------------------------- 1 | //! A Page is a Postgres abstraction for a slice of memory you can write to 2 | //! It is usually 8kb and has a special layout. 
pub const TSV_PAGE_ID: u16 = 0xAE24; /* magic number, generated randomly */

/// PageType identifies different types of pages in our index.
/// The layout of any one type should be consistent
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum PageType {
    MetaV1 = 0,
    Node = 1,
    PqQuantizerDef = 2,
    PqQuantizerVector = 3,
    SbqMeansV1 = 4,
    SbqNode = 5,
    MetaV2 = 6,
    SbqMeans = 7,
    Meta = 8,
}

impl PageType {
    /// Decode the on-disk `u8` tag back into a `PageType`.
    ///
    /// Panics when the tag does not correspond to any known page type.
    pub fn from_u8(value: u8) -> Self {
        // Lookup table indexed by the discriminant; equivalent to an
        // exhaustive match over 0..=8.
        const BY_TAG: [PageType; 9] = [
            PageType::MetaV1,
            PageType::Node,
            PageType::PqQuantizerDef,
            PageType::PqQuantizerVector,
            PageType::SbqMeansV1,
            PageType::SbqNode,
            PageType::MetaV2,
            PageType::SbqMeans,
            PageType::Meta,
        ];
        match BY_TAG.get(value as usize) {
            Some(page_type) => *page_type,
            None => panic!("Unknown PageType number {}", value),
        }
    }
}
59 | pub fn is_chained(self) -> bool { 60 | matches!(self, PageType::SbqMeans) || matches!(self, PageType::Meta) 61 | } 62 | } 63 | 64 | /// This is the Tsv-specific data that goes on every "diskann-owned" page 65 | /// It is placed at the end of a page in the "special" area 66 | #[repr(C)] 67 | struct TsvPageOpaqueData { 68 | page_type: u8, // stores the PageType enum as an integer (u8 because we doubt we'll have more than 256 types). 69 | _reserved: u8, // don't waste bytes, may be able to reuse later. For now: 0 70 | page_id: u16, // A magic ID for debuging to identify the page as a "diskann-owned". Should be last. 71 | } 72 | 73 | impl TsvPageOpaqueData { 74 | fn new(page_type: PageType) -> Self { 75 | Self { 76 | page_type: page_type as u8, 77 | _reserved: 0, 78 | page_id: TSV_PAGE_ID, 79 | } 80 | } 81 | 82 | /// Safety: unsafe because no verification done. Blind cast. 83 | #[inline(always)] 84 | unsafe fn with_page(page: Page) -> *mut TsvPageOpaqueData { 85 | let sp = super::ports::PageGetSpecialPointer(page); 86 | sp.cast::<TsvPageOpaqueData>() 87 | } 88 | 89 | /// Safety: Safe because of the verify call that checks a magic number 90 | fn read_from_page(page: &Page) -> &TsvPageOpaqueData { 91 | unsafe { 92 | let ptr = Self::with_page(*page); 93 | (*ptr).verify(); 94 | ptr.as_ref().unwrap() 95 | } 96 | } 97 | 98 | fn verify(&self) { 99 | assert_eq!(self.page_id, TSV_PAGE_ID); 100 | PageType::from_u8(self.page_type); 101 | } 102 | } 103 | 104 | /// WritablePage implements and RAII-guarded Page that you can write to. 105 | /// All writes will be WAL-logged. 106 | /// 107 | /// It is probably not a good idea to hold on to a WritablePage for a long time. 
108 | impl<'a> WritablePage<'a> { 109 | /// new creates a totally new page on a relation by extending the relation 110 | pub fn new(index: &'a PgRelation, page_type: PageType) -> Self { 111 | let buffer = LockedBufferExclusive::new(index); 112 | unsafe { 113 | let state = pg_sys::GenericXLogStart(index.as_ptr()); 114 | //TODO do we need a GENERIC_XLOG_FULL_IMAGE option? 115 | let page = pg_sys::GenericXLogRegisterBuffer(state, *buffer, 0); 116 | let mut new = Self { 117 | buffer, 118 | page, 119 | state, 120 | committed: false, 121 | }; 122 | new.reinit(page_type); 123 | new 124 | } 125 | } 126 | 127 | pub fn reinit(&mut self, page_type: PageType) { 128 | unsafe { 129 | pg_sys::PageInit( 130 | self.page, 131 | pg_sys::BLCKSZ as usize, 132 | std::mem::size_of::<TsvPageOpaqueData>(), 133 | ); 134 | *TsvPageOpaqueData::with_page(self.page) = TsvPageOpaqueData::new(page_type); 135 | } 136 | } 137 | 138 | pub fn modify(index: &'a PgRelation, block: BlockNumber) -> Self { 139 | let buffer = LockedBufferExclusive::read(index, block); 140 | Self::modify_with_buffer(index, buffer) 141 | } 142 | 143 | pub fn add_item(&mut self, data: &[u8]) -> OffsetNumber { 144 | let size = data.len(); 145 | assert!(self.get_free_space() >= size); 146 | unsafe { self.add_item_unchecked(data) } 147 | } 148 | 149 | pub unsafe fn add_item_unchecked(&mut self, data: &[u8]) -> OffsetNumber { 150 | let size = data.len(); 151 | assert!(size < BLCKSZ as usize); 152 | 153 | let offset_number = pg_sys::PageAddItemExtended( 154 | self.page, 155 | data.as_ptr() as _, 156 | size, 157 | pg_sys::InvalidOffsetNumber, 158 | 0, 159 | ); 160 | 161 | assert!(offset_number != pg_sys::InvalidOffsetNumber); 162 | offset_number 163 | } 164 | 165 | /// get a writable page for cleanup(vacuum) operations. 
166 | pub unsafe fn cleanup(index: &'a PgRelation, block: BlockNumber) -> Self { 167 | let buffer = LockedBufferExclusive::read_for_cleanup(index, block); 168 | Self::modify_with_buffer(index, buffer) 169 | } 170 | 171 | // Safety: Safe because it verifies the page 172 | fn modify_with_buffer(index: &'a PgRelation, buffer: LockedBufferExclusive<'a>) -> Self { 173 | unsafe { 174 | let state = pg_sys::GenericXLogStart(index.as_ptr()); 175 | let page = pg_sys::GenericXLogRegisterBuffer(state, *buffer, 0); 176 | //this check the page 177 | _ = TsvPageOpaqueData::read_from_page(&page); 178 | Self { 179 | buffer, 180 | page, 181 | state, 182 | committed: false, 183 | } 184 | } 185 | } 186 | 187 | pub fn get_buffer(&self) -> &LockedBufferExclusive { 188 | &self.buffer 189 | } 190 | 191 | pub fn get_block_number(&self) -> BlockNumber { 192 | self.buffer.get_block_number() 193 | } 194 | 195 | fn get_free_space(&self) -> usize { 196 | unsafe { pg_sys::PageGetFreeSpace(self.page) } 197 | } 198 | 199 | /// The actual free space that can be used to store data. 200 | /// See https://github.com/postgres/postgres/blob/0164a0f9ee12e0eff9e4c661358a272ecd65c2d4/src/backend/storage/page/bufpage.c#L304 201 | pub fn get_aligned_free_space(&self) -> usize { 202 | let free_space = self.get_free_space(); 203 | free_space - (free_space % 8) 204 | } 205 | 206 | pub fn get_type(&self) -> PageType { 207 | unsafe { 208 | let opaque_data = 209 | //safe to do because self.page was already verified during construction 210 | TsvPageOpaqueData::with_page(self.page); 211 | 212 | PageType::from_u8((*opaque_data).page_type) 213 | } 214 | } 215 | 216 | pub fn set_types(&self, new: PageType) { 217 | unsafe { 218 | let opaque_data = 219 | //safe to do because self.page was already verified during construction 220 | TsvPageOpaqueData::with_page(self.page); 221 | 222 | (*opaque_data).page_type = new as u8; 223 | } 224 | } 225 | /// commit saves all the changes to the page. 
226 | /// Note that this will consume the page and make it unusable after the call. 227 | pub fn commit(mut self) { 228 | unsafe { 229 | pg_sys::MarkBufferDirty(*self.buffer); 230 | pg_sys::GenericXLogFinish(self.state); 231 | } 232 | self.committed = true; 233 | } 234 | } 235 | 236 | impl Drop for WritablePage<'_> { 237 | // drop aborts the xlog if it has not been committed. 238 | fn drop(&mut self) { 239 | if !self.committed { 240 | unsafe { 241 | pg_sys::GenericXLogAbort(self.state); 242 | }; 243 | } 244 | } 245 | } 246 | 247 | impl Deref for WritablePage<'_> { 248 | type Target = Page; 249 | fn deref(&self) -> &Self::Target { 250 | &self.page 251 | } 252 | } 253 | 254 | pub struct ReadablePage<'a> { 255 | buffer: LockedBufferShare<'a>, 256 | page: Page, 257 | } 258 | 259 | impl<'a> ReadablePage<'a> { 260 | /// new creates a totally new page on a relation by extending the relation 261 | pub unsafe fn read(index: &'a PgRelation, block: BlockNumber) -> Self { 262 | let buffer = LockedBufferShare::read(index, block); 263 | let page = BufferGetPage(*buffer); 264 | Self { buffer, page } 265 | } 266 | 267 | pub fn get_type(&self) -> PageType { 268 | let opaque_data = TsvPageOpaqueData::read_from_page(&self.page); 269 | PageType::from_u8(opaque_data.page_type) 270 | } 271 | 272 | pub fn get_buffer(&self) -> &LockedBufferShare { 273 | &self.buffer 274 | } 275 | 276 | // Safety: unsafe because no verification of the offset is done. 
277 | pub unsafe fn get_item_unchecked( 278 | self, 279 | offset: pgrx::pg_sys::OffsetNumber, 280 | ) -> ReadableBuffer<'a> { 281 | let item_id = PageGetItemId(self.page, offset); 282 | let item = PageGetItem(self.page, item_id) as *mut u8; 283 | let len = (*item_id).lp_len(); 284 | ReadableBuffer { 285 | _page: self, 286 | ptr: item, 287 | len: len as _, 288 | } 289 | } 290 | } 291 | 292 | impl Deref for ReadablePage<'_> { 293 | type Target = Page; 294 | fn deref(&self) -> &Self::Target { 295 | &self.page 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /pgvectorscale/src/util/ports.rs: -------------------------------------------------------------------------------- 1 | //! This module contains ports of Postgres static functions and #defines not in pgrx. 2 | //! Following pgrx conventions, we keep function names as close to Postgres as possible. 3 | //! Thus, we don't follow rust naming conventions. 4 | 5 | use std::os::raw::c_int; 6 | 7 | use memoffset::*; 8 | 9 | #[cfg(any(feature = "pg15", feature = "pg16", feature = "pg17"))] 10 | use pg_sys::pgstat_assoc_relation; 11 | 12 | use pgrx::pg_sys::{Datum, ItemId, OffsetNumber, Pointer, TupleTableSlot}; 13 | use pgrx::{pg_sys, PgBox, PgRelation}; 14 | 15 | /// Given a valid Page pointer, return address of the "Special Pointer" (custom info at end of page) 16 | /// 17 | /// # Safety 18 | /// 19 | /// This function cannot determine if the `page` argument is really a non-null pointer to a [`Page`]. 
20 | #[inline(always)] 21 | #[allow(non_snake_case)] 22 | pub unsafe fn PageGetSpecialPointer(page: pgrx::pg_sys::Page) -> Pointer { 23 | // PageValidateSpecialPointer(page); 24 | // return (char *) page + ((PageHeader) page)->pd_special; 25 | PageValidateSpecialPointer(page); 26 | let header = page.cast::<pgrx::pg_sys::PageHeaderData>(); 27 | page.cast::<std::os::raw::c_char>() 28 | .add((*header).pd_special as usize) 29 | } 30 | 31 | #[allow(non_snake_case)] 32 | pub unsafe fn PageValidateSpecialPointer(page: pgrx::pg_sys::Page) { 33 | //Assert(page); 34 | //Assert(((PageHeader) page)->pd_special <= BLCKSZ); 35 | //Assert(((PageHeader) page)->pd_special >= SizeOfPageHeaderData); 36 | assert!(!page.is_null()); 37 | let header = page.cast::<pgrx::pg_sys::PageHeaderData>(); 38 | assert!((*header).pd_special <= pgrx::pg_sys::BLCKSZ as u16); 39 | assert!((*header).pd_special >= SizeOfPageHeaderData as u16); 40 | } 41 | 42 | #[allow(non_upper_case_globals)] 43 | const SizeOfPageHeaderData: usize = offset_of!(pgrx::pg_sys::PageHeaderData, pd_linp); 44 | pub const PROGRESS_CREATE_IDX_SUBPHASE: c_int = 10; 45 | 46 | #[allow(non_snake_case)] 47 | pub unsafe fn PageGetContents(page: pgrx::pg_sys::Page) -> *mut std::os::raw::c_char { 48 | //return (char *) page + MAXALIGN(SizeOfPageHeaderData); 49 | page.cast::<std::os::raw::c_char>() 50 | .add(pgrx::pg_sys::MAXALIGN(SizeOfPageHeaderData)) 51 | } 52 | 53 | #[allow(non_snake_case)] 54 | pub unsafe fn PageGetItem(page: pgrx::pg_sys::Page, item_id: ItemId) -> *mut std::os::raw::c_char { 55 | //Assert(page); 56 | //Assert(ItemIdHasStorage(itemId)); 57 | 58 | //return (Item) (((char *) page) + ItemIdGetOffset(itemId)); 59 | assert!(!page.is_null()); 60 | assert!((*item_id).lp_len() != 0); 61 | 62 | page.cast::<std::os::raw::c_char>() 63 | .add((*item_id).lp_off() as _) 64 | } 65 | 66 | #[allow(non_snake_case)] 67 | pub unsafe fn PageGetItemId(page: pgrx::pg_sys::Page, offset: OffsetNumber) -> ItemId { 68 | //return &((PageHeader) 
page)->pd_linp[offsetNumber - 1]; 69 | let header = page.cast::<pgrx::pg_sys::PageHeaderData>(); 70 | (*header).pd_linp.as_mut_ptr().add((offset - 1) as _) 71 | } 72 | 73 | #[allow(non_snake_case)] 74 | pub unsafe fn PageGetMaxOffsetNumber(page: pgrx::pg_sys::Page) -> usize { 75 | /* 76 | PageHeader pageheader = (PageHeader) page; 77 | 78 | if (pageheader->pd_lower <= SizeOfPageHeaderData) 79 | return 0; 80 | else 81 | return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData); 82 | */ 83 | 84 | let header = page.cast::<pgrx::pg_sys::PageHeaderData>(); 85 | 86 | if (*header).pd_lower as usize <= SizeOfPageHeaderData { 87 | 0 88 | } else { 89 | ((*header).pd_lower as usize - SizeOfPageHeaderData) 90 | / std::mem::size_of::<pgrx::pg_sys::ItemIdData>() 91 | } 92 | } 93 | 94 | pub unsafe fn slot_getattr( 95 | slot: &PgBox<TupleTableSlot>, 96 | attnum: pg_sys::AttrNumber, 97 | ) -> Option<Datum> { 98 | /* 99 | static inline Datum 100 | slot_getattr(TupleTableSlot *slot, int attnum, 101 | bool *isnull) 102 | { 103 | Assert(attnum > 0); 104 | 105 | if (attnum > slot->tts_nvalid) 106 | slot_getsomeattrs(slot, attnum); 107 | 108 | *isnull = slot->tts_isnull[attnum - 1]; 109 | 110 | return slot->tts_values[attnum - 1]; 111 | } 112 | */ 113 | assert!(attnum > 0); 114 | 115 | if attnum > slot.tts_nvalid { 116 | pg_sys::slot_getsomeattrs_int(slot.as_ptr(), attnum as _); 117 | } 118 | 119 | let index = (attnum - 1) as usize; 120 | 121 | if *slot.tts_isnull.add(index) { 122 | return None; 123 | } 124 | Some(*slot.tts_values.add(index)) 125 | } 126 | 127 | #[allow(unused_variables)] 128 | pub unsafe fn pgstat_count_index_scan(index_relation: pg_sys::Relation, indexrel: PgRelation) { 129 | if !indexrel.pgstat_info.is_null() { 130 | let tmp = indexrel.pgstat_info; 131 | #[cfg(any(feature = "pg13", feature = "pg14", feature = "pg15"))] 132 | { 133 | (*tmp).t_counts.t_numscans += 1; 134 | } 135 | #[cfg(any(feature = "pg16", feature = "pg17"))] 136 | { 137 | 
(*tmp).counts.numscans += 1; 138 | } 139 | } 140 | 141 | #[cfg(any(feature = "pg15", feature = "pg16", feature = "pg17"))] 142 | if indexrel.pgstat_info.is_null() && indexrel.pgstat_enabled { 143 | pgstat_assoc_relation(index_relation); 144 | assert!(!indexrel.pgstat_info.is_null()); 145 | let tmp = indexrel.pgstat_info; 146 | #[cfg(feature = "pg15")] 147 | { 148 | (*tmp).t_counts.t_numscans += 1; 149 | } 150 | #[cfg(any(feature = "pg16", feature = "pg17"))] 151 | { 152 | (*tmp).counts.numscans += 1; 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /pgvectorscale/src/util/table_slot.rs: -------------------------------------------------------------------------------- 1 | use pgrx::pg_sys::{Datum, SnapshotData, TupleTableSlot}; 2 | use pgrx::{pg_sys, PgBox, PgRelation}; 3 | 4 | use crate::access_method::stats::StatsHeapNodeRead; 5 | use crate::util::ports::slot_getattr; 6 | use crate::util::HeapPointer; 7 | 8 | pub struct TableSlot { 9 | slot: PgBox<TupleTableSlot>, 10 | } 11 | 12 | impl TableSlot { 13 | pub unsafe fn from_index_heap_pointer<S: StatsHeapNodeRead>( 14 | heap_rel: &PgRelation, 15 | heap_pointer: HeapPointer, 16 | snapshot: *mut SnapshotData, 17 | stats: &mut S, 18 | ) -> Option<Self> { 19 | let slot = PgBox::from_pg(pg_sys::table_slot_create( 20 | heap_rel.as_ptr(), 21 | std::ptr::null_mut(), 22 | )); 23 | 24 | let table_am = heap_rel.rd_tableam; 25 | let mut ctid: pg_sys::ItemPointerData = pg_sys::ItemPointerData { 26 | ..Default::default() 27 | }; 28 | heap_pointer.to_item_pointer_data(&mut ctid); 29 | 30 | let scan = (*table_am).index_fetch_begin.unwrap()(heap_rel.as_ptr()); 31 | let mut call_again = false; 32 | /* all_dead can be ignored, only used in optimizations we don't implement */ 33 | let mut all_dead = false; 34 | let valid = (*table_am).index_fetch_tuple.unwrap()( 35 | scan, 36 | &mut ctid, 37 | snapshot, 38 | slot.as_ptr(), 39 | &mut call_again, 40 | &mut all_dead, 41 | ); 42 | 
(*table_am).index_fetch_end.unwrap()(scan); 43 | 44 | assert!(!call_again, "MVCC snapshots should not require call_again"); 45 | stats.record_heap_read(); 46 | 47 | if !valid { 48 | /* no valid tuples found in HOT-chain */ 49 | return None; 50 | } 51 | 52 | Some(Self { slot }) 53 | } 54 | 55 | pub unsafe fn get_attribute(&self, attribute_number: pg_sys::AttrNumber) -> Option<Datum> { 56 | slot_getattr(&self.slot, attribute_number) 57 | } 58 | } 59 | 60 | impl Drop for TableSlot { 61 | fn drop(&mut self) { 62 | unsafe { pg_sys::ExecDropSingleTupleTableSlot(self.slot.as_ptr()) }; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /pgvectorscale/src/util/tape.rs: -------------------------------------------------------------------------------- 1 | //! Tape provides a simple infinite-tape-writing abstraction over postgres pages. 2 | 3 | use super::page::{PageType, ReadablePage, WritablePage}; 4 | use pgrx::{ 5 | pg_sys::{BlockNumber, ForkNumber, RelationGetNumberOfBlocksInFork, BLCKSZ}, 6 | PgRelation, 7 | }; 8 | 9 | pub struct Tape<'a> { 10 | page_type: PageType, 11 | index: &'a PgRelation, 12 | current: BlockNumber, 13 | } 14 | 15 | impl<'a> Tape<'a> { 16 | /// Create a `Tape` that starts writing on a new page. 17 | pub unsafe fn new(index: &'a PgRelation, page_type: PageType) -> Self { 18 | assert!(!page_type.is_chained()); 19 | let page = WritablePage::new(index, page_type); 20 | let block_number = page.get_block_number(); 21 | page.commit(); 22 | Self { 23 | page_type, 24 | index, 25 | current: block_number, 26 | } 27 | } 28 | 29 | /// Create a Tape that resumes writing on the newest page of the given type, if possible. 
    pub unsafe fn resume(index: &'a PgRelation, page_type: PageType) -> Self {
        assert!(!page_type.is_chained());
        let nblocks = RelationGetNumberOfBlocksInFork(index.as_ptr(), ForkNumber::MAIN_FORKNUM);
        let mut current_block = None;
        // Scan backwards from the end of the relation so the newest page of
        // the requested type is found first.
        for block in (0..nblocks).rev() {
            if ReadablePage::read(index, block).get_type() == page_type {
                current_block = Some(block);
                break;
            }
        }
        match current_block {
            Some(current) => Tape {
                index,
                page_type,
                current,
            },
            // No page of this type exists yet: fall back to a fresh tape.
            None => Tape::new(index, page_type),
        }
    }

    /// Append `data` as a single item, moving to a brand-new page when the
    /// current page lacks room; returns the item's location. Items are never
    /// split across pages.
    pub unsafe fn write(&mut self, data: &[u8]) -> super::ItemPointer {
        let size = data.len();
        // BLCKSZ is only an upper bound; the real usable space is smaller
        // and is re-checked below after allocating a fresh page.
        assert!(size < BLCKSZ as usize);
        assert!(!self.page_type.is_chained());

        let mut current_page = WritablePage::modify(self.index, self.current);

        // Don't split data over pages. (See chain.rs for that.)
        if current_page.get_aligned_free_space() < size {
            current_page = WritablePage::new(self.index, self.page_type);
            self.current = current_page.get_block_number();
            if current_page.get_aligned_free_space() < size {
                panic!("Not enough free space on new page");
            }
        }
        let offset_number = current_page.add_item_unchecked(data);

        let item_pointer = super::ItemPointer::with_page(&current_page, offset_number);
        current_page.commit();

        item_pointer
    }

    /// Finish writing. The tape holds no resources, so this is a no-op.
    pub fn close(self) {}
}

#[cfg(any(test, feature = "pg_test"))]
#[pgrx::pg_schema]
mod tests {
    use pgrx::{pg_sys, pg_test, Spi};

    use super::*;

    // Create a table with a diskann index and return the opened index rel.
    fn make_test_relation() -> PgRelation {
        Spi::run(
            "CREATE TABLE test(encoding vector(3));
        CREATE INDEX idxtest
              ON test
              USING diskann(encoding)
              WITH (num_neighbors=30);",
        )
        .unwrap();

        let index_oid = Spi::get_one::<pg_sys::Oid>("SELECT 'idxtest'::regclass::oid")
            .unwrap()
            .expect("oid was null");
        unsafe { PgRelation::from_pg(pg_sys::RelationIdGetRelation(index_oid)) }
    }

    #[pg_test]
    fn tape_resume() {
        let indexrel = make_test_relation();
        unsafe {
            let node_page = {
                let mut tape = Tape::new(&indexrel, PageType::Node);
                let node_page = tape.current;
                let ip = tape.write(&[1, 2, 3]);
                assert_eq!(
                    ip.block_number, node_page,
                    "Tape block number should match IP"
                );
                assert_eq!(ip.offset, 1, "IP offset should be correct");
                let ip = tape.write(&[4, 5, 6]);
                assert_eq!(
                    ip.block_number, node_page,
                    "Tape block number should match IP"
                );
                assert_eq!(
                    tape.current, node_page,
                    "Data should be written to page with enough room"
                );
                node_page
            };

            {
                let mut tape = Tape::resume(&indexrel, PageType::PqQuantizerVector);
                let ip = tape.write(&[99]);
                assert_eq!(
                    ip.block_number, tape.current,
                    "Tape block number should match IP"
                );
                assert_eq!(
                    tape.current,
                    node_page + 1,
                    "An unseen page type must create a new page"
                );
            }

            {
                let mut tape = Tape::resume(&indexrel, PageType::Node);
                let ip = tape.write(&[7, 8, 9]);
                assert_eq!(
                    ip.block_number, tape.current,
                    "Tape block number should match IP"
                );
                tape.write(&[10, 11, 12]);
                assert_eq!(
                    ip.block_number, tape.current,
                    "Tape block number should match IP"
                );
                assert_eq!(
                    tape.current, node_page,
                    "Data should be written to existing page when there is room"
                );

                let page = WritablePage::modify(tape.index, tape.current);
                assert_eq!(page.get_aligned_free_space(), 8104);
            }

            {
                let mut tape = Tape::resume(&indexrel, PageType::Node);
                let ip = tape.write(&[42; 8109]);
                assert_eq!(
                    ip.block_number, tape.current,
                    "Tape block number should match IP"
                );
                assert_ne!(
                    tape.current, node_page,
                    "Writing more than available forces a new page"
                );
            }
        }
    }
}
--------------------------------------------------------------------------------
/pgvectorscale/vectorscale.control:
--------------------------------------------------------------------------------
comment = 'diskann access method for vector search'
default_version = '@CARGO_VERSION@'
#module_pathname = '$libdir/pgvectorscale'
relocatable = false
superuser = true
requires = 'vector'
--------------------------------------------------------------------------------
/scripts/package-deb.sh:
--------------------------------------------------------------------------------
#!/bin/bash

DEBHELPER_COMPAT=11

set -eux

# Positional arguments:
#   $1 = pgvectorscale version, $2 = checkout base dir,
#   $3 = OS name (used in the deb version), $4 = space-separated PG versions.
OS_NAME="${3}"
BASEDIR="${2}"/pgvectorscale
DEBDIR="${PWD}"/pkgdump
PGVECTORSCALE_VERSION="${1}"
PG_VERSIONS="${4}"

echo "$BASEDIR"

if [ ! -d "$DEBDIR" ]; then
    mkdir -p "${DEBDIR}"
fi

DEB_VERSION=${PGVECTORSCALE_VERSION}-${OS_NAME}

# Show what we got to aid debugging.
git log -1

# Build a minimal debhelper-based debian/ directory from scratch each run.
rm -rf "${BASEDIR}"/debian && mkdir -p "${BASEDIR}"/debian
ln -s /usr/bin/dh "${BASEDIR}"/debian/rules

date=$(TZ=Etc/UTC date -R)
maintainer='Timescale <hello@timescale.com>'

cd "${BASEDIR}"

# deb-changelog(5)
cat >"${BASEDIR}"/debian/changelog <<EOF
pgvectorscale (1:$DEB_VERSION) unused; urgency=medium
  * See https://github.com/timescale/pgvectorscale/releases/tag/$PGVECTORSCALE_VERSION
 -- $maintainer $date
EOF
# deb-src-control(5)
cat >"${BASEDIR}"/debian/control <<EOF
Source: pgvectorscale
Maintainer: $maintainer
Homepage: https://github.com/timescale/pgvectorscale
Rules-Requires-Root: no
Section: vector
Priority: extra
Build-Depends: debhelper-compat (= $DEBHELPER_COMPAT)
EOF

# NOTE(review): libdir/sharedir come from whichever pg_config is on PATH
# *before* the per-version loop below adjusts PATH — presumably the layout
# under target/release/vectorscale-pg$pg matches this; verify against the
# `cargo pgrx package` output if packaging breaks for one PG version.
libdir=$(pg_config --libdir)
sharedir=$(pg_config --sharedir)

base_PATH=$PATH

# Emit one binary-package stanza plus a .install file per PG major version.
for pg in $PG_VERSIONS; do
    PATH=/usr/lib/postgresql/$pg/bin:$base_PATH
    # cargo pgrx package
    cat >>"${BASEDIR}"/debian/control <<EOF

Package: pgvectorscale-postgresql-$pg
Architecture: any
Depends: postgresql-$pg
Description: pgvectorscale for speeding up ANN search
EOF

    echo "../target/release/vectorscale-pg$pg/$libdir/* usr/lib/postgresql/$pg/lib/" >"${BASEDIR}"/debian/pgvectorscale-postgresql-"$pg".install
    echo "../target/release/vectorscale-pg$pg/$sharedir/* usr/share/postgresql/$pg/" >>"${BASEDIR}"/debian/pgvectorscale-postgresql-"$pg".install
done

dpkg-buildpackage --build=binary --no-sign --post-clean

cd ..

# packagecloud.io doesn't support `.ddeb` files? Like `.udeb`, they're just
# deb packages by another name, so:
for i in pgvectorscale*.ddeb; do
    # But it's only on Ubuntu that dpkg-buildpackage creates dbgsym packages
    # with the suffix `.ddeb`. On Debian, 'pgvectorscale*.ddeb'
    # evaluates to 'pgvectorscale*.ddeb' so there's nothing to do.
    [ "$i" = 'pgvectorscale*.ddeb' ] || mv "$i" "${i%.ddeb}".deb
done

cp pgvectorscale*.deb "$DEBDIR"
--------------------------------------------------------------------------------