├── .devcontainer ├── Dockerfile ├── devcontainer.json └── docker-compose.yaml ├── .env.azure ├── .env.devcontainer ├── .github ├── dependabot-bot.yml ├── dependabot.yaml └── workflows │ ├── bicep-validation.yaml │ └── tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── azure.yaml ├── examples ├── asyncpg_items.py ├── movies_ada002.json ├── psycopg_items.py ├── sqlalchemy_async.py ├── sqlalchemy_items.py ├── sqlalchemy_movies.py └── sqlmodel_items.py ├── infra ├── main.bicep ├── main.parameters.json └── pg.bicep ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── write_azure_env.ps1 └── write_azure_env.sh /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG IMAGE=bullseye 2 | FROM mcr.microsoft.com/devcontainers/${IMAGE} 3 | 4 | ENV PYTHONUNBUFFERED 1 5 | 6 | RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 7 | && apt-get -y install --no-install-recommends postgresql-client \ 8 | && apt-get clean -y && rm -rf /var/lib/apt/lists/* -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/python-3 3 | { 4 | "name": "azure-postgres-pgvector-python", 5 | "dockerComposeFile": "docker-compose.yaml", 6 | "service": "app", 7 | "workspaceFolder": "/workspace", 8 | "forwardPorts": [5432], 9 | "portsAttributes": { 10 | "5432": {"label": "PostgreSQL port", "onAutoForward": "silent"} 11 | }, 12 | "features": { 13 | "ghcr.io/azure/azure-dev/azd:latest": {} 14 | }, 15 | // Configure tool-specific properties. 16 | "customizations": { 17 | // Configure properties specific to VS Code. 
18 | "vscode": { 19 | // Add the IDs of extensions you want installed when the container is created. 20 | "extensions": [ 21 | "ms-python.python", 22 | "ms-python.vscode-pylance", 23 | "charliermarsh.ruff", 24 | "ms-python.black-formatter", 25 | "mtxr.sqltools", 26 | "mtxr.sqltools-driver-pg", 27 | "ms-vscode.vscode-node-azure-pack", 28 | "mechatroner.rainbow-csv" 29 | ], 30 | // Set *default* container specific settings.json values on container create. 31 | "settings": { 32 | "python.defaultInterpreterPath": "/usr/local/bin/python", 33 | "python.testing.unittestEnabled": false, 34 | "python.testing.pytestEnabled": false, 35 | "[python]": { 36 | "editor.formatOnSave": true, 37 | "editor.codeActionsOnSave": { 38 | "source.fixAll": "explicit" 39 | }, 40 | "editor.defaultFormatter": "ms-python.black-formatter" 41 | }, 42 | "sqltools.connections": [ 43 | { 44 | "name": "Local database", 45 | "driver": "PostgreSQL", 46 | "server": "localhost", 47 | "port": 5432, 48 | "database": "postgres", 49 | "username": "admin", 50 | "password": "LocalPasswordOnly" 51 | }, 52 | { 53 | "name": "Azure database", 54 | "driver": "PostgreSQL", 55 | "server": ".postgres.database.azure.com", 56 | "port": 5432, 57 | "database": "db", 58 | "username": "UserRole", 59 | "askForPassword": true, 60 | "pgOptions": { 61 | "ssl": true 62 | } 63 | } 64 | ], 65 | "files.exclude": { 66 | ".ruff_cache": true, 67 | ".pytest_cache": true 68 | } 69 | } 70 | } 71 | }, 72 | // Use 'postCreateCommand' to run commands after the container is created. 73 | "postCreateCommand": "pip install -r requirements.txt", 74 | // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 
75 | "remoteUser": "vscode" 76 | } 77 | -------------------------------------------------------------------------------- /.devcontainer/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | app: 5 | build: 6 | context: .. 7 | dockerfile: .devcontainer/Dockerfile 8 | args: 9 | IMAGE: python:3.11 10 | 11 | volumes: 12 | - ..:/workspace:cached 13 | 14 | # Overrides default command so things don't shut down after the process ends. 15 | command: sleep infinity 16 | 17 | # Runs app on the same network as the database container, allows "forwardPorts" in devcontainer.json function. 18 | network_mode: service:db 19 | 20 | db: 21 | image: pgvector/pgvector:pg16 22 | restart: unless-stopped 23 | volumes: 24 | - postgres-data:/var/lib/postgresql/data 25 | environment: 26 | POSTGRES_DB: postgres 27 | POSTGRES_USER: admin 28 | POSTGRES_PASSWORD: LocalPasswordOnly 29 | 30 | # Add "forwardPorts": ["5432"] to **devcontainer.json** to forward PostgreSQL locally. 31 | # (Adding the "ports" property to this file will not forward from a Codespace.) 
32 | 33 | volumes: 34 | postgres-data: -------------------------------------------------------------------------------- /.env.azure: -------------------------------------------------------------------------------- 1 | # Set these values to connect to the Azure database 2 | # Use write_azure_env.sh or write_azure_env.ps1 to set these values 3 | POSTGRES_DATABASE="db" 4 | POSTGRES_HOST="YOUR-SERVER-NAME.postgres.database.azure.com" 5 | POSTGRES_SSL="require" 6 | POSTGRES_USERNAME="UserRole" 7 | -------------------------------------------------------------------------------- /.env.devcontainer: -------------------------------------------------------------------------------- 1 | # Use these values to connect to the local database from within the devcontainer 2 | POSTGRES_HOST=localhost 3 | POSTGRES_USERNAME=admin 4 | POSTGRES_PASSWORD=LocalPasswordOnly 5 | POSTGRES_DATABASE=postgres 6 | -------------------------------------------------------------------------------- /.github/dependabot-bot.yml: -------------------------------------------------------------------------------- 1 | safe: 2 | - psycopg2 3 | - python-dotenv 4 | - SQLAlchemy 5 | - faker 6 | - pgvector -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" -------------------------------------------------------------------------------- /.github/workflows/bicep-validation.yaml: -------------------------------------------------------------------------------- 1 | name: Validate AZD template 2 | on: 3 | push: 4 | branches: [ main ] 5 | paths: 6 | - "infra/**" 7 | pull_request: 8 | branches: [ main ] 9 | paths: 10 | - "infra/**" 11 | workflow_dispatch: 12 | 13 | 14 | jobs: 15 | build: 16 | runs-on: ubuntu-latest 17 | permissions: 18 | security-events: write 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | 23 | - name: Build Bicep for linting 24 | uses: azure/CLI@v1 25 | with: 26 | inlineScript: az config set bicep.use_binary_from_path=false && az bicep build -f infra/main.bicep --stdout 27 | 28 | - name: Run Microsoft Security DevOps Analysis 29 | uses: microsoft/security-devops-action@v1 30 | id: msdo 31 | continue-on-error: true 32 | with: 33 | tools: templateanalyzer 34 | 35 | - name: Upload alerts to Security tab 36 | uses: github/codeql-action/upload-sarif@v3 37 | if: github.repository == 'Azure-Samples/azure-postgres-pgvector-python' # upload only from the canonical repository, not from forks 38 | with: 39 | sarif_file: ${{ steps.msdo.outputs.sarifFile }} 40 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Python check 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test_package: 11 | name: Test ${{ matrix.os }} Python ${{ matrix.python_version }} 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | 
fail-fast: false 15 | matrix: 16 | os: ["ubuntu-latest"] # the ubuntu-20.04 hosted runner image has been retired 17 | python_version: ["3.9", "3.10", "3.11", "3.12"] 18 | services: 19 | postgres: 20 | image: pgvector/pgvector:pg16 21 | env: 22 | POSTGRES_PASSWORD: postgres 23 | ports: 24 | - 5432:5432 25 | # needed because the postgres container does not provide a healthcheck 26 | options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 27 | steps: 28 | - uses: actions/checkout@v4 29 | - name: Setup python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: ${{ matrix.python_version }} 33 | architecture: x64 34 | - name: Install dependencies 35 | run: | 36 | python3 -m pip install --upgrade pip 37 | python3 -m pip install -r requirements.txt 38 | - name: Run example scripts 39 | run: | 40 | python3 examples/asyncpg_items.py 41 | python3 examples/psycopg_items.py 42 | python3 examples/sqlalchemy_async.py 43 | python3 examples/sqlalchemy_items.py 44 | python3 examples/sqlalchemy_movies.py 45 | python3 examples/sqlmodel_items.py 46 | env: 47 | POSTGRES_HOST: localhost 48 | POSTGRES_USERNAME: postgres 49 | POSTGRES_PASSWORD: postgres 50 | POSTGRES_DATABASE: postgres 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .azure 3 | .env 4 | .venv 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/astral-sh/ruff-pre-commit 9 | rev: v0.1.0 10 | hooks: 11 | - id: ruff 12 | - repo: https://github.com/psf/black 13 | rev: 23.9.1 14 | hooks: 15 | - id: black 16 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Azure Samples 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: PostgreSQL + pgvector 3 | description: Deploy a PostgreSQL Flexible Server to Azure with the pgvector extension enabled. 
4 | languages: 5 | - bicep 6 | - azdeveloper 7 | - python 8 | products: 9 | - azure-database-postgresql 10 | - azure 11 | page_type: sample 12 | urlFragment: azure-postgres-pgvector-python 13 | --- 14 | 15 | # PostgreSQL + pgvector on Azure 16 | 17 | This repository makes it easy to deploy a PostgreSQL Flexible Server to Azure with the [pgvector](https://github.com/pgvector/pgvector) extension installed. The pgvector extension provides a vector similarity search engine for PostgreSQL, allowing you to perform similarity searches on your data using vector embeddings. 18 | 19 | The repository contains infrastructure-as-code (Bicep) to deploy an Azure PostgreSQL Flexible Server with pgvector extension enabled, password authentication disabled, and Entra (Active Directory) authentication enabled. The repository also contains example Python scripts to demonstrate how to use pgvector. 20 | 21 | Table of contents: 22 | 23 | * [Opening this project](#opening-this-project) 24 | * [GitHub Codespaces](#github-codespaces) 25 | * [VS Code Dev Containers](#vs-code-dev-containers) 26 | * [Local environment](#local-environment) 27 | * [Deploying to Azure](#deploying-to-azure) 28 | * [Example scripts](#example-scripts) 29 | 30 | ## Opening this project 31 | 32 | You have a few options for setting up this project. 33 | The easiest way to get started is GitHub Codespaces, since it will setup all the tools for you, but you can also [set it up locally](#local-environment) if desired. 34 | 35 | ### GitHub Codespaces 36 | 37 | You can run this repo virtually by using GitHub Codespaces, which will open a web-based VS Code in your browser: 38 | 39 | [![Open in GitHub Codespaces](https://img.shields.io/static/v1?style=for-the-badge&label=GitHub+Codespaces&message=Open&color=brightgreen&logo=github)](https://codespaces.new/Azure-Samples/azure-postgres-pgvector-python) 40 | 41 | Once the codespace opens (this may take several minutes), open a terminal window. 
42 | 43 | ### VS Code Dev Containers 44 | 45 | A related option is VS Code Dev Containers, which will open the project in your local VS Code using the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers): 46 | 47 | 1. Start Docker Desktop (install it if not already installed). 48 | 1. Open the project: 49 | [![Open in Dev Containers](https://img.shields.io/static/v1?style=for-the-badge&label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/azure-samples/azure-postgres-pgvector-python). 50 | 1. In the VS Code window that opens, once the project files show up (this may take several minutes), open a terminal window. 51 | 52 | ### Local environment 53 | 54 | 1. Install the required tools: 55 | 56 | * [Azure Developer CLI](https://aka.ms/azure-dev/install) 57 | * [Python 3.9, 3.10, 3.11, or 3.12](https://www.python.org/downloads/) (Only necessary if you want to run the Python scripts) 58 | 59 | 2. Create a new folder and switch to it in the terminal. 60 | 3. Run this command to download the project code: 61 | 62 | ```shell 63 | azd init -t azure-postgres-pgvector-python 64 | ``` 65 | 66 | Note that this command will initialize a git repository, so you do not need to clone this repository. 67 | 68 | 4. Create a Python virtual environment and install the required packages: 69 | 70 | ```shell 71 | python -m pip install -r requirements.txt 72 | ``` 73 | 74 | 5. Open a terminal window inside the project folder. 75 | 76 | ## Deploying to Azure 77 | 78 | Follow these steps to deploy a PostgreSQL Flexible Server to Azure with the pgvector extension enabled: 79 | 80 | 1. Log in to your Azure account: 81 | 82 | ```shell 83 | azd auth login 84 | ``` 85 | 86 | 1. Create a new azd environment: 87 | 88 | ```shell 89 | azd env new 90 | ``` 91 | 92 | Enter a name that will be used for the resource group. 
93 | This will create a new folder in the `.azure` folder, and set it as the active environment for any calls to `azd` going forward. 94 | 95 | 1. Run this command to provision all the resources: 96 | 97 | ```shell 98 | azd provision 99 | ``` 100 | 101 | This will create a new resource group, and create the PostgreSQL Flexible Server inside that group. 102 | 103 | 1. The example Python scripts read their configuration variables from a `.env` file located in the directory from which you invoke the scripts. You can easily create a file with the correct variables for your PostgreSQL server by running this script, which copies the necessary `azd` environment variables into your local `.env` (in PowerShell, run `./write_azure_env.ps1` instead): 104 | 105 | ```shell 106 | ./write_azure_env.sh 107 | ``` 108 | 109 | 1. Now you may run the Python scripts in order to interact with the PostgreSQL server. 110 | 111 | ```shell 112 | python examples/sqlalchemy_async.py 113 | ``` 114 | 115 | Note that each of the scripts starts off with a `CREATE EXTENSION IF NOT EXISTS vector` command, which will install the pgvector extension into the database. Once that command has run in a given database, the extension does not need to be installed again for that particular database; the `IF NOT EXISTS` clause makes repeated runs harmless. 116 | 117 | ## Example scripts 118 | 119 | The `examples` folder contains example Python scripts that demonstrate how to use pgvector, based on the [pgvector sample code](https://github.com/pgvector/pgvector-python). 120 | 121 | | Script | Dependencies | Description | 122 | |--------|--------------|-------------| 123 | | [`sqlalchemy_async.py`](./examples/sqlalchemy_async.py) | `asyncpg`, `sqlalchemy`, `pgvector` | Uses pgvector with SQLAlchemy and asyncpg for a simple 3-dimension vector. | 124 | | [`sqlalchemy_items.py`](./examples/sqlalchemy_items.py) | `psycopg2`, `sqlalchemy`, `pgvector` | Uses pgvector with SQLAlchemy and psycopg2 for a simple 3-dimension vector. 
| 125 | | [`sqlalchemy_movies.py`](./examples/sqlalchemy_movies.py) | `psycopg2`, `sqlalchemy`, `pgvector` | Uses pgvector with SQLAlchemy and psycopg2 for 1536-dimension vectors calculated previously with OpenAI's text-embedding-ada-002 model. | 126 | | [`sqlmodel_items.py`](./examples/sqlmodel_items.py) | `sqlmodel`, `pgvector` | Uses pgvector with SQLModel for a simple 3-dimension vector. | 127 | | [`asyncpg_items.py`](./examples/asyncpg_items.py) | `asyncpg`, `pgvector` | Uses pgvector with asyncpg for a simple 3-dimension vector. | 128 | | [`psycopg_items.py`](./examples/psycopg_items.py) | `psycopg2`, `pgvector` | Uses pgvector with psycopg2 for a simple 3-dimension vector. | 129 | 130 | Let us know if there are any other examples you would like to see! 131 | -------------------------------------------------------------------------------- /azure.yaml: -------------------------------------------------------------------------------- 1 | name: postgres-pgvector 2 | metadata: 3 | template: postgres-pgvector@0.0.2-beta 4 | -------------------------------------------------------------------------------- /examples/asyncpg_items.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | import asyncpg 5 | from azure.identity import DefaultAzureCredential 6 | from dotenv import load_dotenv 7 | from pgvector.asyncpg import register_vector 8 | 9 | 10 | async def async_main(): 11 | # Establish a connection to an existing database 12 | load_dotenv(".env", override=True) 13 | POSTGRES_HOST = os.environ["POSTGRES_HOST"] 14 | POSTGRES_USERNAME = os.environ["POSTGRES_USERNAME"] 15 | POSTGRES_DATABASE = os.environ["POSTGRES_DATABASE"] 16 | 17 | if POSTGRES_HOST.endswith(".database.azure.com"): 18 | print("Authenticating to Azure Database for PostgreSQL using Azure Identity...") 19 | azure_credential = DefaultAzureCredential() 20 | token = 
azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") 21 | POSTGRES_PASSWORD = token.token 22 | else: 23 | POSTGRES_PASSWORD = os.environ["POSTGRES_PASSWORD"] 24 | 25 | DATABASE_URI = f"postgresql://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}/{POSTGRES_DATABASE}" 26 | # Specify SSL mode if needed 27 | if POSTGRES_SSL := os.environ.get("POSTGRES_SSL"): 28 | DATABASE_URI += f"?sslmode={POSTGRES_SSL}" 29 | 30 | conn = await asyncpg.connect(DATABASE_URI) 31 | 32 | await conn.execute("CREATE EXTENSION IF NOT EXISTS vector") 33 | await register_vector(conn) 34 | 35 | await conn.execute("DROP TABLE IF EXISTS items") 36 | await conn.execute("CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))") 37 | await conn.execute("CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)") 38 | 39 | await conn.execute("INSERT INTO items (embedding) VALUES ($1)", [1, 2, 3]) 40 | await conn.execute("INSERT INTO items (embedding) VALUES ($1)", [-1, 1, 3]) 41 | await conn.execute("INSERT INTO items (embedding) VALUES ($1)", [0, -1, -2]) 42 | 43 | # Find 2 closest vectors to [3, 1, 2] 44 | row = await conn.fetch("SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 2", [3, 1, 2]) 45 | for row in row: 46 | print(row["embedding"]) 47 | 48 | # Calculate distance between [3, 1, 2] and the first vector 49 | row = await conn.fetch( 50 | "SELECT embedding <-> $1 AS distance FROM items ORDER BY embedding <-> $1 LIMIT 1", [3, 1, 2] 51 | ) 52 | print(row[0]["distance"]) 53 | 54 | # Find vectors within distance 5 from [3, 1, 2] 55 | row = await conn.fetch("SELECT * FROM items WHERE embedding <-> $1 < 5", [3, 1, 2]) 56 | for row in row: 57 | print(row["embedding"]) 58 | 59 | # Calculate average of all vectors 60 | row = await conn.fetch("SELECT avg(embedding) FROM items") 61 | print(row[0]["avg"]) 62 | 63 | # Close the connection. 
64 | await conn.close() 65 | 66 | 67 | asyncio.run(async_main()) 68 | -------------------------------------------------------------------------------- /examples/psycopg_items.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import psycopg2 5 | from azure.identity import DefaultAzureCredential 6 | from dotenv import load_dotenv 7 | from pgvector.psycopg2 import register_vector 8 | 9 | # Connect to the database based on environment variables 10 | load_dotenv(".env", override=True) 11 | POSTGRES_HOST = os.environ["POSTGRES_HOST"] 12 | POSTGRES_USERNAME = os.environ["POSTGRES_USERNAME"] 13 | POSTGRES_DATABASE = os.environ["POSTGRES_DATABASE"] 14 | 15 | if POSTGRES_HOST.endswith(".database.azure.com"): 16 | print("Authenticating to Azure Database for PostgreSQL using Azure Identity...") 17 | azure_credential = DefaultAzureCredential() 18 | token = azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") 19 | POSTGRES_PASSWORD = token.token 20 | else: 21 | POSTGRES_PASSWORD = os.environ["POSTGRES_PASSWORD"] 22 | 23 | extra_params = {} 24 | if POSTGRES_SSL := os.environ.get("POSTGRES_SSL"): 25 | extra_params["sslmode"] = POSTGRES_SSL 26 | 27 | conn = psycopg2.connect( 28 | database=POSTGRES_DATABASE, 29 | user=POSTGRES_USERNAME, 30 | password=POSTGRES_PASSWORD, 31 | host=POSTGRES_HOST, 32 | **extra_params, 33 | ) 34 | 35 | conn.autocommit = True 36 | cur = conn.cursor() 37 | # Create pgvector extension 38 | cur.execute("CREATE EXTENSION IF NOT EXISTS vector") 39 | # Drop table defined in this model from the database, if already exists 40 | cur.execute("DROP TABLE IF EXISTS items") 41 | # Create table defined in this model in the database 42 | cur.execute("CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))") 43 | register_vector(conn) 44 | 45 | # Define HNSW index to support vector similarity search through the vector_l2_ops access method (Euclidean 
distance). The SQL operator for Euclidean distance is written as <->. 46 | cur.execute("CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)") 47 | 48 | # Insert three vectors as three separate rows in the items table 49 | embeddings = [ 50 | np.array([1, 2, 3]), 51 | np.array([-1, 1, 3]), 52 | np.array([0, -1, -2]), 53 | ] 54 | for embedding in embeddings: 55 | cur.execute("INSERT INTO items (embedding) VALUES (%s)", (embedding,)) 56 | 57 | 58 | # Find all vectors in table items 59 | cur.execute("SELECT * FROM items") 60 | all_items = cur.fetchall() 61 | print("All vectors in table items:") 62 | for item in all_items: 63 | print(f"\t{item[1]}") 64 | 65 | # Find 2 closest vectors to [3, 1, 2] 66 | embedding_predicate = np.array([3, 1, 2]) 67 | cur.execute("SELECT * FROM items ORDER BY embedding <-> %s LIMIT 2", (embedding_predicate,)) 68 | closest_items = cur.fetchall() 69 | print("Two closest vectors to [3, 1, 2] in table items:") 70 | for item in closest_items: 71 | print(f"\t{item[1]}") 72 | 73 | # Calculate distance between [3, 1, 2] and the first vector 74 | cur.execute( 75 | "SELECT embedding <-> %s AS distance FROM items ORDER BY embedding <-> %s LIMIT 1", 76 | (embedding_predicate, embedding_predicate), 77 | ) 78 | distance = cur.fetchone() 79 | print(f"Distance between [3, 1, 2] vector and the one closest to it: {distance[0]}") 80 | 81 | # Find vectors within distance 5 from [3, 1, 2] 82 | cur.execute("SELECT * FROM items WHERE embedding <-> %s < 5", (embedding_predicate,)) 83 | close_enough_items = cur.fetchall() 84 | print("Vectors within a distance of 5 from [3, 1, 2]:") 85 | for item in close_enough_items: 86 | print(f"\t{item[1]}") 87 | 88 | # Calculate average of all vectors 89 | cur.execute("SELECT avg(embedding) FROM items") 90 | avg_embedding = cur.fetchone() 91 | print(f"Average of all vectors: {avg_embedding}") 92 | 93 | cur.close() 94 | -------------------------------------------------------------------------------- 
/examples/sqlalchemy_async.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import asyncio 4 | import os 5 | 6 | from azure.identity import DefaultAzureCredential 7 | from dotenv import load_dotenv 8 | from pgvector.sqlalchemy import Vector 9 | from sqlalchemy import Index, func, select, text 10 | from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine 11 | from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column 12 | 13 | 14 | # Define the models 15 | class Base(DeclarativeBase): 16 | pass 17 | 18 | 19 | class Item(Base): 20 | __tablename__ = "items" 21 | id: Mapped[int] = mapped_column(primary_key=True) 22 | embedding = mapped_column(Vector(3)) 23 | 24 | 25 | # Define HNSW index to support vector similarity search through the vector_l2_ops access method (Euclidean distance). The SQL operator for Euclidean distance is written as <->. 26 | index = Index( 27 | "hnsw_index_for_euclidean_distance_similarity_search", 28 | Item.embedding, 29 | postgresql_using="hnsw", 30 | postgresql_with={"m": 16, "ef_construction": 64}, 31 | postgresql_ops={"embedding": "vector_l2_ops"}, 32 | ) 33 | 34 | 35 | async def insert_objects(async_session: async_sessionmaker[AsyncSession]) -> None: 36 | async with async_session() as session: 37 | async with session.begin(): 38 | # Insert three vectors as three separate rows in the items table 39 | session.add_all( 40 | [ 41 | Item(embedding=[1, 2, 3]), 42 | Item(embedding=[-1, 1, 3]), 43 | Item(embedding=[0, -1, -2]), 44 | ] 45 | ) 46 | 47 | 48 | async def select_and_update_objects( 49 | async_session: async_sessionmaker[AsyncSession], 50 | ) -> None: 51 | async with async_session() as session: 52 | # Find 2 closest vectors to [3, 1, 2] 53 | closest_items = await session.scalars(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(2)) 54 | print("Two closest vectors to [3, 1, 2] in table items:") 55 | for 
item in closest_items: 56 | print(f"\t{item.embedding}") 57 | 58 | # Calculate distance between [3, 1, 2] and the first vector 59 | distance = (await session.scalars(select(Item.embedding.l2_distance([3, 1, 2])))).first() 60 | print(f"Distance between [3, 1, 2] vector and the one closest to it: {distance}") 61 | 62 | # Find vectors within distance 5 from [3, 1, 2] 63 | close_enough_items = await session.scalars(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5)) 64 | print("Vectors within a distance of 5 from [3, 1, 2]:") 65 | for item in close_enough_items: 66 | print(f"\t{item.embedding}") 67 | 68 | # Calculate average of all vectors 69 | avg_embedding = (await session.scalars(select(func.avg(Item.embedding)))).first() 70 | print(f"Average of all vectors: {avg_embedding}") 71 | 72 | 73 | async def async_main() -> None: 74 | load_dotenv(".env", override=True) 75 | 76 | POSTGRES_HOST = os.environ["POSTGRES_HOST"] 77 | POSTGRES_USERNAME = os.environ["POSTGRES_USERNAME"] 78 | POSTGRES_DATABASE = os.environ["POSTGRES_DATABASE"] 79 | 80 | if POSTGRES_HOST.endswith(".database.azure.com"): 81 | print("Authenticating to Azure Database for PostgreSQL using Azure Identity...") 82 | azure_credential = DefaultAzureCredential() 83 | token = azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") 84 | POSTGRES_PASSWORD = token.token 85 | else: 86 | POSTGRES_PASSWORD = os.environ["POSTGRES_PASSWORD"] 87 | 88 | DATABASE_URI = f"postgresql+asyncpg://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}/{POSTGRES_DATABASE}" 89 | # Specify SSL mode if needed 90 | if POSTGRES_SSL := os.environ.get("POSTGRES_SSL"): 91 | DATABASE_URI += f"?ssl={POSTGRES_SSL}" 92 | 93 | engine = create_async_engine( 94 | DATABASE_URI, 95 | echo=False, 96 | ) 97 | 98 | # async_sessionmaker: a factory for new AsyncSession objects. 
99 | # expire_on_commit - don't expire objects after transaction commit 100 | async_session = async_sessionmaker(engine, expire_on_commit=False) 101 | 102 | async with engine.begin() as conn: 103 | # Create pgvector extension 104 | await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) 105 | # Drop all tables (and indexes) defined in this model from the database, if they already exist 106 | await conn.run_sync(Base.metadata.drop_all) 107 | # Create all tables (and indexes) defined in this model in the database 108 | await conn.run_sync(Base.metadata.create_all) 109 | 110 | await insert_objects(async_session) 111 | await select_and_update_objects(async_session) 112 | 113 | # for AsyncEngine created in function scope, close and 114 | # clean-up pooled connections 115 | await engine.dispose() 116 | 117 | 118 | asyncio.run(async_main()) 119 | -------------------------------------------------------------------------------- /examples/sqlalchemy_items.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from azure.identity import DefaultAzureCredential 4 | from dotenv import load_dotenv 5 | from pgvector.sqlalchemy import Vector 6 | from sqlalchemy import Index, create_engine, func, select, text 7 | from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column 8 | 9 | 10 | # Define the models 11 | class Base(DeclarativeBase): 12 | pass 13 | 14 | 15 | class Item(Base): 16 | __tablename__ = "items" 17 | id: Mapped[int] = mapped_column(primary_key=True) 18 | embedding = mapped_column(Vector(3)) 19 | 20 | 21 | # Define HNSW index to support vector similarity search through the vector_l2_ops access method (Euclidean distance). The SQL operator for Euclidean distance is written as <->. 
22 | index = Index( 23 | "hnsw_index_for_euclidean_distance_similarity_search", 24 | Item.embedding, 25 | postgresql_using="hnsw", 26 | postgresql_with={"m": 16, "ef_construction": 64}, 27 | postgresql_ops={"embedding": "vector_l2_ops"}, 28 | ) 29 | 30 | # Connect to the database based on environment variables 31 | load_dotenv(".env", override=True) 32 | POSTGRES_HOST = os.environ["POSTGRES_HOST"] 33 | POSTGRES_USERNAME = os.environ["POSTGRES_USERNAME"] 34 | POSTGRES_DATABASE = os.environ["POSTGRES_DATABASE"] 35 | 36 | if POSTGRES_HOST.endswith(".database.azure.com"): 37 | print("Authenticating to Azure Database for PostgreSQL using Azure Identity...") 38 | azure_credential = DefaultAzureCredential() 39 | token = azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") 40 | POSTGRES_PASSWORD = token.token 41 | else: 42 | POSTGRES_PASSWORD = os.environ["POSTGRES_PASSWORD"] 43 | 44 | DATABASE_URI = f"postgresql://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}/{POSTGRES_DATABASE}" 45 | # Specify SSL mode if needed 46 | if POSTGRES_SSL := os.environ.get("POSTGRES_SSL"): 47 | DATABASE_URI += f"?sslmode={POSTGRES_SSL}" 48 | 49 | engine = create_engine(DATABASE_URI, echo=False) 50 | 51 | # Create pgvector extension 52 | with engine.begin() as conn: 53 | conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) 54 | 55 | # Drop all tables (and indexes) defined in this model from the database, if they already exist 56 | Base.metadata.drop_all(engine) 57 | # Create all tables (and indexes) defined for this model in the database 58 | Base.metadata.create_all(engine) 59 | 60 | # Insert data and issue queries 61 | with Session(engine) as session: 62 | 63 | # Insert three vectors as three separate rows in the items table 64 | session.add_all( 65 | [ 66 | Item(embedding=[1, 2, 3]), 67 | Item(embedding=[-1, 1, 3]), 68 | Item(embedding=[0, -1, -2]), 69 | ] 70 | ) 71 | 72 | # Find all vectors in table items 73 | all_items = 
session.scalars(select(Item)) 74 | print("All vectors in table items:") 75 | for item in all_items: 76 | print(f"\t{item.embedding}") 77 | 78 | # Find 2 closest vectors to [3, 1, 2] 79 | closest_items = session.scalars(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(2)) 80 | print("Two closest vectors to [3, 1, 2] in table items:") 81 | for item in closest_items: 82 | print(f"\t{item.embedding}") 83 | 84 | # Calculate distance between [3, 1, 2] and the first vector 85 | distance = session.scalars(select(Item.embedding.l2_distance([3, 1, 2]))).first() 86 | print(f"Distance between [3, 1, 2] vector and the one closest to it: {distance}") 87 | 88 | # Find vectors within distance 5 from [3, 1, 2] 89 | close_enough_items = session.scalars(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5)) 90 | print("Vectors within a distance of 5 from [3, 1, 2]:") 91 | for item in close_enough_items: 92 | print(f"\t{item.embedding}") 93 | 94 | # Calculate average of all vectors 95 | avg_embedding = session.scalars(select(func.avg(Item.embedding))).first() 96 | print(f"Average of all vectors: {avg_embedding}") 97 | -------------------------------------------------------------------------------- /examples/sqlalchemy_movies.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from pathlib import Path 4 | 5 | from azure.identity import DefaultAzureCredential 6 | from dotenv import load_dotenv 7 | from pgvector.sqlalchemy import Vector 8 | from sqlalchemy import Index, create_engine, select, text 9 | from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column 10 | 11 | 12 | # Define the models 13 | class Base(DeclarativeBase): 14 | pass 15 | 16 | 17 | class Movie(Base): 18 | __tablename__ = "movies" 19 | id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) 20 | title: Mapped[str] = mapped_column() 21 | title_vector = mapped_column(Vector(1536)) # ada-002 is 
1536-dimensional 22 | 23 | 24 | # Define an HNSW index (the index access method) to support vector similarity search, using the vector_cosine_ops operator class (cosine distance). The SQL operator for cosine distance is written as <=>. 25 | index = Index( 26 | "hnsw_index_for_cosine_distance_similarity_search", 27 | Movie.title_vector, 28 | postgresql_using="hnsw", 29 | postgresql_with={"m": 16, "ef_construction": 64}, 30 | postgresql_ops={"title_vector": "vector_cosine_ops"}, 31 | ) 32 | 33 | # Connect to the database based on environment variables 34 | load_dotenv(".env", override=True) 35 | POSTGRES_HOST = os.environ["POSTGRES_HOST"] 36 | POSTGRES_USERNAME = os.environ["POSTGRES_USERNAME"] 37 | POSTGRES_DATABASE = os.environ["POSTGRES_DATABASE"] 38 | 39 | if POSTGRES_HOST.endswith(".database.azure.com"): 40 | print("Authenticating to Azure Database for PostgreSQL using Azure Identity...") 41 | azure_credential = DefaultAzureCredential() 42 | token = azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") 43 | POSTGRES_PASSWORD = token.token 44 | else: 45 | POSTGRES_PASSWORD = os.environ["POSTGRES_PASSWORD"] 46 | 47 | DATABASE_URI = f"postgresql://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}/{POSTGRES_DATABASE}" 48 | # Specify SSL mode if needed 49 | if POSTGRES_SSL := os.environ.get("POSTGRES_SSL"): 50 | DATABASE_URI += f"?sslmode={POSTGRES_SSL}" 51 | 52 | engine = create_engine(DATABASE_URI, echo=False) 53 | 54 | # Create pgvector extension 55 | with engine.begin() as conn: 56 | conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) 57 | 58 | # Drop all tables (and indexes) defined in this model from the database, if they already exist 59 | Base.metadata.drop_all(engine) 60 | # Create all tables (and indexes) defined for this model in the database 61 | Base.metadata.create_all(engine) 62 | 63 | # Insert data and issue queries 64 | with Session(engine) as session: 65 | 66 | # Insert the movies from the JSON file 67 | current_directory =
Path(__file__).parent 68 | data_path = current_directory / "movies_ada002.json" 69 | with open(data_path) as f: 70 | movies = json.load(f) 71 | for title, title_vector in movies.items(): 72 | movie = Movie(title=title, title_vector=title_vector) 73 | session.add(movie) 74 | session.commit() 75 | 76 | # Query for target movie, the one whose title matches "Winnie the Pooh" 77 | query = select(Movie).where(Movie.title == "Winnie the Pooh") 78 | target_movie = session.execute(query).scalars().first() 79 | if target_movie is None: 80 | print("Movie not found") 81 | exit(1) 82 | 83 | # Find the 5 most similar movies to "Winnie the Pooh" 84 | most_similars = session.scalars( 85 | select(Movie).order_by(Movie.title_vector.cosine_distance(target_movie.title_vector)).limit(5) 86 | ) 87 | print(f"Five most similar movies to '{target_movie.title}':") 88 | for movie in most_similars: 89 | print(f"\t{movie.title}") 90 | -------------------------------------------------------------------------------- /examples/sqlmodel_items.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from azure.identity import DefaultAzureCredential 4 | from dotenv import load_dotenv 5 | from pgvector.sqlalchemy import Vector 6 | from sqlalchemy import Column 7 | from sqlmodel import Field, Session, SQLModel, create_engine, func, select 8 | 9 | 10 | class Item(SQLModel, table=True): 11 | id: int = Field(primary_key=True, sa_column_kwargs={"autoincrement": True}) 12 | embedding: list[float] = Field(sa_column=Column(Vector(3))) 13 | 14 | 15 | # Connect to the database 16 | load_dotenv(".env", override=True) 17 | POSTGRES_HOST = os.environ["POSTGRES_HOST"] 18 | POSTGRES_USERNAME = os.environ["POSTGRES_USERNAME"] 19 | POSTGRES_DATABASE = os.environ["POSTGRES_DATABASE"] 20 | 21 | if POSTGRES_HOST.endswith(".database.azure.com"): 22 | print("Authenticating to Azure Database for PostgreSQL using Azure Identity...") 23 | azure_credential = 
DefaultAzureCredential() 24 | token = azure_credential.get_token("https://ossrdbms-aad.database.windows.net/.default") 25 | POSTGRES_PASSWORD = token.token 26 | else: 27 | POSTGRES_PASSWORD = os.environ["POSTGRES_PASSWORD"] 28 | 29 | DATABASE_URI = f"postgresql://{POSTGRES_USERNAME}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}/{POSTGRES_DATABASE}" 30 | # Specify SSL mode if needed 31 | if POSTGRES_SSL := os.environ.get("POSTGRES_SSL"): 32 | DATABASE_URI += f"?sslmode={POSTGRES_SSL}" 33 | 34 | engine = create_engine(DATABASE_URI, echo=False) 35 | 36 | SQLModel.metadata.drop_all(engine) 37 | SQLModel.metadata.create_all(engine) 38 | 39 | with Session(engine) as session: 40 | session.add_all( 41 | [ 42 | Item(embedding=[1, 2, 3]), 43 | Item(embedding=[-1, 1, 3]), 44 | Item(embedding=[0, -1, -2]), 45 | ] 46 | ) 47 | 48 | # Find 2 closest vectors to [3, 1, 2] 49 | closest_items = session.exec(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(2)) 50 | for item in closest_items: 51 | print(item.embedding) 52 | 53 | # Calculate distance between [3, 1, 2] and the first vector 54 | distance = session.exec(select(Item.embedding.l2_distance([3, 1, 2]))).first() 55 | print(distance) 56 | 57 | # Find vectors within distance 5 from [3, 1, 2] 58 | close_enough_items = session.exec(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5)) 59 | for item in close_enough_items: 60 | print(item.embedding) 61 | 62 | # Calculate average of all vectors 63 | avg_embedding = session.exec(select(func.avg(Item.embedding))).first() 64 | print(avg_embedding) 65 | -------------------------------------------------------------------------------- /infra/main.bicep: -------------------------------------------------------------------------------- 1 | targetScope = 'subscription' 2 | 3 | @minLength(1) 4 | @maxLength(64) 5 | @description('Name of the environment which is used to generate a short unique hash used in all resources.') 6 | param environmentName string 7 | 8 |
@minLength(1) 9 | @description('Primary location for all resources') 10 | param location string 11 | 12 | param resourceGroupName string = '' 13 | 14 | @description('The Object ID of the Azure AD admin.') 15 | param aadAdminObjectid string 16 | 17 | @description('Azure AD admin name.') 18 | param aadAdminName string 19 | 20 | @description('Azure AD admin Type') 21 | @allowed([ 22 | 'User' 23 | 'Group' 24 | 'ServicePrincipal' 25 | ]) 26 | param aadAdminType string = 'User' 27 | param databaseName string = 'db' 28 | 29 | var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) 30 | 31 | var tags = { 'azd-env-name': environmentName } 32 | var prefix = '${environmentName}-${resourceToken}' 33 | 34 | resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { 35 | name: !empty(resourceGroupName) ? resourceGroupName : 'rg-${environmentName}' 36 | location: location 37 | } 38 | 39 | module pg 'pg.bicep' = { 40 | name: 'pg' 41 | scope: resourceGroup 42 | params: { 43 | name: '${prefix}-postgresql' 44 | location: location 45 | tags: tags 46 | authType: 'EntraOnly' 47 | aadAdminObjectid: aadAdminObjectid 48 | aadAdminName: aadAdminName 49 | aadAdminType: aadAdminType 50 | databaseNames: [ databaseName ] 51 | storage: { 52 | storageSizeGB: 32 53 | } 54 | version: '15' 55 | allowAllIPsFirewall: true 56 | } 57 | } 58 | 59 | output POSTGRES_USERNAME string = aadAdminName 60 | output POSTGRES_DATABASE string = databaseName 61 | output POSTGRES_HOST string = pg.outputs.POSTGRES_DOMAIN_NAME 62 | output POSTGRES_SSL string = 'require' 63 | -------------------------------------------------------------------------------- /infra/main.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environmentName": { 6 | "value": 
"${AZURE_ENV_NAME}" 7 | }, 8 | "resourceGroupName": { 9 | "value": "${AZURE_RESOURCE_GROUP}" 10 | }, 11 | "location": { 12 | "value": "${AZURE_LOCATION}" 13 | }, 14 | "aadAdminName": { 15 | "value": "UserRole" 16 | }, 17 | "aadAdminObjectid": { 18 | "value": "${AZURE_PRINCIPAL_ID}" 19 | }, 20 | "aadAdminType": { 21 | "value": "User" 22 | }, 23 | "administratorLogin": { 24 | "value": "adminUsername" 25 | }, 26 | "administratorLoginPassword": { 27 | "value": "unusedPassword" 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /infra/pg.bicep: -------------------------------------------------------------------------------- 1 | metadata description = 'Creates an Azure Database for PostgreSQL flexible server with the pgvector extension allow-listed.' 2 | param name string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | @allowed([ 7 | 'Password' 8 | 'EntraOnly' 9 | ]) 10 | param authType string = 'Password' 11 | 12 | param administratorLogin string = '' 13 | @secure() 14 | param administratorLoginPassword string = '' 15 | 16 | @description('The Object ID of the Azure AD admin.') 17 | param aadAdminObjectid string 18 | 19 | @description('Azure AD admin name.') 20 | param aadAdminName string 21 | 22 | @description('Azure AD admin Type') 23 | @allowed([ 24 | 'User' 25 | 'Group' 26 | 'ServicePrincipal' 27 | ]) 28 | param aadAdminType string = 'User' 29 | 30 | param databaseNames array = [] 31 | param allowAzureIPsFirewall bool = false 32 | param allowAllIPsFirewall bool = false 33 | param allowedSingleIPs array = [] 34 | 35 | // PostgreSQL version 36 | param version string 37 | param storage object 38 | 39 | var authProperties = authType == 'Password' ?
{ 40 | administratorLogin: administratorLogin 41 | administratorLoginPassword: administratorLoginPassword 42 | authConfig: { 43 | passwordAuth: 'Enabled' 44 | } 45 | } : { 46 | authConfig: { 47 | activeDirectoryAuth: 'Enabled' 48 | passwordAuth: 'Disabled' 49 | } 50 | } 51 | 52 | resource postgresServer 'Microsoft.DBforPostgreSQL/flexibleServers@2023-03-01-preview' = { 53 | name: name 54 | location: location 55 | tags: tags 56 | sku: { 57 | name: 'Standard_B1ms' 58 | tier: 'Burstable' 59 | } 60 | properties: union(authProperties, { 61 | version: version 62 | storage: storage 63 | 64 | highAvailability: { 65 | mode: 'Disabled' 66 | } 67 | }) 68 | 69 | resource database 'databases' = [for name in databaseNames: { 70 | name: name 71 | }] 72 | } 73 | 74 | resource firewall_all 'Microsoft.DBforPostgreSQL/flexibleServers/firewallRules@2023-03-01-preview' = if (allowAllIPsFirewall) { 75 | parent: postgresServer 76 | name: 'allow-all-IPs' 77 | properties: { 78 | startIpAddress: '0.0.0.0' 79 | endIpAddress: '255.255.255.255' 80 | } 81 | } 82 | 83 | resource firewall_azure 'Microsoft.DBforPostgreSQL/flexibleServers/firewallRules@2023-03-01-preview' = if (allowAzureIPsFirewall) { 84 | parent: postgresServer 85 | name: 'allow-all-azure-internal-IPs' 86 | properties: { 87 | startIpAddress: '0.0.0.0' 88 | endIpAddress: '0.0.0.0' 89 | } 90 | } 91 | 92 | @batchSize(1) 93 | resource firewall_single 'Microsoft.DBforPostgreSQL/flexibleServers/firewallRules@2023-03-01-preview' = [for ip in allowedSingleIPs: { 94 | parent: postgresServer 95 | name: 'allow-single-${replace(ip, '.', '')}' 96 | properties: { 97 | startIpAddress: ip 98 | endIpAddress: ip 99 | } 100 | }] 101 | 102 | // Workaround issue https://github.com/Azure/bicep-types-az/issues/1507 103 | resource configurations 'Microsoft.DBforPostgreSQL/flexibleServers/configurations@2023-03-01-preview' = { 104 | name: 'azure.extensions' 105 | parent: postgresServer 106 | properties: { 107 | value: 'vector' 108 | source: 
'user-override' 109 | } 110 | dependsOn: [ 111 | firewall_all, firewall_azure, firewall_single 112 | ] 113 | } 114 | 115 | 116 | 117 | // This must be created *after* the server is created - it cannot be a nested child resource 118 | resource addAddUser 'Microsoft.DBforPostgreSQL/flexibleServers/administrators@2023-03-01-preview' = { 119 | name: aadAdminObjectid // Pass my principal ID 120 | parent: postgresServer 121 | properties: { 122 | tenantId: subscription().tenantId 123 | principalType: aadAdminType // User 124 | principalName: aadAdminName // UserRole 125 | } 126 | dependsOn: [ 127 | firewall_all, firewall_azure, firewall_single, configurations 128 | ] 129 | } 130 | 131 | 132 | output POSTGRES_DOMAIN_NAME string = postgresServer.properties.fullyQualifiedDomainName 133 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | line-length = 120 3 | target-version = "py311" 4 | select = ["E", "F", "I", "UP"] 5 | ignore = ["D203"] 6 | show-source = true 7 | 8 | [tool.ruff.lint.isort] 9 | known-local-folder = ["examples"] 10 | 11 | [tool.black] 12 | line-length = 120 13 | target-version = ["py311"] 14 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | ruff 2 | black 3 | pre-commit 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2==2.9.9 2 | python-dotenv==1.0.1 3 | SQLAlchemy[asyncio]==2.0.35 4 | pgvector==0.3.5 5 | SQLModel==0.0.19 6 | asyncpg==0.29.0 7 | azure-identity 8 | -------------------------------------------------------------------------------- /write_azure_env.ps1: 
-------------------------------------------------------------------------------- 1 | # Clear the contents of the .env file 2 | Set-Content -Path .env -Value "" 3 | 4 | # Append new values to the .env file 5 | $postgresDatabase = azd env get-value POSTGRES_DATABASE 6 | $postgresHost = azd env get-value POSTGRES_HOST 7 | $postgresSSL = azd env get-value POSTGRES_SSL 8 | $postgresUsername = azd env get-value POSTGRES_USERNAME 9 | 10 | Add-Content -Path .env -Value "POSTGRES_DATABASE=$postgresDatabase" 11 | Add-Content -Path .env -Value "POSTGRES_HOST=$postgresHost" 12 | Add-Content -Path .env -Value "POSTGRES_SSL=$postgresSSL" 13 | Add-Content -Path .env -Value "POSTGRES_USERNAME=$postgresUsername" 14 | -------------------------------------------------------------------------------- /write_azure_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Clear the contents of the .env file 4 | > .env 5 | 6 | # Append new values to the .env file 7 | echo "POSTGRES_DATABASE=$(azd env get-value POSTGRES_DATABASE)" >> .env 8 | echo "POSTGRES_HOST=$(azd env get-value POSTGRES_HOST)" >> .env 9 | echo "POSTGRES_SSL=$(azd env get-value POSTGRES_SSL)" >> .env 10 | echo "POSTGRES_USERNAME=$(azd env get-value POSTGRES_USERNAME)" >> .env 11 | --------------------------------------------------------------------------------