├── .devcontainer └── devcontainer.json ├── .github ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yaml └── workflows │ └── python.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── azure_ai_search.ipynb ├── image_search.ipynb ├── openai_movies.json ├── pg.sql ├── product_images ├── amulet10_fullshot.jpg ├── amulet1_fullshot.jpg ├── amulet2_fullshot.jpg ├── amulet3_fullshot.jpg ├── amulet4_fullshot.jpg ├── amulet5_fullshot.jpg ├── amulet6_fullshot.jpg ├── amulet7_fullshot.jpg ├── amulet8_fullshot.jpg ├── amulet9_fullshot.jpg ├── breathe_front.jpg ├── chainnecklace1_top.jpg ├── chainnecklace2_top.jpg ├── chainnecklace3_top.jpg ├── chainnecklace3_top_back.jpg ├── compassionprayer_straight.png ├── doubletealight1_aboveside.jpg ├── doubletealight2_aboveside.jpg ├── driftwoodearrings1.jpg ├── driftwoodearrings2_fullshot.jpg ├── driftwoodearrings3_back.jpg ├── driftwoodearrings3_front.jpg ├── glasscandle2_top.jpg ├── glasscandle_side.jpg ├── goldnugget_fullshot.jpg ├── goldtri_fullshot.jpg ├── hakunamatata_top.jpg ├── incenseholder4_side.jpg ├── incenseholder5_side.jpg ├── inhaleexhale_top.jpg ├── matatamagnet_fridge.jpg ├── shellchain_fullshot.jpg ├── singletealight1_side.jpg ├── stonechain_closeup.jpg ├── stripednecklace_fullshot.jpg ├── tealightsand_night.jpg └── tripletealight_above.jpg ├── pyproject.toml ├── query_images ├── amulet3_closeup_back.jpg ├── amulet3_closeup_front.jpg ├── amulet3_outside.jpg └── tealightsand_side.jpg ├── rag.ipynb ├── rag_eval.ipynb ├── requirements-dev.txt ├── requirements.txt ├── search_relevance.ipynb ├── vector.py └── vector_embeddings.ipynb /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/python-3 3 | { 4 | "name": "Vector search demos", 5 | "image": "mcr.microsoft.com/vscode/devcontainers/python:3.11-bullseye", 6 | "features": { 7 | "ghcr.io/azure/azure-dev/azd:latest": {} 8 | }, 9 | // Configure tool-specific properties. 10 | "customizations": { 11 | // Configure properties specific to VS Code. 12 | "vscode": { 13 | // Set *default* container specific settings.json values on container create. 14 | "settings": { 15 | "python.defaultInterpreterPath": "/usr/local/bin/python", 16 | "files.exclude": { 17 | ".coverage": true, 18 | "__pycache__": true 19 | } 20 | }, 21 | 22 | // Add the IDs of extensions you want installed when the container is created. 23 | "extensions": [ 24 | "ms-python.python", 25 | "charliermarsh.ruff", 26 | "ms-python.black-formatter", 27 | "ms-toolsai.jupyter" 28 | ] 29 | } 30 | }, 31 | 32 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 33 | // "forwardPorts": [], 34 | 35 | // Use 'postCreateCommand' to run commands after the container is created. 36 | "postCreateCommand": "pip3 install --user -r requirements-dev.txt", 37 | 38 | // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 
39 | "remoteUser": "vscode" 40 | } 41 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | > Please provide us with the following information: 5 | > --------------------------------------------------------------- 6 | 7 | ### This issue is for a: (mark with an `x`) 8 | ``` 9 | - [ ] bug report -> please search issues before submitting 10 | - [ ] feature request 11 | - [ ] documentation issue or request 12 | - [ ] regression (a behavior that used to work and stopped in a new release) 13 | ``` 14 | 15 | ### Minimal steps to reproduce 16 | > 17 | 18 | ### Any log messages given by the failure 19 | > 20 | 21 | ### Expected/desired behavior 22 | > 23 | 24 | ### OS and Version? 25 | > Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?) 26 | 27 | ### Versions 28 | > 29 | 30 | ### Mention any other details that might be useful 31 | 32 | > --------------------------------------------------------------- 33 | > Thanks! We'll be in touch soon. 34 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Purpose 2 | 3 | * ... 4 | 5 | ## Does this introduce a breaking change? 6 | 7 | ``` 8 | [ ] Yes 9 | [ ] No 10 | ``` 11 | 12 | ## Pull Request Type 13 | What kind of change does this Pull Request introduce? 14 | 15 | 16 | ``` 17 | [ ] Bugfix 18 | [ ] Feature 19 | [ ] Code style update (formatting, local variables) 20 | [ ] Refactoring (no functional changes, no api changes) 21 | [ ] Documentation content changes 22 | [ ] Other... Please describe: 23 | ``` 24 | 25 | ## How to Test 26 | * Get the code 27 | 28 | ``` 29 | git clone [repo-address] 30 | cd [repo-name] 31 | git checkout [branch-name] 32 | npm install 33 | ``` 34 | 35 | * Test the code 36 | 37 | ``` 38 | ``` 39 | 40 | ## What to Check 41 | Verify that the following are valid 42 | * ... 43 | 44 | ## Other Information 45 | 46 | -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /.github/workflows/python.yaml: -------------------------------------------------------------------------------- 1 | name: Python checks 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python 3 15 | uses: actions/setup-python@v3 16 | with: 17 | python-version: "3.11" 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install -r requirements-dev.txt 22 | - name: Lint with ruff 23 | run: ruff check . 24 | - name: Check formatting with black 25 | run: black . --check --verbose 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__ 3 | .coverage 4 | .DS_Store 5 | .env -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/astral-sh/ruff-pre-commit 9 | rev: v0.1.0 10 | hooks: 11 | - id: ruff 12 | - repo: https://github.com/psf/black 13 | rev: 23.9.1 14 | hooks: 15 | - id: black 16 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to [project-title] 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 5 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 6 | 7 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 8 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 9 | provided by the bot. You will only need to do this once across all repos using our CLA. 10 | 11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | 15 | - [Code of Conduct](#coc) 16 | - [Issues and Bugs](#issue) 17 | - [Feature Requests](#feature) 18 | - [Submission Guidelines](#submit) 19 | 20 | ## Code of Conduct 21 | Help us keep this project open and inclusive. Please read and follow our [Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 22 | 23 | ## Found an Issue? 
24 | If you find a bug in the source code or a mistake in the documentation, you can help us by 25 | [submitting an issue](#submit-issue) to the GitHub Repository. Even better, you can 26 | [submit a Pull Request](#submit-pr) with a fix. 27 | 28 | ## Want a Feature? 29 | You can *request* a new feature by [submitting an issue](#submit-issue) to the GitHub 30 | Repository. If you would like to *implement* a new feature, please submit an issue with 31 | a proposal for your work first, to be sure that we can use it. 32 | 33 | * **Small Features** can be crafted and directly [submitted as a Pull Request](#submit-pr). 34 | 35 | ## Submission Guidelines 36 | 37 | ### Submitting an Issue 38 | Before you submit an issue, search the archive, maybe your question was already answered. 39 | 40 | If your issue appears to be a bug, and hasn't been reported, open a new issue. 41 | Help us to maximize the effort we can spend fixing issues and adding new 42 | features, by not reporting duplicate issues. Providing the following information will increase the 43 | chances of your issue being dealt with quickly: 44 | 45 | * **Overview of the Issue** - if an error is being thrown a non-minified stack trace helps 46 | * **Version** - what version is affected (e.g. 0.1.2) 47 | * **Motivation for or Use Case** - explain what are you trying to do and why the current behavior is a bug for you 48 | * **Browsers and Operating System** - is this a problem with all browsers? 49 | * **Reproduce the Error** - provide a live example or a unambiguous set of steps 50 | * **Related Issues** - has a similar issue been reported before? 51 | * **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be 52 | causing the problem (line of code or commit) 53 | 54 | You can file new issues by providing the above information at the corresponding repository's issues link: https://github.com/[organization-name]/[repository-name]/issues/new]. 55 | 56 | ### Submitting a Pull Request (PR) 57 | Before you submit your Pull Request (PR) consider the following guidelines: 58 | 59 | * Search the repository (https://github.com/[organization-name]/[repository-name]/pulls) for an open or closed PR 60 | that relates to your submission. You don't want to duplicate effort. 61 | 62 | * Make your changes in a new git fork: 63 | 64 | * Commit your changes using a descriptive commit message 65 | * Push your fork to GitHub: 66 | * In GitHub, create a pull request 67 | * If we suggest changes then: 68 | * Make the required updates. 69 | * Rebase your fork and force push to your GitHub repository (this will update your Pull Request): 70 | 71 | ```shell 72 | git rebase master -i 73 | git push -f 74 | ``` 75 | 76 | That's it! Thank you for your contribution! 77 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Azure AI Search Demos 2 | 3 | This repository contains many notebooks that explain how Azure AI Search works, including several showcasing how vector search works. 4 | 5 | ## Environment setup 6 | 7 | 1. Run `azd up` on [azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo/) with GPT-4-vision enabled. This will create the necessary resources for the Azure OpenAI, Azure AI Search services, and the Computer Vision service. 8 | 9 | 2. Create a .env with these variables, and the values taken from `.azure/ENV-NAME/.env` 10 | 11 | ```shell 12 | AZURE_OPENAI_SERVICE=YOUR-SERVICE-NAME 13 | AZURE_OPENAI_DEPLOYMENT_NAME=YOUR-OPENAI-DEPLOYMENT-NAME 14 | AZURE_OPENAI_ADA_DEPLOYMENT=YOUR-EMBED-DEPLOYMENT-NAME 15 | AZURE_SEARCH_SERVICE=YOUR-SEARCH-SERVICE-NAME 16 | AZURE_COMPUTERVISION_SERVICE=YOUR-COMPUTERVISION-SERVICE-NAME 17 | ``` 18 | 19 | 3. Login to the Azure Developer CLI: 20 | 21 | ```shell 22 | azd auth login 23 | ``` 24 | 25 | 4. If you deployed your resource group to a tenant other than your home tenant, set the tenant ID: 26 | 27 | ```shell 28 | export TENANT_ID=YOUR-TENANT-ID 29 | ``` 30 | 31 | ## Notebooks 32 | 33 | These are the available notebooks, in suggested order: 34 | 35 | * [Vector Embeddings Notebook](./vector_embeddings.ipynb) 36 | * [Azure AI Search Notebook](./azure_ai_search.ipynb) 37 | * [Image Search Notebook](./image_search.ipynb) 38 | * [Azure AI Search Relevance Notebook](./search_relevance.ipynb) 39 | * [RAG with Azure AI Search](./rag.ipynb) 40 | * [RAG Evaluation](./rag_eval.ipynb) 41 | 42 | You can find video recordings going through the notebooks [here](https://github.com/microsoft/aitour-rag-with-ai-search/tree/main/session-delivery-resources#video-recordings). 
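
## Verifying your setup (optional)

Before opening the notebooks, you can confirm that the `.env` values and your Azure credentials are wired up correctly with a short script like the one below. This is a minimal sketch based on the setup cells used throughout the notebooks; it assumes the packages from `requirements.txt` are installed and that the `.env` file described above is in the working directory.

```python
import os

import azure.identity
import dotenv
import openai

# Load the variables from .env into the environment
dotenv.load_dotenv()

# Authenticate with the same credential type the notebooks use
azure_credential = azure.identity.DefaultAzureCredential()
token_provider = azure.identity.get_bearer_token_provider(
    azure_credential, "https://cognitiveservices.azure.com/.default"
)

# Create the Azure OpenAI client from the .env values
openai_client = openai.AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_SERVICE')}.openai.azure.com",
    azure_ad_token_provider=token_provider,
)

# Request a single embedding as a smoke test
response = openai_client.embeddings.create(
    model=os.getenv("AZURE_OPENAI_ADA_DEPLOYMENT"), input="hello world"
)
print(f"Embedding length: {len(response.data[0].embedding)}")
```

If this prints an embedding length (1536 for the text-embedding-ada-002 model), both the environment variables and your `azd auth login` credentials are working, and the notebooks should run without further configuration.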
43 | -------------------------------------------------------------------------------- /azure_ai_search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Azure AI Search: vector search, step by step" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup API client\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import os\n", 24 | "\n", 25 | "import azure.identity\n", 26 | "import dotenv\n", 27 | "from azure.search.documents.indexes import SearchIndexClient\n", 28 | "\n", 29 | "dotenv.load_dotenv()\n", 30 | "\n", 31 | "AZURE_SEARCH_SERVICE = os.getenv(\"AZURE_SEARCH_SERVICE\")\n", 32 | "AZURE_SEARCH_ENDPOINT = f\"https://{AZURE_SEARCH_SERVICE}.search.windows.net\"\n", 33 | "\n", 34 | "azure_credential = azure.identity.DefaultAzureCredential()\n", 35 | "index_client = SearchIndexClient(endpoint=AZURE_SEARCH_ENDPOINT, credential=azure_credential)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Search a tiny index\n", 43 | "\n", 44 | "### Create index" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "" 56 | ] 57 | }, 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "from azure.search.documents.indexes.models import (\n", 65 | " HnswAlgorithmConfiguration,\n", 66 | " HnswParameters,\n", 67 | " SearchField,\n", 68 | " SearchFieldDataType,\n", 69 | " SearchIndex,\n", 70 | " SimpleField,\n", 71 | " VectorSearch,\n", 72 | " VectorSearchAlgorithmKind,\n", 73 | " VectorSearchProfile,\n", 74 | ")\n", 75 | "\n", 76 | "AZURE_SEARCH_TINY_INDEX = \"teenytinyindex\"\n", 77 | "\n", 78 | "index = SearchIndex(\n", 79 | " name=AZURE_SEARCH_TINY_INDEX, \n", 80 | " fields=[\n", 81 | " SimpleField(name=\"id\", type=SearchFieldDataType.String, key=True),\n", 82 | " SearchField(name=\"embedding\", \n", 83 | " type=SearchFieldDataType.Collection(SearchFieldDataType.Single), \n", 84 | " searchable=True, \n", 85 | " vector_search_dimensions=3,\n", 86 | " vector_search_profile_name=\"embedding_profile\")\n", 87 | " ],\n", 88 | " vector_search=VectorSearch(\n", 89 | " algorithms=[HnswAlgorithmConfiguration( # Hierachical Navigable Small World, IVF\n", 90 | " name=\"hnsw_config\",\n", 91 | " kind=VectorSearchAlgorithmKind.HNSW,\n", 92 | " parameters=HnswParameters(metric=\"cosine\"),\n", 93 | " )],\n", 94 | " profiles=[VectorSearchProfile(name=\"embedding_profile\", algorithm_configuration_name=\"hnsw_config\")]\n", 95 | " )\n", 96 | ")\n", 97 | "\n", 98 | "index_client.create_index(index)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Insert a few documents with tiny vectors" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 4, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "data": { 115 | "text/plain": [ 116 | "[,\n", 117 | " ,\n", 118 | " ]" 119 | ] 120 | }, 121 | "execution_count": 4, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "from azure.search.documents import SearchClient\n", 128 | "\n", 129 | "search_client = SearchClient(AZURE_SEARCH_ENDPOINT, 
AZURE_SEARCH_TINY_INDEX, credential=azure_credential)\n", 130 | "search_client.upload_documents(documents=[\n", 131 | " {\"id\": \"1\", \"embedding\": [1, 2, 3]},\n", 132 | " {\"id\": \"2\", \"embedding\": [1, 1, 3]},\n", 133 | " {\"id\": \"3\", \"embedding\": [4, 5, 6]}])" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Search using vector similarity" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 5, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "id: 2, score: 0.36515692\n", 153 | "id: 1, score: 0.3618256\n", 154 | "id: 3, score: 0.34674543\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "from azure.search.documents.models import VectorizedQuery\n", 160 | "\n", 161 | "r = search_client.search(search_text=None, vector_queries=[\n", 162 | " VectorizedQuery(vector=[-2, -1, -1], k_nearest_neighbors=3, fields=\"embedding\")])\n", 163 | "for doc in r:\n", 164 | " print(f\"id: {doc['id']}, score: {doc['@search.score']}\")" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Search a larger index" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "import azure.identity\n", 181 | "import dotenv\n", 182 | "import openai\n", 183 | "\n", 184 | "dotenv.load_dotenv()\n", 185 | "\n", 186 | "# Initialize Azure search variables\n", 187 | "AZURE_SEARCH_SERVICE = os.getenv(\"AZURE_SEARCH_SERVICE\")\n", 188 | "AZURE_SEARCH_ENDPOINT = f\"https://{AZURE_SEARCH_SERVICE}.search.windows.net\"\n", 189 | "\n", 190 | "# Set up OpenAI client based on environment variables\n", 191 | "dotenv.load_dotenv()\n", 192 | "AZURE_OPENAI_SERVICE = os.getenv(\"AZURE_OPENAI_SERVICE\")\n", 193 | "AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv(\"AZURE_OPENAI_ADA_DEPLOYMENT\")\n", 194 | "\n", 195 | "token_provider = azure.identity.get_bearer_token_provider(azure_credential, \"https://cognitiveservices.azure.com/.default\")\n", 196 | "openai_client = openai.AzureOpenAI(\n", 197 | " api_version=\"2023-07-01-preview\",\n", 198 | " azure_endpoint=f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\",\n", 199 | " azure_ad_token_provider=token_provider)\n", 200 | "\n", 201 | "def get_embedding(text):\n", 202 | " get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)\n", 203 | " return get_embeddings_response.data[0].embedding" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 6, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "Score: 0.82780\tContent:lessons • Scuba diving lessons • Surfing lessons • Horseback riding lessons These lessons provide employees with the opportunity to try new th\n", 216 | "Score: 0.82780\tContent:lessons • Scuba diving lessons • Surfing lessons • Horseback riding lessons These lessons provide employees with the opportunity to try new th\n", 217 | "Score: 0.82352\tContent: TURING TEST The computer scientist Edsger Dijkstra (1984) said that \"The question of whether Ma- chines Can Think ... is about as relevant as the que\n", 218 | "Score: 0.82301\tContent: A number of research projects are presently developing prototype robots for clearing minefields, on land and at sea. 
Most existing robots for these t\n", 219 | "Score: 0.82083\tContent: Au- tonomous underwater vehicles (AUVs) are used in deep sea exploration. Mobile robots deliver packages in the workplace and vacuum the floors at ho\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "AZURE_SEARCH_FULL_INDEX = \"gptkbindex\"\n", 225 | "search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)\n", 226 | "\n", 227 | "search_query = \"learning about underwater activities\"\n", 228 | "search_vector = get_embedding(search_query)\n", 229 | "r = search_client.search(search_text=None, top=5, vector_queries=[\n", 230 | " VectorizedQuery(vector=search_vector, k_nearest_neighbors=5, fields=\"embedding\")])\n", 231 | "for doc in r:\n", 232 | " content = doc[\"content\"].replace(\"\\n\", \" \")[:150]\n", 233 | " print(f\"Score: {doc['@search.score']:.5f}\\tContent:{content}\")" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "Python 3", 245 | "language": "python", 246 | "name": "python3" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.11.7" 259 | }, 260 | "orig_nbformat": 4 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 2 264 | } 265 | -------------------------------------------------------------------------------- /pg.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION vector; 2 | 3 | CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(1536)); 4 | 5 | INSERT INTO items (embedding) VALUES ('[0.0014701404143124819, 6 | 0.0034404152538627386, 7 | -0.012805989943444729,...]'); 8 | 9 | SELECT * FROM items ORDER BY embedding <=> '[-0.0126618119, -0.027928412, -0.0130874719, ...]' LIMIT 5; 10 | 11 | CREATE INDEX ON items USING hnsw (embedding vector_cosine_ops); -------------------------------------------------------------------------------- /product_images/amulet10_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet10_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet1_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet1_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet2_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet2_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet3_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet3_fullshot.jpg 
-------------------------------------------------------------------------------- /product_images/amulet4_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet4_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet5_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet5_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet6_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet6_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet7_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet7_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet8_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet8_fullshot.jpg -------------------------------------------------------------------------------- /product_images/amulet9_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/amulet9_fullshot.jpg -------------------------------------------------------------------------------- /product_images/breathe_front.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/breathe_front.jpg -------------------------------------------------------------------------------- /product_images/chainnecklace1_top.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/chainnecklace1_top.jpg -------------------------------------------------------------------------------- /product_images/chainnecklace2_top.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/chainnecklace2_top.jpg -------------------------------------------------------------------------------- /product_images/chainnecklace3_top.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/chainnecklace3_top.jpg -------------------------------------------------------------------------------- /product_images/chainnecklace3_top_back.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/chainnecklace3_top_back.jpg -------------------------------------------------------------------------------- /product_images/compassionprayer_straight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/compassionprayer_straight.png -------------------------------------------------------------------------------- /product_images/doubletealight1_aboveside.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/doubletealight1_aboveside.jpg -------------------------------------------------------------------------------- /product_images/doubletealight2_aboveside.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/doubletealight2_aboveside.jpg -------------------------------------------------------------------------------- /product_images/driftwoodearrings1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/driftwoodearrings1.jpg -------------------------------------------------------------------------------- /product_images/driftwoodearrings2_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/driftwoodearrings2_fullshot.jpg -------------------------------------------------------------------------------- /product_images/driftwoodearrings3_back.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/driftwoodearrings3_back.jpg -------------------------------------------------------------------------------- /product_images/driftwoodearrings3_front.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/driftwoodearrings3_front.jpg -------------------------------------------------------------------------------- /product_images/glasscandle2_top.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/glasscandle2_top.jpg -------------------------------------------------------------------------------- /product_images/glasscandle_side.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/glasscandle_side.jpg -------------------------------------------------------------------------------- /product_images/goldnugget_fullshot.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/goldnugget_fullshot.jpg -------------------------------------------------------------------------------- /product_images/goldtri_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/goldtri_fullshot.jpg -------------------------------------------------------------------------------- /product_images/hakunamatata_top.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/hakunamatata_top.jpg -------------------------------------------------------------------------------- /product_images/incenseholder4_side.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/incenseholder4_side.jpg -------------------------------------------------------------------------------- /product_images/incenseholder5_side.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/incenseholder5_side.jpg -------------------------------------------------------------------------------- /product_images/inhaleexhale_top.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/inhaleexhale_top.jpg -------------------------------------------------------------------------------- /product_images/matatamagnet_fridge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/matatamagnet_fridge.jpg -------------------------------------------------------------------------------- /product_images/shellchain_fullshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/shellchain_fullshot.jpg -------------------------------------------------------------------------------- /product_images/singletealight1_side.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/singletealight1_side.jpg -------------------------------------------------------------------------------- /product_images/stonechain_closeup.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/stonechain_closeup.jpg -------------------------------------------------------------------------------- /product_images/stripednecklace_fullshot.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/stripednecklace_fullshot.jpg -------------------------------------------------------------------------------- /product_images/tealightsand_night.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/tealightsand_night.jpg -------------------------------------------------------------------------------- /product_images/tripletealight_above.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/product_images/tripletealight_above.jpg -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | line-length = 120 3 | target-version = "py311" 4 | extend-include = ["*.ipynb"] 5 | 6 | [tool.ruff.lint] 7 | select = ["E", "F", "I", "UP"] 8 | ignore = ["D203"] 9 | 10 | [tool.black] 11 | line-length = 120 12 | target-version = ["py311"] 13 | 14 | [tool.pytest.ini_options] 15 | addopts = "-ra --cov" 16 | testpaths = ["tests"] 17 | pythonpath = ['.'] 18 | 19 | [tool.coverage.report] 20 | show_missing = true 21 | -------------------------------------------------------------------------------- /query_images/amulet3_closeup_back.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/query_images/amulet3_closeup_back.jpg -------------------------------------------------------------------------------- /query_images/amulet3_closeup_front.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/query_images/amulet3_closeup_front.jpg -------------------------------------------------------------------------------- /query_images/amulet3_outside.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/query_images/amulet3_outside.jpg -------------------------------------------------------------------------------- /query_images/tealightsand_side.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pamelafox/vector-search-demos/05d3b53d91f0ee9898eaa8f766ef368cb7822d8e/query_images/tealightsand_side.jpg -------------------------------------------------------------------------------- /rag.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Retrieval Augmented Generation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup API clients" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import os\n", 24 | "\n", 25 | "import azure.identity\n", 26 | "import dotenv\n", 27 | "import openai\n", 28 | "from azure.search.documents import SearchClient\n", 29 | 
"from azure.search.documents.models import VectorizedQuery\n", 30 | "\n", 31 | "dotenv.load_dotenv()\n", 32 | "\n", 33 | "azure_credential = azure.identity.DefaultAzureCredential()\n", 34 | "\n", 35 | "# Initialize Azure OpenAI client\n", 36 | "AZURE_OPENAI_SERVICE = os.getenv(\"AZURE_OPENAI_SERVICE\")\n", 37 | "AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv(\"AZURE_OPENAI_ADA_DEPLOYMENT\")\n", 38 | "\n", 39 | "token_provider = azure.identity.get_bearer_token_provider(azure_credential, \"https://cognitiveservices.azure.com/.default\")\n", 40 | "openai_client = openai.AzureOpenAI(\n", 41 | " api_version=\"2023-07-01-preview\",\n", 42 | " azure_endpoint=f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\",\n", 43 | " azure_ad_token_provider=token_provider)\n", 44 | "\n", 45 | "def get_embedding(text):\n", 46 | " get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)\n", 47 | " return get_embeddings_response.data[0].embedding\n", 48 | "\n", 49 | "# Initialize Azure search client\n", 50 | "AZURE_SEARCH_SERVICE = os.getenv(\"AZURE_SEARCH_SERVICE\")\n", 51 | "AZURE_SEARCH_ENDPOINT = f\"https://{AZURE_SEARCH_SERVICE}.search.windows.net\"\n", 52 | "\n", 53 | "AZURE_SEARCH_FULL_INDEX = \"gptkbindex\"\n", 54 | "search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Prepare user question" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "user_question = \"What does a product manager do?\"\n", 71 | "user_question_vector = get_embedding(user_question)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Retrieve matching documents\n", 79 | "\n", 80 | "The search call below does a **hybrid search**, performing both a full-text search and a vector search in parallel.\n", 81 | "It merges those results using Reciprocal Rank Fusion (RRF). \n", 82 | "Finally, it re-ranks the merged results using the AI Search semantic ranker, a re-ranking model." 
83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "name": "stdout", 92 | "output_type": "stream", 93 | "text": [ 94 | "[role_library.pdf#page=29]: The Manager of Product Management will collaborate with internal teams, such as engineering, sales, marketing, and finance, as well as external partners, suppliers, and customers to ensure successful product execution.\n", 95 | "Responsibilities:\n", 96 | "· Lead the product management team and provide guidance on product strategy, design, development, and launch.\n", 97 | "· Develop and implement product life-cycle management processes.\n", 98 | "· Monitor and analyze industry trends to identify opportunities for new products.\n", 99 | "· Develop product marketing plans and go-to-market strategies.\n", 100 | "· Research customer needs and develop customer-centric product roadmaps.\n", 101 | "· Collaborate with internal teams to ensure product execution and successful launch.\n", 102 | "· Develop pricing strategies and cost models.\n", 103 | "· Oversee product portfolio and performance metrics.\n", 104 | "· Manage product development budget.\n", 105 | "· Analyze product performance and customer feedback to identify areas for improvement.\n", 106 | "Qualifications:\n", 107 | "· Bachelor's degree in business, engineering, or a related field.\n", 108 | "· At least 5 years of experience in product management.\n", 109 | "· Proven track record of successful product launches.\n", 110 | "\n", 111 | "\n", 112 | "[role_library.pdf#page=28]: \n", 113 | "· 7+ years of experience in research and development in the electronics sector.\n", 114 | "· Proven track record of successfully designing, testing, and optimizing products.\n", 115 | "· Experience leading a team of researchers and engineers.\n", 116 | "· Excellent problem-solving and analytical skills.\n", 117 | ". Ability to work in a fast-paced environment and meet tight deadlines.· Knowledge of industry trends, technologies, and regulations.\n", 118 | "· Excellent communication and presentation skills.\n", 119 | "Manager of Product Management\n", 120 | "Job Title: Manager of Product Management, Contoso Electronics\n", 121 | "Job Summary:\n", 122 | "The Manager of Product Management is responsible for overseeing the product management team, driving product development and marketing strategy for Contoso Electronics. This individual will be accountable for the successful launch of new products and the implementation of product life-cycle management processes. The Manager of Product Management will collaborate with internal teams, such as engineering, sales, marketing, and finance, as well as external partners, suppliers, and customers to ensure successful product execution.\n", 123 | "\n", 124 | "\n", 125 | "[role_library.pdf#page=23]: Company: Contoso Electronics\n", 126 | "Location: Anywhere\n", 127 | "Job Type: Full-Time\n", 128 | "Salary: Competitive, commensurate with experience\n", 129 | "Job Summary:\n", 130 | "The Senior Manager of Product Management will be responsible for leading the product management team at Contoso Electronics. This role includes developing strategies, plans and objectives for the product management team and managing the day-to-day operations. 
The Senior Manager of Product Management will be responsible for the successful launch of new products and the optimization of existing products.\n", 131 | "Responsibilities:\n", 132 | "· Develop and implement product management strategies, plans and objectives to maximize team performance.\n", 133 | "· Analyze competitive landscape and market trends to develop product strategies.\n", 134 | "· Lead the product management team in the development of product plans, roadmaps and launch plans.\n", 135 | "· Monitor the performance of product management team, analyze results and implement corrective action as needed.\n", 136 | "· Manage the product lifecycle, including product development, launch, and end of life.\n", 137 | "· Ensure product features and benefits meet customer requirements.\n", 138 | "· Establish and maintain relationships with key customers, partners, and vendors.\n", 139 | "\n", 140 | "\n", 141 | "[role_library.pdf#page=11]: \n", 142 | "· Proven track record of successful product launches.\n", 143 | "· Excellent organizational and project management skills.\n", 144 | "· Ability to work independently and effectively in a fast-paced environment.\n", 145 | "· Excellent problem-solving and analytical skills.\n", 146 | "· Strong interpersonal and communication skills.\n", 147 | "· Knowledge of relevant technology, products, and industry trends.\n", 148 | "Vice President of Product Management\n", 149 | "Job Title: Vice President of Product Management\n", 150 | "Company: Contoso Electronics\n", 151 | "Location: Anywhere, USA\n", 152 | "Job Type: Full-time\n", 153 | "Salary: Competitive Salary & Benefits\n", 154 | "Job Summary:The Vice President of Product Management will lead the product management team and be responsible for developing, executing and managing product strategy and roadmap. This individual will be the key leader in driving product innovation and ensuring that Contoso Electronics meets the highest standards of product quality and customer satisfaction.\n", 155 | "Responsibilities:\n", 156 | "· Lead and manage the product management team, including setting team goals and objectives, hiring, training, and developing team members, and evaluating performance\n", 157 | "· Develop and \n", 158 | "\n", 159 | "\n", 160 | "[role_library.pdf#page=12]: \n", 161 | "Responsibilities:\n", 162 | "· Lead and manage the product management team, including setting team goals and objectives, hiring, training, and developing team members, and evaluating performance\n", 163 | "· Develop and maintain a long-term product strategy, roadmap and vision for Contoso Electronics products\n", 164 | "· Drive product innovation, identify new product opportunities, and assess market and customer needs\n", 165 | ". Act as the primary liaison between the business and technical teams, ensuring that product plans are translated into successful products\n", 166 | ". 
Manage the product life cycle from concept through product launch and post-launch support\n", 167 | "· Ensure product quality and customer satisfaction by developing product requirements and testing plans\n", 168 | "· Monitor and report on product performance, sales, and customer feedback\n", 169 | "· Collaborate with stakeholders, customers, and other partners to develop product enhancements\n", 170 | "Qualifications:\n", 171 | "· Bachelor's degree in engineering, business, marketing, or related field\n", 172 | "· 10+ years of experience in product management, product engineering, product marketing, or related field\n", 173 | "· Proven ability to lead product innovation, develop and execute product strategy, and manage products \n", 174 | "\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "r = search_client.search(\n", 180 | " user_question,\n", 181 | " top=5, \n", 182 | " vector_queries=[\n", 183 | " VectorizedQuery(vector=user_question_vector, k_nearest_neighbors=50, fields=\"embedding\")],\n", 184 | " query_type=\"semantic\",\n", 185 | " semantic_configuration_name=\"default\")\n", 186 | "\n", 187 | "sources = \"\\n\\n\".join([f\"[{doc['sourcepage']}]: {doc['content']}\\n\" for doc in r])\n", 188 | "\n", 189 | "print(sources)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Send question and documents to the model" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 4, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "A product manager is responsible for leading the product management team and providing guidance on product strategy, design, development, and launch. They collaborate with internal teams and external partners to ensure successful product execution [role_library.pdf#page=29]. They are also responsible for developing and implementing product life-cycle management processes, monitoring industry trends, developing product marketing plans, researching customer needs, and overseeing product portfolio and performance metrics [role_library.pdf#page=29]. They should have a bachelor's degree in business, engineering, or a related field, and at least 5 years of experience in product management with a proven track record of successful product launches [role_library.pdf#page=29].\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "SYSTEM_MESSAGE = \"\"\"\n", 214 | "Assistant helps company employees questions about the employee handbook. Be brief in your answers.\n", 215 | "Answer ONLY with the facts listed in the list of sources below.\n", 216 | "If there isn't enough information below, say you don't know. 
Do not generate answers that don't use the sources below.\n", 217 | "Each source has a name followed by colon and the actual information, include the source name for each fact you use.\n", 218 | "Use square brackets to reference the source, for example [info1.txt].\n", 219 | "\"\"\"\n", 220 | "USER_MESSAGE = user_question + \"\\nSources: \" + sources\n", 221 | "\n", 222 | "# Now we can use the matches to generate a response\n", 223 | "response = openai_client.chat.completions.create(\n", 224 | " model=os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"),\n", 225 | " temperature=0.7,\n", 226 | " messages=[\n", 227 | " {\"role\": \"system\", \"content\": SYSTEM_MESSAGE},\n", 228 | " {\"role\": \"user\", \"content\": USER_MESSAGE},\n", 229 | " ],\n", 230 | ")\n", 231 | "\n", 232 | "answer = response.choices[0].message.content\n", 233 | "print(answer)" 234 | ] 235 | } 236 | ], 237 | "metadata": { 238 | "language_info": { 239 | "name": "python" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 2 244 | } 245 | -------------------------------------------------------------------------------- /rag_eval.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Evaluate RAG answer quality" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup API clients" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 4, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import os\n", 24 | "\n", 25 | "import azure.identity\n", 26 | "import dotenv\n", 27 | "import openai\n", 28 | "from azure.search.documents import SearchClient\n", 29 | "from azure.search.documents.models import VectorizedQuery\n", 30 | "\n", 31 | "dotenv.load_dotenv()\n", 32 | "\n", 33 | "azure_credential = azure.identity.DefaultAzureCredential()\n", 34 | "\n", 35 | "# Initialize Azure OpenAI client\n", 36 | "AZURE_OPENAI_SERVICE = os.getenv(\"AZURE_OPENAI_SERVICE\")\n", 37 | "AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv(\"AZURE_OPENAI_ADA_DEPLOYMENT\")\n", 38 | "\n", 39 | "token_provider = azure.identity.get_bearer_token_provider(azure_credential, \"https://cognitiveservices.azure.com/.default\")\n", 40 | "openai_client = openai.AzureOpenAI(\n", 41 | " api_version=\"2023-07-01-preview\",\n", 42 | " azure_endpoint=f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\",\n", 43 | " azure_ad_token_provider=token_provider)\n", 44 | "\n", 45 | "def get_embedding(text):\n", 46 | " get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)\n", 47 | " return get_embeddings_response.data[0].embedding\n", 48 | "\n", 49 | "# Initialize Azure search client\n", 50 | "AZURE_SEARCH_SERVICE = os.getenv(\"AZURE_SEARCH_SERVICE\")\n", 51 | "AZURE_SEARCH_ENDPOINT = f\"https://{AZURE_SEARCH_SERVICE}.search.windows.net\"\n", 52 | "\n", 53 | "AZURE_SEARCH_FULL_INDEX = \"gptkbindex\"\n", 54 | "search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Get answer for a question" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "A product manager is responsible for leading the product management team, developing and implementing 
product strategies, monitoring industry trends, collaborating with internal teams, developing pricing strategies, analyzing product performance, and identifying areas for improvement. They must have a bachelor's degree in business, engineering, or a related field, at least 5 years of experience in product management, and a proven track record of successful product launches. [role_library.pdf#page=29]\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "user_question = \"What does a product manager do?\"\n", 79 | "user_question_vector = get_embedding(user_question)\n", 80 | "\n", 81 | "r = search_client.search(\n", 82 | " user_question,\n", 83 | " top=5, \n", 84 | " vector_queries=[\n", 85 | " VectorizedQuery(vector=user_question_vector, k_nearest_neighbors=50, fields=\"embedding\")],\n", 86 | " query_type=\"semantic\",\n", 87 | " semantic_configuration_name=\"default\")\n", 88 | "\n", 89 | "sources = \"\\n\\n\".join([f\"[{doc['sourcepage']}]: {doc['content']}\\n\" for doc in r])\n", 90 | "\n", 91 | "SYSTEM_MESSAGE = \"\"\"\n", 92 | "Assistant helps company employees questions about the employee handbook. Be brief in your answers.\n", 93 | "Answer ONLY with the facts listed in the list of sources below.\n", 94 | "If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below.\n", 95 | "Each source has a name followed by colon and the actual information, include the source name for each fact you use.\n", 96 | "Use square brackets to reference the source, for example [info1.txt].\n", 97 | "\"\"\"\n", 98 | "USER_MESSAGE = user_question + \"\\nSources: \" + sources\n", 99 | "\n", 100 | "response = openai_client.chat.completions.create(\n", 101 | " model=os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"),\n", 102 | " temperature=0.7,\n", 103 | " messages=[\n", 104 | " {\"role\": \"system\", \"content\": SYSTEM_MESSAGE},\n", 105 | " {\"role\": \"user\", \"content\": USER_MESSAGE},\n", 106 | " ],\n", 107 | ")\n", 108 | "\n", 109 | "answer = response.choices[0].message.content\n", 110 | "print(answer)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Evaluate the answer quality\n", 118 | "\n", 119 | "We can use the `promptflow-evals` package to run GPT-based evaluators on the RAG responses." 
120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 7, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "{'gpt_relevance': 5.0}\n", 132 | "{'gpt_groundedness': 5.0}\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "import os\n", 138 | "\n", 139 | "from promptflow.core import AzureOpenAIModelConfiguration\n", 140 | "from promptflow.evals.evaluators import GroundednessEvaluator, RelevanceEvaluator\n", 141 | "\n", 142 | "model_config = AzureOpenAIModelConfiguration(\n", 143 | " azure_endpoint=f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\",\n", 144 | " azure_deployment=os.environ.get(\"AZURE_OPENAI_DEPLOYMENT_NAME\"),\n", 145 | ")\n", 146 | "\n", 147 | "relevance_eval = RelevanceEvaluator(model_config)\n", 148 | "groundedness_eval = GroundednessEvaluator(model_config)\n", 149 | "\n", 150 | "relevance_score = relevance_eval(\n", 151 | " question=user_question,\n", 152 | " answer=answer,\n", 153 | " context=sources,\n", 154 | ")\n", 155 | "print(relevance_score)\n", 156 | "\n", 157 | "groundedness_score = groundedness_eval(\n", 158 | " answer=answer,\n", 159 | " context=sources,\n", 160 | ")\n", 161 | "print(groundedness_score)" 162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "language_info": { 167 | "name": "python" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 2 172 | } 173 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | pre-commit 3 | ruff 4 | black 5 | ipykernel -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | numpy 3 | pandas 4 | azure-identity 5 | azure-search-documents 6 | python-dotenv 7 | pillow 8 | promptflow-evals 9 | ipykernel -------------------------------------------------------------------------------- /search_relevance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Azure AI Search: Relevance" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup API clients" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 21, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import os\n", 24 | "\n", 25 | "import azure.identity\n", 26 | "import dotenv\n", 27 | "import openai\n", 28 | "from azure.search.documents import SearchClient\n", 29 | "from azure.search.documents.models import VectorizedQuery\n", 30 | "\n", 31 | "dotenv.load_dotenv()\n", 32 | "\n", 33 | "azure_credential = azure.identity.DefaultAzureCredential()\n", 34 | "\n", 35 | "# Initialize Azure OpenAI client\n", 36 | "AZURE_OPENAI_SERVICE = os.getenv(\"AZURE_OPENAI_SERVICE\")\n", 37 | "AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv(\"AZURE_OPENAI_ADA_DEPLOYMENT\")\n", 38 | "\n", 39 | "token_provider = azure.identity.get_bearer_token_provider(azure_credential, \"https://cognitiveservices.azure.com/.default\")\n", 40 | "openai_client = openai.AzureOpenAI(\n", 41 | " api_version=\"2023-07-01-preview\",\n", 42 | " azure_endpoint=f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\",\n", 43 | " azure_ad_token_provider=token_provider)\n", 44 | "\n", 45 | "def get_embedding(text):\n", 46 | 
" get_embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=text)\n", 47 | " return get_embeddings_response.data[0].embedding\n", 48 | "\n", 49 | "# Initialize Azure search client\n", 50 | "AZURE_SEARCH_SERVICE = os.getenv(\"AZURE_SEARCH_SERVICE\")\n", 51 | "AZURE_SEARCH_ENDPOINT = f\"https://{AZURE_SEARCH_SERVICE}.search.windows.net\"\n", 52 | "\n", 53 | "AZURE_SEARCH_FULL_INDEX = \"gptkbindex\"\n", 54 | "search_client = SearchClient(AZURE_SEARCH_ENDPOINT, AZURE_SEARCH_FULL_INDEX, credential=azure_credential)\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "#### Vector search is not always optimal\n", 62 | "\n", 63 | "For example, consider searches for exact strings." 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 22, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "Score: 0.81583\tMatching text: Not found\n", 76 | "Score: 0.80997\tMatching text: Not found\n", 77 | "Score: 0.80969\tMatching text: Not found\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "search_query = \"$45.00\"\n", 83 | "search_vector = get_embedding(search_query)\n", 84 | "r = search_client.search(None, top=3, vector_queries=[\n", 85 | " VectorizedQuery(vector=search_vector, k_nearest_neighbors=50, fields=\"embedding\")])\n", 86 | "for doc in r:\n", 87 | " found_content = \"Not found\"\n", 88 | " if search_query.lower() in doc['content'].lower():\n", 89 | " found_content = doc['content'][doc['content'].lower().find(search_query.lower()):].replace(\"\\n\", \" \")\n", 90 | " print(f\"Score: {doc['@search.score']:.5f}\\tMatching text: {found_content}\")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Compare to a text search for same query:" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 23, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "Score: 4.72874\tMatching text: $45.00$55.00Employee +1$65.00$71.00Employee +2 or more$78.00$89.00 Next Steps We hope that this information has been helpful in understanding the differences between Northwind Health Plus and Northwind Standard. We are confident that you will find the right plan for you and your family. Thank you for choosing Contoso Electronics!\n", 110 | "Score: 3.37283\tMatching text: Not found\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "r = search_client.search(search_query, top=3)\n", 116 | "for doc in r:\n", 117 | " found_content = \"Not found\"\n", 118 | " if search_query.lower() in doc['content'].lower():\n", 119 | " found_content = doc['content'][doc['content'].lower().find(search_query.lower()):].replace(\"\\n\", \" \")\n", 120 | " print(f\"Score: {doc['@search.score']:.5f}\\tMatching text: {found_content}\")" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "#### Hybrid retrieval\n", 128 | "\n", 129 | "Uses RRF to combine vector and text results." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 24, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "Score: 0.03254\tMatching text: $45.00$55.00Employee +1$65.00$71.00Employee +2 or more$78.00$89.00 Next Steps We hope that this information has been helpful in understanding the differences between Northwind Health Plus and Northwind Standard. We are confident that you will find the right plan for you and your family. Thank you for choosing Contoso Electronics!\n", 142 | "Score: 0.03110\tMatching text: Not found\n", 143 | "Score: 0.01667\tMatching text: Not found\n", 144 | "Score: 0.01639\tMatching text: Not found\n", 145 | "Score: 0.01613\tMatching text: Not found\n", 146 | "Score: 0.01562\tMatching text: Not found\n", 147 | "Score: 0.01538\tMatching text: Not found\n", 148 | "Score: 0.01515\tMatching text: Not found\n", 149 | "Score: 0.01493\tMatching text: Not found\n", 150 | "Score: 0.01449\tMatching text: Not found\n" 151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "search_vector = get_embedding(search_query)\n", 156 | "r = search_client.search(search_query, top=15, vector_queries=[\n", 157 | " VectorizedQuery(vector=search_vector, k_nearest_neighbors=10, fields=\"embedding\")])\n", 158 | "for doc in r:\n", 159 | " found_content = \"Not found\"\n", 160 | " if search_query in doc['content']:\n", 161 | " found_content = doc['content'][doc['content'].find(search_query):].replace(\"\\n\", \" \")\n", 162 | " print(f\"Score: {doc['@search.score']:.5f}\\tMatching text: {found_content}\")" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "#### Hybrid ranking is not always optimal" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 25, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "Score: 0.03226\t\tContent: PerksPlus is not only designed to support employees' physical health, but also their mental health. Regular exercise has been shown to reduce stress,\n", 182 | "Score: 0.03132\t\tContent: Under the Northwind Health Plus plan, habilitation services are covered up to a certain dollar amount and number of visits. This amount and the numbe\n", 183 | "Score: 0.02782\t\tContent: Occupational Therapy Occupational therapy helps individuals develop, maintain, or restore skills for daily living and work. It can help individuals w\n", 184 | "Score: 0.02766\t\tContent: Some of the lessons covered under PerksPlus include: · Skiing and snowboarding lessons · Scuba diving lessons · Surfing lessons · Horseback riding le\n", 185 | "Score: 0.02107\t\tContent: Early intervention and treatment can make a significant difference in achieving successful recovery. 
· Take advantage of your Northwind Standard cove\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "search_query = \"learning about underwater activities\"\n", 191 | "search_vector = get_embedding(search_query)\n", 192 | "r = search_client.search(search_query, top=5, vector_queries=[\n", 193 | " VectorizedQuery(vector=search_vector, k_nearest_neighbors=10, fields=\"embedding\")])\n", 194 | "for doc in r:\n", 195 | " content = doc[\"content\"].replace(\"\\n\", \" \")[:150]\n", 196 | " print(f\"Score: {doc['@search.score']:.5f}\\t\\tContent: {content}\")" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "#### Hybrid + semantic reranking 🎉" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 26, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "Score: 0.02766\tReranker: 1.94936\tContent: Some of the lessons covered under PerksPlus include: · Skiing and snowboarding lessons · Scuba diving lessons · Surfing lessons · Horseback riding le\n", 216 | "Score: 0.03226\tReranker: 1.77669\tContent: PerksPlus is not only designed to support employees' physical health, but also their mental health. Regular exercise has been shown to reduce stress,\n", 217 | "Score: 0.02471\tReranker: 0.59237\tContent: By taking the time to research providers in-network with Northwind Health Plus and keeping track of your medical records and tests, you can make sure\n", 218 | "Score: 0.03132\tReranker: 0.58931\tContent: Under the Northwind Health Plus plan, habilitation services are covered up to a certain dollar amount and number of visits. This amount and the numbe\n", 219 | "Score: 0.02117\tReranker: 0.49740\tContent: This position will be responsible for designing and implementing innovative solutions to maximize product performance and optimize customer satisfact\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "search_query = \"learning about underwater activities\"\n", 225 | "search_vector = get_embedding(search_query)\n", 226 | "r = search_client.search(\n", 227 | " search_query,\n", 228 | " top=5, \n", 229 | " vector_queries=[\n", 230 | " VectorizedQuery(vector=search_vector, k_nearest_neighbors=50, fields=\"embedding\")],\n", 231 | " query_type=\"semantic\",\n", 232 | " semantic_configuration_name=\"default\")\n", 233 | "\n", 234 | "for doc in r:\n", 235 | " content = doc[\"content\"].replace(\"\\n\", \" \")[:150]\n", 236 | " print(f\"Score: {doc['@search.score']:.5f}\\tReranker: {doc['@search.reranker_score']:.5f}\\tContent: {content}\")" 237 | ] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": "Python 3", 243 | "language": "python", 244 | "name": "python3" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.11.0" 257 | }, 258 | "orig_nbformat": 4 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 2 262 | } 263 | -------------------------------------------------------------------------------- /vector.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dotenv 4 | import openai 5 | from azure.identity import DefaultAzureCredential, get_bearer_token_provider 6 | 7 | dotenv.load_dotenv() 8 | 9 | # Set up OpenAI client 10 
| AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE") 11 | AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") 12 | azure_credential = DefaultAzureCredential() 13 | token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default") 14 | openai_client = openai.AzureOpenAI( 15 | api_version="2023-07-01-preview", 16 | azure_endpoint=f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com", 17 | azure_ad_token_provider=token_provider, 18 | ) 19 | 20 | # Compute embeddings using OpenAI 21 | embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_EMBEDDING_DEPLOYMENT, input=["dog"]) 22 | vector = embeddings_response.data[0].embedding 23 | print(vector) 24 | 25 | # https://pamelafox.github.io/vectors-comparison/ 26 | -------------------------------------------------------------------------------- /vector_embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Vector embeddings with OpenAI" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup OpenAI API" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import os\n", 24 | "\n", 25 | "import azure.identity\n", 26 | "import dotenv\n", 27 | "import openai\n", 28 | "\n", 29 | "# Set up OpenAI client based on environment variables\n", 30 | "dotenv.load_dotenv()\n", 31 | "AZURE_OPENAI_SERVICE = os.getenv(\"AZURE_OPENAI_SERVICE\")\n", 32 | "AZURE_OPENAI_ADA_DEPLOYMENT = os.getenv(\"AZURE_OPENAI_ADA_DEPLOYMENT\")\n", 33 | "\n", 34 | "azure_credential = azure.identity.DefaultAzureCredential()\n", 35 | "token_provider = azure.identity.get_bearer_token_provider(azure_credential,\n", 36 | " \"https://cognitiveservices.azure.com/.default\")\n", 37 | "openai_client = openai.AzureOpenAI(\n", 38 | " api_version=\"2024-06-01\",\n", 39 | " azure_endpoint=f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\",\n", 40 | " azure_ad_token_provider=token_provider)\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Vector representations" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "sentence = \"A dog just walked past my house and yipped yipped like a Martian\"\n", 57 | "\n", 58 | "response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=sentence)\n", 59 | "\n", 60 | "vector = response.data[0].embedding" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "vector" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "len(vector)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### Document similarity modeled as cosine distance" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "import numpy as np\n", 95 | "import pandas as pd\n", 96 | "\n", 97 | "\n", 98 | "def cosine_similarity(a, b):\n", 99 | " return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))\n", 100 | "\n", 101 | "sentences1 = ['The new movie is awesome',\n", 
102 | " 'The new movie is awesome',\n", 103 | " 'The new movie is awesome']\n", 104 | "\n", 105 | "sentences2 = ['The new movie is awesome',\n", 106 | " 'This recent movie is so good',\n", 107 | " 'djkshsjdkhfsjdfkhsd']\n", 108 | "\n", 109 | "def get_embeddings(sentences):\n", 110 | " embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=sentences)\n", 111 | " return [embedding_object.embedding for embedding_object in embeddings_response.data]\n", 112 | "\n", 113 | "embeddings1 = get_embeddings(sentences1)\n", 114 | "embeddings2 = get_embeddings(sentences2)\n", 115 | "\n", 116 | "for i in range(len(sentences1)):\n", 117 | " print(f\"{sentences1[i]} \\t\\t {sentences2[i]} \\t\\t Score: {cosine_similarity(embeddings1[i], embeddings2[i]):.4f}\")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "### Vector search" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 18, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "import json\n", 134 | "\n", 135 | "# Load in vectors for movie titles\n", 136 | "with open('openai_movies.json') as json_file:\n", 137 | " movie_vectors = json.load(json_file)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 19, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/html": [ 148 | "
\n", 149 | "\n", 162 | "\n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | "
MovieScore
8101 Dalmatians0.980079
335102 Dalmatians0.949210
28The Fox and the Hound0.861836
6Lady and the Tramp0.851990
48The Great Mouse Detective0.842004
468Beverly Hills Chihuahua0.841186
15The Aristocats0.840352
558The Good Dinosaur0.838919
135Aladdin0.837534
391Teacher's Pet: The Movie0.837149
\n", 223 | "
" 224 | ], 225 | "text/plain": [ 226 | " Movie Score\n", 227 | "8 101 Dalmatians 0.980079\n", 228 | "335 102 Dalmatians 0.949210\n", 229 | "28 The Fox and the Hound 0.861836\n", 230 | "6 Lady and the Tramp 0.851990\n", 231 | "48 The Great Mouse Detective 0.842004\n", 232 | "468 Beverly Hills Chihuahua 0.841186\n", 233 | "15 The Aristocats 0.840352\n", 234 | "558 The Good Dinosaur 0.838919\n", 235 | "135 Aladdin 0.837534\n", 236 | "391 Teacher's Pet: The Movie 0.837149" 237 | ] 238 | }, 239 | "execution_count": 19, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "# Compute vector for query\n", 246 | "query = \"101 Dalmations\"\n", 247 | "\n", 248 | "embeddings_response = openai_client.embeddings.create(model=AZURE_OPENAI_ADA_DEPLOYMENT, input=[query])\n", 249 | "vector = embeddings_response.data[0].embedding\n", 250 | "\n", 251 | "# Compute cosine similarity between query and each movie title\n", 252 | "scores = []\n", 253 | "for movie in movie_vectors:\n", 254 | " scores.append((movie, cosine_similarity(vector, movie_vectors[movie])))\n", 255 | "\n", 256 | "# Display the top 10 results\n", 257 | "df = pd.DataFrame(scores, columns=['Movie', 'Score'])\n", 258 | "df = df.sort_values('Score', ascending=False)\n", 259 | "df.head(10)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [] 268 | } 269 | ], 270 | "metadata": { 271 | "kernelspec": { 272 | "display_name": "Python 3", 273 | "language": "python", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.11.7" 287 | }, 288 | "orig_nbformat": 4 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 2 292 | } 293 | --------------------------------------------------------------------------------