├── Quickstart-Agentic-Retrieval
├── .gitignore
├── requirements.txt
├── sample.env
└── quickstart-agentic-retrieval.ipynb
├── azure-function-search
├── lookup.sample.dat
├── suggest.sample.dat
├── .funcignore
├── search.sample.dat
├── host.json
├── function_app.py
├── shared_code
│ └── __init__.py
├── readme.md
├── local.settings.json.rename
├── requirements.txt
├── lookup.py
├── suggest.py
├── .gitignore
└── search.py
├── Quickstart-Semantic-Search
├── requirements.txt
└── sample.env
├── .vscode
├── extensions.json
├── launch.json
├── settings.json
└── tasks.json
├── Quickstart-Vector-Search
├── sample.env
└── requirements.txt
├── Quickstart-Document-Permissions-Push-API
├── sample.env
├── requirements.txt
└── document-permissions-push-api.ipynb
├── Quickstart-Document-Permissions-Pull-API
├── requirements.txt
├── sample.env
└── document-permissions-pull-api.ipynb
├── agentic-retrieval-pipeline-example
├── requirements.txt
├── sample.env
└── agent-example.ipynb
├── bulk-insert
├── requirements.txt
├── readme.md
├── bulk-insert.py
└── good-books-index.json
├── LICENSE.md
├── .gitignore
├── Quickstart
├── README.md
└── azure-search-quickstart.ipynb
├── README.md
├── CONTRIBUTING.md
└── cmk-example
└── cmk-example.ipynb
/Quickstart-Agentic-Retrieval/.gitignore:
--------------------------------------------------------------------------------
1 | *.jsonl
--------------------------------------------------------------------------------
/azure-function-search/lookup.sample.dat:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Hello"
3 | }
--------------------------------------------------------------------------------
/azure-function-search/suggest.sample.dat:
--------------------------------------------------------------------------------
1 | {"q": "w", "top": 5, "suggester": "sg"}
--------------------------------------------------------------------------------
/azure-function-search/.funcignore:
--------------------------------------------------------------------------------
1 | .git*
2 | .vscode
3 | local.settings.json
4 | test
5 | .venv
--------------------------------------------------------------------------------
/Quickstart-Semantic-Search/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-identity
2 | python-dotenv
3 | azure-search-documents==11.7.0b2
4 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | "ms-azuretools.vscode-azurefunctions",
4 | "ms-python.python"
5 | ]
6 | }
--------------------------------------------------------------------------------
/Quickstart-Semantic-Search/sample.env:
--------------------------------------------------------------------------------
1 | AZURE_SEARCH_ENDPOINT=https://your-search-service.search.windows.net
2 | AZURE_SEARCH_INDEX_NAME=hotels-sample-index
--------------------------------------------------------------------------------
/Quickstart-Vector-Search/sample.env:
--------------------------------------------------------------------------------
1 | AZURE_SEARCH_ENDPOINT=https://your-search-service.search.windows.net
2 | AZURE_SEARCH_INDEX_NAME=vector-search-quickstart
--------------------------------------------------------------------------------
/azure-function-search/search.sample.dat:
--------------------------------------------------------------------------------
1 | {"q":"","top":8,"skip":0,"filters":[{"field":"authors","value":"James Patterson"},{"field":"language_code","value":"en-US"}]}
--------------------------------------------------------------------------------
/Quickstart-Document-Permissions-Push-API/sample.env:
--------------------------------------------------------------------------------
1 | AZURE_SEARCH_ENDPOINT=https://your-search-service.search.windows.net
2 | AZURE_SEARCH_INDEX=document-permissions-push-idx
--------------------------------------------------------------------------------
/Quickstart-Document-Permissions-Push-API/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-identity
2 | aiohttp
3 | ipykernel
4 | python-dotenv
5 | requests
6 | msgraph-sdk
7 | azure-search-documents==11.7.0b2
--------------------------------------------------------------------------------
/Quickstart-Agentic-Retrieval/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-identity
2 | azure-ai-evaluation
3 | openai
4 | aiohttp
5 | ipykernel
6 | python-dotenv
7 | requests
8 | azure-search-documents==11.7.0b2
--------------------------------------------------------------------------------
/Quickstart-Vector-Search/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-ai-projects==1.0.0b11
2 | azure-ai-agents==1.0.0
3 | azure-identity
4 | aiohttp
5 | ipykernel
6 | python-dotenv
7 | azure-search-documents==11.7.0b2
8 |
--------------------------------------------------------------------------------
/Quickstart-Document-Permissions-Pull-API/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-identity
2 | aiohttp
3 | ipykernel
4 | python-dotenv
5 | requests
6 | msgraph-sdk
7 | azure-storage-file-datalake
8 | azure-search-documents==11.7.0b2
--------------------------------------------------------------------------------
/agentic-retrieval-pipeline-example/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-ai-projects==2.0.0b1
2 | azure-mgmt-cognitiveservices
3 | azure-identity
4 | ipykernel
5 | python-dotenv
6 | azure-search-documents==11.7.0b2
7 | requests
8 | openai
--------------------------------------------------------------------------------
/bulk-insert/requirements.txt:
--------------------------------------------------------------------------------
1 | azure-core
2 | azure-search-documents
3 | requests
4 | certifi
5 | chardet
6 | idna
7 | isodate
8 | msrest
9 | numpy
10 | oauthlib
11 | pandas
12 | python-dateutil
13 | pytz
14 | requests-oauthlib
15 | six
16 | typing-extensions
17 | urllib3
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "Attach to Python Functions",
6 | "type": "python",
7 | "request": "attach",
8 | "port": 9091,
9 | "preLaunchTask": "func: host start"
10 | }
11 | ]
12 | }
--------------------------------------------------------------------------------
/azure-function-search/host.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0",
3 | "logging": {
4 | "applicationInsights": {
5 | "samplingSettings": {
6 | "isEnabled": true,
7 | "excludedTypes": "Request"
8 | }
9 | }
10 | },
11 | "extensionBundle": {
12 | "id": "Microsoft.Azure.Functions.ExtensionBundle",
13 | "version": "[4.*, 5.0.0)"
14 | }
15 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "azureFunctions.projectSubpath": "search-website-functions-v4/api",
3 | "azureFunctions.deploySubpath": "search-website-functions-v4/api",
4 | "azureFunctions.scmDoBuildDuringDeployment": true,
5 | "azureFunctions.pythonVenv": ".venv",
6 | "azureFunctions.projectLanguage": "Python",
7 | "azureFunctions.projectRuntime": "~4",
8 | "debug.internalConsoleOptions": "neverOpen"
9 | }
--------------------------------------------------------------------------------
/Quickstart-Document-Permissions-Pull-API/sample.env:
--------------------------------------------------------------------------------
1 | AZURE_SEARCH_ENDPOINT=https://your-search-service.search.windows.net
2 | AZURE_SEARCH_INDEX=document-permissions-indexer-idx
3 | AZURE_SEARCH_INDEXER=document-permissions-indexer-idxr
4 | AZURE_SEARCH_DATASOURCE=document-permissions-indexer-ds
5 | AZURE_STORAGE_ACCOUNT_NAME=
6 | AZURE_STORAGE_CONTAINER_NAME=state-parks
7 | AZURE_STORAGE_CONNECTION_STRING=
8 | AZURE_STORAGE_RESOURCE_ID=
--------------------------------------------------------------------------------
/azure-function-search/function_app.py:
--------------------------------------------------------------------------------
import azure.functions as func
import logging
import json
from search import bp as search_bp
from lookup import bp as lookup_bp
from suggest import bp as suggest_bp

# Function app entry point. Auth level is anonymous; the individual endpoints
# are query helpers for the search website sample.
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)

# Attach the HTTP-triggered functions defined in the sibling modules.
for blueprint in (lookup_bp, search_bp, suggest_bp):
    app.register_functions(blueprint)
--------------------------------------------------------------------------------
/azure-function-search/shared_code/__init__.py:
--------------------------------------------------------------------------------
import os


def azure_config():
    """Read Azure AI Search settings from environment variables.

    Returns a dict with keys ``search_facets``, ``search_index_name``,
    ``search_service_name``, and ``search_api_key``. Any variable that is
    not set defaults to an empty string.
    """
    env_names = {
        "search_facets": "SearchFacets",
        "search_index_name": "SearchIndexName",
        "search_service_name": "SearchServiceName",
        "search_api_key": "SearchApiKey",
    }
    return {key: os.environ.get(var, "") for key, var in env_names.items()}
--------------------------------------------------------------------------------
/azure-function-search/readme.md:
--------------------------------------------------------------------------------
1 | # Create an Azure function that specifies queries
2 |
3 | This folder contains source code for an Azure function that formulates queries, performs document lookup, and suggests typeahead queries. It's the Python version of the `api` content used in the [C# sample Add search to websites](https://learn.microsoft.com/azure/search/tutorial-csharp-overview). If you're a Python developer, you can substitute this code to create a Python version of the sample app.
4 |
--------------------------------------------------------------------------------
/azure-function-search/local.settings.json.rename:
--------------------------------------------------------------------------------
1 | {
2 | "IsEncrypted": false,
3 | "Values": {
4 | "AzureWebJobsStorage": "",
5 | "AzureWebJobsFeatureFlags": "EnableWorkerIndexing",
6 | "FUNCTIONS_WORKER_RUNTIME": "python",
7 | "SearchApiKey": "YOUR-SEARCH-QUERY-KEY",
8 | "SearchServiceName": "YOUR-SEARCH-RESOURCE-NAME",
9 | "SearchIndexName": "good-books",
10 | "SearchFacets": "authors*,language_code"
11 | },
12 | "Host": {
13 | "CORS": "*"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/agentic-retrieval-pipeline-example/sample.env:
--------------------------------------------------------------------------------
1 | PROJECT_ENDPOINT=https://your-foundry-resource.services.ai.azure.com/api/projects/your-foundry-project
2 | AGENT_MODEL=gpt-4.1-mini
3 | PROJECT_RESOURCE_ID=/subscriptions/your-subscription-id/resourceGroups/your-resource-group/providers/Microsoft.CognitiveServices/accounts/your-account/projects/your-project
4 | AZURE_OPENAI_ENDPOINT=https://your-openai-service.openai.azure.com
5 | AZURE_OPENAI_GPT_DEPLOYMENT=gpt-5-mini
6 | AZURE_SEARCH_ENDPOINT=https://your-search-service.search.windows.net
7 | AZURE_SEARCH_INDEX_NAME=earth-at-night
--------------------------------------------------------------------------------
/azure-function-search/requirements.txt:
--------------------------------------------------------------------------------
1 | # DO NOT include azure-functions-worker in this file
2 | # The Python Worker is managed by Azure Functions platform
3 | # Manually managing azure-functions-worker may cause unexpected issues
4 |
5 | azure-common>=1.1.28,<2.0.0
6 | azure-core>=1.29.4,<2.0.0
7 | azure-functions>=1.17.0,<2.0.0
8 | azure-search-documents>=11.3.0,<12.0.0
9 | certifi>=2023.7.22,<2024.0.0
10 | chardet>=5.2.0,<6.0.0
11 | charset-normalizer>=3.2.0,<4.0.0
12 | idna>=3.4,<4.0.0
13 | isodate>=0.6.1,<1.0.0
14 | msrest>=0.7.1,<1.0.0
15 | oauthlib>=3.2.2,<4.0.0
16 | requests>=2.31.0,<3.0.0
17 | requests-oauthlib>=1.3.1,<2.0.0
18 | six>=1.16.0,<2.0.0
19 | typing_extensions>=4.8.0,<5.0.0
20 | urllib3>=2.0.4,<3.0.0
--------------------------------------------------------------------------------
/Quickstart-Agentic-Retrieval/sample.env:
--------------------------------------------------------------------------------
1 | # Variables for agentic retrieval in Azure AI Search
2 | SEARCH_ENDPOINT = https://your-search-service.search.windows.net
3 | AOAI_ENDPOINT = https://your-foundry-resource.openai.azure.com
4 | AOAI_EMBEDDING_MODEL = text-embedding-3-large
5 | AOAI_EMBEDDING_DEPLOYMENT = text-embedding-3-large
6 | AOAI_GPT_MODEL = gpt-5-mini
7 | AOAI_GPT_DEPLOYMENT = gpt-5-mini
8 | INDEX_NAME = earth-at-night
9 | KNOWLEDGE_SOURCE_NAME = earth-knowledge-source
10 | KNOWLEDGE_BASE_NAME = earth-knowledge-base
11 |
12 | # Variables for evaluation in Microsoft Foundry
13 | FOUNDRY_ENDPOINT = https://your-foundry-resource.services.ai.azure.com/api/projects/your-project-id
14 | AOAI_API_VERSION = 2025-04-01-preview
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "2.0.0",
3 | "tasks": [
4 | {
5 | "type": "func",
6 | "command": "host start",
7 | "problemMatcher": "$func-python-watch",
8 | "isBackground": true,
9 | "dependsOn": "pip install (functions)",
10 | "options": {
11 | "cwd": "${workspaceFolder}/search-website-functions-v4/api"
12 | }
13 | },
14 | {
15 | "label": "pip install (functions)",
16 | "type": "shell",
17 | "osx": {
18 | "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
19 | },
20 | "windows": {
21 | "command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt"
22 | },
23 | "linux": {
24 | "command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
25 | },
26 | "problemMatcher": [],
27 | "options": {
28 | "cwd": "${workspaceFolder}/search-website-functions-v4/api"
29 | }
30 | }
31 | ]
32 | }
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation. All rights reserved.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/azure-function-search/lookup.py:
--------------------------------------------------------------------------------
import logging
import azure.functions as func
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import ResourceNotFoundError
from azure.search.documents import SearchClient
from shared_code import azure_config
import json

# Search-service settings (service name, query API key) from environment variables.
environment_vars = azure_config()

# Set Azure Search endpoint and key
endpoint = f'https://{environment_vars["search_service_name"]}.search.windows.net'
key = environment_vars["search_api_key"]

# Your index name
index_name = "good-books"

# Create Azure SDK client (module scope, so it is reused across invocations)
search_client = SearchClient(endpoint, index_name, AzureKeyCredential(key))

bp = func.Blueprint()


@bp.function_name("lookup")
@bp.route(route="lookup", methods=[func.HttpMethod.GET, func.HttpMethod.POST])
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Return a single document from the index by its key.

    Example: http://localhost:7071/api/lookup?id=100
    Responds with {"document": {...}} as JSON, a 404 if the key does not
    exist, or a plain-text message when no ``id`` parameter is supplied.
    """
    docid = req.params.get("id")

    if not docid:
        # Preserve the original contract: friendly message with HTTP 200.
        return func.HttpResponse("No doc id param found.", status_code=200)

    logging.info("/Lookup id = %s", docid)
    try:
        returned_document = search_client.get_document(key=docid)
    except ResourceNotFoundError:
        # Previously an unknown key raised an unhandled exception (HTTP 500);
        # report 404 so the client can distinguish "not found" from a crash.
        return func.HttpResponse(f"Document '{docid}' not found.", status_code=404)

    full_response = {"document": returned_document}

    return func.HttpResponse(
        body=json.dumps(full_response), mimetype="application/json", status_code=200
    )
--------------------------------------------------------------------------------
/azure-function-search/suggest.py:
--------------------------------------------------------------------------------
import logging
import azure.functions as func
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from shared_code import azure_config
import json

# Search-service settings (service name, query API key) from environment variables.
environment_vars = azure_config()

# Example request:
# curl --header "Content-Type: application/json" \
#      --request POST \
#      --data '{"q":"code","top":5,"suggester":"sg"}' \
#      http://localhost:7071/api/suggest

# Set Azure Search endpoint and key
service_name = environment_vars["search_service_name"]
endpoint = f"https://{service_name}.search.windows.net"
key = environment_vars["search_api_key"]

# Your index name
index_name = "good-books"

# Create Azure SDK client (module scope, so it is reused across invocations)
search_client = SearchClient(endpoint, index_name, AzureKeyCredential(key))


bp = func.Blueprint()


@bp.function_name("suggest")
@bp.route(route="suggest", methods=[func.HttpMethod.GET, func.HttpMethod.POST])
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Return typeahead suggestions for partial query text.

    Reads ``q``, ``top`` (default 5), and ``suggester`` (default "sg") from
    the JSON request body; ``q`` may also be passed as a query parameter.
    Responds with {"suggestions": [...]} as JSON.
    """
    # The route also accepts GET, where the body may be absent or not JSON;
    # previously req.get_json() raised an unhandled ValueError in that case.
    try:
        req_body = req.get_json()
    except ValueError:
        req_body = {}

    q = req_body.get("q") or req.params.get("q")
    # Sample payloads send top as a string ("5"); coerce to int for the SDK.
    top = int(req_body.get("top") or 5)
    suggester = req_body.get("suggester") or "sg"

    if not q:
        # Preserve the original contract: friendly message with HTTP 200.
        return func.HttpResponse("No query param found.", status_code=200)

    logging.info("/Suggest q = %s", q)
    suggestions = search_client.suggest(search_text=q, suggester_name=suggester, top=top)
    logging.debug(suggestions)

    # Shape the React app expects.
    full_response = {"suggestions": suggestions}

    return func.HttpResponse(
        body=json.dumps(full_response), mimetype="application/json", status_code=200
    )
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # Azurite files for local environment
107 | __azurite_*
108 |
--------------------------------------------------------------------------------
/Quickstart/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | page_type: sample
3 | languages:
4 | - python
5 | name: Python quickstart for Azure AI Search
6 | products:
7 | - azure
8 | - azure-cognitive-search
9 | description: |
10 | Learn how to create, load, and query an Azure AI Search index using Python.
11 | urlFragment: python-quickstart
12 | ---
13 |
14 | # Quickstart: Python for Azure AI Search
15 |
16 | 
17 |
18 | This sample demonstrates how to use the Azure SDK for Python to create an Azure AI Search index, load it with documents, and execute queries. The index is modeled on a subset of the hotels dataset, which is reduced in this sample for readability and comprehension. The code includes the index definition and documents.
19 |
20 | This sample uses a Jupyter notebook (.ipynb) file to perform the actions against the Azure AI Search service.
21 |
22 | ## Prerequisites
23 |
24 | * [Visual Studio Code](https://code.visualstudio.com/download) with the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Jupyter package](https://pypi.org/project/jupyter/).
25 |
26 | * The [azure-search-documents package](https://pypi.org/project/azure-search-documents/) from the Azure SDK for Python.
27 |
28 | ## Set up the sample
29 |
30 | 1. Clone or download this sample repository.
31 |
32 | 1. Extract contents if the download is a zip file. Make sure the files are read-write.
33 |
34 | ## Run the sample
35 |
36 | 1. Open the azure-search-quickstart.ipynb file in Visual Studio Code.
37 |
38 | 1. Set the service endpoint and API key for your search service:
39 |
40 | * service_name = "YOUR-SEARCH-SERVICE-NAME"
41 | * admin_key = "YOUR-SEARCH-SERVICE-ADMIN-API-KEY"
42 |
43 | 1. Run each step in sequence.
44 |
45 | ## Next step
46 |
47 | You can learn more about Azure AI Search on the [official documentation site](https://learn.microsoft.com/azure/search).
48 |
--------------------------------------------------------------------------------
/bulk-insert/readme.md:
--------------------------------------------------------------------------------
1 | # Create an Azure AI Search index from a CSV file
2 |
3 | This folder contains source code for a bulk-insert program that creates and loads an index using the good-books sample data in a CSV folder. It's the Python version of the `bulk-insert` content used in the [C# sample Add search to websites](https://learn.microsoft.com/azure/search/tutorial-csharp-overview). If you're a Python developer, you can substitute this code to create a Python version of the sample app.
4 |
5 | You can also run this code standalone to create a good-books index on your search service.
6 |
7 | 1. Check your search service to make sure you have room for an extra index. The **Usage** tab on the Azure portal's search service page provides this information. The maximum limit on the free tier is 3 indexes. The maximum limit on the Basic tier is 15 indexes.
8 |
9 | 1. Change the following values in the `bulk-insert.py` file:
10 |
11 | * YOUR-SEARCH-RESOURCE-NAME (not the full URL)
12 | * YOUR-SEARCH-ADMIN-KEY
13 |
14 | 1. Create a virtual environment. Press Ctrl-Shift-P to open the command palette and search for `Python: Create Environment`.
15 |
16 | 1. Open an integrated terminal in Visual Studio Code.
17 |
18 | 1. Make sure the path is "azure-search-static-web-app/python/bulk-insert".
19 |
20 | 1. Install the dependencies:
21 |
22 | ```bash
23 | pip install -r requirements.txt
24 | ```
25 |
26 | 1. Run the program:
27 |
28 | ```bash
29 | py bulk-insert.py
30 | ```
31 |
32 | 1. You should see the following output:
33 |
34 | ```bash
35 | Schema uploaded; Index created for good-books.
36 | Batch sent! - #1
37 | Batch sent! - #2
38 | Batch sent! - #3
39 | Batch sent! - #4
40 | Batch sent! - #5
41 | Batch sent! - #6
42 | Batch sent! - #7
43 | Batch sent! - #8
44 | Batch sent! - #9
45 | Batch sent! - #10
46 | Done!
47 | Upload complete
48 | ```
49 |
50 | If you get a "file not found error" on good-books-index.json, try adding the "Terminal: Execute in File Directory" in **Settings** > **Extensions** > **Python**.
51 |
--------------------------------------------------------------------------------
/azure-function-search/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # pipenv
86 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
88 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not
89 | # install all needed dependencies.
90 | #Pipfile.lock
91 |
92 | # celery beat schedule file
93 | celerybeat-schedule
94 |
95 | # SageMath parsed files
96 | *.sage.py
97 |
98 | # Environments
99 | .env
100 | .venv
101 | env/
102 | venv/
103 | ENV/
104 | env.bak/
105 | venv.bak/
106 |
107 | # Spyder project settings
108 | .spyderproject
109 | .spyproject
110 |
111 | # Rope project settings
112 | .ropeproject
113 |
114 | # mkdocs documentation
115 | /site
116 |
117 | # mypy
118 | .mypy_cache/
119 | .dmypy.json
120 | dmypy.json
121 |
122 | # Pyre type checker
123 | .pyre/
124 |
125 | # Azure Functions artifacts
126 | bin
127 | obj
128 | appsettings.json
129 | local.settings.json
130 |
131 | # Azurite artifacts
132 | __blobstorage__
133 | __queuestorage__
134 | __azurite_db*__.json
135 | .python_packages
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python samples for Azure AI Search
2 |
3 | This repository contains Python code samples used in Azure AI Search documentation. Unless noted otherwise, all samples run on the shared (free) pricing tier of a [search service](https://learn.microsoft.com/azure/search/search-create-service-portal).
4 |
5 | If your configuration uses a search service managed identity for indexer connections, your search service must be on the Basic tier or higher.
6 |
7 | ## Day-one quickstarts and tutorials
8 |
9 | | Sample | Description |
10 | |--------|-------------|
11 | | [Quickstart](Quickstart/README.md) | Introduces the fundamental tasks of working with a classic search index: create, load, and query. The index is modeled on a subset of the hotels dataset, which is widely used in Azure AI Search samples but reduced in this sample for readability and comprehension. |
12 | | [Quickstart-Agentic-Retrieval](Quickstart-Agentic-Retrieval/quickstart-agentic-retrieval.ipynb) | Create a knowledge base in Azure AI Search to integrate LLM reasoning into query planning. |
13 | | [Quickstart-Document-Permissions-Pull-API](Quickstart-Document-Permissions-Pull-API/document-permissions-pull-api.ipynb) | Using an indexer "pull API" approach, flow access control lists from a data source to search results and apply permission filters that restrict access to authorized content. Indexer support is limited to Azure Data Lake Storage (ADLS) Gen2 permission metadata. |
14 | | [Quickstart-Document-Permissions-Push-API](Quickstart-Document-Permissions-Push-API/document-permissions-push-api.ipynb) | Using the push APIs for indexing a JSON payload, flow embedded permission metadata to indexed documents and search results that are filtered based on user access to authorized content. |
15 | | [Quickstart-Semantic-Search](Quickstart-Semantic-Search/semantic-search-quickstart.ipynb) | Extends the quickstart through modifications that invoke semantic ranking. This notebook adds a semantic configuration to the index and semantic query options that formulate the query and response. |
16 | | [Quickstart-Vector-Search](Quickstart-Vector-Search/quickstart-vector-search.ipynb) | Introduces vector search in Azure AI Search. This notebook demonstrates how to create, load, and query a vector index. |
17 |
18 | ## Deeper dive tutorials
19 |
20 | | Sample | Description |
21 | |--------|-------------|
22 | | [agentic-retrieval-pipeline-example](agentic-retrieval-pipeline-example/agent-example.ipynb) | Extends the quickstart by integrating Foundry Agent Service. Add an AI agent and MCP tool to your Azure AI Search agentic retrieval pipeline for an end-to-end conversational search experience. |
23 | | [azure-function-search](azure-function-search/readme.md) | An Azure Function that sends query requests to an Azure AI Search service. You can substitute this code to replace the contents of the `api` folder in the C# sample [azure-search-static-web-app](https://github.com/Azure-Samples/azure-search-static-web-app). |
24 | | [bulk-insert](bulk-insert/readme.md) | Create and load an index using the push APIs and sample data. You can substitute this code to replace the contents of the `bulk-insert` folder in the C# sample [azure-search-static-web-app](https://github.com/Azure-Samples/azure-search-static-web-app) |
25 | | [cmk-example](cmk-example/cmk-example.ipynb) | Encrypt content using customer-managed keys. |
26 |
27 | ## Archived samples
28 |
29 | + **azureml-custom-skill**: See the **Archive** branch of this repository.
30 | + **image-processing**: See [azure-search-sample-archive/tree/main/image-processing](https://github.com/Azure-Samples/azure-search-sample-archive/tree/main/image-processing).
31 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Azure AI Search samples
2 |
3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a
4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
5 | the rights to use your contribution. For details, visit https://cla.microsoft.com.
6 |
7 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
8 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
9 | provided by the bot. You will only need to do this once across all repos using our CLA.
10 |
11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
14 |
15 | - [Code of Conduct](#coc)
16 | - [Issues and Bugs](#issue)
17 | - [Feature Requests](#feature)
18 | - [Submission Guidelines](#submit)
19 |
20 | ## Code of Conduct
21 | Help us keep this project open and inclusive. Please read and follow our [Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
22 |
23 | ## Found an Issue?
24 | If you find a bug in the source code or a mistake in the documentation, you can help us by
25 | [submitting an issue](#submit-issue) to the GitHub Repository. Even better, you can
26 | [submit a Pull Request](#submit-pr) with a fix.
27 |
28 | ## Want a Feature?
29 | You can *request* a new feature by [submitting an issue](#submit-issue) to the GitHub
30 | Repository. If you would like to *implement* a new feature, please submit an issue with
31 | a proposal for your work first, to be sure that we can use it.
32 |
33 | * **Small Features** can be crafted and directly [submitted as a Pull Request](#submit-pr).
34 |
35 | ## Submission Guidelines
36 |
37 | ### Submitting an Issue
38 | Before you submit an issue, search the archive, maybe your question was already answered.
39 |
40 | If your issue appears to be a bug, and hasn't been reported, open a new issue.
41 | Help us to maximize the effort we can spend fixing issues and adding new
42 | features, by not reporting duplicate issues. Providing the following information will increase the
43 | chances of your issue being dealt with quickly:
44 |
45 | * **Overview of the Issue** - if an error is being thrown a non-minified stack trace helps
46 | * **Version** - what version is affected (e.g. 0.1.2)
47 | * **Motivation for or Use Case** - explain what you are trying to do and why the current behavior is a bug for you
48 | * **Browsers and Operating System** - is this a problem with all browsers?
49 | * **Reproduce the Error** - provide a live example or an unambiguous set of steps
50 | * **Related Issues** - has a similar issue been reported before?
51 | * **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be
52 | causing the problem (line of code or commit)
53 |
54 | You can file new issues by providing the above information at the corresponding repository's issues link: https://github.com/[organization-name]/[repository-name]/issues/new.
55 |
56 | ### Submitting a Pull Request (PR)
57 | Before you submit your Pull Request (PR) consider the following guidelines:
58 |
59 | * Search the repository (https://github.com/[organization-name]/[repository-name]/pulls) for an open or closed PR
60 | that relates to your submission. You don't want to duplicate effort.
61 |
62 | * Make your changes in a new git fork:
63 |
64 | * Commit your changes using a descriptive commit message
65 | * Push your fork to GitHub:
66 | * In GitHub, create a pull request
67 | * If we suggest changes then:
68 | * Make the required updates.
69 | * Rebase your fork and force push to your GitHub repository (this will update your Pull Request):
70 |
71 | ```shell
72 | git rebase master -i
73 | git push -f
74 | ```
75 |
76 | That's it! Thank you for your contribution!
77 |
--------------------------------------------------------------------------------
/azure-function-search/search.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import azure.functions as func
3 | from azure.core.credentials import AzureKeyCredential
4 | from azure.search.documents import SearchClient
5 | from shared_code import azure_config
6 | import json
7 |
# Load search service settings (service name, API key, facet list) from
# application settings via the shared helper.
environment_vars = azure_config()

# Set Azure Search endpoint and key
endpoint = f'https://{environment_vars["search_service_name"]}.search.windows.net'
key = environment_vars["search_api_key"]

# Your index name
index_name = "good-books"

# Create Azure SDK client
# Built once at module load and reused by every request handled by this function.
search_client = SearchClient(endpoint, index_name, AzureKeyCredential(key))
19 |
# returns obj like {authors: 'array', language_code:'string'}
def read_facets(facetsString):
    """Parse a comma-separated facet list into a name -> kind mapping.

    A "*" in an entry marks a collection facet ("array"); all other entries
    are plain string facets ("string"). The "*" itself is stripped from the
    returned key.
    """
    return {
        name.replace("*", ""): ("array" if "*" in name else "string")
        for name in facetsString.split(",")
    }
32 |
33 |
# creates filters in odata syntax
def create_filter_expression(filter_list, facets):
    """Convert client-side filter selections into an OData $filter string.

    filter_list: list of {"field": ..., "value": ...} dicts from the request.
    facets: mapping of field name -> "array" | "string" (see read_facets).

    Collection ("array") fields use a search.in inside an any() lambda;
    scalar fields use a plain eq comparison. Individual expressions are
    joined with "and". Returns "" for an empty filter list.
    """
    filter_expressions = []

    # Iterate directly over the list instead of the original manual
    # while/index loop; the leftover debug print() calls are replaced
    # with logging.debug so they no longer pollute stdout in production.
    for item in filter_list:
        field = item["field"]
        value = item["value"]

        if facets[field] == "array":
            logging.debug("array facet filter on %s", field)
            filter_expressions.append(f"{field}/any(t: search.in(t, '{value}', ','))")
        else:
            logging.debug("string facet filter on %s", field)
            filter_expressions.append(f"{field} eq '{value}'")

    return " and ".join(filter_expressions)
57 |
58 |
59 | def new_shape(docs):
60 |
61 | old_api_shape = list(docs)
62 |
63 | client_side_expected_shape = []
64 |
65 | for item in old_api_shape:
66 |
67 | new_document = {}
68 | new_document["score"] = item["@search.score"]
69 | new_document["highlights"] = item["@search.highlights"]
70 |
71 | new_api_shape = {}
72 | new_api_shape["id"] = item["id"]
73 | new_api_shape["goodreads_book_id"] = item["goodreads_book_id"]
74 | new_api_shape["best_book_id"] = item["best_book_id"]
75 | new_api_shape["work_id"] = item["work_id"]
76 | new_api_shape["books_count"] = item["books_count"]
77 | new_api_shape["isbn"] = item["isbn"]
78 | new_api_shape["isbn13"] = item["isbn13"]
79 | new_api_shape["authors"] = item["authors"]
80 | new_api_shape["original_publication_year"] = item["original_publication_year"]
81 | new_api_shape["original_title"] = item["original_title"]
82 | new_api_shape["title"] = item["title"]
83 | new_api_shape["language_code"] = item["language_code"]
84 | new_api_shape["average_rating"] = item["average_rating"]
85 | new_api_shape["ratings_count"] = item["ratings_count"]
86 | new_api_shape["work_ratings_count"] = item["work_ratings_count"]
87 | new_api_shape["work_text_reviews_count"] = item["work_text_reviews_count"]
88 | new_api_shape["ratings_1"] = item["ratings_1"]
89 | new_api_shape["ratings_2"] = item["ratings_2"]
90 | new_api_shape["ratings_3"] = item["ratings_3"]
91 | new_api_shape["ratings_4"] = item["ratings_4"]
92 | new_api_shape["ratings_5"] = item["ratings_5"]
93 | new_api_shape["image_url"] = item["image_url"]
94 | new_api_shape["small_image_url"] = item["small_image_url"]
95 |
96 | new_document["document"] = new_api_shape
97 |
98 | client_side_expected_shape.append(new_document)
99 |
100 | return list(client_side_expected_shape)
101 |
bp = func.Blueprint()


@bp.function_name("search")
@bp.route(route="search", methods=[func.HttpMethod.GET, func.HttpMethod.POST])
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP entry point for the /search route.

    Expects a JSON body with "q" (query text) and optional "top", "skip",
    and "filters" keys. Runs the query against the shared SearchClient and
    returns count, facets, and reshaped documents as a JSON payload.
    """
    # Pull query parameters from the request body, applying defaults.
    req_body = req.get_json()
    q = req_body.get("q")
    top = req_body.get("top") or 8
    skip = req_body.get("skip") or 0
    filters = req_body.get("filters") or []

    # Facet configuration comes from app settings, e.g. "authors*,language_code".
    facetKeys = read_facets(environment_vars["search_facets"])

    # Translate the client's filter selections into an OData expression.
    search_filter = create_filter_expression(filters, facetKeys) if filters else ""

    if not q:
        return func.HttpResponse("No query param found.", status_code=200)

    logging.info(f"/Search q = {q}")

    search_results = search_client.search(
        search_text=q,
        top=top,
        skip=skip,
        facets=facetKeys,
        filter=search_filter,
        include_total_count=True,
    )

    # format the React app expects
    full_response = {
        "count": search_results.get_count(),
        "facets": search_results.get_facets(),
        "results": new_shape(search_results),
    }

    return func.HttpResponse(
        body=json.dumps(full_response), mimetype="application/json", status_code=200
    )
147 |
--------------------------------------------------------------------------------
/bulk-insert/bulk-insert.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | import requests
4 | import pandas as pd
5 | from azure.core.credentials import AzureKeyCredential
6 | from azure.search.documents import SearchClient
7 | from azure.search.documents.indexes import SearchIndexClient
8 | from azure.search.documents.indexes.models import SearchIndex
9 | from azure.search.documents.indexes.models import (
10 | ComplexField,
11 | CorsOptions,
12 | SearchIndex,
13 | ScoringProfile,
14 | SearchFieldDataType,
15 | SimpleField,
16 | SearchableField,
17 | )
18 |
# Get the service name (short name) and admin API key from the environment
# NOTE(review): despite the comment above, these are hard-coded placeholders;
# replace them before running the script.
service_name = "YOUR-SEARCH-SERVICE-NAME"
key = "YOUR-SEARCH-SERVICE-ADMIN-API-KEY"
endpoint = "https://{}.search.windows.net/".format(service_name)

# Give your index a name
# You can also supply this at runtime in __main__
index_name = "good-books"

# Search Index Schema definition
index_schema = "./good-books-index.json"

# Books catalog
# Source CSV for the good-books dataset; downloaded over HTTP at runtime.
books_url = "https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/main/good-books/books.csv"
batch_size = 1000  # number of documents sent per upload batch
34 |
# Instantiate a client
class CreateClient(object):
    """Factory for the Azure AI Search SDK clients used by this script.

    Stores the service endpoint, target index name, and admin API key, and
    builds both client types from that single configuration.
    """

    def __init__(self, endpoint, key, index_name):
        self.endpoint = endpoint
        self.index_name = index_name
        self.key = key
        self.credentials = AzureKeyCredential(key)

    # Create a SearchClient
    # Use this to upload docs to the Index
    def create_search_client(self):
        return SearchClient(
            endpoint=self.endpoint,
            index_name=self.index_name,
            credential=self.credentials,
        )

    # Create a SearchIndexClient
    # This is used to create, manage, and delete an index
    def create_admin_client(self):
        # Bug fix: the original referenced the module-level global `endpoint`
        # instead of self.endpoint, so the admin client ignored whatever
        # endpoint this instance was constructed with.
        return SearchIndexClient(endpoint=self.endpoint, credential=self.credentials)
56 |
57 |
# Get Schema from File or URL
def get_schema_data(schema, url=False):
    """Load the index schema JSON from a local path, or from a URL when url=True."""
    if url:
        # Fetch the raw bytes over HTTP and decode them as JSON.
        response = requests.get(schema)
        return json.loads(response.content)

    # Local file: let the json module parse the open file directly.
    with open(schema) as json_file:
        return json.load(json_file)
68 |
69 |
# Create Search Index from the schema
# If reading the schema from a URL, set url=True
def create_schema_from_json_and_upload(schema, index_name, admin_client, url=False):
    """Create the search index described by the JSON schema.

    schema: path (or URL when url=True) to the index schema JSON.
    index_name: name to give the new index.
    admin_client: a SearchIndexClient with admin rights on the service.
    url: when True, fetch the schema over HTTP instead of from disk.

    Returns the created index on success, or None when creation raised.
    Calls exit(0) when the service returns an empty result.
    """
    cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
    scoring_profiles = []
    schema_data = get_schema_data(schema, url)

    index = SearchIndex(
        name=index_name,
        fields=schema_data["fields"],
        scoring_profiles=scoring_profiles,
        suggesters=schema_data["suggesters"],
        cors_options=cors_options,
    )

    try:
        upload_schema = admin_client.create_index(index)
        if upload_schema:
            print(f"Schema uploaded; Index created for {index_name}.")
            return upload_schema
        else:
            exit(0)
    except Exception:
        # Bug fix: the original bare "except:" also caught the SystemExit
        # raised by exit(0) above, silently swallowing the intended shutdown.
        # Catching only Exception lets exit() terminate the script while
        # still reporting unexpected service/SDK errors.
        print("Unexpected error:", sys.exc_info()[0])
94 |
95 |
# Convert CSV data to JSON
def convert_csv_to_json(url):
    """Read the CSV at ``url`` (path or URL) and return it as a list of row dicts."""
    frame = pd.read_csv(url)
    return json.loads(frame.to_json(orient="records"))
101 |
102 |
# Batch your uploads to Azure Search
def _book_to_document(i):
    """Map one raw CSV row (as a dict) to a search index document.

    Numeric fields are coerced to match the types declared in
    good-books-index.json; falsy/missing values fall back to 0
    (0.0 for the Edm.Double rating field).
    """
    return {
        "id": str(i["book_id"]),
        "goodreads_book_id": int(i["goodreads_book_id"]),
        "best_book_id": int(i["best_book_id"]),
        "work_id": int(i["work_id"]),
        "books_count": i["books_count"] if i["books_count"] else 0,
        "isbn": str(i["isbn"]),
        "isbn13": str(i["isbn13"]),
        "authors": i["authors"].split(",") if i["authors"] else None,
        "original_publication_year": int(i["original_publication_year"])
        if i["original_publication_year"]
        else 0,
        "original_title": i["original_title"],
        "title": i["title"],
        "language_code": i["language_code"],
        # Bug fix: the index declares average_rating as Edm.Double, but the
        # original int() cast truncated ratings (e.g. 4.27 -> 4). Preserve
        # the fractional value with float().
        "average_rating": float(i["average_rating"])
        if i["average_rating"]
        else 0.0,
        "ratings_count": int(i["ratings_count"]) if i["ratings_count"] else 0,
        "work_ratings_count": int(i["work_ratings_count"])
        if i["work_ratings_count"]
        else 0,
        "work_text_reviews_count": i["work_text_reviews_count"]
        if i["work_text_reviews_count"]
        else 0,
        "ratings_1": int(i["ratings_1"]) if i["ratings_1"] else 0,
        "ratings_2": int(i["ratings_2"]) if i["ratings_2"] else 0,
        "ratings_3": int(i["ratings_3"]) if i["ratings_3"] else 0,
        "ratings_4": int(i["ratings_4"]) if i["ratings_4"] else 0,
        "ratings_5": int(i["ratings_5"]) if i["ratings_5"] else 0,
        "image_url": i["image_url"],
        "small_image_url": i["small_image_url"],
    }


def batch_upload_json_data_to_index(json_file, client):
    """Upload book documents to the index in batches of ``batch_size``.

    json_file: iterable of row dicts (see convert_csv_to_json).
    client: a SearchClient bound to the target index.
    """
    batch_array = []
    count = 0
    batch_counter = 0
    for i in json_file:
        count += 1
        batch_array.append(_book_to_document(i))

        # In this sample, we limit batches to 1000 records.
        # When the counter hits a number divisible by 1000, the batch is sent.
        if count % batch_size == 0:
            client.upload_documents(documents=batch_array)
            batch_counter += 1
            print(f"Batch sent! - #{batch_counter}")
            batch_array = []

    # This will catch any records left over, when not divisible by 1000
    if len(batch_array) > 0:
        client.upload_documents(documents=batch_array)
        batch_counter += 1
        print(f"Final batch sent! - #{batch_counter}")

    print("Done!")
161 |
162 |
163 | if __name__ == "__main__":
164 | start_client = CreateClient(endpoint, key, index_name)
165 | admin_client = start_client.create_admin_client()
166 | search_client = start_client.create_search_client()
167 | schema = create_schema_from_json_and_upload(
168 | index_schema, index_name, admin_client, url=False
169 | )
170 | books_data = convert_csv_to_json(books_url)
171 | batch_upload = batch_upload_json_data_to_index(books_data, search_client)
172 | print("Upload complete")
173 |
--------------------------------------------------------------------------------
/bulk-insert/good-books-index.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "good-books",
3 | "fields": [
4 | {
5 | "name": "id",
6 | "type": "Edm.String",
7 | "facetable": false,
8 | "filterable": false,
9 | "key": true,
10 | "retrievable": true,
11 | "searchable": true,
12 | "sortable": false,
13 | "analyzer": "standard.lucene",
14 | "indexAnalyzer": null,
15 | "searchAnalyzer": null,
16 | "synonymMaps": [],
17 | "fields": []
18 | },
19 | {
20 | "name": "goodreads_book_id",
21 | "type": "Edm.Int64",
22 | "facetable": false,
23 | "filterable": false,
24 | "retrievable": true,
25 | "sortable": false,
26 | "analyzer": null,
27 | "indexAnalyzer": null,
28 | "searchAnalyzer": null,
29 | "synonymMaps": [],
30 | "fields": []
31 | },
32 | {
33 | "name": "best_book_id",
34 | "type": "Edm.Int64",
35 | "facetable": false,
36 | "filterable": false,
37 | "retrievable": true,
38 | "sortable": false,
39 | "analyzer": null,
40 | "indexAnalyzer": null,
41 | "searchAnalyzer": null,
42 | "synonymMaps": [],
43 | "fields": []
44 | },
45 | {
46 | "name": "work_id",
47 | "type": "Edm.Int64",
48 | "facetable": false,
49 | "filterable": false,
50 | "retrievable": true,
51 | "sortable": false,
52 | "analyzer": null,
53 | "indexAnalyzer": null,
54 | "searchAnalyzer": null,
55 | "synonymMaps": [],
56 | "fields": []
57 | },
58 | {
59 | "name": "books_count",
60 | "type": "Edm.Int64",
61 | "facetable": false,
62 | "filterable": false,
63 | "retrievable": true,
64 | "sortable": false,
65 | "analyzer": null,
66 | "indexAnalyzer": null,
67 | "searchAnalyzer": null,
68 | "synonymMaps": [],
69 | "fields": []
70 | },
71 | {
72 | "name": "isbn",
73 | "type": "Edm.String",
74 | "facetable": false,
75 | "filterable": false,
76 | "key": false,
77 | "retrievable": true,
78 | "searchable": true,
79 | "sortable": false,
80 | "analyzer": "standard.lucene",
81 | "indexAnalyzer": null,
82 | "searchAnalyzer": null,
83 | "synonymMaps": [],
84 | "fields": []
85 | },
86 | {
87 | "name": "isbn13",
88 | "type": "Edm.String",
89 | "facetable": false,
90 | "filterable": false,
91 | "retrievable": true,
92 | "sortable": false,
93 | "analyzer": null,
94 | "indexAnalyzer": null,
95 | "searchAnalyzer": null,
96 | "synonymMaps": [],
97 | "fields": []
98 | },
99 | {
100 | "name": "authors",
101 | "type": "Collection(Edm.String)",
102 | "facetable": true,
103 | "filterable": true,
104 | "key": false,
105 | "retrievable": true,
106 | "searchable": true,
107 | "sortable": false,
108 | "analyzer": "standard.lucene",
109 | "indexAnalyzer": null,
110 | "searchAnalyzer": null,
111 | "synonymMaps": [],
112 | "fields": []
113 | },
114 | {
115 | "name": "original_publication_year",
116 | "type": "Edm.Int64",
117 | "facetable": false,
118 | "filterable": false,
119 | "retrievable": true,
120 | "sortable": false,
121 | "analyzer": null,
122 | "indexAnalyzer": null,
123 | "searchAnalyzer": null,
124 | "synonymMaps": [],
125 | "fields": []
126 | },
127 | {
128 | "name": "original_title",
129 | "type": "Edm.String",
130 | "facetable": false,
131 | "filterable": false,
132 | "key": false,
133 | "retrievable": true,
134 | "searchable": true,
135 | "sortable": false,
136 | "analyzer": "standard.lucene",
137 | "indexAnalyzer": null,
138 | "searchAnalyzer": null,
139 | "synonymMaps": [],
140 | "fields": []
141 | },
142 | {
143 | "name": "title",
144 | "type": "Edm.String",
145 | "facetable": false,
146 | "filterable": false,
147 | "key": false,
148 | "retrievable": true,
149 | "searchable": true,
150 | "sortable": true,
151 | "analyzer": "standard.lucene",
152 | "indexAnalyzer": null,
153 | "searchAnalyzer": null,
154 | "synonymMaps": [],
155 | "fields": []
156 | },
157 | {
158 | "name": "language_code",
159 | "type": "Edm.String",
160 | "facetable": true,
161 | "filterable": true,
162 | "key": false,
163 | "retrievable": true,
164 | "searchable": false,
165 | "sortable": false,
166 | "analyzer": null,
167 | "indexAnalyzer": null,
168 | "searchAnalyzer": null,
169 | "synonymMaps": [],
170 | "fields": []
171 | },
172 | {
173 | "name": "average_rating",
174 | "type": "Edm.Double",
175 | "facetable": true,
176 | "filterable": true,
177 | "retrievable": true,
178 | "sortable": true,
179 | "analyzer": null,
180 | "indexAnalyzer": null,
181 | "searchAnalyzer": null,
182 | "synonymMaps": [],
183 | "fields": []
184 | },
185 | {
186 | "name": "ratings_count",
187 | "type": "Edm.Int64",
188 | "facetable": true,
189 | "filterable": true,
190 | "retrievable": true,
191 | "sortable": true,
192 | "analyzer": null,
193 | "indexAnalyzer": null,
194 | "searchAnalyzer": null,
195 | "synonymMaps": [],
196 | "fields": []
197 | },
198 | {
199 | "name": "work_ratings_count",
200 | "type": "Edm.Int64",
201 | "facetable": false,
202 | "filterable": false,
203 | "retrievable": true,
204 | "sortable": false,
205 | "analyzer": null,
206 | "indexAnalyzer": null,
207 | "searchAnalyzer": null,
208 | "synonymMaps": [],
209 | "fields": []
210 | },
211 | {
212 | "name": "work_text_reviews_count",
213 | "type": "Edm.Int64",
214 | "facetable": false,
215 | "filterable": false,
216 | "retrievable": true,
217 | "sortable": false,
218 | "analyzer": null,
219 | "indexAnalyzer": null,
220 | "searchAnalyzer": null,
221 | "synonymMaps": [],
222 | "fields": []
223 | },
224 | {
225 | "name": "ratings_1",
226 | "type": "Edm.Int64",
227 | "facetable": false,
228 | "filterable": false,
229 | "retrievable": true,
230 | "sortable": false,
231 | "analyzer": null,
232 | "indexAnalyzer": null,
233 | "searchAnalyzer": null,
234 | "synonymMaps": [],
235 | "fields": []
236 | },
237 | {
238 | "name": "ratings_2",
239 | "type": "Edm.Int64",
240 | "facetable": false,
241 | "filterable": false,
242 | "retrievable": true,
243 | "sortable": false,
244 | "analyzer": null,
245 | "indexAnalyzer": null,
246 | "searchAnalyzer": null,
247 | "synonymMaps": [],
248 | "fields": []
249 | },
250 | {
251 | "name": "ratings_3",
252 | "type": "Edm.Int64",
253 | "facetable": false,
254 | "filterable": false,
255 | "retrievable": true,
256 | "sortable": false,
257 | "analyzer": null,
258 | "indexAnalyzer": null,
259 | "searchAnalyzer": null,
260 | "synonymMaps": [],
261 | "fields": []
262 | },
263 | {
264 | "name": "ratings_4",
265 | "type": "Edm.Int64",
266 | "facetable": false,
267 | "filterable": false,
268 | "retrievable": true,
269 | "sortable": false,
270 | "analyzer": null,
271 | "indexAnalyzer": null,
272 | "searchAnalyzer": null,
273 | "synonymMaps": [],
274 | "fields": []
275 | },
276 | {
277 | "name": "ratings_5",
278 | "type": "Edm.Int64",
279 | "facetable": false,
280 | "filterable": false,
281 | "retrievable": true,
282 | "sortable": false,
283 | "analyzer": null,
284 | "indexAnalyzer": null,
285 | "searchAnalyzer": null,
286 | "synonymMaps": [],
287 | "fields": []
288 | },
289 | {
290 | "name": "image_url",
291 | "type": "Edm.String",
292 | "facetable": false,
293 | "filterable": false,
294 | "key": false,
295 | "retrievable": true,
296 | "searchable": true,
297 | "sortable": false,
298 | "analyzer": "standard.lucene",
299 | "indexAnalyzer": null,
300 | "searchAnalyzer": null,
301 | "synonymMaps": [],
302 | "fields": []
303 | },
304 | {
305 | "name": "small_image_url",
306 | "type": "Edm.String",
307 | "facetable": false,
308 | "filterable": false,
309 | "key": false,
310 | "retrievable": true,
311 | "searchable": true,
312 | "sortable": false,
313 | "analyzer": "standard.lucene",
314 | "indexAnalyzer": null,
315 | "searchAnalyzer": null,
316 | "synonymMaps": [],
317 | "fields": []
318 | }
319 | ],
320 | "suggesters": [
321 | {
322 | "name": "sg",
323 | "searchMode": "analyzingInfixMatching",
324 | "sourceFields": [
325 | "authors",
326 | "original_title"
327 | ]
328 | }
329 | ],
330 | "scoringProfiles": [],
331 | "defaultScoringProfile": "",
332 | "corsOptions": {
333 | "allowedOrigins": [
334 | "*"
335 | ],
336 | "maxAgeInSeconds": 300
337 | },
338 | "analyzers": [],
339 | "charFilters": [],
340 | "tokenFilters": [],
341 | "tokenizers": [],
342 | "encryptionKey": null
343 | }
--------------------------------------------------------------------------------
/Quickstart-Document-Permissions-Push-API/document-permissions-push-api.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "810ce279",
6 | "metadata": {},
7 | "source": [
8 | "# Document-level access example using the push document APIs\n",
9 | "\n",
10 | "In Azure AI Search, you can upload any JSON document payload to a search index for indexing. This notebook shows you how index documents that contain [user access permissions at the document level](azure/search/search-document-level-access-overview), and then query the index to return only those results that the user is authorized to view.\n",
11 | "\n",
12 | "The security principal behind the query access token determines the \"user\". The permission metadata in the document determines whether the user has authorization to the content. Internally, the search engine filters out any documents that aren't associated with the security principal.\n",
13 | "\n",
14 | "This feature is currently in preview.\n",
15 | "\n",
16 | "For an alternative approaching using indexers and pull API, see [Quickstart-Document-Permissions-Pull-API](../Quickstart-Document-Permissions-Pull-API/document-permissions-pull-api.ipynb).\n"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "id": "b6585426",
22 | "metadata": {},
23 | "source": [
24 | "## Prerequisites\n",
25 | "\n",
26 | "+ Azure AI Search with [role-based access control](https://learn.microsoft.com/azure/search/search-security-enable-roles).\n",
27 | "\n",
28 | "+ [Visual Studio Code](https://code.visualstudio.com/download) with the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Jupyter package](https://pypi.org/project/jupyter/).\n",
29 | "\n",
30 | "## Permissions\n",
31 | "\n",
32 | "This walkthrough uses Microsoft Entra ID authentication and authorization.\n",
33 | "\n",
34 | "On Azure AI Search, you must have role assignments to create objects and run queries:\n",
35 | "\n",
36 | "+ **Search Service Contributor**\n",
37 | "+ **Search Index Data Contributor**\n",
38 | "+ **Search Index Data Reader**\n",
39 | "\n",
40 | "For more information, see [Connect to Azure AI Search using roles](https://learn.microsoft.com/azure/search/search-security-rbac) and [Quickstart: Connect without keys for local testing](https://learn.microsoft.com/azure/search/search-get-started-rbac).\n",
41 | "\n",
42 | "## Set the environment variables\n",
43 | "\n",
44 | "1. Rename `sample.env` to `.env`.\n",
45 | "1. In the `.env` file, provide a full endpoint to your search service (https://your-search-service.search.windows.net).\n",
46 | "1. Replace the default index name if you want a different name.\n",
47 | "\n",
48 | "## Load connections\n",
49 | "\n",
50 | "We recommend creating a virtual environment to run this sample code. In Visual Studio Code, open the control palette (ctrl-shift-p) to create an environment. This notebook was tested on Python 3.10.\n",
51 | "\n",
52 | "After your environment is created, load the environment variables."
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "id": "2975a7f5",
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "from dotenv import load_dotenv\n",
63 | "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
64 | "import os\n",
65 | "\n",
66 | "load_dotenv(override=True) # take environment variables from .env.\n",
67 | "\n",
68 | "# The following variables from your .env file are used in this notebook\n",
69 | "endpoint = os.environ[\"AZURE_SEARCH_ENDPOINT\"]\n",
70 | "credential = DefaultAzureCredential()\n",
71 | "index_name = os.getenv(\"AZURE_SEARCH_INDEX\")\n",
72 | "token_provider = get_bearer_token_provider(credential, \"https://search.azure.com/.default\")\n"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "id": "9327cf01",
78 | "metadata": {},
79 | "source": [
80 | "## Create a sample index\n",
81 | "\n",
82 | "The search index must includes fields for your content and for permission metadata. Assign the new permission filter option to a string field and make sure the field is filterable. The search engine builds the filter internally at query time."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "id": "9863061f",
89 | "metadata": {},
90 | "outputs": [],
91 | "source": [
92 | "from azure.search.documents.indexes.models import SearchField, SearchIndex, PermissionFilter, SearchIndexPermissionFilterOption\n",
93 | "from azure.search.documents.indexes import SearchIndexClient\n",
94 | "\n",
95 | "index_client = SearchIndexClient(endpoint=endpoint, credential=credential)\n",
96 | "index = SearchIndex(\n",
97 | " name=index_name,\n",
98 | " fields=[\n",
99 | " SearchField(name=\"id\", type=\"Edm.String\", key=True, filterable=True, sortable=True),\n",
100 | " SearchField(name=\"oid\", type=\"Collection(Edm.String)\", retrievable=True, filterable=True, permission_filter=PermissionFilter.USER_IDS),\n",
101 | " SearchField(name=\"group\", type=\"Collection(Edm.String)\", retrievable=True, filterable=True, permission_filter=PermissionFilter.GROUP_IDS),\n",
102 | " SearchField(name=\"name\", type=\"Edm.String\", searchable=True)\n",
103 | " ],\n",
104 | " permission_filter_option=SearchIndexPermissionFilterOption.ENABLED\n",
105 | ")\n",
106 | "\n",
107 | "index_client.create_index(index=index)\n",
108 | "print(f\"Index '{index_name}' created with permission filter option enabled.\")"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "id": "f5cf4169",
114 | "metadata": {},
115 | "source": [
116 | "## Connect to Graph to find your object ID (OID) and groups\n",
117 | "\n",
118 | "This step calls the Graph APIs to get a few group IDs for your Microsoft Entra identity. Your group IDs will be added to the access control list of the objects created in the next step."
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "id": "63904f09",
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "from msgraph import GraphServiceClient\n",
129 | "client = GraphServiceClient(credentials=credential, scopes=[\"https://graph.microsoft.com/.default\"])\n",
130 | "\n",
131 | "groups = await client.me.member_of.get()\n",
132 | "me = await client.me.get()\n",
133 | "oid = me.id"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "id": "a9ce6d0f",
139 | "metadata": {},
140 | "source": [
141 | "## Upload sample data\n",
142 | "\n",
143 | "This step creates the container, folders, and uploads documents into Azure Storage. It assigns your group IDs to to the access control list for each file."
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "id": "8fb830a1",
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "from azure.search.documents import SearchClient\n",
154 | "search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)\n",
155 | "\n",
156 | "documents = [\n",
157 | " { \"id\": \"1\", \"oid\": [oid], \"group\": [groups.value[0].id], \"name\": \"Document 1\" },\n",
158 | " { \"id\": \"2\", \"oid\": [\"all\"], \"group\": [groups.value[0].id], \"name\": \"Document 2\" },\n",
159 | " { \"id\": \"3\", \"oid\": [oid], \"group\": [\"all\"], \"name\": \"Document 3\" },\n",
160 | " { \"id\": \"4\", \"oid\": [\"none\"], \"group\": [\"none\"], \"name\": \"Document 4\" },\n",
161 | " { \"id\": \"5\", \"oid\": [\"none\"], \"group\": [groups.value[0].id], \"name\": \"Document 5\" },\n",
162 | "]\n",
163 | "search_client.upload_documents(documents=documents)\n",
164 | "print(\"Documents uploaded to the index.\")\n"
165 | ]
166 | },
167 | {
168 | "cell_type": "markdown",
169 | "id": "e5c93f76",
170 | "metadata": {},
171 | "source": [
172 | "## Search sample data with x-ms-query-source-authorization\n",
173 | "\n",
174 | "This query uses an empty search string (`*`) to provide an unqualified search. It returns the file name and permission metadata associated with each file. Notice that each file is associated with a different group ID."
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "id": "cd872e8c",
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "results = search_client.search(search_text=\"*\", x_ms_query_source_authorization=token_provider(), select=\"name,oid,group\", order_by=\"id asc\")\n",
185 | "\n",
186 | "for result in results:\n",
187 | " print(f\"Name: {result['name']}, OID: {result['oid']}, Group: {result['group']}\")"
188 | ]
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "id": "d31b67d8",
193 | "metadata": {},
194 | "source": [
195 | "## Search sample data without x-ms-query-source-authorization \n",
196 | "\n",
197 | "This step demonstrates the user experience when authorization fails. No results are returned in the response."
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "id": "a1f2f2a0",
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "results = search_client.search(search_text=\"*\", x_ms_query_source_authorization=None, select=\"name,oid,group\", order_by=\"id asc\")\n",
208 | "\n",
209 | "for result in results:\n",
210 | " print(f\"Name: {result['name']}, OID: {result['oid']}, Group: {result['group']}\")"
211 | ]
212 | },
213 | {
214 | "cell_type": "markdown",
215 | "id": "5ad253ec",
216 | "metadata": {},
217 | "source": [
218 | "## Next step\n",
219 | "\n",
220 | "To learn more, see [Document-level access control in Azure AI Search](https://learn.microsoft.com/azure/search/search-document-level-access-overview)."
221 | ]
222 | }
223 | ],
224 | "metadata": {
225 | "kernelspec": {
226 | "display_name": ".venv",
227 | "language": "python",
228 | "name": "python3"
229 | },
230 | "language_info": {
231 | "codemirror_mode": {
232 | "name": "ipython",
233 | "version": 3
234 | },
235 | "file_extension": ".py",
236 | "mimetype": "text/x-python",
237 | "name": "python",
238 | "nbconvert_exporter": "python",
239 | "pygments_lexer": "ipython3",
240 | "version": "3.12.10"
241 | }
242 | },
243 | "nbformat": 4,
244 | "nbformat_minor": 5
245 | }
246 |
--------------------------------------------------------------------------------
/cmk-example/cmk-example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python example for CMK-encryption in Azure AI Search\n",
8 | "\n",
9 | "This notebook provides sample script for [adding customer-managed key (CMK) encryption](https://learn.microsoft.com/azure/search/search-security-manage-encryption-keys) to objects on Azure AI Search."
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## Prerequisites\n",
17 | "\n",
18 | "\n",
19 | "- [Azure AI Search](https://learn.microsoft.com/azure/search/search-create-service-portal)\n",
     20 |     "- [Azure Key Vault](https://learn.microsoft.com/azure/key-vault/general/overview)\n",
21 | "- [Azure Storage](https://learn.microsoft.com/azure/storage/common/storage-account-create) or [Azure Log Analytics](https://learn.microsoft.com/azure/azure-monitor/logs/quick-create-workspace?tabs=azure-portal) for data retention of audit logs.\n",
22 | "- [Visual Studio Code](https://code.visualstudio.com/download) with the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Jupyter package](https://pypi.org/project/jupyter/).\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "## Sign in to Azure\n",
30 | "\n",
31 | "You might not need this step, but if downstream connections fail with a 401 during indexer pipeline execution, it could be because you're using the wrong tenant or subscription. You can avoid this issue by signing in from the command line, explicitly setting the tenant ID and choosing the right subscription.\n",
32 | "\n",
33 | "This section assumes you have the [Azure CLI](https://learn.microsoft.com/cli/azure/authenticate-azure-cli-interactively).\n",
34 | "\n",
35 | "1. Open a command line prompt.\n",
36 | "\n",
37 | "1. Run this command to get a list of Azure tenants: `az account tenant list`\n",
38 | "\n",
     39 |     "1. If you have multiple tenants, set the tenant: `az login --tenant <tenant-id>`\n",
40 | "\n",
41 | "If you have multiple subscriptions, a list is provided so that you can select one."
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "## Create a virtual environment in Visual Studio Code\n",
49 | "\n",
50 | "Create a virtual environment so that you can install the dependencies in isolation.\n",
51 | "\n",
     52 |     "1. In Visual Studio Code, open the folder containing cmk-example.ipynb.\n",
53 | "\n",
54 | "1. Press Ctrl-shift-P to open the command palette, search for \"Python: Create Environment\", and then select `Venv` to create a virtual environment in the current workspace.\n",
55 | "\n",
56 | "1. Select Tutorial-RAG\\tutorial-rag-requirements.txt for the dependencies.\n",
57 | "\n",
58 | "It takes several minutes to create the environment. When the environment is ready, continue to the next step."
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "## Install packages"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "! pip install python-dotenv\n",
75 | "! pip install azure-core\n",
76 | "! pip install azure-search-documents\n",
77 | "! pip install azure-storage-blob\n",
78 | "! pip install azure-identity\n",
79 | "! pip install openai\n",
80 | "! pip install aiohttp"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "## Create an index"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 26,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "# Set variables\n",
97 | "AZURE_SEARCH_SERVICE: str = \"\"\n",
98 | "AZURE_KEY_VAULT_NAME: str = \"\"\n",
99 | "AZURE_KEY_VAULT_URI: str = \"\"\n",
100 | "AZURE_KEY_VAULT_VERSION: str = \"\""
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 24,
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "name": "stdout",
110 | "output_type": "stream",
111 | "text": [
112 | " test-cmk-index-qs created\n"
113 | ]
114 | }
115 | ],
116 | "source": [
117 | "from azure.search.documents.indexes import SearchIndexClient\n",
118 | "from azure.search.documents.indexes.models import (\n",
119 | " SimpleField,\n",
120 | " SearchFieldDataType,\n",
121 | " SearchableField,\n",
122 | " SearchIndex,\n",
123 | " SearchResourceEncryptionKey\n",
124 | ")\n",
125 | "from azure.identity import DefaultAzureCredential\n",
126 | "\n",
127 | "credential = DefaultAzureCredential()\n",
128 | "\n",
129 | "# Create a search index \n",
130 | "index_name = \"test-cmk-index\"\n",
131 | "index_client = SearchIndexClient(endpoint=AZURE_SEARCH_SERVICE, credential=credential) \n",
132 | "fields = [\n",
133 | " SimpleField(name=\"Id\", type=SearchFieldDataType.String, key=True),\n",
134 | " SearchableField(name=\"Description\", type=SearchFieldDataType.String)\n",
135 | " ]\n",
136 | "\n",
137 | "scoring_profiles = []\n",
138 | "suggester = []\n",
139 | "encryption_key = SearchResourceEncryptionKey(\n",
140 | " key_name=AZURE_KEY_VAULT_NAME,\n",
141 | " key_version=AZURE_KEY_VAULT_VERSION,\n",
142 | " vault_uri=AZURE_KEY_VAULT_URI\n",
143 | ")\n",
144 | "\n",
    145 |     "# Create the search index\n",
146 | "index = SearchIndex(name=index_name, fields=fields, encryption_key=encryption_key)\n",
147 | "result = index_client.create_or_update_index(index)\n",
148 | "print(f' {result.name} created')"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "## Get the index definition"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "index_name = \"test-cmk-index\"\n",
165 | "index_client = SearchIndexClient(endpoint=AZURE_SEARCH_SERVICE, credential=credential) \n",
166 | "\n",
167 | "result = index_client.get_index(index_name) \n",
168 | "print(f\"{result}\") "
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {},
174 | "source": [
175 | "## Load the index"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 29,
181 | "metadata": {},
182 | "outputs": [
183 | {
184 | "name": "stdout",
185 | "output_type": "stream",
186 | "text": [
187 | "Upload of new document succeeded: True\n"
188 | ]
189 | }
190 | ],
191 | "source": [
192 | "from azure.search.documents import SearchClient\n",
193 | "\n",
194 | "# Create a documents payload\n",
195 | "documents = [\n",
196 | " {\n",
197 | " \"@search.action\": \"upload\",\n",
198 | " \"Id\": \"1\",\n",
199 | " \"Description\": \"The hotel is ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Time's Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.\"\n",
200 | " },\n",
201 | " {\n",
202 | " \"@search.action\": \"upload\",\n",
203 | " \"Id\": \"2\",\n",
204 | " \"Description\": \"The hotel is situated in a nineteenth century plaza, which has been expanded and renovated to the highest architectural standards to create a modern, functional and first-class hotel in which art and unique historical elements coexist with the most modern comforts.\"\n",
205 | " },\n",
206 | " {\n",
207 | " \"@search.action\": \"upload\",\n",
208 | " \"Id\": \"3\",\n",
209 | " \"Description\": \"The hotel stands out for its gastronomic excellence under the management of William Dough, who advises on and oversees all of the Hotel's restaurant services.\"\n",
210 | " },\n",
211 | " {\n",
212 | " \"@search.action\": \"upload\",\n",
213 | " \"Id\": \"4\",\n",
214 | " \"Description\": \"The hotel is located in the heart of the historic center of Sublime in an extremely vibrant and lively area within short walking distance to the sites and landmarks of the city and is surrounded by the extraordinary beauty of churches, buildings, shops and monuments. Sublime Cliff is part of a lovingly restored 1800 palace.\"\n",
215 | " }\n",
216 | "]\n",
217 | "\n",
218 | "search_client = SearchClient(endpoint=AZURE_SEARCH_SERVICE, index_name=index_name, credential=credential)\n",
219 | "try:\n",
220 | " result = search_client.upload_documents(documents=documents)\n",
221 | " print(\"Upload of new document succeeded: {}\".format(result[0].succeeded))\n",
222 | "except Exception as ex:\n",
223 | " print (ex.message)\n",
224 | "\n",
225 | " index_client = SearchClient(endpoint=AZURE_SEARCH_SERVICE, credential=credential)"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {},
231 | "source": [
232 | "## Query encrypted content"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 31,
238 | "metadata": {},
239 | "outputs": [
240 | {
241 | "name": "stdout",
242 | "output_type": "stream",
243 | "text": [
244 | "Score: 0.6130029\n",
245 | "Id: 4\n",
246 | "Description: The hotel is located in the heart of the historic center of Sublime in an extremely vibrant and lively area within short walking distance to the sites and landmarks of the city and is surrounded by the extraordinary beauty of churches, buildings, shops and monuments. Sublime Cliff is part of a lovingly restored 1800 palace.\n",
247 | "Score: 0.26286605\n",
248 | "Id: 1\n",
249 | "Description: The hotel is ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Time's Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.\n"
250 | ]
251 | }
252 | ],
253 | "source": [
254 | "from azure.search.documents import SearchClient\n",
255 | "\n",
256 | "query = \"historic\" \n",
257 | "\n",
258 | "search_client = SearchClient(endpoint=AZURE_SEARCH_SERVICE, credential=credential, index_name=index_name)\n",
259 | " \n",
260 | "results = search_client.search( \n",
261 | " query_type='simple',\n",
262 | " search_text=query, \n",
263 | " select=[\"Id\", \"Description\"],\n",
264 | " include_total_count=True\n",
265 | " )\n",
266 | " \n",
267 | "for result in results: \n",
268 | " print(f\"Score: {result['@search.score']}\")\n",
269 | " print(f\"Id: {result['Id']}\")\n",
270 | " print(f\"Description: {result['Description']}\")\n"
271 | ]
272 | }
273 | ],
274 | "metadata": {
275 | "kernelspec": {
276 | "display_name": ".venv",
277 | "language": "python",
278 | "name": "python3"
279 | },
280 | "language_info": {
281 | "codemirror_mode": {
282 | "name": "ipython",
283 | "version": 3
284 | },
285 | "file_extension": ".py",
286 | "mimetype": "text/x-python",
287 | "name": "python",
288 | "nbconvert_exporter": "python",
289 | "pygments_lexer": "ipython3",
290 | "version": "3.11.9"
291 | }
292 | },
293 | "nbformat": 4,
294 | "nbformat_minor": 2
295 | }
296 |
--------------------------------------------------------------------------------
/Quickstart-Document-Permissions-Pull-API/document-permissions-pull-api.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "aba4346f",
6 | "metadata": {},
7 | "source": [
8 | "# Document level access in Azure AI Search using the indexer pull APIs\n",
9 | "\n",
     10 |     "In Azure AI Search, you can use an indexer to pull content into a search index for indexing. This notebook shows you how to index blobs that have access control lists (ACLs) in Azure Data Lake Storage (ADLS) Gen2, and then query the index to return only those results that the user is authorized to view.\n",
11 | "\n",
12 | "The security principal behind the query access token determines the \"user\". The ACLs on folders and files determine whether the user is authorized to view the content, and that metadata is pulled into the index along with other document content. Internally at query time, the search engine filters out any documents that aren't associated with the object ID.\n",
13 | "\n",
14 | "This feature is currently in preview.\n",
15 | "\n",
     16 |     "For an alternative approach that uses push APIs to index any data, see [Quickstart-Document-Permissions-Push-API](../Quickstart-Document-Permissions-Push-API/document-permissions-push-api.ipynb).\n",
17 | "\n",
18 | "\n",
19 | "## Prerequisites\n",
20 | "\n",
21 | "+ Azure AI Search, Basic tier or higher, with a [system-assigned managed identity](https://learn.microsoft.com/azure/search/search-howto-managed-identities-data-sources) and [role-based access control](https://learn.microsoft.com/azure/search/search-security-enable-roles).\n",
22 | "\n",
23 | "+ Azure Storage, general purpose account, with a [hierarchical namespace](https://learn.microsoft.com/azure/storage/blobs/create-data-lake-storage-account).\n",
24 | "\n",
25 | "+ Folders and files, where each file has an [access control list specified](https://learn.microsoft.com/azure/storage/blobs/data-lake-storage-access-control). We recommend group IDs.\n",
26 | "\n",
27 | "+ [Visual Studio Code](https://code.visualstudio.com/download) with the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Jupyter package](https://pypi.org/project/jupyter/).\n",
28 | "\n",
29 | "## Permissions\n",
30 | "\n",
31 | "This walkthrough uses Microsoft Entra ID authentication and authorization.\n",
32 | "\n",
33 | "+ On Azure Storage, **Storage Blob Data Reader** permissions are required for both the search service identity and for your user account since you are testing locally. You also need **Storage Blob Data Contributor** because the sample includes code for creating and configuring a container and its contents.\n",
34 | "\n",
35 | "+ On Azure AI Search, assign yourself **Search Service Contributor**, **Search Index Data Contributor**, and **Search Index Data Reader** permissions to create objects and run queries. For more information, see [Connect to Azure AI Search using roles](https://learn.microsoft.com/azure/search/search-security-rbac) and [Quickstart: Connect without keys for local testing](https://learn.microsoft.com/azure/search/search-get-started-rbac).\n",
36 | "\n",
37 | "## Limitations\n",
38 | "\n",
39 | "+ Parsing indexer options aren't currently supported. There's no support for CSV, JSON, or Markdown parsing."
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "id": "f445040a",
45 | "metadata": {},
46 | "source": [
47 | "## Set up connections\n",
48 | "\n",
49 | "Save the `sample.env` file as `.env` and then modify the environment variables to use your Azure endpoints. All variables must be specified.\n",
50 | "\n",
51 | "You need endpoints for:\n",
52 | "\n",
53 | "+ Azure AI Search\n",
54 | "+ Azure Storage\n",
55 | "\n",
56 | "For Azure AI Search, find the endpoint in the [Azure portal](https://portal.azure.com), in the **Essentials** section of the Overview page.\n",
57 | "\n",
58 | "For Azure Storage, follow the guidance in [Get storage account configuration information](https://learn.microsoft.com/azure/storage/common/storage-account-get-info) to specify all of the variables in the `.env` file. \n",
59 | "\n",
60 | "## Load connections\n",
61 | "\n",
     62 |     "We recommend creating a virtual environment to run this sample code. In Visual Studio Code, open the Command Palette (Ctrl+Shift+P) to create an environment. This notebook was tested on Python 3.10.\n",
63 | "\n",
64 | "After the environment is created, load the environment variables to set up connections and object names."
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": null,
70 | "id": "0b40bb5b",
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "from dotenv import load_dotenv\n",
75 | "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
76 | "import os\n",
77 | "\n",
78 | "load_dotenv(override=True) # take environment variables from .env.\n",
79 | "\n",
80 | "# The following variables from your .env file are used in this notebook\n",
81 | "endpoint = os.environ[\"AZURE_SEARCH_ENDPOINT\"]\n",
82 | "credential = DefaultAzureCredential()\n",
83 | "index_name = os.getenv(\"AZURE_SEARCH_INDEX\", \"document-permissions-indexer-idx\")\n",
84 | "indexer_name = os.getenv(\"AZURE_SEARCH_INDEXER\", \"document-permissions-indexer-idxr\")\n",
85 | "datasource_name = os.getenv(\"AZURE_SEARCH_DATASOURCE\", \"document-permissions-indexer-ds\")\n",
86 | "adls_gen2_account_name = os.getenv(\"AZURE_STORAGE_ACCOUNT_NAME\")\n",
87 | "adls_gen2_container_name = os.getenv(\"AZURE_STORAGE_CONTAINER_NAME\")\n",
88 | "adls_gen2_connection_string = os.environ[\"AZURE_STORAGE_CONNECTION_STRING\"]\n",
89 | "adls_gen2_resource_id = os.environ[\"AZURE_STORAGE_RESOURCE_ID\"]\n",
90 | "token_provider = get_bearer_token_provider(credential, \"https://search.azure.com/.default\")"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "id": "2d46b940",
96 | "metadata": {},
97 | "source": [
98 | "## Create an index\n",
99 | "\n",
100 | "The search index must include fields for your content and for permission metadata. Assign the new permission filter option to a string field and make sure the field is filterable. The search engine builds the filter internally at query time."
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "id": "2f981cad",
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "from azure.search.documents.indexes.models import SearchField, SearchIndex, PermissionFilter, SearchIndexPermissionFilterOption\n",
111 | "from azure.search.documents.indexes import SearchIndexClient\n",
112 | "\n",
113 | "index_client = SearchIndexClient(endpoint=endpoint, credential=credential)\n",
114 | "index = SearchIndex(\n",
115 | " name=index_name,\n",
116 | " fields=[\n",
117 | " SearchField(name=\"id\", type=\"Edm.String\", key=True, filterable=True, sortable=True),\n",
118 | " SearchField(name=\"content\", type=\"Edm.String\", searchable=True, filterable=False, sortable=False),\n",
119 | " SearchField(name=\"oids\", type=\"Collection(Edm.String)\", filterable=True, permission_filter=PermissionFilter.USER_IDS),\n",
120 | " SearchField(name=\"groups\", type=\"Collection(Edm.String)\", filterable=True, permission_filter=PermissionFilter.GROUP_IDS),\n",
121 | " SearchField(name=\"metadata_storage_path\", type=\"Edm.String\", searchable=True),\n",
122 | " SearchField(name=\"metadata_storage_name\", type=\"Edm.String\", searchable=True)\n",
123 | " ],\n",
124 | " permission_filter_option=SearchIndexPermissionFilterOption.ENABLED\n",
125 | ")\n",
126 | "\n",
127 | "index_client.create_or_update_index(index=index)\n",
128 | "print(f\"Index '{index_name}' created with permission filter option enabled.\")"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "id": "2b8945a2",
134 | "metadata": {},
135 | "source": [
136 | "## Create a data source\n",
137 | "\n",
138 | "Set the `IndexerPermissionOption` so that the indexer knows to retrieve the permission metadata."
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "id": "b25aaf7b",
145 | "metadata": {},
146 | "outputs": [],
147 | "source": [
148 | "from azure.search.documents.indexes.models import SearchIndexerDataSourceConnection, SearchIndexerDataSourceType, IndexerPermissionOption, SearchIndexerDataContainer, DataSourceCredentials\n",
149 | "from azure.search.documents.indexes import SearchIndexerClient\n",
150 | "indexer_client = SearchIndexerClient(endpoint=endpoint, credential=credential)\n",
151 | "datasource = SearchIndexerDataSourceConnection(\n",
152 | " name=datasource_name,\n",
153 | " type=SearchIndexerDataSourceType.ADLS_GEN2,\n",
154 | " connection_string=f\"ResourceId={adls_gen2_resource_id};\",\n",
155 | " container=SearchIndexerDataContainer(name=adls_gen2_container_name),\n",
156 | " indexer_permission_options=[IndexerPermissionOption.GROUP_IDS]\n",
157 | ")\n",
158 | "\n",
159 | "indexer_client.create_or_update_data_source_connection(datasource)\n",
160 | "print(f\"Datasource '{datasource_name}' created with permission filter option enabled.\")"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "id": "ff5b912d",
166 | "metadata": {},
167 | "source": [
168 | "## Get group IDs\n",
169 | "\n",
170 | "This step calls the Graph APIs to get a few group IDs for your Microsoft Entra identity. Your group IDs will be added to the access control list of the objects created in the next step. Two group identifiers are retrieved. Each one is assigned to a different file."
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "id": "329fe160",
177 | "metadata": {},
178 | "outputs": [],
179 | "source": [
180 | "from msgraph import GraphServiceClient\n",
181 | "client = GraphServiceClient(credentials=credential, scopes=[\"https://graph.microsoft.com/.default\"])\n",
182 | "\n",
183 | "groups = await client.me.member_of.get()\n",
184 | "first_group_id = groups.value[0].id\n",
185 | "second_group_id = groups.value[1].id"
186 | ]
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "id": "20588dc3",
191 | "metadata": {},
192 | "source": [
193 | "## Upload sample directory and file\n",
194 | "\n",
    195 |     "This step creates the container, folders, and uploads the files into Azure Storage. It assigns your group IDs to the access control list for each file."
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": null,
201 | "id": "acd28b29",
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "from azure.storage.filedatalake import DataLakeServiceClient\n",
206 | "import requests\n",
207 | "\n",
208 | "service = DataLakeServiceClient.from_connection_string(adls_gen2_connection_string, credential=credential)\n",
209 | "container = service.get_file_system_client(adls_gen2_container_name)\n",
210 | "if not container.exists():\n",
211 | " container.create_file_system()\n",
212 | "root_dir_client = container.get_directory_client(\"/\")\n",
213 | "state_parks_dir_client = container.get_directory_client(\"state-parks\")\n",
214 | "state_parks_dir_client.create_directory()\n",
215 | "root_dir_client.update_access_control_recursive(f\"group:{first_group_id}:rwx\")\n",
216 | "root_dir_client.update_access_control_recursive(f\"group:{second_group_id}:rwx\")\n",
217 | "\n",
218 | "oregon_dir_client = state_parks_dir_client.create_sub_directory(\"oregon\")\n",
219 | "oregon_dir_client.create_directory()\n",
220 | "file_client = oregon_dir_client.create_file(\"oregon_state_parks.csv\")\n",
221 | "oregon_state_parks_content = requests.get(\"https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/state-parks/Oregon/oregon_state_parks.csv\").content.decode(\"utf-8\")\n",
222 | "file_client.upload_data(oregon_state_parks_content, overwrite=True)\n",
223 | "oregon_dir_client.update_access_control_recursive(f\"group:{first_group_id}:rwx\")\n",
224 | "\n",
225 | "washington_dir_client = state_parks_dir_client.create_sub_directory(\"washington\")\n",
226 | "washington_dir_client.create_directory()\n",
227 | "file_client = washington_dir_client.create_file(\"washington_state_parks.csv\")\n",
228 | "washington_state_parks_content = requests.get(\"https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/state-parks/Washington/washington_state_parks.csv\").content.decode(\"utf-8\")\n",
229 | "file_client.upload_data(washington_state_parks_content, overwrite=True)\n",
230 | "washington_dir_client.update_access_control_recursive(f\"group:{second_group_id}:rwx\")"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "id": "ca6de2ad",
236 | "metadata": {},
237 | "source": [
238 | "## Run the indexer\n",
239 | "\n",
240 | "Start the indexer to run all operations, from data retrieval to indexing. Any connection errors or permission problems become evident here."
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": null,
246 | "id": "2ce7eb5e",
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "from azure.search.documents.indexes.models import SearchIndexer, FieldMapping\n",
251 | "\n",
252 | "indexer = SearchIndexer(\n",
253 | " name=indexer_name,\n",
254 | " target_index_name=index_name,\n",
255 | " data_source_name=datasource_name,\n",
256 | " field_mappings=[\n",
257 | " FieldMapping(source_field_name=\"metadata_group_ids\", target_field_name=\"groups\"),\n",
258 | " FieldMapping(source_field_name=\"metadata_user_ids\", target_field_name=\"oids\"),\n",
259 | " ]\n",
260 | ")\n",
261 | "\n",
262 | "indexer_client.create_or_update_indexer(indexer)\n",
263 | "print(f\"Indexer '{indexer_name}' created\")\n"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "id": "987dd496",
269 | "metadata": {},
270 | "source": [
271 | "## Search sample data using x-ms-query-source-authorization\n",
272 | "\n",
    273 |     "Wait for the indexer to finish processing before running queries. This query uses a wildcard search string (`*`) for an unqualified search. It returns the file name and permission metadata associated with each file. Notice that each file is associated with a different group ID."
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": null,
279 | "id": "7a899da1",
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "from azure.search.documents import SearchClient\n",
284 | "search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)\n",
285 | "\n",
286 | "results = search_client.search(search_text=\"*\", x_ms_query_source_authorization=token_provider(), select=\"metadata_storage_path,oids,groups\", order_by=\"id asc\")\n",
287 | "for result in results:\n",
288 | " print(f\"Path: {result['metadata_storage_path']}, OID: {result['oids']}, Group: {result['groups']}\")"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "id": "c712ab8c",
294 | "metadata": {},
295 | "source": [
296 | "## Search sample data without x-ms-query-source-authorization \n",
297 | "\n",
298 | "This step demonstrates the user experience when authorization fails. No results are returned in the response."
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "id": "72d203f0",
305 | "metadata": {},
306 | "outputs": [],
307 | "source": [
308 | "from azure.search.documents import SearchClient\n",
309 | "search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)\n",
310 | "\n",
311 | "results = search_client.search(search_text=\"*\", x_ms_query_source_authorization=None, select=\"metadata_storage_path,oids,groups\", order_by=\"id asc\")\n",
312 | "for result in results:\n",
313 | " print(f\"Path: {result['metadata_storage_path']}, OID: {result['oids']}, Group: {result['groups']}\")"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "id": "e1ac3c84",
319 | "metadata": {},
320 | "source": [
321 | "## Next step\n",
322 | "\n",
323 | "To learn more, see [Document-level access control in Azure AI Search](https://learn.microsoft.com/azure/search/search-document-level-access-overview)."
324 | ]
325 | }
326 | ],
327 | "metadata": {
328 | "kernelspec": {
329 | "display_name": ".venv",
330 | "language": "python",
331 | "name": "python3"
332 | },
333 | "language_info": {
334 | "codemirror_mode": {
335 | "name": "ipython",
336 | "version": 3
337 | },
338 | "file_extension": ".py",
339 | "mimetype": "text/x-python",
340 | "name": "python",
341 | "nbconvert_exporter": "python",
342 | "pygments_lexer": "ipython3",
343 | "version": "3.12.10"
344 | }
345 | },
346 | "nbformat": 4,
347 | "nbformat_minor": 5
348 | }
349 |
--------------------------------------------------------------------------------
/Quickstart/azure-search-quickstart.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "attachments": {},
5 | "cell_type": "markdown",
6 | "metadata": {},
7 | "source": [
8 | "# Create a search index in Azure AI Search using the Azure SDK for Python"
9 | ]
10 | },
11 | {
12 | "attachments": {},
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "This notebook steps through creating, loading, and querying an index in Azure AI Search index by calling the azure-search-documents library in the Azure SDK for Python. "
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "## Install packages and set variables"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "! pip install azure-search-documents==11.7.0b2 --quiet\n",
33 | "! pip install azure-identity --quiet\n",
34 | "! pip install python-dotenv --quiet"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "# Provide variables\n",
44 | "search_endpoint: str = \"PUT-YOUR-SEARCH-ENDPOINT-HERE\"\n",
45 | "search_api_key: str = \"PUT-YOUR-SEARCH-API-KEY-HERE\"\n",
46 | "index_name: str = \"hotels-quickstart-csharp\""
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "## Create an index"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "from azure.core.credentials import AzureKeyCredential\n",
63 | "\n",
64 | "credential = AzureKeyCredential(search_api_key)\n",
65 | "\n",
66 | "from azure.search.documents.indexes import SearchIndexClient\n",
67 | "from azure.search.documents import SearchClient\n",
68 | "from azure.search.documents.indexes.models import (\n",
69 | " ComplexField,\n",
70 | " SimpleField,\n",
71 | " SearchFieldDataType,\n",
72 | " SearchableField,\n",
73 | " SearchIndex\n",
74 | ")\n",
75 | "\n",
76 | "# Create a search schema\n",
77 | "index_client = SearchIndexClient(\n",
78 | " endpoint=search_endpoint, credential=credential)\n",
79 | "fields = [\n",
80 | " SimpleField(name=\"HotelId\", type=SearchFieldDataType.String, key=True),\n",
81 | " SearchableField(name=\"HotelName\", type=SearchFieldDataType.String, sortable=True),\n",
82 | " SearchableField(name=\"Description\", type=SearchFieldDataType.String, analyzer_name=\"en.lucene\"),\n",
83 | " SearchableField(name=\"Category\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n",
84 | "\n",
85 | " SearchableField(name=\"Tags\", collection=True, type=SearchFieldDataType.String, facetable=True, filterable=True),\n",
86 | "\n",
87 | " SimpleField(name=\"ParkingIncluded\", type=SearchFieldDataType.Boolean, facetable=True, filterable=True, sortable=True),\n",
88 | " SimpleField(name=\"LastRenovationDate\", type=SearchFieldDataType.DateTimeOffset, facetable=True, filterable=True, sortable=True),\n",
89 | " SimpleField(name=\"Rating\", type=SearchFieldDataType.Double, facetable=True, filterable=True, sortable=True),\n",
90 | "\n",
91 | " ComplexField(name=\"Address\", fields=[\n",
92 | " SearchableField(name=\"StreetAddress\", type=SearchFieldDataType.String),\n",
93 | " SearchableField(name=\"City\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n",
94 | " SearchableField(name=\"StateProvince\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n",
95 | " SearchableField(name=\"PostalCode\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n",
96 | " SearchableField(name=\"Country\", type=SearchFieldDataType.String, facetable=True, filterable=True, sortable=True),\n",
97 | " ])\n",
98 | " ]\n",
99 | "\n",
100 | "scoring_profiles = []\n",
101 | "suggester = [{'name': 'sg', 'source_fields': ['Tags', 'Address/City', 'Address/Country']}]\n",
102 | "\n",
103 |     "# Create the search index\n",
104 | "index = SearchIndex(name=index_name, fields=fields, suggesters=suggester, scoring_profiles=scoring_profiles)\n",
105 | "result = index_client.create_or_update_index(index)\n",
106 | "print(f' {result.name} created')"
107 | ]
108 | },
109 | {
110 | "attachments": {},
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "## Create a documents payload"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {},
121 | "outputs": [],
122 | "source": [
123 | "# Create a documents payload\n",
124 | "documents = [\n",
125 | " {\n",
126 | " \"@search.action\": \"upload\",\n",
127 | " \"HotelId\": \"1\",\n",
128 | " \"HotelName\": \"Stay-Kay City Hotel\",\n",
129 | " \"Description\": \"This classic hotel is fully-refurbished and ideally located on the main commercial artery of the city in the heart of New York. A few minutes away is Times Square and the historic centre of the city, as well as other places of interest that make New York one of America's most attractive and cosmopolitan cities.\",\n",
130 | " \"Category\": \"Boutique\",\n",
131 | " \"Tags\": [ \"view\", \"air conditioning\", \"concierge\" ],\n",
132 | " \"ParkingIncluded\": \"false\",\n",
133 | " \"LastRenovationDate\": \"2020-01-18T00:00:00Z\",\n",
134 | " \"Rating\": 3.60,\n",
135 | " \"Address\": {\n",
136 | " \"StreetAddress\": \"677 5th Ave\",\n",
137 | " \"City\": \"New York\",\n",
138 | " \"StateProvince\": \"NY\",\n",
139 | " \"PostalCode\": \"10022\",\n",
140 | " \"Country\": \"USA\"\n",
141 | " }\n",
142 | " },\n",
143 | " {\n",
144 | " \"@search.action\": \"upload\",\n",
145 | " \"HotelId\": \"2\",\n",
146 | " \"HotelName\": \"Old Century Hotel\",\n",
147 | " \"Description\": \"The hotel is situated in a nineteenth century plaza, which has been expanded and renovated to the highest architectural standards to create a modern, functional and first-class hotel in which art and unique historical elements coexist with the most modern comforts. The hotel also regularly hosts events like wine tastings, beer dinners, and live music.\",\n",
148 | " \"Category\": \"Boutique\",\n",
149 | " \"Tags\": [ \"pool\", \"free wifi\", \"concierge\" ],\n",
150 | " \"ParkingIncluded\": \"false\",\n",
151 | " \"LastRenovationDate\": \"2019-02-18T00:00:00Z\",\n",
152 | " \"Rating\": 3.60,\n",
153 | " \"Address\": {\n",
154 | " \"StreetAddress\": \"140 University Town Center Dr\",\n",
155 | " \"City\": \"Sarasota\",\n",
156 | " \"StateProvince\": \"FL\",\n",
157 | " \"PostalCode\": \"34243\",\n",
158 | " \"Country\": \"USA\"\n",
159 | " }\n",
160 | " },\n",
161 | " {\n",
162 | " \"@search.action\": \"upload\",\n",
163 | " \"HotelId\": \"3\",\n",
164 | " \"HotelName\": \"Gastronomic Landscape Hotel\",\n",
165 | " \"Description\": \"The Gastronomic Hotel stands out for its culinary excellence under the management of William Dough, who advises on and oversees all of the Hotel’s restaurant services.\",\n",
166 | " \"Category\": \"Suite\",\n",
167 | " \"Tags\": [ \"restaurant\", \"bar\", \"continental breakfast\" ],\n",
168 | " \"ParkingIncluded\": \"true\",\n",
169 | " \"LastRenovationDate\": \"2015-09-20T00:00:00Z\",\n",
170 | " \"Rating\": 4.80,\n",
171 | " \"Address\": {\n",
172 | " \"StreetAddress\": \"3393 Peachtree Rd\",\n",
173 | " \"City\": \"Atlanta\",\n",
174 | " \"StateProvince\": \"GA\",\n",
175 | " \"PostalCode\": \"30326\",\n",
176 | " \"Country\": \"USA\"\n",
177 | " }\n",
178 | " },\n",
179 | " {\n",
180 | " \"@search.action\": \"upload\",\n",
181 | " \"HotelId\": \"4\",\n",
182 | " \"HotelName\": \"Sublime Palace Hotel\",\n",
183 | " \"Description\": \"Sublime Palace Hotel is located in the heart of the historic center of Sublime in an extremely vibrant and lively area within short walking distance to the sites and landmarks of the city and is surrounded by the extraordinary beauty of churches, buildings, shops and monuments. Sublime Cliff is part of a lovingly restored 19th century resort, updated for every modern convenience.\",\n",
184 | " \"Category\": \"Boutique\",\n",
185 | " \"Tags\": [ \"concierge\", \"view\", \"air conditioning\" ],\n",
186 | " \"ParkingIncluded\": \"true\",\n",
187 | " \"LastRenovationDate\": \"2020-02-06T00:00:00Z\",\n",
188 | " \"Rating\": 4.60,\n",
189 | " \"Address\": {\n",
190 | " \"StreetAddress\": \"7400 San Pedro Ave\",\n",
191 | " \"City\": \"San Antonio\",\n",
192 | " \"StateProvince\": \"TX\",\n",
193 | " \"PostalCode\": \"78216\",\n",
194 | " \"Country\": \"USA\"\n",
195 | " }\n",
196 | " }\n",
197 | "]"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "## Upload documents"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {},
211 | "outputs": [],
212 | "source": [
213 | "search_client = SearchClient(endpoint=search_endpoint,\n",
214 | " index_name=index_name,\n",
215 | " credential=credential)\n",
216 | "try:\n",
217 | " result = search_client.upload_documents(documents=documents)\n",
218 | " print(\"Upload of new document succeeded: {}\".format(result[0].succeeded))\n",
219 | "except Exception as ex:\n",
220 | " print (ex.message)\n",
221 | "\n",
222 | " index_client = SearchIndexClient(\n",
223 | " endpoint=search_endpoint, credential=credential)"
224 | ]
225 | },
226 | {
227 | "attachments": {},
228 | "cell_type": "markdown",
229 | "metadata": {},
230 | "source": [
231 | "## Run your first query"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {},
238 | "outputs": [],
239 | "source": [
240 | "# Run an empty query (returns selected fields, all documents)\n",
241 | "results = search_client.search(query_type='simple',\n",
242 | " search_text=\"*\" ,\n",
243 | " select='HotelName,Description,Tags',\n",
244 | " include_total_count=True)\n",
245 | "\n",
246 | "print ('Total Documents Matching Query:', results.get_count())\n",
247 | "for result in results:\n",
248 | " print(result[\"@search.score\"])\n",
249 | " print(result[\"HotelName\"])\n",
250 | " print(result[\"Tags\"])\n",
251 | " print(f\"Description: {result['Description']}\")\n"
252 | ]
253 | },
254 | {
255 | "attachments": {},
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "## Run a term query"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {},
266 | "outputs": [],
267 | "source": [
268 | "# Run a term query\n",
269 | "results = search_client.search(query_type='simple',\n",
270 | " search_text=\"wifi\" ,\n",
271 | " select='HotelName,Description,Tags',\n",
272 | " include_total_count=True)\n",
273 | "\n",
274 | "print ('Total Documents Matching Query:', results.get_count())\n",
275 | "for result in results:\n",
276 | " print(result[\"@search.score\"])\n",
277 | " print(result[\"HotelName\"])\n",
278 | " print(f\"Description: {result['Description']}\")"
279 | ]
280 | },
281 | {
282 | "attachments": {},
283 | "cell_type": "markdown",
284 | "metadata": {},
285 | "source": [
286 | "## Add a filter"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": null,
292 | "metadata": {},
293 | "outputs": [],
294 | "source": [
295 | "# Add a filter\n",
296 | "results = search_client.search(\n",
297 | " search_text=\"hotels\", \n",
298 | " select='HotelId,HotelName,Rating', \n",
299 | " filter='Rating gt 4', \n",
300 | " order_by='Rating desc')\n",
301 | "\n",
302 | "for result in results:\n",
303 | " print(\"{}: {} - {} rating\".format(result[\"HotelId\"], result[\"HotelName\"], result[\"Rating\"]))"
304 | ]
305 | },
306 | {
307 | "attachments": {},
308 | "cell_type": "markdown",
309 | "metadata": {},
310 | "source": [
311 | "## Scope a query to specific searchable fields"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": null,
317 | "metadata": {},
318 | "outputs": [],
319 | "source": [
320 | "results = search_client.search(\n",
321 | " search_text=\"sublime\", \n",
322 | " search_fields=['HotelName'], \n",
323 | " select='HotelId,HotelName')\n",
324 | "\n",
325 | "for result in results:\n",
326 | " print(\"{}: {}\".format(result[\"HotelId\"], result[\"HotelName\"]))"
327 | ]
328 | },
329 | {
330 | "attachments": {},
331 | "cell_type": "markdown",
332 | "metadata": {},
333 | "source": [
334 | "## Return facets"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": null,
340 | "metadata": {},
341 | "outputs": [],
342 | "source": [
343 | "# Return facets\n",
344 | "results = search_client.search(search_text=\"*\", facets=[\"Category\"])\n",
345 | "\n",
346 | "facets = results.get_facets()\n",
347 | "\n",
348 | "for facet in facets[\"Category\"]:\n",
349 | " print(\" {}\".format(facet))"
350 | ]
351 | },
352 | {
353 | "attachments": {},
354 | "cell_type": "markdown",
355 | "metadata": {},
356 | "source": [
357 | "## Look up a document "
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": null,
363 | "metadata": {},
364 | "outputs": [],
365 | "source": [
366 | "# Look up a specific document by ID\n",
367 | "result = search_client.get_document(key=\"3\")\n",
368 | "\n",
369 | "print(\"Details for hotel '3' are:\")\n",
370 | "print(\"Name: {}\".format(result[\"HotelName\"]))\n",
371 | "print(\"Rating: {}\".format(result[\"Rating\"]))\n",
372 | "print(\"Category: {}\".format(result[\"Category\"]))"
373 | ]
374 | },
375 | {
376 | "attachments": {},
377 | "cell_type": "markdown",
378 | "metadata": {},
379 | "source": [
380 | "## Autocomplete a query"
381 | ]
382 | },
383 | {
384 | "cell_type": "code",
385 | "execution_count": null,
386 | "metadata": {},
387 | "outputs": [],
388 | "source": [
389 | "# Autocomplete a query\n",
390 | "search_suggestion = 'sa'\n",
391 | "results = search_client.autocomplete(\n",
392 | " search_text=search_suggestion, \n",
393 | " suggester_name=\"sg\",\n",
394 | " mode='twoTerms')\n",
395 | "\n",
396 | "print(\"Autocomplete for:\", search_suggestion)\n",
397 | "for result in results:\n",
398 | " print (result['text'])"
399 | ]
400 | },
401 | {
402 | "attachments": {},
403 | "cell_type": "markdown",
404 | "metadata": {},
405 | "source": [
406 | "## Clean up\n",
407 | "\n",
408 | "If you are finished with this index, you can delete it by running the following lines. Deleting unnecessary indexes frees up space for stepping through more quickstarts and tutorials."
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {},
415 | "outputs": [],
416 | "source": [
417 | "try:\n",
418 | " result = index_client.delete_index(index_name)\n",
419 | " print ('Index', index_name, 'Deleted')\n",
420 | "except Exception as ex:\n",
421 | " print (ex)"
422 | ]
423 | },
424 | {
425 | "attachments": {},
426 | "cell_type": "markdown",
427 | "metadata": {},
428 | "source": [
429 | "Confirm the index deletion by running the following script that lists all of the indexes on your search service. If hotels-quickstart is not listed, you've successfully deleted the index and have completed this quickstart."
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "execution_count": null,
435 | "metadata": {},
436 | "outputs": [],
437 | "source": [
438 | "try:\n",
439 | " result = index_client.get_index(index_name)\n",
440 | " print (result)\n",
441 | "except Exception as ex:\n",
442 | " print (ex)\n"
443 | ]
444 | }
445 | ],
446 | "metadata": {
447 | "kernelspec": {
448 | "display_name": ".venv",
449 | "language": "python",
450 | "name": "python3"
451 | },
452 | "language_info": {
453 | "codemirror_mode": {
454 | "name": "ipython",
455 | "version": 3
456 | },
457 | "file_extension": ".py",
458 | "mimetype": "text/x-python",
459 | "name": "python",
460 | "nbconvert_exporter": "python",
461 | "pygments_lexer": "ipython3",
462 | "version": "3.10.12"
463 | }
464 | },
465 | "nbformat": 4,
466 | "nbformat_minor": 2
467 | }
468 |
--------------------------------------------------------------------------------
/Quickstart-Agentic-Retrieval/quickstart-agentic-retrieval.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "4be5d807",
6 | "metadata": {},
7 | "source": [
8 | "# Quickstart: Agentic retrieval in Azure AI Search\n",
9 | "\n",
10 | "Use this notebook to get started with [agentic retrieval](https://learn.microsoft.com/azure/search/search-agentic-retrieval-concept) in Azure AI Search, which integrates an LLM from Azure OpenAI in Foundry Models to process queries, retrieve relevant content from indexed documents, and generate natural-language answers.\n",
11 | "\n",
12 | "In this notebook, you:\n",
13 | "\n",
14 | "1. Create and load an `earth-at-night` search index.\n",
15 | "\n",
16 | "1. Create an `earth-knowledge-source` that targets your index.\n",
17 | "\n",
18 | "1. Create an `earth-knowledge-base` that targets your knowledge source and an LLM for query planning and answer synthesis.\n",
19 | "\n",
20 | "1. Use the knowledge base to fetch, rank, and synthesize relevant information from the index.\n",
21 | "\n",
22 | "1. Run an evaluation to assess the groundedness and relevance of the pipeline.\n",
23 | "\n",
24 | "This notebook provides a high-level demonstration of agentic retrieval. For more detailed guidance, see [Quickstart: Use agentic retrieval in Azure AI Search](https://learn.microsoft.com/azure/search/search-get-started-agentic-retrieval)."
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "id": "6712b97d",
30 | "metadata": {},
31 | "source": [
32 | "## Prerequisites\n",
33 | "\n",
34 | "+ An [Azure AI Search service](https://learn.microsoft.com/azure/search/search-create-service-portal) in any [region that provides agentic retrieval](https://learn.microsoft.com/azure/search/search-region-support).\n",
35 | "\n",
36 | "+ A [Microsoft Foundry project](https://learn.microsoft.com/azure/ai-foundry/how-to/create-projects) and resource. When you create a project, the resource is automatically created.\n",
37 | "\n",
38 | "+ A [supported LLM](https://learn.microsoft.com/azure/search/agentic-retrieval-how-to-create-knowledge-base#supported-models). This sample uses `gpt-5-mini`.\n",
39 | "\n",
40 | "+ A text embedding model. This sample uses `text-embedding-3-large`.\n",
41 | "\n",
42 | "+ [Visual Studio Code](https://code.visualstudio.com/download) with the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Jupyter package](https://pypi.org/project/jupyter/)."
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "id": "3f5fbd46",
48 | "metadata": {},
49 | "source": [
50 | "## Configure access\n",
51 | "\n",
52 | "This notebook assumes that you're using Microsoft Entra ID for authentication and role assignments for authorization.\n",
53 | "\n",
54 | "To configure role-based access:\n",
55 | "\n",
56 | "1. Sign in to the [Azure portal](https://portal.azure.com).\n",
57 | "\n",
58 | "1. On your Azure AI Search service:\n",
59 | "\n",
60 | " 1. [Enable role-based access](https://learn.microsoft.com/azure/search/search-security-enable-roles).\n",
61 | " \n",
62 | " 1. [Create a system-assigned managed identity](https://learn.microsoft.com/azure/search/search-howto-managed-identities-data-sources#create-a-system-managed-identity).\n",
63 | " \n",
64 | " 1. [Assign the following roles](https://learn.microsoft.com/azure/search/search-security-rbac#how-to-assign-roles-in-the-azure-portal) to yourself.\n",
65 | " \n",
66 | " + **Search Service Contributor**\n",
67 | " \n",
68 | " + **Search Index Data Contributor**\n",
69 | " \n",
70 | " + **Search Index Data Reader**\n",
71 | "\n",
72 | "1. On your Microsoft Foundry resource, assign **Cognitive Services User** to the managed identity of your search service."
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "id": "733bf308",
78 | "metadata": {},
79 | "source": [
80 | "## Set up connections\n",
81 | "\n",
82 | "The `sample.env` file contains environment variables for connections to Azure AI Search and Azure OpenAI in Foundry Models. Agentic retrieval requires these connections for document retrieval, query planning, and query execution.\n",
83 | "\n",
84 | "To set up the connections:\n",
85 | "\n",
86 | "1. Sign in to the [Azure portal](https://portal.azure.com).\n",
87 | "\n",
88 | "1. Get the endpoints for Azure AI Search (`https://your-search-service.search.windows.net`) and Azure OpenAI in Foundry Models (`https://your-foundry-resource.openai.azure.com`).\n",
89 | "\n",
90 | "1. Save the `sample.env` file as `.env` on your local system.\n",
91 | "\n",
92 | "1. Update the `.env` file with the retrieved endpoints."
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "id": "34a54a0f",
98 | "metadata": {},
99 | "source": [
100 | "## Create a virtual environment\n",
101 | "\n",
102 | "The `requirements.txt` file contains the dependencies for this notebook. You can use a virtual environment to install these dependencies in isolation.\n",
103 | "\n",
104 | "To create a virtual environment:\n",
105 | "\n",
106 | "1. In Visual Studio Code, open the folder that contains `quickstart-agentic-retrieval.ipynb`.\n",
107 | "\n",
108 | "1. Press **Ctrl**+**Shift**+**P** to open the command palette.\n",
109 | "\n",
110 | "1. Search for **Python: Create Environment**, and then select **Venv**.\n",
111 | "\n",
112 | "1. Select a Python installation. We tested this notebook on Python 3.13.7.\n",
113 | "\n",
114 | "1. Select `requirements.txt` for the dependencies.\n",
115 | "\n",
116 | "Creating the virtual environment can take several minutes. When the environment is ready, proceed to the next step."
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "id": "0714a968",
122 | "metadata": {},
123 | "source": [
124 | "## Install packages and load connections\n",
125 | "\n",
126 | "This step installs the packages for this notebook and establishes connections to Azure AI Search and Azure OpenAI in Foundry Models."
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "id": "041e5d89",
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "! pip install -r requirements.txt --quiet"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": null,
142 | "id": "2df3a118",
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "from dotenv import load_dotenv\n",
147 | "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
148 | "import os\n",
149 | "\n",
150 | "# Take environment variables from .env\n",
151 | "load_dotenv(override=True)\n",
152 | "\n",
153 | "# This notebook uses the following variables from your .env file\n",
154 | "search_endpoint = os.environ[\"SEARCH_ENDPOINT\"]\n",
155 | "credential = DefaultAzureCredential()\n",
156 | "token_provider = get_bearer_token_provider(credential, \"https://search.azure.com/.default\")\n",
157 | "aoai_endpoint = os.environ[\"AOAI_ENDPOINT\"]\n",
158 | "aoai_embedding_model = os.environ.get(\"AOAI_EMBEDDING_MODEL\", \"text-embedding-3-large\")\n",
159 | "aoai_embedding_deployment = os.environ.get(\"AOAI_EMBEDDING_DEPLOYMENT\", \"text-embedding-3-large\")\n",
160 | "aoai_gpt_model = os.environ.get(\"AOAI_GPT_MODEL\", \"gpt-5-mini\")\n",
161 | "aoai_gpt_deployment = os.environ.get(\"AOAI_GPT_DEPLOYMENT\", \"gpt-5-mini\")\n",
162 | "index_name = os.environ.get(\"INDEX_NAME\", \"earth-at-night\")\n",
163 | "knowledge_source_name = os.environ.get(\"KNOWLEDGE_SOURCE_NAME\", \"earth-knowledge-source\")\n",
164 | "knowledge_base_name = os.environ.get(\"KNOWLEDGE_BASE_NAME\", \"earth-knowledge-base\")"
165 | ]
166 | },
167 | {
168 | "cell_type": "markdown",
169 | "id": "58e8a088",
170 | "metadata": {},
171 | "source": [
172 | "## Create a search index\n",
173 | "\n",
174 | "This step creates an index that contains plain text and vector content. You can use an existing index, but it must meet the criteria for [agentic retrieval workloads](https://learn.microsoft.com/azure/search/search-agentic-retrieval-how-to-index). The primary schema requirement is a semantic configuration with a `default_configuration_name`."
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "id": "ee48bec5",
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "from azure.search.documents.indexes.models import SearchIndex, SearchField, VectorSearch, VectorSearchProfile, HnswAlgorithmConfiguration, AzureOpenAIVectorizer, AzureOpenAIVectorizerParameters, SemanticSearch, SemanticConfiguration, SemanticPrioritizedFields, SemanticField\n",
185 | "from azure.search.documents.indexes import SearchIndexClient\n",
186 | "from azure.identity import get_bearer_token_provider\n",
187 | "\n",
188 | "azure_openai_token_provider = get_bearer_token_provider(credential, \"https://cognitiveservices.azure.com/.default\")\n",
189 | "index = SearchIndex(\n",
190 | " name=index_name,\n",
191 | " fields=[\n",
192 | " SearchField(name=\"id\", type=\"Edm.String\", key=True, filterable=True, sortable=True, facetable=True),\n",
193 | " SearchField(name=\"page_chunk\", type=\"Edm.String\", filterable=False, sortable=False, facetable=False),\n",
194 | " SearchField(name=\"page_embedding_text_3_large\", type=\"Collection(Edm.Single)\", stored=False, vector_search_dimensions=3072, vector_search_profile_name=\"hnsw_text_3_large\"),\n",
195 | " SearchField(name=\"page_number\", type=\"Edm.Int32\", filterable=True, sortable=True, facetable=True)\n",
196 | " ],\n",
197 | " vector_search=VectorSearch(\n",
198 | " profiles=[VectorSearchProfile(name=\"hnsw_text_3_large\", algorithm_configuration_name=\"alg\", vectorizer_name=\"azure_openai_text_3_large\")],\n",
199 | " algorithms=[HnswAlgorithmConfiguration(name=\"alg\")],\n",
200 | " vectorizers=[\n",
201 | " AzureOpenAIVectorizer(\n",
202 | " vectorizer_name=\"azure_openai_text_3_large\",\n",
203 | " parameters=AzureOpenAIVectorizerParameters(\n",
204 | " resource_url=aoai_endpoint,\n",
205 | " deployment_name=aoai_embedding_deployment,\n",
206 | " model_name=aoai_embedding_model\n",
207 | " )\n",
208 | " )\n",
209 | " ]\n",
210 | " ),\n",
211 | " semantic_search=SemanticSearch(\n",
212 | " default_configuration_name=\"semantic_config\",\n",
213 | " configurations=[\n",
214 | " SemanticConfiguration(\n",
215 | " name=\"semantic_config\",\n",
216 | " prioritized_fields=SemanticPrioritizedFields(\n",
217 | " content_fields=[\n",
218 | " SemanticField(field_name=\"page_chunk\")\n",
219 | " ]\n",
220 | " )\n",
221 | " )\n",
222 | " ]\n",
223 | " )\n",
224 | ")\n",
225 | "\n",
226 | "index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)\n",
227 | "index_client.create_or_update_index(index)\n",
228 | "print(f\"Index '{index_name}' created or updated successfully.\")"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "id": "39874f61",
234 | "metadata": {},
235 | "source": [
236 | "## Upload sample documents\n",
237 | "\n",
238 | "This notebook uses data from NASA's Earth at Night e-book. The data is retrieved from the [azure-search-sample-data](https://github.com/Azure-Samples/azure-search-sample-data) repository on GitHub and passed to the search client for indexing."
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "id": "ded5147b",
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "import requests\n",
249 | "from azure.search.documents import SearchIndexingBufferedSender\n",
250 | "\n",
251 | "url = \"https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/nasa-e-book/earth-at-night-json/documents.json\"\n",
252 | "documents = requests.get(url).json()\n",
253 | "\n",
254 | "with SearchIndexingBufferedSender(endpoint=search_endpoint, index_name=index_name, credential=credential) as client:\n",
255 | " client.upload_documents(documents=documents)\n",
256 | "\n",
257 | "print(f\"Documents uploaded to index '{index_name}' successfully.\")"
258 | ]
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "id": "d0fb9e5f",
263 | "metadata": {},
264 | "source": [
265 | "## Create a knowledge source\n",
266 | "\n",
267 | "This step creates a knowledge source that targets the index you previously created. In the next step, you create a knowledge base that uses the knowledge source to orchestrate agentic retrieval."
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": null,
273 | "id": "e3415954",
274 | "metadata": {},
275 | "outputs": [],
276 | "source": [
277 | "from azure.search.documents.indexes.models import SearchIndexKnowledgeSource, SearchIndexKnowledgeSourceParameters, SearchIndexFieldReference\n",
278 | "from azure.search.documents.indexes import SearchIndexClient\n",
279 | "\n",
280 | "ks = SearchIndexKnowledgeSource(\n",
281 | " name=knowledge_source_name,\n",
282 | " description=\"Knowledge source for Earth at night data\",\n",
283 | " search_index_parameters=SearchIndexKnowledgeSourceParameters(\n",
284 | " search_index_name=index_name,\n",
285 | " source_data_fields=[SearchIndexFieldReference(name=\"id\"), SearchIndexFieldReference(name=\"page_number\")]\n",
286 | " ),\n",
287 | ")\n",
288 | "\n",
289 | "index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)\n",
290 | "index_client.create_or_update_knowledge_source(knowledge_source=ks)\n",
291 | "print(f\"Knowledge source '{knowledge_source_name}' created or updated successfully.\")"
292 | ]
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "id": "5c5e0a34",
297 | "metadata": {},
298 | "source": [
299 | "## Create a knowledge base\n",
300 | "\n",
301 | "This step creates a knowledge base, which acts as a wrapper for your knowledge source and LLM deployment.\n",
302 | "\n",
303 | "`EXTRACTIVE_DATA` is the default modality and returns content from your knowledge sources without generative alteration. However, this quickstart uses the `ANSWER_SYNTHESIS` modality for LLM-generated answers that cite the retrieved content."
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": null,
309 | "id": "d3fe4183",
310 | "metadata": {},
311 | "outputs": [],
312 | "source": [
313 | "from azure.search.documents.indexes.models import KnowledgeBase, KnowledgeBaseAzureOpenAIModel, KnowledgeSourceReference, AzureOpenAIVectorizerParameters, KnowledgeRetrievalOutputMode, KnowledgeRetrievalLowReasoningEffort\n",
314 | "from azure.search.documents.indexes import SearchIndexClient\n",
315 | "\n",
316 | "aoai_params = AzureOpenAIVectorizerParameters(\n",
317 | " resource_url=aoai_endpoint,\n",
318 | " deployment_name=aoai_gpt_deployment,\n",
319 | " model_name=aoai_gpt_model,\n",
320 | ")\n",
321 | "\n",
322 | "knowledge_base = KnowledgeBase(\n",
323 | " name=knowledge_base_name,\n",
324 | " models=[KnowledgeBaseAzureOpenAIModel(azure_open_ai_parameters=aoai_params)],\n",
325 | " knowledge_sources=[\n",
326 | " KnowledgeSourceReference(\n",
327 | " name=knowledge_source_name\n",
328 | " )\n",
329 | " ],\n",
330 | " output_mode=KnowledgeRetrievalOutputMode.ANSWER_SYNTHESIS,\n",
331 | " answer_instructions=\"Provide a 2 sentence concise and informative answer based on the retrieved documents.\"\n",
332 | ")\n",
333 | "\n",
334 | "index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)\n",
335 | "index_client.create_or_update_knowledge_base(knowledge_base)\n",
336 | "print(f\"Knowledge base '{knowledge_base_name}' created or updated successfully.\")"
337 | ]
338 | },
339 | {
340 | "cell_type": "markdown",
341 | "id": "cf7d8fbe",
342 | "metadata": {},
343 | "source": [
344 | "## Set up messages\n",
345 | "\n",
346 | "Messages are the input for the retrieval route and contain the conversation history. Each message includes a `role` that indicates its origin, such as `system` or `user`, and `content` in natural language. The LLM you use determines which roles are valid."
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": null,
352 | "id": "357268fc",
353 | "metadata": {},
354 | "outputs": [],
355 | "source": [
356 | "instructions = \"\"\"\n",
357 | "A Q&A agent that can answer questions about the Earth at night.\n",
358 | "If you don't have the answer, respond with \"I don't know\".\n",
359 | "\"\"\"\n",
360 | "\n",
361 | "messages = [\n",
362 | " {\n",
363 | " \"role\": \"system\",\n",
364 | " \"content\": instructions\n",
365 | " }\n",
366 | "]"
367 | ]
368 | },
369 | {
370 | "cell_type": "markdown",
371 | "id": "4090707f",
372 | "metadata": {},
373 | "source": [
374 | "## Use agentic retrieval to fetch results\n",
375 | "\n",
376 | "This step runs the agentic retrieval pipeline to produce a grounded, citation-backed answer. Given the conversation history and retrieval parameters, your knowledge base:\n",
377 | "\n",
378 | "1. Analyzes the entire conversation to infer the user's information need.\n",
379 | "\n",
380 | "1. Decomposes the compound query into focused subqueries.\n",
381 | "\n",
382 | "1. Runs the subqueries concurrently against your knowledge source.\n",
383 | "\n",
384 | "1. Uses semantic ranker to rerank and filter the results.\n",
385 | "\n",
386 | "1. Synthesizes the top results into a natural-language answer."
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": null,
392 | "id": "918ded26",
393 | "metadata": {},
394 | "outputs": [],
395 | "source": [
396 | "from azure.search.documents.knowledgebases import KnowledgeBaseRetrievalClient\n",
397 | "from azure.search.documents.knowledgebases.models import KnowledgeBaseRetrievalRequest, KnowledgeBaseMessage, KnowledgeBaseMessageTextContent, SearchIndexKnowledgeSourceParams\n",
398 | "\n",
399 | "agent_client = KnowledgeBaseRetrievalClient(endpoint=search_endpoint, knowledge_base_name=knowledge_base_name, credential=credential)\n",
400 | "query_1 = \"\"\"\n",
401 | " Why do suburban belts display larger December brightening than urban cores even though absolute light levels are higher downtown?\n",
402 |     "    Why is the Phoenix nighttime street grid so sharply visible from space, whereas large stretches of the interstate between midwestern cities remain comparatively dim?\n",
403 | " \"\"\"\n",
404 | "\n",
405 | "messages.append({\n",
406 | " \"role\": \"user\",\n",
407 | " \"content\": query_1\n",
408 | "})\n",
409 | "\n",
410 | "req = KnowledgeBaseRetrievalRequest(\n",
411 | " messages=[\n",
412 | " KnowledgeBaseMessage(\n",
413 | " role=m[\"role\"],\n",
414 | " content=[KnowledgeBaseMessageTextContent(text=m[\"content\"])]\n",
415 | " ) for m in messages if m[\"role\"] != \"system\"\n",
416 | " ],\n",
417 | " knowledge_source_params=[\n",
418 | " SearchIndexKnowledgeSourceParams(\n",
419 | " knowledge_source_name=knowledge_source_name,\n",
420 | " include_references=True,\n",
421 | " include_reference_source_data=True,\n",
422 | " always_query_source=True\n",
423 | " )\n",
424 | " ],\n",
425 | " include_activity=True,\n",
426 |     "    retrieval_reasoning_effort=KnowledgeRetrievalLowReasoningEffort()\n",
427 | ")\n",
428 | "\n",
429 | "result = agent_client.retrieve(retrieval_request=req)\n",
430 | "print(f\"Retrieved content from '{knowledge_base_name}' successfully.\")"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "id": "886fc687",
436 | "metadata": {},
437 | "source": [
438 | "### Review the retrieval response, activity, and results\n",
439 | "\n",
440 | "Because your knowledge base is configured for answer synthesis, the retrieval response contains the following values:\n",
441 | "\n",
442 | "+ `response_contents`: An LLM-generated answer to the query that cites the retrieved documents.\n",
443 | "\n",
444 | "+ `activity_contents`: Detailed planning and execution information, including subqueries, reranking decisions, and intermediate steps.\n",
445 | "\n",
446 | "+ `references_contents`: Source documents and chunks that contributed to the answer.\n",
447 | "\n",
448 | "**Tip:** Retrieval parameters, such as reranker thresholds and knowledge source parameters, influence how aggressively your agent reranks and which sources it queries. Inspect the activity and references to validate grounding and build traceable citations."
449 | ]
450 | },
451 | {
452 | "cell_type": "code",
453 | "execution_count": null,
454 | "id": "d4d78fbe",
455 | "metadata": {},
456 | "outputs": [],
457 | "source": [
458 | "response_contents = []\n",
459 | "activity_contents = []\n",
460 | "references_contents = []"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": null,
466 | "id": "7fccf4b0",
467 | "metadata": {},
468 | "outputs": [],
469 | "source": [
470 | "import json\n",
471 | "\n",
472 | "# Build simple string values for response_content, activity_content, and references_content\n",
473 | "\n",
474 | "# Responses -> Concatenate text/value fields from all response contents\n",
475 | "response_parts = []\n",
476 | "for resp in result.response:\n",
477 | " for content in resp.content:\n",
478 | " response_parts.append(content.text)\n",
479 | "response_content = \"\\n\\n\".join(response_parts) if response_parts else \"No response found on 'result'\"\n",
480 | "\n",
481 | "response_contents.append(response_content)\n",
482 | "\n",
483 | "# Print the three string values\n",
484 | "print(\"response_content:\\n\", response_content, \"\\n\")"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": null,
490 | "id": "7355941b",
491 | "metadata": {},
492 | "outputs": [],
493 | "source": [
494 | "messages.append({\n",
495 | " \"role\": \"assistant\",\n",
496 | " \"content\": response_content\n",
497 | "})"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": null,
503 | "id": "4cef4fd3",
504 | "metadata": {},
505 | "outputs": [],
506 | "source": [
507 | "# Activity -> JSON string of activity as list of dicts\n",
508 | "if result.activity:\n",
509 | " activity_content = json.dumps([a.as_dict() for a in result.activity], indent=2)\n",
510 | "else:\n",
511 | " activity_content = \"No activity found on 'result'\"\n",
512 | " \n",
513 | "activity_contents.append(activity_content)\n",
514 | "print(\"activity_content:\\n\", activity_content, \"\\n\")"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": null,
520 | "id": "172df234",
521 | "metadata": {},
522 | "outputs": [],
523 | "source": [
524 | "# References -> JSON string of references as list of dicts\n",
525 | "if result.references:\n",
526 | " references_content = json.dumps([r.as_dict() for r in result.references], indent=2)\n",
527 | "else:\n",
528 | " references_content = \"No references found on 'result'\"\n",
529 | " \n",
530 | "references_contents.append(references_content)\n",
531 | "print(\"references_content:\\n\", references_content)"
532 | ]
533 | },
534 | {
535 | "cell_type": "markdown",
536 | "id": "75386ed1",
537 | "metadata": {},
538 | "source": [
539 | "## Continue the conversation\n",
540 | "\n",
541 | "This step continues the conversation with your knowledge base, building upon the previous messages and queries to retrieve relevant information from your knowledge source."
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": null,
547 | "id": "da260539",
548 | "metadata": {},
549 | "outputs": [],
550 | "source": [
551 | "query_2 = \"How do I find lava at night?\"\n",
552 | "messages.append({\n",
553 | " \"role\": \"user\",\n",
554 | " \"content\": query_2\n",
555 | "})\n",
556 | "\n",
557 | "req = KnowledgeBaseRetrievalRequest(\n",
558 | " messages=[\n",
559 | " KnowledgeBaseMessage(\n",
560 | " role=m[\"role\"],\n",
561 | " content=[KnowledgeBaseMessageTextContent(text=m[\"content\"])]\n",
562 | " ) for m in messages if m[\"role\"] != \"system\"\n",
563 | " ],\n",
564 | " knowledge_source_params=[\n",
565 | " SearchIndexKnowledgeSourceParams(\n",
566 | " knowledge_source_name=knowledge_source_name,\n",
567 | " include_references=True,\n",
568 | " include_reference_source_data=True,\n",
569 | " always_query_source=True\n",
570 | " )\n",
571 | " ],\n",
572 | " include_activity=True,\n",
573 |     "    retrieval_reasoning_effort=KnowledgeRetrievalLowReasoningEffort()\n",
574 | ")\n",
575 | "\n",
576 | "result = agent_client.retrieve(retrieval_request=req)\n",
577 | "print(f\"Retrieved content from '{knowledge_base_name}' successfully.\")"
578 | ]
579 | },
580 | {
581 | "cell_type": "markdown",
582 | "id": "fd1cba0c",
583 | "metadata": {},
584 | "source": [
585 | "### Review the new retrieval response, activity, and results"
586 | ]
587 | },
588 | {
589 | "cell_type": "code",
590 | "execution_count": null,
591 | "id": "35a1bfcb",
592 | "metadata": {},
593 | "outputs": [],
594 | "source": [
595 | "import json\n",
596 | "\n",
597 | "# Build simple string values for response_content, activity_content, and references_content\n",
598 | "\n",
599 | "# Responses -> Concatenate text/value fields from all response contents\n",
600 | "response_parts = []\n",
601 | "for resp in result.response:\n",
602 | " for content in resp.content:\n",
603 | " response_parts.append(content.text)\n",
604 | "response_content = \"\\n\\n\".join(response_parts) if response_parts else \"No response found on 'result'\"\n",
605 | "\n",
606 | "response_contents.append(response_content)\n",
607 | "\n",
608 | "# Print the three string values\n",
609 | "print(\"response_content:\\n\", response_content, \"\\n\")"
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": null,
615 | "id": "6f74c2c3",
616 | "metadata": {},
617 | "outputs": [],
618 | "source": [
619 | "# Activity -> JSON string of activity as list of dicts\n",
620 | "if result.activity:\n",
621 | " activity_content = json.dumps([a.as_dict() for a in result.activity], indent=2)\n",
622 | "else:\n",
623 | " activity_content = \"No activity found on 'result'\"\n",
624 | " \n",
625 | "activity_contents.append(activity_content)\n",
626 | "print(\"activity_content:\\n\", activity_content, \"\\n\")"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": null,
632 | "id": "a6486c8a",
633 | "metadata": {},
634 | "outputs": [],
635 | "source": [
636 | "# References -> JSON string of references as list of dicts\n",
637 | "if result.references:\n",
638 | " references_content = json.dumps([r.as_dict() for r in result.references], indent=2)\n",
639 | "else:\n",
640 | " references_content = \"No references found on 'result'\"\n",
641 | " \n",
642 | "references_contents.append(references_content)\n",
643 | "print(\"references_content:\\n\", references_content)"
644 | ]
645 | },
646 | {
647 | "cell_type": "markdown",
648 | "id": "e98057c5",
649 | "metadata": {},
650 | "source": [
651 | "## Run an evaluation with Microsoft Foundry\n",
652 | "\n",
653 | "To evaluate the groundedness and relevance of the pipeline, run an evaluation with Foundry. For more detailed guidance, see [Evaluate your generative AI application locally with the Azure AI Evaluation SDK (preview)](https://learn.microsoft.com/azure/ai-foundry/how-to/develop/evaluate-sdk)."
654 | ]
655 | },
656 | {
657 | "cell_type": "markdown",
658 | "id": "d88117b3",
659 | "metadata": {},
660 | "source": [
661 | "### Prerequisites\n",
662 | "\n",
663 | "+ The same [Foundry project](https://learn.microsoft.com/azure/ai-foundry/how-to/create-projects) you used for agentic retrieval. Set `FOUNDRY_ENDPOINT` to your project endpoint in the `.env` file. You can find this endpoint in the [Foundry portal](https://ai.azure.com/).\n",
664 | "\n",
665 | "+ The `azure-ai-evaluation` package, which is already installed as part of the `requirements.txt` file."
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 | "execution_count": null,
671 | "id": "80001db4",
672 | "metadata": {},
673 | "outputs": [],
674 | "source": [
675 | "# Load connections\n",
676 | "from dotenv import load_dotenv\n",
677 | "import os\n",
678 | "\n",
679 | "load_dotenv(override=True)\n",
680 | "\n",
681 | "foundry_endpoint = os.environ[\"FOUNDRY_ENDPOINT\"]\n",
682 | "aoai_api_version = os.environ[\"AOAI_API_VERSION\"]\n",
683 | "\n",
684 | "# Run the evaluation\n",
685 | "from azure.ai.evaluation import AzureOpenAIModelConfiguration, GroundednessEvaluator, RelevanceEvaluator, evaluate\n",
686 | "import json\n",
687 | "\n",
688 | "evaluation_data = []\n",
689 | "print(\"Preparing evaluation data...\")\n",
690 | "for q, r, g in zip([query_1, query_2], references_contents, response_contents):\n",
691 | " evaluation_data.append({\n",
692 | " \"query\": q,\n",
693 | " \"response\": g,\n",
694 | " \"context\": r,\n",
695 | " })\n",
696 | "\n",
697 | "filename = \"evaluation_data.jsonl\"\n",
698 | "\n",
699 | "with open(filename, \"w\") as f:\n",
700 | " for item in evaluation_data:\n",
701 | " f.write(json.dumps(item) + \"\\n\")\n",
702 | "\n",
703 | "model_config = AzureOpenAIModelConfiguration(\n",
704 | " azure_endpoint=aoai_endpoint,\n",
705 | " api_version=aoai_api_version,\n",
706 | " azure_deployment=aoai_gpt_model\n",
707 | ")\n",
708 | "\n",
709 | "# RAG triad metrics\n",
710 | "groundedness = GroundednessEvaluator(model_config=model_config)\n",
711 | "relevance = RelevanceEvaluator(model_config=model_config)\n",
712 | "\n",
713 | "print(\"Starting evaluation...\")\n",
714 | "result = evaluate(\n",
715 | " data=filename,\n",
716 | " evaluators={\n",
717 | " \"groundedness\": groundedness,\n",
718 | " \"relevance\": relevance,\n",
719 | " },\n",
720 | " azure_ai_project=foundry_endpoint,\n",
721 | ")\n",
722 | "\n",
723 | "print(\"Evaluation complete.\")\n",
724 | "studio_url = result.get(\"studio_url\")\n",
725 | "print(\"For more information, go to the Foundry portal.\") if studio_url else None"
726 | ]
727 | },
728 | {
729 | "cell_type": "markdown",
730 | "id": "75777ed2",
731 | "metadata": {},
732 | "source": [
733 | "## Clean up objects and resources\n",
734 | "\n",
735 | "If you no longer need Azure AI Search or Microsoft Foundry, delete the resources from your Azure subscription. You can also start over by deleting individual objects."
736 | ]
737 | },
738 | {
739 | "cell_type": "markdown",
740 | "id": "a14f6fe6",
741 | "metadata": {},
742 | "source": [
743 | "### Delete the knowledge base"
744 | ]
745 | },
746 | {
747 | "cell_type": "code",
748 | "execution_count": null,
749 | "id": "67b6a475",
750 | "metadata": {},
751 | "outputs": [],
752 | "source": [
753 | "from azure.search.documents.indexes import SearchIndexClient\n",
754 | "\n",
755 | "index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)\n",
756 | "index_client.delete_knowledge_base(knowledge_base_name)\n",
757 | "print(f\"Knowledge base '{knowledge_base_name}' deleted successfully.\")"
758 | ]
759 | },
760 | {
761 | "cell_type": "markdown",
762 | "id": "ecdfb289",
763 | "metadata": {},
764 | "source": [
765 | "### Delete the knowledge source"
766 | ]
767 | },
768 | {
769 | "cell_type": "code",
770 | "execution_count": null,
771 | "id": "3b08f5e1",
772 | "metadata": {},
773 | "outputs": [],
774 | "source": [
775 | "from azure.search.documents.indexes import SearchIndexClient\n",
776 | "\n",
777 | "index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)\n",
778 | "index_client.delete_knowledge_source(knowledge_source=knowledge_source_name)\n",
779 | "print(f\"Knowledge source '{knowledge_source_name}' deleted successfully.\")"
780 | ]
781 | },
782 | {
783 | "cell_type": "markdown",
784 | "id": "a35bfbb1",
785 | "metadata": {},
786 | "source": [
787 | "### Delete the search index"
788 | ]
789 | },
790 | {
791 | "cell_type": "code",
792 | "execution_count": null,
793 | "id": "25f5e6a4",
794 | "metadata": {},
795 | "outputs": [],
796 | "source": [
797 | "from azure.search.documents.indexes import SearchIndexClient\n",
798 | "\n",
799 | "index_client = SearchIndexClient(endpoint=search_endpoint, credential=credential)\n",
800 | "index_client.delete_index(index_name)\n",
801 | "print(f\"Index '{index_name}' deleted successfully.\")"
802 | ]
803 | }
804 | ],
805 | "metadata": {
806 | "kernelspec": {
807 | "display_name": ".venv",
808 | "language": "python",
809 | "name": "python3"
810 | },
811 | "language_info": {
812 | "codemirror_mode": {
813 | "name": "ipython",
814 | "version": 3
815 | },
816 | "file_extension": ".py",
817 | "mimetype": "text/x-python",
818 | "name": "python",
819 | "nbconvert_exporter": "python",
820 | "pygments_lexer": "ipython3",
821 | "version": "3.13.9"
822 | }
823 | },
824 | "nbformat": 4,
825 | "nbformat_minor": 5
826 | }
827 |
--------------------------------------------------------------------------------
/agentic-retrieval-pipeline-example/agent-example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "5e3d4685",
6 | "metadata": {},
7 | "source": [
8 | "# Tutorial: Agentic retrieval using Azure AI Search and Foundry Agent Service\n",
9 | "\n",
10 | "Use this notebook to create an agentic retrieval pipeline built on Azure AI Search and Foundry Agent Service.\n",
11 | "\n",
12 | "In this notebook, you:\n",
13 | "\n",
14 | "1. Create and load an `earth-at-night` search index.\n",
15 | "\n",
16 | "1. Create an `earth-knowledge-source` that targets your index.\n",
17 | "\n",
18 | "1. Create an `earth-knowledge-base` that targets your knowledge source and an LLM for intelligent query planning.\n",
19 | "\n",
20 | "1. Use the knowledge base to fetch, rank, and synthesize relevant information from the index.\n",
21 | "\n",
22 | "1. Create an agent in Foundry Agent Service to determine when queries are needed.\n",
23 | "\n",
24 | "1. Create an MCP tool to orchestrate all requests.\n",
25 | "\n",
26 | "1. Start a chat with the agent.\n",
27 | "\n",
28 | "This notebook is referenced in [Tutorial: Build an end-to-end agentic retrieval solution using Azure AI Search](https://learn.microsoft.com/azure/search/search-agentic-retrieval-how-to-pipeline).\n",
29 | "\n",
30 |     "Unlike [Quickstart: Use agentic retrieval in Azure AI Search](https://learn.microsoft.com/azure/search/search-get-started-agentic-retrieval), this tutorial uses Foundry Agent Service to determine whether to retrieve data from the knowledge source and uses an MCP tool for orchestration."
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "id": "ecd68a6e",
36 | "metadata": {},
37 | "source": [
38 | "## Prerequisites\n",
39 | "\n",
40 | "+ An Azure AI Search service in any [region that provides agentic retrieval](https://learn.microsoft.com/azure/search/search-region-support).\n",
41 | "\n",
42 | "+ A [Microsoft Foundry project](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/create-projects?view=foundry-classic&tabs=foundry) and resource. When you create a project, the resource is automatically created.\n",
43 | "\n",
44 |     "+ A [supported LLM](https://learn.microsoft.com/azure/search/search-agentic-retrieval-how-to-create#supported-models) deployed to your project. This notebook uses `gpt-4.1-mini`. We recommend a minimum token capacity of 100,000. You can find the LLM's capacity and rate limit in the Foundry portal. If you want vectorization at query time, you should also deploy a text embedding model.\n",
45 | "\n",
46 | "+ [Visual Studio Code](https://code.visualstudio.com/download) with the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) and [Jupyter package](https://pypi.org/project/jupyter/)."
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "id": "1a2379a9",
52 | "metadata": {},
53 | "source": [
54 | "## Configure access\n",
55 | "\n",
56 | "This notebook assumes that you're using Microsoft Entra ID for authentication and role assignments for authorization.\n",
57 | "\n",
58 | "To configure role-based access:\n",
59 | "\n",
60 | "1. Sign in to the [Azure portal](https://portal.azure.com).\n",
61 | "\n",
62 | "1. On your Azure AI Search service:\n",
63 | "\n",
64 | " 1. [Enable role-based access](https://learn.microsoft.com/azure/search/search-security-enable-roles).\n",
65 | " \n",
66 | " 1. [Create a system-assigned managed identity](https://learn.microsoft.com/azure/search/search-howto-managed-identities-data-sources#create-a-system-managed-identity).\n",
67 | " \n",
68 | " 1. [Assign the following roles](https://learn.microsoft.com/azure/search/search-security-rbac#how-to-assign-roles-in-the-azure-portal) to yourself.\n",
69 | " \n",
70 | " + **Search Service Contributor**\n",
71 | " \n",
72 | " + **Search Index Data Contributor**\n",
73 | " \n",
74 | " + **Search Index Data Reader**\n",
75 | "\n",
76 | " 1. Assign **Search Index Data Reader** to your Microsoft Foundry project.\n",
77 | "\n",
78 | "1. On your Microsoft Foundry resource:\n",
79 | "\n",
80 | " 1. Assign the following roles to yourself.\n",
81 | "\n",
82 | " + **Azure AI User**\n",
83 | "\n",
84 | " + **Azure AI Project Manager**\n",
85 | "\n",
86 | " 1. Assign **Cognitive Services User** to the managed identity of your search service."
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "id": "1f40a871",
92 | "metadata": {},
93 | "source": [
94 | "## Set up connections\n",
95 | "\n",
96 | "Save the `sample.env` file as `.env` and then modify the environment variables to use your Azure endpoints. You need endpoints for:\n",
97 | "\n",
98 | "+ Azure AI Search\n",
99 | "+ Azure OpenAI (for the models deployed to your project)\n",
100 | "+ Microsoft Foundry project\n",
101 | "\n",
102 | "You also need the resource ID of your project. You can find all of these values in the [Azure portal](https://portal.azure.com/)."
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "id": "679bc80a",
108 | "metadata": {},
109 | "source": [
110 | "## Load connections\n",
111 | "\n",
112 |     "We recommend creating a virtual environment to run this sample code. In Visual Studio Code, open the Command Palette (Ctrl+Shift+P) to create an environment. This notebook was tested on Python 3.13.7.\n",
113 | "\n",
114 | "After your environment is created, load the environment variables to set up connections and object names."
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 1,
120 | "id": "e42b4a10",
121 | "metadata": {},
122 | "outputs": [],
123 | "source": [
124 | "import os\n",
125 | "\n",
126 | "from azure.identity import DefaultAzureCredential\n",
127 | "from azure.mgmt.core.tools import parse_resource_id\n",
128 | "from dotenv import load_dotenv\n",
129 | "\n",
130 | "load_dotenv(override=True) # take environment variables from .env.\n",
131 | "\n",
132 | "project_endpoint = os.environ[\"PROJECT_ENDPOINT\"]\n",
133 | "project_resource_id = os.environ[\"PROJECT_RESOURCE_ID\"]\n",
134 | "project_connection_name = os.getenv(\"PROJECT_CONNECTION_NAME\", \"earthknowledgeconnection\")\n",
135 | "agent_model = os.getenv(\"AGENT_MODEL\", \"gpt-4.1-mini\")\n",
136 | "agent_name = os.getenv(\"AGENT_NAME\", \"earth-knowledge-agent\")\n",
137 | "endpoint = os.environ[\"AZURE_SEARCH_ENDPOINT\"]\n",
138 | "credential = DefaultAzureCredential()\n",
139 | "knowledge_source_name = os.getenv(\"AZURE_SEARCH_KNOWLEDGE_SOURCE_NAME\", \"earth-knowledge-source\")\n",
140 | "index_name = os.getenv(\"AZURE_SEARCH_INDEX\", \"earth-at-night\")\n",
141 | "azure_openai_endpoint = os.environ[\"AZURE_OPENAI_ENDPOINT\"]\n",
142 | "azure_openai_gpt_deployment = os.getenv(\"AZURE_OPENAI_GPT_DEPLOYMENT\", \"gpt-4.1-mini\")\n",
143 | "azure_openai_gpt_model = os.getenv(\"AZURE_OPENAI_GPT_MODEL\", \"gpt-4.1-mini\")\n",
144 | "azure_openai_embedding_deployment = os.getenv(\"AZURE_OPENAI_EMBEDDING_DEPLOYMENT\", \"text-embedding-3-large\")\n",
145 | "azure_openai_embedding_model = os.getenv(\"AZURE_OPENAI_EMBEDDING_MODEL\", \"text-embedding-3-large\")\n",
146 | "base_name = os.getenv(\"AZURE_SEARCH_AGENT_NAME\", \"earth-knowledge-base\")\n",
147 | "\n",
148 | "# Parse the resource ID to extract subscription and other components\n",
149 | "parsed_resource_id = parse_resource_id(project_resource_id)\n",
150 | "subscription_id = parsed_resource_id['subscription']\n",
151 | "resource_group = parsed_resource_id['resource_group']\n",
152 | "account_name = parsed_resource_id['name']\n",
153 | "project_name = parsed_resource_id['child_name_1']"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "id": "ea2ecdce",
159 | "metadata": {},
160 | "source": [
161 | "## Create a search index\n",
162 | "\n",
163 |     "This step creates a search index that contains plain text and vector content. You can use an existing index, but it must meet the [criteria for agentic retrieval workloads](https://learn.microsoft.com/azure/search/search-agentic-retrieval-how-to-index). The primary schema requirement is a semantic configuration with a `default_configuration_name`."
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 2,
169 | "id": "91fd6810",
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "name": "stdout",
174 | "output_type": "stream",
175 | "text": [
176 | "Index 'earth-at-night' created or updated successfully\n"
177 | ]
178 | }
179 | ],
180 | "source": [
181 | "from azure.search.documents.indexes import SearchIndexClient\n",
182 | "from azure.search.documents.indexes.models import (\n",
183 | " AzureOpenAIVectorizer, AzureOpenAIVectorizerParameters,\n",
184 | " HnswAlgorithmConfiguration, SearchField, SearchIndex,\n",
185 | " SemanticConfiguration, SemanticField, SemanticPrioritizedFields,\n",
186 | " SemanticSearch, VectorSearch, VectorSearchProfile\n",
187 | ")\n",
188 | "\n",
189 | "index = SearchIndex(\n",
190 | " name=index_name,\n",
191 | " fields=[\n",
192 | " SearchField(name=\"id\", type=\"Edm.String\", key=True, filterable=True, sortable=True, facetable=True),\n",
193 | " SearchField(name=\"page_chunk\", type=\"Edm.String\", filterable=False, sortable=False, facetable=False),\n",
194 | " SearchField(name=\"page_embedding_text_3_large\", type=\"Collection(Edm.Single)\", stored=False, vector_search_dimensions=3072, vector_search_profile_name=\"hnsw_text_3_large\"),\n",
195 | " SearchField(name=\"page_number\", type=\"Edm.Int32\", filterable=True, sortable=True, facetable=True)\n",
196 | " ],\n",
197 | " vector_search=VectorSearch(\n",
198 | " profiles=[VectorSearchProfile(name=\"hnsw_text_3_large\", algorithm_configuration_name=\"alg\", vectorizer_name=\"azure_openai_text_3_large\")],\n",
199 | " algorithms=[HnswAlgorithmConfiguration(name=\"alg\")],\n",
200 | " vectorizers=[\n",
201 | " AzureOpenAIVectorizer(\n",
202 | " vectorizer_name=\"azure_openai_text_3_large\",\n",
203 | " parameters=AzureOpenAIVectorizerParameters(\n",
204 | " resource_url=azure_openai_endpoint,\n",
205 | " deployment_name=azure_openai_embedding_deployment,\n",
206 | " model_name=azure_openai_embedding_model\n",
207 | " )\n",
208 | " )\n",
209 | " ]\n",
210 | " ),\n",
211 | " semantic_search=SemanticSearch(\n",
212 | " default_configuration_name=\"semantic_config\",\n",
213 | " configurations=[\n",
214 | " SemanticConfiguration(\n",
215 | " name=\"semantic_config\",\n",
216 | " prioritized_fields=SemanticPrioritizedFields(\n",
217 | " content_fields=[\n",
218 | " SemanticField(field_name=\"page_chunk\")\n",
219 | " ]\n",
220 | " )\n",
221 | " )\n",
222 | " ]\n",
223 | " )\n",
224 | ")\n",
225 | "\n",
226 | "index_client = SearchIndexClient(endpoint=endpoint, credential=credential)\n",
227 | "index_client.create_or_update_index(index)\n",
228 | "print(f\"Index '{index_name}' created or updated successfully\")"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "id": "376b9785",
234 | "metadata": {},
235 | "source": [
236 | "## Upload sample documents\n",
237 | "\n",
238 | "This notebook uses data from NASA's Earth at Night e-book. The data is retrieved from the [azure-search-sample-data](https://github.com/Azure-Samples/azure-search-sample-data) repository on GitHub and passed to the search client for indexing."
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 3,
244 | "id": "f98f31e7",
245 | "metadata": {},
246 | "outputs": [
247 | {
248 | "name": "stdout",
249 | "output_type": "stream",
250 | "text": [
251 | "Documents uploaded to index 'earth-at-night'\n"
252 | ]
253 | }
254 | ],
255 | "source": [
256 | "import requests\n",
257 | "from azure.search.documents import SearchIndexingBufferedSender\n",
258 | "\n",
259 | "url = \"https://raw.githubusercontent.com/Azure-Samples/azure-search-sample-data/refs/heads/main/nasa-e-book/earth-at-night-json/documents.json\"\n",
260 | "documents = requests.get(url).json()\n",
261 | "\n",
262 | "with SearchIndexingBufferedSender(endpoint=endpoint, index_name=index_name, credential=credential) as client:\n",
263 | " client.upload_documents(documents=documents)\n",
264 | "\n",
265 | "print(f\"Documents uploaded to index '{index_name}'\")"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "id": "38c2d9d5",
271 | "metadata": {},
272 | "source": [
273 | "## Create a knowledge source\n",
274 | "\n",
275 | "This step creates a knowledge source that targets the index you previously created. In the next step, you create a knowledge base that uses the knowledge source to orchestrate agentic retrieval.\n"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 4,
281 | "id": "0cf01881",
282 | "metadata": {},
283 | "outputs": [
284 | {
285 | "name": "stdout",
286 | "output_type": "stream",
287 | "text": [
288 | "Knowledge source 'earth-knowledge-source' created or updated successfully.\n"
289 | ]
290 | }
291 | ],
292 | "source": [
293 | "from azure.search.documents.indexes import SearchIndexClient\n",
294 | "from azure.search.documents.indexes.models import (\n",
295 | " SearchIndexFieldReference, SearchIndexKnowledgeSource,\n",
296 | " SearchIndexKnowledgeSourceParameters\n",
297 | ")\n",
298 | "\n",
299 | "ks = SearchIndexKnowledgeSource(\n",
300 | " name=knowledge_source_name,\n",
301 | " description=\"Knowledge source for Earth at night data\",\n",
302 | " search_index_parameters=SearchIndexKnowledgeSourceParameters(\n",
303 | " search_index_name=index_name,\n",
304 | " source_data_fields=[SearchIndexFieldReference(name=\"id\"), SearchIndexFieldReference(name=\"page_number\")]\n",
305 | " ),\n",
306 | ")\n",
307 | "\n",
308 | "index_client = SearchIndexClient(endpoint=endpoint, credential=credential)\n",
309 | "index_client.create_or_update_knowledge_source(knowledge_source=ks)\n",
310 | "print(f\"Knowledge source '{knowledge_source_name}' created or updated successfully.\")"
311 | ]
312 | },
313 | {
314 | "cell_type": "markdown",
315 | "id": "e3d0081e",
316 | "metadata": {},
317 | "source": [
318 | "## Create a knowledge base\n",
319 | "\n",
320 | "This step creates a knowledge base, which acts as a wrapper for your knowledge source and LLM deployment.\n",
321 | "\n",
322 | "`EXTRACTIVE_DATA` is the default modality and returns content from your knowledge sources without answer generation. This is recommended for interaction with Foundry Agent Service."
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 5,
328 | "id": "fbe31e32",
329 | "metadata": {},
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | "Knowledge base 'earth-knowledge-base' created or updated successfully\n"
336 | ]
337 | }
338 | ],
339 | "source": [
340 | "from azure.search.documents.indexes import SearchIndexClient\n",
341 | "from azure.search.documents.indexes.models import (\n",
342 | " KnowledgeBase, KnowledgeRetrievalMinimalReasoningEffort,\n",
343 | " KnowledgeRetrievalOutputMode, KnowledgeSourceReference\n",
344 | ")\n",
345 | "\n",
346 | "knowledge_base = KnowledgeBase(\n",
347 | " name=base_name,\n",
348 | " knowledge_sources=[\n",
349 | " KnowledgeSourceReference(\n",
350 | " name=knowledge_source_name\n",
351 | " )\n",
352 | " ],\n",
353 | " output_mode=KnowledgeRetrievalOutputMode.EXTRACTIVE_DATA,\n",
354 | " retrieval_reasoning_effort=KnowledgeRetrievalMinimalReasoningEffort()\n",
355 | ")\n",
356 | "\n",
357 | "\n",
358 | "index_client = SearchIndexClient(endpoint=endpoint, credential=credential)\n",
359 | "index_client.create_or_update_knowledge_base(knowledge_base=knowledge_base)\n",
360 | "print(f\"Knowledge base '{base_name}' created or updated successfully\")\n",
361 | "\n",
362 | "mcp_endpoint = f\"{endpoint}/knowledgebases/{base_name}/mcp?api-version=2025-11-01-Preview\""
363 | ]
364 | },
365 | {
366 | "cell_type": "markdown",
367 | "id": "ff845de0",
368 | "metadata": {},
369 | "source": [
370 | "## Create an agent\n",
371 | "\n",
372 |     "In Foundry Agent Service, an agent is a smart microservice that can use an LLM with tools. The purpose of this agent is to use retrieval tools from the knowledge base to do RAG.\n",
373 | "\n",
374 | "Your Foundry project may have no agents at this stage, but if you've already run this notebook, you will see the agent listed here."
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": null,
380 | "id": "6eb0ebd3",
381 | "metadata": {},
382 | "outputs": [],
383 | "source": [
384 | "from azure.ai.projects import AIProjectClient\n",
385 | "\n",
386 | "project_client = AIProjectClient(endpoint=project_endpoint, credential=credential)\n",
387 | "\n",
388 | "list(project_client.agents.list())"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "id": "61de7601",
394 | "metadata": {},
395 | "source": [
396 | "## Create an MCP tool connection\n",
397 | "\n",
398 | "In Microsoft Foundry, you must create a connection to authenticate to your MCP tool."
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": null,
404 | "id": "80c209b9",
405 | "metadata": {},
406 | "outputs": [
407 | {
408 | "name": "stdout",
409 | "output_type": "stream",
410 | "text": [
411 | "Connection 'earthknowledgeconnection' created or updated successfully.\n"
412 | ]
413 | }
414 | ],
415 | "source": [
416 | "import requests\n",
417 | "from azure.identity import get_bearer_token_provider\n",
418 | "\n",
419 | "# Requires the Foundry project to have Search Index Data Reader role on the search service\n",
420 | "bearer_token_provider = get_bearer_token_provider(credential, \"https://management.azure.com/.default\")\n",
421 | "headers = {\n",
422 | " \"Authorization\": f\"Bearer {bearer_token_provider()}\",\n",
423 | "}\n",
424 | "response = requests.put(\n",
425 | " f\"https://management.azure.com{project_resource_id}/connections/{project_connection_name}?api-version=2025-10-01-preview\",\n",
426 | " headers=headers,\n",
427 | " json={\n",
428 | " \"name\": project_connection_name,\n",
429 | " \"type\": \"Microsoft.MachineLearningServices/workspaces/connections\",\n",
430 | " \"properties\": {\n",
431 | " \"authType\": \"ProjectManagedIdentity\",\n",
432 | " \"category\": \"RemoteTool\",\n",
433 | " \"target\": mcp_endpoint,\n",
434 | " \"isSharedToAll\": True,\n",
435 | " \"audience\": \"https://search.azure.com/\",\n",
436 | " \"metadata\": { \"ApiType\": \"Azure\" }\n",
437 | " }\n",
438 | " }\n",
439 | ")\n",
440 | "response.raise_for_status()\n",
441 | "print(f\"Connection '{project_connection_name}' created or updated successfully.\")"
442 | ]
443 | },
444 | {
445 | "cell_type": "markdown",
446 | "id": "6631324e",
447 | "metadata": {},
448 | "source": [
449 | "## Optimize agent instructions for knowledge retrieval\n",
450 | "\n",
451 | "To maximize the accuracy of knowledge base invocations and ensure proper citation formatting, use optimized agent instructions. Based on our experiments, we recommend the following instruction template as a starting point:\n",
452 | "\n",
453 | "```\n",
454 | "You are a helpful assistant that must use the knowledge base to answer all the questions from user. You must never answer from your own knowledge under any circumstances.\n",
455 | "Every answer must always provide annotations for using the MCP knowledge base tool and render them as: `【message_idx:search_idx†source_name】`\n",
456 | "If you cannot find the answer in the provided knowledge base you must respond with \"I don't know\".\n",
457 | "```\n",
458 | "\n",
459 | "The specified citation format ensures the agent includes provenance information in responses, making it clear which knowledge sources were used."
460 | ]
461 | },
462 | {
463 | "cell_type": "code",
464 | "execution_count": null,
465 | "id": "aa363122",
466 | "metadata": {},
467 | "outputs": [
468 | {
469 | "name": "stdout",
470 | "output_type": "stream",
471 | "text": [
472 | "AI agent 'earth-knowledge-agent' created or updated successfully\n"
473 | ]
474 | }
475 | ],
476 | "source": [
477 | "from azure.ai.projects.models import PromptAgentDefinition, MCPTool\n",
478 | "\n",
479 | "instructions = \"\"\"\n",
480 | "You are a helpful assistant that must use the knowledge base to answer all the questions from user. You must never answer from your own knowledge under any circumstances.\n",
481 | "Every answer must always provide annotations for using the MCP knowledge base tool and render them as: `【message_idx:search_idx†source_name】`\n",
482 | "If you cannot find the answer in the provided knowledge base you must respond with \"I don't know\".\n",
483 | "\"\"\"\n",
484 | "mcp_kb_tool = MCPTool(\n",
485 | " server_label=\"knowledge-base\",\n",
486 | " server_url=mcp_endpoint,\n",
487 | " require_approval=\"never\",\n",
488 | " allowed_tools=[\"knowledge_base_retrieve\"],\n",
489 | " project_connection_id=project_connection_name\n",
490 | ")\n",
491 | "agent = project_client.agents.create_version(\n",
492 | " agent_name=agent_name,\n",
493 | " definition=PromptAgentDefinition(\n",
494 | " model=agent_model,\n",
495 | " instructions=instructions,\n",
496 | " tools=[mcp_kb_tool]\n",
497 | " )\n",
498 | ")\n",
499 | "\n",
500 | "\n",
501 | "print(f\"AI agent '{agent_name}' created or updated successfully\")"
502 | ]
503 | },
504 | {
505 | "cell_type": "markdown",
506 | "id": "16a2d5ed",
507 | "metadata": {},
508 | "source": [
509 | "## Start a chat with the agent\n",
510 | "\n",
511 |     "Set the `tool_choice` parameter to \"required\" to ensure the knowledge base tool is consistently used."
512 | ]
513 | },
514 | {
515 | "cell_type": "code",
516 | "execution_count": 9,
517 | "id": "e9492c4a",
518 | "metadata": {},
519 | "outputs": [
520 | {
521 | "name": "stdout",
522 | "output_type": "stream",
523 | "text": [
524 | "Response: Here are evidence-based explanations to your questions:\n",
525 | "\n",
526 | "---\n",
527 | "\n",
528 | "**1. Why do suburban belts display larger December brightening than urban cores, even though absolute light levels are higher downtown?**\n",
529 | "\n",
530 | "- Suburban belts show a *larger percentage increase* in night brightness during December compared to urban cores, largely because suburban residential areas feature more single-family homes and larger yards, which are typically decorated with holiday lights. These areas start from a lower baseline (less bright overall at night compared to dense urban centers), so the relative change (brightening) is much more noticeable.\n",
531 | "\n",
532 | "- In contrast, the downtown core is already very bright at night due to dense commercial lighting and streetlights. While it also sees a December increase (often 20–30% brighter), the *absolute* change is less striking because it begins at a much higher base of illumination.\n",
533 | "\n",
534 | "- This pattern is observed across U.S. cities, with the phenomenon driven by widespread cultural practices and the suburban landscape’s suitability for holiday lighting displays. The effect is visible in satellite data and was quantified at 20–50% brighter in December, especially in suburbs and city outskirts.\n",
535 | "\n",
536 | "---\n",
537 | "\n",
538 | "**2. Why is the Phoenix nighttime street grid so sharply visible from space, whereas large stretches of the interstate between midwestern cities remain comparatively dim?**\n",
539 | "\n",
540 | "- Phoenix’s sharply visible nighttime street grid from space is a result of its urban layout: the city (like many western U.S. cities) was developed using a regular grid system, with extensive and uniform street lighting and strong urban sprawl. The grid pattern, and the dense network of intersecting surface streets, is brightly illuminated, particularly at intersections, commercial areas, and major thoroughfares.\n",
541 | "\n",
542 | "- The interstate highways between midwestern cities, though significant in length and crucial to national infrastructure, traverse sparsely populated rural areas. These stretches typically have very little artificial lighting (due to low traffic volumes at night and cost considerations), making them much less visible in nighttime satellite imagery. Only nodes (cities and towns) along the route show as bright \"pearls\" in the darkness, while the \"strings\" (highways) connecting them remain faint or invisible.\n",
543 | "\n",
544 | "- In summary:\n",
545 | " - Urban areas like Phoenix stand out with strong, connected patterns of light due to dense development and extensive lighting.\n",
546 | " - Rural interstates are sparsely lit, and only their endpoints—cities and large towns—generate notable light visible from space.\n",
547 | "\n",
548 | "---\n",
549 | "\n",
550 | "**References**:\n",
551 | "- [Holiday Lights increase most dramatically in suburbs, not downtowns: earth_at_night_508_page_176_verbalized, page 160](4:5)\n",
552 | "- [Lighting paths and urban grids are visible from space, while rural highways remain dim: earth_at_night_508_page_124_verbalized, page 108](4:3)\n",
553 | "- [Phoenix’s grid and surrounding urban structure: earth_at_night_508_page_104_verbalized, page 88](4:1)\n"
554 | ]
555 | }
556 | ],
557 | "source": [
558 | "# Get the OpenAI client for responses and conversations\n",
559 | "openai_client = project_client.get_openai_client()\n",
560 | "\n",
561 | "conversation = openai_client.conversations.create()\n",
562 | "\n",
563 | "# Send initial request that will trigger the MCP tool\n",
564 | "response = openai_client.responses.create(\n",
565 | " conversation=conversation.id,\n",
566 | " tool_choice=\"required\",\n",
567 | " input=\"\"\"\n",
568 | " Why do suburban belts display larger December brightening than urban cores even though absolute light levels are higher downtown?\n",
569 |     "    Why is the Phoenix nighttime street grid so sharply visible from space, whereas large stretches of the interstate between midwestern cities remain comparatively dim?\n",
570 | " \"\"\",\n",
571 | " extra_body={\"agent\": {\"name\": agent.name, \"type\": \"agent_reference\"}},\n",
572 | ")\n",
573 | "\n",
574 | "print(f\"Response: {response.output_text}\")\n"
575 | ]
576 | },
577 | {
578 | "cell_type": "markdown",
579 | "id": "39c02785",
580 | "metadata": {},
581 | "source": [
582 | "## Inspect the response\n",
583 | "\n",
584 |     "The underlying response from the agent contains metadata about what queries the agent sent to the knowledge base and what citations were found."
585 | ]
586 | },
587 | {
588 | "cell_type": "code",
589 | "execution_count": null,
590 | "id": "b845d2ae",
591 | "metadata": {},
592 | "outputs": [],
593 | "source": [
594 | "response.to_dict()"
595 | ]
596 | },
597 | {
598 | "cell_type": "markdown",
599 | "id": "3b328340",
600 | "metadata": {},
601 | "source": [
602 | "## (Optional) Add remote SharePoint as a knowledge source\n",
603 | "\n",
604 | "Adding a remote SharePoint knowledge source requires an additional `x-ms-query-source-authorization` header in your MCP connection."
605 | ]
606 | },
607 | {
608 | "cell_type": "code",
609 | "execution_count": 11,
610 | "id": "3711252f",
611 | "metadata": {},
612 | "outputs": [
613 | {
614 | "name": "stdout",
615 | "output_type": "stream",
616 | "text": [
617 | "Knowledge source 'remote-sharepoint' created or updated successfully.\n",
618 | "Knowledge base 'earth-knowledge-base' updated with new knowledge source successfully\n",
619 | "AI agent 'earth-knowledge-agent' created or updated successfully\n"
620 | ]
621 | }
622 | ],
623 | "source": [
624 | "from azure.search.documents.indexes.models import RemoteSharePointKnowledgeSource, KnowledgeSourceReference\n",
625 | "from azure.search.documents.indexes import SearchIndexClient\n",
626 | "from azure.identity import get_bearer_token_provider\n",
627 | "\n",
628 | "remote_sp_ks = RemoteSharePointKnowledgeSource(\n",
629 | " name=\"remote-sharepoint\",\n",
630 | " description=\"SharePoint knowledge source\"\n",
631 | ")\n",
632 | "\n",
633 | "index_client = SearchIndexClient(endpoint=endpoint, credential=credential)\n",
634 | "index_client.create_or_update_knowledge_source(knowledge_source=remote_sp_ks)\n",
635 | "print(f\"Knowledge source '{remote_sp_ks.name}' created or updated successfully.\")\n",
636 | "\n",
637 | "knowledge_base.knowledge_sources = [\n",
638 | " KnowledgeSourceReference(name=remote_sp_ks.name), KnowledgeSourceReference(name=knowledge_source_name)\n",
639 | "]\n",
640 | "index_client.create_or_update_knowledge_base(knowledge_base=knowledge_base)\n",
641 | "print(f\"Knowledge base '{base_name}' updated with new knowledge source successfully\")\n",
642 | "\n",
643 | "mcp_kb_tool = MCPTool(\n",
644 | " server_label=\"knowledge-base\",\n",
645 | " server_url=mcp_endpoint,\n",
646 | " require_approval=\"never\",\n",
647 | " allowed_tools=[\"knowledge_base_retrieve\"],\n",
648 | " project_connection_id=project_connection_name,\n",
649 | " headers={\n",
650 | " \"x-ms-query-source-authorization\": get_bearer_token_provider(credential, \"https://search.azure.com/.default\")()\n",
651 | " }\n",
652 | ")\n",
653 | "agent = project_client.agents.create_version(\n",
654 | " agent_name=agent_name,\n",
655 | " definition=PromptAgentDefinition(\n",
656 | " model=agent_model,\n",
657 | " instructions=instructions,\n",
658 | " tools=[mcp_kb_tool]\n",
659 | " )\n",
660 | ")\n",
661 | "\n",
662 | "\n",
663 | "print(f\"AI agent '{agent_name}' created or updated successfully\")"
664 | ]
665 | },
666 | {
667 | "cell_type": "markdown",
668 | "id": "0926264d",
669 | "metadata": {},
670 | "source": [
671 | "## Clean up objects and resources\n",
672 | "\n",
673 | "If you no longer need Azure AI Search or Microsoft Foundry, delete the resources from your Azure subscription. You can also start over by deleting individual objects."
674 | ]
675 | },
676 | {
677 | "cell_type": "markdown",
678 | "id": "4395247f",
679 | "metadata": {},
680 | "source": [
681 | "### Delete the agent"
682 | ]
683 | },
684 | {
685 | "cell_type": "code",
686 | "execution_count": 12,
687 | "id": "409befbb",
688 | "metadata": {},
689 | "outputs": [
690 | {
691 | "name": "stdout",
692 | "output_type": "stream",
693 | "text": [
694 | "AI agent 'earth-knowledge-agent' version '7' deleted successfully\n"
695 | ]
696 | }
697 | ],
698 | "source": [
699 | "project_client.agents.delete_version(agent.name, agent.version)\n",
700 | "print(f\"AI agent '{agent.name}' version '{agent.version}' deleted successfully\")"
701 | ]
702 | },
703 | {
704 | "cell_type": "markdown",
705 | "id": "b7e67115",
706 | "metadata": {},
707 | "source": [
708 | "### Delete the knowledge base"
709 | ]
710 | },
711 | {
712 | "cell_type": "code",
713 | "execution_count": 13,
714 | "id": "d67f8609",
715 | "metadata": {},
716 | "outputs": [
717 | {
718 | "name": "stdout",
719 | "output_type": "stream",
720 | "text": [
721 | "Knowledge base 'earth-knowledge-base' deleted successfully\n"
722 | ]
723 | }
724 | ],
725 | "source": [
726 | "index_client.delete_knowledge_base(base_name)\n",
727 | "print(f\"Knowledge base '{base_name}' deleted successfully\")"
728 | ]
729 | },
730 | {
731 | "cell_type": "markdown",
732 | "id": "ff523474",
733 | "metadata": {},
734 | "source": [
735 | "### Delete the knowledge source"
736 | ]
737 | },
738 | {
739 | "cell_type": "code",
740 | "execution_count": 14,
741 | "id": "e35a6eb0",
742 | "metadata": {},
743 | "outputs": [
744 | {
745 | "name": "stdout",
746 | "output_type": "stream",
747 | "text": [
748 | "Knowledge source 'earth-knowledge-source' deleted successfully.\n"
749 | ]
750 | }
751 | ],
752 | "source": [
753 |     "index_client.delete_knowledge_source(knowledge_source=knowledge_source_name) # This is a new feature in the 2025-08-01-Preview API version\n",
754 | "print(f\"Knowledge source '{knowledge_source_name}' deleted successfully.\")\n"
755 | ]
756 | },
757 | {
758 | "cell_type": "markdown",
759 | "id": "882ea545",
760 | "metadata": {},
761 | "source": [
762 | "### Delete the search index"
763 | ]
764 | },
765 | {
766 | "cell_type": "code",
767 | "execution_count": 15,
768 | "id": "d9895f27",
769 | "metadata": {},
770 | "outputs": [
771 | {
772 | "name": "stdout",
773 | "output_type": "stream",
774 | "text": [
775 | "Index 'earth-at-night' deleted successfully\n"
776 | ]
777 | }
778 | ],
779 | "source": [
780 | "index_client.delete_index(index)\n",
781 | "print(f\"Index '{index_name}' deleted successfully\")"
782 | ]
783 | }
784 | ],
785 | "metadata": {
786 | "kernelspec": {
787 | "display_name": ".venv",
788 | "language": "python",
789 | "name": "python3"
790 | },
791 | "language_info": {
792 | "codemirror_mode": {
793 | "name": "ipython",
794 | "version": 3
795 | },
796 | "file_extension": ".py",
797 | "mimetype": "text/x-python",
798 | "name": "python",
799 | "nbconvert_exporter": "python",
800 | "pygments_lexer": "ipython3",
801 | "version": "3.12.10"
802 | }
803 | },
804 | "nbformat": 4,
805 | "nbformat_minor": 5
806 | }
807 |
--------------------------------------------------------------------------------