├── .azdo
    └── pipelines
    │   └── azure-dev.yml
├── .gitignore
├── .vscode
    ├── extensions.json
    ├── launch.json
    ├── settings.json
    └── tasks.json
├── LICENSE
├── README.md
├── assets
    ├── ai-hub-gateway-benefits.png
    ├── ai-search-api-call.png
    ├── ai-studio-connected-resources.png
    ├── ai-studio-new-connections.png
    ├── ai-studio-new-manual-ai-openai.png
    ├── ai-studio-new-manual-ai-search.png
    ├── ai-studio-prompt-flow-gpt.png
    ├── ai-studio-prompt-flow-inputs.png
    ├── apim-test.png
    ├── architecture-1-0-5.png
    ├── architecture-1-0-6.png
    ├── azure-openai-landing-zone.drawio
    ├── azure-openai-landing-zone.png
    ├── azure-resources-diagram-asa.svg
    ├── azure-resources-diagram-standard.svg
    ├── azure-resources-diagram.svg
    ├── code.png
    ├── cosmos-db-firwall.png
    ├── cosmos-db-model-pricing.png
    ├── customer-truth.png
    ├── oai-logicapps-nonstreaming.png
    ├── oai-logicapps-streaming.png
    ├── one-click-deploy.png
    ├── power-bi-data-final.png
    ├── power-bi-data-source-add.png
    ├── power-bi-data-source-adv-editor-update.png
    ├── power-bi-data-source-adv-editor-update2.png
    ├── power-bi-data-source-adv-editor.png
    ├── power-bi-data-source-model-pricing.png
    ├── power-bi-data-source-transform.png
    ├── power-bi-data-source.png
    ├── power-bi-percentage-dashboad.png
    ├── powerbi-relationship.png
    ├── powerbi-usage-dashboard-old.png
    ├── powerbi-usage-dashboard.png
    ├── supporting-documents.png
    ├── throttling-events-alert.png
    ├── throttling-events-app-insights.png
    └── user-story.png
├── azure.yaml
├── guides
    ├── ai-hub-gateway-hybrid-deployment.md
    ├── ai-search-integration.md
    ├── ai-studio-integration.md
    ├── apim-configuration.md
    ├── architecture.md
    ├── bring-your-own-network.md
    ├── deployment-troubleshooting.md
    ├── deployment.md
    ├── end-to-end-scenario.md
    ├── openai-onboarding.md
    ├── openai-usage-ingestion.md
    ├── power-bi-dashboard.md
    ├── routing-configurations.md
    └── throttling-events-handling.md
├── infra
    ├── abbreviations.json
    ├── main.bicep
    ├── main.parameters.json
    └── modules
    │   ├── ai
    │       └── cognitiveservices.bicep
    │   ├── apim
    │       ├── ai-model-inference
    │       │   └── ai-model-inference-api-spec.yaml
    │       ├── ai-search-api
    │       │   └── ai-search-api-spec.yaml
    │       ├── api.bicep
    │       ├── apim.bicep
    │       ├── openai-api
    │       │   ├── oai-api-spec-2024-02-01.yaml
    │       │   ├── oai-api-spec-2024-05-01-preview.yaml
    │       │   ├── oai-api-spec-2024-06-01.yaml
    │       │   ├── oai-api-spec-2024-10-21.yaml
    │       │   └── oai-realtime-api-ws.json
    │       ├── policies
    │       │   ├── ai-model-inference-api-policy.xml
    │       │   ├── ai-search-api-policy.xml
    │       │   ├── frag-aad-auth.xml
    │       │   ├── frag-ai-usage.xml
    │       │   ├── frag-backend-routing.xml
    │       │   ├── frag-dynamic-throttling-assignment.xml
    │       │   ├── frag-openai-usage-streaming.xml
    │       │   ├── frag-openai-usage.xml
    │       │   ├── frag-throttling-events.xml
    │       │   ├── frag-validate-routes.xml
    │       │   ├── hr_product_policy.xml
    │       │   ├── openai-realtime-policy.xml
    │       │   ├── openai_api_policy.xml
    │       │   ├── openai_api_policy_dynamic_throttling.xml
    │       │   ├── retail_product_policy.xml
    │       │   ├── search_hr_product_policy.xml
    │       │   └── translator-api-policy.xml
    │       ├── speech-api
    │       │   └── speech-api-3-1.json
    │       └── translator-api
    │       │   └── translator-api-spec.yaml
    │   ├── cosmos-db
    │       └── cosmos-db.bicep
    │   ├── event-hub
    │       └── event-hub.bicep
    │   ├── functionapp
    │       ├── functionapp.bicep
    │       └── storageaccount.bicep
    │   ├── logicapp
    │       ├── api-connection-access.bicep
    │       ├── api-connection.json
    │       └── logicapp.bicep
    │   ├── monitor
    │       ├── applicationinsights-dashboard.bicep
    │       ├── applicationinsights.bicep
    │       ├── loganalytics.bicep
    │       └── monitoring.bicep
    │   ├── networking
    │       ├── dns.bicep
    │       ├── private-endpoint.bicep
    │       ├── subnet.bicep
    │       ├── vnet-existing.bicep
    │       └── vnet.bicep
    │   ├── security
    │       ├── managed-identity-apim.bicep
    │       └── managed-identity-stream-analytics.bicep
    │   └── stream-analytics
    │       └── stream-analytics.bicep
├── scripts
    └── apim-event-hub-logger.ps1
└── src
    ├── apim
        ├── ai-search-api
        │   ├── ai-search-api-policy.xml
        │   └── ai-search-api-spec.yaml
        ├── http
        │   └── chat.http
        ├── oa-fragments-archived
        │   ├── oai-blocked-streaming-in-policy.xml
        │   ├── oai-clusters-lb-configuration-be-policy.xml
        │   ├── oai-clusters-lb-configuration-in-policy.xml
        │   └── oai-usage-eventhub-out-policy.xml
        ├── oa-weighted-lb
        │   ├── oai-clusters-weighted-lb-configuration-be-policy.xml
        │   └── oai-clusters-weighted-lb-configuration-in-policy.xml
        └── oai-api
        │   ├── oai-api-policy-archived.xml
        │   ├── oai-api-policy.xml
        │   ├── oai-api-spec-2024-02-01.yaml
        │   └── oai-api-spec.yaml
    ├── testing
        └── openai-testing.http
    ├── usage-ingestion-function
        ├── .gitignore
        ├── Program.cs
        ├── Properties
        │   └── launchSettings.json
        ├── UsageProcessorFunction.cs
        ├── host.json
        └── usage-ingestion-func.csproj
    ├── usage-ingestion-logicapp
        ├── .funcignore
        ├── ai-usage-ingestion
        │   └── workflow.json
        ├── ai-usage-streaming-ingestion
        │   └── workflow.json
        ├── connections.json
        ├── host.json
        ├── package.json
        └── workflow-designtime
        │   └── host.json
    └── usage-reports
        ├── AI-Hub-Gateway-Usage-Report-v1-3.pbit
        ├── AI-Hub-Gateway-Usage-Report-v1-3.pbix
        ├── AI-Hub-Gateway-Usage-Report-v1-4.pbix
        ├── AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix
        ├── AI-Search-Cost-Estimation-Logic.md
        ├── model-pricing.json
        └── usage-record.json


/.azdo/pipelines/azure-dev.yml:
--------------------------------------------------------------------------------
 1 | # Run when commits are pushed to main
 2 | trigger:
 3 |   - main
 4 | 
 5 | pool:
 6 |   vmImage: ubuntu-latest
 7 | 
 8 | steps:
 9 |   # The setup-azd@0 task must be installed manually in your Azure DevOps organization.
10 |   # If you can't install it, remove this step and use the Bash script below
11 |   # to install azd instead.
12 |   - task: setup-azd@0
13 |     displayName: Install azd
14 | 
15 |   # If you can't install the task above in your organization, comment it out and uncomment the task below to install azd
16 |   # - task: Bash@3
17 |   #   displayName: Install azd
18 |   #   inputs:
19 |   #     targetType: 'inline'
20 |   #     script: |
21 |   #       curl -fsSL https://aka.ms/install-azd.sh | bash
22 | 
23 |   # Make azd delegate authentication to the az CLI so it can use the service connection provided by AzureCLI@2
24 |   - pwsh: |
25 |       azd config set auth.useAzCliAuth "true"
26 |     displayName: Configure AZD to Use AZ CLI Authentication.
27 |   - task: AzureCLI@2
28 |     displayName: Provision Infrastructure
29 |     inputs:
30 |       azureSubscription: azconnection
31 |       scriptType: bash
32 |       scriptLocation: inlineScript
33 |       keepAzSessionActive: true
34 |       inlineScript: |
35 |         azd provision --no-prompt
36 |     env:
37 |       AZURE_SUBSCRIPTION_ID: $(AZURE_SUBSCRIPTION_ID)
38 |       AZURE_ENV_NAME: $(AZURE_ENV_NAME)
39 |       AZURE_LOCATION: $(AZURE_LOCATION)
40 |       AZD_INITIAL_ENVIRONMENT_CONFIG: $(AZD_INITIAL_ENVIRONMENT_CONFIG)
41 | 
42 |   - task: AzureCLI@2
43 |     displayName: Deploy Application
44 |     inputs:
45 |       azureSubscription: azconnection
46 |       scriptType: bash
47 |       scriptLocation: inlineScript
48 |       keepAzSessionActive: true
49 |       inlineScript: |
50 |         azd deploy --no-prompt
51 |     env:
52 |       AZURE_SUBSCRIPTION_ID: $(AZURE_SUBSCRIPTION_ID)
53 |       AZURE_ENV_NAME: $(AZURE_ENV_NAME)
54 |       AZURE_LOCATION: $(AZURE_LOCATION)
55 | 
56 | 


--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 |   "recommendations": [
3 |     "ms-azuretools.vscode-azurelogicapps",
4 |     "ms-azuretools.vscode-azurefunctions",
5 |     "ms-dotnettools.csharp"
6 |   ]
7 | }
8 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "0.2.0",
 3 |     "configurations": [
 4 |         {
 5 |             "name": "Attach to .NET Functions",
 6 |             "type": "coreclr",
 7 |             "request": "attach",
 8 |             "processId": "${command:azureLogicAppsStandard.pickProcess}"
 9 |         }
10 |     ]
11 | }


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "azureFunctions.deploySubpath": "src\\usage-ingestion-function/bin/Release/net8.0/publish",
 3 |     "azureFunctions.projectLanguage": "C#",
 4 |     "azureFunctions.projectRuntime": "~4",
 5 |     "debug.internalConsoleOptions": "neverOpen",
 6 |     "azureFunctions.preDeployTask": "publish (functions)",
 7 |     "azureLogicAppsStandard.deploySubpath": "src\\usage-ingestion-logicapp",
 8 |     "azureLogicAppsStandard.projectLanguage": "JavaScript",
 9 |     "azureLogicAppsStandard.projectRuntime": "~4",
10 |     "azureFunctions.suppressProject": true
11 | }


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
  1 | {
  2 | 	"version": "2.0.0",
  3 | 	"tasks": [
  4 | 		{
  5 | 			"label": "clean (functions)",
  6 | 			"command": "dotnet",
  7 | 			"args": [
  8 | 				"clean",
  9 | 				"/property:GenerateFullPaths=true",
 10 | 				"/consoleloggerparameters:NoSummary"
 11 | 			],
 12 | 			"type": "process",
 13 | 			"problemMatcher": "$msCompile",
 14 | 			"options": {
 15 | 				"cwd": "${workspaceFolder}/src\\usage-ingestion-function"
 16 | 			}
 17 | 		},
 18 | 		{
 19 | 			"label": "build (functions)",
 20 | 			"command": "dotnet",
 21 | 			"args": [
 22 | 				"build",
 23 | 				"/property:GenerateFullPaths=true",
 24 | 				"/consoleloggerparameters:NoSummary"
 25 | 			],
 26 | 			"type": "process",
 27 | 			"dependsOn": "clean (functions)",
 28 | 			"group": {
 29 | 				"kind": "build",
 30 | 				"isDefault": true
 31 | 			},
 32 | 			"problemMatcher": "$msCompile",
 33 | 			"options": {
 34 | 				"cwd": "${workspaceFolder}/src\\usage-ingestion-function"
 35 | 			}
 36 | 		},
 37 | 		{
 38 | 			"label": "clean release (functions)",
 39 | 			"command": "dotnet",
 40 | 			"args": [
 41 | 				"clean",
 42 | 				"--configuration",
 43 | 				"Release",
 44 | 				"/property:GenerateFullPaths=true",
 45 | 				"/consoleloggerparameters:NoSummary"
 46 | 			],
 47 | 			"type": "process",
 48 | 			"problemMatcher": "$msCompile",
 49 | 			"options": {
 50 | 				"cwd": "${workspaceFolder}/src\\usage-ingestion-function"
 51 | 			}
 52 | 		},
 53 | 		{
 54 | 			"label": "publish (functions)",
 55 | 			"command": "dotnet",
 56 | 			"args": [
 57 | 				"publish",
 58 | 				"--configuration",
 59 | 				"Release",
 60 | 				"/property:GenerateFullPaths=true",
 61 | 				"/consoleloggerparameters:NoSummary"
 62 | 			],
 63 | 			"type": "process",
 64 | 			"dependsOn": "clean release (functions)",
 65 | 			"problemMatcher": "$msCompile",
 66 | 			"options": {
 67 | 				"cwd": "${workspaceFolder}/src\\usage-ingestion-function"
 68 | 			}
 69 | 		},
 70 | 		{
 71 | 			"type": "func",
 72 | 			"dependsOn": "build (functions)",
 73 | 			"options": {
 74 | 				"cwd": "${workspaceFolder}/src\\usage-ingestion-function/bin/Debug/net8.0"
 75 | 			},
 76 | 			"command": "host start",
 77 | 			"isBackground": true,
 78 | 			"problemMatcher": "$func-dotnet-watch"
 79 | 		},
 80 | 		{
 81 | 			"label": "generateDebugSymbols",
 82 | 			"command": "${config:azureLogicAppsStandard.dotnetBinaryPath}",
 83 | 			"args": [
 84 | 				"${input:getDebugSymbolDll}"
 85 | 			],
 86 | 			"type": "process",
 87 | 			"problemMatcher": "$msCompile",
 88 | 			"options": {
 89 | 				"cwd": "${workspaceFolder}/src\\usage-ingestion-logicapp"
 90 | 			}
 91 | 		},
 92 | 		{
 93 | 			"type": "shell",
 94 | 			"command": "${config:azureLogicAppsStandard.funcCoreToolsBinaryPath}",
 95 | 			"args": [
 96 | 				"host",
 97 | 				"start"
 98 | 			],
 99 | 			"options": {
100 | 				"env": {
101 | 					"PATH": "${config:azureLogicAppsStandard.autoRuntimeDependenciesPath}\\NodeJs;${config:azureLogicAppsStandard.autoRuntimeDependenciesPath}\\DotNetSDK;$env:PATH"
102 | 				},
103 | 				"cwd": "${workspaceFolder}/src\\usage-ingestion-logicapp"
104 | 			},
105 | 			"problemMatcher": "$func-watch",
106 | 			"isBackground": true,
107 | 			"label": "func: host start",
108 | 			"group": {
109 | 				"kind": "build",
110 | 				"isDefault": true
111 | 			}
112 | 		}
113 | 	],
114 | 	"inputs": [
115 | 		{
116 | 			"id": "getDebugSymbolDll",
117 | 			"type": "command",
118 | 			"command": "azureLogicAppsStandard.getDebugSymbolDll"
119 | 		}
120 | 	]
121 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Azure Samples
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/assets/ai-hub-gateway-benefits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-hub-gateway-benefits.png


--------------------------------------------------------------------------------
/assets/ai-search-api-call.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-search-api-call.png


--------------------------------------------------------------------------------
/assets/ai-studio-connected-resources.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-connected-resources.png


--------------------------------------------------------------------------------
/assets/ai-studio-new-connections.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-new-connections.png


--------------------------------------------------------------------------------
/assets/ai-studio-new-manual-ai-openai.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-new-manual-ai-openai.png


--------------------------------------------------------------------------------
/assets/ai-studio-new-manual-ai-search.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-new-manual-ai-search.png


--------------------------------------------------------------------------------
/assets/ai-studio-prompt-flow-gpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-prompt-flow-gpt.png


--------------------------------------------------------------------------------
/assets/ai-studio-prompt-flow-inputs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-prompt-flow-inputs.png


--------------------------------------------------------------------------------
/assets/apim-test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/apim-test.png


--------------------------------------------------------------------------------
/assets/architecture-1-0-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/architecture-1-0-5.png


--------------------------------------------------------------------------------
/assets/architecture-1-0-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/architecture-1-0-6.png


--------------------------------------------------------------------------------
/assets/azure-openai-landing-zone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/azure-openai-landing-zone.png


--------------------------------------------------------------------------------
/assets/code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/code.png


--------------------------------------------------------------------------------
/assets/cosmos-db-firwall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/cosmos-db-firwall.png


--------------------------------------------------------------------------------
/assets/cosmos-db-model-pricing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/cosmos-db-model-pricing.png


--------------------------------------------------------------------------------
/assets/customer-truth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/customer-truth.png


--------------------------------------------------------------------------------
/assets/oai-logicapps-nonstreaming.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/oai-logicapps-nonstreaming.png


--------------------------------------------------------------------------------
/assets/oai-logicapps-streaming.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/oai-logicapps-streaming.png


--------------------------------------------------------------------------------
/assets/one-click-deploy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/one-click-deploy.png


--------------------------------------------------------------------------------
/assets/power-bi-data-final.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-final.png


--------------------------------------------------------------------------------
/assets/power-bi-data-source-add.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-add.png


--------------------------------------------------------------------------------
/assets/power-bi-data-source-adv-editor-update.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-adv-editor-update.png


--------------------------------------------------------------------------------
/assets/power-bi-data-source-adv-editor-update2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-adv-editor-update2.png


--------------------------------------------------------------------------------
/assets/power-bi-data-source-adv-editor.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-adv-editor.png


--------------------------------------------------------------------------------
/assets/power-bi-data-source-model-pricing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-model-pricing.png


--------------------------------------------------------------------------------
/assets/power-bi-data-source-transform.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-transform.png


--------------------------------------------------------------------------------
/assets/power-bi-data-source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source.png


--------------------------------------------------------------------------------
/assets/power-bi-percentage-dashboad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-percentage-dashboad.png


--------------------------------------------------------------------------------
/assets/powerbi-relationship.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/powerbi-relationship.png


--------------------------------------------------------------------------------
/assets/powerbi-usage-dashboard-old.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/powerbi-usage-dashboard-old.png


--------------------------------------------------------------------------------
/assets/powerbi-usage-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/powerbi-usage-dashboard.png


--------------------------------------------------------------------------------
/assets/supporting-documents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/supporting-documents.png


--------------------------------------------------------------------------------
/assets/throttling-events-alert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/throttling-events-alert.png


--------------------------------------------------------------------------------
/assets/throttling-events-app-insights.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/throttling-events-app-insights.png


--------------------------------------------------------------------------------
/assets/user-story.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/user-story.png


--------------------------------------------------------------------------------
/azure.yaml:
--------------------------------------------------------------------------------
 1 | # Application name
 2 | name: ai-hub-gateway-landing-zone
 3 | metadata:
 4 |   template: ai-hub-gateway-landing-zone-v2
 5 | infra:
 6 |   provider: bicep
 7 | services:
 8 |   # usageProcessingFunctionApp:
 9 |   #   project: ./src/usage-ingestion-function
10 |   #   language: dotnet
11 |   #   host: function
12 |   usageProcessingLogicApp:
13 |     project: ./src/usage-ingestion-logicapp
14 |     language: js
15 |     host: function


--------------------------------------------------------------------------------
/guides/ai-hub-gateway-hybrid-deployment.md:
--------------------------------------------------------------------------------
 1 | # Hybrid deployment of AI Hub Gateway
 2 | 
 3 | Azure API Management (APIM) has 3 components: 
 4 | - API Gateway: the runtime component that handles API requests. It can be deployed on Azure as a Managed Gateway or anywhere else (such as on-premises) as a Self-hosted Gateway.
 5 | - Developer Portal: the self-service portal where developers discover and consume APIs.
 6 | - API Management Service: the management plane that manages the API Gateway and the Developer Portal.
 7 | 
 8 | Because the APIM API Gateway can be hosted anywhere, this walkthrough deploys it on Azure Container Apps (in a similar fashion, it can be deployed on-premises on a compliant Kubernetes cluster or VM).
 9 | 
10 | The Developer Portal and API Management Service will remain hosted on Azure.
11 | 
12 | ## Creating containerized hosting environment
13 | 
14 | Here I will create a resource group, a Container Apps environment, and a container app to host the APIM API gateway.
15 | 
16 | ```bash
17 | PROJECT=ai-gateway
18 | RESOURCE_GROUP=rg-$PROJECT
19 | ACA_SELFHOSTED_NAME=aca-$PROJECT-app
20 | ACA_SELFHOSTED_ENV=aca-$PROJECT-env
21 | LOCATION=northeurope
22 | 
23 | az group create --name $RESOURCE_GROUP --location $LOCATION
24 | 
25 | az containerapp env create --name $ACA_SELFHOSTED_ENV --resource-group $RESOURCE_GROUP --location $LOCATION
26 | 
27 | # Getting APIM self-hosted gateway endpoint and token
28 | # You can get these values from APIM - Gateway - Self-hosted gateway configuration - Deployment - Docker
29 | ENDPOINT="<APIM configuration endpoint>"
30 | TOKEN="REPLACE_WITH_YOUR_KEY"
31 | 
32 | 
33 | az containerapp create --name $ACA_SELFHOSTED_NAME \
34 |   --environment $ACA_SELFHOSTED_ENV \
35 |   --resource-group $RESOURCE_GROUP \
36 |   --ingress 'external' \
37 |   --image mcr.microsoft.com/azure-api-management/gateway:2.5.0 \
38 |   --target-port 8080 \
39 |   --query properties.configuration.ingress.fqdn \
40 |   --env-vars "config.service.endpoint"="$ENDPOINT" "config.service.auth"="$TOKEN" "net.server.http.forwarded.proto.enabled"="true"
41 | 
42 | # Testing the deployment (you should get an empty 200 response)
43 | GATEWAY_URL=$(az containerapp show --name $ACA_SELFHOSTED_NAME --resource-group $RESOURCE_GROUP --query "properties.configuration.ingress.fqdn" --output tsv)
44 | echo $GATEWAY_URL
45 | curl -i https://$GATEWAY_URL/status-0123456789abcdef
46 | 
47 | ```
48 | 
49 | 


--------------------------------------------------------------------------------
/guides/ai-studio-integration.md:
--------------------------------------------------------------------------------
 1 | # AI Studio Integration
 2 | 
 3 | Azure AI Studio is a unified platform for developing and deploying generative AI apps responsibly.
 4 | 
 5 | It offers prebuilt and customizable models, using your data to innovate at scale.
 6 | 
 7 | Integrating AI Hub Gateway with Azure AI Studio allows you to access the AI Hub Gateway governed AI services (like Azure OpenAI and Azure AI Search) to build AI solutions.
 8 | 
 9 | This guide provides details about how this integration can be done.
10 | 
11 | ## Prerequisites
12 | 
13 | Because AI Studio still needs to connect to AI services through public endpoints, the AI Hub Gateway APIM endpoint needs to be publicly accessible.
14 | 
15 | Azure OpenAI & AI Search endpoints can be integrated through APIM 
16 | 1.	Requires APIM to be public 
17 | 
18 |     a.	Directly using APIM native capability (networking is set to None or External) to have public endpoint (not recommended)
19 |    
20 |     b.	Or indirectly through customer network appliances where APIM is fully private with networking set to Internal (recommended)
21 | 2.	Keep in mind that AI Studio tries to query the OpenAI service itself through ARM calls to retrieve the list of deployments; you will get warnings like (model deployment can’t be read) because APIM does not expose ARM APIs
22 | 3.	Select the AI Hub Gateway connected resource in AI Studio (like prompt flow) connections, and it will work as expected
23 | 4.	Above can scale basically to many other resources that AI Studio is capable of connecting to.
24 | 
25 | ## Connected resources
26 | 
27 | Using AI-Hub-Gateway with AI Studio is possible today through ```Connected resources```.
28 | 
29 | 


--------------------------------------------------------------------------------
/guides/architecture.md:
--------------------------------------------------------------------------------
 1 | ## AI Hub Gateway Landing Zone Architecture
 2 | The AI Hub Gateway Landing Zone architecture is designed to be a central hub for AI services, providing a single point of entry for AI services, and enabling the organization to manage and govern AI services in a consistent manner. 
 3 | 
 4 | ![AI Hub Gateway Landing Zone](../assets/architecture-1-0-6.png)
 5 | 
 6 | ### Azure architecture diagram
 7 | This example diagram shows how these different Azure services would interact in a classic [hub-spoke topology](https://learn.microsoft.com/en-us/azure/architecture/networking/architecture/hub-spoke?tabs=cli). 
 8 | 
 9 | ![AI Hub Gateway Landing Zone](../assets/azure-openai-landing-zone.png)
10 | 
11 | ### Networking
12 | 
13 | The AI Landing Zone Virtual Network could be connected to the spokes via [virtual network peering](https://learn.microsoft.com/en-us/azure/virtual-network/virtual-network-peering-overview). The different applications (applications A, B and C) residing in the spoke networks would be able to resolve the API Management endpoint for their AI service consumption.
14 | 
15 | The different Azure OpenAI services would not be accessible to other external services; they would only be accessible through the API Management instance, which communicates with them via [Private Links](https://learn.microsoft.com/en-us/azure/private-link/private-link-overview).
16 | 
17 | For more details, see the [networking components section](#networking-components).
18 | 
19 | ### AI Services and Indexes
20 | The API Management instance would be able to communicate with one-to-many Azure OpenAI and/or AI service, as illustrated in the diagram. This can be a mix of 1 or more services, in 1 or more subscriptions, and also be of different model types, such as [Azure OpenAI Services](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) or other models in [Azure Machine Learning Studio, for example Mistral](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-models-mistral?view=azureml-api-2).
21 | 
22 | It is also possible to make [Azure AI Search](https://learn.microsoft.com/en-us/azure/search/search-what-is-azure-search) indexes available through the API Management instance. This is particularly useful when wanting to consume the same index of data in several applications, while ensuring finer-grained control on the index.
23 | 
24 | For more details, see the [additional components section](#additional-components-deployment).
25 | 
26 | ### Other data sources
27 | Other data sources, such as Cosmos DB or SQL databases, could also be used as data sources to create the AI Search index.
28 | 
29 | ### Cross-charging and token consumption count
30 | When sharing AI consumption with different applications, sometimes it would be beneficial to know the token consumption for each application, in order to calculate charge-backs. This is possible using a combination of different services, such as Event Hub, Synapse Analytics, Cosmos DB and PowerBI.
31 | For more details, see the [data and charge-back platforms section](#data-and-charge-back-platforms).
32 | 
33 | ## Architecture components
34 | The AI Hub Gateway Landing Zone consists of the following components:
35 | 
36 | ### Main gateway components
37 | These are the critical components of the AI Hub Gateway Landing Zone that provide the capabilities outlined above.
38 | 
39 | - **Azure API Management**: Azure API Management is a fully managed service that enables customers to publish, secure, transform, maintain, and monitor APIs.
40 | - **Application Insights**: Application Insights is an extensible Application Performance Management (APM) service that provides critical insights on the gateway operational performance.
41 | - **Event Hub**: Event Hub is a fully managed, real-time data ingestion service that’s simple, trusted, and scalable and it is used to stream usage and charge-back data to target data and charge back platforms.
42 | 
43 | ### AI services
44 | These are the Azure AI services that will be exposed through the AI Hub Gateway Landing Zone.
45 | 
46 | Examples of these services could include:
47 | 
48 | - **Azure OpenAI**: Azure OpenAI is a cloud deployment of cutting edge generative models from OpenAI (like ChatGPT, DALL.E and more).
49 | - **Azure AI Search**: Azure AI Search is a cloud search service with built-in AI capabilities that enrich all types of information to help users identify and explore relevant content at scale (critical component of RAG-based generative AI applications).
50 | - **Azure Cognitive Services**: Azure Cognitive Services is a set of cloud-based services with REST APIs and client library SDKs available to help you build cognitive intelligence into your applications.
51 | 
52 | ### Backend services
53 | These are the backend services that will include your AI business logic and experiences.
54 | 
55 | You can host backend services on Azure, on-premises, or other clouds.
56 | 
57 | Examples of these services could include:
58 | - **Azure Kubernetes Service**: Azure Kubernetes Service (AKS) is a managed container orchestration service, based on the open-source Kubernetes system, which is available on the Microsoft Azure public cloud.
59 | - **Azure Container Apps**: Azure Container Apps is a fully managed serverless container service that enables you to run containers on Azure without having to manage the infrastructure.
60 | - **Azure App Service**: Azure App Service is a fully managed platform for building, deploying, and scaling web apps.
61 | 
62 | Also in these backends, it is common to use an **AI Orchestrator** framework like [Semantic Kernel](https://github.com/microsoft/semantic-kernel) or [Langchain](https://www.langchain.com/) to orchestrate sophisticated AI workflows and scenarios.
63 | 
64 | ### <a name="data-and-charge-back-platforms">Data and charge-back platforms</a>
65 | 
66 | As part of the AI Hub Gateway Landing Zone, you will need to integrate with existing data and charge-back platforms to track usage and charge-back to the respective business units.
67 | 
68 | Examples of these platforms could include:
69 | - **Cosmos DB**: Azure Cosmos DB is a fully managed NoSQL database for storing usage and charge-back data.
70 | - **Azure Synapse Analytics**: Azure Synapse Analytics is an analytics service that brings together enterprise data warehousing and big data analytics.
71 | - **Microsoft Fabric**: Microsoft Fabric is a cloud-based platform that provides a scalable, reliable, and secure infrastructure for building and managing data and analytics solutions.
72 | - **PowerBI**: Power BI is a business analytics service by Microsoft. It aims to provide interactive visualizations and business intelligence capabilities with an interface simple enough for end users to create their own reports and dashboards.


--------------------------------------------------------------------------------
/guides/deployment-troubleshooting.md:
--------------------------------------------------------------------------------
 1 | # Deployment troubleshooting
 2 | 
 3 | This guide provides troubleshooting tips for common issues that you might encounter when deploying this accelerator to Azure using Azure Developer CLI or Bicep.
 4 | 
 5 | ## Transient errors
 6 | 
 7 | You might want to try running the deployment again, as it might resolve some of the transient issues.
 8 | 
 9 | ```bash
10 | azd up
11 | ```
12 | 
13 | This is usually a transient issue. Please try again after some time (it might take up to 1 hour unfortunately).
14 | 
15 | Below are a few examples of transient issues:
16 | 
17 | - Unable to edit or replace deployment 'application-insights-dashboard'
18 | 
19 | - Runtime Scale Monitoring is not supported for this Functions version
20 | 
21 | - Failed to connect to management endpoint apim-RANDOM.management.azure-api.net:3443 for a service deployed in a Virtual Network. Make sure to follow guidance at https://aka.ms/apim-vnet-common-issues for Inbound connectivity to Management endpoint. Check 'ApiManagement Control Plane - inbound' connectivity at https://aka.ms/apimnetworkstatus. (Code: ManagementApiRequestFailed)
22 | 
23 | - Managed identity id not found
24 | 
25 | - Timeout: Call to Management API apim-RANDOM.management.azure-api.net:3443 timed out for the Developer SKU service which will have inherent capacity issues due to it's scale. Please refer to SLA at https://aka.ms/apimsla and considering upgrading to a SKU Tier with higher SLA.
26 | 
27 | - Deployment 'azure-ai-search-api' could not be found (this will only happen if you set ```enableAzureAISearch``` to false, and you can ignore it)


--------------------------------------------------------------------------------
/guides/deployment.md:
--------------------------------------------------------------------------------
  1 | ## Primary components deployment
  2 | 
  3 | Below is a high-level guide to deploy the AI Hub Gateway accelerator main components.
  4 | 
  5 | ![components](../assets/azure-resources-diagram.svg)
  6 | 
  7 | ### <a name="networking-components">Networking components</a>
  8 | 
  9 | By default, the infrastructure script (in Bicep) provisions the following networking components:
 10 | 
 11 | - **Virtual network & subnet**: A virtual network to host the AI Hub Gateway Landing Zone.
 12 |     - **APIM subnet** to be deployed in internal/external mode requires a subnet with /27 or larger with **required Network Security Group (NSG)** that allows the critical rules.
 13 |     - **Private endpoints subnet(s)**: Private endpoints for the AI services, Cosmos DB, Event Hub, Monitor, Storage to be exposed through the AI Hub Gateway Landing Zone. Usually a /27 or larger subnet would be sufficient.
 14 |     - **Azure Function** subnet to be used for injecting the function runtime into the VNet so it can access both Cosmos DB and Event Hub private endpoints. This subnet is delegated to ```Microsoft.Web/serverFarms```.
 15 | - **Private DNS zones**: Private DNS zones to resolve the private endpoints.
 16 |     - Internal APIM relies on **private DNS** to resolve the APIM endpoints, so an Azure Private DNS zone or other DNS solution is required.
 17 |     - **Private endpoints DNS zone**: A private DNS zone to resolve the private endpoints for the connected Azure PaaS services.
 18 |       - 'privatelink.openai.azure.com'
 19 |       - 'privatelink.vaultcore.azure.net'
 20 |       - 'privatelink.monitor.azure.com'
 21 |       - 'privatelink.servicebus.windows.net'
 22 |       - 'privatelink.documents.azure.com'
 23 |       - 'privatelink.blob.core.windows.net'
 24 |       - 'privatelink.file.core.windows.net'
 25 | 
 26 | Additional networking consideration that you might need to take into account:
 27 | - **ExpressRoute or VPN**: If you are planning to connect to on-premises or other clouds, you will need to have an ExpressRoute or VPN connection.
 28 | - **DMZ appliances**: If you are planning to expose backend and gateway services on the internet, you need to have a Web Application Firewall (like Azure Front Door & Application Gateway) and network firewall (like Azure Firewall) to govern both ingress and egress traffic.
 29 | - **Custom Domains** for APIM, especially if it is in "internal mode", to allow its private DNS resolution without conflicting with any external APIM instances that you may have (by default, all APIM instances use the *.azure-api.net domain regardless of being external or internal).
 30 | 
 31 | ### Azure API Management (APIM)
 32 | APIM is the central component of the AI Hub Gateway Landing Zone. 
 33 | 
 34 | The recommended deployment of APIM is in **internal mode**, to ensure that the gateway is not exposed to the internet and is only accessible through the private network.
 35 | 
 36 | **internal mode** requires a subnet with /27 or larger with NSG that allows the critical rules in addition to management public IP (with DNS label set)
 37 | 
 38 | This is a great starting point to deploy APIM in internal mode: [Deploy Azure API Management in internal mode](https://learn.microsoft.com/en-us/azure/api-management/api-management-using-with-internal-vnet?tabs=stv2)
 39 | 
 40 | ### Application Insights
 41 | Application Insights is a critical component of the AI Hub Gateway Landing Zone, and it is used to monitor the operational performance of the gateway.
 42 | 
 43 | To deploy Application Insights, you can use the following guide: [How to integrate Azure API Management with Azure Application Insights](https://azure.github.io/apim-lab/apim-lab/6-analytics-monitoring/analytics-monitoring-6-2-application-insights.html) 
 44 | 
 45 | ### Event Hub
 46 | 
 47 | Event Hub is used to stream usage and charge-back data to target data and charge back platforms.
 48 | 
 49 | To deploy Event Hub, you can use the following guide: [Logging with Event Hub](https://azure.github.io/apim-lab/apim-lab/6-analytics-monitoring/analytics-monitoring-6-3-event-hub.html)
 50 | 
 51 | ### <a name="additional-components-deployment">Additional components deployment</a>
 52 | 
 53 | With the primary components deployed, you can now deploy or identify the AI services and backend services that will be exposed through the AI Hub Gateway.
 54 | 
 55 | Additional components may include:
 56 | - **Azure OpenAI**: You can have 1 or more OpenAI services deployed (like one with PTU and one with PAYG)
 57 | - **Azure AI Search**: Azure AI Search with indexed data (1 or more indexes)
 58 | - **Backend services**: Backend services that will include your AI business logic and experiences (like a python chat app deployed on Azure App Service as an example).
 59 | 
 60 | For the above components, we need to ensure the following:
 61 | - **Private endpoints**: The AI services should be exposed through private endpoints.
 62 | - **Private DNS zone**: A private DNS zone to resolve the private endpoints for the connected Azure AI services.
 63 | - **APIM Managed identity**: Is granted access to Azure AI services (like OpenAI and AI Search).
 64 | - **Update endpoint and keys**: The backend services should use AI Hub Gateway endpoint and keys.
 65 | - **Usage & charge-back**: Identify the data pipeline for tokens usage and charge back based on Event Hub integration.
 66 | 
 67 | ### Deployment summary
 68 | 
 69 | When deployment of primary components is completed, you will have the following components deployed:
 70 | 
 71 | - **Azure API Management**
 72 | - **Application Insights**
 73 | - **Event Hub**
 74 | 
 75 | Network wiring also will be established to allow the gateway to access the AI services through private endpoints, internet access through DMZ appliances and backend systems through private network should be planned.
 76 | 
 77 | With the additional components deployed, you will have the following components identified:
 78 | - **Azure OpenAI** instances (by default 3 across 3 regions)
 79 | - **Cosmos DB** for ingesting AI usage metrics 
 80 | - **Azure Functions + Storage** for processing AI usage metrics from event hub to cosmos db.
 81 | - **Managed identities** for APIM to access the AI services and for Azure Function to access Cosmos DB and Event Hub.
 82 | 
 83 | ## Azure API Management configuration
 84 | To configure Azure API Management to expose the AI services through the AI Hub Gateway Landing Zone, you will need to configure the following:
 85 | 
 86 | - **APIs**: Import APIs definitions to APIM.
 87 | - **Products**: Create products to bundle one or more APIs under a common access terms/policies.
 88 | - **Policies**: Apply policies to the APIs to manage access, rate limits, and other governance policies.
 89 | 
 90 | ### APIs import
 91 | In this guide, I will be importing both OpenAI and AI Search APIs to APIM.
 92 | 
 93 | Many Azure services APIs are available in [Azure REST API specs](https://github.com/Azure/azure-rest-api-specs/tree/main) reference on GitHub.
 94 | 
 95 | #### Azure OpenAI API
 96 | Although I have included the OpenAI API definition [in this repository](../src/apim/oai-api/oai-api-spec-2024-02-01.yaml), you can also find the Azure OpenAI API definition in here: [Azure OpenAI API](https://github.com/Azure/azure-rest-api-specs/tree/main/specification/cognitiveservices/data-plane/AzureOpenAI/inference)
 97 | 
 98 | The one included in the repository is the stable inference version 2024-02-01.
 99 | 
100 | The only main change you need to make in the downloaded API definition is to update ```"url": "https://{endpoint}/openai",``` to ```"url": "https://TO-BE-RELACED/openai",``` to avoid conflict with APIM import validation.
101 | 
102 | > **Important**: You need to append ```/openai``` to your selected ```API URL suffix``` in APIM import dialog to be something like (ai-hub-gw/openai). This is important as OpenAI SDK append /openai to the endpoint URL (not doing so you might get 404 errors from the client connecting to AI Hub Gateway endpoint).
103 | 
104 | One last thing, you need to update APIM subscription header name from ```Ocp-Apim-Subscription-Key``` to ```api-key``` to match the OpenAI SDK default implementation (not doing so you might get 401 unauthorized error).
105 | 
106 | #### Azure AI Search API
107 | Same story with Azure AI Search, you can find a local copy [in this repository](../src/apim/ai-search-api/ai-search-api-spec.yaml).
108 | 
109 | I had to make a few additional changes to the downloaded API definition to make it work with APIM import 
110 | validation.
111 | 
112 | Public documentation for AI Search API can be found here: [Azure AI Search API](https://github.com/Azure/azure-rest-api-specs/tree/main/specification/search/data-plane/Azure.Search) (I used stable 2023-11-01 version).


--------------------------------------------------------------------------------
/guides/openai-onboarding.md:
--------------------------------------------------------------------------------
  1 | # Onboarding an OpenAI Instance or Consumer Application
  2 | 
  3 | This guide will walk you through the steps to configure Azure API Management (APIM) to work with a new consumer or Azure OpenAI deployment.
  4 | 
  5 | ## Table of Contents
  6 | 1. [Prerequisites](#prerequisites)
  7 | 2. [Step-by-Step Configuration: Onboarding a New Azure OpenAI Resource](#step-by-step-configuration-onboarding-a-new-azure-openai-resource)
  8 |     1. [Ensure Line of Sight to OpenAI](#1-ensure-line-of-sight-to-openai)
  9 |     2. [Grant OpenAI User Access to APIM User Managed Identity](#2-grant-openai-user-access-to-apim-user-managed-identity)
 10 |     3. [Identify All Deployment Names Associated with OpenAI](#3-identify-all-deployment-names-associated-with-openai)
 11 |     4. [Create APIM Backend for OpenAI](#4-create-apim-backend-for-openai)
 12 |     5. [Update Routing Configuration](#5-update-routing-configuration)
 13 |     6. [Testing the Revision](#6-testing-the-revision)
 14 |     7. [Marking Revision as Current](#7-marking-revision-as-current)
 15 |     8. [Enforcing Deployment-Level RBAC](#8-enforcing-deployment-level-rbac)
 16 | 3. [Step-by-Step Configuration: Onboarding a New Consumer](#step-by-step-configuration-onboarding-a-new-consumer)
 17 |     1. [Create New Product](#1-create-new-product)
 18 |     2. [Create New Subscription for the Product](#2-create-new-subscription-for-the-product)
 19 |     3. [Share APIM OpenAI Endpoint, Subscription Key, and Available Models](#3-share-apim-openai-endpoint-subscription-key-and-available-models)
 20 | 
 21 | ## Prerequisites
 22 | 
 23 | Before starting, make sure you have:
 24 | - An operational AI Hub Gateway deployment.
 25 | - Access to the Azure OpenAI service if you are adding a new deployment.
 26 | - Azure Portal access.
 27 | 
 28 | ## Step-by-Step Configuration: Onboarding a New Azure OpenAI Resource
 29 | 
 30 | ### 1. Ensure Line of Sight to OpenAI
 31 | 
 32 | **Steps:**
 33 | 
 34 | 1. **Azure Portal:**
 35 |    - Navigate to your Virtual Network (VNet) where APIM is deployed.
 36 |    - Go to **DNS Servers** and ensure you have the correct DNS settings for resolving OpenAI endpoints.
 37 | 
 38 | 2. **DNS Configuration:**
 39 |    - If you're using custom DNS, ensure the DNS server can resolve OpenAI service endpoints.
 40 |    - You may need to add custom DNS entries to your DNS server for OpenAI services.
 41 |   
 42 | 3. **Network Configuration:**
 43 |    - Ensure that network connectivity is available between API Management and the Azure OpenAI Resource. If your Azure OpenAI Resource does not allow public networking, you may need to add a private endpoint in your Virtual Network. See: [Use private endpoints](https://learn.microsoft.com/en-us/azure/ai-services/cognitive-services-virtual-networks?tabs=portal#use-private-endpoints).
 44 | 
 45 | ### 2. Grant OpenAI User Access to APIM User Managed Identity
 46 | 
 47 | The identity of the Azure API Management needs access to perform inference calls on the AI Models.
 48 | 
 49 | **Steps:**
 50 | 
 51 | 1. **Azure Portal:**
 52 |    - Navigate to your Azure API Management instance.
 53 |    - Go to **Managed identities** under **Security** and ensure it is enabled.
 54 | 
 55 | 2. **Role Assignment:**
 56 |    - Navigate to your Azure OpenAI resource.
 57 |    - Go to **Access Control (IAM)** and click **Add role assignment**.
 58 |    - Select **Cognitive Services OpenAI User** role. See: [Role-based access control](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/role-based-access-control).
 59 |    - Assign this role to the APIM Managed Identity.
 60 | 
 61 | ### 3. Identify All Deployment Names Associated with OpenAI
 62 | 
 63 | **Steps:**
 64 | 
 65 | 1. **Azure Portal:**
 66 |    - Navigate to your Azure OpenAI resource.
 67 |    - Under **Deployments**, note down the names of all the deployments you have created. 
 68 | 
 69 | ### 4. Create APIM Backend for OpenAI
 70 | 
 71 | > [!TIP]
 72 | > Ensure that your backend-url ends with /openai
 73 | 
 74 | **Steps:**
 75 | 
 76 | 1. **Azure Portal:**
 77 |    - Navigate to your Azure API Management instance.
 78 |    - Go to **Backends** under **APIs**.
 79 |    - Click **+ Add** to create a new backend.
 80 |    - Configure the backend with the OpenAI endpoint URL and name it appropriately (it should end with `/openai/`).
 81 | 
 82 | ### 5. Update Routing Configuration
 83 | 
 84 | **Steps:**
 85 | 
 86 | 1. **Azure Portal:**
 87 |    - Navigate to your Azure API Management instance.
 88 |    - Go to **APIs**, select the OpenAI API, and navigate to **Design**.
 89 |    - Go to the menu on the **OpenAI API** and select **Add Revision** to create a new revision (to avoid downtime during implementation).
 90 |    - Under **Inbound processing**, update the policy to include the new routes and clusters for OpenAI deployments.
 91 | 
 92 | **Sample Configuration:**
 93 | 
 94 | ```xml
 95 | <set-variable name="oaClusters" value="@{
 96 |     // route is an Azure OpenAI API endpoint
 97 |     JArray routes = new JArray();
 98 |     JArray clusters = new JArray();
 99 |     
100 |     routes.Add(new JObject()
101 |     {
102 |         { "name", "EastUS" },
103 |         { "location", "eastus" },
104 |         { "backend-id", "openai-backend-0" },
105 |         { "priority", 1},
106 |         { "isThrottling", false }, 
107 |         { "retryAfter", DateTime.MinValue } 
108 |     });
109 | 
110 |     clusters.Add(new JObject()
111 |     {
112 |         { "deploymentName", "chat" },
113 |         { "routes", new JArray(routes[0]) }
114 |     });
115 | 
116 |     return clusters;   
117 | }" />
118 | ```
119 | 
120 | Ensure that the backend is linked with all available deployments for that endpoint by updating the clusters variable accordingly.
121 | 
122 | ### 6. Testing the Revision
123 | 
124 | **Steps:**
125 | 
126 | 1. **Azure Portal:**
127 |    - Navigate to your Azure API Management instance.
128 |    - Go to **APIs** and select the OpenAI API.
129 |    - Under **Test**, select the new revision and test the API endpoints to ensure they are working as expected.
130 | 
131 | ### 7. Marking Revision as Current
132 | 
133 | **Steps:**
134 | 
135 | 1. **Azure Portal:**
136 |    - Navigate to your Azure API Management instance.
137 |    - Go to **APIs**, select the OpenAI API, and navigate to **Revisions**.
138 |    - Select the new revision and click **Make current**.
139 | 
140 | ### 8. Enforcing Deployment-Level RBAC
141 | 
142 | In some cases, you might want to restrict access to specific models based on the business unit or team using the OpenAI endpoint. 
143 | 
144 | The following policy can be implemented at a product level to restrict access to specific model deployments. For more details, refer to the [Model-based RBAC guide](https://github.com/Azure-Samples/ai-hub-gateway-solution-accelerator/blob/main/guides/apim-configuration.md#model-based-rbac).
145 | 
146 | > [!CAUTION]
147 | > This policy will restrict access to only two deployments (gpt-4 and embedding). Any other model deployment will get a 401 Unauthorized response.
148 | 
149 | **Sample Policy:**
150 | 
151 | ```xml
152 | <inbound>
153 |     <base />
154 |     <!-- Restrict access for this product to specific models -->
155 |     <choose>
156 |         <when condition="@(!new [] { "gpt-4", "embedding" }.Contains(context.Request.MatchedParameters["deployment-id"] ?? String.Empty))">
157 |             <return-response>
158 |                 <set-status code="401" reason="Unauthorized" />
159 |             </return-response>
160 |         </when>
161 |     </choose>
162 | </inbound>
163 | ```
164 | 
165 | 
166 | ## Step-by-Step Configuration: Onboarding a New Consumer
167 | 
168 | ### 1. Create New Product
169 | 
170 | **Steps:**
171 | 
172 | 1. **Azure Portal:**
173 |    - Navigate to your Azure API Management instance.
174 |    - Go to **Products** and click **+ Add**.
175 |    - Configure the product with the appropriate settings for token throughput capacity and access to specific models (using product-level policies).
176 | 
177 | ### 2. Create New Subscription for the Product
178 | 
179 | **Steps:**
180 | 
181 | 1. **Azure Portal:**
182 |    - Navigate to your Azure API Management instance.
183 |    - Go to **Products**, select the newly created product, and navigate to **Subscriptions**.
184 |    - Click **+ Add** to create a new subscription.
185 |    - Provide the necessary details and generate a subscription key.
186 | 
187 | ### 3. Share APIM OpenAI Endpoint, Subscription Key, and Available Models
188 | 
189 | **Steps:**
190 | 
191 | 1. **Azure Portal:**
192 |    - Navigate to your Azure API Management instance.
193 |    - Go to **APIs**, select the OpenAI API, and copy the endpoint URL.
194 | 
195 | 
196 |    - Share the endpoint URL, subscription key, and list of available models with the team.
197 | 
198 | **Sample Configuration for Sharing:**
199 | > [!CAUTION]
200 | > A subscription key is like a password. Ensure you share it securely.
201 | 
202 | ```plaintext
203 | API Endpoint: https://apim-your-instance.azure-api.net/openai
204 | Subscription Key: {YourSubscriptionKey}
205 | Available Models: gpt-3.5-turbo, gpt-4, dall-e
206 | ```
207 | 


--------------------------------------------------------------------------------
/guides/openai-usage-ingestion.md:
--------------------------------------------------------------------------------
 1 | # Azure OpenAI Usage Ingestion
 2 | 
 3 | This guide explores the details of how AI Hub Gateway uses Logic Apps to ingest usage data from Azure OpenAI API for both streaming and non-streaming requests.
 4 | 
 5 | ## Prerequisites
 6 | 
 7 | The following components are configured as part of this accelerator:
 8 | 
 9 | - API Management service fully configured with all relevant policies as part of this accelerator
10 | - Logic App service integrated with vnet
11 | - Event hub configured as a logger in API Management
12 | - Cosmos DB account with SQL API that has been configured to store the usage data
13 | 
14 | ## Overview
15 | 
16 | There are two paths for ingesting usage data from Azure OpenAI API:
17 | 
18 | - **Non-streaming requests**: In this path, API Management publishes the usage data to Event Hub, which is then ingested by Logic App and stored in Cosmos DB.
19 | - **Streaming requests**: In this path, API Management publishes the usage data to ```Application Insights``` custom metrics, which is then ingested by Logic App and stored in Cosmos DB.
20 | 
21 | ## Non-streaming requests
22 | 
23 | This workflow is triggered by the Event Hub message that is published by API Management. The message is then ingested by Logic App and stored in Cosmos DB.
24 | 
25 | Here the ingestion is near real-time, as the message is processed as soon as it is published to Event Hub.
26 | 
27 | The primary policy fragment that is used is [frag-openai-usage.xml](../infra/modules/apim/policies/frag-openai-usage.xml)
28 | 
29 | ![Non-streaming requests](../assets/oai-logicapps-nonstreaming.png)
30 | 
31 | ## Streaming requests
32 | 
33 | This workflow is triggered by scheduled event (by default it runs twice every day).
34 | 
35 | The workflow uses Cosmos DB to maintain streaming export configurations, which control the time range that the data queried from Application Insights custom metrics should cover.
36 | 
37 | > Note: the frequency might need to be changed depending on how many streaming requests are being made to the API, knowing that an Azure Monitor query supports a maximum of 500,000 records per query. Minimum recommended frequency is once every hour if twice a day proves not to be sufficient.
38 | 
39 | The primary policy fragment that is used is [frag-openai-usage-streaming.xml](../infra/modules/apim/policies/frag-openai-usage-streaming.xml)
40 | 
41 | ![Streaming requests](../assets/oai-logicapps-streaming.png)
42 | 
43 | ## Logic App workflows
44 | 
45 | The Logic App workflows source code can be found in [/src/usage-ingestion-logicapp/](../src/usage-ingestion-logicapp/).
46 | 
47 | This folder contains the following:
48 | - [ai-usage-ingestion](../src/usage-ingestion-logicapp/ai-usage-ingestion) which processes usage published to Event Hub
49 | - [ai-usage-ingestion-streaming](../src/usage-ingestion-logicapp/ai-usage-ingestion-streaming) which processes usage from Application Insights custom metrics
50 | - [connections.json](../src/usage-ingestion-logicapp/connections.json) which contains the connection details for the Logic App workflows
51 |     - Event Hub connection (in app)
52 |     - Cosmos DB connection (in app)
53 |     - Application Insights connection (managed)


--------------------------------------------------------------------------------
/guides/power-bi-dashboard.md:
--------------------------------------------------------------------------------
 1 | # Power BI Dashboard
 2 | 
 3 | Power BI is a business analytics service by Microsoft. It aims to provide interactive visualizations and business intelligence capabilities with an interface simple enough for end users to create their own reports and dashboards.
 4 | 
 5 | In this accelerator, we will be using Power BI to create a dashboard that will display the data from the Cosmos DB Database.
 6 | 
 7 | ## Prerequisites
 8 | 
 9 | - Download and install the Power BI Desktop application from the [Microsoft Store on Windows](https://www.microsoft.com/store/productId/9NTXR16HNW1T?ocid=pdpshare) or from [App Store](https://go.microsoft.com/fwlink/?LinkId=526218&clcid=0x409) on Mac.
10 | 
11 | - Make sure that you can access the Cosmos DB from your local machine (you might need to allow your public IP to access Cosmos DB).
12 | 
13 | ![Cosmos DB Firewall](../assets/cosmos-db-firwall.png) 
14 | 
15 | - Insert initial data into the model-pricing container (sample data for the model-pricing container can be found in the [/src/usage-reports/model-pricing.json](../src/usage-reports/model-pricing.json)).
16 | 
17 | ![Insert Data](../assets/cosmos-db-model-pricing.png)
18 | 
19 | > **Note:** Pricing in the sample file is based on the public Azure pricing for the East US region (which is in many cases similar to other regions). You can review the prices in the Azure docs related to the service being used and update the ```model-pricing``` container accordingly.
20 | 
21 | Below is a sample pricing entry for gpt-4o:
22 | 
23 | ```json
24 | {
25 |     "id": "4",
26 |     "model": "gpt-4o",
27 |     "deploymentName": "gpt-4o",
28 |     "isActive": true,
29 |     "CostPerInputUnit": 0.005,
30 |     "CostPerOutputUnit": 0.015,
31 |     "CostUnit": 1000,
32 |     "BaseCost": 0,
33 |     "Currency": "USD",
34 |     "CalculationMethod": "tokens",
35 |     "region": "ALL"
36 | }
37 | ```
38 | 
39 | ## Preparing the Power BI Dashboard
40 | 
41 | Now you can open the [src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix](../src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix) file in the Power BI Desktop application.
42 | 
43 | As this Power BI file is using import mode, you should see some data already there from a previously connected data source.
44 | 
45 | In order to link the dashboard to the Cosmos DB, you need to update the connection string in the Power BI file.
46 | 
47 | 1. Click on "Transform Data" in the Home tab.
48 | 
49 | ![Transform Data](../assets/power-bi-data-source-transform.png)
50 | 
51 | 2. Right click on the "ai-usage-container" data table and select "Advanced Editor".
52 | 
53 | ![Edit Data](../assets/power-bi-data-source-adv-editor.png)
54 | 
55 | 3. Replace the Cosmos DB endpoint with the one you have deployed.
56 | 
57 | ![Edit Data AI Usage](../assets/power-bi-data-source-adv-editor-update.png)
58 | 
59 | 4. Repeat the same for model-pricing data table.
60 | 
61 | ![Edit Data Model Pricing](../assets/power-bi-data-source-adv-editor-update2.png)
62 | 
63 | 5. Click on "Refresh Preview" to force the Power BI to refresh the data.
64 | 
65 | 6. Click on "Close & Apply" to save the changes.
66 | 
67 | 7. Now you should see the data from the Cosmos DB in the Power BI.
68 | 
69 | ![Power BI Dashboard](../assets/power-bi-data-final.png)
70 | 
71 | 8. If you need to get fresh copy of the data, you can click on "Refresh" in the Home tab.


--------------------------------------------------------------------------------
/guides/routing-configurations.md:
--------------------------------------------------------------------------------
 1 | # Gateway routing configurations
 2 | 
 3 | ## Dynamic Throttling Assignment
 4 | 
 5 | Sometimes with reserved OpenAI models through PTU (provisioned throughput units), latency increases the closer you get to 100% utilization.
 6 | 
 7 | Although it is important to size correctly the capacity allocation for PTU, some occasional spikes can push the service to reach 90%+ utilization which results in increased latency.
 8 | 
 9 | In many cases, this is not a challenge, but in other cases where the use case is sensitive to latency this can potentially impact the experience.
10 | 
11 | AI Hub Gateway routing engine offers a way to handle such events by falling back to another OpenAI instance to relieve the primary PTU instance from being at maximum capacity.
12 | 
13 | Introducing ```Dynamic Throttling Assignment```, which is a routing strategy that allows you to define a target TPM; when it is reached, APIM will temporarily switch incoming traffic to a backup OpenAI service (by default for 30 seconds), allowing the PTU instance to regain capacity, and then resume using it.
14 | 
15 | ### Example
16 | 
17 | APIM, as part of AI Hub Gateway, is configured with 2 OpenAI services with a deployment called ptu-gpt4-o (notice primary PTU has priority 1 and PAYGO has priority 2)
18 | 
19 | An OpenAI deployment of gpt4-o has 50 PTU, which let's assume it can handle a 100K TPM (this is an estimate).
20 | 
21 | You can set a target of 80K TPM for that service, where APIM will use a rolling tokens-count against the deployment and automatically switch to the next priority OpenAI deployment once the target 80K TPM is reached.
22 | 
23 | In order to leverage dynamic throttling, you have to configure multiple points:
24 | 
25 | 1. Add ```targetTPMLimit``` to the OpenAI backend routes (inbound policy section):
26 | 
27 | ```csharp
28 | // Notice targetTPMLimit is set to 500 TPM to guide APIM to suspend traffic to this backend once that target is reached
29 | routes.Add(new JObject()
30 | {
31 |     { "name", "EastUS" },
32 |     { "location", "eastus" },
33 |     { "backend-id", "openai-backend-0" },
34 |     { "priority", 1},
35 |     { "targetTPMLimit", 500 },
36 |     { "isThrottling", false }, 
37 |     { "retryAfter", DateTime.MinValue } 
38 | });
39 | ```
40 | 
41 | 2. Setup a counter against the PTU deployment name for APIM to count the tokens (inbound policy section):
42 | 
43 | ```xml
44 | 
45 | <!-- Dynamic Throttling Assignment TPM counters (work only if the backend/deployment is not throttling) -->
46 | <choose>
47 |     <when condition="@(context.Request.MatchedParameters["deployment-id"] == "chat" && ((JArray)context.Variables["routes"])[0]["isThrottling"].ToString() == "False")">
48 |         <azure-openai-token-limit counter-key="openai-backend-0-chat" tokens-per-minute="1000000" estimate-prompt-tokens="true" tokens-consumed-variable-name="openai-backend-0-chat-ConsumedTokens" remaining-tokens-variable-name="openai-backend-0-chat-RemainingTokens" />
49 |     </when>
50 | </choose>
51 | 
52 | ```
53 | 
54 | 3. Reference ```dynamic-throttling-assignment``` policy fragment (outbound policy section):
55 | 
56 | ```xml
57 | 
58 | <!-- Update Dynamic Priority Assignment based on TPM counters -->
59 | <include-fragment fragment-id="dynamic-throttling-assignment" />
60 | 
61 | ```
62 | 
63 | 4. Test the policy updates by setting a small ```targetTPMLimit``` and use APIM trace to confirm that APIM switches traffic after hitting the limit and switches it back once the counter goes below that target limit.
64 | 
65 | 5. Run a load test against the service to ensure that the selected target limit is sufficient to manage latency within acceptable parameters and reduce the limit if it is not.
66 | 
67 | You can view a full APIM policy that is leveraging dynamic throttling policy [here](../infra/modules/apim/policies/openai_api_policy_dynamic_throttling.xml)


--------------------------------------------------------------------------------
/guides/throttling-events-handling.md:
--------------------------------------------------------------------------------
 1 | # Throttling Events Handling
 2 | 
 3 | One of the key things to consider when working with AI Apps, is throttling.
 4 | 
 5 | Throttling can happen because the AI backend has run out of capacity (which AI Hub Gateway can help mitigate by automatically falling back to another instance) or because of capacity control measures implemented in the AI Hub Gateway (preventing certain use cases from exceeding allocated capacity).
 6 | 
 7 | ## Handling Throttling Events in AI Hub Gateway
 8 | In either case, it might be very important to keep an eye on these events especially in production environments.
 9 | 
10 | AI Hub Gateway provides a mechanism that allows you to monitor these events per use case (product), per AI deployment/service, and by other dimensions so you can take measures to address these events.
11 | 
12 | A policy fragment [throttling-events](../infra/modules/apim/policies/frag-throttling-events.xml) is used to raise Application Insights custom metrics for throttling events.
13 | 
14 | ```xml
15 | <fragment>
16 |     <choose>
17 |         <when condition="@(context.Response.StatusCode == 429)">
18 |             <emit-metric name="AI Throttling" value="1" namespace="throttling-events">
19 |                 <dimension name="API ID" />
20 |                 <dimension name="Operation ID" />
21 |                 <!-- <dimension name="Subscription ID" /> -->
22 |                 <dimension name="Location" />
23 |                 <dimension name="Product Name" value="@(context.Product?.Name?.ToString() ?? "Portal-Admin")" />
24 |                 <dimension name="Deployment Name" value="@((string)context.Variables["target-deployment"])" />
25 |                 <dimension name="Service Name" value="@((string)context.Variables["service-name"] ?? "NA")" />
26 |             </emit-metric>
27 |         </when>
28 |     </choose>
29 | </fragment>
30 | ```
31 | 
32 | By referencing this policy in ```on-error``` section of an API, it will capture and raise ```429``` too many requests status code as a custom metric in Application Insights.
33 | 
34 | ```xml
35 | <on-error>
36 |     <base />
37 |     <!-- This is used to push custom metrics related to 429 throttling errors -->
38 |     <!-- It is designed to permit setting up Azure Monitor Alerts notifying the team of potential service degradation -->
39 |     <set-variable name="service-name" value="Azure Open AI" />
40 |     <set-variable name="target-deployment" value="@((string)context.Request.MatchedParameters["deployment-id"])" />
41 |     <include-fragment fragment-id="throttling-events" />
42 | </on-error>
43 | ```
44 | 
45 | It is designed so that the fragment expects 2 variables (```service-name``` and ```target-deployment```) to be set by the referencing API, which allows it to be reused across multiple APIs that have the potential of being throttled.
46 | 
47 | ## View throttling events in Application Insights
48 | 
49 | Once the policy is in place, you can view the custom metric in Application Insights under Metrics with ```throttling-events``` namespace and ```AI Throttling``` metric name.
50 | 
51 | You can also split the metric by the different dimensions to get a better understanding of the throttling events (in the chart below I'm splitting by ```Product Name```).
52 | 
53 | ![Throttling Events](../assets/throttling-events-app-insights.png)
54 | 
55 | ## Creating Alerts in Azure Monitor
56 | 
57 | Having a nice dashboard is helpful, but it is even more helpful to have alerts that can notify you when certain throttling thresholds are met.
58 | 
59 | You can create alerts in Azure Monitor based on the custom metric ```AI Throttling``` and set the threshold that you want to be notified on.
60 | 
61 | ![Create Alert](../assets/throttling-events-alert.png)
62 | 
63 | You can create a generic alert that will notify you when the number of throttling events exceeds a certain threshold.
64 | 
65 | You can also create a refined alert that will notify you when the number of throttling events exceeds a certain threshold for a specific product or AI deployment.
66 | 
67 | ## Conclusion
68 | 
69 | Throttling events can be a sign of potential service degradation and it is important to monitor and address them as soon as possible.
70 | 
71 | AI Hub Gateway provides a mechanism to monitor these events and take measures to address them.
72 | 
73 | Although using Alerts can be helpful, it is important to keep in mind the following points:
74 | - Alerts should be used for significant events that require attention, not as noisy notifications that will be ignored over time.
75 | - Put in place a mechanism to address the issue that caused the throttling event in the first place.
76 | - Keep an eye on the alerts and adjust the thresholds as needed.
77 | 
78 | 
79 | 


--------------------------------------------------------------------------------
/infra/abbreviations.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "analysisServicesServers": "as",
  3 |     "apiManagementService": "apim-",
  4 |     "appConfigurationConfigurationStores": "appcs-",
  5 |     "appManagedEnvironments": "cae-",
  6 |     "appContainerApps": "ca-",
  7 |     "authorizationPolicyDefinitions": "policy-",
  8 |     "automationAutomationAccounts": "aa-",
  9 |     "blueprintBlueprints": "bp-",
 10 |     "blueprintBlueprintsArtifacts": "bpa-",
 11 |     "cacheRedis": "redis-",
 12 |     "cdnProfiles": "cdnp-",
 13 |     "cdnProfilesEndpoints": "cdne-",
 14 |     "cognitiveServicesAccounts": "cog-",
 15 |     "cognitiveServicesFormRecognizer": "cog-fr-",
 16 |     "cognitiveServicesTextAnalytics": "cog-ta-",
 17 |     "computeAvailabilitySets": "avail-",
 18 |     "computeCloudServices": "cld-",
 19 |     "computeDiskEncryptionSets": "des",
 20 |     "computeDisks": "disk",
 21 |     "computeDisksOs": "osdisk",
 22 |     "computeGalleries": "gal",
 23 |     "computeSnapshots": "snap-",
 24 |     "computeVirtualMachines": "vm",
 25 |     "computeVirtualMachineScaleSets": "vmss-",
 26 |     "containerInstanceContainerGroups": "ci",
 27 |     "containerRegistryRegistries": "cr",
 28 |     "containerServiceManagedClusters": "aks-",
 29 |     "databricksWorkspaces": "dbw-",
 30 |     "dataFactoryFactories": "adf-",
 31 |     "dataLakeAnalyticsAccounts": "dla",
 32 |     "dataLakeStoreAccounts": "dls",
 33 |     "dataMigrationServices": "dms-",
 34 |     "dBforMySQLServers": "mysql-",
 35 |     "dBforPostgreSQLServers": "psql-",
 36 |     "devicesIotHubs": "iot-",
 37 |     "devicesProvisioningServices": "provs-",
 38 |     "devicesProvisioningServicesCertificates": "pcert-",
 39 |     "documentDBDatabaseAccounts": "cosmos-",
 40 |     "eventGridDomains": "evgd-",
 41 |     "eventGridDomainsTopics": "evgt-",
 42 |     "eventGridEventSubscriptions": "evgs-",
 43 |     "eventHubNamespaces": "evhns-",
 44 |     "eventHubNamespacesEventHubs": "evh-",
 45 |     "hdInsightClustersHadoop": "hadoop-",
 46 |     "hdInsightClustersHbase": "hbase-",
 47 |     "hdInsightClustersKafka": "kafka-",
 48 |     "hdInsightClustersMl": "mls-",
 49 |     "hdInsightClustersSpark": "spark-",
 50 |     "hdInsightClustersStorm": "storm-",
 51 |     "hybridComputeMachines": "arcs-",
 52 |     "insightsActionGroups": "ag-",
 53 |     "insightsComponents": "appi-",
 54 |     "keyVaultVaults": "kv-",
 55 |     "kubernetesConnectedClusters": "arck",
 56 |     "kustoClusters": "dec",
 57 |     "kustoClustersDatabases": "dedb",
 58 |     "logicIntegrationAccounts": "ia-",
 59 |     "logicWorkflows": "logic-",
 60 |     "machineLearningServicesWorkspaces": "mlw-",
 61 |     "managedIdentityUserAssignedIdentities": "id-",
 62 |     "managementManagementGroups": "mg-",
 63 |     "migrateAssessmentProjects": "migr-",
 64 |     "networkApplicationGateways": "agw-",
 65 |     "networkApplicationSecurityGroups": "asg-",
 66 |     "networkAzureFirewalls": "afw-",
 67 |     "networkBastionHosts": "bas-",
 68 |     "networkConnections": "con-",
 69 |     "networkDnsZones": "dnsz-",
 70 |     "networkExpressRouteCircuits": "erc-",
 71 |     "networkFirewallPolicies": "afwp-",
 72 |     "networkFirewallPoliciesWebApplication": "waf",
 73 |     "networkFirewallPoliciesRuleGroups": "wafrg",
 74 |     "networkFrontDoors": "fd-",
 75 |     "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-",
 76 |     "networkLoadBalancersExternal": "lbe-",
 77 |     "networkLoadBalancersInternal": "lbi-",
 78 |     "networkLoadBalancersInboundNatRules": "rule-",
 79 |     "networkLocalNetworkGateways": "lgw-",
 80 |     "networkNatGateways": "ng-",
 81 |     "networkNetworkInterfaces": "nic-",
 82 |     "networkNetworkSecurityGroups": "nsg-",
 83 |     "networkNetworkSecurityGroupsSecurityRules": "nsgsr-",
 84 |     "networkNetworkWatchers": "nw-",
 85 |     "networkPrivateDnsZones": "pdnsz-",
 86 |     "networkPrivateLinkServices": "pl-",
 87 |     "networkPublicIPAddresses": "pip-",
 88 |     "networkPublicIPPrefixes": "ippre-",
 89 |     "networkRouteFilters": "rf-",
 90 |     "networkRouteTables": "rt-",
 91 |     "networkRouteTablesRoutes": "udr-",
 92 |     "networkTrafficManagerProfiles": "traf-",
 93 |     "networkVirtualNetworkGateways": "vgw-",
 94 |     "networkVirtualNetworks": "vnet-",
 95 |     "networkVirtualNetworksSubnets": "snet-",
 96 |     "networkVirtualNetworksVirtualNetworkPeerings": "peer-",
 97 |     "networkVirtualWans": "vwan-",
 98 |     "networkVpnGateways": "vpng-",
 99 |     "networkVpnGatewaysVpnConnections": "vcn-",
100 |     "networkVpnGatewaysVpnSites": "vst-",
101 |     "notificationHubsNamespaces": "ntfns-",
102 |     "notificationHubsNamespacesNotificationHubs": "ntf-",
103 |     "operationalInsightsWorkspaces": "log-",
104 |     "portalDashboards": "dash-",
105 |     "powerBIDedicatedCapacities": "pbi-",
106 |     "purviewAccounts": "pview-",
107 |     "privateEndpoints": "pe-",
108 |     "recoveryServicesVaults": "rsv-",
109 |     "resourcesResourceGroups": "rg-",
110 |     "searchSearchServices": "srch-",
111 |     "serviceBusNamespaces": "sb-",
112 |     "serviceBusNamespacesQueues": "sbq-",
113 |     "serviceBusNamespacesTopics": "sbt-",
114 |     "serviceEndPointPolicies": "se-",
115 |     "serviceFabricClusters": "sf-",
116 |     "signalRServiceSignalR": "sigr",
117 |     "sqlManagedInstances": "sqlmi-",
118 |     "sqlServers": "sql-",
119 |     "sqlServersDataWarehouse": "sqldw-",
120 |     "sqlServersDatabases": "sqldb-",
121 |     "sqlServersDatabasesStretch": "sqlstrdb-",
122 |     "storageStorageAccounts": "st",
123 |     "storageStorageAccountsVm": "stvm",
124 |     "storSimpleManagers": "ssimp",
125 |     "streamAnalyticsCluster": "asa-",
126 |     "synapseWorkspaces": "syn",
127 |     "synapseWorkspacesAnalyticsWorkspaces": "synw",
128 |     "synapseWorkspacesSqlPoolsDedicated": "syndp",
129 |     "synapseWorkspacesSqlPoolsSpark": "synsp",
130 |     "timeSeriesInsightsEnvironments": "tsi-",
131 |     "webServerFarms": "plan-",
132 |     "webSitesAppService": "app-",
133 |     "webSitesAppServiceEnvironment": "ase-",
134 |     "webSitesFunctions": "func-",
135 |     "webStaticSites": "stapp-"
136 | }


--------------------------------------------------------------------------------
/infra/main.parameters.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
 3 |   "contentVersion": "1.0.0.0",
 4 |   "parameters": {
 5 |     "environmentName": {
 6 |       "value": "${AZURE_ENV_NAME}"
 7 |     },
 8 |     "location": {
 9 |       "value": "${AZURE_LOCATION}"
10 |     },
11 |     "entraAuth": {
12 |       "value": "${AZURE_ENTRA_AUTH}"
13 |     },
14 |     "entraTenantId": {
15 |       "value": "${AZURE_TENANT_ID}"
16 |     },
17 |     "entraClientId": {
18 |       "value": "${AZURE_CLIENT_ID}"
19 |     },
20 |     "entraAudience": {
21 |       "value": "${AZURE_AUDIENCE}"
22 |     },
23 |     "deploymentCapacity": {
24 |       "value": "${OPENAI_CAPACITY}",
25 |       "metadata": {
26 |         "description": "The OpenAI endpoints capacity (in thousands of tokens per minute)."
27 |       }
28 |     }
29 |   }
30 | }


--------------------------------------------------------------------------------
/infra/modules/ai/cognitiveservices.bicep:
--------------------------------------------------------------------------------
 1 | param name string // account name; its lower-cased form is also used as the custom subdomain
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | param managedIdentityName string = '' // name of an existing user-assigned identity that is attached to the account
 5 | param deployments array = [] // model deployments; each item provides name and model, and optionally raiPolicyName and sku
 6 | param kind string = 'OpenAI'
 7 | param sku object = {
 8 |   name: 'S0'
 9 | }
10 | param deploymentCapacity int = 1 // capacity of the fallback 'Standard' sku for deployments that carry no sku of their own
11 | 
12 | // Networking: public access setting plus the private endpoint, its subnet and its DNS zone
13 | param publicNetworkAccess string = 'Disabled'
14 | param openAiPrivateEndpointName string
15 | param vNetName string
16 | param vNetLocation string // passed to the private endpoint module as its location
17 | param privateEndpointSubnetName string
18 | param openAiDnsZoneName string
19 | 
20 | // Existing network/DNS zone: resource groups and subscription used to look them up
21 | param dnsZoneRG string
22 | param dnsSubscriptionId string
23 | param vNetRG string
24 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = { // existing VNet, resolved in resource group vNetRG
25 |   name: vNetName
26 |   scope: resourceGroup(vNetRG)
27 | }
28 | 
29 | // Existing subnet (child of the VNet above) whose id is handed to the private endpoint module
30 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = {
31 |   name: privateEndpointSubnetName
32 |   parent: vnet
33 | }
34 | 
35 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = { // NOTE(review): managedIdentityName defaults to '' but is used unconditionally below - confirm callers always pass it
36 |   name: managedIdentityName
37 | }
38 | 
39 | resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { // the Cognitive Services account (kind defaults to 'OpenAI')
40 |   name: name
41 |   location: location
42 |   tags: union(tags, { 'azd-service-name': name }) // caller tags plus an azd-service-name tag set to the account name
43 |   kind: kind
44 |   identity: {
45 |     type: 'UserAssigned' // only the user-assigned identity below is attached
46 |     userAssignedIdentities: {
47 |       '${managedIdentity.id}': {}
48 |     }
49 |   }
50 |   properties: {
51 |     customSubDomainName: toLower(name)
52 |     publicNetworkAccess: publicNetworkAccess
53 |     networkAcls: {
54 |       defaultAction: 'Deny' // deny by default; no IP or VNet rules are added here
55 |       ipRules: []
56 |       virtualNetworkRules: []
57 |     }
58 |   }
59 |   sku: sku
60 | }
61 | 
62 | @batchSize(1) // deploy the model deployments serially, one at a time
63 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: {
64 |   parent: account
65 |   name: deployment.name
66 |   properties: {
67 |     model: deployment.model
68 |     raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null // optional per-deployment setting; null when not supplied
69 |   }
70 |   sku: contains(deployment, 'sku') ? deployment.sku : { // a deployment's own sku wins; otherwise fall back to 'Standard'
71 |     name: 'Standard'
72 |     capacity: deploymentCapacity
73 |   }
74 | }]
75 | 
76 | module privateEndpoint '../networking/private-endpoint.bicep' = { // private endpoint for the account, placed in the existing subnet
77 |   name: '${account.name}-privateEndpoint'
78 |   params: {
79 |     groupIds: [
80 |       'account'
81 |     ]
82 |     dnsZoneName: openAiDnsZoneName
83 |     name: openAiPrivateEndpointName
84 |     privateLinkServiceId: account.id
85 |     location: vNetLocation // uses the VNet's location parameter, not the account's location
86 |     privateEndpointSubnetId: subnet.id
87 |     dnsZoneRG: dnsZoneRG
88 |     dnsSubId: dnsSubscriptionId
89 |   }
90 |   dependsOn: [
91 |     deployment // explicit ordering: the endpoint is created only after all model deployments
92 |   ]
93 | }
94 | 
95 | output openAiName string = account.name
96 | output openAiEndpointUri string = '${account.properties.endpoint}openai/' // account endpoint with 'openai/' appended
97 | 


--------------------------------------------------------------------------------
/infra/modules/apim/api.bicep:
--------------------------------------------------------------------------------
  1 | @description('The name of the API')
  2 | @minLength(1)
  3 | @maxLength(63)
  4 | param apiName string
  5 | 
  6 | @description('The display name of the API')
  7 | @minLength(1)
  8 | @maxLength(63)
  9 | param apiDispalyName string // NOTE: the misspelled name is part of this module's interface; renaming it would break callers
 10 | 
 11 | @description('The contents of the OpenAPI definition')
 12 | @minLength(1)
 13 | param openApiSpecification string // pass 'NA' to create the API without importing a specification
 14 | 
 15 | @description('The XML Policy document for the API')
 16 | @minLength(1)
 17 | param policyDocument string // pass 'NA' to skip creating the API-level policy
 18 | 
 19 | @description('The name of the API Management service to deploy the API to.')
 20 | @minLength(1)
 21 | param serviceName string
 22 | 
 23 | @description('The API description (if blank, use the name of the API)')
 24 | param apiDescription string = ''
 25 | 
 26 | @description('The relative path for the API (if different to the API name)')
 27 | param path string = ''
 28 | 
 29 | @description('The (optional) service URL')
 30 | param serviceUrl string = '' // when blank, a placeholder URL ('https://to-be-replaced-by-policy') is used
 31 | 
 32 | @description('Set to true if a subscription is required')
 33 | param subscriptionRequired bool = true
 34 | 
 35 | @description('API Revision number. Default is 1')
 36 | param apiRevision string = '1'
 37 | 
 38 | @description('Ability to override the subscription key name. Default is Ocp-Apim-Subscription-Key')
 39 | param subscriptionKeyName string = ''
 40 | 
 41 | param enableAPIDeployment bool = true // when false, no API or policy is deployed and both outputs are ''
 42 | 
 43 | // Treat the specification as JSON when its content starts with '{'; otherwise assume YAML
 44 | var contentFormat = startsWith(openApiSpecification, '{') ? 'openapi+json' : 'openapi'
 45 | 
 46 | @description('The type of the API')
 47 | @allowed([
 48 |   'http'
 49 |   'soap'
 50 |   'graphql'
 51 |   'websocket'
 52 | ])
 53 | param apiType string = 'http'
 54 | 
 55 | @description('The protocols supported by the API')
 56 | @allowed([
 57 |   'http'
 58 |   'https'
 59 |   'ws'
 60 |   'wss'
 61 | ])
 62 | param apiProtocols array = [ // including 'ws' or 'wss' makes this module deploy the WebSocket variant of the API
 63 |   'https'
 64 | ]
 65 | 
 66 | resource apimService 'Microsoft.ApiManagement/service@2022-08-01' existing = {
 67 |   name: serviceName
 68 | }
 69 | // WebSocket APIs ('ws'/'wss') get their own variant below, created without a specification import
 70 | var isWebSocketApi = contains(apiProtocols, 'wss') || contains(apiProtocols, 'ws')
 71 | 
 72 | resource apiDefinition 'Microsoft.ApiManagement/service/apis@2022-08-01' = if (enableAPIDeployment && !isWebSocketApi) {
 73 |   parent: apimService
 74 |   name: apiName
 75 |   properties: {
 76 |     displayName: apiDispalyName
 77 |     description: empty(apiDescription) ? apiName : apiDescription
 78 |     path: empty(path) ? apiName : path
 79 |     apiRevision: apiRevision
 80 |     type: apiType
 81 |     protocols: apiProtocols
 82 |     serviceUrl: empty(serviceUrl) ? 'https://to-be-replaced-by-policy' : serviceUrl
 83 |     subscriptionRequired: subscriptionRequired
 84 |     subscriptionKeyParameterNames: {
 85 |       header: empty(subscriptionKeyName) ? 'Ocp-Apim-Subscription-Key' : subscriptionKeyName
 86 |     }
 87 |     format: openApiSpecification == 'NA' ? null : contentFormat
 88 |     value: openApiSpecification == 'NA' ? null : openApiSpecification
 89 |   }
 90 | }
 91 | 
 92 | resource apiDefinitionWebSocket 'Microsoft.ApiManagement/service/apis@2022-08-01' = if (enableAPIDeployment && isWebSocketApi) {
 93 |   parent: apimService
 94 |   name: apiName
 95 |   properties: {
 96 |     displayName: apiDispalyName
 97 |     description: empty(apiDescription) ? apiName : apiDescription
 98 |     path: empty(path) ? apiName : path
 99 |     apiRevision: apiRevision
100 |     type: apiType
101 |     protocols: apiProtocols
102 |     serviceUrl: empty(serviceUrl) ? 'https://to-be-replaced-by-policy' : serviceUrl
103 |     subscriptionRequired: subscriptionRequired
104 |     subscriptionKeyParameterNames: {
105 |       header: empty(subscriptionKeyName) ? 'Ocp-Apim-Subscription-Key' : subscriptionKeyName
106 |     }
107 |   }
108 | }
109 | 
110 | resource apiPolicy 'Microsoft.ApiManagement/service/apis/policies@2022-08-01' = if(enableAPIDeployment && policyDocument != 'NA') { // skipped when the caller passes 'NA' as the policy
111 |   name: 'policy'
112 |   parent: apiDefinition // NOTE(review): apiDefinition is not deployed for ws/wss APIs; apiDefinitionWebSocket uses the same name, so this presumably still targets it - confirm
113 |   properties: {
114 |     format: 'rawxml' // the policy document is supplied inline as non-XML-encoded text
115 |     value: policyDocument
116 |   }
117 | }
118 | 
119 | output id string = (enableAPIDeployment) ? apiDefinition.id : '' // '' when deployment is disabled; NOTE(review): read from apiDefinition even for ws/wss APIs - confirm
120 | output path string = (enableAPIDeployment) ? apiDefinition.properties.path : '' // '' when deployment is disabled
121 | 


--------------------------------------------------------------------------------
/infra/modules/apim/openai-api/oai-realtime-api-ws.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "swagger": "2.0",
 3 |     "info": {
 4 |         "title": "OAI-Realtime-API",
 5 |         "version": "1.0"
 6 |     },
 7 |     "host": "apim-d6vjlv67krcvy.azure-api.net",
 8 |     "basePath": "/openai/realtime",
 9 |     "schemes": [
10 |         "ws",
11 |         "wss"
12 |     ],
13 |     "securityDefinitions": {
14 |         "apiKeyHeader": {
15 |             "type": "apiKey",
16 |             "name": "api-key",
17 |             "in": "header"
18 |         },
19 |         "apiKeyQuery": {
20 |             "type": "apiKey",
21 |             "name": "api-key",
22 |             "in": "query"
23 |         }
24 |     },
25 |     "security": [
26 |         {
27 |             "apiKeyHeader": []
28 |         },
29 |         {
30 |             "apiKeyQuery": []
31 |         }
32 |     ],
33 |     "paths": {
34 |         "": {
35 |             "get": {
36 |                 "description": "WebSocket opening handshake",
37 |                 "operationId": "onHandshake",
38 |                 "summary": "onHandshake",
39 |                 "responses": {
40 |                     "200": {
41 |                         "description": null
42 |                     }
43 |                 }
44 |             }
45 |         }
46 |     },
47 |     "tags": []
48 | }


--------------------------------------------------------------------------------
/infra/modules/apim/policies/ai-model-inference-api-policy.xml:
--------------------------------------------------------------------------------
 1 | <policies> <!-- Replaces the caller's api-key with a managed-identity bearer token before forwarding -->
 2 |     <inbound>
 3 |         <base />
 4 |         <set-header name="api-key" exists-action="delete" /> <!-- remove any api-key header sent by the caller -->
 5 |         <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="msi-access-token" client-id="{{uami-client-id}}" ignore-error="false" /> <!-- token for the identity in the uami-client-id named value, stored in msi-access-token -->
 6 |         <set-header name="Authorization" exists-action="override"> <!-- overwrite any incoming Authorization header with that token -->
 7 |             <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
 8 |         </set-header>
 9 |     </inbound>
10 |     <backend>
11 |         <base />
12 |     </backend>
13 |     <outbound>
14 |         <base />
15 |     </outbound>
16 |     <on-error>
17 |         <base />
18 |     </on-error>
19 | </policies>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/ai-search-api-policy.xml:
--------------------------------------------------------------------------------
 1 | <policies>
 2 |     <inbound>
 3 |         <base />
 4 |         <!-- Entra ID (AAD) authorization -->
 5 |         <!-- Delegated to the aad-auth fragment, which is meant to be toggled by the entra-auth named value -->
 6 |         <include-fragment fragment-id="aad-auth" />
 7 |         <set-header name="api-key" exists-action="delete" />
 8 |         <!-- The index name from the URL is stored as "deployment-id" (the variable the shared routing fragments read); both cache keys are scoped by gateway region and API revision -->
 9 |         <set-variable name="deployment-id" value="@((string)context.Request.MatchedParameters["index-name"])" />
10 |         <set-variable name="routesCacheKey" value="@((string)context.Variables["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" />
11 |         <set-variable name="oaClustersCacheKey" value="@("aiSearchInstance" + context.Deployment.Region + context.Api.Revision)" />
12 |         <cache-lookup-value key="@((string)context.Variables.GetValueOrDefault<string>("oaClustersCacheKey", "ALL-SEARCH"))" variable-name="oaClusters" />
13 |         <!-- On a cache miss the cluster configuration is rebuilt by the expression below and stored in the cache again -->
14 |         <choose>
15 |             <when condition="@(context.Variables.ContainsKey("oaClusters") == false)">
16 |                 <set-variable name="oaClusters" value="@{
17 |                         // route is an Azure AI Search endpoint
18 |                         JArray routes = new JArray();
19 |                         // cluster is a group of routes that are capable of serving a specific index name
20 |                         JArray clusters = new JArray();
21 |                         // Update the below if condition when using multiple APIM gateway regions/SHGW to get different configurations for each region
22 |                         if(context.Deployment.Region == "West Europe" || true) // "|| true" makes this branch run for every region
23 |                         {
24 |                             // Adding all Azure AI Search endpoints routes (which are set as APIM Backend)
25 |                             routes.Add(new JObject()
26 |                             {
27 |                                 { "name", "Contoso Insurance KB" },
28 |                                 { "location", "Switzerland North" },
29 |                                 { "backend-id", "ai-search-swn" },
30 |                                 { "priority", 1},
31 |                                 { "isThrottling", false }, 
32 |                                 { "retryAfter", DateTime.MinValue } 
33 |                             });
34 | 
35 |                             routes.Add(new JObject()
36 |                             {
37 |                                 { "name", "Northwind Outdoor KB" },
38 |                                 { "location", "Sweden Central" },
39 |                                 { "backend-id", "ai-search-sec" },
40 |                                 { "priority", 1},
41 |                                 { "isThrottling", false }, 
42 |                                 { "retryAfter", DateTime.MinValue } 
43 |                             });
44 | 
45 |                             // For each index, create a cluster with the routes that can serve it
46 |                             clusters.Add(new JObject()
47 |                             {
48 |                                 { "deploymentName", "gptkbindex" },
49 |                                 { "routes", new JArray(routes[0]) } // only the first route ("Contoso Insurance KB") serves this index
50 |                             });
51 | 
52 |                             clusters.Add(new JObject()
53 |                             {
54 |                                 { "deploymentName", "product-info" },
55 |                                 { "routes", new JArray(routes[1]) } // only the second route ("Northwind Outdoor KB") serves this index
56 |                             });
57 |                         }
58 |                         else
59 |                         {
60 |                             //No clusters found for selected region, either return error (default behavior) or set default cluster in the else section
61 |                         }
62 |                         
63 |                         return clusters;   
64 |                     }" />
65 |                 <!-- Cache the cluster configuration under the clusters cache key (duration 86400, i.e. 24 hours in seconds) -->
66 |                 <cache-store-value key="@((string)context.Variables.GetValueOrDefault<string>("oaClustersCacheKey", "ALL-SEARCH"))" value="@((JArray)context.Variables["oaClusters"])" duration="86400" />
67 |             </when>
68 |         </choose>
69 |         <include-fragment fragment-id="validate-routes" /> <!-- resolves the "routes" variable for the requested index, or returns 400 when no cluster matches -->
70 |         <authentication-managed-identity resource="https://search.azure.com" output-token-variable-name="msi-access-token" client-id="{{uami-client-id}}" ignore-error="false" /> <!-- token for Azure AI Search, acquired with the managed identity named by uami-client-id -->
71 |         <set-header name="Authorization" exists-action="override">
72 |             <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
73 |         </set-header> <!-- NOTE(review): this replaces the caller's Authorization header; anything that reads the header afterwards (e.g. usage logging) sees the managed identity token. Confirm this is intended -->
74 |     </inbound>
75 |     <backend>
76 |         <include-fragment fragment-id="backend-routing" />
77 |     </backend>
78 |     <outbound>
79 |         <base />
80 |         <set-variable name="idPrefix" value="search" />
81 |         <set-variable name="targetService" value="AI-Search" />
82 |         <set-variable name="model" value="@((string)context.Variables.GetValueOrDefault<string>("deployment-id", "AI-SEARCH-INDEX"))" />
83 |         <set-variable name="deploymentName" value="@((string)context.Variables.GetValueOrDefault<string>("deployment-id", "AI-SEARCH-SERVICE"))" /> <!-- model and deploymentName both carry the index name -->
84 |         <include-fragment fragment-id="ai-usage" /> <!-- logs a usage record built from the variables set above -->
85 |     </outbound>
86 |     <on-error>
87 |         <base />
88 |         <set-variable name="service-name" value="Azure AI Search" />
89 |         <set-variable name="target-deployment" value="@((string)context.Request.MatchedParameters["index-name"])" />
90 |         <include-fragment fragment-id="throttling-events" /> <!-- emits the throttling metric when the response status is 429 -->
91 |     </on-error>
92 | </policies>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-aad-auth.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 |     <!-- Entra ID (AAD) authorization: validates the caller's bearer token when enabled -->
 3 |     <!-- Enabled when the entra-auth named value is "true"; the value is copied into the entra-validate variable, which is the variable the condition below reads -->
 4 |     <set-variable name="entra-validate" value="{{entra-auth}}" />
 5 |     <choose>
 6 |         <when condition="@(context.Variables.GetValueOrDefault<string>("entra-validate", "false").Equals("true", StringComparison.OrdinalIgnoreCase))">
 7 |             <validate-jwt header-name="Authorization" failed-validation-httpcode="401"
 8 |                 failed-validation-error-message="Unauthorized" require-expiration-time="true"
 9 |                 require-scheme="Bearer" require-signed-tokens="true">
10 |                 <openid-config
11 |                     url="https://login.microsoftonline.com/{{tenant-id}}/v2.0/.well-known/openid-configuration" />
12 |                 <audiences>
13 |                     <audience>{{audience}}</audience>
14 |                 </audiences>
15 |                 <issuers> <!-- NOTE(review): issuer uses the sts.windows.net form while the metadata URL is the v2.0 endpoint; confirm against the token version clients present -->
16 |                     <issuer>https://sts.windows.net/{{tenant-id}}/</issuer>
17 |                 </issuers>
18 |                 <required-claims>
19 |                     <claim name="appid">
20 |                         <value>{{client-id}}</value>
21 |                     </claim>
22 |                 </required-claims>
23 |             </validate-jwt>
24 |         </when>
25 |     </choose>
26 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-ai-usage.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 |     <!-- Log AI service usage to Event Hub; a record is written only when the response status is 200 -->
 3 |     <choose>
 4 |         <when condition="@(context.Response.StatusCode == 200)">
 5 |             <log-to-eventhub logger-id="usage-eventhub-logger">@{
 6 |             return new JObject(
 7 |                 new JProperty("id", (string)context.Variables.GetValueOrDefault<string>("idPrefix", "ai") + "-" + Guid.NewGuid().ToString()),
 8 |                 new JProperty("timestamp", DateTime.UtcNow.ToString()),
 9 |                 new JProperty("appId", context.Request.Headers.GetValueOrDefault("Authorization",string.Empty).Split(' ').LastOrDefault()?.AsJwt()?.Claims.GetValueOrDefault("appid", "NA") ?? "NA"), // falls back to "NA" when the header is missing or is not a JWT
10 |                 new JProperty("subscriptionId", context.Subscription?.Id?.ToString() ?? "Portal-Admin"),
11 |                 new JProperty("productName", context.Product?.Name?.ToString() ?? "Portal-Admin"),
12 |                 new JProperty("targetService", (string)context.Variables.GetValueOrDefault<string>("targetService", "NA")),
13 |                 new JProperty("model", (string)context.Variables.GetValueOrDefault<string>("model", "NA")),
14 |                 new JProperty("gatewayName", context.Deployment?.ServiceName ?? "NA"),
15 |                 new JProperty("gatewayRegion", context.Deployment?.Region ?? "NA"),
16 |                 new JProperty("aiGatewayId", context.Deployment?.Gateway?.Id ?? "NA"),
17 |                 new JProperty("RequestIp", context.Request?.IpAddress ?? "NA"),
18 |                 new JProperty("operationName", context.Operation?.Name ?? "NA"),
19 |                 new JProperty("sessionId", (string)context.Variables.GetValueOrDefault<string>("sessionId", "NA")),
20 |                 new JProperty("endUserId", (string)context.Variables.GetValueOrDefault<string>("endUserId", "NA")),
21 |                 new JProperty("backendId", (string)context.Variables.GetValueOrDefault<string>("backendId", "NA")),
22 |                 new JProperty("routeLocation", (string)context.Variables.GetValueOrDefault<string>("routeLocation", "NA")),
23 |                 new JProperty("routeName", (string)context.Variables.GetValueOrDefault<string>("routeName", "NA")),
24 |                 new JProperty("deploymentName", (string)context.Variables.GetValueOrDefault<string>("deploymentName", "NA")),
25 |                 new JProperty("promptTokens", (int)context.Variables.GetValueOrDefault<int>("promptTokens", 1)), // token counts default to 1 / 0 / 1 when the calling policy did not set them
26 |                 new JProperty("responseTokens", (int)context.Variables.GetValueOrDefault<int>("responseTokens", 0)),
27 |                 new JProperty("totalTokens", (int)context.Variables.GetValueOrDefault<int>("totalTokens", 1))
28 |         ).ToString();
29 |         }</log-to-eventhub>
30 |         </when>
31 |     </choose>
32 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-backend-routing.xml:
--------------------------------------------------------------------------------
  1 | <fragment>
  2 |     <retry condition="@(context.Response != null && (context.Response.StatusCode == 429 || context.Response.StatusCode >= 500) && ((Int32)context.Variables["remainingRoutes"]) > 0)" count="3" interval="0">
  3 |         <!-- Before picking a route, clear the throttling flag of every route whose retryAfter time has already passed -->
  4 |         <set-variable name="routes" value="@{
  5 |                 JArray routes = (JArray)context.Variables["routes"];
  6 |                 
  7 |                 for (int i = 0; i < routes.Count; i++)
  8 |                 {
  9 |                     JObject route = (JObject)routes[i];
 10 | 
 11 |                     if (route.Value<bool>("isThrottling") && DateTime.Now >= route.Value<DateTime>("retryAfter"))
 12 |                     {
 13 |                         route["isThrottling"] = false;
 14 |                         route["retryAfter"] = DateTime.MinValue;
 15 |                     }
 16 |                 }
 17 | 
 18 |                 return routes; 
 19 |             }" />
 20 |         <cache-store-value key="@((string)context.Variables.GetValueOrDefault<string>("routesCacheKey", "ALL-ROUTES"))" value="@((JArray)context.Variables["routes"])" duration="86400" />
 21 |         <!-- Main selection logic: among non-throttling routes take the lowest priority number; ties are broken randomly -->
 22 |         <set-variable name="routeIndex" value="@{
 23 |                 JArray routes = (JArray)context.Variables["routes"];
 24 |                 int selectedPriority = Int32.MaxValue;
 25 |                 List<int> availableRoutesIndexes = new List<int>();
 26 | 
 27 |                 for (int i = 0; i < routes.Count; i++)
 28 |                 {
 29 |                     JObject route = (JObject)routes[i];
 30 | 
 31 |                     if (!route.Value<bool>("isThrottling"))
 32 |                     {
 33 |                         int routePriority = route.Value<int>("priority");
 34 | 
 35 |                         if (routePriority < selectedPriority)
 36 |                         {
 37 |                             selectedPriority = routePriority;
 38 |                             availableRoutesIndexes.Clear();
 39 |                             availableRoutesIndexes.Add(i);
 40 |                         } 
 41 |                         else if (routePriority == selectedPriority)
 42 |                         {
 43 |                             availableRoutesIndexes.Add(i);
 44 |                         }
 45 |                     }
 46 |                 }
 47 | 
 48 |                 if (availableRoutesIndexes.Count == 1)
 49 |                 {
 50 |                     return availableRoutesIndexes[0];
 51 |                 }
 52 |             
 53 |                 if (availableRoutesIndexes.Count > 0)
 54 |                 {
 55 |                     //Returns a random route from the list if we have more than one available with the same priority
 56 |                     return availableRoutesIndexes[new Random().Next(0, availableRoutesIndexes.Count)];
 57 |                 }
 58 |                 else
 59 |                 {
 60 |                     //If there are no available routes, the request will be sent to the first one
 61 |                     return 0;    
 62 |                 }
 63 |                 }" />
 64 |         <set-variable name="backendId" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("backend-id"))" />
 65 |         <set-variable name="routeLocation" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("location"))" />
 66 |         <set-variable name="routeName" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("name"))" />
 67 |         <set-variable name="deploymentName" value="@((string)context.Variables["deployment-id"])" />
 68 |         <set-backend-service backend-id="@((string)context.Variables["backendId"])" />
 69 |         <choose>
 70 |             <when condition="@(context.Variables.GetValueOrDefault<string>("isStream", "false") == "true")">
 71 |                 <forward-request buffer-request-body="false" />
 72 |             </when>
 73 |             <otherwise>
 74 |                 <forward-request buffer-request-body="true" />
 75 |             </otherwise>
 76 |         </choose>
 77 |         <choose>
 78 |             <!-- When a route answers 429 or 5xx: reload the cached list, mark that route as throttling, store the list and recount the usable routes -->
 79 |             <when condition="@(context.Response != null && (context.Response.StatusCode == 429 || context.Response.StatusCode >= 500) )">
 80 |                 <cache-lookup-value key="@((string)context.Variables.GetValueOrDefault<string>("routesCacheKey", "ALL-ROUTES"))" variable-name="routes" /> <!-- same routesCacheKey as every cache-store-value in this fragment; callers whose URL has no deployment-id parameter (e.g. AI Search index-name) set this key themselves -->
 81 |                 <set-variable name="routes" value="@{
 82 |                         JArray routes = (JArray)context.Variables["routes"];
 83 |                         int currentrouteIndex = context.Variables.GetValueOrDefault<int>("routeIndex");
 84 |                         int retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("Retry-After", "-1")); // back-off seconds: Retry-After first, then the two x-ratelimit-reset headers, finally 10
 85 | 
 86 |                         if (retryAfter == -1)
 87 |                         {
 88 |                             retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("x-ratelimit-reset-requests", "-1"));
 89 |                         }
 90 | 
 91 |                         if (retryAfter == -1)
 92 |                         {
 93 |                             retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("x-ratelimit-reset-tokens", "10"));
 94 |                         }
 95 | 
 96 |                         JObject route = (JObject)routes[currentrouteIndex];
 97 |                         route["isThrottling"] = true;
 98 |                         route["retryAfter"] = DateTime.Now.AddSeconds(retryAfter);
 99 | 
100 |                         return routes;      
101 |                     }" />
102 |                 <cache-store-value key="@((string)context.Variables.GetValueOrDefault<string>("routesCacheKey", "ALL-ROUTES"))" value="@((JArray)context.Variables["routes"])" duration="86400" />
103 |                 <set-variable name="remainingRoutes" value="@{
104 |                         JArray routes = (JArray)context.Variables["routes"];
105 | 
106 |                         int remainingRoutes = 0; // number of routes not flagged as throttling; the retry condition stops when this reaches 0
107 | 
108 |                         for (int i = 0; i < routes.Count; i++)
109 |                         {
110 |                             JObject route = (JObject)routes[i];
111 | 
112 |                             if (!route.Value<bool>("isThrottling"))
113 |                             {
114 |                                 remainingRoutes++;
115 |                             }
116 |                         }
117 | 
118 |                         return remainingRoutes;
119 |                     }" />
120 |             </when>
121 |         </choose>
122 |     </retry>
123 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-dynamic-throttling-assignment.xml:
--------------------------------------------------------------------------------
 1 | <fragment> <!-- Re-evaluates the throttling flag of the route that served this request from its token counters, then stores the route list back in the cache -->
 2 |     <set-variable name="routes" value="@{
 3 |                 JArray routes = (JArray)context.Variables["routes"];
 4 |                 JArray outputRoutes = new JArray();
 5 |                 string activeBackendId = (string)context.Variables["backendId"];
 6 |                 string targetDeployment = (string)context.Variables["deployment-id"];
 7 |                 for (int i = 0; i < routes.Count; i++)
 8 |                 {
 9 |                     JObject route = (JObject)routes[i];
10 |                     int targetTPMLimit = route.Value<int?>("targetTPMLimit") ?? -1; // -1 when the route defines no targetTPMLimit; such routes are copied through untouched
11 |                     int consumedTPM = route.Value<int?>("consumedTPM") ?? 0; // NOTE(review): read but never used below
12 | 
13 |                     if (targetTPMLimit == -1)
14 |                     {
15 |                         outputRoutes.Add(route);
16 |                         continue;
17 |                     }
18 | 
19 |                     string consumedTokensKey = route.Value<string>("backend-id") + "-" + targetDeployment + "-ConsumedTokens";
20 |                     string remainingTokensKey = route.Value<string>("backend-id") + "-" + targetDeployment + "-RemainingTokens";
21 |                     // Only the route whose backend served this request is re-evaluated, and only if its consumed-tokens variable exists
22 |                     if (context.Variables.ContainsKey(consumedTokensKey) && route["backend-id"].ToString() == activeBackendId)
23 |                     {
24 |                         int requestConsumedTokens = (int)context.Variables[consumedTokensKey];
25 |                         int remainingTokens = (int)context.Variables[remainingTokensKey];
26 |                         
27 |                         // Calculating the total consumed tokens so far. NOTE(review): 1000000 is presumably the limit configured on the counter that produces remainingTokens; confirm against the policy that sets these variables
28 |                         int consumedTokens = 1000000 - remainingTokens - requestConsumedTokens;
29 | 
30 |                         double consumedPercentage = (double)consumedTokens / (double)targetTPMLimit;
31 |                         
32 |                         route["consumedPercentage"] = consumedPercentage;
33 | 
34 |                         if (consumedPercentage > 0.8) // above 80% of the route's target: flag it as throttling for 30 seconds unless already flagged
35 |                         {
36 |                             if((bool)route["isThrottling"] == false)
37 |                             {
38 |                                 route["isThrottling"] = true;
39 |                                 route["retryAfter"] = DateTime.Now.AddSeconds(30);
40 |                             }
41 |                         }
42 |                         else
43 |                         {
44 |                             route["isThrottling"] = false;
45 |                             route["retryAfter"] = DateTime.MinValue;
46 |                         }
47 |                     }
48 |                     
49 |                     outputRoutes.Add(route);
50 |                 }
51 | 
52 |                 return outputRoutes; 
53 |             }" />
54 |     <cache-store-value key="@((string)context.Variables.GetValueOrDefault<string>("routesCacheKey", "ALL-ROUTES"))" value="@((JArray)context.Variables["routes"])" duration="86400" />
55 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-openai-usage-streaming.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 | 	<!-- Token metrics for "streaming" requests only (isStream variable equals "true") -->
 3 | 	<choose>
 4 | 		<!-- NOTE(review): the route is selected here independently of the backend-routing fragment; with several equal-priority routes the random pick may differ from the route actually used -->
 5 | 		<when condition="@(context.Variables.GetValueOrDefault<string>("isStream","false").Equals("true", StringComparison.OrdinalIgnoreCase))">
 6 | 			<set-variable name="potentialRouteIndex" value="@{
 7 |                 JArray routes = (JArray)context.Variables["routes"];
 8 |                 int selectedPriority = Int32.MaxValue;
 9 |                 List<int> availableRoutesIndexes = new List<int>();
10 | 
11 |                 for (int i = 0; i < routes.Count; i++)
12 |                 {
13 |                     JObject route = (JObject)routes[i];
14 | 
15 |                     if (!route.Value<bool>("isThrottling"))
16 |                     {
17 |                         int routePriority = route.Value<int>("priority");
18 | 
19 |                         if (routePriority < selectedPriority)
20 |                         {
21 |                             selectedPriority = routePriority;
22 |                             availableRoutesIndexes.Clear();
23 |                             availableRoutesIndexes.Add(i);
24 |                         } 
25 |                         else if (routePriority == selectedPriority)
26 |                         {
27 |                             availableRoutesIndexes.Add(i);
28 |                         }
29 |                     }
30 |                 }
31 | 
32 |                 if (availableRoutesIndexes.Count == 1)
33 |                 {
34 |                     return availableRoutesIndexes[0];
35 |                 }
36 |             
37 |                 if (availableRoutesIndexes.Count > 0)
38 |                 {
39 |                     //Returns a random route from the list if we have more than one available with the same priority
40 |                     return availableRoutesIndexes[new Random().Next(0, availableRoutesIndexes.Count)];
41 |                 }
42 |                 else
43 |                 {
44 |                     //If there are no available routes, the request will be sent to the first one
45 |                     return 0;    
46 |                 }
47 |                 }" />
48 | 			<set-variable name="backendId" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["potentialRouteIndex"]]).Value<string>("backend-id"))" />
49 | 			<set-variable name="routeLocation" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["potentialRouteIndex"]]).Value<string>("location"))" />
50 | 			<set-variable name="routeName" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["potentialRouteIndex"]]).Value<string>("name"))" />
51 | 			<set-variable name="deploymentName" value="@((string)context.Variables["deployment-id"])" />
52 | 			<azure-openai-emit-token-metric namespace="ai-streaming">
53 | 				<dimension name="SubscriptionId" value="@(context.Subscription?.Id?.ToString() ?? "Portal-Admin")" /> <!-- null-safe, same fallback the usage logging fragments use when no subscription is attached -->
54 | 				<dimension name="productName" value="@(context.Product?.Name?.ToString() ?? "Portal-Admin")" />
55 | 				<dimension name="gatewayName" value="@(context.Deployment?.ServiceName ?? "NA")" />
56 | 				<dimension name="routeName" value="@((string)context.Variables.GetValueOrDefault<string>("routeName", "Streaming"))" />
57 | 				<dimension name="deploymentName" value="@((string)context.Variables.GetValueOrDefault<string>("deploymentName", "Streaming"))" />
58 | 				<dimension name="operationName" value="@(context.Operation?.Id ?? "Streaming")" />
59 | 				<dimension name="backendId" value="@((string)context.Variables.GetValueOrDefault<string>("backendId", "DefaultStreaming"))" />
60 | 				<dimension name="routeLocation" value="@((string)context.Variables.GetValueOrDefault<string>("routeLocation", "DefaultStreaming"))" />
61 | 			</azure-openai-emit-token-metric>
62 | 		</when>
63 | 	</choose>
64 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-openai-usage.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 | 	<!-- Usage logs for non-streaming requests only (isStream variable missing or "false") -->
 3 | 	<choose>
 4 | 		<when condition="@(context.Variables.GetValueOrDefault<string>("isStream","false").Equals("false", StringComparison.OrdinalIgnoreCase))">
 5 | 			<!-- Capture the response body once as a JObject variable; it is reused for logging and written back with set-body at the end -->
 6 | 			<set-variable name="responseBody" value="@(context.Response.Body.As<JObject>())" />
 7 | 			<!-- Log OpenAI usage to EventHub (only for HTTP 200 responses) -->
 8 | 			<choose>
 9 | 				<when condition="@(context.Response.StatusCode == 200)">
10 | 					<log-to-eventhub logger-id="usage-eventhub-logger">@{
11 |                     //The response body can only be read once, so it is captured into the responseBody variable at the top of this fragment and reused here
12 |                     //var responseBody = context.Response.Body?.As<JObject>(true); //Avoid this one
13 |                     var responseBody = (JObject)context.Variables["responseBody"]; //Set by the set-variable at the top of this fragment
14 |                     return new JObject(
15 |                         new JProperty("id", responseBody?["id"]?.ToString() ?? Guid.NewGuid().ToString()),
16 |                         new JProperty("timestamp", DateTime.UtcNow.ToString()),
17 |                         new JProperty("appId", context.Request.Headers.GetValueOrDefault("Authorization",string.Empty).Split(' ').LastOrDefault()?.AsJwt()?.Claims.GetValueOrDefault("appid", "NA") ?? "NA"), //falls back to "NA" when the header is missing or is not a JWT
18 |                         new JProperty("subscriptionId", context.Subscription?.Id?.ToString() ?? "Portal-Admin"),
19 |                         new JProperty("productName", context.Product?.Name?.ToString() ?? "Portal-Admin"),
20 |                         new JProperty("targetService", responseBody?["object"]?.ToString() ?? "NA"),
21 |                         new JProperty("model", responseBody?["model"]?.ToString() ?? "NA"),
22 |                         new JProperty("gatewayName", context.Deployment?.ServiceName ?? "NA"),
23 |                         new JProperty("gatewayRegion", context.Deployment?.Region ?? "NA"),
24 |                         new JProperty("aiGatewayId", context.Deployment?.Gateway?.Id ?? "NA"),
25 |                         new JProperty("RequestIp", context.Request?.IpAddress ?? "NA"),
26 |                         new JProperty("operationName", context.Operation?.Name ?? "NA"),
27 |                         new JProperty("sessionId", (string)context.Variables.GetValueOrDefault<string>("sessionId", "NA")),
28 |                         new JProperty("endUserId", (string)context.Variables.GetValueOrDefault<string>("endUserId", "NA")),
29 |                         new JProperty("backendId", (string)context.Variables.GetValueOrDefault<string>("backendId", "NA")),
30 |                         new JProperty("routeLocation", (string)context.Variables.GetValueOrDefault<string>("routeLocation", "NA")),
31 |                         new JProperty("routeName", (string)context.Variables.GetValueOrDefault<string>("routeName", "NA")),
32 |                         new JProperty("deploymentName", (string)context.Variables.GetValueOrDefault<string>("deploymentName", "NA")),
33 |                         new JProperty("promptTokens", responseBody?["usage"]?["prompt_tokens"]?.Value<int>() ?? 1),
34 |                         new JProperty("responseTokens", responseBody?["usage"]?["completion_tokens"]?.Value<int>() ?? 0),
35 |                         new JProperty("totalTokens", responseBody?["usage"]?["total_tokens"]?.Value<int>() ?? 1)
36 |                 ).ToString();
37 |                 }</log-to-eventhub>
38 | 				</when>
39 | 			</choose>
40 | 			<set-body>@{
41 |                     return ((JObject)context.Variables["responseBody"]).ToString();
42 |             }</set-body>
43 | 		</when>
44 | 	</choose>
45 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-throttling-events.xml:
--------------------------------------------------------------------------------
 1 | <fragment> <!-- Emits one "AI Throttling" custom metric when the current response is HTTP 429 -->
 2 |     <choose>
 3 |         <when condition="@(context.Response != null && context.Response.StatusCode == 429)">
 4 |             <emit-metric name="AI Throttling" value="1" namespace="throttling-events">
 5 |                 <dimension name="API ID" />
 6 |                 <dimension name="Operation ID" />
 7 |                 <!-- <dimension name="Subscription ID" /> -->
 8 |                 <dimension name="Location" />
 9 |                 <dimension name="Product Name" value="@(context.Product?.Name?.ToString() ?? "Portal-Admin")" />
10 |                 <dimension name="Deployment Name" value="@(context.Variables.GetValueOrDefault<string>("target-deployment", "NA"))" /> <!-- "NA" when the calling policy did not set the variable, instead of failing on a missing key -->
11 |                 <dimension name="Service Name" value="@(context.Variables.GetValueOrDefault<string>("service-name", "NA"))" />
12 |             </emit-metric>
13 |         </when>
14 |     </choose>
15 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/frag-validate-routes.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 |     <!-- Resolve the route list for the requested deployment: look in the cache first (key = routesCacheKey) -->
 3 |     <cache-lookup-value key="@((string)context.Variables.GetValueOrDefault<string>("routesCacheKey", "ALL-ROUTES"))" variable-name="routes" />
 4 |     <!-- Cache miss: derive the routes from the cluster configuration held in the oaClusters variable -->
 5 |     <choose>
 6 |         <when condition="@(!context.Variables.ContainsKey("routes"))">
 7 |             <set-variable name="routes" value="@{
 8 |                     string requestedDeployment = (string)context.Variables["deployment-id"];
 9 |                     JArray configuredClusters = (JArray)context.Variables["oaClusters"];
10 |                     JObject matchedCluster = (JObject)configuredClusters.FirstOrDefault(c => c["deploymentName"]?.Value<string>() == requestedDeployment);
11 | 
12 |                     // Happy path first: a cluster exists for this deployment, hand back its routes
13 |                     if (matchedCluster != null)
14 |                     {
15 |                         return (JArray)matchedCluster["routes"];
16 |                     }
17 | 
18 |                     // No cluster matched: return a single marker entry whose "url" carries the error text checked below
19 |                     JObject errorEntry = new JObject();
20 |                     errorEntry["name"] = requestedDeployment;
21 |                     errorEntry["location"] = "NA";
22 |                     errorEntry["url"] = "No routes found for the deployment (" + requestedDeployment + ") in the region (" + context.Deployment.Region + ")";
23 | 
24 |                     JArray errorRoutes = new JArray();
25 |                     errorRoutes.Add(errorEntry);
26 |                     return errorRoutes;
27 |                 }" />
28 |             <!-- The marker entry is detected by its "No routes" text and turned into a 400 response carrying the routes variable as body -->
29 |             <choose>
30 |                 <when condition="@(((JArray)context.Variables["routes"]).ToString().Contains("No routes"))">
31 |                     <return-response>
32 |                         <set-status code="400" reason="No routes" />
33 |                         <set-body>@(((JArray)context.Variables["routes"]).ToString())</set-body>
34 |                     </return-response>
35 |                 </when>
36 |             </choose>
37 |             <!-- Store the resolved routes in the cache under routesCacheKey -->
38 |             <cache-store-value key="@((string)context.Variables.GetValueOrDefault<string>("routesCacheKey", "ALL-ROUTES"))" value="@((JArray)context.Variables["routes"])" duration="86400" />
39 |         </when>
40 |     </choose>
41 |     <set-variable name="routeIndex" value="-1" />
42 |     <set-variable name="remainingRoutes" value="1" />
43 | </fragment>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/hr_product_policy.xml:
--------------------------------------------------------------------------------
 1 | <policies>
 2 |     <inbound>
 3 |         <base />
 4 |         <!-- Capacity management: allow only the assigned TPM (tokens per minute) for each subscription of this product; the limit depends on the requested deployment (gpt-4o: 10000, chat: 2000, anything else: 1000) -->
 5 |         <set-variable name="target-deployment" value="@((string)context.Request.MatchedParameters["deployment-id"])" />
 6 |         <choose>
 7 |             <when condition="@((string)context.Variables["target-deployment"] == "gpt-4o")">
 8 |                 <azure-openai-token-limit counter-key="@(context.Subscription.Id + "-" + context.Variables["target-deployment"])" tokens-per-minute="10000" estimate-prompt-tokens="false" tokens-consumed-header-name="consumed-tokens" remaining-tokens-header-name="remaining-tokens" retry-after-header-name="retry-after" />
 9 |             </when>
10 |             <when condition="@((string)context.Variables["target-deployment"] == "chat")">
11 |                 <azure-openai-token-limit counter-key="@(context.Subscription.Id + "-" + context.Variables["target-deployment"])" tokens-per-minute="2000" estimate-prompt-tokens="false" tokens-consumed-header-name="consumed-tokens" remaining-tokens-header-name="remaining-tokens" retry-after-header-name="retry-after" />
12 |             </when>
13 |             <otherwise>
14 |                 <azure-openai-token-limit counter-key="@(context.Subscription.Id + "-default")" tokens-per-minute="1000" estimate-prompt-tokens="false" tokens-consumed-header-name="consumed-tokens" remaining-tokens-header-name="remaining-tokens" retry-after-header-name="retry-after" /> <!-- all other deployments share one "-default" counter per subscription -->
15 |             </otherwise>
16 |         </choose>
17 |         <!-- <azure-openai-token-limit counter-key="@(context.Subscription.Id)" 
18 |             tokens-per-minute="5000" 
19 |             estimate-prompt-tokens="true" 
20 |             tokens-consumed-header-name="consumed-tokens" 
21 |             remaining-tokens-header-name="remaining-tokens" 
22 |             retry-after-header-name="retry-after" /> -->
23 |     </inbound>
24 |     <backend>
25 |         <base />
26 |     </backend>
27 |     <outbound>
28 |         <base />
29 |     </outbound>
30 |     <on-error>
31 |         <base />
32 |     </on-error>
33 | </policies>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/openai-realtime-policy.xml:
--------------------------------------------------------------------------------
 1 | <policies>
 2 |     <inbound>
 3 |         <base />
 4 |         <!-- AAD Authorization -->
 5 |         <!-- Enabled if entra-validate named value is set to true -->
 6 |         <include-fragment fragment-id="aad-auth" />
 7 |         
 8 |         <!-- Deleting api-key header so it is not passed to OpenAI endpoint -->
 9 |         <set-header name="api-key" exists-action="delete" />
10 |         
11 |         <!-- Backend Managed Identity -->
12 |         <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="msi-access-token" client-id="{{uami-client-id}}" ignore-error="false" />
13 |         <set-header name="Authorization" exists-action="override">
14 |             <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
15 |         </set-header>
16 |         
17 |         <!-- Select the realtime (WebSocket) backend URL by product name; replace the REPLACE* placeholders with real product and Azure OpenAI resource names -->
18 |         <choose>
19 |             <when condition="@(context.Product.Name == "REPLACE1")">
20 |                 <set-backend-service base-url="wss://REPLACE.openai.azure.com/openai/realtime" />
21 |             </when>
22 |             <when condition="@(context.Product.Name == "REPLACE2")">
23 |                 <set-backend-service base-url="wss://REPLACE.openai.azure.com/openai/realtime" />
24 |             </when>
25 |             <otherwise>
26 |                 <set-backend-service base-url="wss://REPLACE.openai.azure.com/openai/realtime" />
27 |             </otherwise>
28 |         </choose>
29 |     </inbound>
30 |     <!-- Control if and how the requests are forwarded to services  -->
31 |     <backend>
32 |         <base />
33 |         <!--<include-fragment fragment-id="backend-routing" />-->
34 |     </backend>
35 |     <!-- Customize the responses -->
36 |     <outbound>
37 |         <base />
38 |     </outbound>
39 |     <!-- Handle exceptions and customize error responses  -->
40 |     <on-error>
41 |         <base />
42 |     </on-error>
43 | </policies>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/openai_api_policy.xml:
--------------------------------------------------------------------------------
  1 | <policies>
  2 |     <inbound>
  3 |         <base />
  4 |         <!-- AAD Authorization -->
  5 |         <!-- Enabled if entra-validate named value is set to true -->
  6 |         <include-fragment fragment-id="aad-auth" />
  7 |         
  8 |         <!-- Detect streaming requests (JSON body with a non-null "stream" property) to adjust token calculations; requests without a body are skipped -->
  9 |         <choose>
 10 |             <when condition="@(context.Request.Body?.As<JObject>(true)?["stream"] != null && context.Request.Body.As<JObject>(true)["stream"].Type != JTokenType.Null)">
 11 |                 <set-variable name="isStream" value="@{
 12 |                 var content = (context.Request.Body?.As<JObject>(true));
 13 |                 string streamValue = content["stream"].ToString().ToLower();
 14 |                 return streamValue;
 15 |             }" />
 16 |             </when>
 17 |         </choose>
 18 |         
 19 |         <!-- Deleting api-key header so it is not passed to OpenAI endpoint -->
 20 |         <set-header name="api-key" exists-action="delete" />
 21 |         
 22 |         <!-- Setting cache keys -->
 23 |         <set-variable name="deployment-id" value="@((string)context.Request.MatchedParameters["deployment-id"])" />
 24 |         <set-variable name="routesCacheKey" value="@((string)context.Variables["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" />
 25 |         <set-variable name="oaClustersCacheKey" value="@("oaClusters" + context.Deployment.Region + context.Api.Revision)" />
 26 | 
 27 |         <!-- Getting OpenAI clusters configuration -->
 28 |         <cache-lookup-value key="@((string)context.Variables.GetValueOrDefault<string>("oaClustersCacheKey", "ALL-CLUSTERS"))" variable-name="oaClusters" />
 29 |         <!-- If we can't find the configuration cached, it will be loaded -->
 30 |         <choose>
 31 |             <when condition="@(context.Variables.ContainsKey("oaClusters") == false)">
 32 |                 <set-variable name="oaClusters" value="@{
 33 |                         // route is an Azure OpenAI API endpoints
 34 |                         JArray routes = new JArray();
 35 |                         // cluster is a group of routes that are capable of serving a specific deployment name (model and version)
 36 |                         JArray clusters = new JArray();
 37 |                         // Update the below if condition when using multiple APIM gateway regions/SHGW to get different configurations for each region
 38 |                         if(context.Deployment.Region == "West Europe" || true)
 39 |                         {
 40 |                             // Adding all Azure OpenAI endpoints routes (which are set as APIM Backend)
 41 |                             routes.Add(new JObject()
 42 |                             {
 43 |                                 { "name", "OpenAI 1" },
 44 |                                 { "location", "East US" },
 45 |                                 { "backend-id", "openai-backend-0" },
 46 |                                 { "priority", 1},
 47 |                                 { "isThrottling", false }, 
 48 |                                 { "retryAfter", DateTime.MinValue } 
 49 |                             });
 50 | 
 51 |                             routes.Add(new JObject()
 52 |                             {
 53 |                                 { "name", "OpenAI 2" },
 54 |                                 { "location", "North Central US" },
 55 |                                 { "backend-id", "openai-backend-1" },
 56 |                                 { "priority", 2},
 57 |                                 { "isThrottling", false },
 58 |                                 { "retryAfter", DateTime.MinValue }
 59 |                             });
 60 | 
 61 |                             routes.Add(new JObject()
 62 |                             {
 63 |                                 { "name", "OpenAI 3" },
 64 |                                 { "location", "East US 2" },
 65 |                                 { "backend-id", "openai-backend-2" },
 66 |                                 { "priority", 2},
 67 |                                 { "isThrottling", false },
 68 |                                 { "retryAfter", DateTime.MinValue }
 69 |                             });
 70 | 
 71 |                             // For each deployment name, create a cluster with the routes that can serve it
 72 |                             // It is important in your OpenAI deployments to use the same name across instances
 73 |                             clusters.Add(new JObject()
 74 |                             {
 75 |                                 { "deploymentName", "chat" },
 76 |                                 { "routes", new JArray(routes[0], routes[1], routes[2]) }
 77 |                             });
 78 | 
 79 |                             clusters.Add(new JObject()
 80 |                             {
 81 |                                 { "deploymentName", "embedding" },
 82 |                                 { "routes", new JArray(routes[0], routes[2]) }
 83 |                             });
 84 | 
 85 |                             clusters.Add(new JObject()
 86 |                             {
 87 |                                 { "deploymentName", "gpt-4o" },
 88 |                                 { "routes", new JArray(routes[0]) }
 89 |                             });
 90 |                             
 91 |                         }
 92 |                         else
 93 |                         {
 94 |                             //No clusters found for selected region, either return error (default behavior) or set default cluster in the else section
 95 |                         }
 96 |                         
 97 |                         return clusters;   
 98 |                     }" />
 99 |                 <!-- Add cluster configurations to cache -->
100 |                 <cache-store-value key="@((string)context.Variables.GetValueOrDefault<string>("oaClustersCacheKey", "ALL-CLUSTERS"))" value="@((JArray)context.Variables["oaClusters"])" duration="86400" />
101 |             </when>
102 |         </choose>
103 |         <include-fragment fragment-id="validate-routes" />
104 |         <!-- Backend Managed Identity -->
105 |         <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="msi-access-token" client-id="{{uami-client-id}}" ignore-error="false" />
106 |         <set-header name="Authorization" exists-action="override">
107 |             <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
108 |         </set-header>
109 | 
110 |         <!-- Setting global TPM limit to collect usage for streaming requests -->
111 |         <azure-openai-token-limit counter-key="APIMOpenAI" tokens-per-minute="50000000" estimate-prompt-tokens="false" tokens-consumed-variable-name="TotalConsumedTokens" remaining-tokens-variable-name="TotalRemainingTokens" />
112 |         
113 |         <!-- Handling usage for streaming requests -->
114 |         <include-fragment fragment-id="openai-usage-streaming" />
115 |     </inbound>
116 |     <backend>
117 |         <include-fragment fragment-id="backend-routing" />
118 |     </backend>
119 |     <outbound>
120 |         <base />
121 | 
122 |         <!-- Handling usage for non-streaming requests -->
123 |         <include-fragment fragment-id="openai-usage" />
124 |     </outbound>
125 |     <on-error>
126 |         <base />
127 |         <!-- This is used to push custom metrics related to 429 throttling errors -->
128 |         <!-- It is designed to permit setting up Azure Monitor Alerts notifying the team of potential service degradation -->
129 |         <set-variable name="service-name" value="Azure Open AI" />
130 |         <set-variable name="target-deployment" value="@((string)context.Request.MatchedParameters["deployment-id"])" />
131 |         <include-fragment fragment-id="throttling-events" />
132 |     </on-error>
133 | </policies>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/retail_product_policy.xml:
--------------------------------------------------------------------------------
 1 | <policies>
 2 |     <inbound>
 3 |         <base />
 4 |         <!-- Restrict access for this product to specific models -->
 5 |         <choose>
 6 |             <when condition="@(!new [] { "gpt-4", "chat", "embedding" }.Contains(context.Request.MatchedParameters["deployment-id"] ?? String.Empty))">
 7 |                 <return-response>
 8 |                     <set-status code="401" reason="Unauthorized model access" />
 9 |                 </return-response>
10 |             </when>
11 |         </choose>
12 | 
13 |         <!-- Capacity management: allow only assigned tpm for each Retail use case subscription -->
14 |         <azure-openai-token-limit counter-key="@(context.Subscription.Id)" 
15 |             tokens-per-minute="10000" 
16 |             estimate-prompt-tokens="true" 
17 |             tokens-consumed-header-name="consumed-tokens" 
18 |             remaining-tokens-header-name="remaining-tokens" 
19 |             retry-after-header-name="retry-after" />
20 | 
21 |     </inbound>
22 |     <backend>
23 |         <base />
24 |     </backend>
25 |     <outbound>
26 |         <base />
27 |     </outbound>
28 |     <on-error>
29 |         <base />
30 |     </on-error>
31 | </policies>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/search_hr_product_policy.xml:
--------------------------------------------------------------------------------
 1 | <!--
 2 |     - Policies are applied in the order they appear.
 3 |     - Position <base/> inside a section to inherit policies from the outer scope.
 4 |     - Comments within policies are not preserved.
 5 | -->
 6 | <!-- Add policies as children to the <inbound>, <outbound>, <backend>, and <on-error> elements -->
 7 | <policies>
 8 |     <!-- Throttle, authorize, validate, cache, or transform the requests -->
 9 |     <inbound>
10 |         <base />
11 |         <choose>
12 |             <when condition="@(!new [] { "product-info" }.Contains(context.Request.MatchedParameters["index-name"] ?? String.Empty))">
13 |                 <return-response>
14 |                     <set-status code="401" reason="Unauthorized index access" />
15 |                 </return-response>
16 |             </when>
17 |         </choose>
18 |     </inbound>
19 |     <!-- Control if and how the requests are forwarded to services  -->
20 |     <backend>
21 |         <base />
22 |     </backend>
23 |     <!-- Customize the responses -->
24 |     <outbound>
25 |         <base />
26 |     </outbound>
27 |     <!-- Handle exceptions and customize error responses  -->
28 |     <on-error>
29 |         <base />
30 |     </on-error>
31 | </policies>


--------------------------------------------------------------------------------
/infra/modules/apim/policies/translator-api-policy.xml:
--------------------------------------------------------------------------------
 1 | <policies>
 2 |     <inbound>
 3 |         <base />
 4 |         <set-header name="ocp-apim-subscription-key" exists-action="delete" />
 5 |         <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="msi-access-token" client-id="{{uami-client-id}}" ignore-error="false" />
 6 |         <set-header name="Authorization" exists-action="override">
 7 |             <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
 8 |         </set-header>
 9 |         <!--<rewrite-uri template="/translator/text/v3.0/translate" copy-unmatched-params="true" />-->
10 |         <rewrite-uri template="@("/translator/text/v3.0/" + context.Request.Url.Path)" copy-unmatched-params="true" />
11 |     </inbound>
12 |     
13 |     <backend>
14 |         <base />
15 |     </backend>
16 |     
17 |     <outbound>
18 |         <base />
19 |     </outbound>
20 |     
21 |     <on-error>
22 |         <base />
23 |     </on-error>
24 | </policies>


--------------------------------------------------------------------------------
/infra/modules/cosmos-db/cosmos-db.bicep:
--------------------------------------------------------------------------------
  1 | @description('Azure Cosmos DB account name, max length 44 characters')
  2 | param accountName string
  3 | 
  4 | @description('Location for the Azure Cosmos DB account.')
  5 | param location string = resourceGroup().location
  6 | 
  7 | @description('The primary region for the Azure Cosmos DB account.')
  8 | param primaryRegion string = location
  9 | 
 10 | param tags object = {}
 11 | 
 12 | @allowed([
 13 |   'Eventual'
 14 |   'ConsistentPrefix'
 15 |   'Session'
 16 |   'BoundedStaleness'
 17 |   'Strong'
 18 | ])
 19 | @description('The default consistency level of the Cosmos DB account.')
 20 | param defaultConsistencyLevel string = 'Session'
 21 | 
 22 | @minValue(10)
 23 | @maxValue(2147483647)
 24 | @description('Max stale requests. Required for BoundedStaleness. Valid ranges, Single Region: 10 to 2147483647. Multi Region: 100000 to 2147483647.')
 25 | param maxStalenessPrefix int = 100000
 26 | 
 27 | @minValue(5)
 28 | @maxValue(86400)
 29 | @description('Max lag time (seconds). Required for BoundedStaleness. Valid ranges, Single Region: 5 to 86400. Multi Region: 300 to 86400.')
 30 | param maxIntervalInSeconds int = 300
 31 | 
 32 | @allowed([
 33 |   true
 34 |   false
 35 | ])
 36 | @description('Enable system managed failover for regions')
 37 | param systemManagedFailover bool = true
 38 | 
 39 | @description('The name for the database')
 40 | param databaseName string = 'ai-usage-db'
 41 | 
 42 | @description('The name for the container')
 43 | param containerName string = 'ai-usage-container'
 44 | 
 45 | @description('The name for the model pricing container')
 46 | param pricingContainerName string = 'model-pricing'
 47 | 
 48 | @description('The name for the streaming export configuration container')
 49 | param streamingExportConfigContainerName string = 'streaming-export-config'
 50 | 
 51 | @minValue(400)
 52 | @maxValue(1000000)
 53 | @description('The throughput for the container')
 54 | param throughput int = 400
 55 | 
 56 | var consistencyPolicy = {
 57 |   Eventual: {
 58 |     defaultConsistencyLevel: 'Eventual'
 59 |   }
 60 |   ConsistentPrefix: {
 61 |     defaultConsistencyLevel: 'ConsistentPrefix'
 62 |   }
 63 |   Session: {
 64 |     defaultConsistencyLevel: 'Session'
 65 |   }
 66 |   BoundedStaleness: {
 67 |     defaultConsistencyLevel: 'BoundedStaleness'
 68 |     maxStalenessPrefix: maxStalenessPrefix
 69 |     maxIntervalInSeconds: maxIntervalInSeconds
 70 |   }
 71 |   Strong: {
 72 |     defaultConsistencyLevel: 'Strong'
 73 |   }
 74 | }
 75 | var locations = [
 76 |   {
 77 |     locationName: primaryRegion
 78 |     failoverPriority: 0
 79 |     isZoneRedundant: false
 80 |   }
 81 | ]
 82 | 
 83 | // Networking
 84 | param cosmosPrivateEndpointName string
 85 | param vNetName string
 86 | param privateEndpointSubnetName string
 87 | param cosmosDnsZoneName string
 88 | param publicAccess string = 'Disabled'
 89 | 
 90 | // Use existing network/dns zone
 91 | param dnsZoneRG string
 92 | param dnsSubscriptionId string
 93 | 
 94 | param vNetRG string
 95 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = {
 96 |   name: vNetName
 97 |   scope: resourceGroup(vNetRG)
 98 | }
 99 | 
100 | // Get existing subnet
101 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = {
102 |   name: privateEndpointSubnetName
103 |   parent: vnet
104 | }
105 | 
106 | resource account 'Microsoft.DocumentDB/databaseAccounts@2024-02-15-preview' = {
107 |   name: toLower(accountName)
108 |   location: location
109 |   tags: union(tags, { 'azd-service-name': accountName })
110 |   kind: 'GlobalDocumentDB'
111 |   properties: {
112 |     consistencyPolicy: consistencyPolicy[defaultConsistencyLevel]
113 |     locations: locations
114 |     databaseAccountOfferType: 'Standard'
115 |     enableAutomaticFailover: systemManagedFailover
116 |     disableKeyBasedMetadataWriteAccess: true
117 |     publicNetworkAccess: publicAccess
118 |   }
119 | }
120 | 
121 | resource database 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2024-02-15-preview' = {
122 |   parent: account
123 |   name: databaseName
124 |   properties: {
125 |     resource: {
126 |       id: databaseName
127 |     }
128 |   }
129 | }
130 | 
131 | resource container 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-02-15-preview' = {
132 |   parent: database
133 |   name: containerName
134 |   properties: {
135 |     resource: {
136 |       id: containerName
137 |       partitionKey: {
138 |         paths: [
139 |           '/productName'
140 |         ]
141 |         kind: 'Hash'
142 |       }
143 |       indexingPolicy: {
144 |         indexingMode: 'consistent'
145 |         automatic: true
146 |       }
147 |     }
148 |     options: {
149 |       throughput: throughput
150 |     }
151 |   }
152 | }
153 | 
154 | resource modelPricingContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-02-15-preview' = {
155 |   parent: database
156 |   name: pricingContainerName
157 |   properties: {
158 |     resource: {
159 |       id: pricingContainerName
160 |       partitionKey: {
161 |         paths: [
162 |           '/model'
163 |         ]
164 |         kind: 'Hash'
165 |       }
166 |       indexingPolicy: {
167 |         indexingMode: 'consistent'
168 |         automatic: true
169 |       }
170 |     }
171 |     options: {
172 |       throughput: throughput
173 |     }
174 |   }
175 | }
176 | 
177 | resource streamingExportConfigContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-02-15-preview' = {
178 |   parent: database
179 |   name: streamingExportConfigContainerName
180 |   properties: {
181 |     resource: {
182 |       id: streamingExportConfigContainerName
183 |       partitionKey: {
184 |         paths: [
185 |           '/type'
186 |         ]
187 |         kind: 'Hash'
188 |       }
189 |       indexingPolicy: {
190 |         indexingMode: 'consistent'
191 |         automatic: true
192 |       }
193 |     }
194 |     options: {
195 |       throughput: throughput
196 |     }
197 |   }
198 | }
199 | 
200 | module privateEndpoint '../networking/private-endpoint.bicep' = {
201 |   name: '${accountName}-privateEndpoint'
202 |   params: {
203 |     groupIds: [
204 |       'sql'
205 |     ]
206 |     dnsZoneName: cosmosDnsZoneName
207 |     name: cosmosPrivateEndpointName
208 |     privateLinkServiceId: account.id
209 |     location: location
210 |     dnsZoneRG: dnsZoneRG
211 |     privateEndpointSubnetId: subnet.id
212 |     dnsSubId: dnsSubscriptionId
213 |   }
214 | }
215 | 
216 | output location string = location
217 | output cosmosDbAccountName string = account.name
218 | output cosmosDbDatabaseName string = database.name
219 | output cosmosDbContainerName string = container.name
220 | output cosmosDbPricingContainerName string = modelPricingContainer.name
221 | output cosmosDbStreamingExportConfigContainerName string = streamingExportConfigContainer.name
222 | output resourceId string = database.id
223 | output cosmosDbEndpoint string = 'https://${account.name}.documents.azure.com:443/'
224 | 


--------------------------------------------------------------------------------
/infra/modules/event-hub/event-hub.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param sku string = 'Standard'
 4 | param capacity int = 1
 5 | param tags object = {}
 6 | param eventHubName string = 'ai-usage'
 7 | 
 8 | param eventHubPrivateEndpointName string
 9 | param vNetName string
10 | param privateEndpointSubnetName string
11 | param eventHubDnsZoneName string
12 | 
13 | param publicNetworkAccess string = 'Disabled'
14 | 
15 | // Use existing network/dns zone
16 | param dnsZoneRG string
17 | param dnsSubscriptionId string
18 | param vNetRG string
19 | 
20 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = {
21 |   name: vNetName
22 |   scope: resourceGroup(vNetRG)
23 | }
24 | 
25 | // Get existing subnet
26 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = {
27 |   name: privateEndpointSubnetName
28 |   parent: vnet
29 | }
30 | 
31 | resource eventHubNamespace 'Microsoft.EventHub/namespaces@2024-05-01-preview' = {
32 |   name: name
33 |   location: location
34 |   tags: union(tags, { 'azd-service-name': name })
35 |   sku: {
36 |     name: sku
37 |     tier: sku
38 |     capacity: capacity
39 |   }
40 |   properties: {
41 |     isAutoInflateEnabled: false
42 |     maximumThroughputUnits: 0
43 |     publicNetworkAccess: publicNetworkAccess
44 |   }
45 | }
46 | 
47 | resource eventHub 'Microsoft.EventHub/namespaces/eventhubs@2024-05-01-preview' = {
48 |   name: eventHubName // honour the eventHubName parameter (defaults to 'ai-usage') instead of a hard-coded literal
49 |   parent: eventHubNamespace
50 |   properties: {
51 |     messageRetentionInDays: 7
52 |     partitionCount: 2
53 |     status: 'Active'
54 |   }
55 | }
56 | 
57 | module privateEndpoint '../networking/private-endpoint.bicep' = {
58 |   name: '${eventHubName}-privateEndpoint'
59 |   params: {
60 |     groupIds: [
61 |       'namespace'
62 |     ]
63 |     dnsZoneName: eventHubDnsZoneName
64 |     name: eventHubPrivateEndpointName
65 |     privateLinkServiceId: eventHubNamespace.id
66 |     location: location
67 |     dnsZoneRG: dnsZoneRG
68 |     privateEndpointSubnetId: subnet.id
69 |     dnsSubId: dnsSubscriptionId
70 |   }
71 | }
72 | 
73 | output eventHubNamespaceName string = eventHubNamespace.name
74 | output eventHubName string = eventHub.name
75 | output eventHubEndpoint string = eventHubNamespace.properties.serviceBusEndpoint
76 | 


--------------------------------------------------------------------------------
/infra/modules/functionapp/functionapp.bicep:
--------------------------------------------------------------------------------
  1 | 
  2 | param functionAppName string 
  3 | param tags object = {}
  4 | param azdserviceName string
  5 | param storageAccountName string
  6 | param functionContentShareName string
  7 | 
  8 | param functionAppIdentityName string
  9 | 
 10 | param applicationInsightsName string
 11 | param eventHubNamespaceName string
 12 | param eventHubName string
 13 | //param vnetName string
 14 | param functionAppSubnetId string
 15 | 
 16 | param cosmosDBEndpoint string
 17 | param cosmosDatabaseName string
 18 | param cosmosContainerName string
 19 | 
 20 | param location string = resourceGroup().location
 21 | 
 22 | var functionPlanOS = 'Linux'
 23 | var functionRuntime  = 'dotnet-isolated'
 24 | var dotnetFrameworkVersion  = '8.0'
 25 | var linuxFxVersion  = 'DOTNET-ISOLATED|8.0'
 26 | var isReserved = functionPlanOS == 'Linux'
 27 | 
 28 | resource functionAppmanagedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' existing = {
 29 |   name: functionAppIdentityName
 30 | }
 31 | 
 32 | resource storageAccount 'Microsoft.Storage/storageAccounts@2023-05-01' existing = {
 33 |   name: storageAccountName
 34 | }
 35 | 
 36 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = {
 37 |   name: applicationInsightsName
 38 | }
 39 | 
 40 | // Key-based storage connection string; the endpoint suffix is taken from environment() rather than hard-coded to the public cloud
 41 | var storageAccountConnectionString = 'DefaultEndpointsProtocol=https;AccountName=${storageAccount.name};AccountKey=${storageAccount.listKeys().keys[0].value};EndpointSuffix=${environment().suffixes.storage}'
 42 | 
 43 | resource hostingPlan 'Microsoft.Web/serverfarms@2023-12-01' = {
 44 |   name: 'hosting-plan-${functionAppName}'
 45 |   tags: union(tags, { 'azd-service-name': 'hosting-plan-${functionAppName}' })
 46 |   location: location
 47 |   sku: {
 48 |     name: 'EP1'
 49 |     tier: 'ElasticPremium'
 50 |     family: 'EP'
 51 |   }
 52 |   kind: 'elastic'
 53 |   properties: {
 54 |     maximumElasticWorkerCount: 10
 55 |     reserved: isReserved
 56 |   }
 57 | }
 58 | 
 59 | resource functionApp 'Microsoft.Web/sites@2023-12-01' = {
 60 |   name: functionAppName
 61 |   location: location
 62 |   kind: 'functionapp,linux'
 63 |   tags: union(tags, { 'azd-service-name': azdserviceName })
 64 |   identity: {
 65 |     type: 'UserAssigned'
 66 |     userAssignedIdentities: {
 67 |       '${functionAppmanagedIdentity.id}': {}
 68 |     }
 69 |   }
 70 |   properties: {
 71 |     enabled: true
 72 |     serverFarmId: hostingPlan.id
 73 |     reserved: isReserved       
 74 |     virtualNetworkSubnetId: functionAppSubnetId
 75 |   }
 76 | }
 77 | 
 78 | 
 79 | // Add the function to the subnet
 80 | resource networkConfig 'Microsoft.Web/sites/networkConfig@2023-12-01' = {
 81 |   parent: functionApp
 82 |   name: 'virtualNetwork'
 83 |   properties: {
 84 |     subnetResourceId: functionAppSubnetId
 85 |     swiftSupported: true
 86 |   }
 87 | }
 88 | 
 89 | //create functionapp siteconfig
 90 | resource functionAppSiteConfig 'Microsoft.Web/sites/config@2023-12-01' = {
 91 |   parent: functionApp
 92 |   name: 'web'
 93 |   properties: {
 94 |     linuxFxVersion: linuxFxVersion
 95 |     detailedErrorLoggingEnabled: true
 96 |     vnetRouteAllEnabled: true
 97 |     ftpsState: 'FtpsOnly'
 98 |     minTlsVersion: '1.2'
 99 |     scmMinTlsVersion: '1.2'
100 |     minimumElasticInstanceCount: 1
101 |     //vnetName: vnetName
102 |     publicNetworkAccess: 'Enabled'  
103 |     functionsRuntimeScaleMonitoringEnabled: true
104 |     netFrameworkVersion: dotnetFrameworkVersion
105 |   }
106 |   dependsOn: [
107 |     applicationInsights
108 |   ]
109 | }
110 | 
111 | //Create functionapp appsettings
112 | 
113 | resource functionAppSettings 'Microsoft.Web/sites/config@2023-12-01' = {
114 |   parent: functionApp
115 |   name: 'appsettings'
116 |   properties: {
117 |       APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString
118 |       AzureWebJobsStorage: storageAccountConnectionString
119 |       //AzureWebJobsStorage__accountname: storageAccountName      
120 |       FUNCTIONS_EXTENSION_VERSION:  '~4'
121 |       FUNCTIONS_WORKER_RUNTIME: functionRuntime
122 |       WEBSITE_CONTENTAZUREFILECONNECTIONSTRING: storageAccountConnectionString
123 |       WEBSITE_CONTENTSHARE: functionContentShareName
124 |       WEBSITE_VNET_ROUTE_ALL: '1'
125 |       WEBSITE_CONTENTOVERVNET: '1'
126 |       //EventHub Input Trigger Settings With Managed Identity
127 |       //https://learn.microsoft.com/en-us/azure/azure-functions/functions-reference?tabs=eventhubs&pivots=programming-language-csharp#common-properties-for-identity-based-connections
128 |       EventHubConnection__clientId: functionAppmanagedIdentity.properties.clientId
129 |       EventHubConnection__credential: 'managedidentity'
130 |       EventHubConnection__fullyQualifiedNamespace: '${eventHubNamespaceName}.servicebus.windows.net'
131 |       EventHubName: eventHubName
132 | 
133 |       //CosmosDB
134 |       CosmosAccountEndpoint: cosmosDBEndpoint
135 |       CosmosDatabaseName: cosmosDatabaseName
136 |       CosmosContainerName: cosmosContainerName
137 |       CosmosManagedIdentityId: functionAppmanagedIdentity.properties.clientId
138 |   }
139 |   dependsOn: [
140 |     storageAccount
141 |   ]
142 | }
143 | 


--------------------------------------------------------------------------------
/infra/modules/functionapp/storageaccount.bicep:
--------------------------------------------------------------------------------
  1 | param storageAccountName string
  2 | param location string = resourceGroup().location
  3 | param tags object = {}
  4 | 
  5 | param functionAppManagedIdentityName string
  6 | 
  7 | //Networking
  8 | param vNetName string
  9 | param privateEndpointSubnetName string
 10 | param storageBlobDnsZoneName string
 11 | param storageBlobPrivateEndpointName string
 12 | param storageFileDnsZoneName string
 13 | param storageFilePrivateEndpointName string
 14 | param storageTableDnsZoneName string
 15 | param storageTablePrivateEndpointName string
 16 | param storageQueueDnsZoneName string
 17 | param storageQueuePrivateEndpointName string
 18 | // https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#storage-blob-data-owner
 19 | var storageBlobDataOwnerRoleId = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b')
 20 | 
 21 | // Use existing network/dns zone
 22 | param dnsZoneRG string
 23 | param dnsSubscriptionId string
 24 | param vNetRG string
 25 | 
 26 | param provisionFunctionShare bool = true
 27 | param provisionLogicShare bool = true
 28 | 
 29 | param functionContentShareName string
 30 | param logicContentShareName string
 31 | 
 32 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = {
 33 |   name: vNetName
 34 |   scope: resourceGroup(vNetRG)
 35 | }
 36 | 
 37 | // Get existing subnet
 38 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = {
 39 |   name: privateEndpointSubnetName
 40 |   parent: vnet
 41 | }
 42 | 
 43 | resource functionAppmanagedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = {
 44 |   name: functionAppManagedIdentityName
 45 | }
 46 | 
 47 | @description('Storage Account type')
 48 | @allowed([
 49 |   'Standard_LRS'
 50 |   'Standard_GRS'
 51 |   'Standard_RAGRS'
 52 | ])
 53 | param storageAccountType string = 'Standard_LRS'
 54 | 
 55 | 
 56 | resource storageAccount 'Microsoft.Storage/storageAccounts@2022-05-01' = { // Storage account with public network access disabled; its private endpoints are declared later in this file
 57 |   name: storageAccountName
 58 |   location: location
 59 |   tags: union(tags, { 'azd-service-name': storageAccountName }) // adds the azd-service-name tag on top of the caller-supplied tags
 60 |   sku: {
 61 |     name: storageAccountType // restricted by the @allowed list on the storageAccountType parameter
 62 |   }
 63 |   kind: 'StorageV2'
 64 |   properties: {
 65 |     supportsHttpsTrafficOnly: true
 66 |     publicNetworkAccess: 'Disabled'
 67 |     allowBlobPublicAccess: false
 68 |     accessTier: 'Hot'
 69 |     networkAcls: {
 70 |       bypass: 'None' // no bypass category is exempted from the deny rule below
 71 |       defaultAction: 'Deny'
 72 |     }
 73 |   }
 74 | }
 75 | 
 76 | resource shareFunctionApp 'Microsoft.Storage/storageAccounts/fileServices/shares@2022-05-01' = if (provisionFunctionShare) {
 77 |   name: '${storageAccountName}/default/${functionContentShareName}'
 78 |   dependsOn: [
 79 |     storageAccount
 80 |   ]
 81 | }
 82 | 
 83 | resource shareLogicApp 'Microsoft.Storage/storageAccounts/fileServices/shares@2022-05-01' = if (provisionLogicShare) {
 84 |   name: '${storageAccountName}/default/${logicContentShareName}'
 85 |   dependsOn: [
 86 |     storageAccount
 87 |   ]
 88 | }
 89 | 
 90 | resource storageAccountFunctionAppRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { // grants the function app identity the Storage Blob Data Owner role
 91 |   name: guid(storageAccount.id, functionAppmanagedIdentity.name, storageBlobDataOwnerRoleId) // deterministic name, so redeployments reuse the same assignment
 92 |   scope: storageAccount // assignment applies to this storage account only
 93 |   properties: {
 94 |     principalId: functionAppmanagedIdentity.properties.principalId
 95 |     principalType: 'ServicePrincipal' // stated explicitly, matching the other role assignments in this repository
 96 |     roleDefinitionId: storageBlobDataOwnerRoleId
 97 |   }
 98 | }
 99 | module privateEndpointBlob '../networking/private-endpoint.bicep' = {
100 |   name: '${storageAccountName}-blob-privateEndpoint'
101 |   params: {
102 |     groupIds: [
103 |       'blob'
104 |     ]
105 |     dnsZoneName: storageBlobDnsZoneName
106 |     name: storageBlobPrivateEndpointName
107 |     privateLinkServiceId: storageAccount.id
108 |     location: location
109 |     dnsZoneRG: dnsZoneRG
110 |     privateEndpointSubnetId: subnet.id
111 |     dnsSubId: dnsSubscriptionId
112 |   }
113 | }
114 | 
115 | module privateEndpointFile '../networking/private-endpoint.bicep' = {
116 |   name: '${storageAccountName}-file-privateEndpoint'
117 |   params: {
118 |     groupIds: [
119 |       'file'
120 |     ]
121 |     dnsZoneName: storageFileDnsZoneName
122 |     name: storageFilePrivateEndpointName
123 |     privateLinkServiceId: storageAccount.id
124 |     location: location
125 |     dnsZoneRG: dnsZoneRG
126 |     privateEndpointSubnetId: subnet.id
127 |     dnsSubId: dnsSubscriptionId
128 |   }
129 | }
130 | 
131 | module privateEndpointTable '../networking/private-endpoint.bicep' = {
132 |   name: '${storageAccountName}-table-privateEndpoint'
133 |   params: {
134 |     groupIds: [
135 |       'table'
136 |     ]
137 |     dnsZoneName: storageTableDnsZoneName
138 |     name: storageTablePrivateEndpointName
139 |     privateLinkServiceId: storageAccount.id
140 |     location: location
141 |     dnsZoneRG: dnsZoneRG
142 |     privateEndpointSubnetId: subnet.id
143 |     dnsSubId: dnsSubscriptionId
144 |   }
145 | }
146 | 
147 | module privateEndpointQueue '../networking/private-endpoint.bicep' = {
148 |   name: '${storageAccountName}-queue-privateEndpoint'
149 |   params: {
150 |     groupIds: [
151 |       'queue'
152 |     ]
153 |     dnsZoneName: storageQueueDnsZoneName
154 |     name: storageQueuePrivateEndpointName
155 |     privateLinkServiceId: storageAccount.id
156 |     location: location
157 |     dnsZoneRG: dnsZoneRG
158 |     privateEndpointSubnetId: subnet.id
159 |     dnsSubId: dnsSubscriptionId
160 |   }
161 | }
162 | 
163 | 
164 | output storageAccountName string = storageAccount.name
165 | 


--------------------------------------------------------------------------------
/infra/modules/logicapp/api-connection-access.bicep:
--------------------------------------------------------------------------------
 1 | param connectionName string
 2 | param accessPolicyName string
 3 | param identityPrincipalId string
 4 | param location string = resourceGroup().location
 5 | 
 6 | resource logicAppConnectionExisting 'Microsoft.Web/connections@2016-06-01' existing = { // references a connection that must already exist in the deployment's resource group
 7 |   name: connectionName
 8 |   resource accessPolicy 'accessPolicies@2016-06-01' = { // child access policy created on that connection
 9 |     name: accessPolicyName
10 |     location: location
11 |     properties: {
12 |       principal: {
13 |         type: 'ActiveDirectory'
14 |         identity: {
15 |           tenantId: subscription().tenantId // tenant of the subscription being deployed to
16 |           objectId: identityPrincipalId // principal the policy is written for
17 |         }
18 |       }
19 |     }
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/infra/modules/logicapp/api-connection.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
 3 |     "contentVersion": "1.0.0.0",
 4 |     "parameters": {
 5 |         "connection_name": {
 6 |             "type": "string"
 7 |         },
 8 |         "display_name": {
 9 |             "type": "string"
10 |         },
11 |         "location": {
12 |             "type": "string"
13 |         }
14 |     },
15 |     "variables": {},
16 |     "resources": [
17 |         {
18 |             "type": "Microsoft.Web/connections",
19 |             "name": "[parameters('connection_name')]",
20 |             "apiVersion": "2016-06-01",
21 |             "location": "[parameters('location')]",
22 |             "kind": "V2",
23 |             "properties": {
24 |                 "alternativeParameterValues": {},
25 |                 "api": {
26 |                     "id": "[subscriptionResourceId('Microsoft.Web/locations/managedApis', parameters('location'), parameters('connection_name'))]"
27 |                 },
28 |                 "authenticatedUser": {},
29 |                 "connectionState": "Enabled",
30 |                 "customParameterValues": {},
31 |                 "displayName": "[parameters('display_name')]",
32 |                 "parameterValueSet": {
33 |                     "name": "managedIdentityAuth",
34 |                     "values": {}
35 |                 }
36 |             }
37 |         }
38 |     ],
39 |     "outputs": {
40 |         "connectRuntimeUrl": {
41 |             "type": "string",
42 |             "value": "[reference(resourceId('Microsoft.Web/connections', parameters('connection_name')), '2016-06-01').connectionRuntimeUrl]"
43 |         },
44 |         "resourceId": {
45 |             "type": "string",
46 |             "value": "[resourceId('Microsoft.Web/connections', parameters('connection_name'))]"
47 |         },
48 |         "apiId": {
49 |             "type": "string",
50 |             "value": "[subscriptionResourceId('Microsoft.Web/locations/managedApis', parameters('location'), parameters('connection_name'))]"
51 |         }
52 |     }
53 | }


--------------------------------------------------------------------------------
/infra/modules/monitor/applicationinsights.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param dashboardName string
 3 | param location string = resourceGroup().location
 4 | param tags object = {}
 5 | 
 6 | param logAnalyticsWorkspaceId string
 7 | 
 8 | param createDashboard bool
 9 | 
10 | // Networking
11 | param privateLinkScopeName string
12 | 
13 | resource privateLinkScope 'microsoft.insights/privateLinkScopes@2021-07-01-preview' existing = if (privateLinkScopeName != '') {
14 |   name: privateLinkScopeName
15 | }
16 | 
17 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { // Application Insights component backed by the given Log Analytics workspace
18 |   name: name
19 |   location: location
20 |   tags: union(tags, { 'azd-service-name': name })
21 |   kind: 'web'
22 |   properties: {
23 |     Application_Type: 'web'
24 |     WorkspaceResourceId: logAnalyticsWorkspaceId
25 |     publicNetworkAccessForIngestion: privateLinkScopeName != '' ? 'Disabled' : 'Enabled' // public ingestion is disabled when a private link scope name is supplied
26 |     publicNetworkAccessForQuery: 'Enabled' // query access stays enabled whether or not a private link scope is used
27 |     CustomMetricsOptedInType: 'WithDimensions'
28 |   }
29 | }
30 | 
31 | resource appInsightsScopedResource 'Microsoft.Insights/privateLinkScopes/scopedResources@2021-07-01-preview' = if (privateLinkScopeName != '') {
32 |   parent: privateLinkScope
33 |   name: '${applicationInsights.name}-connection'
34 |   properties: {
35 |     linkedResourceId: applicationInsights.id
36 |   }
37 | }
38 | 
39 | module applicationInsightsDashboard 'applicationinsights-dashboard.bicep' = if(createDashboard) {
40 |   name: 'application-insights-dashboard'
41 |   params: {
42 |     name: dashboardName
43 |     location: location
44 |     applicationInsightsName: applicationInsights.name
45 |   }
46 | }
47 | 
48 | output connectionString string = applicationInsights.properties.ConnectionString
49 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey
50 | output name string = applicationInsights.name
51 | 


--------------------------------------------------------------------------------
/infra/modules/monitor/loganalytics.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | // Networking
 6 | param privateLinkScopeName string
 7 | 
 8 | resource privateLinkScope 'microsoft.insights/privateLinkScopes@2021-07-01-preview' existing = if (privateLinkScopeName != '') {
 9 |   name: privateLinkScopeName
10 | }
11 | 
12 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = { // Log Analytics workspace
13 |   name: name
14 |   location: location
15 |   tags: union(tags, { 'azd-service-name': name })
16 |   properties: any({
17 |     retentionInDays: 30
18 |     features: {
19 |       searchVersion: 1
20 |     }
21 |     sku: {
22 |       name: 'PerGB2018'
23 |     }
24 |     publicNetworkAccessForIngestion: privateLinkScopeName != '' ? 'Disabled' : 'Enabled' // public ingestion is disabled when a private link scope name is supplied
25 |     publicNetworkAccessForQuery: 'Enabled' // query access stays enabled whether or not a private link scope is used
26 |   })
27 | }
28 | 
29 | resource logAnalyticsScopedResource 'Microsoft.Insights/privateLinkScopes/scopedResources@2021-07-01-preview' = if (privateLinkScopeName != '') {
30 |   parent: privateLinkScope
31 |   name: '${logAnalytics.name}-connection'
32 |   properties: {
33 |     linkedResourceId: logAnalytics.id
34 |   }
35 | }
36 | 
37 | output id string = logAnalytics.id
38 | output name string = logAnalytics.name
39 | 


--------------------------------------------------------------------------------
/infra/modules/monitor/monitoring.bicep:
--------------------------------------------------------------------------------
  1 | param logAnalyticsName string
  2 | param apimApplicationInsightsName string
  3 | param apimApplicationInsightsDashboardName string
  4 | param functionApplicationInsightsName string
  5 | param functionApplicationInsightsDashboardName string
  6 | param location string = resourceGroup().location
  7 | param tags object = {}
  8 | 
  9 | param createDashboard bool
 10 | 
 11 | // Networking
 12 | param usePrivateLinkScope bool = true
 13 | var privateLinkScopeName = 'ampls-monitoring'
 14 | param vNetName string
 15 | param privateEndpointSubnetName string
 16 | param applicationInsightsDnsZoneName string
 17 | 
 18 | // Use existing network/dns zone
 19 | param dnsZoneRG string
 20 | param dnsSubscriptionId string
 21 | param vNetRG string
 22 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = {
 23 |   name: vNetName
 24 |   scope: resourceGroup(vNetRG)
 25 | }
 26 | 
 27 | // Get existing subnet
 28 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = {
 29 |   name: privateEndpointSubnetName
 30 |   parent: vnet
 31 | }
 32 | 
 33 | resource privateLinkScope 'microsoft.insights/privateLinkScopes@2021-07-01-preview' = if (usePrivateLinkScope) { // private link scope, created only when usePrivateLinkScope is true
 34 |   name: privateLinkScopeName // fixed name taken from the variable declared above
 35 |   location: 'global'
 36 |   properties: {
 37 |     accessModeSettings: {
 38 |       ingestionAccessMode: 'Open' // both access modes are set to 'Open'
 39 |       queryAccessMode: 'Open'
 40 |     }
 41 |   }
 42 | }
 43 | 
 44 | module logAnalytics 'loganalytics.bicep' = {
 45 |   name: 'log-analytics'
 46 |   params: {
 47 |     name: logAnalyticsName
 48 |     location: location
 49 |     tags: tags
 50 |     privateLinkScopeName: usePrivateLinkScope ? privateLinkScopeName : ''
 51 |   }
 52 | }
 53 | 
 54 | // APIM App Insights
 55 | module apimApplicationInsights 'applicationinsights.bicep' = {
 56 |   name: 'application-insights'
 57 |   params: {
 58 |     name: apimApplicationInsightsName
 59 |     location: location
 60 |     tags: tags
 61 |     dashboardName: apimApplicationInsightsDashboardName
 62 |     logAnalyticsWorkspaceId: logAnalytics.outputs.id
 63 |     privateLinkScopeName: usePrivateLinkScope ? privateLinkScopeName : ''
 64 |     createDashboard: createDashboard
 65 |   }
 66 | }
 67 | 
 68 | // Function App Insights
 69 | module functionApplicationInsights 'applicationinsights.bicep' = {
 70 |   name: 'func-application-insights'
 71 |   params: {
 72 |     name: functionApplicationInsightsName
 73 |     location: location
 74 |     tags: tags
 75 |     dashboardName: functionApplicationInsightsDashboardName
 76 |     logAnalyticsWorkspaceId: logAnalytics.outputs.id
 77 |     privateLinkScopeName: usePrivateLinkScope ? privateLinkScopeName : ''
 78 |     createDashboard: createDashboard
 79 |   }
 80 | }
 81 | 
 82 | module privateEndpoint '../networking/private-endpoint.bicep' = if (usePrivateLinkScope) { // private endpoint for the private link scope; skipped when the scope is not created
 83 |   name: '${privateLinkScopeName}-privateEndpoint'
 84 |   params: {
 85 |     groupIds: [
 86 |       'azuremonitor'
 87 |     ]
 88 |     dnsZoneName: applicationInsightsDnsZoneName
 89 |     name: '${privateLinkScopeName}-pe'
 90 |     privateLinkServiceId: privateLinkScope.id
 91 |     location: location
 92 |     dnsZoneRG: dnsZoneRG
 93 |     privateEndpointSubnetId: subnet.id
 94 |     dnsSubId: dnsSubscriptionId
 95 |   }
 96 |   dependsOn: [ // deploy only after the workspace and both Application Insights modules, which add their scoped resources to the scope
 97 |     logAnalytics
 98 |     apimApplicationInsights
 99 |     functionApplicationInsights
100 |   ]
101 | }
102 | 
103 | output applicationInsightsName string = apimApplicationInsights.outputs.name
104 | output applicationInsightsConnectionString string = apimApplicationInsights.outputs.connectionString
105 | output applicationInsightsInstrumentationKey string = apimApplicationInsights.outputs.instrumentationKey
106 | output funcApplicationInsightsName string = functionApplicationInsights.outputs.name
107 | output funcApplicationInsightsConnectionString string = functionApplicationInsights.outputs.connectionString
108 | output funcApplicationInsightsInstrumentationKey string = functionApplicationInsights.outputs.instrumentationKey
109 | output logAnalyticsWorkspaceId string = logAnalytics.outputs.id
110 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name
111 | 


--------------------------------------------------------------------------------
/infra/modules/networking/dns.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param tags object = {}
 3 | 
 4 | resource privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = {
 5 |   name: name
 6 |   location: 'global'
 7 |   tags: union(tags, { 'azd-service-name': name })
 8 | }
 9 | 
10 | output privateDnsZoneName string = privateDnsZone.name
11 | 


--------------------------------------------------------------------------------
/infra/modules/networking/private-endpoint.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param privateLinkServiceId string
 3 | param groupIds array
 4 | param dnsZoneName string
 5 | param location string
 6 | 
 7 | param privateEndpointSubnetId string
 8 | param dnsZoneRG string
 9 | param dnsSubId string
10 | 
11 | 
12 | resource privateEndpointDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing = {
13 |   name: dnsZoneName
14 |   scope: resourceGroup(dnsSubId ,dnsZoneRG)
15 | }
16 | 
17 | resource privateEndpoint 'Microsoft.Network/privateEndpoints@2022-09-01' = { // private endpoint in the given subnet that targets privateLinkServiceId
18 |   name: name
19 |   location: location
20 |   dependsOn: [
21 |     privateEndpointDnsZone // existing zone looked up in dnsSubId/dnsZoneRG; this resource does not otherwise reference it
22 |   ]
23 |   properties: {
24 |     subnet: {
25 |       id: privateEndpointSubnetId
26 |     }
27 |     privateLinkServiceConnections: [
28 |       {
29 |         name: name // the connection reuses the endpoint's name
30 |         properties: {
31 |           privateLinkServiceId: privateLinkServiceId
32 |           groupIds: groupIds // sub-resource(s) of the target, e.g. 'blob' or 'azuremonitor' as passed by callers in this repo
33 |         }
34 |       }
35 |     ]
36 |   }
37 | }
38 | 
39 | resource privateEndpointDnsGroup 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-09-01' = { // DNS zone group that ties the endpoint to the existing private DNS zone
40 |   parent: privateEndpoint
41 |   name: 'privateDnsZoneGroup'
42 |   properties: {
43 |     privateDnsZoneConfigs: [
44 |       {
45 |         name: 'default'
46 |         properties: {
47 |           privateDnsZoneId: privateEndpointDnsZone.id // zone resolved from dnsZoneName in dnsSubId/dnsZoneRG
48 |         }
49 |       }
50 |     ]
51 |   }
52 | }
53 | 
54 | output privateEndpointName string = privateEndpoint.name
55 | 


--------------------------------------------------------------------------------
/infra/modules/networking/subnet.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param properties object
 3 | param vnetName string
 4 | param vnetRG string
 5 | 
 6 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = {
 7 |   name: vnetName
 8 |   scope: resourceGroup(vnetRG)
 9 | }
10 | 
11 | 
12 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' = { // NOTE(review): no parent or scope is set, so the subnet deploys at this module's own scope — presumably callers invoke the module with scope = vnetRG; confirm at the call site
13 |   name: '${vnet.name}/${name}' // '<vnet>/<subnet>' name form used instead of a parent reference
14 |   properties: properties // subnet properties are passed through unchanged from the caller
15 | }
16 | 


--------------------------------------------------------------------------------
/infra/modules/networking/vnet-existing.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param vnetRG string
 3 | param apimSubnetName string
 4 | param privateEndpointSubnetName string
 5 | param functionAppSubnetName string
 6 | 
 7 | resource virtualNetwork 'Microsoft.Network/virtualNetworks@2019-11-01' existing = {
 8 |   name: name
 9 |   scope: resourceGroup(vnetRG)
10 | }
11 | 
12 | resource apimSubnet 'Microsoft.Network/virtualNetworks/subnets@2023-11-01' existing = {
13 |   name: apimSubnetName
14 |   parent: virtualNetwork
15 | }
16 | 
17 | resource privateEndpointSubnet 'Microsoft.Network/virtualNetworks/subnets@2023-11-01' existing = {
18 |   name: privateEndpointSubnetName
19 |   parent: virtualNetwork
20 | }
21 | 
22 | resource functionAppSubnet 'Microsoft.Network/virtualNetworks/subnets@2023-11-01' existing = {
23 |   name: functionAppSubnetName
24 |   parent: virtualNetwork
25 | }
26 | 
27 | output virtualNetworkId string = virtualNetwork.id
28 | output vnetName string = virtualNetwork.name
29 | output apimSubnetName string = apimSubnet.name
30 | output apimSubnetId string = '${virtualNetwork.id}/subnets/${apimSubnetName}'
31 | output privateEndpointSubnetName string = privateEndpointSubnet.name
32 | output privateEndpointSubnetId string = '${virtualNetwork.id}/subnets/${privateEndpointSubnetName}'
33 | output functionAppSubnetName string = functionAppSubnet.name
34 | output functionAppSubnetId string = '${virtualNetwork.id}/subnets/${functionAppSubnetName}'
35 | output location string = virtualNetwork.location
36 | output vnetRG string = vnetRG
37 | 


--------------------------------------------------------------------------------
/infra/modules/security/managed-identity-apim.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | 
 5 | var cognitiveServicesUserRoleDefinitionId = resourceId('Microsoft.Authorization/roleDefinitions', '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd')
 6 | var eventHubsDataSenderRoleDefinitionId = resourceId('Microsoft.Authorization/roleDefinitions', '2b629674-e913-4c01-ae53-ef4638d8f975')
 7 | 
 8 | // Getting definitions for 'Search Index Data Reader' and 'Search Index Data Contributor' 
 9 | // var searchIndexDataReaderRoleDefinitionId = resourceId('Microsoft.Authorization/roleDefinitions', '1407120a-92aa-4202-b7e9-c0e197c71c8f')
10 | // var searchIndexDataContributorRoleDefinitionId = resourceId('Microsoft.Authorization/roleDefinitions', '8ebe5a00-799e-43f5-93ac-243d3dce84a7')
11 | 
12 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' = {
13 |   name: name
14 |   location: location
15 |   tags: union(tags, { 'azd-service-name': name })
16 | }
17 | 
18 | // Assign the role held in cognitiveServicesUserRoleDefinitionId to the user-assigned managed identity used by workloads. NOTE(review): GUID 5e0bd9bd-7b93-4f28-af87-19fc36ad61bd appears to be 'Cognitive Services OpenAI User' rather than 'Cognitive Services User' — confirm against the Azure built-in roles list
19 | resource cognitiveServicesUserRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
20 |   name: guid(managedIdentity.id, cognitiveServicesUserRoleDefinitionId) // deterministic name; changing either input changes the assignment name
21 |   scope: resourceGroup() // granted on the whole resource group of this deployment
22 |   properties: {
23 |     roleDefinitionId: cognitiveServicesUserRoleDefinitionId
24 |     principalId: managedIdentity.properties.principalId
25 |     principalType: 'ServicePrincipal'
26 |   }
27 | }
28 | 
29 | // Assign the Azure Event Hubs Data Sender role to the user-assigned managed identity used by workloads
30 | resource eventHubsDataSenderRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
31 |   name: guid(managedIdentity.id, eventHubsDataSenderRoleDefinitionId)
32 |   scope: resourceGroup()
33 |   properties: {
34 |     roleDefinitionId: eventHubsDataSenderRoleDefinitionId
35 |     principalId: managedIdentity.properties.principalId
36 |     principalType: 'ServicePrincipal'
37 |   }
38 | }
39 | 
40 | 
41 | output managedIdentityName string = managedIdentity.name
42 | 


--------------------------------------------------------------------------------
/infra/modules/security/managed-identity-stream-analytics.bicep:
--------------------------------------------------------------------------------
 1 | param name string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | param cosmosDbAccountName string
 5 | 
 6 | var docDbAccNativeContributorRoleDefinitionId = '00000000-0000-0000-0000-000000000002'
 7 | var eventHubsDataOwnerRoleDefinitionId = resourceId('Microsoft.Authorization/roleDefinitions', 'f526a384-b230-433a-b45c-95f59c4a2dec')
 8 | 
 9 | resource cosmosDbAccount 'Microsoft.DocumentDB/databaseAccounts@2024-02-15-preview' existing = {
10 |   name: cosmosDbAccountName
11 | }
12 | 
13 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' = {
14 |   name: name
15 |   location: location
16 |   tags: union(tags, { 'azd-service-name': name })
17 | }
18 | 
19 | resource sqlRoleAssignment 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-04-15' = { // Cosmos DB SQL role assignment giving the identity role ...0002 on the existing account
20 |   name: guid(docDbAccNativeContributorRoleDefinitionId, managedIdentity.id, cosmosDbAccount.id) // deterministic name derived from role, identity and account
21 |   parent: cosmosDbAccount
22 |   properties:{
23 |     principalId: managedIdentity.properties.principalId
24 |     roleDefinitionId: '/${cosmosDbAccount.id}/sqlRoleDefinitions/${docDbAccNativeContributorRoleDefinitionId}' // NOTE(review): cosmosDbAccount.id already starts with '/', so this value begins with '//' — confirm the extra slash is intended before changing it
25 |     scope: cosmosDbAccount.id // assignment covers the whole account
26 |   }
27 | }
28 | 
29 | // Assign the Azure Event Hubs Data Owner role to the user-assigned managed identity used by workloads
30 | resource eventHubsDataOwnerRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
31 |   name: guid(managedIdentity.id, eventHubsDataOwnerRoleDefinitionId)
32 |   scope: resourceGroup()
33 |   properties: {
34 |     roleDefinitionId: eventHubsDataOwnerRoleDefinitionId
35 |     principalId: managedIdentity.properties.principalId
36 |     principalType: 'ServicePrincipal'
37 |   }
38 | }
39 | 
40 | 
41 | output managedIdentityName string = managedIdentity.name
42 | 


--------------------------------------------------------------------------------
/infra/modules/stream-analytics/stream-analytics.bicep:
--------------------------------------------------------------------------------
 1 | param jobName string
 2 | param location string = resourceGroup().location
 3 | param tags object = {}
 4 | param eventHubNamespace string
 5 | param eventHubName string
 6 | param cosmosDbAccountName string
 7 | param cosmosDbDatabaseName string
 8 | param cosmosDbContainerName string
 9 | param managedIdentityName string
10 | 
11 | 
12 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = {
13 |   name: managedIdentityName
14 | }
15 | 
16 | resource streamAnalyticsJob 'Microsoft.StreamAnalytics/streamingjobs@2021-10-01-preview' = { // job that copies events from the Event Hub input to the Cosmos DB output
17 |   name: jobName
18 |   location: location
19 |   tags: union(tags, { 'azd-service-name': jobName })
20 |   identity: {
21 |     type: 'UserAssigned'
22 |     userAssignedIdentities: {
23 |       '${managedIdentity.id}': {} // the existing identity named by managedIdentityName
24 |     }
25 |   }
26 |   properties: {
27 |     sku: {
28 |       name: 'StandardV2'
29 |     }
30 |     eventsOutOfOrderPolicy: 'Adjust'
31 |     outputErrorPolicy: 'Stop'
32 |     eventsOutOfOrderMaxDelayInSeconds: 5
33 |     compatibilityLevel: '1.2'
34 |     inputs: [
35 |       {
36 |         name: 'input' // referenced as [input] in the transformation query
37 |         properties: {
38 |           type: 'Stream'
39 |           serialization: {
40 |             type: 'Json'
41 |             properties: {
42 |               encoding: 'UTF8'
43 |             }
44 |           }
45 |           datasource: {
46 |             type: 'Microsoft.EventHub/EventHub'
47 |             properties: {
48 |               authenticationMode: 'Msi' // managed identity authentication; no connection string is configured
49 |               eventHubName: eventHubName
50 |               serviceBusNamespace: eventHubNamespace
51 |             }
52 |           }
53 |         }
54 |       }
55 |     ]
56 |     outputs: [
57 |       {
58 |         name: 'output' // referenced as [output] in the transformation query
59 |         properties: {
60 |           datasource: {
61 |             type: 'Microsoft.Storage/DocumentDB'
62 |             properties: {
63 |               accountId: cosmosDbAccountName // receives the account name parameter
64 |               database: cosmosDbDatabaseName
65 |               collectionNamePattern: cosmosDbContainerName
66 |               authenticationMode: 'Msi' // managed identity authentication; no account key is configured
67 |               documentId: 'id'
68 |               partitionKey: 'productName'
69 |             }
70 |           }
71 |         }
72 |       }
73 |     ]
74 |     transformation: {
75 |       name: 'transformation'
76 |       properties: {
77 |         query: 'SELECT * INTO [output] FROM [input]' // pass-through query: every input field is written to the output
78 |         streamingUnits: 3
79 |       }
80 |     }
81 |   }
82 | }
83 | 
84 | output asaId string = streamAnalyticsJob.id
85 | 


--------------------------------------------------------------------------------
/scripts/apim-event-hub-logger.ps1:
--------------------------------------------------------------------------------
 1 | # Registers an Event Hub logger on an API Management instance. First select the target subscription.
 2 | $subscriptionId = "<SubscriptionId>"
 3 | Set-AzContext -Subscription $subscriptionId
 4 | 
 5 | # API Management service-specific details
 6 | $apimServiceName = "apim-ai-gateway"
 7 | $resourceGroupName = "rg-ai-gateway"
 8 | 
 9 | # Event Hub connection string: replace every <placeholder> locally and do not commit the real key
10 | $eventHubConnectionString = "Endpoint=sb://<EventHubsNamespace>.servicebus.windows.net/;SharedAccessKeyName=<KeyName>;SharedAccessKey=<Key>"
11 | 
12 | # Create logger. NOTE(review): -Name is documented as the event hub entity name — confirm "usage-eventhub-logger" matches the hub
13 | $context = New-AzApiManagementContext -ResourceGroupName $resourceGroupName -ServiceName $apimServiceName
14 | New-AzApiManagementLogger -Context $context -LoggerId "usage-eventhub-logger" -Name "usage-eventhub-logger" -ConnectionString $eventHubConnectionString -Description "Event Hub logger for OpenAI usage metrics"


--------------------------------------------------------------------------------
/src/apim/ai-search-api/ai-search-api-policy.xml:
--------------------------------------------------------------------------------
 1 | <policies>
 2 |     <inbound>
 3 |         <base />
 4 |         <authentication-managed-identity resource="https://search.azure.com" /> <!-- authenticate to the backend with a managed identity token for https://search.azure.com -->
 5 |         <set-header name="api-key" exists-action="delete" /> <!-- remove any api-key header from the incoming request -->
 6 |     </inbound>
 7 |     <backend>
 8 |         <base />
 9 |     </backend>
10 |     <outbound>
11 |         <base />
12 |     </outbound>
13 |     <on-error>
14 |         <base />
15 |     </on-error>
16 | </policies>


--------------------------------------------------------------------------------
/src/apim/http/chat.http:
--------------------------------------------------------------------------------
 1 | # Streaming chat-completions request through the APIM gateway. Replace the placeholder key locally; never commit a real key.
 2 | POST https://apim-7pg4fleh6wgj6.azure-api.net/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview
 3 | Content-Type: application/json
 4 | api-key: <your-apim-subscription-key>
 5 | 
 6 | {
 7 |     "stream": true,
 8 |     "model": "chat",
 9 |     "messages": [
10 |         {
11 |             "role": "system",
12 |             "content": "You are a helpful assistant that responds in Markdown. Help me with my math homework!"
13 |         },
14 |         {
15 |             "role": "user",
16 |             "content": "How to calculate the distance between earth and moon?"
17 |         }
18 |     ]
19 | }


--------------------------------------------------------------------------------
/src/apim/oa-fragments-archived/oai-blocked-streaming-in-policy.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 |     <!-- Stores the request body's "stream" value in the isStream variable when it is present and not null -->
 3 |     <choose>
 4 |         <when condition="@(context.Request.Body.As<JObject>(true)["stream"] != null && context.Request.Body.As<JObject>(true)["stream"].Type != JTokenType.Null)">
 5 |             <set-variable name="isStream"
 6 |                 value="@{
 7 |             var content = (context.Request.Body?.As<JObject>(true));
 8 |             string streamValue = content["stream"].ToString();
 9 |             return streamValue;
10 |         }" />
11 |         </when>
12 |     </choose>
13 |     <!-- Blocks streaming completions and returns 404 -->
14 |     <choose>
15 |         <when condition="@(context.Variables.GetValueOrDefault<string>("isStream","false").Equals("true", StringComparison.OrdinalIgnoreCase))">
16 |             <return-response>
17 |                 <set-status code="404" reason="BlockStreaming" />
18 |                 <set-header name="Microsoft-Azure-Api-Management-Correlation-Id"
19 |                     exists-action="override">
20 |                     <value>@{return Guid.NewGuid().ToString();}</value>
21 |                 </set-header>
22 |                 <set-body>Streaming chat completions are not allowed by this organization.</set-body>
23 |             </return-response>
24 |         </when>
25 |     </choose>
26 | </fragment>


--------------------------------------------------------------------------------
/src/apim/oa-fragments-archived/oai-clusters-lb-configuration-be-policy.xml:
--------------------------------------------------------------------------------
  1 | <!--
  2 |     Priority-based backend load balancing with throttling awareness.
  3 |     Reads the "routes" and "remainingRoutes" variables (set by the inbound fragment). Each attempt picks a
  4 |     non-throttling route with the lowest priority value, forwards the request, and on 429/5xx marks that
  5 |     route as throttling until its retry window elapses so the retry loop can try the remaining routes.
  6 | -->
  7 | <fragment>
  8 | 	<retry condition="@(context.Response != null && (context.Response.StatusCode == 429 || context.Response.StatusCode >= 500) && ((Int32)context.Variables["remainingRoutes"]) > 0)" count="3" interval="0">
  9 | 		<!-- Before picking the route, let's verify if there is any that should be set to not throttling anymore -->
 10 | 		<set-variable name="routes" value="@{
 11 |                 JArray routes = (JArray)context.Variables["routes"];
 12 |                 
 13 |                 for (int i = 0; i < routes.Count; i++)
 14 |                 {
 15 |                     JObject route = (JObject)routes[i];
 16 | 
 17 |                     if (route.Value<bool>("isThrottling") && DateTime.Now >= route.Value<DateTime>("retryAfter"))
 18 |                     {
 19 |                         route["isThrottling"] = false;
 20 |                         route["retryAfter"] = DateTime.MinValue;
 21 |                     }
 22 |                 }
 23 | 
 24 |                 return routes; 
 25 |             }" />
 26 | 		<cache-store-value key="@(context.Request.MatchedParameters["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" value="@((JArray)context.Variables["routes"])" duration="86400" />
 27 | 		<!-- This is the main logic to pick the route to be used -->
 28 | 		<set-variable name="routeIndex" value="@{
 29 |                 
 30 |                 JArray routes = (JArray)context.Variables["routes"];
 31 |                 int selectedPriority = Int32.MaxValue;
 32 |                 List<int> availableRoutesIndexes = new List<int>();
 33 | 
 34 |                 for (int i = 0; i < routes.Count; i++)
 35 |                 {
 36 |                     JObject route = (JObject)routes[i];
 37 | 
 38 |                     if (!route.Value<bool>("isThrottling"))
 39 |                     {
 40 |                         int routePriority = route.Value<int>("priority");
 41 | 
 42 |                         if (routePriority < selectedPriority)
 43 |                         {
 44 |                             selectedPriority = routePriority;
 45 |                             availableRoutesIndexes.Clear();
 46 |                             availableRoutesIndexes.Add(i);
 47 |                         } 
 48 |                         else if (routePriority == selectedPriority)
 49 |                         {
 50 |                             availableRoutesIndexes.Add(i);
 51 |                         }
 52 |                     }
 53 |                 }
 54 | 
 55 |                 if (availableRoutesIndexes.Count == 1)
 56 |                 {
 57 |                     return availableRoutesIndexes[0];
 58 |                 }
 59 |             
 60 |                 if (availableRoutesIndexes.Count > 0)
 61 |                 {
 62 |                     //Returns a random route from the list if we have more than one available with the same priority
 63 |                     return availableRoutesIndexes[new Random().Next(0, availableRoutesIndexes.Count)];
 64 |                 }
 65 |                 else
 66 |                 {
 67 |                     //If there are no available routes, the request will be sent to the first one
 68 |                     return 0;    
 69 |                 }
 70 |                 }" />
 71 | 		<!-- Resolve the selected route's properties and point the backend at it -->
 72 | 		<set-variable name="routeUrl" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("url") + "/openai")" />
 73 |         <set-variable name="routeLocation" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("location"))" />
 74 |         <set-variable name="routeName" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("name"))" />
 75 |         <set-variable name="deploymentName" value="@(context.Request.MatchedParameters["deployment-id"])" />
 76 | 		<set-backend-service base-url="@((string)context.Variables["routeUrl"])" />
 77 | 		<forward-request buffer-request-body="true" />
 78 | 		<choose>
 79 | 			<!-- In case we got 429 or 5xx from a route, update the list with its status -->
 80 | 			<when condition="@(context.Response != null && (context.Response.StatusCode == 429 || context.Response.StatusCode >= 500) )">
 81 | 				<cache-lookup-value key="@(context.Request.MatchedParameters["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" variable-name="routes" />
 82 | 				<set-variable name="routes" value="@{
 83 |                         JArray routes = (JArray)context.Variables["routes"];
 84 |                         int currentrouteIndex = context.Variables.GetValueOrDefault<int>("routeIndex");
 85 |                         //Headers may be missing or non-numeric (Retry-After can be an HTTP-date), so parse defensively; default is 10 seconds
 86 |                         int retryAfter = 10;
 87 |                         foreach (string headerName in new string[] { "Retry-After", "x-ratelimit-reset-requests", "x-ratelimit-reset-tokens" })
 88 |                         {
 89 |                             int seconds;
 90 |                             if (int.TryParse(context.Response.Headers.GetValueOrDefault(headerName, ""), out seconds) && seconds >= 0)
 91 |                             {
 92 |                                 retryAfter = seconds;
 93 |                                 break;
 94 |                             }
 95 |                         }
 96 | 
 97 |                         JObject route = (JObject)routes[currentrouteIndex];
 98 |                         route["isThrottling"] = true;
 99 |                         route["retryAfter"] = DateTime.Now.AddSeconds(retryAfter);
100 | 
101 |                         return routes;      
102 |                     }" />
103 | 				<cache-store-value key="@(context.Request.MatchedParameters["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" value="@((JArray)context.Variables["routes"])" duration="86400" />
104 | 				<set-variable name="remainingRoutes" value="@{
105 |                         JArray routes = (JArray)context.Variables["routes"];
106 | 
107 |                         int remainingRoutes = 0;
108 | 
109 |                         for (int i = 0; i < routes.Count; i++)
110 |                         {
111 |                             JObject route = (JObject)routes[i];
112 | 
113 |                             if (!route.Value<bool>("isThrottling"))
114 |                             {
115 |                                 remainingRoutes++;
116 |                             }
117 |                         }
118 | 
119 |                         return remainingRoutes;
120 |                     }" />
121 | 			</when>
122 | 		</choose>
123 | 	</retry>
124 | </fragment>


--------------------------------------------------------------------------------
/src/apim/oa-fragments-archived/oai-clusters-lb-configuration-in-policy.xml:
--------------------------------------------------------------------------------
  1 | <!-- 
  2 | # OpenAI Clusters Load Balancer Configuration Policy
  3 | 
  4 | This policy fragment is responsible for managing the configuration of OpenAI clusters. It performs the following operations:
  5 | 
  6 | 1. **Cache Lookup**: It first tries to retrieve the OpenAI clusters configuration from the cache using a key built from `oaClusters`, the gateway region and the API revision.
  7 | 
  8 | 2. **Configuration Loading**: If the configuration is not found in the cache, it creates a new configuration. The configuration includes details about different OpenAI clusters and their routes. Each cluster has a deployment name and a set of routes. Each route includes the following properties:
  9 |     - `name`: The name of the route.
 10 |     - `location`: The location of the route.
 11 |     - `url`: The URL of the route.
 12 |     - `priority`: The priority of the route.
 13 |     - `isThrottling`: A boolean flag for the route's throttling state; it is initialised to `false` here.
 14 |     - `retryAfter`: A timestamp associated with the throttling state; it is initialised to `DateTime.MinValue` here.
 15 | 
 16 | 3. **Cache Storage**: After creating the configuration, it stores the configuration back in the cache with a duration of 86400 seconds (24 hours). This means that subsequent requests within the next 24 hours will retrieve the configuration from the cache instead of creating a new one. The routes of the requested deployment are resolved and cached the same way, and a 400 response is returned when no cluster matches the deployment.
 17 | 
 18 | This policy fragment is part of a larger policy and should be used in conjunction with other policy fragments to manage the routing of requests to different OpenAI clusters based on their configuration.
 19 | -->
 20 | <fragment>
 21 |     <!-- Getting OpenAI clusters configuration -->
 22 |     <cache-lookup-value key="@("oaClusters" + context.Deployment.Region + context.Api.Revision)" variable-name="oaClusters" />
 23 |     <!-- If we can't find the configuration, it will be loaded -->
 24 |     <choose>
 25 |         <when condition="@(context.Variables.ContainsKey("oaClusters") == false)">
 26 |             <set-variable name="oaClusters"
 27 |                 value="@{
 28 |                     JArray routes = new JArray();
 29 |                     JArray clusters = new JArray();
 30 |                     if(context.Deployment.Region == "West Europe" || true) //NOTE: "|| true" makes this branch run for every region
 31 |                     {
 32 |                         routes.Add(new JObject()
 33 |                         {
 34 |                             { "name", "REPLACE1" },
 35 |                             { "location", "swedencentral" },
 36 |                             { "url", "https://REPLACE1.openai.azure.com" },
 37 |                             { "priority", 1},
 38 |                             { "isThrottling", false }, 
 39 |                             { "retryAfter", DateTime.MinValue } 
 40 |                         });
 41 | 
 42 |                         routes.Add(new JObject()
 43 |                         {
 44 |                             { "name", "REPLACE2" },
 45 |                             { "location", "westeurope" },
 46 |                             { "url", "https://REPLACE2.openai.azure.com" },
 47 |                             { "priority", 1},
 48 |                             { "isThrottling", false },
 49 |                             { "retryAfter", DateTime.MinValue }
 50 |                         });
 51 |                         clusters.Add(new JObject()
 52 |                         {
 53 |                             { "deploymentName", "gpt-35-turbo" },
 54 |                             { "routes", new JArray(routes[0], routes[1]) }
 55 |                         });
 56 | 
 57 |                         clusters.Add(new JObject()
 58 |                         {
 59 |                             { "deploymentName", "embedding" },
 60 |                             { "routes", new JArray(routes[0], routes[1]) }
 61 |                         });
 62 | 
 63 |                         clusters.Add(new JObject()
 64 |                         {
 65 |                             { "deploymentName", "gpt-4" },
 66 |                             { "routes", new JArray(routes[0]) }
 67 |                         });
 68 | 
 69 |                         clusters.Add(new JObject()
 70 |                         {
 71 |                             { "deploymentName", "dall-e-3" },
 72 |                             { "routes", new JArray(routes[0]) }
 73 |                         });
 74 |                     }
 75 |                     else
 76 |                     {
 77 |                         //No clusters found, either return error or set default cluster
 78 |                     }
 79 |                     
 80 |                     return clusters;   
 81 |                 }" />
 82 |             <!-- Add cluster configurations to cache -->
 83 |             <cache-store-value key="@("oaClusters" + context.Deployment.Region + context.Api.Revision)" value="@((JArray)context.Variables["oaClusters"])" duration="86400" />
 84 |         </when>
 85 |     </choose>
 86 |     <!-- Getting OpenAI routes configuration based on deployment name, region and api revision -->
 87 |     <cache-lookup-value key="@(context.Request.MatchedParameters["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" variable-name="routes" />
 88 |     <!-- If we can't find the configuration, it will be loaded -->
 89 |     <choose>
 90 |         <when condition="@(context.Variables.ContainsKey("routes") == false)">
 91 |             <set-variable name="routes"
 92 |                 value="@{
 93 |                     string deploymentName = context.Request.MatchedParameters["deployment-id"];
 94 |                     JArray clusters = (JArray)context.Variables["oaClusters"];
 95 |                     JObject cluster = (JObject)clusters.FirstOrDefault(o => o["deploymentName"]?.Value<string>() == deploymentName);
 96 |                     if(cluster == null)
 97 |                     {
 98 |                         //No cluster matched the deployment name: return a placeholder route whose url carries the error text
 99 |                         return new JArray() { new JObject()
100 |                             {
101 |                                 { "name", deploymentName },
102 |                                 { "location", "NA" },
103 |                                 { "url", "No routes found for the deployment (" + deploymentName + ") in the region (" + context.Deployment.Region + ")" }
104 |                             }
105 |                         };
106 |                     }
107 |                     JArray routes = (JArray)cluster["routes"];
108 |                     return routes;
109 |                 }" />
110 |             <!-- If no routes found for deployment, return bad request with content of routes variable -->
111 |             <choose>
112 |                 <when condition="@(((JArray)context.Variables["routes"]).ToString().Contains("No routes"))">
113 |                     <return-response>
114 |                         <set-status code="400" reason="No routes"/>
115 |                         <set-body>
116 |                             @(((JArray)context.Variables["routes"]).ToString())
117 |                         </set-body>
118 |                      </return-response>
119 |                 </when>
120 |             </choose>
121 |             <!-- Add the resolved routes for this deployment to cache -->
122 |             <cache-store-value key="@(context.Request.MatchedParameters["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" value="@((JArray)context.Variables["routes"])" duration="86400" />
123 |         </when>
124 |     </choose>
125 |     <set-variable name="routeIndex" value="-1" />
126 |     <set-variable name="remainingRoutes" value="1" />
127 | </fragment>


--------------------------------------------------------------------------------
/src/apim/oa-fragments-archived/oai-usage-eventhub-out-policy.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 | 	<!-- Logs one usage record per HTTP 200 response to the "chargeback-eventhub-logger" Event Hub logger; every field falls back to a string default -->
 3 | 	<choose>
 4 | 		<when condition="@(context.Response.StatusCode == 200)">
 5 | 			<log-to-eventhub logger-id="chargeback-eventhub-logger">@{
 6 |                     //Avoid reading the response body here as it can only be read once; instead, before calling this fragment, a variable called responseBody is set in the outbound policy
 7 |                     //var responseBody = context.Response.Body?.As<JObject>(true); //Avoid this one
 8 |                     var responseBody = (JObject)context.Variables["responseBody"]; //It is set in the outbound policy before calling the fragment
 9 |                     return new JObject(
10 |                         new JProperty("id", responseBody?["id"]?.ToString() ?? "NA"),
11 |                         new JProperty("timestamp", DateTime.UtcNow.ToString()),
12 |                         new JProperty("appId", context.Request.Headers.GetValueOrDefault("Authorization",string.Empty).Split(' ').LastOrDefault()?.AsJwt()?.Claims.GetValueOrDefault("appid", "NA") ?? "NA"), //"NA" also when there is no parsable bearer token
13 |                         new JProperty("subscriptionId", context.Subscription?.Id?.ToString() ?? "Portal-Admin"),
14 |                         new JProperty("productName", context.Product?.Name?.ToString() ?? "Portal-Admin"),
15 |                         new JProperty("targetService", responseBody?["object"]?.ToString() ?? "NA"),
16 |                         new JProperty("model", responseBody?["model"]?.ToString() ?? "NA"),
17 |                         new JProperty("gatewayName", context.Deployment?.ServiceName ?? "NA"),
18 |                         new JProperty("gatewayRegion", context.Deployment?.Region ?? "NA"),
19 |                         new JProperty("aiGatewayId", context.Deployment?.Gateway?.Id ?? "NA"),
20 |                         new JProperty("RequestIp", context.Request?.IpAddress ?? "NA"),
21 |                         new JProperty("operationName", context.Operation?.Name ?? "NA"),
22 |                         new JProperty("routeUrl", (string)context.Variables.GetValueOrDefault<string>("routeUrl", "NA")),
23 |                         new JProperty("routeLocation", (string)context.Variables.GetValueOrDefault<string>("routeLocation", "NA")),
24 |                         new JProperty("routeName", (string)context.Variables.GetValueOrDefault<string>("routeName", "NA")),
25 |                         new JProperty("deploymentName", (string)context.Variables.GetValueOrDefault<string>("deploymentName", "NA")),
26 |                         new JProperty("promptTokens", responseBody?["usage"]?["prompt_tokens"]?.ToString() ?? "0"),
27 |                         new JProperty("responseTokens", responseBody?["usage"]?["completion_tokens"]?.ToString() ?? "0"),
28 |                         new JProperty("totalTokens", responseBody?["usage"]?["total_tokens"]?.ToString() ?? "0")
29 |                 ).ToString();
30 |                 }</log-to-eventhub>
31 | 		</when>
32 | 	</choose>
33 | </fragment>


--------------------------------------------------------------------------------
/src/apim/oa-weighted-lb/oai-clusters-weighted-lb-configuration-be-policy.xml:
--------------------------------------------------------------------------------
 1 | <fragment>
 2 | 	<!-- Retries up to 3 times with no delay while the response is 429/5xx and "remainingRoutes" is above zero -->
 3 | 	<retry condition="@(context.Response != null && (context.Response.StatusCode == 429 || context.Response.StatusCode >= 500) && ((Int32)context.Variables["remainingRoutes"]) > 0)" count="3" interval="0">
 4 | 		<!-- This is the main logic to pick the route to be used: draw a number in [1, totalWeight] and take the first route whose cumulative weight reaches it -->
 5 | 		<set-variable name="routeIndex" value="@{
 6 |             int weightSum = (Int32)context.Variables["totalWeight"];
 7 |             JArray thresholds = (JArray)context.Variables["cumulativeWeights"];
 8 |             int draw = new Random().Next(1, weightSum + 1);
 9 |             for (int idx = 0; idx < thresholds.Count; idx++)
10 |             {
11 |                 if (draw <= thresholds[idx].Value<int>())
12 |                 {
13 |                     return idx;
14 |                 }
15 |             }
16 |             //No threshold reached the drawn number: fall back to the first route
17 |             return 0;
18 |         }" />
19 | 		<!-- Resolve the chosen route's properties and send the request to it -->
20 | 		<set-variable name="routeUrl" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("url") + "/openai")" />
21 |         <set-variable name="routeLocation" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("location"))" />
22 |         <set-variable name="routeName" value="@(((JObject)((JArray)context.Variables["routes"])[(Int32)context.Variables["routeIndex"]]).Value<string>("name"))" />
23 |         <set-variable name="deploymentName" value="@(context.Request.MatchedParameters["deployment-id"])" />
24 | 		<set-backend-service base-url="@((string)context.Variables["routeUrl"])" />
25 | 		<forward-request buffer-request-body="true" />
26 | 	</retry>
27 | </fragment>


--------------------------------------------------------------------------------
/src/apim/oa-weighted-lb/oai-clusters-weighted-lb-configuration-in-policy.xml:
--------------------------------------------------------------------------------
  1 | <!--
  2 |     Weighted load balancing - inbound configuration.
  3 |     Loads the OpenAI clusters configuration and the routes of the requested deployment (both cached per gateway
  4 |     region and API revision), then sets the "totalWeight" and "cumulativeWeights" variables that the backend
  5 |     fragment reads to pick a route at random in proportion to its weight.
  6 | -->
  7 | <fragment>
  8 |     <!-- Getting OpenAI clusters configuration -->
  9 |     <cache-lookup-value key="@("oaClusters" + context.Deployment.Region + context.Api.Revision)" variable-name="oaClusters" />
 10 |     <!-- If we can't find the configuration, it will be loaded -->
 11 |     <choose>
 12 |         <when condition="@(context.Variables.ContainsKey("oaClusters") == false)">
 13 |             <set-variable name="oaClusters"
 14 |                 value="@{
 15 |                     JArray routes = new JArray();
 16 |                     JArray clusters = new JArray();
 17 |                     if(context.Deployment.Region == "West Europe" || true)
 18 |                     {
 19 |                         routes.Add(new JObject()
 20 |                         {
 21 |                             { "name", "REPLACE1" },
 22 |                             { "location", "swedencentral" },
 23 |                             { "url", "https://REPLACE1.openai.azure.com" },
 24 |                             { "priority", 1},
 25 |                             { "isThrottling", false }, 
 26 |                             { "weight", "300"},
 27 |                             { "retryAfter", DateTime.MinValue } 
 28 |                         });
 29 | 
 30 |                         routes.Add(new JObject()
 31 |                         {
 32 |                             { "name", "REPLACE2" },
 33 |                             { "location", "westeurope" },
 34 |                             { "url", "https://REPLACE2.openai.azure.com" },
 35 |                             { "priority", 1},
 36 |                             { "isThrottling", false },
 37 |                             { "weight", "100"},
 38 |                             { "retryAfter", DateTime.MinValue }
 39 |                         });
 40 | 
 41 |                         routes.Add(new JObject()
 42 |                         {
 43 |                             { "name", "REPLACE3" },
 44 |                             { "location", "westus" },
 45 |                             { "url", "https://REPLACE3.openai.azure.com" },
 46 |                             { "priority", 1},
 47 |                             { "isThrottling", false },
 48 |                             { "weight", "50"},
 49 |                             { "retryAfter", DateTime.MinValue }
 50 |                         });
 51 | 
 52 |                         clusters.Add(new JObject()
 53 |                         {
 54 |                             { "deploymentName", "gpt-35-turbo" },
 55 |                             { "routes", new JArray(routes[0], routes[1]) }
 56 |                         });
 57 | 
 58 |                         clusters.Add(new JObject()
 59 |                         {
 60 |                             { "deploymentName", "embedding" },
 61 |                             { "routes", new JArray(routes[0], routes[1]) }
 62 |                         });
 63 | 
 64 |                         clusters.Add(new JObject()
 65 |                         {
 66 |                             { "deploymentName", "gpt-4" },
 67 |                             { "routes", new JArray(routes[0]) }
 68 |                         });
 69 | 
 70 |                         clusters.Add(new JObject()
 71 |                         {
 72 |                             { "deploymentName", "dall-e-3" },
 73 |                             { "routes", new JArray(routes[0]) }
 74 |                         });
 75 |                     }
 76 |                     else
 77 |                     {
 78 |                         //Error has no clusters for the region
 79 |                     }
 80 |                     
 81 |                     return clusters;   
 82 |                 }" />
 83 |             <!-- Add cluster configurations to cache -->
 84 |             <cache-store-value key="@("oaClusters" + context.Deployment.Region + context.Api.Revision)" value="@((JArray)context.Variables["oaClusters"])" duration="86400" />
 85 |         </when>
 86 |     </choose>
 87 |     <!-- Getting OpenAI routes configuration based on deployment name, region and api revision -->
 88 |     <cache-lookup-value key="@(context.Request.MatchedParameters["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" variable-name="routes" />
 89 |     <!-- If we can't find the configuration, it will be loaded -->
 90 |     <choose>
 91 |         <when condition="@(context.Variables.ContainsKey("routes") == false)">
 92 |             <set-variable name="routes"
 93 |                 value="@{
 94 |                     string deploymentName = context.Request.MatchedParameters["deployment-id"];
 95 |                     JArray clusters = (JArray)context.Variables["oaClusters"];
 96 |                     JObject cluster = (JObject)clusters.FirstOrDefault(o => o["deploymentName"]?.Value<string>() == deploymentName);
 97 |                     if(cluster == null)
 98 |                     {
 99 |                         //No cluster matched the deployment name: return a placeholder route whose url carries the error text
100 |                         return new JArray() { new JObject()
101 |                             {
102 |                                 { "name", deploymentName },
103 |                                 { "location", "NA" },
104 |                                 { "url", "No routes found for the deployment (" + deploymentName + ") in the region (" + context.Deployment.Region + ")" }
105 |                             }
106 |                         };
107 |                     }
108 |                     JArray routes = (JArray)cluster["routes"];
109 |                     return routes;
110 |                 }" />
111 |             <!-- If no routes found for deployment, return bad request with content of routes variable (nothing is cached in that case) -->
112 |             <choose>
113 |                 <when condition="@(((JArray)context.Variables["routes"]).ToString().Contains("No routes"))">
114 |                     <return-response>
115 |                         <set-status code="400" reason="No routes"/>
116 |                         <set-body>
117 |                             @(((JArray)context.Variables["routes"]).ToString())
118 |                         </set-body>
119 |                     </return-response>
120 |                 </when>
121 |             </choose>
122 |             <!-- Add the resolved routes for this deployment to cache -->
123 |             <cache-store-value key="@(context.Request.MatchedParameters["deployment-id"] + "Routes" + context.Deployment.Region + context.Api.Revision)" value="@((JArray)context.Variables["routes"])" duration="86400" />
124 |         </when>
125 |     </choose>
126 |     <!-- The weight variables are derived from "routes" on every request: they are not cached, so they must also be set when the routes came from the cache -->
127 |     <!-- Set total weights for selected routes based on model -->
128 |     <set-variable name="totalWeight" value="@{
129 |         int totalWeight = 0;
130 |         JArray routes = (JArray)context.Variables["routes"];
131 |         foreach (JObject route in routes)
132 |         {
133 |             totalWeight += int.Parse(route["weight"].ToString());
134 |         }
135 |         return totalWeight;
136 |         }" />
137 |     <!-- Set cumulative weights for selected routes based on model -->
138 |     <set-variable name="cumulativeWeights" value="@{
139 |         JArray cumulativeWeights = new JArray();
140 |         int totalWeight = 0;
141 |         JArray routes = (JArray)context.Variables["routes"];
142 |         foreach (JObject route in routes)
143 |         {
144 |             totalWeight += int.Parse(route["weight"].ToString());
145 |             cumulativeWeights.Add(totalWeight);
146 |         }
147 |         return cumulativeWeights;
148 |     }" />
149 |     <set-variable name="routeIndex" value="-1" />
150 |     <set-variable name="remainingRoutes" value="1" />
151 | </fragment>


--------------------------------------------------------------------------------
/src/apim/oai-api/oai-api-policy-archived.xml:
--------------------------------------------------------------------------------
 1 | <!--
 2 |     Policy at All operations - Azure OpenAI API. Inbound: includes the blocked-streaming fragment, deletes the api-key header, authenticates with the gateway's managed identity and includes the load-balancer configuration fragment. Backend: includes the load-balancer backend fragment. Outbound: captures the response body in a variable, includes the Event Hub usage fragment and writes the captured body back.
 3 | -->
 4 | <policies>
 5 |     <inbound>
 6 |         <base />
 7 |         <include-fragment fragment-id="oai-blocked-streaming-in-policy" />
 8 |         <set-header name="api-key" exists-action="delete" /> <!-- delete the api-key header from the request -->
 9 |         <authentication-managed-identity resource="https://cognitiveservices.azure.com" />
10 |         <include-fragment fragment-id="oai-clusters-lb-configuration-in-policy" />
11 |     </inbound>
12 |     <backend>
13 |         <include-fragment fragment-id="oai-clusters-lb-configuration-be-policy" />
14 |     </backend>
15 |     <outbound>
16 |         <base />
17 |         <set-variable name="responseBody" value="@(context.Response.Body.As<JObject>())" /> <!-- read the body once into a variable; the usage fragment reads this variable instead of the body -->
18 |         <include-fragment fragment-id="oai-usage-eventhub-out-policy" />
19 |         <set-body>@{
20 |             return ((JObject)context.Variables["responseBody"]).ToString();
21 |         }</set-body> <!-- write the captured body back as the response body -->
22 |     </outbound>
23 |     <on-error>
24 |         <base />
25 |     </on-error>
26 | </policies>


--------------------------------------------------------------------------------
/src/testing/openai-testing.http:
--------------------------------------------------------------------------------
 1 | # File variables for the REST Client. Values are used verbatim, so they must not be wrapped in quotes.
 2 | @aiHubGatewayOpenAIBaseUrl = https://REPLACE.azure-api.net/openai/deployments
 3 | @aiHRSubscriptionKey = REPLACE
 4 | @aiRetailSubscriptionKey = REPLACE
 5 | 
 6 | ### gpt-35-turbo/AI-HR
 7 | POST {{aiHubGatewayOpenAIBaseUrl}}/chat/chat/completions?api-version=2024-06-01
 8 | Content-Type: application/json
 9 | api-key: {{aiHRSubscriptionKey}}
10 | 
11 | {
12 |   "messages": [
13 | 	{"role": "system", "content": "You are a helpful assistant."},
14 | 	{"role": "user", "content": "Tell me a joke."}
15 |   ],
16 |   "stream": true
17 | }
18 | 
19 | ### gpt-4o/AI-HR
20 | POST {{aiHubGatewayOpenAIBaseUrl}}/gpt-4o/chat/completions?api-version=2024-06-01
21 | Content-Type: application/json
22 | api-key: {{aiHRSubscriptionKey}}
23 | 
24 | {
25 |   "messages": [
26 | 	{"role": "system", "content": "You are a helpful assistant."},
27 | 	{"role": "user", "content": "Tell me a joke."}
28 |   ],
29 |   "stream": false
30 | }
31 | 
32 | ### gpt-35-turbo/AI-Retail
33 | POST {{aiHubGatewayOpenAIBaseUrl}}/chat/chat/completions?api-version=2024-06-01
34 | Content-Type: application/json
35 | api-key: {{aiRetailSubscriptionKey}}
36 | 
37 | {
38 |   "messages": [
39 | 	{"role": "system", "content": "You are a helpful assistant."},
40 | 	{"role": "user", "content": "Tell me a joke."}
41 |   ],
42 |   "stream": false
43 | }


--------------------------------------------------------------------------------
/src/usage-ingestion-function/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | 
  4 | # Azure Functions localsettings file
  5 | local.settings.json
  6 | 
  7 | # User-specific files
  8 | *.suo
  9 | *.user
 10 | *.userosscache
 11 | *.sln.docstates
 12 | 
 13 | # User-specific files (MonoDevelop/Xamarin Studio)
 14 | *.userprefs
 15 | 
 16 | # Build results
 17 | [Dd]ebug/
 18 | [Dd]ebugPublic/
 19 | [Rr]elease/
 20 | [Rr]eleases/
 21 | x64/
 22 | x86/
 23 | bld/
 24 | [Bb]in/
 25 | [Oo]bj/
 26 | [Ll]og/
 27 | 
 28 | # Visual Studio 2015 cache/options directory
 29 | .vs/
 30 | # Uncomment if you have tasks that create the project's static files in wwwroot
 31 | #wwwroot/
 32 | 
 33 | # MSTest test Results
 34 | [Tt]est[Rr]esult*/
 35 | [Bb]uild[Ll]og.*
 36 | 
 37 | # NUNIT
 38 | *.VisualState.xml
 39 | TestResult.xml
 40 | 
 41 | # Build Results of an ATL Project
 42 | [Dd]ebugPS/
 43 | [Rr]eleasePS/
 44 | dlldata.c
 45 | 
 46 | # DNX
 47 | project.lock.json
 48 | project.fragment.lock.json
 49 | artifacts/
 50 | 
 51 | *_i.c
 52 | *_p.c
 53 | *_i.h
 54 | *.ilk
 55 | *.meta
 56 | *.obj
 57 | *.pch
 58 | *.pdb
 59 | *.pgc
 60 | *.pgd
 61 | *.rsp
 62 | *.sbr
 63 | *.tlb
 64 | *.tli
 65 | *.tlh
 66 | *.tmp
 67 | *.tmp_proj
 68 | *.log
 69 | *.vspscc
 70 | *.vssscc
 71 | .builds
 72 | *.pidb
 73 | *.svclog
 74 | *.scc
 75 | 
 76 | # Chutzpah Test files
 77 | _Chutzpah*
 78 | 
 79 | # Visual C++ cache files
 80 | ipch/
 81 | *.aps
 82 | *.ncb
 83 | *.opendb
 84 | *.opensdf
 85 | *.sdf
 86 | *.cachefile
 87 | *.VC.db
 88 | *.VC.VC.opendb
 89 | 
 90 | # Visual Studio profiler
 91 | *.psess
 92 | *.vsp
 93 | *.vspx
 94 | *.sap
 95 | 
 96 | # TFS 2012 Local Workspace
 97 | $tf/
 98 | 
 99 | # Guidance Automation Toolkit
100 | *.gpState
101 | 
102 | # ReSharper is a .NET coding add-in
103 | _ReSharper*/
104 | *.[Rr]e[Ss]harper
105 | *.DotSettings.user
106 | 
107 | # JustCode is a .NET coding add-in
108 | .JustCode
109 | 
110 | # TeamCity is a build add-in
111 | _TeamCity*
112 | 
113 | # DotCover is a Code Coverage Tool
114 | *.dotCover
115 | 
116 | # NCrunch
117 | _NCrunch_*
118 | .*crunch*.local.xml
119 | nCrunchTemp_*
120 | 
121 | # MightyMoose
122 | *.mm.*
123 | AutoTest.Net/
124 | 
125 | # Web workbench (sass)
126 | .sass-cache/
127 | 
128 | # Installshield output folder
129 | [Ee]xpress/
130 | 
131 | # DocProject is a documentation generator add-in
132 | DocProject/buildhelp/
133 | DocProject/Help/*.HxT
134 | DocProject/Help/*.HxC
135 | DocProject/Help/*.hhc
136 | DocProject/Help/*.hhk
137 | DocProject/Help/*.hhp
138 | DocProject/Help/Html2
139 | DocProject/Help/html
140 | 
141 | # Click-Once directory
142 | publish/
143 | 
144 | # Publish Web Output
145 | *.[Pp]ublish.xml
146 | *.azurePubxml
147 | # TODO: Comment the next line if you want to checkin your web deploy settings
148 | # but database connection strings (with potential passwords) will be unencrypted
149 | #*.pubxml
150 | *.publishproj
151 | 
152 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
153 | # checkin your Azure Web App publish settings, but sensitive information contained
154 | # in these scripts will be unencrypted
155 | PublishScripts/
156 | 
157 | # NuGet Packages
158 | *.nupkg
159 | # The packages folder can be ignored because of Package Restore
160 | **/packages/*
161 | # except build/, which is used as an MSBuild target.
162 | !**/packages/build/
163 | # Uncomment if necessary however generally it will be regenerated when needed
164 | #!**/packages/repositories.config
165 | # NuGet v3's project.json files produce more ignorable files
166 | *.nuget.props
167 | *.nuget.targets
168 | 
169 | # Microsoft Azure Build Output
170 | csx/
171 | *.build.csdef
172 | 
173 | # Microsoft Azure Emulator
174 | ecf/
175 | rcf/
176 | 
177 | # Windows Store app package directories and files
178 | AppPackages/
179 | BundleArtifacts/
180 | Package.StoreAssociation.xml
181 | _pkginfo.txt
182 | 
183 | # Visual Studio cache files
184 | # files ending in .cache can be ignored
185 | *.[Cc]ache
186 | # but keep track of directories ending in .cache
187 | !*.[Cc]ache/
188 | 
189 | # Others
190 | ClientBin/
191 | ~$*
192 | *~
193 | *.dbmdl
194 | *.dbproj.schemaview
195 | *.jfm
196 | *.pfx
197 | *.publishsettings
198 | node_modules/
199 | orleans.codegen.cs
200 | 
201 | # Since there are multiple workflows, uncomment next line to ignore bower_components
202 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
203 | #bower_components/
204 | 
205 | # RIA/Silverlight projects
206 | Generated_Code/
207 | 
208 | # Backup & report files from converting an old project file
209 | # to a newer Visual Studio version. Backup files are not needed,
210 | # because we have git ;-)
211 | _UpgradeReport_Files/
212 | Backup*/
213 | UpgradeLog*.XML
214 | UpgradeLog*.htm
215 | 
216 | # SQL Server files
217 | *.mdf
218 | *.ldf
219 | 
220 | # Business Intelligence projects
221 | *.rdl.data
222 | *.bim.layout
223 | *.bim_*.settings
224 | 
225 | # Microsoft Fakes
226 | FakesAssemblies/
227 | 
228 | # GhostDoc plugin setting file
229 | *.GhostDoc.xml
230 | 
231 | # Node.js Tools for Visual Studio
232 | .ntvs_analysis.dat
233 | 
234 | # Visual Studio 6 build log
235 | *.plg
236 | 
237 | # Visual Studio 6 workspace options file
238 | *.opt
239 | 
240 | # Visual Studio LightSwitch build output
241 | **/*.HTMLClient/GeneratedArtifacts
242 | **/*.DesktopClient/GeneratedArtifacts
243 | **/*.DesktopClient/ModelManifest.xml
244 | **/*.Server/GeneratedArtifacts
245 | **/*.Server/ModelManifest.xml
246 | _Pvt_Extensions
247 | 
248 | # Paket dependency manager
249 | .paket/paket.exe
250 | paket-files/
251 | 
252 | # FAKE - F# Make
253 | .fake/
254 | 
255 | # JetBrains Rider
256 | .idea/
257 | *.sln.iml
258 | 
259 | # CodeRush
260 | .cr/
261 | 
262 | # Python Tools for Visual Studio (PTVS)
263 | __pycache__/
264 | *.pyc


--------------------------------------------------------------------------------
/src/usage-ingestion-function/Program.cs:
--------------------------------------------------------------------------------
 1 | using Microsoft.Azure.Functions.Worker;
 2 | using Microsoft.Extensions.Hosting;
 3 | using Microsoft.Extensions.DependencyInjection;
 4 | 
 5 | // Configure the host in separate steps: first the Functions web application,
 6 | // then the Application Insights services used by the worker.
 7 | IHostBuilder builder = new HostBuilder();
 8 | builder = builder.ConfigureFunctionsWebApplication();
 9 | builder = builder.ConfigureServices(services =>
10 | {
11 |     services.AddApplicationInsightsTelemetryWorkerService();
12 |     services.ConfigureFunctionsApplicationInsights();
13 | });
14 | 
15 | // Build the configured host and run it.
16 | IHost app = builder.Build();
17 | app.Run();
18 | 


--------------------------------------------------------------------------------
/src/usage-ingestion-function/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "profiles": {
3 |     "usage_ingestion_func": {
4 |       "commandName": "Project",
5 |       "commandLineArgs": "--port 7291",
6 |       "launchBrowser": false
7 |     }
8 |   }
9 | }


--------------------------------------------------------------------------------
/src/usage-ingestion-function/UsageProcessorFunction.cs:
--------------------------------------------------------------------------------
 1 | using System;
 2 | using System.Text;
 3 | using Azure.Messaging.EventHubs;
 4 | using Microsoft.Azure.Functions.Worker;
 5 | using Microsoft.Extensions.Logging;
 6 | using Azure.Identity;
 7 | using Microsoft.Azure.Cosmos;
 8 | using System.Threading.Tasks;
 9 | using Newtonsoft.Json;
10 | using Newtonsoft.Json.Linq;
11 | using Microsoft.Extensions.Configuration;
12 | 
13 | namespace AIHubGateway.UsageProcessing
14 | {
15 |     /// <summary>
16 |     /// Event Hubs-triggered function that writes each usage event received from the
17 |     /// "ai-usage" hub into an Azure Cosmos DB container as a JSON document.
18 |     /// </summary>
19 |     public class UsageProcessorFunction
20 |     {
21 |         private readonly ILogger<UsageProcessorFunction> _logger;
22 |         private readonly CosmosClient _cosmosClient;
23 |         private readonly Container _container;
24 | 
25 |         /// <summary>
26 |         /// Creates the Cosmos DB client and resolves the target container from configuration.
27 |         /// </summary>
28 |         /// <param name="logger">Logger used to report events that could not be stored.</param>
29 |         /// <param name="configuration">
30 |         /// Provides CosmosAccountEndpoint, CosmosDatabaseName, CosmosContainerName and the
31 |         /// optional CosmosManagedIdentityId; a missing value is read as an empty string.
32 |         /// </param>
33 |         public UsageProcessorFunction(ILogger<UsageProcessorFunction> logger, IConfiguration configuration)
34 |         {
35 |             _logger = logger;
36 | 
37 |             // Read Cosmos DB settings from IConfiguration
38 |             string accountEndpoint = configuration["CosmosAccountEndpoint"] ?? string.Empty;
39 |             string databaseName = configuration["CosmosDatabaseName"] ?? string.Empty;
40 |             string containerName = configuration["CosmosContainerName"] ?? string.Empty;
41 |             string cosmosDbManagedIdentityClientId = configuration["CosmosManagedIdentityId"] ?? string.Empty; // using the same identity used with event hub
42 | 
43 |             // When a client id is configured it is passed as ManagedIdentityClientId;
44 |             // otherwise DefaultAzureCredential is created with its default options.
45 |             var credential = string.IsNullOrEmpty(cosmosDbManagedIdentityClientId)
46 |                 ? new DefaultAzureCredential()
47 |                 : new DefaultAzureCredential(new DefaultAzureCredentialOptions { ManagedIdentityClientId = cosmosDbManagedIdentityClientId });
48 |             _cosmosClient = new CosmosClient(accountEndpoint, credential);
49 |             _container = _cosmosClient.GetContainer(databaseName, containerName);
50 |         }
51 | 
52 |         /// <summary>
53 |         /// Stores every event of the received batch in Cosmos DB.
54 |         /// </summary>
55 |         /// <remarks>
56 |         /// Each event is handled in its own try/catch so that one malformed or rejected event is
57 |         /// logged and skipped instead of discarding the remainder of the batch. Documents are
58 |         /// upserted, so an event that is delivered again replaces its earlier copy rather than
59 |         /// failing with a conflict.
60 |         /// </remarks>
61 |         /// <param name="events">Batch of events delivered by the "ai-usage" Event Hub trigger.</param>
62 |         [Function(nameof(UsageProcessorFunction))]
63 |         public async Task Run([EventHubTrigger("ai-usage", Connection = "EventHubConnection")] EventData[] events)
64 |         {
65 |             foreach (EventData @event in events)
66 |             {
67 |                 try
68 |                 {
69 |                     // The event body is expected to be a UTF-8 encoded JSON object.
70 |                     string body = Encoding.UTF8.GetString(@event.Body.ToArray());
71 |                     JObject? data = JsonConvert.DeserializeObject<JObject>(body);
72 |                     if (data is null)
73 |                     {
74 |                         _logger.LogWarning("Skipping usage event with an empty body.");
75 |                         continue;
76 |                     }
77 | 
78 |                     // Insert (or replace) the event document in Cosmos DB
79 |                     await _container.UpsertItemAsync(data);
80 |                 }
81 |                 catch (Exception ex)
82 |                 {
83 |                     // Best effort: record the failure and continue with the next event.
84 |                     _logger.LogError(ex, "Error processing event: {ErrorMessage}", ex.Message);
85 |                 }
86 |             }
87 |         }
88 |     }
89 | }
90 | 


--------------------------------------------------------------------------------
/src/usage-ingestion-function/host.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "2.0",
 3 |     "logging": {
 4 |         "applicationInsights": {
 5 |             "samplingSettings": {
 6 |                 "isEnabled": true,
 7 |                 "excludedTypes": "Request"
 8 |             },
 9 |             "enableLiveMetricsFilters": true
10 |         }
11 |     }
12 | }


--------------------------------------------------------------------------------
/src/usage-ingestion-function/usage-ingestion-func.csproj:
--------------------------------------------------------------------------------
 1 | ﻿<Project Sdk="Microsoft.NET.Sdk">
 2 |   <PropertyGroup>
 3 |     <TargetFramework>net8.0</TargetFramework>
 4 |     <AzureFunctionsVersion>v4</AzureFunctionsVersion>
 5 |     <OutputType>Exe</OutputType>
 6 |     <ImplicitUsings>enable</ImplicitUsings>
 7 |     <Nullable>enable</Nullable>
 8 |     <RootNamespace>usage_ingestion_func</RootNamespace>
 9 |     <UserSecretsId>e06a8c82-3af6-486b-94b5-fda429dfc27a</UserSecretsId>
10 |   </PropertyGroup>
11 |   <ItemGroup>
12 |     <FrameworkReference Include="Microsoft.AspNetCore.App" />
13 |     <PackageReference Include="Microsoft.Azure.Cosmos" Version="3.40.0" />
14 |     <PackageReference Include="Microsoft.Azure.Functions.Worker" Version="1.21.0" />
15 |     <PackageReference Include="Microsoft.Azure.Functions.Worker.Extensions.EventHubs" Version="6.1.0" />
16 |     <PackageReference Include="Microsoft.Azure.Functions.Worker.Extensions.Http" Version="3.1.0" />
17 |     <PackageReference Include="Microsoft.Azure.Functions.Worker.Extensions.Http.AspNetCore" Version="1.2.0" />
18 |     <PackageReference Include="Microsoft.Azure.Functions.Worker.Sdk" Version="1.17.0" />
19 |     <PackageReference Include="Microsoft.ApplicationInsights.WorkerService" Version="2.22.0" />
20 |     <PackageReference Include="Microsoft.Azure.Functions.Worker.ApplicationInsights" Version="1.2.0" />
21 |   </ItemGroup>
22 |   <ItemGroup>
23 |     <None Update="host.json">
24 |       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
25 |     </None>
26 |     <None Update="local.settings.json">
27 |       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
28 |       <CopyToPublishDirectory>Never</CopyToPublishDirectory>
29 |     </None>
30 |   </ItemGroup>
31 |   <ItemGroup>
32 |     <Using Include="System.Threading.ExecutionContext" Alias="ExecutionContext" />
33 |   </ItemGroup>
34 | </Project>


--------------------------------------------------------------------------------
/src/usage-ingestion-logicapp/.funcignore:
--------------------------------------------------------------------------------
1 | .debug
2 | .git*
3 | .vscode
4 | __azurite_db*__.json
5 | __blobstorage__
6 | __queuestorage__
7 | local.settings.json
8 | test
9 | workflow-designtime/


--------------------------------------------------------------------------------
/src/usage-ingestion-logicapp/ai-usage-ingestion/workflow.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "definition": {
 3 |         "$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
 4 |         "actions": {
 5 |             "Create_Usage_Log": {
 6 |                 "inputs": {
 7 |                     "parameters": {
 8 |                         "containerId": "@appsetting('CosmosDBContainerUsage')",
 9 |                         "databaseId": "@appsetting('CosmosDBDatabase')",
10 |                         "isUpsert": true,
11 |                         "item": "@triggerBody()?['contentData']"
12 |                     },
13 |                     "serviceProviderConfiguration": {
14 |                         "connectionName": "AzureCosmosDB",
15 |                         "operationId": "CreateOrUpdateDocument",
16 |                         "serviceProviderId": "/serviceProviders/AzureCosmosDB"
17 |                     }
18 |                 },
19 |                 "runAfter": {},
20 |                 "type": "ServiceProvider"
21 |             }
22 |         },
23 |         "contentVersion": "1.0.0.0",
24 |         "outputs": {},
25 |         "triggers": {
26 |             "New_Usage_Record_Received": {
27 |                 "inputs": {
28 |                     "parameters": {
29 |                         "eventHubName": "@appsetting('eventHub_name')"
30 |                     },
31 |                     "serviceProviderConfiguration": {
32 |                         "connectionName": "eventHub",
33 |                         "operationId": "receiveEvents",
34 |                         "serviceProviderId": "/serviceProviders/eventHub"
35 |                     }
36 |                 },
37 |                 "splitOn": "@triggerOutputs()?['body']",
38 |                 "type": "ServiceProvider"
39 |             }
40 |         }
41 |     },
42 |     "kind": "Stateful"
43 | }


--------------------------------------------------------------------------------
/src/usage-ingestion-logicapp/connections.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "managedApiConnections": {
 3 |     "azuremonitorlogs": {
 4 |       "api": {
 5 |         "id": "@appsetting('AzureMonitor_Api_Id')"
 6 |       },
 7 |       "authentication": {
 8 |         "type": "ManagedServiceIdentity"
 9 |       },
10 |       "connection": {
11 |         "id": "@appsetting('AzureMonitor_Resource_Id')"
12 |       },
13 |       "connectionProperties": {
14 |         "authentication": {
15 |           "additionalAudiences": [
16 |             "https://api.loganalytics.io"
17 |           ],
18 |           "audience": "https://management.core.windows.net/",
19 |           "type": "ManagedServiceIdentity"
20 |         }
21 |       },
22 |       "connectionRuntimeUrl": "@appsetting('AzureMonitor_ConnectRuntime_Url')"
23 |     }
24 |   },
25 |   "serviceProviderConnections": {
26 |     "AzureCosmosDB": {
27 |       "displayName": "conn-cosmos-db",
28 |       "parameterValues": {
29 |         "connectionString": "@appsetting('AzureCosmosDB_connectionString')"
30 |       },
31 |       "serviceProvider": {
32 |         "id": "/serviceProviders/AzureCosmosDB"
33 |       }
34 |     },
35 |     "eventHub": {
36 |       "displayName": "conn-ai-usage-event-hub",
37 |       "parameterSetName": "ManagedServiceIdentity",
38 |       "parameterValues": {
39 |         "authProvider": {
40 |           "Type": "ManagedServiceIdentity"
41 |         },
42 |         "fullyQualifiedNamespace": "@appsetting('eventHub_fullyQualifiedNamespace')"
43 |       },
44 |       "serviceProvider": {
45 |         "id": "/serviceProviders/eventHub"
46 |       }
47 |     }
48 |   }
49 | }


--------------------------------------------------------------------------------
/src/usage-ingestion-logicapp/host.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "2.0",
 3 |   "logging": {
 4 |     "applicationInsights": {
 5 |       "samplingSettings": {
 6 |         "isEnabled": true,
 7 |         "excludedTypes": "Request"
 8 |       }
 9 |     }
10 |   },
11 |   "extensionBundle": {
12 |     "id": "Microsoft.Azure.Functions.ExtensionBundle.Workflows",
13 |     "version": "[1.*, 2.0.0)"
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/src/usage-ingestion-logicapp/package.json:
--------------------------------------------------------------------------------
1 | {}


--------------------------------------------------------------------------------
/src/usage-ingestion-logicapp/workflow-designtime/host.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "2.0",
 3 |   "extensionBundle": {
 4 |     "id": "Microsoft.Azure.Functions.ExtensionBundle.Workflows",
 5 |     "version": "[1.*, 2.0.0)"
 6 |   },
 7 |   "extensions": {
 8 |     "workflow": {
 9 |       "settings": {
10 |         "Runtime.WorkflowOperationDiscoveryHostMode": "true"
11 |       }
12 |     }
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbit:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbit


--------------------------------------------------------------------------------
/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbix


--------------------------------------------------------------------------------
/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-4.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-4.pbix


--------------------------------------------------------------------------------
/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix


--------------------------------------------------------------------------------
/src/usage-reports/AI-Search-Cost-Estimation-Logic.md:
--------------------------------------------------------------------------------
 1 | # Azure AI Search Cost Estimation
 2 | 
 3 | Because the cost of Azure AI Search depends on multiple dimensions, such as service tier, number of units, storage, data transfer and optional features like cognitive skills, it is challenging to come up with a simple formula to estimate the cost per request.
 4 | 
 5 | This document provides a high-level overview of the cost estimation logic for Azure AI Search. The cost estimation logic is based on the pricing details provided by Microsoft for Azure AI Search.
 6 | 
 7 | ## Scenario 1: Cost Estimation for a Standard S1 Search Service
 8 | 
 9 | - **Service Tier**: Standard S1
10 | - **Number of Units**: 2
11 | - **Region**: East US
12 | - **Duration**: 1 month
13 | - **Semantic Ranker**: 100K requests
14 | 
15 | Total Cost ~ $590/month 
16 | 
17 | Assuming that all API calls go through APIM, you can attribute the monthly cost to each consumer by multiplying its share (percentage) of the month's usage by the total cost of the service.
18 | 
19 | - **Search-Retail**: 60% * $590 = $354
20 | - **Search-HR**: 40% * $590 = $236


--------------------------------------------------------------------------------
/src/usage-reports/model-pricing.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     {
 3 |         "id": "1",
 4 |         "model": "ada",
 5 |         "deploymentName": "embedding",
 6 |         "isActive": true,
 7 |         "CostPerInputUnit": 0.0001,
 8 |         "CostPerOutputUnit": 0,
 9 |         "CostUnit": 1000,
10 |         "BaseCost": 0,
11 |         "Currency": "USD",
12 |         "CalculationMethod": "tokens",
13 |         "region": "ALL"
14 |     },
15 |     {
16 |         "id": "2",
17 |         "model": "gpt-4o-mini",
18 |         "deploymentName": "chat",
19 |         "isActive": true,
20 |         "CostPerInputUnit": 0.15,
21 |         "CostPerOutputUnit": 0.60,
22 |         "CostUnit": 1000000,
23 |         "BaseCost": 0,
24 |         "Currency": "USD",
25 |         "CalculationMethod": "tokens",
26 |         "region": "ALL"
27 |     },
28 |     {
29 |         "id": "3",
30 |         "model": "gpt-4",
31 |         "deploymentName": "gpt-4",
32 |         "isActive": true,
33 |         "CostPerInputUnit": 0.03,
34 |         "CostPerOutputUnit": 0.06,
35 |         "CostUnit": 1000,
36 |         "BaseCost": 0,
37 |         "Currency": "USD",
38 |         "CalculationMethod": "tokens",
39 |         "region": "ALL"
40 |     },
41 |     {
42 |         "id": "4",
43 |         "model": "gpt-4o",
44 |         "deploymentName": "gpt-4o",
45 |         "isActive": true,
46 |         "CostPerInputUnit": 0.005,
47 |         "CostPerOutputUnit": 0.015,
48 |         "CostUnit": 1000,
49 |         "BaseCost": 0,
50 |         "Currency": "USD",
51 |         "CalculationMethod": "tokens",
52 |         "region": "ALL"
53 |     },
54 |     {
55 |         "id": "5",
56 |         "model": "dall-e-3",
57 |         "deploymentName": "dall-e-3",
58 |         "isActive": true,
59 |         "CostPerInputUnit": 4,
60 |         "CostPerOutputUnit": 0,
61 |         "CostUnit": 100,
62 |         "BaseCost": 0,
63 |         "Currency": "USD",
64 |         "CalculationMethod": "tokens",
65 |         "region": "ALL"
66 |     },
67 |     {
68 |         "id": "6",
69 |         "model": "ai-search",
70 |         "deploymentName": "ai-search-business",
71 |         "isActive": true,
72 |         "CostPerInputUnit": 1,
73 |         "CostPerOutputUnit": 0,
74 |         "CostUnit": 1,
75 |         "BaseCost": 600,
76 |         "Currency": "USD",
77 |         "CalculationMethod": "percentage",
78 |         "region": "ALL"
79 |     },
80 |     {
81 |         "id": "7",
82 |         "model": "ai-search",
83 |         "deploymentName": "ai-search-marketing",
84 |         "isActive": true,
85 |         "CostPerInputUnit": 1,
86 |         "CostPerOutputUnit": 0,
87 |         "CostUnit": 1,
88 |         "BaseCost": 1000,
89 |         "Currency": "USD",
90 |         "CalculationMethod": "percentage",
91 |         "region": "ALL"
92 |     }
93 | ]


--------------------------------------------------------------------------------
/src/usage-reports/usage-record.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "id": "chatcmpl-9TVtj333ld9WDBiRh9qvLE9ZpA",
 3 |     "timestamp": "5/24/2024 2:45:56 PM",
 4 |     "appId": "11115293-844b-456f-8dbc-0c2d78e3e307",
 5 |     "subscriptionId": "master",
 6 |     "productName": "AI-Retail",
 7 |     "targetService": "chat.completion",
 8 |     "model": "gpt-35-turbo",
 9 |     "gatewayName": "APIM-HOST.azure-api.net",
10 |     "gatewayRegion": "East US",
11 |     "aiGatewayId": "managed",
12 |     "RequestIp": "X.Y.Z",
13 |     "operationName": "Creates a completion for the chat message",
14 |     "sessionId": "NA",
15 |     "endUserId": "NA",
16 |     "backendId": "openai-backend-2",
17 |     "routeLocation": "eastus2",
18 |     "routeName": "EastUS2",
19 |     "deploymentName": "chat",
20 |     "promptTokens": "15000",
21 |     "responseTokens": "5000",
22 |     "totalTokens": "20000",
23 |     "EventProcessedUtcTime": "2024-05-27T14:46:04.4009773Z",
24 |     "PartitionId": 1,
25 |     "EventEnqueuedUtcTime": "2024-05-27T14:46:04.1720000Z"
26 | }


--------------------------------------------------------------------------------