├── .azdo └── pipelines │ └── azure-dev.yml ├── .gitignore ├── .vscode ├── extensions.json ├── launch.json ├── settings.json └── tasks.json ├── LICENSE ├── README.md ├── assets ├── ai-hub-gateway-benefits.png ├── ai-search-api-call.png ├── ai-studio-connected-resources.png ├── ai-studio-new-connections.png ├── ai-studio-new-manual-ai-openai.png ├── ai-studio-new-manual-ai-search.png ├── ai-studio-prompt-flow-gpt.png ├── ai-studio-prompt-flow-inputs.png ├── apim-test.png ├── architecture-1-0-5.png ├── architecture-1-0-6.png ├── azure-openai-landing-zone.drawio ├── azure-openai-landing-zone.png ├── azure-resources-diagram-asa.svg ├── azure-resources-diagram-standard.svg ├── azure-resources-diagram.svg ├── code.png ├── cosmos-db-firwall.png ├── cosmos-db-model-pricing.png ├── customer-truth.png ├── oai-logicapps-nonstreaming.png ├── oai-logicapps-streaming.png ├── one-click-deploy.png ├── power-bi-data-final.png ├── power-bi-data-source-add.png ├── power-bi-data-source-adv-editor-update.png ├── power-bi-data-source-adv-editor-update2.png ├── power-bi-data-source-adv-editor.png ├── power-bi-data-source-model-pricing.png ├── power-bi-data-source-transform.png ├── power-bi-data-source.png ├── power-bi-percentage-dashboad.png ├── powerbi-relationship.png ├── powerbi-usage-dashboard-old.png ├── powerbi-usage-dashboard.png ├── supporting-documents.png ├── throttling-events-alert.png ├── throttling-events-app-insights.png └── user-story.png ├── azure.yaml ├── guides ├── ai-hub-gateway-hybrid-deployment.md ├── ai-search-integration.md ├── ai-studio-integration.md ├── apim-configuration.md ├── architecture.md ├── bring-your-own-network.md ├── deployment-troubleshooting.md ├── deployment.md ├── end-to-end-scenario.md ├── openai-onboarding.md ├── openai-usage-ingestion.md ├── power-bi-dashboard.md ├── routing-configurations.md └── throttling-events-handling.md ├── infra ├── abbreviations.json ├── main.bicep ├── main.parameters.json └── modules │ ├── ai │ └── 
cognitiveservices.bicep │ ├── apim │ ├── ai-model-inference │ │ └── ai-model-inference-api-spec.yaml │ ├── ai-search-api │ │ └── ai-search-api-spec.yaml │ ├── api.bicep │ ├── apim.bicep │ ├── openai-api │ │ ├── oai-api-spec-2024-02-01.yaml │ │ ├── oai-api-spec-2024-05-01-preview.yaml │ │ ├── oai-api-spec-2024-06-01.yaml │ │ ├── oai-api-spec-2024-10-21.yaml │ │ └── oai-realtime-api-ws.json │ ├── policies │ │ ├── ai-model-inference-api-policy.xml │ │ ├── ai-search-api-policy.xml │ │ ├── frag-aad-auth.xml │ │ ├── frag-ai-usage.xml │ │ ├── frag-backend-routing.xml │ │ ├── frag-dynamic-throttling-assignment.xml │ │ ├── frag-openai-usage-streaming.xml │ │ ├── frag-openai-usage.xml │ │ ├── frag-throttling-events.xml │ │ ├── frag-validate-routes.xml │ │ ├── hr_product_policy.xml │ │ ├── openai-realtime-policy.xml │ │ ├── openai_api_policy.xml │ │ ├── openai_api_policy_dynamic_throttling.xml │ │ ├── retail_product_policy.xml │ │ ├── search_hr_product_policy.xml │ │ └── translator-api-policy.xml │ ├── speech-api │ │ └── speech-api-3-1.json │ └── translator-api │ │ └── translator-api-spec.yaml │ ├── cosmos-db │ └── cosmos-db.bicep │ ├── event-hub │ └── event-hub.bicep │ ├── functionapp │ ├── functionapp.bicep │ └── storageaccount.bicep │ ├── logicapp │ ├── api-connection-access.bicep │ ├── api-connection.json │ └── logicapp.bicep │ ├── monitor │ ├── applicationinsights-dashboard.bicep │ ├── applicationinsights.bicep │ ├── loganalytics.bicep │ └── monitoring.bicep │ ├── networking │ ├── dns.bicep │ ├── private-endpoint.bicep │ ├── subnet.bicep │ ├── vnet-existing.bicep │ └── vnet.bicep │ ├── security │ ├── managed-identity-apim.bicep │ └── managed-identity-stream-analytics.bicep │ └── stream-analytics │ └── stream-analytics.bicep ├── scripts └── apim-event-hub-logger.ps1 └── src ├── apim ├── ai-search-api │ ├── ai-search-api-policy.xml │ └── ai-search-api-spec.yaml ├── http │ └── chat.http ├── oa-fragments-archived │ ├── oai-blocked-streaming-in-policy.xml │ ├── 
oai-clusters-lb-configuration-be-policy.xml │ ├── oai-clusters-lb-configuration-in-policy.xml │ └── oai-usage-eventhub-out-policy.xml ├── oa-weighted-lb │ ├── oai-clusters-weighted-lb-configuration-be-policy.xml │ └── oai-clusters-weighted-lb-configuration-in-policy.xml └── oai-api │ ├── oai-api-policy-archived.xml │ ├── oai-api-policy.xml │ ├── oai-api-spec-2024-02-01.yaml │ └── oai-api-spec.yaml ├── testing └── openai-testing.http ├── usage-ingestion-function ├── .gitignore ├── Program.cs ├── Properties │ └── launchSettings.json ├── UsageProcessorFunction.cs ├── host.json └── usage-ingestion-func.csproj ├── usage-ingestion-logicapp ├── .funcignore ├── ai-usage-ingestion │ └── workflow.json ├── ai-usage-streaming-ingestion │ └── workflow.json ├── connections.json ├── host.json ├── package.json └── workflow-designtime │ └── host.json └── usage-reports ├── AI-Hub-Gateway-Usage-Report-v1-3.pbit ├── AI-Hub-Gateway-Usage-Report-v1-3.pbix ├── AI-Hub-Gateway-Usage-Report-v1-4.pbix ├── AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix ├── AI-Search-Cost-Estimation-Logic.md ├── model-pricing.json └── usage-record.json /.azdo/pipelines/azure-dev.yml: -------------------------------------------------------------------------------- 1 | # Run when commits are pushed to main 2 | trigger: 3 | - main 4 | 5 | pool: 6 | vmImage: ubuntu-latest 7 | 8 | steps: 9 | # setup-azd@0 needs to be manually installed in your organization 10 | # if you can't install it, you can use the below bash script to install azd 11 | # and remove this step 12 | - task: setup-azd@0 13 | displayName: Install azd 14 | 15 | # If you can't install above task in your organization, you can comment it and uncomment below task to install azd 16 | # - task: Bash@3 17 | # displayName: Install azd 18 | # inputs: 19 | # targetType: 'inline' 20 | # script: | 21 | # curl -fsSL https://aka.ms/install-azd.sh | bash 22 | 23 | # azd delegate auth to az to use service connection with AzureCLI@2 24 | - pwsh: | 25 | azd config 
set auth.useAzCliAuth "true" 26 | displayName: Configure AZD to Use AZ CLI Authentication. 27 | - task: AzureCLI@2 28 | displayName: Provision Infrastructure 29 | inputs: 30 | azureSubscription: azconnection 31 | scriptType: bash 32 | scriptLocation: inlineScript 33 | keepAzSessionActive: true 34 | inlineScript: | 35 | azd provision --no-prompt 36 | env: 37 | AZURE_SUBSCRIPTION_ID: $(AZURE_SUBSCRIPTION_ID) 38 | AZURE_ENV_NAME: $(AZURE_ENV_NAME) 39 | AZURE_LOCATION: $(AZURE_LOCATION) 40 | AZD_INITIAL_ENVIRONMENT_CONFIG: $(AZD_INITIAL_ENVIRONMENT_CONFIG) 41 | 42 | - task: AzureCLI@2 43 | displayName: Deploy Application 44 | inputs: 45 | azureSubscription: azconnection 46 | scriptType: bash 47 | scriptLocation: inlineScript 48 | keepAzSessionActive: true 49 | inlineScript: | 50 | azd deploy --no-prompt 51 | env: 52 | AZURE_SUBSCRIPTION_ID: $(AZURE_SUBSCRIPTION_ID) 53 | AZURE_ENV_NAME: $(AZURE_ENV_NAME) 54 | AZURE_LOCATION: $(AZURE_LOCATION) 55 | 56 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-azuretools.vscode-azurelogicapps", 4 | "ms-azuretools.vscode-azurefunctions", 5 | "ms-dotnettools.csharp" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Attach to .NET Functions", 6 | "type": "coreclr", 7 | "request": "attach", 8 | "processId": "${command:azureLogicAppsStandard.pickProcess}" 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "azureFunctions.deploySubpath": 
"src\\usage-ingestion-function/bin/Release/net8.0/publish", 3 | "azureFunctions.projectLanguage": "C#", 4 | "azureFunctions.projectRuntime": "~4", 5 | "debug.internalConsoleOptions": "neverOpen", 6 | "azureFunctions.preDeployTask": "publish (functions)", 7 | "azureLogicAppsStandard.deploySubpath": "src\\usage-ingestion-logicapp", 8 | "azureLogicAppsStandard.projectLanguage": "JavaScript", 9 | "azureLogicAppsStandard.projectRuntime": "~4", 10 | "azureFunctions.suppressProject": true 11 | } -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "clean (functions)", 6 | "command": "dotnet", 7 | "args": [ 8 | "clean", 9 | "/property:GenerateFullPaths=true", 10 | "/consoleloggerparameters:NoSummary" 11 | ], 12 | "type": "process", 13 | "problemMatcher": "$msCompile", 14 | "options": { 15 | "cwd": "${workspaceFolder}/src\\usage-ingestion-function" 16 | } 17 | }, 18 | { 19 | "label": "build (functions)", 20 | "command": "dotnet", 21 | "args": [ 22 | "build", 23 | "/property:GenerateFullPaths=true", 24 | "/consoleloggerparameters:NoSummary" 25 | ], 26 | "type": "process", 27 | "dependsOn": "clean (functions)", 28 | "group": { 29 | "kind": "build", 30 | "isDefault": true 31 | }, 32 | "problemMatcher": "$msCompile", 33 | "options": { 34 | "cwd": "${workspaceFolder}/src\\usage-ingestion-function" 35 | } 36 | }, 37 | { 38 | "label": "clean release (functions)", 39 | "command": "dotnet", 40 | "args": [ 41 | "clean", 42 | "--configuration", 43 | "Release", 44 | "/property:GenerateFullPaths=true", 45 | "/consoleloggerparameters:NoSummary" 46 | ], 47 | "type": "process", 48 | "problemMatcher": "$msCompile", 49 | "options": { 50 | "cwd": "${workspaceFolder}/src\\usage-ingestion-function" 51 | } 52 | }, 53 | { 54 | "label": "publish (functions)", 55 | "command": "dotnet", 56 | "args": [ 57 | 
"publish", 58 | "--configuration", 59 | "Release", 60 | "/property:GenerateFullPaths=true", 61 | "/consoleloggerparameters:NoSummary" 62 | ], 63 | "type": "process", 64 | "dependsOn": "clean release (functions)", 65 | "problemMatcher": "$msCompile", 66 | "options": { 67 | "cwd": "${workspaceFolder}/src\\usage-ingestion-function" 68 | } 69 | }, 70 | { 71 | "type": "func", 72 | "dependsOn": "build (functions)", 73 | "options": { 74 | "cwd": "${workspaceFolder}/src\\usage-ingestion-function/bin/Debug/net8.0" 75 | }, 76 | "command": "host start", 77 | "isBackground": true, 78 | "problemMatcher": "$func-dotnet-watch" 79 | }, 80 | { 81 | "label": "generateDebugSymbols", 82 | "command": "${config:azureLogicAppsStandard.dotnetBinaryPath}", 83 | "args": [ 84 | "${input:getDebugSymbolDll}" 85 | ], 86 | "type": "process", 87 | "problemMatcher": "$msCompile", 88 | "options": { 89 | "cwd": "${workspaceFolder}/src\\usage-ingestion-logicapp" 90 | } 91 | }, 92 | { 93 | "type": "shell", 94 | "command": "${config:azureLogicAppsStandard.funcCoreToolsBinaryPath}", 95 | "args": [ 96 | "host", 97 | "start" 98 | ], 99 | "options": { 100 | "env": { 101 | "PATH": "${config:azureLogicAppsStandard.autoRuntimeDependenciesPath}\\NodeJs;${config:azureLogicAppsStandard.autoRuntimeDependenciesPath}\\DotNetSDK;$env:PATH" 102 | }, 103 | "cwd": "${workspaceFolder}/src\\usage-ingestion-logicapp" 104 | }, 105 | "problemMatcher": "$func-watch", 106 | "isBackground": true, 107 | "label": "func: host start", 108 | "group": { 109 | "kind": "build", 110 | "isDefault": true 111 | } 112 | } 113 | ], 114 | "inputs": [ 115 | { 116 | "id": "getDebugSymbolDll", 117 | "type": "command", 118 | "command": "azureLogicAppsStandard.getDebugSymbolDll" 119 | } 120 | ] 121 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Azure Samples 4 | 5 | Permission 
is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/ai-hub-gateway-benefits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-hub-gateway-benefits.png -------------------------------------------------------------------------------- /assets/ai-search-api-call.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-search-api-call.png -------------------------------------------------------------------------------- /assets/ai-studio-connected-resources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-connected-resources.png -------------------------------------------------------------------------------- /assets/ai-studio-new-connections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-new-connections.png -------------------------------------------------------------------------------- /assets/ai-studio-new-manual-ai-openai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-new-manual-ai-openai.png -------------------------------------------------------------------------------- /assets/ai-studio-new-manual-ai-search.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-new-manual-ai-search.png -------------------------------------------------------------------------------- /assets/ai-studio-prompt-flow-gpt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-prompt-flow-gpt.png -------------------------------------------------------------------------------- /assets/ai-studio-prompt-flow-inputs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/ai-studio-prompt-flow-inputs.png -------------------------------------------------------------------------------- /assets/apim-test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/apim-test.png -------------------------------------------------------------------------------- /assets/architecture-1-0-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/architecture-1-0-5.png -------------------------------------------------------------------------------- /assets/architecture-1-0-6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/architecture-1-0-6.png -------------------------------------------------------------------------------- /assets/azure-openai-landing-zone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/azure-openai-landing-zone.png -------------------------------------------------------------------------------- /assets/code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/code.png -------------------------------------------------------------------------------- /assets/cosmos-db-firwall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/cosmos-db-firwall.png -------------------------------------------------------------------------------- /assets/cosmos-db-model-pricing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/cosmos-db-model-pricing.png -------------------------------------------------------------------------------- /assets/customer-truth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/customer-truth.png -------------------------------------------------------------------------------- 
/assets/oai-logicapps-nonstreaming.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/oai-logicapps-nonstreaming.png -------------------------------------------------------------------------------- /assets/oai-logicapps-streaming.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/oai-logicapps-streaming.png -------------------------------------------------------------------------------- /assets/one-click-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/one-click-deploy.png -------------------------------------------------------------------------------- /assets/power-bi-data-final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-final.png -------------------------------------------------------------------------------- /assets/power-bi-data-source-add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-add.png -------------------------------------------------------------------------------- /assets/power-bi-data-source-adv-editor-update.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-adv-editor-update.png -------------------------------------------------------------------------------- /assets/power-bi-data-source-adv-editor-update2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-adv-editor-update2.png -------------------------------------------------------------------------------- /assets/power-bi-data-source-adv-editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-adv-editor.png -------------------------------------------------------------------------------- /assets/power-bi-data-source-model-pricing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-model-pricing.png -------------------------------------------------------------------------------- /assets/power-bi-data-source-transform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source-transform.png -------------------------------------------------------------------------------- /assets/power-bi-data-source.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-data-source.png -------------------------------------------------------------------------------- /assets/power-bi-percentage-dashboad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/power-bi-percentage-dashboad.png -------------------------------------------------------------------------------- /assets/powerbi-relationship.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/powerbi-relationship.png -------------------------------------------------------------------------------- /assets/powerbi-usage-dashboard-old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/powerbi-usage-dashboard-old.png -------------------------------------------------------------------------------- /assets/powerbi-usage-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/powerbi-usage-dashboard.png -------------------------------------------------------------------------------- /assets/supporting-documents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/supporting-documents.png 
-------------------------------------------------------------------------------- /assets/throttling-events-alert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/throttling-events-alert.png -------------------------------------------------------------------------------- /assets/throttling-events-app-insights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/throttling-events-app-insights.png -------------------------------------------------------------------------------- /assets/user-story.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/assets/user-story.png -------------------------------------------------------------------------------- /azure.yaml: -------------------------------------------------------------------------------- 1 | # Application name 2 | name: ai-hub-gateway-landing-zone 3 | metadata: 4 | template: ai-hub-gateway-landing-zone-v2 5 | infra: 6 | provider: bicep 7 | services: 8 | # usageProcessingFunctionApp: 9 | # project: ./src/usage-ingestion-function 10 | # language: dotnet 11 | # host: function 12 | usageProcessingLogicApp: 13 | project: ./src/usage-ingestion-logicapp 14 | language: js 15 | host: function -------------------------------------------------------------------------------- /guides/ai-hub-gateway-hybrid-deployment.md: -------------------------------------------------------------------------------- 1 | # Hybrid deployment of AI Hub Gateway 2 | 3 | Azure API Management (APIM) has 3 components: 4 | - API Gateway: is the runtime component that 
handles API requests and can be deployed on Azure as Managed Gateway or anywhere else (like on-premises) as Self-hosted Gateway. 5 | - Developer Portal: is the self-service portal for developers to discover and consume APIs 6 | - API Management Service: is the management plane that manages the API Gateway and Developer Portal. 7 | 8 | Building on the APIM API Gateway capability of being hosted anywhere, I will deploy it in this walkthrough on Azure Container Apps (in a similar fashion, it can be deployed on-premises on a compliant Kubernetes cluster or VM). 9 | 10 | The Developer Portal and API Management Service will remain hosted on Azure. 11 | 12 | ## Creating containerized hosting environment 13 | 14 | Here I will create a resource group, a container app environment and a container app to host the APIM API gateway. 15 | 16 | ```bash 17 | PROJECT=ai-gateway 18 | RESOURCE_GROUP=rg-$PROJECT 19 | ACA_SELFHOSTED_NAME=aca-$PROJECT-app 20 | ACA_SELFHOSTED_ENV=aca-$PROJECT-env 21 | LOCATION=northeurope 22 | 23 | az group create --name $RESOURCE_GROUP --location $LOCATION 24 | 25 | az containerapp env create --name $ACA_SELFHOSTED_ENV --resource-group $RESOURCE_GROUP --location $LOCATION 26 | 27 | # Getting APIM self-hosted gateway endpoint and token 28 | # You can get these values from APIM - Gateway - Self-hosted gateway configuration - Deployment - Docker 29 | ENDPOINT="" 30 | TOKEN="REPLACE_WITH_YOUR_KEY" 31 | 32 | 33 | az containerapp create --name $ACA_SELFHOSTED_NAME \ 34 | --environment $ACA_SELFHOSTED_ENV \ 35 | --resource-group $RESOURCE_GROUP \ 36 | --ingress 'external' \ 37 | --image mcr.microsoft.com/azure-api-management/gateway:2.5.0 \ 38 | --target-port 8080 \ 39 | --query properties.configuration.ingress.fqdn \ 40 | --env-vars "config.service.endpoint"="$ENDPOINT" "config.service.auth"="$TOKEN" "net.server.http.forwarded.proto.enabled"="true" 41 | 42 | # Testing the deployment (you should get empty 200 response) 43 | GATEWAY_URL=$(az containerapp show 
--name $ACA_SELFHOSTED_NAME --resource-group $RESOURCE_GROUP --query "properties.configuration.ingress.fqdn" --output tsv) 44 | echo $GATEWAY_URL 45 | curl -i https://$GATEWAY_URL/status-0123456789abcdef 46 | 47 | ``` 48 | 49 | -------------------------------------------------------------------------------- /guides/ai-studio-integration.md: -------------------------------------------------------------------------------- 1 | # AI Studio Integration 2 | 3 | Azure AI Studio is a unified platform for developing and deploying generative AI apps responsibly. 4 | 5 | It offers prebuilt and customizable models, using your data to innovate at scale. 6 | 7 | Integrating AI Hub Gateway with Azure AI Studio allows you to access the AI Hub Gateway governed AI services (like Azure OpenAI and Azure AI Search) to build AI solutions. 8 | 9 | This guide provides details about how this integration can be done. 10 | 11 | ## Prerequisites 12 | 13 | As AI Studio still requires connecting to AI services using public endpoints, the AI Hub Gateway APIM endpoint needs to be publicly accessible. 14 | 15 | Azure OpenAI & AI Search endpoints can be integrated through APIM 16 | 1. Requires APIM to be public 17 | 18 | a. Directly using APIM native capability (networking is set to None or External) to have public endpoint (not recommended) 19 | 20 | b. Or indirectly through customer network appliances where APIM is fully private with networking set to Internal (recommended) 21 | 2. Keep in mind that AI Studio tries to query the OpenAI service itself through ARM calls to retrieve the list of deployments; you will get warnings like (model deployment can’t be read) as APIM is not exposing ARM APIs 22 | 3. Select the AI Hub Gateway connected resource in AI Studio connections (like prompt flow) and it will work as expected 23 | 4. The above can scale to many other resources that AI Studio is capable of connecting to. 
24 | 25 | ## Connected resources 26 | 27 | Using AI-Hub-Gateway with AI Studio is possible today through ```Connected resources```. 28 | 29 | -------------------------------------------------------------------------------- /guides/architecture.md: -------------------------------------------------------------------------------- 1 | ## AI Hub Gateway Landing Zone Architecture 2 | The AI Hub Gateway Landing Zone architecture is designed to be a central hub for AI services, providing a single point of entry for AI services, and enabling the organization to manage and govern AI services in a consistent manner. 3 | 4 | ![AI Hub Gateway Landing Zone](../assets/architecture-1-0-6.png) 5 | 6 | ### Azure architecture diagram 7 | This example diagram shows how these different Azure services would interact in a classic [hub-spoke topology](https://learn.microsoft.com/en-us/azure/architecture/networking/architecture/hub-spoke?tabs=cli). 8 | 9 | ![AI Hub Gateway Landing Zone](../assets/azure-openai-landing-zone.png) 10 | 11 | ### Networking 12 | 13 | The AI Landing Zone Virtual Network could be connected to the spokes via [virtual network peering](https://learn.microsoft.com/en-us/azure/virtual-network/virtual-network-peering-overview). The different applications (applications A, B and C) lying in the spoke networks would be able to resolve the API Management endpoint for their AI service consumption. 14 | 15 | The different Azure OpenAI services would not be accessible to other external services, only being accessible through the API Management instance, being able to communicate to those instances via [Private Links](https://learn.microsoft.com/en-us/azure/private-link/private-link-overview). 16 | 17 | For more details, see the [networking components section](#networking-components). 18 | 19 | ### AI Services and Indexes 20 | The API Management instance would be able to communicate with one or more Azure OpenAI and/or AI services, as illustrated in the diagram. 
This can be a mix of 1 or more services, in 1 or more subscriptions, and also be of different model types, such as [Azure OpenAI Services](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) or other models in [Azure Machine Learning Studio, for example Mistral](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-models-mistral?view=azureml-api-2). 21 | 22 | It is also possible to make [Azure AI Search](https://learn.microsoft.com/en-us/azure/search/search-what-is-azure-search) indexes available through the API Management instance. This is particularly useful when wanting to consume the same index of data in several applications, while ensuring finer-grain control on the index. 23 | 24 | For more details, see the [additional components section](#additional-components-deployment). 25 | 26 | ### Other data sources 27 | Other data sources, such as Cosmos DB or SQL databases, could also be used as data sources to create the AI Search index. 28 | 29 | ### Cross-charging and token consumption count 30 | When sharing AI consumption with different applications, sometimes it would be beneficial to know the token consumption for each application, in order to calculate charge-backs. This is possible using a combination of different services, such as Event Hub, Synapse Analytics, Cosmos DB and PowerBI. 31 | For more details, see the [data and charge-back platforms section](#data-and-charge-back-platforms). 32 | 33 | ## Architecture components 34 | The AI Hub Gateway Landing Zone consists of the following components: 35 | 36 | ### Main gateway components 37 | These are the critical components of the AI Hub Gateway Landing Zone that provide the capabilities outlined above. 38 | 39 | - **Azure API Management**: Azure API Management is a fully managed service that enables customers to publish, secure, transform, maintain, and monitor APIs. 
40 | - **Application Insights**: Application Insights is an extensible Application Performance Management (APM) service that provides critical insights on the gateway operational performance. 41 | - **Event Hub**: Event Hub is a fully managed, real-time data ingestion service that’s simple, trusted, and scalable and it is used to stream usage and charge-back data to target data and charge back platforms. 42 | 43 | ### AI services 44 | This is the Azure AI services that will be exposed through the AI Hub Gateway Landing Zone. 45 | 46 | Examples of these service could include: 47 | 48 | - **Azure OpenAI**: Azure OpenAI is a cloud deployment of cutting edge generative models from OpenAI (like ChatGPT, DALL.E and more). 49 | - **Azure AI Search**: Azure AI Search is a cloud search service with built-in AI capabilities that enrich all types of information to help users identify and explore relevant content at scale (critical component of RAG-based generative AI applications). 50 | - **Azure Cognitive Services**: Azure Cognitive Services is a set of cloud-based services with REST APIs and client library SDKs available to help you build cognitive intelligence into your applications. 51 | 52 | ### Backend services 53 | These are the backend services that will include your AI business logic and experiences. 54 | 55 | You can host backend services on Azure, on-premises, or other clouds. 56 | 57 | Examples of these services could include: 58 | - **Azure Kubernetes Service**: Azure Kubernetes Service (AKS) is a managed container orchestration service, based on the open-source Kubernetes system, which is available on the Microsoft Azure public cloud. 59 | - **Azure Container Apps**: Azure Container Apps is a fully managed serverless container service that enables you to run containers on Azure without having to manage the infrastructure. 60 | - **Azure App Service**: Azure App Service is a fully managed platform for building, deploying, and scaling web apps. 
61 | 62 | Also in these backends, it is common to use **AI Orchestrator** framework like [Semantic Kernel](https://github.com/microsoft/semantic-kernel) and [Langchain](https://www.langchain.com/) to orchestrate sophisticated AI workflows and scenarios. 63 | 64 | ### Data and charge-back platforms 65 | 66 | As part of the AI Hub Gateway Landing Zone, you will need to integrate with existing data and charge-back platforms to track usage and charge-back to the respective business units. 67 | 68 | Examples of these platforms could include: 69 | - **Cosmos DB**: Azure Cosmos DB is a fully managed NoSQL database for storing usage and charge-back data. 70 | - **Azure Synapse Analytics**: Azure Synapse Analytics is an analytics service that brings together enterprise data warehousing and big data analytics. 71 | - **Microsoft Fabric**: Microsoft Fabric is a cloud-based platform that provides a scalable, reliable, and secure infrastructure for building and managing data and analytics solutions. 72 | - **PowerBI**: Power BI is a business analytics service by Microsoft. It aims to provide interactive visualizations and business intelligence capabilities with an interface simple enough for end users to create their own reports and dashboards. -------------------------------------------------------------------------------- /guides/deployment-troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Deployment troubleshooting 2 | 3 | This guide provides troubleshooting tips for common issues that you might encounter when deploying this accelerator to Azure using Azure Developer CLI or Bicep. 4 | 5 | ## Transient errors 6 | 7 | You might want to try again running the deployed as it might resolve some of the transient issues. 8 | 9 | ```bash 10 | azd up 11 | ``` 12 | 13 | This is usually a transient issue. Please try again after some time (it might take up to 1 hour unfortunately). 
14 | 15 | Below are a few examples of transient issues: 16 | 17 | - Unable to edit or replace deployment 'application-insights-dashboard' 18 | 19 | - Runtime Scale Monitoring is not supported for this Functions version 20 | 21 | - Failed to connect to management endpoint apim-RANDOM.management.azure-api.net:3443 for a service deployed in a Virtual Network. Make sure to follow guidance at https://aka.ms/apim-vnet-common-issues for Inbound connectivity to Management endpoint. Check 'ApiManagement Control Plane - inbound' connectivity at https://aka.ms/apimnetworkstatus. (Code: ManagementApiRequestFailed) 22 | 23 | - Managed identity id not found 24 | 25 | - Timeout: Call to Management API apim-RANDOM.management.azure-api.net:3443 timed out for the Developer SKU service which will have inherent capacity issues due to it's scale. Please refer to SLA at https://aka.ms/apimsla and considering upgrading to a SKU Tier with higher SLA. 26 | 27 | - Deployment 'azure-ai-search-api' could not be found (this will only happen if you set ```enableAzureAISearch``` to false and you can ignore it) -------------------------------------------------------------------------------- /guides/deployment.md: -------------------------------------------------------------------------------- 1 | ## Primary components deployment 2 | 3 | Below is a high-level guide to deploy the AI Hub Gateway accelerator main components. 4 | 5 | ![components](../assets/azure-resources-diagram.svg) 6 | 7 | ### Networking components 8 | 9 | The default behavior of the infrastructure script (in Bicep) provisions the following networking components: 10 | 11 | - **Virtual network & subnet**: A virtual network to host the AI Hub Gateway Landing Zone. 12 | - **APIM subnet** to be deployed in internal/external mode requires a subnet with /27 or larger with **required Network Security Group (NSG)** that allows the critical rules. 
13 | - **Private endpoints subnet(s)**: Private endpoints for the AI services, Cosmos DB, Event Hub, Monitor, Storage to be exposed through the AI Hub Gateway Landing Zone. Usually a /27 or larger subnet would be sufficient. 14 | - **Azure Function** subnet to be used for injecting the function runtime into the VNet so it can access both Cosmos DB and Event Hub private endpoints. This subnet is delegated to ```Microsoft.Web/serverFarms```. 15 | - **Private DNS zones**: Private DNS zones to resolve the private endpoints. 16 | - Internal APIM relies on **private DNS** to resolve the APIM endpoints, so a Azure Private DNS zone or other DNS solution is required. 17 | - **Private endpoints DNS zone**: A private DNS zone to resolve the private endpoints for the connected Azure PaaS services. 18 | - 'privatelink.openai.azure.com' 19 | - 'privatelink.vaultcore.azure.net' 20 | - 'privatelink.monitor.azure.com' 21 | - 'privatelink.servicebus.windows.net' 22 | - 'privatelink.documents.azure.com' 23 | - 'privatelink.blob.core.windows.net' 24 | - 'privatelink.file.core.windows.net' 25 | 26 | Additional networking consideration that you might need to take into account: 27 | - **ExpressRoute or VPN**: If you are planning to connect to on-premises or other clouds, you will need to have an ExpressRoute or VPN connection. 28 | - **DMZ appliances**: If you are planning to expose backend and gateway services on the internet, you need to have a Web Application Firewall (like Azure Front Door & Application Gateway) and network firewall (like Azure Firewall) to govern both ingress and egress traffic. 29 | - **Custom Domains** for APIM specially if it is in "internal mode" to allow its private DNS resolution without conflicting with any external APIM instances that you may have (by default, all APIM instances uses *.azure-api.net domain regardless if being external or internal). 
30 | 31 | ### Azure API Management (APIM) 32 | APIM is the central component of the AI Hub Gateway Landing Zone. 33 | 34 | Recommended deployment of APIM to be in **internal mode** to ensure that the gateway is not exposed to the internet and to ensure that the gateway is only accessible through the private network. 35 | 36 | **internal mode** requires a subnet with /27 or larger with NSG that allows the critical rules in addition to management public IP (with DNS label set) 37 | 38 | This is a great starting point to deploy APIM in internal mode: [Deploy Azure API Management in internal mode](https://learn.microsoft.com/en-us/azure/api-management/api-management-using-with-internal-vnet?tabs=stv2) 39 | 40 | ### Application Insights 41 | Application Insights is a critical component of the AI Hub Gateway Landing Zone, and it is used to monitor the operational performance of the gateway. 42 | 43 | To deploy Application Insights, you can use the following guide: [How to integrate Azure API Management with Azure Application Insights](https://azure.github.io/apim-lab/apim-lab/6-analytics-monitoring/analytics-monitoring-6-2-application-insights.html) 44 | 45 | ### Event Hub 46 | 47 | Event Hub is used to stream usage and charge-back data to target data and charge back platforms. 48 | 49 | To deploy Event Hub, you can use the following guide: [Logging with Event Hub](https://azure.github.io/apim-lab/apim-lab/6-analytics-monitoring/analytics-monitoring-6-3-event-hub.html) 50 | 51 | ### Additional components deployment 52 | 53 | With the primary components deployed, you can now deploy or identify the AI services and backend services that will be exposed through the AI Hub Gateway. 
54 | 55 | Additional components may include: 56 | - **Azure OpenAI**: You can have 1 or more OpenAI services deployed (like one with PTU and one with PAYG) 57 | - **Azure AI Search**: Azure AI Search with indexed data (1 or more indexes) 58 | - **Backend services**: Backend services that will include your AI business logic and experiences (like a python chat app deployed on Azure App Service as an example). 59 | 60 | For the above components, we need to ensure the following: 61 | - **Private endpoints**: The AI services should be exposed through private endpoints. 62 | - **Private DNS zone**: A private DNS zone to resolve the private endpoints for the connected Azure AI services. 63 | - **APIM Managed identity**: Is granted access to Azure AI services (like OpenAI and AI Search). 64 | - **Update endpoint and keys**: The backend services should use AI Hub Gateway endpoint and keys. 65 | - **Usage & charge-back**: Identify the data pipeline for tokens usage and charge back based on Event Hub integration. 66 | 67 | ### Deployment summary 68 | 69 | When deployment of primary components is completed, you will have the following components deployed: 70 | 71 | - **Azure API Management** 72 | - **Application Insights** 73 | - **Event Hub** 74 | 75 | Network wiring also will be established to allow the gateway to access the AI services through private endpoints, internet access through DMZ appliances and backend systems through private network should be planned. 76 | 77 | with the additional components deployed, you will have the following components identified: 78 | - **Azure OpenAI** instances (by default 3 across 3 regions) 79 | - **Cosmos DB** for ingesting AI usage metrics 80 | - **Azure Functions + Storage** for processing AI usage metrics from event hub to cosmos db. 81 | - **Managed identities** for APIM to access the AI services and for Azure Function to access Cosmos DB and Event Hub. 
82 | 83 | ## Azure API Management configuration 84 | To configure Azure API Management to expose the AI services through the AI Hub Gateway Landing Zone, you will need to configure the following: 85 | 86 | - **APIs**: Import APIs definitions to APIM. 87 | - **Products**: Create products to bundle one or more APIs under a common access terms/policies. 88 | - **Policies**: Apply policies to the APIs to manage access, rate limits, and other governance policies. 89 | 90 | ### APIs import 91 | In this guide, I will be importing both OpenAI and AI Search APIs to APIM. 92 | 93 | Many Azure services APIs are available in [Azure REST API specs](https://github.com/Azure/azure-rest-api-specs/tree/main) reference on GitHub. 94 | 95 | #### Azure OpenAI API 96 | Although I have included the OpenAI API definition [in this repository](../src/apim/oai-api/oai-api-spec-2024-02-01.yaml), you can also find the Azure OpenAI API definition in here: [Azure OpenAI API](https://github.com/Azure/azure-rest-api-specs/tree/main/specification/cognitiveservices/data-plane/AzureOpenAI/inference) 97 | 98 | One included in the repository is inference version 2024-02-01 stable. 99 | 100 | Only main change you need to do in the downloaded API definition is to update ```"url": "https://{endpoint}/openai",``` to ```"url": "https://TO-BE-RELACED/openai",``` to avoid conflict with APIM import validation. 101 | 102 | > **Important**: You need to append ```/openai``` to your selected ```API URL suffix``` in APIM import dialog to be something like (ai-hub-gw/openai). This is important as OpenAI SDK append /openai to the endpoint URL (not doing so you might get 404 errors from the client connecting to AI Hub Gateway endpoint). 103 | 104 | One last thing, you need to update APIM subscription header name from ```Ocp-Apim-Subscription-Key``` to ```api-key``` to match the OpenAI SDK default implementation (not doing so you might get 401 unauthorized error). 
105 | 106 | #### Azure AI Search API 107 | Same story with Azure AI Search, you can find a local copy [in this repository](../src/apim/ai-search-api/ai-search-api-spec.yaml). 108 | 109 | I had to make a few additional changes to the downloaded API definition to make it work with APIM import 110 | validation. 111 | 112 | Public documentation for AI Search API can be found here: [Azure AI Search API](https://github.com/Azure/azure-rest-api-specs/tree/main/specification/search/data-plane/Azure.Search) (I used stable 2023-11-01 version). -------------------------------------------------------------------------------- /guides/openai-onboarding.md: -------------------------------------------------------------------------------- 1 | # Onboarding an OpenAI Instance or Consumer Application 2 | 3 | This guide will walk you through the steps to configure Azure API Management (APIM) to work with a new consumer or Azure OpenAI deployment. 4 | 5 | ## Table of Contents 6 | 1. [Prerequisites](#prerequisites) 7 | 2. [Step-by-Step Configuration: Onboarding a New Azure OpenAI Resource](#step-by-step-configuration-onboarding-a-new-azure-openai-resource) 8 | 1. [Ensure Line of Sight to OpenAI](#1-ensure-line-of-sight-to-openai) 9 | 2. [Grant OpenAI User Access to APIM User Managed Identity](#2-grant-openai-user-access-to-apim-user-managed-identity) 10 | 3. [Identify All Deployment Names Associated with OpenAI](#3-identify-all-deployment-names-associated-with-openai) 11 | 4. [Create APIM Backend for OpenAI](#4-create-apim-backend-for-openai) 12 | 5. [Update Routing Configuration](#5-update-routing-configuration) 13 | 6. [Testing the Revision](#6-testing-the-revision) 14 | 7. [Marking Revision as Current](#7-marking-revision-as-current) 15 | 8. [Enforcing Deployment-Level RBAC](#8-enforcing-deployment-level-rbac) 16 | 3. [Step-by-Step Configuration: Onboarding a New Consumer](#step-by-step-configuration-onboarding-a-new-consumer) 17 | 1. [Create New Product](#1-create-new-product) 18 | 2. 
[Create New Subscription for the Product](#2-create-new-subscription-for-the-product) 19 | 3. [Share APIM OpenAI Endpoint, Subscription Key, and Available Models](#3-share-apim-openai-endpoint-subscription-key-and-available-models) 20 | 21 | ## Prerequisites 22 | 23 | Before starting, make sure you have: 24 | - An operational AI Hub Gateway deployment. 25 | - Access to the Azure OpenAI service if you are adding a new deployment. 26 | - Azure Portal access. 27 | 28 | ## Step-by-Step Configuration: Onboarding a New Azure OpenAI Resource 29 | 30 | ### 1. Ensure Line of Sight to OpenAI 31 | 32 | **Steps:** 33 | 34 | 1. **Azure Portal:** 35 | - Navigate to your Virtual Network (VNet) where APIM is deployed. 36 | - Go to **DNS Servers** and ensure you have the correct DNS settings for resolving OpenAI endpoints. 37 | 38 | 2. **DNS Configuration:** 39 | - If you're using custom DNS, ensure the DNS server can resolve OpenAI service endpoints. 40 | - You may need to add custom DNS entries to your DNS server for OpenAI services. 41 | 42 | 3. **Network Configuration:** 43 | - Ensure that network connectivity is available between API Management and the Azure OpenAI Resource. If your Azure OpenAI Resource does not allow public networking, you may need to add a private endpoint in your Virtual Network. See: [Use private endpoints](https://learn.microsoft.com/en-us/azure/ai-services/cognitive-services-virtual-networks?tabs=portal#use-private-endpoints). 44 | 45 | ### 2. Grant OpenAI User Access to APIM User Managed Identity 46 | 47 | The identity of the Azure API Management needs access to perform inference calls on the AI Models. 48 | 49 | **Steps:** 50 | 51 | 1. **Azure Portal:** 52 | - Navigate to your Azure API Management instance. 53 | - Go to **Managed identities** under **Security** and ensure it is enabled. 54 | 55 | 2. **Role Assignment:** 56 | - Navigate to your Azure OpenAI resource. 57 | - Go to **Access Control (IAM)** and click **Add role assignment**. 
58 | - Select **Cognitive Services OpenAI User** role. See: [Role-based access control](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/role-based-access-control). 59 | - Assign this role to the APIM Managed Identity. 60 | 61 | ### 3. Identify All Deployment Names Associated with OpenAI 62 | 63 | **Steps:** 64 | 65 | 1. **Azure Portal:** 66 | - Navigate to your Azure OpenAI resource. 67 | - Under **Deployments**, note down the names of all the deployments you have created. 68 | 69 | ### 4. Create APIM Backend for OpenAI 70 | 71 | > [!TIP] 72 | > Ensure that your backend-url ends with /openai 73 | 74 | **Steps:** 75 | 76 | 1. **Azure Portal:** 77 | - Navigate to your Azure API Management instance. 78 | - Go to **Backends** under **APIs**. 79 | - Click **+ Add** to create a new backend. 80 | - Configure the backend with the OpenAI endpoint URL and name it appropriately (it should end with `/openai/`). 81 | 82 | ### 5. Update Routing Configuration 83 | 84 | **Steps:** 85 | 86 | 1. **Azure Portal:** 87 | - Navigate to your Azure API Management instance. 88 | - Go to **APIs**, select the OpenAI API, and navigate to **Design**. 89 | - Go to the menu on the **OpenAI API** and select **Add Revision** to create a new revision (to avoid downtime during implementation). 90 | - Under **Inbound processing**, update the policy to include the new routes and clusters for OpenAI deployments. 91 | 92 | **Sample Configuration:** 93 | 94 | ```xml 95 | 118 | ``` 119 | 120 | Ensure that the backend is linked with all available deployments for that endpoint by updating the clusters variable accordingly. 121 | 122 | ### 6. Testing the Revision 123 | 124 | **Steps:** 125 | 126 | 1. **Azure Portal:** 127 | - Navigate to your Azure API Management instance. 128 | - Go to **APIs** and select the OpenAI API. 129 | - Under **Test**, select the new revision and test the API endpoints to ensure they are working as expected. 130 | 131 | ### 7. 
Marking Revision as Current 132 | 133 | **Steps:** 134 | 135 | 1. **Azure Portal:** 136 | - Navigate to your Azure API Management instance. 137 | - Go to **APIs**, select the OpenAI API, and navigate to **Revisions**. 138 | - Select the new revision and click **Make current**. 139 | 140 | ### 8. Enforcing Deployment-Level RBAC 141 | 142 | In some cases, you might want to restrict access to specific models based on the business unit or team using the OpenAI endpoint. 143 | 144 | The following policy can be implemented at a product level to restrict access to specific model deployments. For more details, refer to the [Model-based RBAC guide](https://github.com/Azure-Samples/ai-hub-gateway-solution-accelerator/blob/main/guides/apim-configuration.md#model-based-rbac). 145 | 146 | > [!CAUTION] 147 | > This policy will restrict access to only two deployments (gpt-4 and embedding). Any other model deployment will get a 401 Unauthorized response. 148 | 149 | **Sample Policy:** 150 | 151 | ```xml 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | ``` 164 | 165 | 166 | ## Step-by-Step Configuration: Onboarding a New Consumer 167 | 168 | ### 1. Create New Product 169 | 170 | **Steps:** 171 | 172 | 1. **Azure Portal:** 173 | - Navigate to your Azure API Management instance. 174 | - Go to **Products** and click **+ Add**. 175 | - Configure the product with the appropriate settings for token throughput capacity and access to specific models (using product-level policies). 176 | 177 | ### 2. Create New Subscription for the Product 178 | 179 | **Steps:** 180 | 181 | 1. **Azure Portal:** 182 | - Navigate to your Azure API Management instance. 183 | - Go to **Products**, select the newly created product, and navigate to **Subscriptions**. 184 | - Click **+ Add** to create a new subscription. 185 | - Provide the necessary details and generate a subscription key. 186 | 187 | ### 3. 
Share APIM OpenAI Endpoint, Subscription Key, and Available Models 188 | 189 | **Steps:** 190 | 191 | 1. **Azure Portal:** 192 | - Navigate to your Azure API Management instance. 193 | - Go to **APIs**, 194 | 195 | select the OpenAI API, and copy the endpoint URL. 196 | - Share the endpoint URL, subscription key, and list of available models with the team. 197 | 198 | **Sample Configuration for Sharing:** 199 | > [!CAUTION] 200 | > A subscription key is like a password. Ensure you share it securely. 201 | 202 | ```plaintext 203 | API Endpoint: https://apim-your-instance.azure-api.net/openai 204 | Subscription Key: {YourSubscriptionKey} 205 | Available Models: gpt-3.5-turbo, gpt-4, dall-e 206 | ``` 207 | -------------------------------------------------------------------------------- /guides/openai-usage-ingestion.md: -------------------------------------------------------------------------------- 1 | # Azure OpenAI Usage Ingestion 2 | 3 | This guide explores the details of how AI Hub Gateway is using Logic Apps to ingest usage data from Azure OpenAI API for both streaming and non-streaming requests. 4 | 5 | ## Prerequisites 6 | 7 | The following components are configured as part of this accelerator: 8 | 9 | - API Management service fully configured with all relevant policies as part of this accelerator 10 | - Logic App service integrated with vnet 11 | - Event hub configured as a logger in API Management 12 | - Cosmos DB account with SQL API that has been configured to store the usage data 13 | 14 | ## Overview 15 | 16 | There are 2 paths for ingesting usage data from Azure OpenAI API: 17 | 18 | - **Non-streaming requests**: In this path, API Management publishes the usage data to Event Hub, which is then ingested by Logic App and stored in Cosmos DB. 19 | - **Streaming requests**: In this path, API Management publishes the usage data to ```Application Insights``` custom metrics, which is then ingested by Logic App and stored in Cosmos DB. 
20 | 21 | ## Non-streaming requests 22 | 23 | This workflow is triggered by the Event Hub message that is published by API Management. The message is then ingested by Logic App and stored in Cosmos DB. 24 | 25 | Here the ingestion is near real-time, as the message is processed once it is published to Event Hub. 26 | 27 | The primary policy fragment that is used is [frag-openai-usage.xml](../infra/modules/apim/policies/frag-openai-usage.xml) 28 | 29 | ![Non-streaming requests](../assets/oai-logicapps-nonstreaming.png) 30 | 31 | ## Streaming requests 32 | 33 | This workflow is triggered by a scheduled event (by default it runs twice every day). 34 | 35 | The workflow uses Cosmos DB to maintain streaming export configurations which control the time range that queried data from Application Insights custom metrics should cover. 36 | 37 | > Note: the frequency might be changed depending on how many streaming requests are being made to the API knowing that Azure Monitor query supports maximum of 500,000 records per query. Minimum recommended frequency is once every 1 hour if twice a day is proven not to be sufficient. 38 | 39 | The primary policy fragment that is used is [frag-openai-usage-streaming.xml](../infra/modules/apim/policies/frag-openai-usage-streaming.xml) 40 | 41 | ![Streaming requests](../assets/oai-logicapps-streaming.png) 42 | 43 | ## Logic App workflows 44 | 45 | The Logic App workflows source code can be found in [/src/usage-ingestion-logicapp/](../src/usage-ingestion-logicapp/). 
46 | 47 | This folder contains the following: 48 | - [ai-usage-ingestion](../src/usage-ingestion-logicapp/ai-usage-ingestion) which process usage published to Event Hub 49 | - [ai-usage-ingestion-streaming](../src/usage-ingestion-logicapp/ai-usage-ingestion-streaming) which process usage from Application Insights custom metrics 50 | - [connections.json](../src/usage-ingestion-logicapp/connections.json) which contains the connection details for the Logic App workflows 51 | - Event Hub connection (in app) 52 | - Cosmos DB connection (in app) 53 | - Application Insights connection (managed) -------------------------------------------------------------------------------- /guides/power-bi-dashboard.md: -------------------------------------------------------------------------------- 1 | # Power BI Dashboard 2 | 3 | Power BI is a business analytics service by Microsoft. It aims to provide interactive visualizations and business intelligence capabilities with an interface simple enough for end users to create their own reports and dashboards. 4 | 5 | In this accelerator, we will be using Power BI to create a dashboard that will display the data from the Cosmos DB Database. 6 | 7 | ## Prerequisites 8 | 9 | - Download and install the Power BI Desktop application from the [Microsoft Store on Windows](https://www.microsoft.com/store/productId/9NTXR16HNW1T?ocid=pdpshare) or from [App Store](https://go.microsoft.com/fwlink/?LinkId=526218&clcid=0x409) on Mac. 10 | 11 | - Make sure that you can access the Cosmos DB from your local machine (you might need to allow you public IP to access Cosmos DB). 12 | 13 | ![Cosmos DB Firewall](../assets/cosmos-db-firwall.png) 14 | 15 | - Insert initial data into the model-pricing container (sample data for the model-pricing container can be found in the [/src/usage-reports/model-pricing.json](../src/usage-reports/model-pricing.json)). 
16 | 17 | ![Insert Data](../assets/cosmos-db-model-pricing.png) 18 | 19 | > **Note:** Pricing in the sample file is based on the public Azure pricing for East US region (which in many cases similar to other regions). You can review the prices on Azure docs related to the service being used and update the ```model-pricing``` accordingly. 20 | 21 | Below is a sample pricing entry for gpt-4o: 22 | 23 | ```json 24 | { 25 | "id": "4", 26 | "model": "gpt-4o", 27 | "deploymentName": "gpt-4o", 28 | "isActive": true, 29 | "CostPerInputUnit": 0.005, 30 | "CostPerOutputUnit": 0.015, 31 | "CostUnit": 1000, 32 | "BaseCost": 0, 33 | "Currency": "USD", 34 | "CalculationMethod": "tokens", 35 | "region": "ALL" 36 | } 37 | ``` 38 | 39 | ## Preparing the Power BI Dashboard 40 | 41 | Now you can open the [src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix](../src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix) file in the Power BI Desktop application. 42 | 43 | As this PowerBI file is using import mode, you should see some data already there from previously connected data source. 44 | 45 | In order to link the dashboard to the Cosmos DB, you need to update the connection string in the Power BI file. 46 | 47 | 1. Click on "Transform Data" in the Home tab. 48 | 49 | ![Transform Data](../assets/power-bi-data-source-transform.png) 50 | 51 | 2. Right click on the "ai-usage-container" data table and select "Advanced Editor". 52 | 53 | ![Edit Data](../assets/power-bi-data-source-adv-editor.png) 54 | 55 | 3. Replace the Cosmos DB endpoint with the one you have deployed. 56 | 57 | ![Edit Data AI Usage](../assets/power-bi-data-source-adv-editor-update.png) 58 | 59 | 4. Repeat the same for model-pricing data table. 60 | 61 | ![Edit Data Model Pricing](../assets/power-bi-data-source-adv-editor-update2.png) 62 | 63 | 5. Click on "Refresh Preview" to force the Power BI to refresh the data. 64 | 65 | 6. Click on "Close & Apply" to save the changes. 66 | 67 | 7. 
Now you should see the data from the Cosmos DB in the Power BI. 68 | 69 | ![Power BI Dashboard](../assets/power-bi-data-final.png) 70 | 71 | 8. If you need to get fresh copy of the data, you can click on "Refresh" in the Home tab. -------------------------------------------------------------------------------- /guides/routing-configurations.md: -------------------------------------------------------------------------------- 1 | # Gateway routing configurations 2 | 3 | ## Dynamic Throttling Assignment 4 | 5 | Sometimes with reserved OpenAI models through PTU (provisioned throughput units), latency increases the closer you are getting to 100% utilization. 6 | 7 | Although it is important to size correctly the capacity allocation for PTU, some occasional spikes can push the service to reach 90%+ utilization which results in increased latency. 8 | 9 | In many cases, this is not a challenge, but in other cases where the use case is sensitive to latency this can potentially impact the experience. 10 | 11 | AI Hub Gateway routing engine offers a way to handle such events by falling back to other OpenAI instance to relieve the primary PTU instance from being at maximum capacity. 12 | 13 | Introducing ```Dynamic Throttling Assignment``` which is a routing strategy that allows you to define a target TPM that when it is reached, APIM will switch incoming traffic to backup OpenAI service temporarily (by default for 30 seconds) allowing the PTU instance to regain capacity again then it will resume using it. 14 | 15 | ### Example 16 | 17 | APIM, as part of AI Hub Gateway, is configured with 2 OpenAI services with a deployment called ptu-gpt4-o (notice primary PTU has priority 1 and PAYGO has priority 2) 18 | 19 | An OpenAI deployment of gpt4-o has 50 PTU, which let's assume it can handle a 100K TPM (this is an estimate). 
20 | 21 | You can set a target of 80K TPM for that service, where APIM will use a rolling tokens-count against the deployment and automatically switch to the next priority OpenAI deployment once the target 80K TPM is reached. 22 | 23 | In order to leverage dynamic throttling, you have to configure multiple points: 24 | 25 | 1. Add ```targetTPMLimit``` to the OpenAI backend routes (inbound policy section): 26 | 27 | ```csharp 28 | // Notice targetTPMLimit is set to 500 TPM to guide APIM to suspend traffic to this backend 29 | routes.Add(new JObject() 30 | { 31 | { "name", "EastUS" }, 32 | { "location", "eastus" }, 33 | { "backend-id", "openai-backend-0" }, 34 | { "priority", 1}, 35 | { "targetTPMLimit", 500 }, 36 | { "isThrottling", false }, 37 | { "retryAfter", DateTime.MinValue } 38 | }); 39 | ``` 40 | 41 | 2. Setup a counter against the PTU deployment name for APIM to count the tokens (inbound policy section): 42 | 43 | ```xml 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | ``` 53 | 54 | 3. Reference ```dynamic-throttling-assignment``` policy fragment (outbound policy section): 55 | 56 | ```xml 57 | 58 | 59 | 60 | 61 | ``` 62 | 63 | 4. Test the policy updates by setting a small ```targetTPMLimit``` and use APIM trace to confirm that APIM switches traffic after hitting the limit and switches it back once the counter goes below that target limit. 64 | 65 | 5. Run a load test against the service to ensure that the selected target limit is sufficient to manage latency within acceptable parameters and reduce the limit if it is not. 
66 | 67 | You can view a full APIM policy that leverages the dynamic throttling policy [here](../infra/modules/apim/policies/openai_api_policy_dynamic_throttling.xml) -------------------------------------------------------------------------------- /guides/throttling-events-handling.md: -------------------------------------------------------------------------------- 1 | # Throttling Events Handling 2 | 3 | One of the key things to consider when working with AI Apps is throttling. 4 | 5 | Throttling can happen because the AI backend has run out of capacity (which AI Hub Gateway can help mitigate by automatically falling back to another instance) or due to implemented capacity control measures in the AI Hub Gateway (preventing certain use cases from exceeding allocated capacity). 6 | 7 | ## Handling Throttling Events in AI Hub Gateway 8 | In either case, it might be very important to keep an eye on these events especially in production environments. 9 | 10 | AI Hub Gateway provides a mechanism that allows you to monitor these events per use case (product), per AI deployment/service and other dimensions so you can take measures to address these events. 11 | 12 | A policy fragment [throttling-events](../infra/modules/apim/policies/frag-throttling-events.xml) is used to raise Application Insights custom metrics for throttling events. 13 | 14 | ```xml 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | ``` 31 | 32 | By referencing this policy in ```on-error``` section of an API, it will capture and raise ```429``` too many requests status code as a custom metric in Application Insights. 33 | 34 | ```xml 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | ``` 44 | 45 | It is designed in a way that the fragment is expecting 2 variables to be set by the referencing API to allow it to be used across multiple APIs that have the potential to be throttled. 
46 | 47 | ## View throttling events in Application Insights 48 | 49 | Once the policy is in place, you can view the custom metric in Application Insights under Metrics with ```throttling-events``` namespace and ```AI Throttling``` metric name. 50 | 51 | You can also split the metric by the different dimensions to get a better understanding of the throttling events (in the chart below I'm splitting by ```Product Name```). 52 | 53 | ![Throttling Events](../assets/throttling-events-app-insights.png) 54 | 55 | ## Creating Alerts in Azure Monitor 56 | 57 | Having a nice dashboard is helpful, but it is even more helpful to have alerts that can notify you when certain throttling thresholds are met. 58 | 59 | You can create alerts in Azure Monitor based on the custom metric ```AI Throttling``` and set the threshold that you want to be notified on. 60 | 61 | ![Create Alert](../assets/throttling-events-alert.png) 62 | 63 | You can create a generic alert that will notify you when the number of throttling events exceeds a certain threshold. 64 | 65 | You can also create a refined alert that will notify you when the number of throttling events exceeds a certain threshold for a specific product or AI deployment. 66 | 67 | ## Conclusion 68 | 69 | Throttling events can be a sign of potential service degradation and it is important to monitor and address them as soon as possible. 70 | 71 | AI Hub Gateway provides a mechanism to monitor these events and take measures to address them. 72 | 73 | Although using Alerts can be helpful, it is important to keep in mind the following points: 74 | - Alerts should be reserved for significant events that require attention, not noisy notifications that will be ignored over time. 75 | - Put in place a mechanism to address the issue that caused the throttling event in the first place. 76 | - Keep an eye on the alerts and adjust the thresholds as needed. 
77 | 78 | 79 | -------------------------------------------------------------------------------- /infra/abbreviations.json: -------------------------------------------------------------------------------- 1 | { 2 | "analysisServicesServers": "as", 3 | "apiManagementService": "apim-", 4 | "appConfigurationConfigurationStores": "appcs-", 5 | "appManagedEnvironments": "cae-", 6 | "appContainerApps": "ca-", 7 | "authorizationPolicyDefinitions": "policy-", 8 | "automationAutomationAccounts": "aa-", 9 | "blueprintBlueprints": "bp-", 10 | "blueprintBlueprintsArtifacts": "bpa-", 11 | "cacheRedis": "redis-", 12 | "cdnProfiles": "cdnp-", 13 | "cdnProfilesEndpoints": "cdne-", 14 | "cognitiveServicesAccounts": "cog-", 15 | "cognitiveServicesFormRecognizer": "cog-fr-", 16 | "cognitiveServicesTextAnalytics": "cog-ta-", 17 | "computeAvailabilitySets": "avail-", 18 | "computeCloudServices": "cld-", 19 | "computeDiskEncryptionSets": "des", 20 | "computeDisks": "disk", 21 | "computeDisksOs": "osdisk", 22 | "computeGalleries": "gal", 23 | "computeSnapshots": "snap-", 24 | "computeVirtualMachines": "vm", 25 | "computeVirtualMachineScaleSets": "vmss-", 26 | "containerInstanceContainerGroups": "ci", 27 | "containerRegistryRegistries": "cr", 28 | "containerServiceManagedClusters": "aks-", 29 | "databricksWorkspaces": "dbw-", 30 | "dataFactoryFactories": "adf-", 31 | "dataLakeAnalyticsAccounts": "dla", 32 | "dataLakeStoreAccounts": "dls", 33 | "dataMigrationServices": "dms-", 34 | "dBforMySQLServers": "mysql-", 35 | "dBforPostgreSQLServers": "psql-", 36 | "devicesIotHubs": "iot-", 37 | "devicesProvisioningServices": "provs-", 38 | "devicesProvisioningServicesCertificates": "pcert-", 39 | "documentDBDatabaseAccounts": "cosmos-", 40 | "eventGridDomains": "evgd-", 41 | "eventGridDomainsTopics": "evgt-", 42 | "eventGridEventSubscriptions": "evgs-", 43 | "eventHubNamespaces": "evhns-", 44 | "eventHubNamespacesEventHubs": "evh-", 45 | "hdInsightClustersHadoop": "hadoop-", 46 | 
"hdInsightClustersHbase": "hbase-", 47 | "hdInsightClustersKafka": "kafka-", 48 | "hdInsightClustersMl": "mls-", 49 | "hdInsightClustersSpark": "spark-", 50 | "hdInsightClustersStorm": "storm-", 51 | "hybridComputeMachines": "arcs-", 52 | "insightsActionGroups": "ag-", 53 | "insightsComponents": "appi-", 54 | "keyVaultVaults": "kv-", 55 | "kubernetesConnectedClusters": "arck", 56 | "kustoClusters": "dec", 57 | "kustoClustersDatabases": "dedb", 58 | "logicIntegrationAccounts": "ia-", 59 | "logicWorkflows": "logic-", 60 | "machineLearningServicesWorkspaces": "mlw-", 61 | "managedIdentityUserAssignedIdentities": "id-", 62 | "managementManagementGroups": "mg-", 63 | "migrateAssessmentProjects": "migr-", 64 | "networkApplicationGateways": "agw-", 65 | "networkApplicationSecurityGroups": "asg-", 66 | "networkAzureFirewalls": "afw-", 67 | "networkBastionHosts": "bas-", 68 | "networkConnections": "con-", 69 | "networkDnsZones": "dnsz-", 70 | "networkExpressRouteCircuits": "erc-", 71 | "networkFirewallPolicies": "afwp-", 72 | "networkFirewallPoliciesWebApplication": "waf", 73 | "networkFirewallPoliciesRuleGroups": "wafrg", 74 | "networkFrontDoors": "fd-", 75 | "networkFrontdoorWebApplicationFirewallPolicies": "fdfp-", 76 | "networkLoadBalancersExternal": "lbe-", 77 | "networkLoadBalancersInternal": "lbi-", 78 | "networkLoadBalancersInboundNatRules": "rule-", 79 | "networkLocalNetworkGateways": "lgw-", 80 | "networkNatGateways": "ng-", 81 | "networkNetworkInterfaces": "nic-", 82 | "networkNetworkSecurityGroups": "nsg-", 83 | "networkNetworkSecurityGroupsSecurityRules": "nsgsr-", 84 | "networkNetworkWatchers": "nw-", 85 | "networkPrivateDnsZones": "pdnsz-", 86 | "networkPrivateLinkServices": "pl-", 87 | "networkPublicIPAddresses": "pip-", 88 | "networkPublicIPPrefixes": "ippre-", 89 | "networkRouteFilters": "rf-", 90 | "networkRouteTables": "rt-", 91 | "networkRouteTablesRoutes": "udr-", 92 | "networkTrafficManagerProfiles": "traf-", 93 | "networkVirtualNetworkGateways": 
"vgw-", 94 | "networkVirtualNetworks": "vnet-", 95 | "networkVirtualNetworksSubnets": "snet-", 96 | "networkVirtualNetworksVirtualNetworkPeerings": "peer-", 97 | "networkVirtualWans": "vwan-", 98 | "networkVpnGateways": "vpng-", 99 | "networkVpnGatewaysVpnConnections": "vcn-", 100 | "networkVpnGatewaysVpnSites": "vst-", 101 | "notificationHubsNamespaces": "ntfns-", 102 | "notificationHubsNamespacesNotificationHubs": "ntf-", 103 | "operationalInsightsWorkspaces": "log-", 104 | "portalDashboards": "dash-", 105 | "powerBIDedicatedCapacities": "pbi-", 106 | "purviewAccounts": "pview-", 107 | "privateEndpoints": "pe-", 108 | "recoveryServicesVaults": "rsv-", 109 | "resourcesResourceGroups": "rg-", 110 | "searchSearchServices": "srch-", 111 | "serviceBusNamespaces": "sb-", 112 | "serviceBusNamespacesQueues": "sbq-", 113 | "serviceBusNamespacesTopics": "sbt-", 114 | "serviceEndPointPolicies": "se-", 115 | "serviceFabricClusters": "sf-", 116 | "signalRServiceSignalR": "sigr", 117 | "sqlManagedInstances": "sqlmi-", 118 | "sqlServers": "sql-", 119 | "sqlServersDataWarehouse": "sqldw-", 120 | "sqlServersDatabases": "sqldb-", 121 | "sqlServersDatabasesStretch": "sqlstrdb-", 122 | "storageStorageAccounts": "st", 123 | "storageStorageAccountsVm": "stvm", 124 | "storSimpleManagers": "ssimp", 125 | "streamAnalyticsCluster": "asa-", 126 | "synapseWorkspaces": "syn", 127 | "synapseWorkspacesAnalyticsWorkspaces": "synw", 128 | "synapseWorkspacesSqlPoolsDedicated": "syndp", 129 | "synapseWorkspacesSqlPoolsSpark": "synsp", 130 | "timeSeriesInsightsEnvironments": "tsi-", 131 | "webServerFarms": "plan-", 132 | "webSitesAppService": "app-", 133 | "webSitesAppServiceEnvironment": "ase-", 134 | "webSitesFunctions": "func-", 135 | "webStaticSites": "stapp-" 136 | } -------------------------------------------------------------------------------- /infra/main.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environmentName": { 6 | "value": "${AZURE_ENV_NAME}" 7 | }, 8 | "location": { 9 | "value": "${AZURE_LOCATION}" 10 | }, 11 | "entraAuth": { 12 | "value": "${AZURE_ENTRA_AUTH}" 13 | }, 14 | "entraTenantId": { 15 | "value": "${AZURE_TENANT_ID}" 16 | }, 17 | "entraClientId": { 18 | "value": "${AZURE_CLIENT_ID}" 19 | }, 20 | "entraAudience": { 21 | "value": "${AZURE_AUDIENCE}" 22 | }, 23 | "deploymentCapacity": { 24 | "value": "${OPENAI_CAPACITY}", 25 | "metadata": { 26 | "description": "The OpenAI endpoints capacity (in thousands of tokens per minute)." 27 | } 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /infra/modules/ai/cognitiveservices.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | param managedIdentityName string = '' 5 | param deployments array = [] 6 | param kind string = 'OpenAI' 7 | param sku object = { 8 | name: 'S0' 9 | } 10 | param deploymentCapacity int = 1 11 | 12 | // Networking 13 | param publicNetworkAccess string = 'Disabled' 14 | param openAiPrivateEndpointName string 15 | param vNetName string 16 | param vNetLocation string 17 | param privateEndpointSubnetName string 18 | param openAiDnsZoneName string 19 | 20 | // Use existing network/dns zone 21 | param dnsZoneRG string 22 | param dnsSubscriptionId string 23 | param vNetRG string 24 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = { 25 | name: vNetName 26 | scope: resourceGroup(vNetRG) 27 | } 28 | 29 | // Get existing subnet 30 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = { 31 | name: privateEndpointSubnetName 32 | parent: vnet 33 | } 34 | 35 | resource managedIdentity 
'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = { 36 | name: managedIdentityName 37 | } 38 | 39 | resource account 'Microsoft.CognitiveServices/accounts@2023-05-01' = { 40 | name: name 41 | location: location 42 | tags: union(tags, { 'azd-service-name': name }) 43 | kind: kind 44 | identity: { 45 | type: 'UserAssigned' 46 | userAssignedIdentities: { 47 | '${managedIdentity.id}': {} 48 | } 49 | } 50 | properties: { 51 | customSubDomainName: toLower(name) 52 | publicNetworkAccess: publicNetworkAccess 53 | networkAcls: { 54 | defaultAction: 'Deny' 55 | ipRules: [] 56 | virtualNetworkRules: [] 57 | } 58 | } 59 | sku: sku 60 | } 61 | 62 | @batchSize(1) 63 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deployments: { 64 | parent: account 65 | name: deployment.name 66 | properties: { 67 | model: deployment.model 68 | raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null 69 | } 70 | sku: contains(deployment, 'sku') ? 
deployment.sku : { 71 | name: 'Standard' 72 | capacity: deploymentCapacity 73 | } 74 | }] 75 | 76 | module privateEndpoint '../networking/private-endpoint.bicep' = { 77 | name: '${account.name}-privateEndpoint' 78 | params: { 79 | groupIds: [ 80 | 'account' 81 | ] 82 | dnsZoneName: openAiDnsZoneName 83 | name: openAiPrivateEndpointName 84 | privateLinkServiceId: account.id 85 | location: vNetLocation 86 | privateEndpointSubnetId: subnet.id 87 | dnsZoneRG: dnsZoneRG 88 | dnsSubId: dnsSubscriptionId 89 | } 90 | dependsOn: [ 91 | deployment 92 | ] 93 | } 94 | 95 | output openAiName string = account.name 96 | output openAiEndpointUri string = '${account.properties.endpoint}openai/' 97 | -------------------------------------------------------------------------------- /infra/modules/apim/api.bicep: -------------------------------------------------------------------------------- 1 | @description('The name of the API') 2 | @minLength(1) 3 | @maxLength(63) 4 | param apiName string 5 | 6 | @description('The display name of the API') 7 | @minLength(1) 8 | @maxLength(63) 9 | param apiDispalyName string 10 | 11 | @description('The contents of the OpenAPI definition') 12 | @minLength(1) 13 | param openApiSpecification string 14 | 15 | @description('The XML Policy document for the API') 16 | @minLength(1) 17 | param policyDocument string 18 | 19 | @description('The name of the API Management service to deploy the API to.') 20 | @minLength(1) 21 | param serviceName string 22 | 23 | @description('The API description (if blank, use the name of the API)') 24 | param apiDescription string = '' 25 | 26 | @description('The relative path for the API (if different to the API name)') 27 | param path string = '' 28 | 29 | @description('The (optional) service URL') 30 | param serviceUrl string = '' 31 | 32 | @description('Set to true if a subscription is required') 33 | param subscriptionRequired bool = true 34 | 35 | @description('API Revision number. 
Default is 1') 36 | param apiRevision string = '1' 37 | 38 | @description('Ability to override the subscription key name. Default is Ocp-Apim-Subscription-Key') 39 | param subscriptionKeyName string = '' 40 | 41 | param enableAPIDeployment bool = true 42 | 43 | // Assume the content format is JSON format if the ending is .json - otherwise, it's YAML 44 | var contentFormat = startsWith(openApiSpecification, '{') ? 'openapi+json' : 'openapi' 45 | 46 | @description('The type of the API') 47 | @allowed([ 48 | 'http' 49 | 'soap' 50 | 'graphql' 51 | 'websocket' 52 | ]) 53 | param apiType string = 'http' 54 | 55 | @description('The protocols supported by the API') 56 | @allowed([ 57 | 'http' 58 | 'https' 59 | 'ws' 60 | 'wss' 61 | ]) 62 | param apiProtocols array = [ 63 | 'https' 64 | ] 65 | 66 | resource apimService 'Microsoft.ApiManagement/service@2022-08-01' existing = { 67 | name: serviceName 68 | } 69 | 70 | var isWebSotcketAPI = contains(apiProtocols, 'ws') || contains(apiProtocols, 'wss') 71 | 72 | resource apiDefinition 'Microsoft.ApiManagement/service/apis@2022-08-01' = if(enableAPIDeployment && !isWebSotcketAPI) { 73 | name: apiName 74 | parent: apimService 75 | properties: { 76 | path: (path == '') ? apiName : path 77 | apiRevision: apiRevision 78 | description: (apiDescription == '') ? apiName : apiDescription 79 | displayName: apiDispalyName 80 | format: (openApiSpecification != 'NA') ? contentFormat : null 81 | value: (openApiSpecification != 'NA') ? openApiSpecification : null 82 | subscriptionRequired: subscriptionRequired 83 | subscriptionKeyParameterNames: { 84 | header: empty(subscriptionKeyName) ? 'Ocp-Apim-Subscription-Key' : subscriptionKeyName 85 | } 86 | type: apiType 87 | protocols: apiProtocols 88 | serviceUrl: (serviceUrl == '') ? 
'https://to-be-replaced-by-policy' : serviceUrl 89 | } 90 | } 91 | 92 | resource apiDefinitionWebSocket 'Microsoft.ApiManagement/service/apis@2022-08-01' = if(enableAPIDeployment && isWebSotcketAPI) { 93 | name: apiName 94 | parent: apimService 95 | properties: { 96 | path: (path == '') ? apiName : path 97 | apiRevision: apiRevision 98 | description: (apiDescription == '') ? apiName : apiDescription 99 | displayName: apiDispalyName 100 | subscriptionRequired: subscriptionRequired 101 | subscriptionKeyParameterNames: { 102 | header: empty(subscriptionKeyName) ? 'Ocp-Apim-Subscription-Key' : subscriptionKeyName 103 | } 104 | type: apiType 105 | protocols: apiProtocols 106 | serviceUrl: (serviceUrl == '') ? 'https://to-be-replaced-by-policy' : serviceUrl 107 | } 108 | } 109 | 110 | resource apiPolicy 'Microsoft.ApiManagement/service/apis/policies@2022-08-01' = if(enableAPIDeployment && policyDocument != 'NA') { 111 | name: 'policy' 112 | parent: apiDefinition 113 | properties: { 114 | format: 'rawxml' 115 | value: policyDocument 116 | } 117 | } 118 | 119 | output id string = (enableAPIDeployment) ? apiDefinition.id : '' 120 | output path string = (enableAPIDeployment) ? 
apiDefinition.properties.path : '' 121 | -------------------------------------------------------------------------------- /infra/modules/apim/openai-api/oai-realtime-api-ws.json: -------------------------------------------------------------------------------- 1 | { 2 | "swagger": "2.0", 3 | "info": { 4 | "title": "OAI-Realtime-API", 5 | "version": "1.0" 6 | }, 7 | "host": "apim-d6vjlv67krcvy.azure-api.net", 8 | "basePath": "/openai/realtime", 9 | "schemes": [ 10 | "ws", 11 | "wss" 12 | ], 13 | "securityDefinitions": { 14 | "apiKeyHeader": { 15 | "type": "apiKey", 16 | "name": "api-key", 17 | "in": "header" 18 | }, 19 | "apiKeyQuery": { 20 | "type": "apiKey", 21 | "name": "api-key", 22 | "in": "query" 23 | } 24 | }, 25 | "security": [ 26 | { 27 | "apiKeyHeader": [] 28 | }, 29 | { 30 | "apiKeyQuery": [] 31 | } 32 | ], 33 | "paths": { 34 | "": { 35 | "get": { 36 | "description": "WebSocket opening handshake", 37 | "operationId": "onHandshake", 38 | "summary": "onHandshake", 39 | "responses": { 40 | "200": { 41 | "description": null 42 | } 43 | } 44 | } 45 | } 46 | }, 47 | "tags": [] 48 | } -------------------------------------------------------------------------------- /infra/modules/apim/policies/ai-model-inference-api-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | @("Bearer " + (string)context.Variables["msi-access-token"]) 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/ai-search-api-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | @("Bearer " + (string)context.Variables["msi-access-token"]) 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 
-------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-aad-auth.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 10 | 12 | 13 | {{audience}} 14 | 15 | 16 | https://sts.windows.net/{{tenant-id}}/ 17 | 18 | 19 | 20 | {{client-id}} 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-ai-usage.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | @{ 6 | return new JObject( 7 | new JProperty("id", (string)context.Variables.GetValueOrDefault("idPrefix", "ai") + "-" + Guid.NewGuid().ToString()), 8 | new JProperty("timestamp", DateTime.UtcNow.ToString()), 9 | new JProperty("appId", context.Request.Headers.GetValueOrDefault("Authorization",string.Empty).Split(' ').LastOrDefault()?.AsJwt()?.Claims.GetValueOrDefault("appid", "NA")), 10 | new JProperty("subscriptionId", context.Subscription?.Id?.ToString() ?? "Portal-Admin"), 11 | new JProperty("productName", context.Product?.Name?.ToString() ?? "Portal-Admin"), 12 | new JProperty("targetService", (string)context.Variables.GetValueOrDefault("targetService", "NA")), 13 | new JProperty("model", (string)context.Variables.GetValueOrDefault("model", "NA")), 14 | new JProperty("gatewayName", context.Deployment?.ServiceName ?? "NA"), 15 | new JProperty("gatewayRegion", context.Deployment?.Region ?? "NA"), 16 | new JProperty("aiGatewayId", context.Deployment?.Gateway?.Id ?? "NA"), 17 | new JProperty("RequestIp", context.Request?.IpAddress ?? "NA"), 18 | new JProperty("operationName", context.Operation?.Name ?? 
"NA"), 19 | new JProperty("sessionId", (string)context.Variables.GetValueOrDefault("sessionId", "NA")), 20 | new JProperty("endUserId", (string)context.Variables.GetValueOrDefault("endUserId", "NA")), 21 | new JProperty("backendId", (string)context.Variables.GetValueOrDefault("backendId", "NA")), 22 | new JProperty("routeLocation", (string)context.Variables.GetValueOrDefault("routeLocation", "NA")), 23 | new JProperty("routeName", (string)context.Variables.GetValueOrDefault("routeName", "NA")), 24 | new JProperty("deploymentName", (string)context.Variables.GetValueOrDefault("deploymentName", "NA")), 25 | new JProperty("promptTokens", (int)context.Variables.GetValueOrDefault("promptTokens", 1)), 26 | new JProperty("responseTokens", (int)context.Variables.GetValueOrDefault("responseTokens", 0)), 27 | new JProperty("totalTokens", (int)context.Variables.GetValueOrDefault("totalTokens", 1)) 28 | ).ToString(); 29 | } 30 | 31 | 32 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-backend-routing.xml: -------------------------------------------------------------------------------- 1 | 2 | 0)" count="3" interval="0"> 3 | 4 | ("isThrottling") && DateTime.Now >= route.Value("retryAfter")) 12 | { 13 | route["isThrottling"] = false; 14 | route["retryAfter"] = DateTime.MinValue; 15 | } 16 | } 17 | 18 | return routes; 19 | }" /> 20 | 21 | 22 | availableRoutesIndexes = new List(); 26 | 27 | for (int i = 0; i < routes.Count; i++) 28 | { 29 | JObject route = (JObject)routes[i]; 30 | 31 | if (!route.Value("isThrottling")) 32 | { 33 | int routePriority = route.Value("priority"); 34 | 35 | if (routePriority < selectedPriority) 36 | { 37 | selectedPriority = routePriority; 38 | availableRoutesIndexes.Clear(); 39 | availableRoutesIndexes.Add(i); 40 | } 41 | else if (routePriority == selectedPriority) 42 | { 43 | availableRoutesIndexes.Add(i); 44 | } 45 | } 46 | } 47 | 48 | if (availableRoutesIndexes.Count == 1) 49 | { 50 
| return availableRoutesIndexes[0]; 51 | } 52 | 53 | if (availableRoutesIndexes.Count > 0) 54 | { 55 | //Returns a random route from the list if we have more than one available with the same priority 56 | return availableRoutesIndexes[new Random().Next(0, availableRoutesIndexes.Count)]; 57 | } 58 | else 59 | { 60 | //If there are no available routes, the request will be sent to the first one 61 | return 0; 62 | } 63 | }" /> 64 | ("backend-id"))" /> 65 | ("location"))" /> 66 | ("name"))" /> 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | ("routeIndex"); 84 | int retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("Retry-After", "-1")); 85 | 86 | if (retryAfter == -1) 87 | { 88 | retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("x-ratelimit-reset-requests", "-1")); 89 | } 90 | 91 | if (retryAfter == -1) 92 | { 93 | retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("x-ratelimit-reset-tokens", "10")); 94 | } 95 | 96 | JObject route = (JObject)routes[currentrouteIndex]; 97 | route["isThrottling"] = true; 98 | route["retryAfter"] = DateTime.Now.AddSeconds(retryAfter); 99 | 100 | return routes; 101 | }" /> 102 | 103 | ("isThrottling")) 113 | { 114 | remainingRoutes++; 115 | } 116 | } 117 | 118 | return remainingRoutes; 119 | }" /> 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-dynamic-throttling-assignment.xml: -------------------------------------------------------------------------------- 1 | 2 | ("targetTPMLimit") ?? -1; 11 | int consumedTPM = route.Value("consumedTPM") ?? 
0; 12 | 13 | if (targetTPMLimit == -1) 14 | { 15 | outputRoutes.Add(route); 16 | continue; 17 | } 18 | 19 | string consumedTokensKey = route.Value("backend-id") + "-" + targetDeployment + "-ConsumedTokens"; 20 | string remainingTokensKey = route.Value("backend-id") + "-" + targetDeployment + "-RemainingTokens"; 21 | 22 | if (context.Variables.ContainsKey(consumedTokensKey) && route["backend-id"].ToString() == activeBackendId) 23 | { 24 | int requestConsumedTokens = (int)context.Variables[consumedTokensKey]; 25 | int remainingTokens = (int)context.Variables[remainingTokensKey]; 26 | 27 | // Calcualting the total consumed tokens so far 28 | int consumedTokens = 1000000 - remainingTokens - requestConsumedTokens; 29 | 30 | double consumedPercentage = (double)consumedTokens / (double)targetTPMLimit; 31 | 32 | route["consumedPercentage"] = consumedPercentage; 33 | 34 | if (consumedPercentage > 0.8) 35 | { 36 | if((bool)route["isThrottling"] == false) 37 | { 38 | route["isThrottling"] = true; 39 | route["retryAfter"] = DateTime.Now.AddSeconds(30); 40 | } 41 | } 42 | else 43 | { 44 | route["isThrottling"] = false; 45 | route["retryAfter"] = DateTime.MinValue; 46 | } 47 | } 48 | 49 | outputRoutes.Add(route); 50 | } 51 | 52 | return outputRoutes; 53 | }" /> 54 | 55 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-openai-usage-streaming.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | availableRoutesIndexes = new List(); 10 | 11 | for (int i = 0; i < routes.Count; i++) 12 | { 13 | JObject route = (JObject)routes[i]; 14 | 15 | if (!route.Value("isThrottling")) 16 | { 17 | int routePriority = route.Value("priority"); 18 | 19 | if (routePriority < selectedPriority) 20 | { 21 | selectedPriority = routePriority; 22 | availableRoutesIndexes.Clear(); 23 | availableRoutesIndexes.Add(i); 24 | } 25 | else if (routePriority == selectedPriority) 26 | { 27 | 
availableRoutesIndexes.Add(i); 28 | } 29 | } 30 | } 31 | 32 | if (availableRoutesIndexes.Count == 1) 33 | { 34 | return availableRoutesIndexes[0]; 35 | } 36 | 37 | if (availableRoutesIndexes.Count > 0) 38 | { 39 | //Returns a random route from the list if we have more than one available with the same priority 40 | return availableRoutesIndexes[new Random().Next(0, availableRoutesIndexes.Count)]; 41 | } 42 | else 43 | { 44 | //If there are no available routes, the request will be sent to the first one 45 | return 0; 46 | } 47 | }" /> 48 | ("backend-id"))" /> 49 | ("location"))" /> 50 | ("name"))" /> 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-openai-usage.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | @{ 11 | //Avoid reading response body as it can only be ready once, instead, before calling this fragement, a variable call responseBody will be set in the outbound policy 12 | //var responseBody = context.Response.Body?.As(true); //Avoid this one 13 | var responseBody = (JObject)context.Variables["responseBody"]; //It is set in the outbound policy before calling the fragment 14 | return new JObject( 15 | new JProperty("id", responseBody?["id"]?.ToString() ?? Guid.NewGuid().ToString()), 16 | new JProperty("timestamp", DateTime.UtcNow.ToString()), 17 | new JProperty("appId", context.Request.Headers.GetValueOrDefault("Authorization",string.Empty).Split(' ').LastOrDefault()?.AsJwt()?.Claims.GetValueOrDefault("appid", "NA")), 18 | new JProperty("subscriptionId", context.Subscription?.Id?.ToString() ?? "Portal-Admin"), 19 | new JProperty("productName", context.Product?.Name?.ToString() ?? "Portal-Admin"), 20 | new JProperty("targetService", responseBody?["object"]?.ToString() ?? "NA"), 21 | new JProperty("model", responseBody?["model"]?.ToString() ?? 
"NA"), 22 | new JProperty("gatewayName", context.Deployment?.ServiceName ?? "NA"), 23 | new JProperty("gatewayRegion", context.Deployment?.Region ?? "NA"), 24 | new JProperty("aiGatewayId", context.Deployment?.Gateway?.Id ?? "NA"), 25 | new JProperty("RequestIp", context.Request?.IpAddress ?? "NA"), 26 | new JProperty("operationName", context.Operation?.Name ?? "NA"), 27 | new JProperty("sessionId", (string)context.Variables.GetValueOrDefault("sessionId", "NA")), 28 | new JProperty("endUserId", (string)context.Variables.GetValueOrDefault("endUserId", "NA")), 29 | new JProperty("backendId", (string)context.Variables.GetValueOrDefault("backendId", "NA")), 30 | new JProperty("routeLocation", (string)context.Variables.GetValueOrDefault("routeLocation", "NA")), 31 | new JProperty("routeName", (string)context.Variables.GetValueOrDefault("routeName", "NA")), 32 | new JProperty("deploymentName", (string)context.Variables.GetValueOrDefault("deploymentName", "NA")), 33 | new JProperty("promptTokens", responseBody?["usage"]?["prompt_tokens"]?.Value() ?? 1), 34 | new JProperty("responseTokens", responseBody?["usage"]?["completion_tokens"]?.Value() ?? 0), 35 | new JProperty("totalTokens", responseBody?["usage"]?["total_tokens"]?.Value() ?? 
1) 36 | ).ToString(); 37 | } 38 | 39 | 40 | @{ 41 | return ((JObject)context.Variables["responseBody"]).ToString(); 42 | } 43 | 44 | 45 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-throttling-events.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/frag-validate-routes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | o["deploymentName"]?.Value() == deploymentName); 11 | if(cluster == null) 12 | { 13 | //Error: No cluster matched the requested deployment name 14 | return new JArray() { new JObject() 15 | { 16 | { "name", deploymentName }, 17 | { "location", "NA" }, 18 | { "url", "No routes found for the deployment (" + deploymentName + ") in the region (" + context.Deployment.Region + ")" } 19 | } 20 | }; 21 | } 22 | JArray routes = (JArray)cluster["routes"]; 23 | return routes; 24 | }" /> 25 | 26 | 27 | 28 | 29 | 30 | @(((JArray)context.Variables["routes"]).ToString()) 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/hr_product_policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/openai-realtime-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | @("Bearer " + (string)context.Variables["msi-access-token"]) 15 | 16 | 17 | 18 | 19 | 20 | 21 
| 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/openai_api_policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | (true)["stream"].Type != JTokenType.Null)"> 11 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | @("Bearer " + (string)context.Variables["msi-access-token"]) 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/retail_product_policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/search_hr_product_policy.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /infra/modules/apim/policies/translator-api-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | @("Bearer " + (string)context.Variables["msi-access-token"]) 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /infra/modules/cosmos-db/cosmos-db.bicep: 
-------------------------------------------------------------------------------- 1 | @description('Azure Cosmos DB account name, max length 44 characters') 2 | param accountName string 3 | 4 | @description('Location for the Azure Cosmos DB account.') 5 | param location string = resourceGroup().location 6 | 7 | @description('The primary region for the Azure Cosmos DB account.') 8 | param primaryRegion string = location 9 | 10 | param tags object = {} 11 | 12 | @allowed([ 13 | 'Eventual' 14 | 'ConsistentPrefix' 15 | 'Session' 16 | 'BoundedStaleness' 17 | 'Strong' 18 | ]) 19 | @description('The default consistency level of the Cosmos DB account.') 20 | param defaultConsistencyLevel string = 'Session' 21 | 22 | @minValue(10) 23 | @maxValue(2147483647) 24 | @description('Max stale requests. Required for BoundedStaleness. Valid ranges, Single Region: 10 to 2147483647. Multi Region: 100000 to 2147483647.') 25 | param maxStalenessPrefix int = 100000 26 | 27 | @minValue(5) 28 | @maxValue(86400) /* bounds are in seconds, matching the maxIntervalInSeconds parameter they decorate */ 29 | @description('Max lag time (seconds). Required for BoundedStaleness. Valid ranges, Single Region: 5 to 86400.
Multi Region: 300 to 86400.') 30 | param maxIntervalInSeconds int = 300 31 | 32 | @allowed([ 33 | true 34 | false 35 | ]) 36 | @description('Enable system managed failover for regions') 37 | param systemManagedFailover bool = true 38 | 39 | @description('The name for the database') 40 | param databaseName string = 'ai-usage-db' 41 | 42 | @description('The name for the AI usage container') 43 | param containerName string = 'ai-usage-container' 44 | 45 | @description('The name for the model pricing container') 46 | param pricingContainerName string = 'model-pricing' 47 | 48 | @description('The name for the streaming export configuration container') 49 | param streamingExportConfigContainerName string = 'streaming-export-config' 50 | 51 | @minValue(400) 52 | @maxValue(1000000) 53 | @description('The throughput for each container') 54 | param throughput int = 400 55 | 56 | var consistencyPolicy = { /* maps every allowed consistency level to its policy object; only BoundedStaleness carries the staleness bounds */ 57 | Eventual: { 58 | defaultConsistencyLevel: 'Eventual' 59 | } 60 | ConsistentPrefix: { 61 | defaultConsistencyLevel: 'ConsistentPrefix' 62 | } 63 | Session: { 64 | defaultConsistencyLevel: 'Session' 65 | } 66 | BoundedStaleness: { 67 | defaultConsistencyLevel: 'BoundedStaleness' 68 | maxStalenessPrefix: maxStalenessPrefix 69 | maxIntervalInSeconds: maxIntervalInSeconds 70 | } 71 | Strong: { 72 | defaultConsistencyLevel: 'Strong' 73 | } 74 | } 75 | var locations = [ /* single location entry built from primaryRegion */ 76 | { 77 | locationName: primaryRegion 78 | failoverPriority: 0 79 | isZoneRedundant: false 80 | } 81 | ] 82 | 83 | // Networking 84 | param cosmosPrivateEndpointName string 85 | param vNetName string 86 | param privateEndpointSubnetName string 87 | param cosmosDnsZoneName string 88 | param publicAccess string = 'Disabled' 89 | 90 | // Use existing network/dns zone 91 | param dnsZoneRG string 92 | param dnsSubscriptionId string 93 | 94 | param vNetRG string 95 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = { 96 | name: vNetName 97 | scope: resourceGroup(vNetRG) 98 | } 99 | 100 | // Get existing subnet 101 | resource subnet
'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = { 102 | name: privateEndpointSubnetName 103 | parent: vnet 104 | } 105 | 106 | resource account 'Microsoft.DocumentDB/databaseAccounts@2024-02-15-preview' = { 107 | name: toLower(accountName) /* resource name is lower-cased; the azd tag below keeps the caller's original casing */ 108 | location: location 109 | tags: union(tags, { 'azd-service-name': accountName }) 110 | kind: 'GlobalDocumentDB' 111 | properties: { 112 | consistencyPolicy: consistencyPolicy[defaultConsistencyLevel] /* looks up the policy object keyed by the selected consistency level */ 113 | locations: locations 114 | databaseAccountOfferType: 'Standard' 115 | enableAutomaticFailover: systemManagedFailover 116 | disableKeyBasedMetadataWriteAccess: true 117 | publicNetworkAccess: publicAccess 118 | } 119 | } 120 | 121 | resource database 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2024-02-15-preview' = { 122 | parent: account 123 | name: databaseName 124 | properties: { 125 | resource: { 126 | id: databaseName 127 | } 128 | } 129 | } 130 | 131 | resource container 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-02-15-preview' = { /* container named by containerName, hash-partitioned on /productName */ 132 | parent: database 133 | name: containerName 134 | properties: { 135 | resource: { 136 | id: containerName 137 | partitionKey: { 138 | paths: [ 139 | '/productName' 140 | ] 141 | kind: 'Hash' 142 | } 143 | indexingPolicy: { 144 | indexingMode: 'consistent' 145 | automatic: true 146 | } 147 | } 148 | options: { 149 | throughput: throughput /* the same throughput parameter is applied to every container in this module */ 150 | } 151 | } 152 | } 153 | 154 | resource modelPricingContainer 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-02-15-preview' = { /* container named by pricingContainerName, hash-partitioned on /model */ 155 | parent: database 156 | name: pricingContainerName 157 | properties: { 158 | resource: { 159 | id: pricingContainerName 160 | partitionKey: { 161 | paths: [ 162 | '/model' 163 | ] 164 | kind: 'Hash' 165 | } 166 | indexingPolicy: { 167 | indexingMode: 'consistent' 168 | automatic: true 169 | } 170 | } 171 | options: { 172 | throughput: throughput 173 | } 174 | } 175 | } 176 | 177 | resource streamingExportConfigContainer
'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2024-02-15-preview' = { 178 | parent: database 179 | name: streamingExportConfigContainerName 180 | properties: { 181 | resource: { 182 | id: streamingExportConfigContainerName 183 | partitionKey: { 184 | paths: [ 185 | '/type' 186 | ] 187 | kind: 'Hash' 188 | } 189 | indexingPolicy: { 190 | indexingMode: 'consistent' 191 | automatic: true 192 | } 193 | } 194 | options: { 195 | throughput: throughput 196 | } 197 | } 198 | } 199 | 200 | module privateEndpoint '../networking/private-endpoint.bicep' = { /* private endpoint for the account's 'sql' group */ 201 | name: '${accountName}-privateEndpoint' 202 | params: { 203 | groupIds: [ 204 | 'sql' 205 | ] 206 | dnsZoneName: cosmosDnsZoneName 207 | name: cosmosPrivateEndpointName 208 | privateLinkServiceId: account.id 209 | location: location 210 | dnsZoneRG: dnsZoneRG 211 | privateEndpointSubnetId: subnet.id 212 | dnsSubId: dnsSubscriptionId 213 | } 214 | } 215 | 216 | output location string = location 217 | output cosmosDbAccountName string = account.name 218 | output cosmosDbDatabaseName string = database.name 219 | output cosmosDbContainerName string = container.name 220 | output cosmosDbPricingContainerName string = modelPricingContainer.name 221 | output cosmosDbStreamingExportConfigContainerName string = streamingExportConfigContainer.name 222 | output resourceId string = database.id 223 | output cosmosDbEndpoint string = account.properties.documentEndpoint /* endpoint reported by the account itself instead of a hard-coded documents.azure.com host */ 224 | -------------------------------------------------------------------------------- /infra/modules/event-hub/event-hub.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param sku string = 'Standard' 4 | param capacity int = 1 5 | param tags object = {} 6 | param eventHubName string = 'ai-usage' 7 | 8 | param eventHubPrivateEndpointName string 9 | param vNetName string 10 | param privateEndpointSubnetName string 11 |
param eventHubDnsZoneName string 12 | 13 | param publicNetworkAccess string = 'Disabled' 14 | 15 | // Use existing network/dns zone 16 | param dnsZoneRG string 17 | param dnsSubscriptionId string 18 | param vNetRG string 19 | 20 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = { 21 | name: vNetName 22 | scope: resourceGroup(vNetRG) 23 | } 24 | 25 | // Get existing subnet 26 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = { 27 | name: privateEndpointSubnetName 28 | parent: vnet 29 | } 30 | 31 | resource eventHubNamespace 'Microsoft.EventHub/namespaces@2024-05-01-preview' = { 32 | name: name 33 | location: location 34 | tags: union(tags, { 'azd-service-name': name }) 35 | sku: { 36 | name: sku 37 | tier: sku 38 | capacity: capacity 39 | } 40 | properties: { 41 | isAutoInflateEnabled: false 42 | maximumThroughputUnits: 0 43 | publicNetworkAccess: publicNetworkAccess 44 | } 45 | } 46 | 47 | resource eventHub 'Microsoft.EventHub/namespaces/eventhubs@2024-05-01-preview' = { 48 | name: eventHubName /* honours the eventHubName parameter (default 'ai-usage') instead of a hard-coded literal */ 49 | parent: eventHubNamespace 50 | properties: { 51 | messageRetentionInDays: 7 52 | partitionCount: 2 53 | status: 'Active' 54 | } 55 | } 56 | 57 | module privateEndpoint '../networking/private-endpoint.bicep' = { /* private endpoint for the namespace's 'namespace' group */ 58 | name: '${eventHubName}-privateEndpoint' 59 | params: { 60 | groupIds: [ 61 | 'namespace' 62 | ] 63 | dnsZoneName: eventHubDnsZoneName 64 | name: eventHubPrivateEndpointName 65 | privateLinkServiceId: eventHubNamespace.id 66 | location: location 67 | dnsZoneRG: dnsZoneRG 68 | privateEndpointSubnetId: subnet.id 69 | dnsSubId: dnsSubscriptionId 70 | } 71 | } 72 | 73 | output eventHubNamespaceName string = eventHubNamespace.name 74 | output eventHubName string = eventHub.name 75 | output eventHubEndpoint string = eventHubNamespace.properties.serviceBusEndpoint 76 | -------------------------------------------------------------------------------- /infra/modules/functionapp/functionapp.bicep:
-------------------------------------------------------------------------------- 1 | 2 | param functionAppName string 3 | param tags object = {} 4 | param azdserviceName string 5 | param storageAccountName string 6 | param functionContentShareName string 7 | 8 | param functionAppIdentityName string 9 | 10 | param applicationInsightsName string 11 | param eventHubNamespaceName string 12 | param eventHubName string 13 | //param vnetName string 14 | param functionAppSubnetId string 15 | 16 | param cosmosDBEndpoint string 17 | param cosmosDatabaseName string 18 | param cosmosContainerName string 19 | 20 | param location string = resourceGroup().location 21 | 22 | var functionPlanOS = 'Linux' 23 | var functionRuntime = 'dotnet-isolated' 24 | var dotnetFrameworkVersion = '8.0' 25 | var linuxFxVersion = 'DOTNET-ISOLATED|8.0' 26 | var isReserved = functionPlanOS == 'Linux' 27 | 28 | resource functionAppmanagedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' existing = { 29 | name: functionAppIdentityName 30 | } 31 | 32 | resource storageAccount 'Microsoft.Storage/storageAccounts@2023-05-01' existing = { 33 | name: storageAccountName 34 | } 35 | 36 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = { 37 | name: applicationInsightsName 38 | } 39 | 40 | 41 | var storageAccountConnectionString = 'DefaultEndpointsProtocol=https;AccountName=${storageAccount.name};AccountKey=${storageAccount.listKeys().keys[0].value};EndpointSuffix=${environment().suffixes.storage}' /* endpoint suffix is taken from the deployment environment instead of a hard-coded core.windows.net */ 42 | 43 | resource hostingPlan 'Microsoft.Web/serverfarms@2023-12-01' = { 44 | name: 'hosting-plan-${functionAppName}' 45 | tags: union(tags, { 'azd-service-name': 'hosting-plan-${functionAppName}' }) 46 | location: location 47 | sku: { 48 | name: 'EP1' 49 | tier: 'ElasticPremium' 50 | family: 'EP' 51 | } 52 | kind: 'elastic' 53 | properties: { 54 | maximumElasticWorkerCount: 10 55 | reserved: isReserved 56 | } 57 | } 58 | 59 | resource functionApp
'Microsoft.Web/sites@2023-12-01' = { 60 | name: functionAppName 61 | location: location 62 | kind: 'functionapp,linux' 63 | tags: union(tags, { 'azd-service-name': azdserviceName }) 64 | identity: { 65 | type: 'UserAssigned' 66 | userAssignedIdentities: { 67 | '${functionAppmanagedIdentity.id}': {} /* attaches the existing user-assigned identity named by functionAppIdentityName */ 68 | } 69 | } 70 | properties: { 71 | enabled: true 72 | serverFarmId: hostingPlan.id 73 | reserved: isReserved /* true because functionPlanOS is 'Linux' */ 74 | virtualNetworkSubnetId: functionAppSubnetId 75 | } 76 | } 77 | 78 | 79 | // Add the function to the subnet 80 | resource networkConfig 'Microsoft.Web/sites/networkConfig@2023-12-01' = { 81 | parent: functionApp 82 | name: 'virtualNetwork' 83 | properties: { 84 | subnetResourceId: functionAppSubnetId 85 | swiftSupported: true 86 | } 87 | } 88 | 89 | //create functionapp siteconfig 90 | resource functionAppSiteConfig 'Microsoft.Web/sites/config@2023-12-01' = { 91 | parent: functionApp 92 | name: 'web' 93 | properties: { 94 | linuxFxVersion: linuxFxVersion 95 | detailedErrorLoggingEnabled: true 96 | vnetRouteAllEnabled: true 97 | ftpsState: 'FtpsOnly' 98 | minTlsVersion: '1.2' 99 | scmMinTlsVersion: '1.2' 100 | minimumElasticInstanceCount: 1 101 | //vnetName: vnetName 102 | publicNetworkAccess: 'Enabled' 103 | functionsRuntimeScaleMonitoringEnabled: true 104 | netFrameworkVersion: dotnetFrameworkVersion 105 | } 106 | dependsOn: [ 107 | applicationInsights 108 | ] 109 | } 110 | 111 | //Create functionapp appsettings 112 | 113 | resource functionAppSettings 'Microsoft.Web/sites/config@2023-12-01' = { 114 | parent: functionApp 115 | name: 'appsettings' 116 | properties: { 117 | APPLICATIONINSIGHTS_CONNECTION_STRING: applicationInsights.properties.ConnectionString 118 | AzureWebJobsStorage: storageAccountConnectionString 119 | //AzureWebJobsStorage__accountname: storageAccountName 120 | FUNCTIONS_EXTENSION_VERSION: '~4' 121 | FUNCTIONS_WORKER_RUNTIME: functionRuntime 122 | WEBSITE_CONTENTAZUREFILECONNECTIONSTRING: storageAccountConnectionString 123 |
WEBSITE_CONTENTSHARE: functionContentShareName 124 | WEBSITE_VNET_ROUTE_ALL: '1' 125 | WEBSITE_CONTENTOVERVNET: '1' 126 | //EventHub Input Trigger Settings With Managed Identity 127 | //https://learn.microsoft.com/en-us/azure/azure-functions/functions-reference?tabs=eventhubs&pivots=programming-language-csharp#common-properties-for-identity-based-connections 128 | EventHubConnection__clientId: functionAppmanagedIdentity.properties.clientId 129 | EventHubConnection__credential: 'managedidentity' 130 | EventHubConnection__fullyQualifiedNamespace: '${eventHubNamespaceName}.servicebus.windows.net' 131 | EventHubName: eventHubName 132 | 133 | //CosmosDB 134 | CosmosAccountEndpoint: cosmosDBEndpoint 135 | CosmosDatabaseName: cosmosDatabaseName 136 | CosmosContainerName: cosmosContainerName 137 | CosmosManagedIdentityId: functionAppmanagedIdentity.properties.clientId /* same client ID that is passed as EventHubConnection__clientId */ 138 | } 139 | dependsOn: [ 140 | storageAccount 141 | ] 142 | } 143 | -------------------------------------------------------------------------------- /infra/modules/functionapp/storageaccount.bicep: -------------------------------------------------------------------------------- 1 | param storageAccountName string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | param functionAppManagedIdentityName string /* existing user-assigned identity that receives the role assignment declared in this module */ 6 | 7 | //Networking 8 | param vNetName string 9 | param privateEndpointSubnetName string 10 | param storageBlobDnsZoneName string 11 | param storageBlobPrivateEndpointName string 12 | param storageFileDnsZoneName string 13 | param storageFilePrivateEndpointName string 14 | param storageTableDnsZoneName string 15 | param storageTablePrivateEndpointName string 16 | param storageQueueDnsZoneName string 17 | param storageQueuePrivateEndpointName string 18 | // https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#storage-blob-data-owner 19 | var storageBlobDataOwnerRoleId = subscriptionResourceId('Microsoft.Authorization/roleDefinitions',
'b7e6dc6d-f1e8-4753-8033-0f276bb0955b') 20 | 21 | // Use existing network/dns zone 22 | param dnsZoneRG string 23 | param dnsSubscriptionId string 24 | param vNetRG string 25 | 26 | param provisionFunctionShare bool = true /* gates creation of the file share named by functionContentShareName */ 27 | param provisionLogicShare bool = true /* gates creation of the file share named by logicContentShareName */ 28 | 29 | param functionContentShareName string 30 | param logicContentShareName string 31 | 32 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = { 33 | name: vNetName 34 | scope: resourceGroup(vNetRG) 35 | } 36 | 37 | // Get existing subnet 38 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = { 39 | name: privateEndpointSubnetName 40 | parent: vnet 41 | } 42 | 43 | resource functionAppmanagedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = { 44 | name: functionAppManagedIdentityName 45 | } 46 | 47 | @description('Storage Account type') 48 | @allowed([ 49 | 'Standard_LRS' 50 | 'Standard_GRS' 51 | 'Standard_RAGRS' 52 | ]) 53 | param storageAccountType string = 'Standard_LRS' 54 | 55 | 56 | resource storageAccount 'Microsoft.Storage/storageAccounts@2022-05-01' = { 57 | name: storageAccountName 58 | location: location 59 | tags: union(tags, { 'azd-service-name': storageAccountName }) 60 | sku: { 61 | name: storageAccountType 62 | } 63 | kind: 'StorageV2' 64 | properties: { 65 | supportsHttpsTrafficOnly: true 66 | publicNetworkAccess: 'Disabled' /* public access is off; this module declares private endpoints for blob, file, table and queue */ 67 | allowBlobPublicAccess: false 68 | accessTier: 'Hot' 69 | networkAcls: { 70 | bypass: 'None' 71 | defaultAction: 'Deny' 72 | } 73 | } 74 | } 75 | 76 | resource shareFunctionApp 'Microsoft.Storage/storageAccounts/fileServices/shares@2022-05-01' = if (provisionFunctionShare) { 77 | name: '${storageAccountName}/default/${functionContentShareName}' /* full child name: <account>/default/<share> */ 78 | dependsOn: [ 79 | storageAccount 80 | ] 81 | } 82 | 83 | resource shareLogicApp 'Microsoft.Storage/storageAccounts/fileServices/shares@2022-05-01' = if (provisionLogicShare) { 84 | name:
'${storageAccountName}/default/${logicContentShareName}' 85 | dependsOn: [ 86 | storageAccount 87 | ] 88 | } 89 | 90 | resource storageAccountFunctionAppRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { 91 | name: guid(storageAccount.id, functionAppmanagedIdentity.name, storageBlobDataOwnerRoleId) /* assignment name derived from the account, the identity and the role */ 92 | properties: { 93 | principalId: functionAppmanagedIdentity.properties.principalId 94 | roleDefinitionId: storageBlobDataOwnerRoleId 95 | } 96 | scope: storageAccount 97 | } 98 | 99 | module privateEndpointBlob '../networking/private-endpoint.bicep' = { /* the four modules below differ only in group ID, DNS zone and endpoint name */ 100 | name: '${storageAccountName}-blob-privateEndpoint' 101 | params: { 102 | groupIds: [ 103 | 'blob' 104 | ] 105 | dnsZoneName: storageBlobDnsZoneName 106 | name: storageBlobPrivateEndpointName 107 | privateLinkServiceId: storageAccount.id 108 | location: location 109 | dnsZoneRG: dnsZoneRG 110 | privateEndpointSubnetId: subnet.id 111 | dnsSubId: dnsSubscriptionId 112 | } 113 | } 114 | 115 | module privateEndpointFile '../networking/private-endpoint.bicep' = { 116 | name: '${storageAccountName}-file-privateEndpoint' 117 | params: { 118 | groupIds: [ 119 | 'file' 120 | ] 121 | dnsZoneName: storageFileDnsZoneName 122 | name: storageFilePrivateEndpointName 123 | privateLinkServiceId: storageAccount.id 124 | location: location 125 | dnsZoneRG: dnsZoneRG 126 | privateEndpointSubnetId: subnet.id 127 | dnsSubId: dnsSubscriptionId 128 | } 129 | } 130 | 131 | module privateEndpointTable '../networking/private-endpoint.bicep' = { 132 | name: '${storageAccountName}-table-privateEndpoint' 133 | params: { 134 | groupIds: [ 135 | 'table' 136 | ] 137 | dnsZoneName: storageTableDnsZoneName 138 | name: storageTablePrivateEndpointName 139 | privateLinkServiceId: storageAccount.id 140 | location: location 141 | dnsZoneRG: dnsZoneRG 142 | privateEndpointSubnetId: subnet.id 143 | dnsSubId: dnsSubscriptionId 144 | } 145 | } 146 | 147 | module privateEndpointQueue '../networking/private-endpoint.bicep' = { 148 |
name: '${storageAccountName}-queue-privateEndpoint' 149 | params: { 150 | groupIds: [ 151 | 'queue' 152 | ] 153 | dnsZoneName: storageQueueDnsZoneName 154 | name: storageQueuePrivateEndpointName 155 | privateLinkServiceId: storageAccount.id 156 | location: location 157 | dnsZoneRG: dnsZoneRG 158 | privateEndpointSubnetId: subnet.id 159 | dnsSubId: dnsSubscriptionId 160 | } 161 | } 162 | 163 | 164 | output storageAccountName string = storageAccount.name 165 | -------------------------------------------------------------------------------- /infra/modules/logicapp/api-connection-access.bicep: -------------------------------------------------------------------------------- 1 | param connectionName string 2 | param accessPolicyName string 3 | param identityPrincipalId string 4 | param location string = resourceGroup().location 5 | 6 | resource logicAppConnectionExisting 'Microsoft.Web/connections@2016-06-01' existing = { 7 | name: connectionName 8 | resource accessPolicy 'accessPolicies@2016-06-01' = { /* child access policy created on the existing connection */ 9 | name: accessPolicyName 10 | location: location 11 | properties: { 12 | principal: { 13 | type: 'ActiveDirectory' 14 | identity: { 15 | tenantId: subscription().tenantId 16 | objectId: identityPrincipalId 17 | } 18 | } 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /infra/modules/logicapp/api-connection.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "connection_name": { 6 | "type": "string" 7 | }, 8 | "display_name": { 9 | "type": "string" 10 | }, 11 | "location": { 12 | "type": "string" 13 | } 14 | }, 15 | "variables": {}, 16 | "resources": [ 17 | { 18 | "type": "Microsoft.Web/connections", 19 | "name": "[parameters('connection_name')]", 20 | "apiVersion":
"[providers('Microsoft.Web','connections').apiVersions[0]]", 21 | "location": "[parameters('location')]", 22 | "kind": "V2", 23 | "properties": { 24 | "alternativeParameterValues": {}, 25 | "api": { 26 | "id": "[subscriptionResourceId('Microsoft.Web/locations/managedApis', parameters('location'), parameters('connection_name'))]" 27 | }, 28 | "authenticatedUser": {}, 29 | "connectionState": "Enabled", 30 | "customParameterValues": {}, 31 | "displayName": "[parameters('display_name')]", 32 | "parameterValueSet": { 33 | "name": "managedIdentityAuth", 34 | "values": {} 35 | } 36 | } 37 | } 38 | ], 39 | "outputs": { 40 | "connectRuntimeUrl": { 41 | "type": "string", 42 | "value": "[reference(resourceId('Microsoft.Web/connections', parameters('connection_name')), '2016-06-01').connectionRuntimeUrl]" 43 | }, 44 | "resourceId": { 45 | "type": "string", 46 | "value": "[resourceId('Microsoft.Web/connections', parameters('connection_name'))]" 47 | }, 48 | "apiId": { 49 | "type": "string", 50 | "value": "[subscriptionResourceId('Microsoft.Web/locations/managedApis', parameters('location'), parameters('connection_name'))]" 51 | } 52 | } 53 | } -------------------------------------------------------------------------------- /infra/modules/monitor/applicationinsights.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param dashboardName string 3 | param location string = resourceGroup().location 4 | param tags object = {} 5 | 6 | param logAnalyticsWorkspaceId string 7 | 8 | param createDashboard bool /* when true, the dashboard module at the end of this file is deployed */ 9 | 10 | // Networking 11 | param privateLinkScopeName string /* an empty string skips the private link scope wiring below */ 12 | 13 | resource privateLinkScope 'microsoft.insights/privateLinkScopes@2021-07-01-preview' existing = if (privateLinkScopeName != '') { 14 | name: privateLinkScopeName 15 | } 16 | 17 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { 18 | name: name 19 | location: location 20 | tags: union(tags, { 'azd-service-name': name }) 21 |
kind: 'web' 22 | properties: { 23 | Application_Type: 'web' 24 | WorkspaceResourceId: logAnalyticsWorkspaceId 25 | publicNetworkAccessForIngestion: privateLinkScopeName != '' ? 'Disabled' : 'Enabled' 26 | publicNetworkAccessForQuery: 'Enabled' /* query access stays public whether or not a private link scope is used */ 27 | CustomMetricsOptedInType: 'WithDimensions' 28 | } 29 | } 30 | 31 | resource appInsightsScopedResource 'Microsoft.Insights/privateLinkScopes/scopedResources@2021-07-01-preview' = if (privateLinkScopeName != '') { 32 | parent: privateLinkScope 33 | name: '${applicationInsights.name}-connection' 34 | properties: { 35 | linkedResourceId: applicationInsights.id 36 | } 37 | } 38 | 39 | module applicationInsightsDashboard 'applicationinsights-dashboard.bicep' = if(createDashboard) { 40 | name: 'application-insights-dashboard' 41 | params: { 42 | name: dashboardName 43 | location: location 44 | applicationInsightsName: applicationInsights.name 45 | } 46 | } 47 | 48 | output connectionString string = applicationInsights.properties.ConnectionString 49 | output instrumentationKey string = applicationInsights.properties.InstrumentationKey 50 | output name string = applicationInsights.name 51 | -------------------------------------------------------------------------------- /infra/modules/monitor/loganalytics.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | 5 | // Networking 6 | param privateLinkScopeName string 7 | 8 | resource privateLinkScope 'microsoft.insights/privateLinkScopes@2021-07-01-preview' existing = if (privateLinkScopeName != '') { 9 | name: privateLinkScopeName 10 | } 11 | 12 | resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' = { 13 | name: name 14 | location: location 15 | tags: union(tags, { 'azd-service-name': name }) 16 | properties: any({ 17 | retentionInDays: 30 18 | features: { 19 |
searchVersion: 1 20 | } 21 | sku: { 22 | name: 'PerGB2018' 23 | } 24 | publicNetworkAccessForIngestion: privateLinkScopeName != '' ? 'Disabled' : 'Enabled' 25 | publicNetworkAccessForQuery: 'Enabled' /* query access stays public whether or not a private link scope is used */ 26 | }) 27 | } 28 | 29 | resource logAnalyticsScopedResource 'Microsoft.Insights/privateLinkScopes/scopedResources@2021-07-01-preview' = if (privateLinkScopeName != '') { 30 | parent: privateLinkScope 31 | name: '${logAnalytics.name}-connection' 32 | properties: { 33 | linkedResourceId: logAnalytics.id 34 | } 35 | } 36 | 37 | output id string = logAnalytics.id 38 | output name string = logAnalytics.name 39 | -------------------------------------------------------------------------------- /infra/modules/monitor/monitoring.bicep: -------------------------------------------------------------------------------- 1 | param logAnalyticsName string 2 | param apimApplicationInsightsName string 3 | param apimApplicationInsightsDashboardName string 4 | param functionApplicationInsightsName string 5 | param functionApplicationInsightsDashboardName string 6 | param location string = resourceGroup().location 7 | param tags object = {} 8 | 9 | param createDashboard bool 10 | 11 | // Networking 12 | param usePrivateLinkScope bool = true 13 | var privateLinkScopeName = 'ampls-monitoring' 14 | param vNetName string 15 | param privateEndpointSubnetName string 16 | param applicationInsightsDnsZoneName string 17 | 18 | // Use existing network/dns zone 19 | param dnsZoneRG string 20 | param dnsSubscriptionId string 21 | param vNetRG string 22 | resource vnet 'Microsoft.Network/virtualNetworks@2022-01-01' existing = { 23 | name: vNetName 24 | scope: resourceGroup(vNetRG) 25 | } 26 | 27 | // Get existing subnet 28 | resource subnet 'Microsoft.Network/virtualNetworks/subnets@2022-01-01' existing = { 29 | name: privateEndpointSubnetName 30 | parent: vnet 31 | } 32 | 33 | resource privateLinkScope 'microsoft.insights/privateLinkScopes@2021-07-01-preview'
= if (usePrivateLinkScope) { 34 | name: privateLinkScopeName 35 | location: 'global' 36 | properties: { 37 | accessModeSettings: { 38 | ingestionAccessMode: 'Open' 39 | queryAccessMode: 'Open' 40 | } 41 | } 42 | } 43 | 44 | module logAnalytics 'loganalytics.bicep' = { 45 | name: 'log-analytics' 46 | params: { 47 | name: logAnalyticsName 48 | location: location 49 | tags: tags 50 | privateLinkScopeName: usePrivateLinkScope ? privateLinkScopeName : '' /* an empty string makes the child module skip its private link scope resources */ 51 | } 52 | } 53 | 54 | // APIM App Insights 55 | module apimApplicationInsights 'applicationinsights.bicep' = { 56 | name: 'application-insights' 57 | params: { 58 | name: apimApplicationInsightsName 59 | location: location 60 | tags: tags 61 | dashboardName: apimApplicationInsightsDashboardName 62 | logAnalyticsWorkspaceId: logAnalytics.outputs.id 63 | privateLinkScopeName: usePrivateLinkScope ? privateLinkScopeName : '' 64 | createDashboard: createDashboard 65 | } 66 | } 67 | 68 | // Function App Insights 69 | module functionApplicationInsights 'applicationinsights.bicep' = { /* same module as above, deployed a second time with the function app's names */ 70 | name: 'func-application-insights' 71 | params: { 72 | name: functionApplicationInsightsName 73 | location: location 74 | tags: tags 75 | dashboardName: functionApplicationInsightsDashboardName 76 | logAnalyticsWorkspaceId: logAnalytics.outputs.id 77 | privateLinkScopeName: usePrivateLinkScope ?
privateLinkScopeName : '' 78 | createDashboard: createDashboard 79 | } 80 | } 81 | 82 | module privateEndpoint '../networking/private-endpoint.bicep' = if (usePrivateLinkScope) { 83 | name: '${privateLinkScopeName}-privateEndpoint' 84 | params: { 85 | groupIds: [ 86 | 'azuremonitor' 87 | ] 88 | dnsZoneName: applicationInsightsDnsZoneName 89 | name: '${privateLinkScopeName}-pe' 90 | privateLinkServiceId: privateLinkScope.id 91 | location: location 92 | dnsZoneRG: dnsZoneRG 93 | privateEndpointSubnetId: subnet.id 94 | dnsSubId: dnsSubscriptionId 95 | } 96 | dependsOn: [ /* the endpoint is deployed only after the workspace and both Application Insights modules */ 97 | logAnalytics 98 | apimApplicationInsights 99 | functionApplicationInsights 100 | ] 101 | } 102 | 103 | output applicationInsightsName string = apimApplicationInsights.outputs.name 104 | output applicationInsightsConnectionString string = apimApplicationInsights.outputs.connectionString 105 | output applicationInsightsInstrumentationKey string = apimApplicationInsights.outputs.instrumentationKey 106 | output funcApplicationInsightsName string = functionApplicationInsights.outputs.name 107 | output funcApplicationInsightsConnectionString string = functionApplicationInsights.outputs.connectionString 108 | output funcApplicationInsightsInstrumentationKey string = functionApplicationInsights.outputs.instrumentationKey 109 | output logAnalyticsWorkspaceId string = logAnalytics.outputs.id 110 | output logAnalyticsWorkspaceName string = logAnalytics.outputs.name 111 | -------------------------------------------------------------------------------- /infra/modules/networking/dns.bicep: -------------------------------------------------------------------------------- 1 | param name string 2 | param tags object = {} 3 | 4 | resource privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = { /* private DNS zone named by the name parameter */ 5 | name: name 6 | location: 'global' 7 | tags: union(tags, { 'azd-service-name': name }) 8 | } 9 | 10 | output privateDnsZoneName string = privateDnsZone.name 11 |
// Creates a private endpoint in the given subnet for the target private link service
// and attaches a DNS zone group that points at an already existing private DNS zone.

// Private endpoint settings
param name string
param location string
param privateLinkServiceId string
param groupIds array
param privateEndpointSubnetId string

// Existing private DNS zone: its name plus the subscription and resource group hosting it
param dnsZoneName string
param dnsSubId string
param dnsZoneRG string

resource privateEndpointDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' existing = {
  scope: resourceGroup(dnsSubId, dnsZoneRG)
  name: dnsZoneName
}

resource privateEndpoint 'Microsoft.Network/privateEndpoints@2022-09-01' = {
  name: name
  location: location
  properties: {
    privateLinkServiceConnections: [
      {
        // The connection reuses the endpoint's own name.
        name: name
        properties: {
          groupIds: groupIds
          privateLinkServiceId: privateLinkServiceId
        }
      }
    ]
    subnet: {
      id: privateEndpointSubnetId
    }
  }
  dependsOn: [
    privateEndpointDnsZone
  ]
}

// Single DNS zone config named 'default' bound to the existing zone.
resource privateEndpointDnsGroup 'Microsoft.Network/privateEndpoints/privateDnsZoneGroups@2022-09-01' = {
  name: 'privateDnsZoneGroup'
  parent: privateEndpoint
  properties: {
    privateDnsZoneConfigs: [
      {
        name: 'default'
        properties: {
          privateDnsZoneId: privateEndpointDnsZone.id
        }
      }
    ]
  }
}

output privateEndpointName string = privateEndpoint.name
// Resolves an existing virtual network (in resource group `vnetRG`) and three of its
// subnets, and exposes their names and resource ids to the caller.

param name string
param vnetRG string
param apimSubnetName string
param privateEndpointSubnetName string
param functionAppSubnetName string

resource virtualNetwork 'Microsoft.Network/virtualNetworks@2019-11-01' existing = {
  name: name
  scope: resourceGroup(vnetRG)
}

resource apimSubnet 'Microsoft.Network/virtualNetworks/subnets@2023-11-01' existing = {
  name: apimSubnetName
  parent: virtualNetwork
}

resource privateEndpointSubnet 'Microsoft.Network/virtualNetworks/subnets@2023-11-01' existing = {
  name: privateEndpointSubnetName
  parent: virtualNetwork
}

resource functionAppSubnet 'Microsoft.Network/virtualNetworks/subnets@2023-11-01' existing = {
  name: functionAppSubnetName
  parent: virtualNetwork
}

output virtualNetworkId string = virtualNetwork.id
output vnetName string = virtualNetwork.name
output location string = virtualNetwork.location
output vnetRG string = vnetRG

// Subnet ids come from the `existing` subnet symbols instead of being concatenated by
// hand from the vnet id; the resulting id strings are the same.
output apimSubnetName string = apimSubnet.name
output apimSubnetId string = apimSubnet.id
output privateEndpointSubnetName string = privateEndpointSubnet.name
output privateEndpointSubnetId string = privateEndpointSubnet.id
output functionAppSubnetName string = functionAppSubnet.name
output functionAppSubnetId string = functionAppSubnet.id
// Creates a user-assigned managed identity and grants it, at resource-group scope,
// the role definitions listed in `workloadRoleDefinitionIds`.

param name string
param location string = resourceGroup().location
param tags object = {}

// Role definitions assigned to the identity.
var workloadRoleDefinitionIds = [
  // Cognitive Services User
  resourceId('Microsoft.Authorization/roleDefinitions', '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd')
  // Azure Event Hubs Data Sender
  resourceId('Microsoft.Authorization/roleDefinitions', '2b629674-e913-4c01-ae53-ef4638d8f975')
]

// Currently unused definitions for 'Search Index Data Reader' and 'Search Index Data Contributor':
// resourceId('Microsoft.Authorization/roleDefinitions', '1407120a-92aa-4202-b7e9-c0e197c71c8f')
// resourceId('Microsoft.Authorization/roleDefinitions', '8ebe5a00-799e-43f5-93ac-243d3dce84a7')

resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' = {
  name: name
  location: location
  tags: union(tags, { 'azd-service-name': name })
}

// One role assignment per entry above. The assignment name is derived from the identity
// id and the role definition id, so it stays stable across deployments.
resource workloadRoleAssignments 'Microsoft.Authorization/roleAssignments@2022-04-01' = [for roleDefinitionId in workloadRoleDefinitionIds: {
  name: guid(managedIdentity.id, roleDefinitionId)
  scope: resourceGroup()
  properties: {
    principalType: 'ServicePrincipal'
    principalId: managedIdentity.properties.principalId
    roleDefinitionId: roleDefinitionId
  }
}]

output managedIdentityName string = managedIdentity.name
managedIdentity.properties.principalId 36 | principalType: 'ServicePrincipal' 37 | } 38 | } 39 | 40 | 41 | output managedIdentityName string = managedIdentity.name 42 | -------------------------------------------------------------------------------- /infra/modules/stream-analytics/stream-analytics.bicep: -------------------------------------------------------------------------------- 1 | param jobName string 2 | param location string = resourceGroup().location 3 | param tags object = {} 4 | param eventHubNamespace string 5 | param eventHubName string 6 | param cosmosDbAccountName string 7 | param cosmosDbDatabaseName string 8 | param cosmosDbContainerName string 9 | param managedIdentityName string 10 | 11 | 12 | resource managedIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2018-11-30' existing = { 13 | name: managedIdentityName 14 | } 15 | 16 | resource streamAnalyticsJob 'Microsoft.StreamAnalytics/streamingjobs@2021-10-01-preview' = { 17 | name: jobName 18 | location: location 19 | tags: union(tags, { 'azd-service-name': jobName }) 20 | identity: { 21 | type: 'UserAssigned' 22 | userAssignedIdentities: { 23 | '${managedIdentity.id}': {} 24 | } 25 | } 26 | properties: { 27 | sku: { 28 | name: 'StandardV2' 29 | } 30 | eventsOutOfOrderPolicy: 'Adjust' 31 | outputErrorPolicy: 'Stop' 32 | eventsOutOfOrderMaxDelayInSeconds: 5 33 | compatibilityLevel: '1.2' 34 | inputs: [ 35 | { 36 | name: 'input' 37 | properties: { 38 | type: 'Stream' 39 | serialization: { 40 | type: 'Json' 41 | properties: { 42 | encoding: 'UTF8' 43 | } 44 | } 45 | datasource: { 46 | type: 'Microsoft.EventHub/EventHub' 47 | properties: { 48 | authenticationMode: 'Msi' 49 | eventHubName: eventHubName 50 | serviceBusNamespace: eventHubNamespace 51 | } 52 | } 53 | } 54 | } 55 | ] 56 | outputs: [ 57 | { 58 | name: 'output' 59 | properties: { 60 | datasource: { 61 | type: 'Microsoft.Storage/DocumentDB' 62 | properties: { 63 | accountId: cosmosDbAccountName 64 | database: cosmosDbDatabaseName 
# Streaming chat-completions request against the APIM gateway.
#
# NOTE(security): this file used to carry a literal APIM subscription key. Keep real keys
# out of source control: set the variable below locally before sending, and rotate the
# previously committed key in API Management (removing it here does not purge git history).
@apimBaseUrl = https://apim-7pg4fleh6wgj6.azure-api.net
@apimSubscriptionKey = REPLACE_WITH_SUBSCRIPTION_KEY

POST {{apimBaseUrl}}/openai/deployments/gpt-4o/chat/completions?api-version=2024-05-01-preview
Content-Type: application/json
api-key: {{apimSubscriptionKey}}

{
    "stream": true,
    "model": "chat",
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant that responds in Markdown. Help me with my math homework!"
        },
        {
            "role": "user",
            "content": "How to calculate the distance between earth and moon?"
        }
    ]
}
availableRoutesIndexes[0]; 52 | } 53 | 54 | if (availableRoutesIndexes.Count > 0) 55 | { 56 | //Returns a random route from the list if we have more than one available with the same priority 57 | return availableRoutesIndexes[new Random().Next(0, availableRoutesIndexes.Count)]; 58 | } 59 | else 60 | { 61 | //If there are no available routes, the request will be sent to the first one 62 | return 0; 63 | } 64 | }" /> 65 | 66 | ("url") + "/openai")" /> 67 | ("location"))" /> 68 | ("name"))" /> 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | ("routeIndex"); 79 | int retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("Retry-After", "-1")); 80 | 81 | if (retryAfter == -1) 82 | { 83 | retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("x-ratelimit-reset-requests", "-1")); 84 | } 85 | 86 | if (retryAfter == -1) 87 | { 88 | retryAfter = Convert.ToInt32(context.Response.Headers.GetValueOrDefault("x-ratelimit-reset-tokens", "10")); 89 | } 90 | 91 | JObject route = (JObject)routes[currentrouteIndex]; 92 | route["isThrottling"] = true; 93 | route["retryAfter"] = DateTime.Now.AddSeconds(retryAfter); 94 | 95 | return routes; 96 | }" /> 97 | 98 | ("isThrottling")) 108 | { 109 | remainingRoutes++; 110 | } 111 | } 112 | 113 | return remainingRoutes; 114 | }" /> 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /src/apim/oa-fragments-archived/oai-clusters-lb-configuration-in-policy.xml: -------------------------------------------------------------------------------- 1 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | o["deploymentName"]?.Value() == deploymentName); 96 | if(cluster == null) 97 | { 98 | //Error has no cluster matched the deployment name 99 | return new JArray() { new JObject() 100 | { 101 | { "name", deploymentName }, 102 | { "location", "NA" }, 103 | { "url", "No routes found for the deployment (" + deploymentName + ") in the region (" + 
context.Deployment.Region + ")" } 104 | } 105 | }; 106 | } 107 | JArray routes = (JArray)cluster["routes"]; 108 | return routes; 109 | }" /> 110 | 111 | 112 | 113 | 114 | 115 | 116 | @(((JArray)context.Variables["routes"]).ToString()) 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /src/apim/oa-fragments-archived/oai-usage-eventhub-out-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | @{ 5 | //Avoid reading response body as it can only be ready once, instead, before calling this fragement, a variable call responseBody will be set in the outbound policy 6 | //var responseBody = context.Response.Body?.As(true); //Avoid this one 7 | var responseBody = (JObject)context.Variables["responseBody"]; //It is set in the outbound policy before calling the fragment 8 | return new JObject( 9 | new JProperty("id", responseBody?["id"]?.ToString() ?? "NA"), 10 | new JProperty("timestamp", DateTime.UtcNow.ToString()), 11 | new JProperty("appId", context.Request.Headers.GetValueOrDefault("Authorization",string.Empty).Split(' ').LastOrDefault()?.AsJwt()?.Claims.GetValueOrDefault("appid", "NA")), 12 | new JProperty("subscriptionId", context.Subscription?.Id?.ToString() ?? "Portal-Admin"), 13 | new JProperty("productName", context.Product?.Name?.ToString() ?? "Portal-Admin"), 14 | new JProperty("targetService", responseBody?["object"]?.ToString() ?? "NA"), 15 | new JProperty("model", responseBody?["model"]?.ToString() ?? "NA"), 16 | new JProperty("gatewayName", context.Deployment?.ServiceName ?? "NA"), 17 | new JProperty("gatewayRegion", context.Deployment?.Region ?? "NA"), 18 | new JProperty("aiGatewayId", context.Deployment?.Gateway?.Id ?? "NA"), 19 | new JProperty("RequestIp", context.Request?.IpAddress ?? "NA"), 20 | new JProperty("operationName", context.Operation?.Name ?? 
"NA"), 21 | new JProperty("routeUrl", (string)context.Variables.GetValueOrDefault("routeUrl", "NA")), 22 | new JProperty("routeLocation", (string)context.Variables.GetValueOrDefault("routeLocation", "NA")), 23 | new JProperty("routeName", (string)context.Variables.GetValueOrDefault("routeName", "NA")), 24 | new JProperty("deploymentName", (string)context.Variables.GetValueOrDefault("deploymentName", "NA")), 25 | new JProperty("promptTokens", responseBody?["usage"]?["prompt_tokens"]?.ToString() ?? "0"), 26 | new JProperty("responseTokens", responseBody?["usage"]?["completion_tokens"]?.ToString() ?? "0"), 27 | new JProperty("totalTokens", responseBody?["usage"]?["total_tokens"]?.ToString() ?? "0") 28 | ).ToString(); 29 | } 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/apim/oa-weighted-lb/oai-clusters-weighted-lb-configuration-be-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 0)" count="3" interval="0"> 3 | ()) 12 | { 13 | nextRouteIndex = i; 14 | break; 15 | } 16 | } 17 | return nextRouteIndex; 18 | }" /> 19 | 20 | 21 | 22 | 23 | 24 | ("url") + "/openai")" /> 25 | ("location"))" /> 26 | ("name"))" /> 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/apim/oa-weighted-lb/oai-clusters-weighted-lb-configuration-in-policy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | o["deploymentName"]?.Value() == deploymentName); 91 | if(cluster == null) 92 | { 93 | //Error has no cluster matched the deployment name 94 | } 95 | JArray routes = (JArray)cluster["routes"]; 96 | return routes; 97 | }" /> 98 | 99 | 108 | 109 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /src/apim/oai-api/oai-api-policy-archived.xml: 
# Smoke tests for the OpenAI API exposed through the AI Hub Gateway (APIM).
#
# File variables are substituted verbatim, so their values must NOT be wrapped in quotes:
# a quoted value would put the quote characters into the request URL and the api-key header.
# Replace the placeholders locally; do not commit real subscription keys.
@aiHubGatewayOpenAIBaseUrl = https://REPLACE.azure-api.net/openai/deployments
@aiHRSubscriptionKey = REPLACE_WITH_AI_HR_SUBSCRIPTION_KEY
@aiRetailSubscriptionKey = REPLACE_WITH_AI_RETAIL_SUBSCRIPTION_KEY

### gpt-35-turbo/AI-HR
POST {{aiHubGatewayOpenAIBaseUrl}}/chat/chat/completions?api-version=2024-06-01
Content-Type: application/json
api-key: {{aiHRSubscriptionKey}}

{
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me a joke."}
    ],
    "stream": true
}

### gpt-4o/AI-HR
POST {{aiHubGatewayOpenAIBaseUrl}}/gpt-4o/chat/completions?api-version=2024-06-01
Content-Type: application/json
api-key: {{aiHRSubscriptionKey}}

{
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me a joke."}
    ],
    "stream": false
}

### gpt-35-turbo/AI-Retail
POST {{aiHubGatewayOpenAIBaseUrl}}/chat/chat/completions?api-version=2024-06-01
Content-Type: application/json
api-key: {{aiRetailSubscriptionKey}}

{
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me a joke."}
    ],
    "stream": false
}
results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # Azure Functions localsettings file 5 | local.settings.json 6 | 7 | # User-specific files 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | bld/ 24 | [Bb]in/ 25 | [Oo]bj/ 26 | [Ll]og/ 27 | 28 | # Visual Studio 2015 cache/options directory 29 | .vs/ 30 | # Uncomment if you have tasks that create the project's static files in wwwroot 31 | #wwwroot/ 32 | 33 | # MSTest test Results 34 | [Tt]est[Rr]esult*/ 35 | [Bb]uild[Ll]og.* 36 | 37 | # NUNIT 38 | *.VisualState.xml 39 | TestResult.xml 40 | 41 | # Build Results of an ATL Project 42 | [Dd]ebugPS/ 43 | [Rr]eleasePS/ 44 | dlldata.c 45 | 46 | # DNX 47 | project.lock.json 48 | project.fragment.lock.json 49 | artifacts/ 50 | 51 | *_i.c 52 | *_p.c 53 | *_i.h 54 | *.ilk 55 | *.meta 56 | *.obj 57 | *.pch 58 | *.pdb 59 | *.pgc 60 | *.pgd 61 | *.rsp 62 | *.sbr 63 | *.tlb 64 | *.tli 65 | *.tlh 66 | *.tmp 67 | *.tmp_proj 68 | *.log 69 | *.vspscc 70 | *.vssscc 71 | .builds 72 | *.pidb 73 | *.svclog 74 | *.scc 75 | 76 | # Chutzpah Test files 77 | _Chutzpah* 78 | 79 | # Visual C++ cache files 80 | ipch/ 81 | *.aps 82 | *.ncb 83 | *.opendb 84 | *.opensdf 85 | *.sdf 86 | *.cachefile 87 | *.VC.db 88 | *.VC.VC.opendb 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | *.vspx 94 | *.sap 95 | 96 | # TFS 2012 Local Workspace 97 | $tf/ 98 | 99 | # Guidance Automation Toolkit 100 | *.gpState 101 | 102 | # ReSharper is a .NET coding add-in 103 | _ReSharper*/ 104 | *.[Rr]e[Ss]harper 105 | *.DotSettings.user 106 | 107 | # JustCode is a .NET coding add-in 108 | .JustCode 109 | 110 | # TeamCity is a build add-in 111 | _TeamCity* 112 | 113 | # DotCover is a Code Coverage Tool 114 | *.dotCover 115 | 116 | # NCrunch 117 | _NCrunch_* 118 | 
.*crunch*.local.xml 119 | nCrunchTemp_* 120 | 121 | # MightyMoose 122 | *.mm.* 123 | AutoTest.Net/ 124 | 125 | # Web workbench (sass) 126 | .sass-cache/ 127 | 128 | # Installshield output folder 129 | [Ee]xpress/ 130 | 131 | # DocProject is a documentation generator add-in 132 | DocProject/buildhelp/ 133 | DocProject/Help/*.HxT 134 | DocProject/Help/*.HxC 135 | DocProject/Help/*.hhc 136 | DocProject/Help/*.hhk 137 | DocProject/Help/*.hhp 138 | DocProject/Help/Html2 139 | DocProject/Help/html 140 | 141 | # Click-Once directory 142 | publish/ 143 | 144 | # Publish Web Output 145 | *.[Pp]ublish.xml 146 | *.azurePubxml 147 | # TODO: Comment the next line if you want to checkin your web deploy settings 148 | # but database connection strings (with potential passwords) will be unencrypted 149 | #*.pubxml 150 | *.publishproj 151 | 152 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 153 | # checkin your Azure Web App publish settings, but sensitive information contained 154 | # in these scripts will be unencrypted 155 | PublishScripts/ 156 | 157 | # NuGet Packages 158 | *.nupkg 159 | # The packages folder can be ignored because of Package Restore 160 | **/packages/* 161 | # except build/, which is used as an MSBuild target. 
162 | !**/packages/build/ 163 | # Uncomment if necessary however generally it will be regenerated when needed 164 | #!**/packages/repositories.config 165 | # NuGet v3's project.json files produces more ignoreable files 166 | *.nuget.props 167 | *.nuget.targets 168 | 169 | # Microsoft Azure Build Output 170 | csx/ 171 | *.build.csdef 172 | 173 | # Microsoft Azure Emulator 174 | ecf/ 175 | rcf/ 176 | 177 | # Windows Store app package directories and files 178 | AppPackages/ 179 | BundleArtifacts/ 180 | Package.StoreAssociation.xml 181 | _pkginfo.txt 182 | 183 | # Visual Studio cache files 184 | # files ending in .cache can be ignored 185 | *.[Cc]ache 186 | # but keep track of directories ending in .cache 187 | !*.[Cc]ache/ 188 | 189 | # Others 190 | ClientBin/ 191 | ~$* 192 | *~ 193 | *.dbmdl 194 | *.dbproj.schemaview 195 | *.jfm 196 | *.pfx 197 | *.publishsettings 198 | node_modules/ 199 | orleans.codegen.cs 200 | 201 | # Since there are multiple workflows, uncomment next line to ignore bower_components 202 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 203 | #bower_components/ 204 | 205 | # RIA/Silverlight projects 206 | Generated_Code/ 207 | 208 | # Backup & report files from converting an old project file 209 | # to a newer Visual Studio version. 
using Microsoft.Azure.Functions.Worker;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;

// Entry point of the usage-ingestion Functions worker: configure the host builder step by
// step, register Application Insights telemetry, then build and run the host.
IHostBuilder workerHostBuilder = new HostBuilder();

workerHostBuilder.ConfigureFunctionsWebApplication();

workerHostBuilder.ConfigureServices(serviceCollection =>
{
    // Application Insights for the worker process, then its Functions-specific wiring.
    serviceCollection.AddApplicationInsightsTelemetryWorkerService();
    serviceCollection.ConfigureFunctionsApplicationInsights();
});

IHost workerHost = workerHostBuilder.Build();
workerHost.Run();
using System;
using System.Text;
using Azure.Messaging.EventHubs;
using Microsoft.Azure.Functions.Worker;
using Microsoft.Extensions.Logging;
using Azure.Identity;
using Microsoft.Azure.Cosmos;
using System.Threading.Tasks;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using Microsoft.Extensions.Configuration;

namespace AIHubGateway.UsageProcessing
{
    /// <summary>
    /// Event Hub triggered function that copies each usage event received on the
    /// "ai-usage" hub into a Cosmos DB container as a JSON document.
    /// </summary>
    public class UsageProcessorFunction
    {
        private readonly ILogger<UsageProcessorFunction> _logger;
        private readonly CosmosClient _cosmosClient;
        private readonly Container _container;

        /// <summary>
        /// Builds the Cosmos DB client and container handle from configuration.
        /// </summary>
        /// <param name="logger">Logger used to report per-event failures.</param>
        /// <param name="configuration">
        /// Supplies CosmosAccountEndpoint, CosmosDatabaseName, CosmosContainerName and the
        /// optional CosmosManagedIdentityId; a missing key is read as an empty string.
        /// </param>
        public UsageProcessorFunction(ILogger<UsageProcessorFunction> logger, IConfiguration configuration)
        {
            _logger = logger;

            // Read Cosmos DB settings from IConfiguration
            string accountEndpoint = configuration["CosmosAccountEndpoint"] ?? string.Empty;
            string databaseName = configuration["CosmosDatabaseName"] ?? string.Empty;
            string containerName = configuration["CosmosContainerName"] ?? string.Empty;
            string cosmosDbManagedIdentityClientId = configuration["CosmosManagedIdentityId"] ?? string.Empty; // using the same identity used with event hub

            // When a managed identity client id is configured, pin DefaultAzureCredential to it;
            // otherwise fall back to the default credential options.
            var credential = string.IsNullOrEmpty(cosmosDbManagedIdentityClientId)
                ? new DefaultAzureCredential()
                : new DefaultAzureCredential(new DefaultAzureCredentialOptions { ManagedIdentityClientId = cosmosDbManagedIdentityClientId });

            _cosmosClient = new CosmosClient(accountEndpoint, credential);
            _container = _cosmosClient.GetContainer(databaseName, containerName);
        }

        /// <summary>
        /// Inserts every event of the batch into Cosmos DB.
        /// </summary>
        /// <remarks>
        /// Each event is handled in its own try/catch, so one malformed or rejected event no
        /// longer prevents the remaining events of the batch from being stored. Failures are
        /// logged and swallowed, as before; no exception propagates to the Functions host.
        /// </remarks>
        /// <param name="events">Batch of events delivered by the Event Hub trigger.</param>
        [Function(nameof(UsageProcessorFunction))]
        public async Task Run([EventHubTrigger("ai-usage", Connection = "EventHubConnection")] EventData[] events)
        {
            foreach (EventData @event in events)
            {
                try
                {
                    string payload = Encoding.UTF8.GetString(@event.Body.ToArray());

                    // A payload of "null" (or an empty body) deserializes to null; skip it
                    // instead of handing a null item to the Cosmos SDK.
                    var usageRecord = JsonConvert.DeserializeObject<JObject>(payload);
                    if (usageRecord is null)
                    {
                        _logger.LogWarning("Skipping event {SequenceNumber}: body is empty or JSON null", @event.SequenceNumber);
                        continue;
                    }

                    // Insert the event into Cosmos DB
                    await _container.CreateItemAsync(usageRecord);
                }
                catch (Exception ex)
                {
                    // Message template instead of string interpolation: the previous
                    // "${ex.Message}" emitted a stray '$' in front of the message.
                    _logger.LogError(ex, "Error processing event {SequenceNumber}: {ErrorMessage}", @event.SequenceNumber, ex.Message);
                }
            }
        }
    }
}
-------------------------------------------------------------------------------- /src/usage-ingestion-logicapp/.funcignore: -------------------------------------------------------------------------------- 1 | .debug 2 | .git* 3 | .vscode 4 | __azurite_db*__.json 5 | __blobstorage__ 6 | __queuestorage__ 7 | local.settings.json 8 | test 9 | workflow-designtime/ -------------------------------------------------------------------------------- /src/usage-ingestion-logicapp/ai-usage-ingestion/workflow.json: -------------------------------------------------------------------------------- 1 | { 2 | "definition": { 3 | "$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#", 4 | "actions": { 5 | "Create_Usage_Log": { 6 | "inputs": { 7 | "parameters": { 8 | "containerId": "@appsetting('CosmosDBContainerUsage')", 9 | "databaseId": "@appsetting('CosmosDBDatabase')", 10 | "isUpsert": true, 11 | "item": "@triggerBody()?['contentData']" 12 | }, 13 | "serviceProviderConfiguration": { 14 | "connectionName": "AzureCosmosDB", 15 | "operationId": "CreateOrUpdateDocument", 16 | "serviceProviderId": "/serviceProviders/AzureCosmosDB" 17 | } 18 | }, 19 | "runAfter": {}, 20 | "type": "ServiceProvider" 21 | } 22 | }, 23 | "contentVersion": "1.0.0.0", 24 | "outputs": {}, 25 | "triggers": { 26 | "New_Usage_Record_Received": { 27 | "inputs": { 28 | "parameters": { 29 | "eventHubName": "@appsetting('eventHub_name')" 30 | }, 31 | "serviceProviderConfiguration": { 32 | "connectionName": "eventHub", 33 | "operationId": "receiveEvents", 34 | "serviceProviderId": "/serviceProviders/eventHub" 35 | } 36 | }, 37 | "splitOn": "@triggerOutputs()?['body']", 38 | "type": "ServiceProvider" 39 | } 40 | } 41 | }, 42 | "kind": "Stateful" 43 | } -------------------------------------------------------------------------------- /src/usage-ingestion-logicapp/connections.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "managedApiConnections": { 3 | "azuremonitorlogs": { 4 | "api": { 5 | "id": "@appsetting('AzureMonitor_Api_Id')" 6 | }, 7 | "authentication": { 8 | "type": "ManagedServiceIdentity" 9 | }, 10 | "connection": { 11 | "id": "@appsetting('AzureMonitor_Resource_Id')" 12 | }, 13 | "connectionProperties": { 14 | "authentication": { 15 | "additionalAudiences": [ 16 | "https://api.loganalytics.io" 17 | ], 18 | "audience": "https://management.core.windows.net/", 19 | "type": "ManagedServiceIdentity" 20 | } 21 | }, 22 | "connectionRuntimeUrl": "@appsetting('AzureMonitor_ConnectRuntime_Url')" 23 | } 24 | }, 25 | "serviceProviderConnections": { 26 | "AzureCosmosDB": { 27 | "displayName": "conn-cosmos-db", 28 | "parameterValues": { 29 | "connectionString": "@appsetting('AzureCosmosDB_connectionString')" 30 | }, 31 | "serviceProvider": { 32 | "id": "/serviceProviders/AzureCosmosDB" 33 | } 34 | }, 35 | "eventHub": { 36 | "displayName": "conn-ai-usage-event-hub", 37 | "parameterSetName": "ManagedServiceIdentity", 38 | "parameterValues": { 39 | "authProvider": { 40 | "Type": "ManagedServiceIdentity" 41 | }, 42 | "fullyQualifiedNamespace": "@appsetting('eventHub_fullyQualifiedNamespace')" 43 | }, 44 | "serviceProvider": { 45 | "id": "/serviceProviders/eventHub" 46 | } 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /src/usage-ingestion-logicapp/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | }, 11 | "extensionBundle": { 12 | "id": "Microsoft.Azure.Functions.ExtensionBundle.Workflows", 13 | "version": "[1.*, 2.0.0)" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- 
/src/usage-ingestion-logicapp/package.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /src/usage-ingestion-logicapp/workflow-designtime/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "extensionBundle": { 4 | "id": "Microsoft.Azure.Functions.ExtensionBundle.Workflows", 5 | "version": "[1.*, 2.0.0)" 6 | }, 7 | "extensions": { 8 | "workflow": { 9 | "settings": { 10 | "Runtime.WorkflowOperationDiscoveryHostMode": "true" 11 | } 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbit -------------------------------------------------------------------------------- /src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-3.pbix -------------------------------------------------------------------------------- /src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-4.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-4.pbix -------------------------------------------------------------------------------- 
/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/ai-hub-gateway-solution-accelerator/55a5cccd0675808cac7ad422a6af780e5d01fc0f/src/usage-reports/AI-Hub-Gateway-Usage-Report-v1-5-Incremetal.pbix -------------------------------------------------------------------------------- /src/usage-reports/AI-Search-Cost-Estimation-Logic.md: -------------------------------------------------------------------------------- 1 | # Azure AI Search Cost Estimation 2 | 3 | Because the cost of Azure AI Search depends on multiple dimensions such as service tier, number of units, storage, data transfer, and other enabled features like cognitive skills, it is challenging to come up with a simple formula to estimate the cost per request. 4 | 5 | This document provides a high-level overview of the cost estimation logic for Azure AI Search. The cost estimation logic is based on the pricing details provided by Microsoft for Azure AI Search. 6 | 7 | ## Scenario 1: Cost Estimation for Standard S1 Search Service 8 | 9 | - **Service Tier**: Standard S1 10 | - **Number of Units**: 2 11 | - **Region**: East US 12 | - **Duration**: 1 month 13 | - **Semantic Ranker**: 100K requests 14 | 15 | Total Cost ~ $590/month 16 | 17 | Assuming 100% of API calls go through APIM, you can estimate the cost attributable to each consumer by multiplying its share of the month's total usage by the total monthly cost of the service. 
18 | 19 | - **Search-Retail**: 60% * $590 = $354 20 | - **Search-HR**: 40% * $590 = $236 -------------------------------------------------------------------------------- /src/usage-reports/model-pricing.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1", 4 | "model": "ada", 5 | "deploymentName": "embedding", 6 | "isActive": true, 7 | "CostPerInputUnit": 0.0001, 8 | "CostPerOutputUnit": 0, 9 | "CostUnit": 1000, 10 | "BaseCost": 0, 11 | "Currency": "USD", 12 | "CalculationMethod": "tokens", 13 | "region": "ALL" 14 | }, 15 | { 16 | "id": "2", 17 | "model": "gpt-4o-mini", 18 | "deploymentName": "chat", 19 | "isActive": true, 20 | "CostPerInputUnit": 0.15, 21 | "CostPerOutputUnit": 0.60, 22 | "CostUnit": 1000000, 23 | "BaseCost": 0, 24 | "Currency": "USD", 25 | "CalculationMethod": "tokens", 26 | "region": "ALL" 27 | }, 28 | { 29 | "id": "3", 30 | "model": "gpt-4", 31 | "deploymentName": "gpt-4", 32 | "isActive": true, 33 | "CostPerInputUnit": 0.03, 34 | "CostPerOutputUnit": 0.06, 35 | "CostUnit": 1000, 36 | "BaseCost": 0, 37 | "Currency": "USD", 38 | "CalculationMethod": "tokens", 39 | "region": "ALL" 40 | }, 41 | { 42 | "id": "4", 43 | "model": "gpt-4o", 44 | "deploymentName": "gpt-4o", 45 | "isActive": true, 46 | "CostPerInputUnit": 0.005, 47 | "CostPerOutputUnit": 0.015, 48 | "CostUnit": 1000, 49 | "BaseCost": 0, 50 | "Currency": "USD", 51 | "CalculationMethod": "tokens", 52 | "region": "ALL" 53 | }, 54 | { 55 | "id": "5", 56 | "model": "dall-e-3", 57 | "deploymentName": "dall-e-3", 58 | "isActive": true, 59 | "CostPerInputUnit": 4, 60 | "CostPerOutputUnit": 0, 61 | "CostUnit": 100, 62 | "BaseCost": 0, 63 | "Currency": "USD", 64 | "CalculationMethod": "tokens", 65 | "region": "ALL" 66 | }, 67 | { 68 | "id": "6", 69 | "model": "ai-search", 70 | "deploymentName": "ai-search-business", 71 | "isActive": true, 72 | "CostPerInputUnit": 1, 73 | "CostPerOutputUnit": 0, 74 | "CostUnit": 1, 75 | "BaseCost": 600, 76 
| "Currency": "USD", 77 | "CalculationMethod": "percentage", 78 | "region": "ALL" 79 | }, 80 | { 81 | "id": "7", 82 | "model": "ai-search", 83 | "deploymentName": "ai-search-marketing", 84 | "isActive": true, 85 | "CostPerInputUnit": 1, 86 | "CostPerOutputUnit": 0, 87 | "CostUnit": 1, 88 | "BaseCost": 1000, 89 | "Currency": "USD", 90 | "CalculationMethod": "percentage", 91 | "region": "ALL" 92 | } 93 | ] -------------------------------------------------------------------------------- /src/usage-reports/usage-record.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "chatcmpl-9TVtj333ld9WDBiRh9qvLE9ZpA", 3 | "timestamp": "5/24/2024 2:45:56 PM", 4 | "appId": "11115293-844b-456f-8dbc-0c2d78e3e307", 5 | "subscriptionId": "master", 6 | "productName": "AI-Retail", 7 | "targetService": "chat.completion", 8 | "model": "gpt-35-turbo", 9 | "gatewayName": "APIM-HOST.azure-api.net", 10 | "gatewayRegion": "East US", 11 | "aiGatewayId": "managed", 12 | "RequestIp": "X.Y.Z", 13 | "operationName": "Creates a completion for the chat message", 14 | "sessionId": "NA", 15 | "endUserId": "NA", 16 | "backendId": "openai-backend-2", 17 | "routeLocation": "eastus2", 18 | "routeName": "EastUS2", 19 | "deploymentName": "chat", 20 | "promptTokens": "15000", 21 | "responseTokens": "5000", 22 | "totalTokens": "20000", 23 | "EventProcessedUtcTime": "2024-05-27T14:46:04.4009773Z", 24 | "PartitionId": 1, 25 | "EventEnqueuedUtcTime": "2024-05-27T14:46:04.1720000Z" 26 | } --------------------------------------------------------------------------------