├── .gitattributes ├── .github └── workflows │ ├── CustomRateLimitAPI.yml │ └── IaC.yml ├── .gitignore ├── APIM ├── Fragments │ ├── AllPolicy.xml │ ├── Inbound-CustomRateLimiter.xml │ ├── Inbound-Logger.xml │ ├── Outbound-CustomRateLimiter.xml │ └── Outbound-Logger.xml └── Scripts │ └── CreateLogger.ps ├── IaC └── main.tf ├── LICENSE.txt ├── Microsoft.OpenAIRateLimiter.API.sln ├── Microsoft.OpenAIRateLimiter.API ├── .gitignore ├── Microsoft.OpenAIRateLimiter.API.csproj ├── Models │ ├── BudgetAlert.cs │ ├── KVQuota.cs │ └── QuotaDetail.cs ├── Properties │ ├── ServiceDependencies │ │ └── CustomRateLimitAPI │ │ │ ├── appInsights1.arm.json │ │ │ └── storage1.arm.json │ ├── launchSettings.json │ ├── serviceDependencies.CustomRateLimitAPI.json │ ├── serviceDependencies.json │ └── serviceDependencies.local.json ├── QuotaRequest.cs ├── Startup.cs └── host.json ├── Microsoft.OpenAIRateLimiter.Service ├── Common │ └── HttpUtilities.cs ├── IParseService.cs ├── IQuotaService.cs ├── ITokenService.cs ├── Microsoft.OpenAIRateLimiter.Service.csproj ├── Models │ ├── QuotaDTO.cs │ ├── QuotaEntity.cs │ ├── QuotaEntry.cs │ └── QuotaTransDTO.cs ├── ParseService.cs ├── QuotaService.cs └── TokenService.cs └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 
11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.github/workflows/CustomRateLimitAPI.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Rate Limiter API to Azure Function App 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - 'README.md' 7 | - 'IaC/**' 8 | - 'APIM/**' 9 | - '.github/workflows/**' 10 | 11 | # CONFIGURATION 12 | # For help, go to https://github.com/Azure/Actions 13 | # 14 | # 1. Paste the RBAC json into the following secret in your repository: 15 | # AZURE_RBAC_CREDENTIALS 16 | # 17 | # 2. Change these variables for your configuration: 18 | env: 19 | AZURE_FUNCTIONAPP_NAME: CustomQuotaAPI # set this to your function app name on Azure 20 | AZURE_FUNCTIONAPP_PACKAGE_PATH: '.' # set this to the path to your function app project, defaults to the repository root 21 | DOTNET_VERSION: '6.0.x' # set this to the dotnet version to use (e.g. 
'2.1.x', '3.1.x', '5.0.x') 22 | 23 | jobs: 24 | build-and-deploy: 25 | runs-on: windows-latest 26 | steps: 27 | - name: 'Checkout GitHub Action' 28 | uses: actions/checkout@v3 29 | 30 | - name: 'Login via Azure CLI' 31 | uses: azure/login@v1 32 | with: 33 | creds: ${{ secrets.AZURE_RBAC_CREDENTIALS }} 34 | 35 | - name: Setup DotNet ${{ env.DOTNET_VERSION }} Environment 36 | uses: actions/setup-dotnet@v3 37 | with: 38 | dotnet-version: ${{ env.DOTNET_VERSION }} 39 | 40 | - name: 'Resolve Project Dependencies Using Dotnet' 41 | shell: pwsh 42 | run: | 43 | pushd './${{ env.AZURE_FUNCTIONAPP_PACKAGE_PATH }}' 44 | dotnet build --configuration Release --output ./output 45 | popd 46 | 47 | - name: 'Run Azure Functions Action' 48 | uses: Azure/functions-action@v1 49 | id: fa 50 | with: 51 | app-name: ${{ env.AZURE_FUNCTIONAPP_NAME }} 52 | package: '${{ env.AZURE_FUNCTIONAPP_PACKAGE_PATH }}/output' 53 | -------------------------------------------------------------------------------- /.github/workflows/IaC.yml: -------------------------------------------------------------------------------- 1 | # This workflow installs the latest version of Terraform CLI and configures the Terraform CLI configuration file 2 | # with an API token for Terraform Cloud (app.terraform.io). On pull request events, this workflow will run 3 | # `terraform init`, `terraform fmt`, and `terraform plan` (speculative plan via Terraform Cloud). On push events 4 | # to the "master" branch, `terraform apply` will be executed. 5 | # 6 | # Documentation for `hashicorp/setup-terraform` is located here: https://github.com/hashicorp/setup-terraform 7 | # 8 | # To use this workflow, you will need to complete the following setup steps. 9 | # 10 | # 1. Create a `main.tf` file in the root of this repository with the `remote` backend and one or more resources defined. 11 | # Example `main.tf`: 12 | # # The configuration for the `remote` backend. 
13 | # terraform { 14 | # backend "remote" { 15 | # # The name of your Terraform Cloud organization. 16 | # organization = "example-organization" 17 | # 18 | # # The name of the Terraform Cloud workspace to store Terraform state files in. 19 | # workspaces { 20 | # name = "example-workspace" 21 | # } 22 | # } 23 | # } 24 | # 25 | # # An example resource that does nothing. 26 | # resource "null_resource" "example" { 27 | # triggers = { 28 | # value = "A example resource that does nothing!" 29 | # } 30 | # } 31 | # 32 | # 33 | # 2. Generate a Terraform Cloud user API token and store it as a GitHub secret (e.g. TF_API_TOKEN) on this repository. 34 | # Documentation: 35 | # - https://www.terraform.io/docs/cloud/users-teams-organizations/api-tokens.html 36 | # - https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets 37 | # 38 | # 3. Reference the GitHub secret in step using the `hashicorp/setup-terraform` GitHub Action. 39 | # Example: 40 | # - name: Setup Terraform 41 | # uses: hashicorp/setup-terraform@v1 42 | # with: 43 | # cli_config_credentials_token: ${{ secrets.TF_API_TOKEN }} 44 | 45 | name: 'Terraform' 46 | 47 | on: 48 | push: 49 | branches: [ "master" ] 50 | pull_request: 51 | 52 | permissions: 53 | contents: read 54 | 55 | jobs: 56 | terraform: 57 | name: 'Terraform' 58 | runs-on: ubuntu-latest 59 | environment: production 60 | 61 | # Use the Bash shell regardless whether the GitHub Actions runner is ubuntu-latest, macos-latest, or windows-latest 62 | defaults: 63 | run: 64 | shell: bash 65 | 66 | steps: 67 | # Checkout the repository to the GitHub Actions runner 68 | - name: Checkout 69 | uses: actions/checkout@v3 70 | 71 | # Install the latest version of Terraform CLI and configure the Terraform CLI configuration file with a Terraform Cloud user API token 72 | - name: Setup Terraform 73 | uses: hashicorp/setup-terraform@v1 74 | with: 75 | cli_config_credentials_token: ${{ secrets.TF_API_TOKEN }} 76 | 77 
| # Initialize a new or existing Terraform working directory (the configuration lives in ./IaC, hence -chdir) by creating initial files, loading any remote state, downloading modules, etc. 78 | - name: Terraform Init 79 | run: terraform -chdir=IaC init 80 | 81 | # Validates the Terraform configuration files in ./IaC 82 | - name: Terraform Validate 83 | run: terraform -chdir=IaC validate 84 | 85 | # Generates an execution plan for Terraform 86 | - name: Terraform Plan 87 | run: terraform -chdir=IaC plan -input=false 88 | 89 | # On push to "master", build or change infrastructure according to Terraform configuration files 90 | # Note: It is recommended to set up a required "strict" status check in your repository for "Terraform Cloud". See the documentation on "strict" required status checks for more information: https://help.github.com/en/github/administering-a-repository/types-of-required-status-checks 91 | - name: Terraform Apply 92 | if: github.ref == 'refs/heads/master' && github.event_name == 'push' 93 | run: terraform -chdir=IaC apply -auto-approve -input=false 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Oo]ut/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | *.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 
118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio LightSwitch build output 298 | **/*.HTMLClient/GeneratedArtifacts 299 | **/*.DesktopClient/GeneratedArtifacts 300 | **/*.DesktopClient/ModelManifest.xml 301 | **/*.Server/GeneratedArtifacts 302 | **/*.Server/ModelManifest.xml 303 | _Pvt_Extensions 304 | 305 | # Paket dependency manager 306 | .paket/paket.exe 307 | paket-files/ 308 | 309 | # FAKE - F# Make 310 | .fake/ 311 | 312 | # CodeRush personal settings 313 | .cr/personal 314 | 315 | # Python Tools for Visual Studio (PTVS) 316 | __pycache__/ 317 | *.pyc 318 | 319 | # Cake - Uncomment if you are using it 320 | # tools/** 321 | # !tools/packages.config 322 | 323 | # Tabs Studio 324 | *.tss 325 | 326 | # Telerik's JustMock configuration file 327 | *.jmconfig 328 | 329 | # BizTalk build output 330 | *.btp.cs 331 | *.btm.cs 332 | *.odx.cs 333 | *.xsd.cs 334 | 335 | # OpenCover UI analysis results 336 | OpenCover/ 337 | 338 | # Azure Stream Analytics local run output 339 | ASALocalRun/ 340 | 341 | # MSBuild Binary and Structured Log 342 | *.binlog 343 | 344 | # NVidia Nsight GPU debugger 
configuration file 345 | *.nvuser 346 | 347 | # MFractors (Xamarin productivity tool) working folder 348 | .mfractor/ 349 | 350 | # Local History for Visual Studio 351 | .localhistory/ 352 | 353 | # BeatPulse healthcheck temp database 354 | healthchecksdb 355 | 356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 357 | MigrationBackup/ 358 | 359 | # Ionide (cross platform F# VS Code tools) working folder 360 | .ionide/ 361 | 362 | # Fody - auto-generated XML schema 363 | FodyWeavers.xsd -------------------------------------------------------------------------------- /APIM/Fragments/AllPolicy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /APIM/Fragments/Inbound-CustomRateLimiter.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | @(context.Variables.GetValueOrDefault("QuotaURL")) 7 | get 8 | 9 | {{QuotaQueryKey}} 10 | 11 | 12 | 13 | 14 | ()["Amount"] <= 0.00)"> 15 | 16 | 17 | 18 | application/json 19 | 20 | @{ 21 | return new JObject( 22 | new JProperty("statusCode", 429), 23 | new JProperty("message", "You have reached your monthly spend amount") 24 | ).ToString(); } 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /APIM/Fragments/Inbound-Logger.xml: -------------------------------------------------------------------------------- 1 | 2 | @{ 3 | // Builds the JSON log record for the incoming request; the body may be absent, so every body lookup is null-safe. 4 | var requestBody = context.Request.Body?.As(true); 5 | 6 | string model = requestBody?["model"]?.ToString(); 7 | string messages = requestBody?["messages"]?.ToString(); 8 | // NOTE(review): the value logged as "subscriptionId" is the subscription key (a secret) - confirm consumers need the key rather than context.Subscription.Id. 9 | return new JObject( 10 | new JProperty("reqURL", context.Request.Url.ToString()), 11 | new JProperty("subscriptionId", context.Subscription?.Key), 12 | new JProperty("model", model), 13 | new JProperty("messages", 
messages), 14 | new JProperty("timeStamp", DateTime.UtcNow.ToString()) 15 | ).ToString(); 16 | } 17 | 18 | -------------------------------------------------------------------------------- /APIM/Fragments/Outbound-CustomRateLimiter.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | {{CustomQuotaUpdateURL}} 4 | POST 5 | 6 | {{QuotaQueryKey}} 7 | 8 | @{ 9 | 10 | return new JObject( 11 | new JProperty("subscriptionKey", context.Subscription?.Key), 12 | new JProperty("prompt", context.Variables.GetValueOrDefault("prompt") ?? ""), 13 | new JProperty("responseBody", context.Response.Body?.As(true)) 14 | ).ToString(); 15 | 16 | } 17 | 18 | 19 | -------------------------------------------------------------------------------- /APIM/Fragments/Outbound-Logger.xml: -------------------------------------------------------------------------------- 1 | 2 | @{ 3 | // Builds the JSON log record for the backend response; the body or any of its fields may be missing, so every lookup is null-safe and logs null instead of throwing. 4 | var resp = context.Response.Body?.As(true); 5 | 6 | string response = resp?["choices"]?.ToString(); 7 | 8 | return new JObject( 9 | new JProperty("processTime", context.Response.Headers.GetValueOrDefault("openai-processing-ms",string.Empty)), 10 | new JProperty("choices", response), 11 | new JProperty("operation", resp?["object"]?.ToString()), 12 | new JProperty("model", resp?["model"]?.ToString()), 13 | new JProperty("completionTokens", resp?["usage"]?["completion_tokens"]?.ToString()), 14 | new JProperty("promptTokens", resp?["usage"]?["prompt_tokens"]?.ToString()), 15 | new JProperty("TotalTokens", resp?["usage"]?["total_tokens"]?.ToString()), 16 | new JProperty("timeStamp", DateTime.UtcNow.ToString()) 17 | ).ToString(); 18 | 19 | } 20 | 21 | -------------------------------------------------------------------------------- /APIM/Scripts/CreateLogger.ps: -------------------------------------------------------------------------------- 1 | 2 | 3 | # API Management specific info 4 | $apimServiceName = "" 5 | $resourceGroupName = "" 6 | 7 | # Create logger 8 | $context = 
New-AzApiManagementContext -ResourceGroupName $resourceGroupName -ServiceName $apimServiceName 9 | New-AzApiManagementLogger -Context $context -LoggerId "EHAppInnoAILogger" -Name "ApimEventHubAI" -ConnectionString "Endpoint=sb://ehappinnoai.servicebus.windows.net/;" -Description "EHAppInnoAI.ehaoi Event Hub Logger" 10 | -------------------------------------------------------------------------------- /IaC/main.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | 3 | required_providers { 4 | azurerm = { 5 | source = "hashicorp/azurerm" 6 | version = ">=3.0.0" 7 | 8 | } 9 | 10 | azapi = { 11 | source = "Azure/azapi" 12 | } 13 | 14 | } 15 | 16 | } 17 | 18 | // Step 1: configure the azurerm and azapi providers for the subscription given by var.subscriptionid 19 | provider "azurerm" { 20 | features { 21 | resource_group { 22 | prevent_deletion_if_contains_resources = false 23 | } 24 | } 25 | 26 | skip_provider_registration = true 27 | 28 | subscription_id = var.subscriptionid 29 | 30 | } 31 | 32 | provider "azapi" { 33 | 34 | skip_provider_registration = true 35 | 36 | subscription_id = var.subscriptionid 37 | 38 | } 39 | 40 | // Step 2: shared tag set, referenced as local.tags by the resources below 41 | locals { 42 | tags = { 43 | environment = "prod" 44 | department = "IT" 45 | source = "terraform" 46 | contactemail = var.contactemail 47 | } 48 | } 49 | 50 | resource "azurerm_resource_group" "perftestgroup" { 51 | name = "${var.projectnamingconvention}-rg" 52 | location = var.location 53 | 54 | tags = local.tags 55 | } 56 | 57 | resource "azurerm_storage_account" "storageQuotaApp" { 58 | name = lower(replace("${var.projectnamingconvention}quotaappsto", "-", "")) 59 | resource_group_name = azurerm_resource_group.perftestgroup.name 60 | location = azurerm_resource_group.perftestgroup.location 61 | account_tier = "Standard" 62 | account_replication_type = "LRS" 63 | 64 | tags = local.tags 65 | } 66 | 67 | resource "azurerm_storage_account" "storageTokenizerApp" { 68 | name = 
lower(replace("${var.projectnamingconvention}tokenizersto", "-", "")) 69 | resource_group_name = azurerm_resource_group.perftestgroup.name 70 | location = azurerm_resource_group.perftestgroup.location 71 | account_tier = "Standard" 72 | account_replication_type = "LRS" 73 | 74 | tags = local.tags 75 | } 76 | 77 | //create log analytics workspace 78 | resource "azurerm_log_analytics_workspace" "law" { 79 | name = "${var.projectnamingconvention}-law" 80 | location = azurerm_resource_group.perftestgroup.location 81 | resource_group_name = azurerm_resource_group.perftestgroup.name 82 | sku = "PerGB2018" 83 | retention_in_days = 30 84 | 85 | tags = local.tags 86 | } 87 | 88 | //create workspace-based app insights instance 89 | resource "azurerm_application_insights" "appinsights" { 90 | name = "${var.projectnamingconvention}-appinsights" 91 | location = azurerm_resource_group.perftestgroup.location 92 | resource_group_name = azurerm_resource_group.perftestgroup.name 93 | application_type = "web" 94 | retention_in_days = 30 95 | 96 | workspace_id = azurerm_log_analytics_workspace.law.id 97 | 98 | tags = local.tags 99 | } 100 | 101 | resource "azurerm_service_plan" "functionserviceplan" { 102 | name = "${var.projectnamingconvention}-asp-win-1" 103 | resource_group_name = azurerm_resource_group.perftestgroup.name 104 | location = azurerm_resource_group.perftestgroup.location 105 | 106 | maximum_elastic_worker_count = 50 107 | 108 | os_type = "Windows" 109 | sku_name = "EP2" 110 | 111 | tags = local.tags 112 | } 113 | 114 | resource "azurerm_windows_function_app" "Quotafunction" { 115 | name = "${var.projectnamingconvention}-quota-func-1" 116 | location = azurerm_resource_group.perftestgroup.location 117 | resource_group_name = azurerm_resource_group.perftestgroup.name 118 | service_plan_id = azurerm_service_plan.functionserviceplan.id 119 | 120 | storage_account_name = azurerm_storage_account.storageQuotaApp.name 121 | storage_account_access_key = 
azurerm_storage_account.storageQuotaApp.primary_access_key 122 | 123 | app_settings = { 124 | "APPINSIGHTS_INSTRUMENTATIONKEY" = azurerm_application_insights.appinsights.instrumentation_key 125 | "TokenizerURL" = "https://${azurerm_linux_function_app.tokenizerfunction.default_hostname}" 126 | "TokenizerKey" = data.azurerm_function_app_host_keys.tokenizer.primary_key 127 | "RedisInstance" = azurerm_redis_enterprise_cluster.RedisCache.name 128 | "TableName" = azurerm_storage_table.kvTable.name 129 | } 130 | 131 | connection_string { 132 | name = "RedisConn" 133 | type = "Custom" 134 | value = "${azurerm_redis_enterprise_cluster.RedisCache.hostname}:${azurerm_redis_enterprise_database.RedisDB.port},password=${azurerm_redis_enterprise_database.RedisDB.primary_access_key},ssl=True,abortConnect=False" 135 | } 136 | 137 | connection_string { 138 | name = "StorageConn" 139 | type = "Custom" 140 | value = azurerm_storage_account.storageSolution.primary_connection_string 141 | } 142 | 143 | depends_on = [azurerm_redis_enterprise_database.RedisDB, 144 | azurerm_storage_account.storageSolution, 145 | azurerm_storage_table.kvTable, 146 | azurerm_linux_function_app.tokenizerfunction] 147 | 148 | identity { 149 | type = "SystemAssigned" 150 | } 151 | 152 | site_config { 153 | application_stack { 154 | dotnet_version = "v6.0" 155 | } 156 | elastic_instance_minimum = 5 157 | } 158 | 159 | tags = local.tags 160 | } 161 | 162 | data "azurerm_function_app_host_keys" "Quotafunction" { 163 | name = azurerm_windows_function_app.Quotafunction.name 164 | resource_group_name = azurerm_resource_group.perftestgroup.name 165 | } 166 | 167 | resource "azurerm_service_plan" "pyfunctionserviceplan" { 168 | name = "${var.projectnamingconvention}-asp-linux-1" 169 | resource_group_name = azurerm_resource_group.perftestgroup.name 170 | location = azurerm_resource_group.perftestgroup.location 171 | 172 | maximum_elastic_worker_count = 50 173 | 174 | os_type = "Linux" 175 | sku_name = "EP2" 176 | 177 | 
tags = local.tags 178 | } 179 | 180 | resource "azurerm_linux_function_app" "tokenizerfunction" { 181 | name = "${var.projectnamingconvention}-tokenizer-func-1" 182 | location = azurerm_resource_group.perftestgroup.location 183 | resource_group_name = azurerm_resource_group.perftestgroup.name 184 | service_plan_id = azurerm_service_plan.pyfunctionserviceplan.id 185 | 186 | storage_account_name = azurerm_storage_account.storageTokenizerApp.name 187 | storage_account_access_key = azurerm_storage_account.storageTokenizerApp.primary_access_key 188 | 189 | identity { 190 | type = "SystemAssigned" 191 | } 192 | 193 | app_settings = { 194 | "APPINSIGHTS_INSTRUMENTATIONKEY" = azurerm_application_insights.appinsights.instrumentation_key 195 | } 196 | 197 | site_config { 198 | application_stack { 199 | python_version = "3.10" 200 | } 201 | elastic_instance_minimum = 5 202 | } 203 | 204 | tags = local.tags 205 | } 206 | 207 | data "azurerm_function_app_host_keys" "tokenizer" { 208 | name = azurerm_linux_function_app.tokenizerfunction.name 209 | resource_group_name = azurerm_resource_group.perftestgroup.name 210 | } 211 | 212 | //Enterprise REDIS for HADR 213 | resource "azurerm_redis_enterprise_cluster" "RedisCache" { 214 | name = "${var.projectnamingconvention}rediscache" 215 | resource_group_name = azurerm_resource_group.perftestgroup.name 216 | location = azurerm_resource_group.perftestgroup.location 217 | 218 | sku_name = "Enterprise_E10-2" 219 | 220 | tags = local.tags 221 | } 222 | 223 | resource "azurerm_redis_enterprise_database" "RedisDB" { 224 | name = "default" 225 | 226 | cluster_id = azurerm_redis_enterprise_cluster.RedisCache.id 227 | client_protocol = "Encrypted" 228 | clustering_policy = "EnterpriseCluster" 229 | eviction_policy = "NoEviction" 230 | 231 | linked_database_id = [ 232 | "${azurerm_redis_enterprise_cluster.RedisCache.id}/databases/default" 233 | ] 234 | 235 | linked_database_group_nickname = "${var.projectnamingconvention}GeoGroup" 236 | } 237 | 
238 | resource "azurerm_storage_account" "storageSolution" { 239 | name = lower(replace("${var.projectnamingconvention}gatewaytablesto", "-", "")) 240 | resource_group_name = azurerm_resource_group.perftestgroup.name 241 | location = azurerm_resource_group.perftestgroup.location 242 | account_tier = "Standard" 243 | account_replication_type = "LRS" 244 | 245 | tags = local.tags 246 | } 247 | 248 | resource "azurerm_storage_table" "kvTable" { 249 | name = "AIKeyValueStore" 250 | storage_account_name = azurerm_storage_account.storageSolution.name 251 | depends_on = [azurerm_storage_account.storageSolution] 252 | 253 | } 254 | 255 | resource "azurerm_eventhub_namespace" "main" { 256 | name = "${var.projectnamingconvention}ehns" 257 | location = azurerm_resource_group.perftestgroup.location 258 | resource_group_name = azurerm_resource_group.perftestgroup.name 259 | sku = "Standard" 260 | capacity = 5 261 | 262 | tags = local.tags 263 | } 264 | 265 | resource "azurerm_eventhub" "main" { 266 | name = "${var.projectnamingconvention}eh" 267 | namespace_name = azurerm_eventhub_namespace.main.name 268 | resource_group_name = azurerm_resource_group.perftestgroup.name 269 | partition_count = 2 270 | message_retention = 1 271 | 272 | depends_on = [azurerm_eventhub_namespace.main] 273 | } 274 | 275 | resource "azurerm_api_management" "apim" { 276 | name = "${var.projectnamingconvention}apim" 277 | location = azurerm_resource_group.perftestgroup.location 278 | resource_group_name = azurerm_resource_group.perftestgroup.name 279 | publisher_name = var.companyname 280 | publisher_email = var.contactemail 281 | sku_name = "Premium_1" 282 | 283 | identity { 284 | type = "SystemAssigned" 285 | } 286 | 287 | tags = local.tags 288 | } 289 | 290 | // azure api management logger eventhub 291 | resource "azurerm_api_management_logger" "apimlogger" { 292 | name = "${var.projectnamingconvention}apimlogger" 293 | api_management_name = azurerm_api_management.apim.name 294 | resource_group_name 
= azurerm_resource_group.perftestgroup.name 295 | 296 | description = "logger for eventhub" 297 | 298 | eventhub { 299 | name = azurerm_eventhub.main.name 300 | connection_string = azurerm_eventhub_namespace.main.default_primary_connection_string 301 | } 302 | 303 | depends_on = [azurerm_eventhub.main, azurerm_api_management.apim] 304 | 305 | } 306 | 307 | resource "azurerm_api_management_named_value" "CustomQuotaUpdateURL" { 308 | name = "CustomQuotaUpdateURL" 309 | resource_group_name = azurerm_resource_group.perftestgroup.name 310 | api_management_name = azurerm_api_management.apim.name 311 | display_name = "CustomQuotaUpdateURL" 312 | value = "https://${azurerm_windows_function_app.Quotafunction.default_hostname}/api/Quota/Update" 313 | } 314 | 315 | resource "azurerm_api_management_named_value" "QuotaQueryKey" { 316 | name = "QuotaQueryKey" 317 | resource_group_name = azurerm_resource_group.perftestgroup.name 318 | api_management_name = azurerm_api_management.apim.name 319 | display_name = "QuotaQueryKey" 320 | value = data.azurerm_function_app_host_keys.Quotafunction.primary_key 321 | } 322 | 323 | resource "azurerm_api_management_named_value" "QuotaQueryURL" { 324 | name = "QuotaQueryURL" 325 | resource_group_name = azurerm_resource_group.perftestgroup.name 326 | api_management_name = azurerm_api_management.apim.name 327 | display_name = "QuotaQueryURL" 328 | value = "https://${azurerm_windows_function_app.Quotafunction.default_hostname}/api/Quota/{keyId}" 329 | } 330 | 331 | 332 | resource "azapi_resource" "Inbound-CustomRateLimiter" { 333 | type = "Microsoft.ApiManagement/service/policyFragments@2023-03-01-preview" 334 | name = "Inbound-CustomRateLimiter" 335 | parent_id = azurerm_api_management.apim.id 336 | 337 | body = jsonencode({ 338 | properties = { 339 | description = "Inbound-CustomRateLimiter" 340 | format = "xml" 341 | value = "" 342 | } 343 | }) 344 | 345 | depends_on = [azurerm_api_management.apim] 346 | } 347 | 348 | resource "azapi_resource" 
"Outbound-CustomRateLimiter" { 349 | type = "Microsoft.ApiManagement/service/policyFragments@2023-03-01-preview" 350 | name = "Outbound-CustomRateLimiter" 351 | parent_id = azurerm_api_management.apim.id 352 | 353 | body = jsonencode({ 354 | properties = { 355 | description = "Outbound-CustomRateLimiter" 356 | format = "xml" 357 | value = "" 358 | } 359 | }) 360 | 361 | depends_on = [azurerm_api_management.apim] 362 | } 363 | 364 | resource "azapi_resource" "Inbound-Logger" { 365 | type = "Microsoft.ApiManagement/service/policyFragments@2023-03-01-preview" 366 | name = "Inbound-Logger" 367 | parent_id = azurerm_api_management.apim.id 368 | 369 | body = jsonencode({ 370 | properties = { 371 | description = "Inbound-Logger" 372 | format = "xml" 373 | value = "" 374 | } 375 | }) 376 | depends_on = [azurerm_api_management.apim] 377 | } 378 | 379 | resource "azapi_resource" "Outbound-Logger" { 380 | type = "Microsoft.ApiManagement/service/policyFragments@2023-03-01-preview" 381 | name = "Outbound-Logger" 382 | parent_id = azurerm_api_management.apim.id 383 | 384 | body = jsonencode({ 385 | properties = { 386 | description = "Outbound-Logger" 387 | format = "xml" 388 | value = "" 389 | } 390 | }) 391 | depends_on = [azurerm_api_management.apim] 392 | } 393 | 394 | 395 | /* 396 | // Cosmos DB Capability for Multi-region HADR 397 | 398 | resource "azurerm_cosmosdb_account" "CosmosDB" { 399 | name = "${var.projectnamingconvention}cosmosdb" 400 | location = azurerm_resource_group.perftestgroup.location 401 | resource_group_name = azurerm_resource_group.perftestgroup.name 402 | offer_type = "Standard" 403 | kind = "GlobalDocumentDB" 404 | consistency_policy { 405 | consistency_level = "Session" 406 | max_interval_in_seconds = 5 407 | max_staleness_prefix = 100 408 | } 409 | enable_automatic_failover = false 410 | geo_location { 411 | location = azurerm_resource_group.perftestgroup.location 412 | failover_priority = 0 413 | } 414 | 415 | enable_multiple_write_locations = false 
416 | 417 | tags = local.tags 418 | } 419 | 420 | resource "azurerm_cosmosdb_sql_database" "main" { 421 | name = "${var.projectnamingconvention}cosmossqldb" 422 | resource_group_name = azurerm_resource_group.perftestgroup.name 423 | account_name = azurerm_cosmosdb_account.CosmosDB.name 424 | depends_on = [ azurerm_cosmosdb_account.CosmosDB ] 425 | 426 | tags = local.tags 427 | } 428 | 429 | resource "azurerm_cosmosdb_sql_container" "main" { 430 | name = "${var.projectnamingconvention}container" 431 | resource_group_name = azurerm_resource_group.perftestgroup.name 432 | account_name = azurerm_cosmosdb_account.CosmosDB.name 433 | database_name = azurerm_cosmosdb_sql_database.main.name 434 | partition_key_path = "/id" 435 | throughput = 4000 436 | depends_on = [ azurerm_cosmosdb_account.CosmosDB , azurerm_cosmosdb_sql_database.main ] 437 | 438 | tags = local.tags 439 | } 440 | 441 | */ 442 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.6.33801.468 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.OpenAIRateLimiter.API", "Microsoft.OpenAIRateLimiter.API\Microsoft.OpenAIRateLimiter.API.csproj", "{B6C47021-831F-4CB9-BD9F-0A70AAB1B94A}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.OpenAIRateLimiter.Service", "Microsoft.OpenAIRateLimiter.Service\Microsoft.OpenAIRateLimiter.Service.csproj", "{07347341-4BCF-4D46-B07B-E4E9C1BB8B16}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {B6C47021-831F-4CB9-BD9F-0A70AAB1B94A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {B6C47021-831F-4CB9-BD9F-0A70AAB1B94A}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {B6C47021-831F-4CB9-BD9F-0A70AAB1B94A}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {B6C47021-831F-4CB9-BD9F-0A70AAB1B94A}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {07347341-4BCF-4D46-B07B-E4E9C1BB8B16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {07347341-4BCF-4D46-B07B-E4E9C1BB8B16}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {07347341-4BCF-4D46-B07B-E4E9C1BB8B16}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | 
{07347341-4BCF-4D46-B07B-E4E9C1BB8B16}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {006E479E-5094-4F57-A290-EFD7BADE1148} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # Azure Functions localsettings file 5 | local.settings.json 6 | 7 | # User-specific files 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | bld/ 24 | [Bb]in/ 25 | [Oo]bj/ 26 | [Ll]og/ 27 | 28 | # Visual Studio 2015 cache/options directory 29 | .vs/ 30 | # Uncomment if you have tasks that create the project's static files in wwwroot 31 | #wwwroot/ 32 | 33 | # MSTest test Results 34 | [Tt]est[Rr]esult*/ 35 | [Bb]uild[Ll]og.* 36 | 37 | # NUNIT 38 | *.VisualState.xml 39 | TestResult.xml 40 | 41 | # Build Results of an ATL Project 42 | [Dd]ebugPS/ 43 | [Rr]eleasePS/ 44 | dlldata.c 45 | 46 | # DNX 47 | project.lock.json 48 | project.fragment.lock.json 49 | artifacts/ 50 | 51 | *_i.c 52 | *_p.c 53 | *_i.h 54 | *.ilk 55 | *.meta 56 | *.obj 57 | *.pch 58 | *.pdb 59 | *.pgc 60 | *.pgd 61 | *.rsp 62 | *.sbr 63 | *.tlb 64 | *.tli 65 | *.tlh 66 | *.tmp 67 | *.tmp_proj 68 | *.log 69 | *.vspscc 70 | *.vssscc 71 | .builds 72 | *.pidb 73 | *.svclog 74 | *.scc 75 | 76 | # Chutzpah Test files 77 | _Chutzpah* 78 | 79 | # Visual C++ cache files 80 | ipch/ 81 | *.aps 82 | *.ncb 83 | 
*.opendb 84 | *.opensdf 85 | *.sdf 86 | *.cachefile 87 | *.VC.db 88 | *.VC.VC.opendb 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | *.vspx 94 | *.sap 95 | 96 | # TFS 2012 Local Workspace 97 | $tf/ 98 | 99 | # Guidance Automation Toolkit 100 | *.gpState 101 | 102 | # ReSharper is a .NET coding add-in 103 | _ReSharper*/ 104 | *.[Rr]e[Ss]harper 105 | *.DotSettings.user 106 | 107 | # JustCode is a .NET coding add-in 108 | .JustCode 109 | 110 | # TeamCity is a build add-in 111 | _TeamCity* 112 | 113 | # DotCover is a Code Coverage Tool 114 | *.dotCover 115 | 116 | # NCrunch 117 | _NCrunch_* 118 | .*crunch*.local.xml 119 | nCrunchTemp_* 120 | 121 | # MightyMoose 122 | *.mm.* 123 | AutoTest.Net/ 124 | 125 | # Web workbench (sass) 126 | .sass-cache/ 127 | 128 | # Installshield output folder 129 | [Ee]xpress/ 130 | 131 | # DocProject is a documentation generator add-in 132 | DocProject/buildhelp/ 133 | DocProject/Help/*.HxT 134 | DocProject/Help/*.HxC 135 | DocProject/Help/*.hhc 136 | DocProject/Help/*.hhk 137 | DocProject/Help/*.hhp 138 | DocProject/Help/Html2 139 | DocProject/Help/html 140 | 141 | # Click-Once directory 142 | publish/ 143 | 144 | # Publish Web Output 145 | *.[Pp]ublish.xml 146 | *.azurePubxml 147 | # TODO: Comment the next line if you want to checkin your web deploy settings 148 | # but database connection strings (with potential passwords) will be unencrypted 149 | #*.pubxml 150 | *.publishproj 151 | 152 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 153 | # checkin your Azure Web App publish settings, but sensitive information contained 154 | # in these scripts will be unencrypted 155 | PublishScripts/ 156 | 157 | # NuGet Packages 158 | *.nupkg 159 | # The packages folder can be ignored because of Package Restore 160 | **/packages/* 161 | # except build/, which is used as an MSBuild target. 
162 | !**/packages/build/ 163 | # Uncomment if necessary however generally it will be regenerated when needed 164 | #!**/packages/repositories.config 165 | # NuGet v3's project.json files produces more ignoreable files 166 | *.nuget.props 167 | *.nuget.targets 168 | 169 | # Microsoft Azure Build Output 170 | csx/ 171 | *.build.csdef 172 | 173 | # Microsoft Azure Emulator 174 | ecf/ 175 | rcf/ 176 | 177 | # Windows Store app package directories and files 178 | AppPackages/ 179 | BundleArtifacts/ 180 | Package.StoreAssociation.xml 181 | _pkginfo.txt 182 | 183 | # Visual Studio cache files 184 | # files ending in .cache can be ignored 185 | *.[Cc]ache 186 | # but keep track of directories ending in .cache 187 | !*.[Cc]ache/ 188 | 189 | # Others 190 | ClientBin/ 191 | ~$* 192 | *~ 193 | *.dbmdl 194 | *.dbproj.schemaview 195 | *.jfm 196 | *.pfx 197 | *.publishsettings 198 | node_modules/ 199 | orleans.codegen.cs 200 | 201 | # Since there are multiple workflows, uncomment next line to ignore bower_components 202 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 203 | #bower_components/ 204 | 205 | # RIA/Silverlight projects 206 | Generated_Code/ 207 | 208 | # Backup & report files from converting an old project file 209 | # to a newer Visual Studio version. 
Backup files are not needed, 210 | # because we have git ;-) 211 | _UpgradeReport_Files/ 212 | Backup*/ 213 | UpgradeLog*.XML 214 | UpgradeLog*.htm 215 | 216 | # SQL Server files 217 | *.mdf 218 | *.ldf 219 | 220 | # Business Intelligence projects 221 | *.rdl.data 222 | *.bim.layout 223 | *.bim_*.settings 224 | 225 | # Microsoft Fakes 226 | FakesAssemblies/ 227 | 228 | # GhostDoc plugin setting file 229 | *.GhostDoc.xml 230 | 231 | # Node.js Tools for Visual Studio 232 | .ntvs_analysis.dat 233 | 234 | # Visual Studio 6 build log 235 | *.plg 236 | 237 | # Visual Studio 6 workspace options file 238 | *.opt 239 | 240 | # Visual Studio LightSwitch build output 241 | **/*.HTMLClient/GeneratedArtifacts 242 | **/*.DesktopClient/GeneratedArtifacts 243 | **/*.DesktopClient/ModelManifest.xml 244 | **/*.Server/GeneratedArtifacts 245 | **/*.Server/ModelManifest.xml 246 | _Pvt_Extensions 247 | 248 | # Paket dependency manager 249 | .paket/paket.exe 250 | paket-files/ 251 | 252 | # FAKE - F# Make 253 | .fake/ 254 | 255 | # JetBrains Rider 256 | .idea/ 257 | *.sln.iml 258 | 259 | # CodeRush 260 | .cr/ 261 | 262 | # Python Tools for Visual Studio (PTVS) 263 | __pycache__/ 264 | *.pyc -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Microsoft.OpenAIRateLimiter.API.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net6.0 4 | v4 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | PreserveNewest 20 | 21 | 22 | PreserveNewest 23 | Never 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Models/BudgetAlert.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | 4 | namespace Microsoft.OpenAIRateLimiter.API.Models 5 | { 6 | public class BudgetAlert 7 | { 8 | public string schemaId { get; 
set; } 9 | public Data data { get; set; } 10 | } 11 | public class AlertContext 12 | { 13 | public string AlertCategory { get; set; } 14 | public AlertData AlertData { get; set; } 15 | } 16 | 17 | public class AlertData 18 | { 19 | public string Scope { get; set; } 20 | public string ThresholdType { get; set; } 21 | public string BudgetType { get; set; } 22 | public string BudgetThreshold { get; set; } 23 | public string NotificationThresholdAmount { get; set; } 24 | public string BudgetName { get; set; } 25 | public string BudgetId { get; set; } 26 | public string BudgetStartDate { get; set; } 27 | public string BudgetCreator { get; set; } 28 | public string Unit { get; set; } 29 | public string SpentAmount { get; set; } 30 | } 31 | 32 | public class Data 33 | { 34 | public Essentials essentials { get; set; } 35 | public AlertContext alertContext { get; set; } 36 | } 37 | 38 | public class Essentials 39 | { 40 | public string monitoringService { get; set; } 41 | public DateTime firedDateTime { get; set; } 42 | public string description { get; set; } 43 | public string essentialsVersion { get; set; } 44 | public string alertContextVersion { get; set; } 45 | public string alertId { get; set; } 46 | public object alertRule { get; set; } 47 | public object severity { get; set; } 48 | public object signalType { get; set; } 49 | public object monitorCondition { get; set; } 50 | public object alertTargetIDs { get; set; } 51 | public List configurationItems { get; set; } 52 | public object originAlertId { get; set; } 53 | } 54 | 55 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Models/KVQuota.cs: -------------------------------------------------------------------------------- 1 | using System.Text.Json.Serialization; 2 | 3 | namespace Microsoft.OpenAIRateLimiter.API.Models 4 | { 5 | public class KVQuota 6 | { 7 | [JsonPropertyName("subscriptionKey")] 8 | public string SubscriptionKey { get; set; } 9 | 
10 | [JsonPropertyName("productName")] 11 | public string ProductName { get; set; } 12 | 13 | [JsonPropertyName("amount")] 14 | public string Amount { get; set; } 15 | 16 | [JsonPropertyName("rateLimitOnCost")] 17 | public bool RateLimitOnCost { get; set; } = true; 18 | } 19 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Models/QuotaDetail.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Text.Json.Serialization; 3 | 4 | namespace Microsoft.OpenAIRateLimiter.API.Models 5 | { 6 | public class QuotaDetail 7 | { 8 | 9 | [JsonPropertyName("subscriptionKey")] 10 | public string SubscriptionKey { get; set; } = default!; 11 | 12 | public string RowKey { get; set; } = default!; 13 | 14 | [JsonPropertyName("productName")] 15 | public string ProductName { get; set; } = default!; 16 | 17 | [JsonPropertyName("timestamp")] 18 | public DateTimeOffset? Timestamp { get; set; } = default!; 19 | 20 | [JsonPropertyName("model")] 21 | public string Model { get; set; } = default!; 22 | 23 | [JsonPropertyName("tokenAmount")] 24 | public int TotalTokens { get; set; } 25 | 26 | [JsonPropertyName("promptTokens")] 27 | public int PromptTokens { get; set; } 28 | 29 | [JsonPropertyName("operation")] 30 | public string Operation { get; set; } = ""; 31 | 32 | [JsonPropertyName("amount")] 33 | public decimal Amount { get; set; } 34 | 35 | [JsonPropertyName("transCost")] 36 | public string TransCost { get; set; } 37 | 38 | [JsonPropertyName("balance")] 39 | public string Balance { get; set; } 40 | 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Properties/ServiceDependencies/CustomRateLimitAPI/appInsights1.arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": 
"https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "resourceGroupName": { 6 | "type": "string", 7 | "defaultValue": "rg-AppInno-OpenAI-Dev", 8 | "metadata": { 9 | "_parameterType": "resourceGroup", 10 | "description": "Name of the resource group for the resource. It is recommended to put resources under same resource group for better tracking." 11 | } 12 | }, 13 | "resourceGroupLocation": { 14 | "type": "string", 15 | "defaultValue": "eastus", 16 | "metadata": { 17 | "_parameterType": "location", 18 | "description": "Location of the resource group. Resource groups could have different location than resources." 19 | } 20 | }, 21 | "resourceLocation": { 22 | "type": "string", 23 | "defaultValue": "[parameters('resourceGroupLocation')]", 24 | "metadata": { 25 | "_parameterType": "location", 26 | "description": "Location of the resource. By default use resource group's location, unless the resource provider is not supported there." 
27 | } 28 | } 29 | }, 30 | "resources": [ 31 | { 32 | "type": "Microsoft.Resources/resourceGroups", 33 | "name": "[parameters('resourceGroupName')]", 34 | "location": "[parameters('resourceGroupLocation')]", 35 | "apiVersion": "2019-10-01" 36 | }, 37 | { 38 | "type": "Microsoft.Resources/deployments", 39 | "name": "[concat(parameters('resourceGroupName'), 'Deployment', uniqueString(concat('AppInno-OpenAI', subscription().subscriptionId)))]", 40 | "resourceGroup": "[parameters('resourceGroupName')]", 41 | "apiVersion": "2019-10-01", 42 | "dependsOn": [ 43 | "[parameters('resourceGroupName')]" 44 | ], 45 | "properties": { 46 | "mode": "Incremental", 47 | "template": { 48 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 49 | "contentVersion": "1.0.0.0", 50 | "resources": [ 51 | { 52 | "name": "AppInno-OpenAI", 53 | "type": "microsoft.insights/components", 54 | "location": "[parameters('resourceLocation')]", 55 | "kind": "web", 56 | "properties": {}, 57 | "apiVersion": "2015-05-01" 58 | } 59 | ] 60 | } 61 | } 62 | } 63 | ], 64 | "metadata": { 65 | "_dependencyType": "appInsights.azure" 66 | } 67 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Properties/ServiceDependencies/CustomRateLimitAPI/storage1.arm.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "resourceGroupName": { 6 | "type": "string", 7 | "defaultValue": "rg-AppInno-OpenAI-Dev", 8 | "metadata": { 9 | "_parameterType": "resourceGroup", 10 | "description": "Name of the resource group for the resource. It is recommended to put resources under same resource group for better tracking." 
11 | } 12 | }, 13 | "resourceGroupLocation": { 14 | "type": "string", 15 | "defaultValue": "eastus", 16 | "metadata": { 17 | "_parameterType": "location", 18 | "description": "Location of the resource group. Resource groups could have different location than resources." 19 | } 20 | }, 21 | "resourceLocation": { 22 | "type": "string", 23 | "defaultValue": "[parameters('resourceGroupLocation')]", 24 | "metadata": { 25 | "_parameterType": "location", 26 | "description": "Location of the resource. By default use resource group's location, unless the resource provider is not supported there." 27 | } 28 | } 29 | }, 30 | "resources": [ 31 | { 32 | "type": "Microsoft.Resources/resourceGroups", 33 | "name": "[parameters('resourceGroupName')]", 34 | "location": "[parameters('resourceGroupLocation')]", 35 | "apiVersion": "2019-10-01" 36 | }, 37 | { 38 | "type": "Microsoft.Resources/deployments", 39 | "name": "[concat(parameters('resourceGroupName'), 'Deployment', uniqueString(concat('rgappinnoopenaidev8e89', subscription().subscriptionId)))]", 40 | "resourceGroup": "[parameters('resourceGroupName')]", 41 | "apiVersion": "2019-10-01", 42 | "dependsOn": [ 43 | "[parameters('resourceGroupName')]" 44 | ], 45 | "properties": { 46 | "mode": "Incremental", 47 | "template": { 48 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", 49 | "contentVersion": "1.0.0.0", 50 | "resources": [ 51 | { 52 | "sku": { 53 | "name": "Standard_LRS", 54 | "tier": "Standard" 55 | }, 56 | "kind": "Storage", 57 | "name": "rgappinnoopenaidev8e89", 58 | "type": "Microsoft.Storage/storageAccounts", 59 | "location": "[parameters('resourceLocation')]", 60 | "apiVersion": "2017-10-01" 61 | } 62 | ] 63 | } 64 | } 65 | } 66 | ], 67 | "metadata": { 68 | "_dependencyType": "storage.azure" 69 | } 70 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Properties/launchSettings.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "Microsoft.OpenAIRateLimiter.API": { 4 | "commandName": "Project", 5 | "commandLineArgs": "--port 7290", 6 | "launchBrowser": false 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Properties/serviceDependencies.CustomRateLimitAPI.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "storage1": { 4 | "resourceId": "/subscriptions/[parameters('subscriptionId')]/resourceGroups/[parameters('resourceGroupName')]/providers/Microsoft.Storage/storageAccounts/rgappinnoopenaidev8e89", 5 | "type": "storage.azure", 6 | "connectionId": "AzureWebJobsStorage" 7 | }, 8 | "appInsights1": { 9 | "resourceId": "/subscriptions/[parameters('subscriptionId')]/resourceGroups/[parameters('resourceGroupName')]/providers/microsoft.insights/components/AppInno-OpenAI", 10 | "type": "appInsights.azure", 11 | "connectionId": "APPLICATIONINSIGHTS_CONNECTION_STRING" 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Properties/serviceDependencies.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "storage1": { 4 | "type": "storage", 5 | "connectionId": "AzureWebJobsStorage" 6 | }, 7 | "appInsights1": { 8 | "type": "appInsights", 9 | "connectionId": "APPLICATIONINSIGHTS_CONNECTION_STRING" 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Properties/serviceDependencies.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "appInsights1": { 4 | "type": "appInsights.sdk" 5 | }, 6 | "storage1": { 7 | "type": "storage.emulator", 8 | 
"connectionId": "AzureWebJobsStorage" 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/QuotaRequest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | using System.Net; 4 | using System.Threading.Tasks; 5 | using Microsoft.AspNetCore.Http; 6 | using Microsoft.Azure.WebJobs; 7 | using Microsoft.Azure.WebJobs.Extensions.Http; 8 | using Microsoft.Azure.WebJobs.Extensions.OpenApi.Core.Attributes; 9 | using Microsoft.Azure.WebJobs.Extensions.OpenApi.Core.Enums; 10 | using Microsoft.Extensions.Logging; 11 | using Microsoft.OpenAIRateLimiter.API.Models; 12 | using Microsoft.OpenApi.Models; 13 | using Newtonsoft.Json; 14 | using System.Net.Http; 15 | using Microsoft.OpenAIRateLimiter.Service; 16 | using Microsoft.OpenAIRateLimiter.Service.Common; 17 | using Microsoft.OpenAIRateLimiter.Service.Models; 18 | using System.Collections.Generic; 19 | using System.Linq; 20 | using Newtonsoft.Json.Linq; 21 | using System.Data; 22 | 23 | namespace Microsoft.OpenAIRateLimiter.API 24 | { 25 | public class QuotaRequest 26 | { 27 | private readonly IQuotaService _svc; 28 | private readonly IParseService _parseSvc; 29 | 30 | public QuotaRequest(IQuotaService quotaService, IParseService parseService) 31 | { 32 | _svc = quotaService; 33 | _parseSvc = parseService; 34 | } 35 | 36 | [FunctionName("Create")] 37 | [OpenApiOperation(operationId: "Create")] 38 | [OpenApiSecurity("function_key", SecuritySchemeType.ApiKey, Name = "code", In = OpenApiSecurityLocationType.Query)] 39 | [OpenApiRequestBody(contentType: "application/json", bodyType: typeof(KVQuota), Required = true, Description = "The minimum required parameters")] 40 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.OK, contentType: "text/plain", bodyType: typeof(string), Description = "The OK response")] 41 | [OpenApiResponseWithBody(statusCode: 
HttpStatusCode.InternalServerError, contentType: "application/json", bodyType: typeof(Exception), Description = "Exception")] 42 | public async Task CreateQuota( 43 | [HttpTrigger(AuthorizationLevel.Function, "post", Route = "Quota/")] HttpRequest req, ILogger log) 44 | { 45 | log.LogInformation($"Entered CreateQuota"); 46 | 47 | try 48 | { 49 | // Read the whole request body as text before deserializing it. 50 | string requestBody = await new StreamReader(req.Body).ReadToEndAsync(); 51 | // NOTE(review): the body carries subscriptionKey; logging it verbatim may expose the key in logs - consider redacting. 52 | log.LogInformation($"Request Body = {requestBody}"); 53 | 54 | var data = JsonConvert.DeserializeObject(requestBody); 55 | // Each required field is checked in turn; a missing one short-circuits with RESTResponse(null). 56 | if (data?.SubscriptionKey is null) 57 | return HttpUtilities.RESTResponse(data?.SubscriptionKey); 58 | 59 | if (data?.ProductName is null) 60 | return HttpUtilities.RESTResponse(data?.ProductName); 61 | 62 | if (data?.Amount is null) 63 | return HttpUtilities.RESTResponse(data?.Amount); 64 | // Amount arrives as a JSON string; parse it with the invariant culture so "12.50" means the same on every host. 65 | return HttpUtilities.RESTResponse(await _svc.Create(new QuotaDTO() { Key = data.SubscriptionKey, 66 | Product = data.ProductName, 67 | Value = Convert.ToDecimal(data.Amount, System.Globalization.CultureInfo.InvariantCulture), 68 | RateLimitOnCost = data.RateLimitOnCost })); 69 | 70 | } 71 | catch (Exception ex) 72 | { 73 | log.LogError(ex, ex.Message); 74 | return HttpUtilities.RESTResponse(ex); 75 | } 76 | 77 | } 78 | 79 | [FunctionName("Update")] 80 | [OpenApiOperation(operationId: "Update")] 81 | [OpenApiSecurity("function_key", SecuritySchemeType.ApiKey, Name = "code", In = OpenApiSecurityLocationType.Query)] 82 | [OpenApiRequestBody(contentType: "application/json", bodyType: typeof(QuotaEntry), Required = true, Description = "The minimum required parameters")] 83 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.OK, contentType: "text/plain", bodyType: typeof(string), Description = "The OK response")] 84 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.InternalServerError, contentType: "application/json", bodyType: typeof(Exception), Description = "Exception")] 85 | public async Task UpdateQuota( 86 | [HttpTrigger(AuthorizationLevel.Function, "post", Route
= "Quota/Update")] HttpRequest req, ILogger log) 87 | { 88 | log.LogInformation($"Entered UpdateQuota"); 89 | 90 | try 91 | { 92 | 93 | string requestBody = await new StreamReader(req.Body).ReadToEndAsync(); 94 | 95 | log.LogInformation($"Request Body = {requestBody}"); 96 | 97 | var quotaObj = JsonConvert.DeserializeObject(requestBody); 98 | 99 | var info = await _parseSvc.Parse(quotaObj); 100 | 101 | return HttpUtilities.RESTResponse(await _svc.Update(info)); 102 | 103 | } 104 | catch (Exception ex) 105 | { 106 | log.LogError(ex, ex.Message); 107 | return HttpUtilities.RESTResponse(ex); 108 | } 109 | 110 | } 111 | 112 | [FunctionName("BudgetAlertEndpoint")] 113 | [OpenApiOperation(operationId: "Budget")] 114 | [OpenApiSecurity("function_key", SecuritySchemeType.ApiKey, Name = "code", In = OpenApiSecurityLocationType.Query)] 115 | [OpenApiRequestBody(contentType: "application/json", bodyType: typeof(string), Required = true, Description = "The minimum required parameters")] 116 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.OK, contentType: "text/plain", bodyType: typeof(string), Description = "The OK response")] 117 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.InternalServerError, contentType: "application/json", bodyType: typeof(Exception), Description = "Exception")] 118 | public async Task BudgetAlertEndpoint( 119 | [HttpTrigger(AuthorizationLevel.Function, "post", Route = "Quota/Budget")] HttpRequest req, ILogger log) 120 | { 121 | log.LogInformation($"Entered BudgetAlertEndpoint"); 122 | 123 | try 124 | { 125 | 126 | string requestBody = await new StreamReader(req.Body).ReadToEndAsync(); 127 | 128 | log.LogInformation($"Request Body = {requestBody}"); 129 | 130 | var alert = JsonConvert.DeserializeObject(requestBody); 131 | 132 | if (alert?.data?.alertContext?.AlertData?.BudgetName is null) { 133 | log.LogError($"Missing Budget Name = {requestBody}"); 134 | return HttpUtilities.RESTResponse(alert?.data?.alertContext?.AlertData?.BudgetName); 
135 | } 136 | 137 | return HttpUtilities.RESTResponse(await _svc.BudgetUpdate(new QuotaDTO() { Product = alert.data.alertContext.AlertData.BudgetName, Value = 0M })); 138 | 139 | } 140 | catch (Exception ex) 141 | { 142 | log.LogError(ex, ex.Message); 143 | return HttpUtilities.RESTResponse(ex); 144 | } 145 | 146 | } 147 | 148 | [FunctionName("GetQuotaByKey")] 149 | [OpenApiOperation(operationId: "GetQuotaByKey")] 150 | [OpenApiSecurity("function_key", SecuritySchemeType.ApiKey, Name = "code", In = OpenApiSecurityLocationType.Query)] 151 | [OpenApiParameter(name: "keyId", In = ParameterLocation.Path, Required = true, Type = typeof(string), Description = "The subscription id of the Quota key")] 152 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.OK, contentType: "application/json", bodyType: typeof(KVQuota), Description = "The OK response")] 153 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.InternalServerError, contentType: "application/json", bodyType: typeof(Exception), Description = "Exception")] 154 | public async Task GetQuotaByKey( 155 | [HttpTrigger(AuthorizationLevel.Function, "get", Route = "Quota/{keyId}")] HttpRequest req, string keyId, ILogger log) 156 | { 157 | log.LogInformation($"Entered GetQuotaByKey Key = {keyId}"); 158 | 159 | try 160 | { 161 | req.ToString(); 162 | 163 | if (string.IsNullOrEmpty(keyId)) 164 | return HttpUtilities.RESTResponse(keyId); 165 | 166 | var retVal = await _svc.GetById(keyId); 167 | 168 | log.LogInformation($"returned value from _svc.GetById = {retVal}"); 169 | 170 | return HttpUtilities.RESTResponse(new KVQuota() { SubscriptionKey = keyId, Amount = retVal.ToString() }); 171 | 172 | } 173 | catch (Exception ex) 174 | { 175 | log.LogError(ex, ex.Message); 176 | return HttpUtilities.RESTResponse(ex); 177 | 178 | } 179 | 180 | } 181 | 182 | [FunctionName("GetQuotaHistory")] 183 | [OpenApiOperation(operationId: "GetQuotaHistory")] 184 | [OpenApiSecurity("function_key", SecuritySchemeType.ApiKey, Name = "code", In 
= OpenApiSecurityLocationType.Query)] 185 | [OpenApiParameter(name: "keyId", In = ParameterLocation.Path, Required = true, Type = typeof(string), Description = "The subscription id of the Quota key")] 186 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.OK, contentType: "application/json", bodyType: typeof(List), Description = "The OK response")] 187 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.InternalServerError, contentType: "application/json", bodyType: typeof(Exception), Description = "Exception")] 188 | public async Task GetQuotaHistory( 189 | [HttpTrigger(AuthorizationLevel.Function, "get", Route = "Quota/{keyId}/history")] HttpRequest req, string keyId, ILogger log) 190 | { 191 | log.LogInformation($"Entered GetQuotaHistory Key = {keyId}"); 192 | 193 | try 194 | { 195 | req.ToString(); 196 | 197 | if (string.IsNullOrEmpty(keyId)) 198 | return HttpUtilities.RESTResponse(keyId); 199 | 200 | var retVal = await _svc.GetHistoryById(keyId); 201 | 202 | return HttpUtilities.RESTResponse(retVal.Select(s => ConvertQuotaEntity(s))); 203 | 204 | } 205 | catch (Exception ex) 206 | { 207 | log.LogError(ex, ex.Message); 208 | return HttpUtilities.RESTResponse(ex); 209 | 210 | } 211 | 212 | } 213 | 214 | [FunctionName("GetAll")] 215 | [OpenApiOperation(operationId: "GetAll")] 216 | [OpenApiSecurity("function_key", SecuritySchemeType.ApiKey, Name = "code", In = OpenApiSecurityLocationType.Query)] 217 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.OK, contentType: "application/json", bodyType: typeof(List), Description = "The OK response")] 218 | [OpenApiResponseWithBody(statusCode: HttpStatusCode.InternalServerError, contentType: "application/json", bodyType: typeof(Exception), Description = "Exception")] 219 | public async Task GetAll( 220 | [HttpTrigger(AuthorizationLevel.Function, "get", Route = "Quota/")] HttpRequest req, ILogger log) 221 | { 222 | log.LogInformation("Entered GetAll"); 223 | 224 | try 225 | { 226 | 227 | 228 | req.ToString(); 229 | 
230 | var allQuotas = await _svc.GetAll(); 231 | 232 | var convertedQuotas = allQuotas.Select(x => ConvertQuotaEntity(x)); 233 | 234 | return HttpUtilities.RESTResponse(convertedQuotas); 235 | 236 | } 237 | catch (Exception ex) 238 | { 239 | log.LogError(ex, ex.Message); 240 | return HttpUtilities.RESTResponse(ex); 241 | 242 | } 243 | 244 | } 245 | 246 | [FunctionName("MonthlyReset")] 247 | public void Run([TimerTrigger("1 0 1 1-12 *")] TimerInfo myTimer, ILogger log) 248 | { 249 | log.LogInformation($"C# Timer trigger function executed at: {DateTime.Now}"); 250 | 251 | //Reset limits 252 | } 253 | 254 | private QuotaDetail ConvertQuotaEntity(QuotaEntity quotaEntity) 255 | { 256 | return new QuotaDetail() { SubscriptionKey = quotaEntity.PartitionKey, 257 | ProductName = quotaEntity.ProductName, 258 | Amount = Convert.ToDecimal(quotaEntity.Amount), 259 | Balance = quotaEntity.Balance, 260 | Model = quotaEntity.Model, 261 | Operation = quotaEntity.Operation, 262 | TotalTokens = quotaEntity.TotalTokens, 263 | Timestamp = quotaEntity.Timestamp.Value, 264 | PromptTokens = quotaEntity.PromptTokens, 265 | TransCost = quotaEntity.TransCost, 266 | RowKey = quotaEntity.RowKey }; 267 | 268 | } 269 | } 270 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/Startup.cs: -------------------------------------------------------------------------------- 1 |  2 | using Azure.Core; 3 | using Azure.Data.Tables; 4 | using Microsoft.Azure.Functions.Extensions.DependencyInjection; 5 | using Microsoft.Extensions.Configuration; 6 | using Microsoft.Extensions.DependencyInjection; 7 | using Microsoft.OpenAIRateLimiter.Service; 8 | using System; 9 | using System.Net; 10 | 11 | [assembly: FunctionsStartup(typeof(Microsoft.OpenAIRateLimiter.API.Startup))] 12 | namespace Microsoft.OpenAIRateLimiter.API 13 | { 14 | 15 | public class Startup : FunctionsStartup 16 | { 17 | public override void Configure(IFunctionsHostBuilder builder) 
18 | { 19 | var config = new ConfigurationBuilder() 20 | .AddJsonFile("local.settings.json", optional: true, reloadOnChange: true) 21 | .AddEnvironmentVariables() 22 | .Build(); 23 | 24 | builder.Services.AddLogging(); 25 | 26 | builder.Services.AddHttpClient("Tokenizer", httpClient => 27 | { 28 | httpClient.BaseAddress = new Uri(config["TokenizerURL"]); 29 | 30 | httpClient.DefaultRequestHeaders.Add("x-functions-key", config["TokenizerKey"]); 31 | 32 | }); 33 | 34 | builder.Services.AddTransient(); 35 | 36 | builder.Services.AddTransient(); 37 | 38 | builder.Services.AddScoped(); 39 | 40 | builder.Services.AddStackExchangeRedisCache(options => 41 | { 42 | options.Configuration = config.GetConnectionString("RedisConn"); 43 | options.InstanceName = config["RedisInstance"]; 44 | }); 45 | 46 | builder.Services.AddScoped(Provider => 47 | { 48 | return new TableServiceClient(config.GetConnectionString("StorageConn")).GetTableClient(config["TableName"]); 49 | 50 | }); 51 | 52 | //builder.Services.AddTransient(Provider => { 53 | // return new( 54 | // accountEndpoint: Environment.GetEnvironmentVariable("COSMOS_ENDPOINT")!, 55 | // tokenCredential: new DefaultAzureCredential() 56 | // ); 57 | 58 | //}); 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.API/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | }, 10 | "logLevel": { 11 | "Microsoft.OpenAIRateLimiter": "Information" 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/Common/HttpUtilities.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using System.Net; 3 | using System.Text; 
4 | 5 | 6 | namespace Microsoft.OpenAIRateLimiter.Service.Common 7 | { 8 | public static class HttpUtilities 9 | { 10 | 11 | public static HttpResponseMessage RESTResponse(T model) 12 | { 13 | 14 | HttpStatusCode httpStatusCode; 15 | 16 | if (model is Exception) 17 | httpStatusCode = HttpStatusCode.InternalServerError; 18 | else if (model is null) 19 | httpStatusCode = HttpStatusCode.BadRequest; 20 | else 21 | httpStatusCode = HttpStatusCode.OK; 22 | 23 | return GenerateRequestMessage(model, httpStatusCode); 24 | } 25 | 26 | public static HttpResponseMessage GenerateRequestMessage(T model, HttpStatusCode statusCode) 27 | { 28 | return new HttpResponseMessage(statusCode) { Content = GenerateJSONContent(model) }; 29 | } 30 | 31 | private static StringContent GenerateJSONContent(T model) 32 | { 33 | return new StringContent(JsonConvert.SerializeObject(model, Formatting.Indented), 34 | Encoding.UTF8, 35 | "application/json"); 36 | } 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/IParseService.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.OpenAIRateLimiter.Service.Models; 2 | 3 | namespace Microsoft.OpenAIRateLimiter.Service 4 | { 5 | public interface IParseService 6 | { 7 | Task Parse(QuotaEntry entry); 8 | } 9 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/IQuotaService.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.OpenAIRateLimiter.Service.Models; 2 | 3 | namespace Microsoft.OpenAIRateLimiter.Service 4 | { 5 | public interface IQuotaService 6 | { 7 | Task Create(QuotaDTO quota); 8 | 9 | Task BudgetUpdate(QuotaDTO quota); 10 | 11 | Task GetById(string key); 12 | 13 | Task Update(QuotaTransDTO quota); 14 | 15 | Task> GetAll(); 16 | 17 | Task> GetHistoryById(string key); 18 | } 19 | } 
-------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/ITokenService.cs: -------------------------------------------------------------------------------- 1 | namespace Microsoft.OpenAIRateLimiter.Service 2 | { 3 | public interface ITokenService 4 | { 5 | decimal CalculateCost(int completionTokens, int promptTokens, string model); 6 | Task GetTokenCount(string prompt, string model); 7 | } 8 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/Microsoft.OpenAIRateLimiter.Service.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net6.0 5 | enable 6 | enable 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/Models/QuotaDTO.cs: -------------------------------------------------------------------------------- 1 | namespace Microsoft.OpenAIRateLimiter.Service.Models 2 | { 3 | public class QuotaDTO 4 | { 5 | public string Key { get; set; } = ""; 6 | 7 | public decimal Value { get; set; } 8 | 9 | public string Product { get; set; } = ""; 10 | 11 | public bool RateLimitOnCost { get; set; } = true; 12 | } 13 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/Models/QuotaEntity.cs: -------------------------------------------------------------------------------- 1 | using Azure; 2 | using Azure.Data.Tables; 3 | using System.Text.Json.Serialization; 4 | 5 | namespace Microsoft.OpenAIRateLimiter.Service.Models 6 | { 7 | public class QuotaEntity : ITableEntity 8 | { 9 | 10 | public string PartitionKey { get; set; } = default!; 11 | 12 | public string RowKey { get; set; } = default!; 13 | 14 | public string ProductName { get; set; } = default!; 15 | 16 | public DateTimeOffset? 
Timestamp { get; set; } = default!; 17 | 18 | public string Model { get; set; } = default!; 19 | 20 | public int TotalTokens { get; set; } 21 | 22 | public int PromptTokens { get; set; } 23 | 24 | public string Operation { get; set; } = default!; 25 | 26 | public double Amount { get; set; } = default!; 27 | 28 | public string TransCost { get; set; } = default!; 29 | 30 | public string Balance { get; set; } = default!; 31 | 32 | public bool RateLimitOnCost { get; set; } = default!; 33 | 34 | public ETag ETag { get; set; } = default!; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/Models/QuotaEntry.cs: -------------------------------------------------------------------------------- 1 | using System.Text.Json.Serialization; 2 | 3 | namespace Microsoft.OpenAIRateLimiter.Service.Models 4 | { 5 | public class QuotaEntry 6 | { 7 | 8 | [JsonPropertyName("subscriptionKey")] 9 | public string SubscriptionKey { get; set; } = string.Empty; 10 | 11 | [JsonPropertyName("prompt")] 12 | public string Prompt { get; set; } = string.Empty; 13 | 14 | [JsonPropertyName("responseBody")] 15 | public string ResponseBody { get; set; } = string.Empty; 16 | 17 | } 18 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/Models/QuotaTransDTO.cs: -------------------------------------------------------------------------------- 1 | namespace Microsoft.OpenAIRateLimiter.Service.Models 2 | { 3 | public class QuotaTransDTO 4 | { 5 | public string subscription { get; set; } = ""; 6 | 7 | public decimal Value { get; set; } 8 | 9 | public string Model { get; set; } = ""; 10 | 11 | public int PromptTokens { get; set; } 12 | 13 | public int TotalTokens { get; set; } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/ParseService.cs: 
-------------------------------------------------------------------------------- 1 | using Microsoft.OpenAIRateLimiter.Service.Models; 2 | using Newtonsoft.Json.Linq; 3 | 4 | namespace Microsoft.OpenAIRateLimiter.Service 5 | { 6 | public class ParseService : IParseService 7 | { 8 | private readonly ITokenService _svc; 9 | 10 | public ParseService(ITokenService tokenService) 11 | { 12 | _svc = tokenService; 13 | } 14 | 15 | public async Task Parse(QuotaEntry entry) 16 | { 17 | 18 | var retVal = new QuotaTransDTO(); 19 | 20 | if (entry.SubscriptionKey is null) 21 | throw new Exception("SubscriptionKey is Null"); 22 | 23 | retVal.subscription = entry.SubscriptionKey; 24 | 25 | if (entry.ResponseBody.Contains("data: ")) 26 | { 27 | var splitData = entry.ResponseBody.Split("data: ", StringSplitOptions.RemoveEmptyEntries); 28 | 29 | retVal.TotalTokens = splitData.Length - 1; 30 | 31 | var objRes = GetObject(splitData[0]); 32 | 33 | retVal.Model = objRes["model"]?.ToString() ?? ""; 34 | 35 | retVal.PromptTokens = await _svc.GetTokenCount(entry.Prompt, retVal.Model); 36 | 37 | } 38 | else 39 | { 40 | 41 | var objRes = GetObject(entry.ResponseBody); 42 | 43 | retVal.Model = objRes["model"]?.ToString() ?? 
""; 44 | if (!(objRes["usage"]is null)) 45 | { 46 | retVal.TotalTokens = Convert.ToInt32(objRes["usage"]["total_tokens"]); 47 | retVal.PromptTokens = Convert.ToInt32(objRes["usage"]["prompt_tokens"]); 48 | } 49 | } 50 | 51 | retVal.Value = _svc.CalculateCost(retVal.TotalTokens - retVal.PromptTokens, retVal.PromptTokens, retVal.Model); 52 | //retVal.Value = retVal.TotalTokens; 53 | 54 | return retVal; 55 | } 56 | 57 | private JObject GetObject(string value) 58 | { 59 | return JObject.Parse(value); 60 | } 61 | 62 | } 63 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/QuotaService.cs: -------------------------------------------------------------------------------- 1 | using Azure.Data.Tables; 2 | using Microsoft.Extensions.Azure; 3 | using Microsoft.Extensions.Caching.Distributed; 4 | using Microsoft.Extensions.Logging; 5 | using Microsoft.OpenAIRateLimiter.Service.Models; 6 | using System.Diagnostics.CodeAnalysis; 7 | using System.Text; 8 | 9 | namespace Microsoft.OpenAIRateLimiter.Service 10 | { 11 | public class QuotaService : IQuotaService 12 | { 13 | 14 | private readonly IDistributedCache _cache; 15 | 16 | private readonly TableClient _client; 17 | 18 | private readonly ILogger _logger; 19 | 20 | private const string CreateOperation = "Create"; 21 | private const string UpdateOperation = "Update"; 22 | 23 | public QuotaService(IDistributedCache cache, TableClient client, ILogger logger) 24 | { 25 | _cache = cache; 26 | _client = client; 27 | _logger = logger; 28 | } 29 | 30 | public async Task Create(QuotaDTO quota) 31 | { 32 | 33 | await PersisttoCache(quota); 34 | 35 | if(await Exists(quota.Key)) 36 | await PersisttoTable(new QuotaEntity() { PartitionKey = quota.Key, 37 | RowKey = Guid.NewGuid().ToString(), 38 | Operation = "Deposit", 39 | Amount = Convert.ToDouble(quota.Value) }); 40 | else 41 | await PersisttoTable(new QuotaEntity() { PartitionKey = quota.Key, 42 | RowKey = 
Guid.NewGuid().ToString(), 43 | ProductName = quota.Product, 44 | Operation = CreateOperation, 45 | Amount = Convert.ToDouble(quota.Value), 46 | RateLimitOnCost = quota.RateLimitOnCost }); 47 | return true; 48 | 49 | } 50 | 51 | public async Task BudgetUpdate(QuotaDTO quota) 52 | { 53 | 54 | quota.Key = _client.Query(x => x.ProductName == quota.Product) 55 | .Select(z => z.PartitionKey).FirstOrDefault() ?? ""; 56 | 57 | if (string.IsNullOrEmpty(quota.Key)) 58 | throw new Exception($"PartitionKey not found for product = {quota.Product} "); 59 | 60 | await PersisttoCache(quota); 61 | 62 | await PersisttoTable(new QuotaEntity() { PartitionKey = quota.Key, 63 | RowKey = Guid.NewGuid().ToString(), 64 | ProductName = quota.Product, 65 | Operation = "BudgetStop", 66 | Amount = Convert.ToDouble(quota.Value) }); 67 | 68 | return true; 69 | 70 | } 71 | 72 | public async Task Update(QuotaTransDTO quota) 73 | { 74 | _logger.LogInformation($"Update Value = {quota.Value}; Update Total Tokens = {quota.TotalTokens}"); 75 | var currentAmount = Convert.ToDecimal(await GetById(quota.subscription) ?? 0M); 76 | 77 | var rateLimitValue = await GetSubRateLimitOnCost(quota.subscription) ? quota.Value : quota.TotalTokens ; 78 | 79 | var newQuota = new QuotaDTO() 80 | { 81 | Key = quota.subscription, 82 | Value = (currentAmount - rateLimitValue) > 0M ? 
currentAmount - rateLimitValue : 0M 83 | }; 84 | 85 | await PersisttoCache(newQuota); 86 | 87 | await PersisttoTable(new QuotaEntity() 88 | { 89 | PartitionKey = newQuota.Key, 90 | RowKey = Guid.NewGuid().ToString(), 91 | Operation = UpdateOperation, 92 | PromptTokens = quota.PromptTokens, 93 | TotalTokens = quota.TotalTokens, 94 | Model = quota.Model, 95 | TransCost = rateLimitValue.ToString(), 96 | Balance = newQuota.Value.ToString() 97 | }); 98 | 99 | return true; 100 | } 101 | 102 | public async Task GetById(string key) 103 | { 104 | return Convert.ToDecimal(await _cache.GetStringAsync(key)); 105 | } 106 | 107 | public async Task> GetAll() 108 | { 109 | var result = new List(); 110 | 111 | var keys = _client.Query(x => x.PartitionKey != "" && x.Operation == CreateOperation).ToList(); 112 | 113 | await new TaskFactory().StartNew(() => { 114 | keys.ForEach( x => 115 | { 116 | x.Balance = GetById(x.PartitionKey).Result.ToString() ?? "0"; 117 | x.TotalTokens = CalculateTokenUsage(x.PartitionKey); 118 | }); }); 119 | 120 | return keys; 121 | 122 | } 123 | 124 | public async Task> GetHistoryById(string key) 125 | { 126 | return await GetallRecords(key); 127 | } 128 | 129 | #region Private Methods 130 | 131 | private int CalculateTokenUsage(string subscription) 132 | { 133 | var amount = _client.Query(x => x.PartitionKey == subscription).Sum(s => s.TotalTokens); 134 | 135 | return amount; 136 | 137 | } 138 | 139 | private async Task PersisttoCache(QuotaDTO quota) 140 | { 141 | await _cache.SetAsync(quota.Key, Encoding.UTF8.GetBytes(quota.Value.ToString())); 142 | } 143 | 144 | private async Task PersisttoTable(QuotaEntity entity) 145 | { 146 | await _client.AddEntityAsync(entity); 147 | } 148 | 149 | private async Task> GetallRecords(string subscriptionKey) 150 | { 151 | return await new TaskFactory().StartNew(() => { return _client.Query(x => x.PartitionKey == subscriptionKey).ToList(); }); 152 | } 153 | 154 | private async Task GetSubRateLimitOnCost(string 
subscriptionKey) 155 | { 156 | 157 | var Retval = true; 158 | 159 | await foreach (var q in _client.QueryAsync(x => x.PartitionKey == subscriptionKey && x.Operation == CreateOperation, 1)) 160 | { 161 | Retval = q?.RateLimitOnCost ?? true; 162 | break; 163 | } 164 | 165 | return Retval; 166 | 167 | } 168 | 169 | private async Task Exists(string subscriptionKey) 170 | { 171 | 172 | var Retval = false; 173 | 174 | await foreach (var q in _client.QueryAsync(x => x.PartitionKey == subscriptionKey, 1)) 175 | { 176 | Retval = true; 177 | break; 178 | } 179 | 180 | return Retval; 181 | 182 | } 183 | 184 | #endregion 185 | 186 | } 187 | } -------------------------------------------------------------------------------- /Microsoft.OpenAIRateLimiter.Service/TokenService.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using Newtonsoft.Json.Linq; 3 | using System.Data; 4 | using System.Text; 5 | 6 | namespace Microsoft.OpenAIRateLimiter.Service 7 | { 8 | public class TokenService : ITokenService 9 | { 10 | private readonly HttpClient _client; 11 | 12 | public TokenService(IHttpClientFactory httpClientFactory) 13 | { 14 | 15 | _client = httpClientFactory.CreateClient("Tokenizer"); 16 | } 17 | 18 | public async Task GetTokenCount(string prompt, string model) 19 | { 20 | 21 | var payload = new { input = prompt, model = GetModel(model) }; 22 | 23 | HttpContent c = new StringContent(JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json"); 24 | 25 | var resp = await _client.PostAsync("/api/tokenize", c); 26 | 27 | var body = await resp.Content.ReadAsStringAsync(); 28 | 29 | return Convert.ToInt32(JObject.Parse(body)["num_tokens"]); 30 | 31 | } 32 | 33 | public decimal CalculateCost(int completionTokens, int promptTokens, string model) 34 | { 35 | 36 | decimal retVal = 0M; 37 | 38 | //var dt = new DataTable(); 39 | //var v = dt.Compute($"({totalToken} / 1000) * .002", ""); 40 | 41 | switch 
(model.Trim().ToLower()) 42 | { 43 | case "gpt-35-turbo": 44 | retVal = Convert.ToDecimal(promptTokens) / 1000M * .0015M; 45 | retVal += Convert.ToDecimal(completionTokens) / 1000M * .002M; 46 | break; 47 | 48 | case "gpt-4": 49 | retVal = Convert.ToDecimal(promptTokens) / 1000M * .03M; 50 | retVal += Convert.ToDecimal(completionTokens) / 1000M * .06M; 51 | break; 52 | 53 | case "gpt-4-32k": 54 | retVal = Convert.ToDecimal(promptTokens) / 1000M * .06M; 55 | retVal += Convert.ToDecimal(completionTokens) / 1000M * .12M; 56 | break; 57 | 58 | default: 59 | retVal = Convert.ToDecimal(promptTokens) / 1000M * .003M; 60 | retVal += Convert.ToDecimal(completionTokens) / 1000M * .004M; 61 | break; 62 | } 63 | 64 | return retVal; 65 | } 66 | 67 | #region Private 68 | 69 | private string GetModel(string model) 70 | { 71 | var retVal = ""; 72 | 73 | switch (model.Trim().ToLower()) 74 | { 75 | case "gpt-35-turbo": 76 | retVal = "gpt-3.5-turbo"; 77 | break; 78 | 79 | case "gpt-4": 80 | retVal = "gpt-4"; 81 | break; 82 | 83 | default: 84 | break; 85 | } 86 | 87 | return retVal; 88 | } 89 | 90 | #endregion 91 | } 92 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open AI Cost Gateway Pattern 2 | 3 | Real-Time Capabilities:
4 | Track Spending & Tokens By Product (Cost Chargeback) for every Request, including streaming
5 | Rate Limit By Product based on spending Limits (default) or Tokens (429 Rate Limiting Response when Spending/Token limit has been reached)
6 | 7 | 8 | ## Architecture 9 | 10 | 11 | ![AI Cost Gateway](https://github.com/ThePreston/Custom-Rate-Limiter-API/assets/84995595/c0992f84-5d3b-4799-9d87-b3e0e82fcb21) 12 | 13 | 14 | 15 | ## Open AI Service, Real-Time Cost Tracking And Rate Limiting Per HTTP Request (by Product) 16 | 17 | 18 | ![Picture1](https://github.com/ThePreston/Custom-Rate-Limiter-API/assets/84995595/1a27d263-f69e-41c0-9f30-7fb9e5d23cf7) 19 | 20 | 21 |
22 | 23 | ## Additional Capabilities - Any Service, Rate Limiting based on Budget (by Product) and Event Hub Logging 24 | 25 | Additional Capabilities:
26 | Rate Limiting based on Budget Alerts
27 | Logging via Event Hubs to Data Lake Hub
28 | 29 | 30 | ![Picture2](https://github.com/ThePreston/Custom-Rate-Limiter-API/assets/84995595/8e335ce5-f484-4b39-85f7-6b4accae5d4a) 31 | 32 | 33 |
34 | 35 | 36 | 37 | ## High Level Architecture of all Features in the repo 38 | 39 |
40 | Open AI Transactional Cost Tracking and Rate limiting
41 | Budget Alert Rate Limiting
42 | Event Hub Logging
43 | 44 |
45 | 46 | 47 | ![AI Gateway](https://github.com/ThePreston/Custom-Rate-Limiter-API/assets/84995595/cc3d5d63-0df0-43b9-923a-7a1a32da487d) 48 | 49 | 50 | ## Streaming Capabilities 51 | Streaming responses do not include token information; it must be calculated
52 | Prompt Tokens are calculated using an additional Python Function API wrapper that uses TikToken:
53 | 54 | https://github.com/awkwardindustries/dossier/tree/main/samples/open-ai/tokenizer/azure-function-python-v2 55 | 56 | 57 | ## Methods 58 | 59 | 1) Create 60 | 2) Update 61 | 3) Budget Alert Endpoint 62 | 4) GetAll 63 | 5) GetById 64 | 65 | 66 |
67 | 68 | 69 | ## AOAI Swagger 70 | 71 | Repo:
72 | [azure-rest-api-specs/specification/cognitiveservices/data-plane/AzureOpenAI/](https://github.com/Azure/azure-rest-api-specs/tree/main/specification/cognitiveservices/data-plane/AzureOpenAI) 73 | 74 | JSON Repo: 75 | https://github.com/Azure/azure-rest-api-specs/blob/main/specification/cognitiveservices/data-plane/AzureOpenAI/inference/stable/2023-05-15/inference.json 76 | 77 | JSON File URI: 78 | https://raw.githubusercontent.com/Azure/azure-rest-api-specs/main/specification/cognitiveservices/data-plane/AzureOpenAI/inference/stable/2023-05-15/inference.json 79 | 80 |
81 | 82 | 83 | ## Budget Alerts 84 | 85 | Latency:
86 | Cost and usage data is typically available within 8-24 hours and budgets are evaluated against these costs every 24 hours. 87 | 88 |
89 | Documentation:
90 | https://learn.microsoft.com/en-us/azure/cost-management-billing/costs/tutorial-acm-create-budgets 91 | 92 | 93 | ## FAQ 94 | 95 | Cost API:
96 | Attempted this, but it proved to be overly complicated. Cost and usage data is typically available within 8-24 hours. 97 | We would have to create a polling mechanism to call the Cost API for each resource to be monitored. 98 |
99 | 100 | Streaming Responses:
101 | When "Stream" : true is added to the JSON payload, no token information is provided by the Open AI Service.
102 | Prompt Tokens are calculated using a Python Function (PyTokenizer) that wraps the BPE tokenizer library TikToken
103 | Completion Tokens are calculated by counting the SSE responses and subtracting 2
104 | 105 | 106 | Granularity of Cost Tracking:
107 | Solution uses APIM Product Subscription Keys but can also be used against individual IDs, header values, etc. 108 | --------------------------------------------------------------------------------