├── .gitignore ├── .vscode ├── launch.json └── tasks.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── aisentry ├── adapters │ ├── Api_Request_Transformer.py │ ├── SampleAdapter │ │ └── sample_api_request_transformer.py │ └── adapters.py ├── asgi_facade.py ├── asgi_summary_worker.py ├── asgi_worker.py ├── dapr.yaml ├── facade │ ├── __init__.py │ ├── app.py │ └── req.txt ├── requirements.txt ├── utils │ ├── __init__.py │ ├── ai_sentry_helpers.py │ ├── analyze_pii.py │ ├── analyze_pii_chunked_ta.py │ ├── analyze_pii_openai.py │ ├── aoai_streaming_response.py │ ├── approaches │ │ ├── approach.py │ │ ├── headerselector.py │ │ └── randomallocation.py │ ├── auth_helpers.py │ ├── combined-cert.crt │ └── combined_cert.pem └── worker │ ├── cosmos_logger │ ├── __init__.py │ └── cosmos_logger.py │ ├── loganalytics_logger │ ├── __init__.py │ └── loganalytics_logger.py │ ├── requirements.txt │ └── usage_summary │ ├── __init__.py │ ├── requirements.txt │ └── usage_logger.py ├── build ├── Dockerfile.facade ├── Dockerfile.worker ├── DockerfileSummary.worker └── build-ai-sentry-containers.ps1 ├── content ├── documentation │ ├── ACADeployment.md │ ├── AI-Sentry-config-settings.md │ ├── AKSDeployment.md │ ├── AzureInfrastrcuture.md │ ├── ComsosDB-LoggingSchema.md │ ├── CosmosDBSetup.md │ ├── LocalDebugging.md │ ├── LocalDeployment.md │ ├── SummaryLog-schema.md │ ├── Workload-identity-config.md │ └── ai-sentry-config.json └── images │ ├── AI-Sentry-AKS-view.drawio.png │ ├── AI-Sentry-HighLevel.drawio.png │ ├── AI-Sentry-features.png │ ├── AI-Sentry.drawio │ ├── cosmosdb_request_logs.png │ ├── cosmosdb_summary_logs.png │ ├── openai_rbac.png │ └── openai_rbac2.png ├── deploy ├── aks │ ├── ai-sentry-deployment.yaml │ └── namespace.yaml └── local │ └── components │ ├── cosmosdb.yaml │ └── pubsub.yaml ├── infrastructure ├── APIM │ └── ai-sentry-policy.xml ├── bicepconfig.json ├── deploy.ps1 ├── main.bicep ├── main.param.json └── 
open-ai │ └── main.bicep ├── requirements.txt ├── scripts ├── create-escaped-json.ps1 ├── setup-env.ps1 └── setup-env.sh └── tests ├── http ├── .env.sample ├── adapter_test-ai-sentry.http ├── adapter_test-apim.http ├── adapter_test.http ├── get-thread.http ├── list_assistants.http └── non_streaming_embedding.http ├── loadTests ├── embeddings │ └── locust.py ├── non-streaming │ └── locustfile.py └── streaming │ └── locustfile.py └── sdk ├── embedding.py └── stream_completion.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | #dapr logs 163 | .log 164 | .DS_Store 165 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
"name": "cosmosDB logger with Dapr",
If you would like to contribute to the solution accelerator repository there are many ways you can help.
27 | 28 | ### Contributor License Agreement (CLA) 29 | 30 | You will need to complete a Contributor License Agreement (CLA). Briefly, this agreement testifies that you are granting us permission to use the submitted change according to the terms of the project's license, and that the work being submitted is under appropriate copyright. 31 | 32 | Please submit a Contributor License Agreement (CLA) before submitting a pull request. You may visit [https://cla.microsoft.com](https://cla.microsoft.com) to sign digitally. Alternatively, download the agreement ([Microsoft Contribution License Agreement.docx](https://www.codeplex.com/Download?ProjectName=typescript&DownloadId=822190)), sign, scan, and email it back to . Be sure to include your github user name along with the agreement. Once we have received the signed CLA, we'll review the request. 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI-Sentry Facade 2 | 3 | 4 | ![AI-Sentry-features image](/content/images/AI-Sentry-features.png) 5 | 6 | *Ai-Sentry* is transparent python + DAPR based pluggable Generative AI Facade layer, designed to support the following features for large enterprises developing and operating Generative AI solutions: 7 | 8 | - Cross charge back on token usage across different openAI consumers 9 | - Request/Response async based logging with ability to toggle PII stripping of information. This level of logging is useful for many things such as legal compliance as well as assessing and replaying back request/responses against newer models to help you deal with model upgrades without affecting your existing users. 10 | - Smarter load balancing by taking into account Azure openAI's response header load metrics and pooling of multi backends with same model capabilities 11 | - Support streaming and non streaming responses (including logging of these) 12 | - Extensibility of custom adapters to help you deal with SDK / API deprecations from client side - so you can provide backwards compatibility if needed. 13 | 14 | 15 | AI-Sentry is not designed to replace existing API Gateway solutions such as Azure APIM - rather it is designed to sit between API Gateway and the openAI endpoints - providing ultimate control for your openAI solutions. 
|AI-SENTRY-LANGUAGE-KEY| your Cognitive Services General API Key| CosmosDB Worker |
|AI-SENTRY-LANGUAGE-ENDPOINT| your language text analytics or general service endpoint url| CosmosDB Worker |
Thankfully our colleague Graeme Foster has published a dotnet version with similar feature sets.
You will only need to do this once across all repos using our CLA. 77 | 78 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 79 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 80 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 81 | 82 | ## Trademarks 83 | 84 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 85 | trademarks or logos is subject to and must follow 86 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 87 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 88 | Any use of third-party trademarks or logos are subject to those third-party's policies. 89 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 
8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 
# --- aisentry/adapters/Api_Request_Transformer.py ---
from abc import ABC, abstractmethod


class ApiRequestTransformer(ABC):
    """Contract for pluggable request adapters.

    AI-Sentry runs each adapter listed in the ``ai-sentry-adapters``
    header over the outgoing openAI request; every hook receives one
    component of the request and returns the (possibly modified)
    component.
    """

    @abstractmethod
    def transform_path(self, path):
        """Return the (possibly rewritten) request path."""

    @abstractmethod
    def transform_method(self, method):
        """Return the (possibly rewritten) HTTP method."""

    @abstractmethod
    def transform_body(self, body):
        """Return the (possibly rewritten) request body."""

    @abstractmethod
    def transform_query_string(self, query_string):
        """Return the (possibly rewritten) query parameters."""

    @abstractmethod
    def transform_headers(self, headers):
        """Return the (possibly rewritten) header mapping."""


# --- aisentry/adapters/SampleAdapter/sample_api_request_transformer.py ---
from typing import Optional


class SampleApiRequestTransformer(ApiRequestTransformer):
    """Sample adapter: passes every component through unchanged except
    for injecting one demonstration header."""

    def __init__(self, request):
        # request: the outgoing httpx.Request being prepared — kept for
        # use by custom transformations; unused by this sample.
        self.request = request

    def transform_body(self, body: Optional[str] = None):
        # Pass-through: no body rewriting in the sample.
        return body

    def transform_query_string(self, query_string: Optional[str] = None):
        # Pass-through: no query-string rewriting in the sample.
        return query_string

    def transform_headers(self, headers: Optional[dict] = None):
        # Demonstrates header injection; mutates and returns the mapping.
        if headers is None:
            headers = {}
        headers['Sample-Api-Request-Header'] = 'SAMPLE VALUE'
        return headers

    def transform_method(self, method: Optional[str] = None):
        # Pass-through: no method rewriting in the sample.
        return method

    def transform_path(self, path: Optional[str] = None):
        # Pass-through: no path rewriting in the sample.
        return path


# --- aisentry/adapters/adapters.py ---
from enum import Enum


class AdapterEnum(Enum):
    """Adapter names accepted in the ``ai-sentry-adapters`` header."""
    SampleApiRequestTransformer = 'SampleApiRequestTransformer'
    Adapter2 = 'Adapter2'
    Adapter3 = 'Adapter3'


# Registry of adapter name -> implementing class; replaces the previous
# if/elif chain so adding an adapter needs only one entry here.
_ADAPTER_CLASSES = {
    AdapterEnum.SampleApiRequestTransformer.value: SampleApiRequestTransformer,
}


def return_adapter(request, adapter):
    """Instantiate the adapter class registered under *adapter*.

    Args:
        request: the outgoing httpx.Request, handed to the adapter.
        adapter: adapter name string (see AdapterEnum values).

    Returns:
        An ApiRequestTransformer instance for the named adapter.

    Raises:
        ValueError: if no implementation is registered for the name.
            Adapter2/Adapter3 are declared in AdapterEnum but have no
            implementation yet, so they raise here too (same behavior
            as the original chain).
    """
    adapter_cls = _ADAPTER_CLASSES.get(adapter)
    if adapter_cls is None:
        raise ValueError(f"Invalid adapter enum: {adapter}")
    return adapter_cls(request)
# --- aisentry/asgi_facade.py ---
from facade import app

if __name__ == "__main__":
    # Bind on all interfaces; 6124 matches the facade appPort in dapr.yaml.
    app.run(host='0.0.0.0', port=6124)


# --- aisentry/asgi_summary_worker.py ---
import os
from worker.usage_summary import usage_logger

# Port is configurable via USAGE_WORKER_PORT; previously this env var was
# read but ignored and 7001 was hard-coded in the run() call.
# NOTE(review): dapr.yaml declares appPort 6002 for this app while the
# default here is 7001 — confirm the intended port.
app_port = os.getenv('USAGE_WORKER_PORT', '7001')
if __name__ == "__main__":
    usage_logger.run(host='0.0.0.0', port=int(app_port))


# --- aisentry/asgi_worker.py ---
import os
from worker.cosmos_logger import cosmos_logger

# Port is configurable via COSMOSWORKER_PORT; previously this env var was
# read but ignored and 7000 was hard-coded in the run() call.
# NOTE(review): dapr.yaml declares appPort 6001 for this app while the
# default here is 7000 — confirm the intended port.
app_port = os.getenv('COSMOSWORKER_PORT', '7000')
if __name__ == "__main__":
    cosmos_logger.run(host='0.0.0.0', port=int(app_port))
-------------------------------------------------------------------------------- /aisentry/facade/app.py: -------------------------------------------------------------------------------- 1 | \ 2 | import logging 3 | import uuid 4 | from datetime import datetime 5 | from dapr.clients import DaprClient 6 | from azure.identity import DefaultAzureCredential 7 | import httpcore 8 | from enum import Enum 9 | from typing import Tuple 10 | from quart import Quart, jsonify, request, make_response 11 | from quart.helpers import stream_with_context 12 | from urllib.request import urlopen 13 | from urllib.parse import urljoin 14 | from datetime import datetime, timezone 15 | import httpx 16 | from requests.exceptions import HTTPError 17 | import jwt 18 | import json 19 | from dotenv import load_dotenv 20 | import os 21 | import tiktoken 22 | from utils.ai_sentry_helpers import select_pool, init_endpoint_stats, getNextAvailableEndpointInfo, AISentryHeaders, openAILogObject,Usage, num_tokens_from_string 23 | from adapters.adapters import return_adapter 24 | 25 | # initial setup for logging / env variable loading 26 | log_level = os.getenv('LOG_LEVEL', 'INFO').upper() 27 | 28 | logger = logging.getLogger(__name__) 29 | logging.basicConfig(level=getattr(logging, log_level), 30 | format='%(asctime)s - %(levelname)s - %(message)s', 31 | datefmt='%d-%m-%Y %H:%M:%S' 32 | ) 33 | load_dotenv(".env", override=True) 34 | 35 | logger.info("Starting Ai-Sentry Facade app") 36 | app = Quart(__name__) 37 | 38 | 39 | # Setup openAI Endpoints 40 | endpoint_info = os.getenv('AI-SENTRY-ENDPOINT-CONFIG') 41 | if endpoint_info is None: 42 | raise ValueError("AI-SENTRY-ENDPOINT-CONFIG environment variable is not set") 43 | 44 | 45 | logger.debug(f"AI-SENTRY-ENDPOINT-CONFIG value: {endpoint_info}") 46 | 47 | # Convert the JSON string back to a Python object 48 | endpoint_data = json.loads(endpoint_info) 49 | 50 | 51 | 52 | open_ai_endpoint_availability_stats = init_endpoint_stats(endpoint_data) 53 | 
logger.info(f"Configured the following openAiEndpoints: {open_ai_endpoint_availability_stats}") 54 | 55 | # Initialise DaprClient globally 56 | daprClient = DaprClient() 57 | 58 | streaming_completion_token_count=0 59 | streaming_prompt_token_count=0 60 | model_name="" 61 | openai_response_id="" 62 | 63 | @app.route('/liveness', methods=['GET']) 64 | async def kubeliveness(): 65 | return jsonify(message="Kubernetes Liveness check") 66 | 67 | @app.route('/dapr/health', methods=['GET']) 68 | async def dapr_health_check(): 69 | return '', 200 70 | 71 | # Service unavailable 72 | # return '', 503 73 | 74 | 75 | @app.route('/dapr/config', methods=['GET']) 76 | async def dapr_config(): 77 | return '', 200 78 | 79 | 80 | 81 | @app.route('/openai/', methods=['GET', 'POST', 'PUT', 'DELETE']) 82 | async def catch_all(path): 83 | 84 | # Get the original request method, headers, body and parameters 85 | method = request.method 86 | original_headers = request.headers 87 | params = request.args 88 | body = None 89 | body = await request.get_data() 90 | 91 | # Request Processed variable 92 | request_processed = False 93 | max_retries = 3 94 | current_retry = 0 95 | 96 | # pull out the ai-sentry specific headers - we use them further for worker processing options. 
97 | ai_sentry_headers = AISentryHeaders() 98 | ai_sentry_headers_used = ai_sentry_headers.validate_headers(original_headers.items()) 99 | logger.info(f"ai-sentry headers used: {ai_sentry_headers_used}") 100 | 101 | pool_name = ai_sentry_headers_used.get('ai-sentry-backend-pool', None) 102 | ai_sentry_adapters = ai_sentry_headers_used.get('ai-sentry-adapters', None) 103 | x_aisentry_correlation = ai_sentry_headers_used.get('x-aisentry-correlation', "00000000-0000-0000-0000-000000000000") 104 | logger.info(f"correlation id used: {x_aisentry_correlation}") 105 | 106 | logger.info(f"ai-sentry adapters used: {ai_sentry_adapters}") 107 | 108 | ai_sentry_adapters_json = json.loads(ai_sentry_adapters) 109 | logger.info(f"Selected pool name: {pool_name}") 110 | 111 | # Create a new set of headers that exclude the ai-sentry specific headers which we will forward onto openAI endpoints 112 | exclude_headers = ['host', 'content-length']+list(ai_sentry_headers_used) 113 | openAI_request_headers = {k: v for k, v in original_headers.items() if k.lower() not in exclude_headers} 114 | 115 | pool_endpoints = select_pool(open_ai_endpoint_availability_stats, pool_name) 116 | 117 | #strip api-key value if it is in use 118 | pool_endpoints_without_api_key = [{k: v for k, v in endpoint.items() if k != 'api-key'} for endpoint in pool_endpoints] 119 | logger.info(f"Selected pool: {pool_endpoints_without_api_key}") 120 | 121 | while not request_processed and current_retry <= max_retries: 122 | logger.info(f"Processing request retry#: {current_retry}") 123 | endpoint_info = await getNextAvailableEndpointInfo(pool_endpoints) 124 | client = endpoint_info["client"] 125 | 126 | 127 | # if openAI_request_headers.get('Api-Key') is not None: 128 | # logger.info("detected use of api-key header - will use this for authentication") 129 | # logger.debug(f"Swapping out api-key inside header with {endpoint_info['api-key']} value") 130 | # openAI_request_headers['Api-Key'] = endpoint_info['api-key'] 131 | 
132 | if endpoint_info['api-key'] is not None: 133 | logger.info("No api-key header detected - will use the default api-key for authentication") 134 | openAI_request_headers['Api-Key'] = endpoint_info['api-key'] 135 | 136 | else: 137 | logger.info("No api-key config detected - will use oAuth to talk to openAI backend services.") 138 | #Get Access Token from workload identity 139 | credential = DefaultAzureCredential() 140 | token = credential.get_token("https://cognitiveservices.azure.com/.default") 141 | openAI_request_headers['Authorization'] = f"Bearer {token.token}" 142 | 143 | 144 | decoded_body = body.decode('UTF-8').strip() 145 | json_body = None 146 | 147 | if not decoded_body: 148 | logger.info("Received an empty or None body") 149 | # Handle the empty or None body case here 150 | json_body = {} 151 | else: 152 | json_body = json.loads(decoded_body) 153 | 154 | 155 | object_value = json_body.get("object") 156 | 157 | if object_value == "assistant": 158 | logger.info("Detected assistant request") 159 | prompt_contents = None 160 | prompt_contents_string = None 161 | 162 | if 'messages' in json_body: 163 | prompt_contents = json_body['messages'] 164 | prompt_contents_string = json.dumps(prompt_contents) 165 | 166 | else: 167 | logger.info("Messages not found in json_body, assuming the request is an embedding request") 168 | prompt_contents= json_body.get('input') 169 | prompt_contents_string = json.dumps(prompt_contents) 170 | 171 | 172 | 173 | is_stream_request = False 174 | if json_body.get('stream') is True: 175 | logger.info("Detected stream request") 176 | is_stream_request = True 177 | 178 | 179 | 180 | # Create a httpx Request object 181 | timeout = httpx.Timeout(timeout=5.0, read=60.0) 182 | 183 | # Apply the adapter transformation logic one by one 184 | for adapter in ai_sentry_adapters_json: 185 | logger.info(f"Applying transformation logic for adapter: {adapter}") 186 | try: 187 | adapter_instance = return_adapter(request, adapter) 188 | except 
Exception as e: 189 | logger.error(f"({x_aisentry_correlation}) #1 Error loading adapter: {adapter} - {e}") 190 | return jsonify(error=str(e)), 500 191 | path = adapter_instance.transform_path(path) 192 | method = adapter_instance.transform_method(method) 193 | body = adapter_instance.transform_body(body) 194 | params = adapter_instance.transform_query_string(params) 195 | openAI_request_headers = adapter_instance.transform_headers(openAI_request_headers) 196 | logger.info(f"Transformation logic applied for adapter: {adapter}") 197 | 198 | req = client.build_request(method, path, content=body, headers=openAI_request_headers, params=params, timeout=timeout) 199 | 200 | logger.info(f"Forwarding {method} request to {req.url}") 201 | 202 | # Handle streaming and non-streaming responses 203 | if is_stream_request: 204 | try: 205 | response = await client.send(req, stream=True) 206 | # potentially recieve a timeout or a HTTP > 499 207 | response.raise_for_status() 208 | current_retry += 1 209 | 210 | except httpcore.ConnectTimeout as timeout_err: 211 | logger.error(f"({x_aisentry_correlation}) #2 Connection timed out: {timeout_err}") 212 | return jsonify(error=str(timeout_err)), 500 213 | 214 | except HTTPError as http_err: 215 | logger.info(f"HTTP error occurred: {http_err}") 216 | if http_err.response.status_code == 429: # 429 is the status code for Too Many Requests 217 | logger.info(f"Received 429 response from endpoint, retrying next available endpoint") 218 | current_retry += 1 219 | endpoint_info["connection_errors_count"]+=1 220 | request_processed = False 221 | continue 222 | 223 | except Exception as e: 224 | # Connection Failures 225 | logger.error(f"({x_aisentry_correlation}) #3 An unexpected error occurred: {e}") 226 | 227 | if "429 Too Many Requests" in str(e): 228 | logger.info(f"Received 429 response from endpoint, retrying next available endpoint") 229 | current_retry += 1 230 | endpoint_info["connection_errors_count"]+=1 231 | request_processed = False 
232 | continue 233 | 234 | return jsonify(error=str(e)), 500 235 | 236 | 237 | @stream_with_context 238 | async def stream_response(response): 239 | logger.info("Streaming response") 240 | 241 | complete_buffered_response = [] 242 | global content_buffered_string 243 | content_buffered = [] 244 | response_stream = [] 245 | global model_name 246 | global openai_response_id 247 | 248 | async for line in response.aiter_lines(): 249 | yield f"{line}\r\n" 250 | if line.startswith("data: "): 251 | data=line[6:] 252 | if data!= "[DONE]": 253 | # buffer the response - so we can calculate the token count using tiktok library 254 | complete_buffered_response.append(data) 255 | streaming_content_json = json.loads(data) 256 | logger.debug(f"Streaming content: {streaming_content_json}") 257 | model_name = streaming_content_json['model'] 258 | openai_response_id= streaming_content_json['id'] 259 | if streaming_content_json['choices']: 260 | delta = streaming_content_json['choices'][0]['delta'] 261 | response_stream.append(streaming_content_json) 262 | 263 | if delta.get('content') is not None: 264 | content_buffered.append(delta['content']) 265 | 266 | 267 | content_buffered_string = "".join(content_buffered) 268 | 269 | # Calculate the token count using tiktok library 270 | global streaming_completion_token_count 271 | global streaming_prompt_token_count 272 | 273 | streaming_completion_token_count = num_tokens_from_string(content_buffered_string, model_name) 274 | streaming_prompt_token_count = num_tokens_from_string(prompt_contents_string, model_name) 275 | 276 | logger.info(f"Streamed completion total Token count: {streaming_completion_token_count}") 277 | logger.info(f"Streamed prompt total Token count: {streaming_prompt_token_count}") 278 | try: 279 | proxy_streaming_response = await make_response( stream_response(response)) 280 | proxy_streaming_response_body = await proxy_streaming_response.data 281 | proxy_streaming_response.timeout = None 282 | 
proxy_streaming_response.status_code = response.status_code 283 | proxy_streaming_response.headers = {k: str(v) for k, v in response.headers.items()} 284 | except Exception as e: 285 | logger.error(f"({x_aisentry_correlation}) #4 An error occurred while streaming response: {e}") 286 | return jsonify(error=str(e)), 500 287 | 288 | # Record the stats for openAi endpoints 289 | if proxy_streaming_response.headers.get("x-ratelimit-remaining-tokens") is not None: 290 | endpoint_info["x-ratelimit-remaining-tokens"]=response.headers["x-ratelimit-remaining-tokens"] 291 | else: 292 | endpoint_info["x-ratelimit-remaining-tokens"]=0 293 | 294 | if proxy_streaming_response.headers.get("x-ratelimit-remaining-requests") is not None: 295 | endpoint_info["x-ratelimit-remaining-requests"]=response.headers["x-ratelimit-remaining-requests"] 296 | else: 297 | endpoint_info["x-ratelimit-remaining-tokens"]=0 298 | endpoint_info["x-ratelimit-remaining-requests"]=0 299 | 300 | utc_now = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') 301 | request_body = json.loads(body) 302 | 303 | global model_name 304 | global openai_response_id 305 | global content_buffered_string 306 | 307 | streamed_token_usage = Usage(streaming_completion_token_count, streaming_prompt_token_count, streaming_completion_token_count+streaming_prompt_token_count) 308 | 309 | openAi_response_object = openAILogObject(date_time_utc=utc_now, 310 | headers=openAI_request_headers, 311 | params=params, 312 | request_body=request_body, 313 | response_body=json.dumps(content_buffered_string), 314 | sentry_ai_headers=ai_sentry_headers_used, 315 | is_Streaming=is_stream_request, 316 | usage=streamed_token_usage.to_dict(), 317 | model=model_name, 318 | openai_response_id=openai_response_id 319 | ) 320 | 321 | logger.debug(f"OpenAI response: {json.dumps(openAi_response_object.to_dict())}") 322 | 323 | # Publish response payload to background queue for further processing (i.e. 
logging, PII stripping, etc) 324 | try: 325 | logger.info("Publishing to Dapr pub/sub") 326 | 327 | dapr_pub = daprClient.publish_event( 328 | pubsub_name='openaipubsub', 329 | topic_name='requests-logging', 330 | data = json.dumps(json.dumps(openAi_response_object.to_dict())), 331 | data_content_type='application/json' 332 | ) 333 | 334 | logger.info(f"Published to Dapr pub/sub: {dapr_pub}") 335 | request_processed = True 336 | 337 | except Exception as e: 338 | logger.error(f"({x_aisentry_correlation}) #5 Error publishing to Dapr pub/sub: {e}") 339 | 340 | 341 | return proxy_streaming_response 342 | 343 | else: 344 | try: 345 | response = await client.send(req, stream=False) 346 | response.raise_for_status() 347 | 348 | except Exception as e: 349 | # Connection Failures 350 | if response is None or response.status_code > 499: 351 | 352 | logger.error(f"({x_aisentry_correlation}) #6 An unexpected error occurred: {e}") 353 | # increment connection errors count for the endpoint 354 | endpoint_info["connection_errors_count"]+=1 355 | current_retry += 1 356 | request_processed = False 357 | continue 358 | else: 359 | logger.error(f"({x_aisentry_correlation}) #7 An unexpected error occured: {e}") 360 | 361 | # If response is a 429 Increment retry count - to pick next aviable endpoint 362 | if response.status_code == 429: 363 | 364 | logger.info(f"Received 429 response from endpoint, retrying next available endpoint") 365 | #endpoint_info["x-retry-after-ms"]=response.headers["x-retry-after-ms"] 366 | current_retry += 1 367 | endpoint_info["connection_errors_count"]+=1 368 | request_processed = False 369 | continue 370 | 371 | response_body = await response.aread() 372 | response_headers = {k: str(v) for k, v in response.headers.items()} 373 | proxy_response = await make_response( (response_body, response.status_code, response_headers) ) 374 | 375 | # Process in-line workers 376 | # Record the stats for openAi endpoints 377 | if 
response.headers.get("x-ratelimit-remaining-tokens") is not None: 378 | endpoint_info["x-ratelimit-remaining-tokens"]=response.headers["x-ratelimit-remaining-tokens"] 379 | else: 380 | endpoint_info["x-ratelimit-remaining-tokens"]=0 381 | 382 | if response.headers.get("x-ratelimit-remaining-requests") is not None: 383 | endpoint_info["x-ratelimit-remaining-requests"]=response.headers["x-ratelimit-remaining-requests"] 384 | else: 385 | endpoint_info["x-ratelimit-remaining-tokens"]=0 386 | 387 | utc_now = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') 388 | request_body = json_body 389 | response_json = json.loads(response_body) 390 | 391 | #Extract the token count from the response 392 | non_streaming_token_count = response_json.get('usage') 393 | non_streaming_model = response_json.get('model') 394 | non_streaming_openai_id = response_json.get('id', str(uuid.uuid4())) 395 | 396 | logger.info(f"non_streaming_token_count: {non_streaming_token_count}") 397 | logger.info(f"non_streaming_model: {non_streaming_model}") 398 | 399 | 400 | openAi_response_object = openAILogObject(date_time_utc=utc_now, 401 | headers=openAI_request_headers, 402 | params=params, 403 | request_body=request_body, 404 | response_body=response_json, 405 | sentry_ai_headers=ai_sentry_headers_used, 406 | is_Streaming=is_stream_request, 407 | usage=non_streaming_token_count, 408 | model=non_streaming_model, 409 | openai_response_id=non_streaming_openai_id) 410 | 411 | logger.debug(f"OpenAI response: {json.dumps(openAi_response_object.to_dict())}") 412 | 413 | # Publish response payload to background queue for further processing (i.e. 
logging, PII stripping, etc) 414 | try: 415 | logger.info("Publishing to Dapr pub/sub") 416 | 417 | dapr_pub = daprClient.publish_event( 418 | pubsub_name='openaipubsub', 419 | topic_name='requests-logging', 420 | data = json.dumps(json.dumps(openAi_response_object.to_dict())), 421 | data_content_type='application/json' 422 | ) 423 | 424 | logger.info(f"Published to Dapr pub/sub: {dapr_pub}") 425 | request_processed = True 426 | 427 | except Exception as e: 428 | logger.error(f"({x_aisentry_correlation}) #8 Error publishing to Dapr pub/sub: {e}") 429 | 430 | return proxy_response 431 | 432 | return jsonify(message=f"Request failed to process. Attempted to run: {current_retry}, against AI endpoint configuration unsucessfully"), 500 433 | -------------------------------------------------------------------------------- /aisentry/facade/req.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | aiohttp==3.9.3 3 | aiosignal==1.3.1 4 | annotated-types==0.6.0 5 | anyio==4.3.0 6 | asgiref==3.8.1 7 | async==0.6.2 8 | asyncio==3.4.3 9 | attrs==23.2.0 10 | azure-ai-textanalytics==5.3.0 11 | azure-common==1.1.28 12 | azure-core==1.30.1 13 | azure-core-tracing-opentelemetry==1.0.0b11 14 | azure-functions==1.18.0 15 | azure-functions-durable==1.2.9 16 | azure-identity==1.16.1 17 | azure-keyvault-secrets==4.7.0 18 | azure-monitor-opentelemetry==1.1.0 19 | azure-monitor-opentelemetry-exporter==1.0.0b19 20 | azure-search-documents==11.4.0b11 21 | azure-storage-blob==12.19.0 22 | blinker==1.7.0 23 | Brotli==1.1.0 24 | certifi==2024.2.2 25 | cffi==1.16.0 26 | charset-normalizer==3.3.2 27 | click==8.1.7 28 | cloudevents==1.10.1 29 | colorama==0.4.6 30 | ConfigArgParse==1.7 31 | cryptography==42.0.5 32 | dapr==1.13.0 33 | dapr-client==0.4.0b1 34 | dapr-ext-fastapi==1.10.0 35 | dapr-ext-grpc==1.12.0 36 | Deprecated==1.2.14 37 | deprecation==2.1.0 38 | distro==1.9.0 39 | fastapi==0.110.0 40 | fastapi-responses==0.2.1 41 | 
fixedint==0.1.6 42 | flasgger==0.9.7.1 43 | Flask==3.0.2 44 | Flask-Cors==4.0.0 45 | flask-dapr==1.12.0 46 | Flask-Login==0.6.3 47 | frozenlist==1.4.1 48 | furl==2.1.3 49 | gevent==24.2.1 50 | geventhttpclient==2.0.12 51 | googleapis-common-protos==1.63.0 52 | greenlet==3.0.3 53 | grpcio==1.62.1 54 | grpcio-status==1.62.1 55 | gunicorn==21.2.0 56 | h11==0.14.0 57 | h2==4.1.0 58 | hpack==4.0.0 59 | httpcore==1.0.4 60 | httpx==0.27.0 61 | Hypercorn==0.16.0 62 | hyperframe==6.0.1 63 | idna==3.7 64 | importlib-metadata==6.8.0 65 | isodate==0.6.1 66 | itsdangerous==2.1.2 67 | Jinja2==3.1.4 68 | jsonschema==4.20.0 69 | jsonschema-specifications==2023.11.2 70 | jwt==1.3.1 71 | locust==2.24.1 72 | MarkupSafe==2.1.5 73 | mistune==3.0.2 74 | msal==1.28.0 75 | msal-extensions==1.1.0 76 | msgpack==1.0.8 77 | msrest==0.7.1 78 | multidict==6.0.5 79 | numpy==1.26.2 80 | oauthlib==3.2.2 81 | openai==1.14.2 82 | opentelemetry-api==1.21.0 83 | opentelemetry-instrumentation==0.42b0 84 | opentelemetry-instrumentation-aiohttp-client==0.42b0 85 | opentelemetry-instrumentation-asgi==0.42b0 86 | opentelemetry-instrumentation-dbapi==0.42b0 87 | opentelemetry-instrumentation-django==0.42b0 88 | opentelemetry-instrumentation-fastapi==0.42b0 89 | opentelemetry-instrumentation-flask==0.42b0 90 | opentelemetry-instrumentation-httpx==0.42b0 91 | opentelemetry-instrumentation-psycopg2==0.42b0 92 | opentelemetry-instrumentation-requests==0.42b0 93 | opentelemetry-instrumentation-urllib==0.42b0 94 | opentelemetry-instrumentation-urllib3==0.42b0 95 | opentelemetry-instrumentation-wsgi==0.42b0 96 | opentelemetry-resource-detector-azure==0.1.0 97 | opentelemetry-sdk==1.21.0 98 | opentelemetry-semantic-conventions==0.42b0 99 | opentelemetry-util-http==0.42b0 100 | orderedmultidict==1.0.1 101 | packaging==24.0 102 | pandas==2.1.3 103 | pandas-stubs==2.1.1.230928 104 | portalocker==2.8.2 105 | priority==2.0.0 106 | protobuf==4.25.3 107 | psutil==5.9.8 108 | pycparser==2.21 109 | pydantic==2.6.4 110 | 
pydantic_core==2.16.3 111 | PyJWT==2.8.0 112 | python-dateutil==2.9.0.post0 113 | python-dotenv==1.0.1 114 | pytz==2023.3.post1 115 | PyYAML==6.0.1 116 | pyzmq==25.1.2 117 | Quart==0.19.4 118 | quart-cors==0.7.0 119 | referencing==0.32.0 120 | regex==2023.10.3 121 | requests==2.31.0 122 | requests-oauthlib==1.3.1 123 | roundrobin==0.0.4 124 | rpds-py==0.13.2 125 | six==1.16.0 126 | sniffio==1.3.1 127 | starlette==0.36.3 128 | tiktoken==0.7.0 129 | tqdm==4.66.3 130 | types-pytz==2023.3.1.1 131 | typing_extensions==4.10.0 132 | tzdata==2023.3 133 | urllib3==2.2.2 134 | uvicorn==0.24.0.post1 135 | Werkzeug==3.0.1 136 | wrapt==1.16.0 137 | wsproto==1.2.0 138 | yarl==1.9.4 139 | zipp==3.17.0 140 | zope.event==5.0 141 | zope.interface==6.2 142 | -------------------------------------------------------------------------------- /aisentry/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | aiohttp==3.9.3 3 | aiosignal==1.3.1 4 | annotated-types==0.6.0 5 | anyio==4.3.0 6 | asgiref==3.8.1 7 | async==0.6.2 8 | asyncio==3.4.3 9 | attrs==23.2.0 10 | azure-ai-textanalytics==5.3.0 11 | azure-common==1.1.28 12 | azure-core==1.30.1 13 | azure-identity==1.16.1 14 | blinker==1.7.0 15 | certifi==2024.2.2 16 | cffi==1.16.0 17 | charset-normalizer==3.3.2 18 | click==8.1.7 19 | cloudevents==1.10.1 20 | cryptography==42.0.5 21 | dapr==1.13.0 22 | dapr-client==0.4.0b1 23 | deprecation==2.1.0 24 | distro==1.9.0 25 | fastapi==0.110.0 26 | fastapi-responses==0.2.1 27 | Flask==3.0.2 28 | frozenlist==1.4.1 29 | googleapis-common-protos==1.63.0 30 | grpcio==1.62.1 31 | grpcio-status==1.62.1 32 | h11==0.14.0 33 | h2==4.1.0 34 | hpack==4.0.0 35 | httpcore==1.0.4 36 | httpx==0.27.0 37 | Hypercorn==0.16.0 38 | hyperframe==6.0.1 39 | idna==3.7 40 | isodate==0.6.1 41 |
# ------------------------------------------------------------------------------
# /aisentry/utils/__init__.py: (empty package marker)
# ------------------------------------------------------------------------------
# /aisentry/utils/ai_sentry_helpers.py
# ------------------------------------------------------------------------------
import os
import json
import logging
from dotenv import load_dotenv
import httpx
import tiktoken


# Initial setup for logging / env variable loading.
log_level = os.getenv('LOG_LEVEL', 'INFO').upper()

logger = logging.getLogger(__name__)
logging.basicConfig(level=getattr(logging, log_level),
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%d-%m-%Y %H:%M:%S'
                    )
load_dotenv(".env", override=True)


def get_endpoints_from_poolname(poolname, json_data):
    """Return the endpoint list for the named pool, or None when the pool is unknown."""
    for pool in json_data['pools']:
        if pool['name'] == poolname:
            return pool['endpoints']
    return None


def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return the number of tokens in *string* for the model named *encoding_name*.

    Fix: tiktoken raises KeyError for model names it does not recognise
    (e.g. deployment aliases or models newer than the installed tiktoken
    release); fall back to the cl100k_base encoding instead of crashing the
    token-accounting path.
    """
    logger.info(f"encoding_name: {encoding_name}")
    try:
        encoding = tiktoken.encoding_for_model(encoding_name)
    except KeyError:
        logger.warning(f"Unknown model name {encoding_name!r} - falling back to cl100k_base encoding")
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(string))


def init_endpoint_stats(pools_info):
    """Build the in-memory endpoint-stats structure, keyed by pool name.

    Each endpoint entry carries its last observed rate-limit headers, an error
    counter, and a long-lived httpx.AsyncClient (connection limits disabled by
    design so the proxy never queues on the client pool).
    """
    pool_endpoints = {}
    for pool in pools_info["pools"]:
        transformed_endpoints = []
        for endpoint in pool["endpoints"]:
            transformed_endpoints.append({
                "url": endpoint["url"],
                "x-ratelimit-remaining-requests": endpoint["x-ratelimit-remaining-requests"],
                "x-ratelimit-remaining-tokens": endpoint["x-ratelimit-remaining-tokens"],
                "x-retry-after-ms": '0',
                "api-key": endpoint["api-key"],
                "client": httpx.AsyncClient(base_url=endpoint["url"],
                                            limits=httpx.Limits(max_keepalive_connections=None, max_connections=None)),
                "connection_errors_count": 0
            })
        pool_endpoints[pool["name"]] = transformed_endpoints
    return pool_endpoints


def select_pool(pool_endpoints, pool_name):
    """Return the endpoint list for *pool_name*, or None when it does not exist."""
    return pool_endpoints.get(pool_name, None)


async def getNextAvailableEndpointInfo(open_ai_endpoint_availability_stats):
    """Return the endpoint dict with the most remaining requests.

    Rate-limit header values are stored as strings, so they are compared
    numerically. (Selection deliberately ignores remaining *tokens*; the
    request head-room is the signal used for routing.)
    """
    logger.debug(f"open_ai_endpoint_availability_stats: {open_ai_endpoint_availability_stats}")
    best = max(open_ai_endpoint_availability_stats,
               key=lambda x: int(x['x-ratelimit-remaining-requests']))
    logger.info(f"Next available endpoint: {best['url']}")
    return best


class AISentryHeaders:
    """Validation for the ai-sentry specific HTTP headers.

    ai-sentry-backend-pool / ai-sentry-consumer / ai-sentry-adapters must be
    non-empty; ai-sentry-log-level must be one of COMPLETE,
    PII_STRIPPING_ENABLED or DISABLED; x-aisentry-correlation defaults to the
    all-zero GUID when supplied empty.
    """

    def __init__(self):
        # Acceptable values for the closed-set headers.
        self.header_values = {
            "ai-sentry-log-level": {"COMPLETE", "PII_STRIPPING_ENABLED", "DISABLED"}
        }

    def validate_headers(self, headers):
        """Return a dict of recognised (lower-cased) ai-sentry headers.

        Raises ValueError for empty required values or an invalid log level.
        Headers that are not ai-sentry specific are silently ignored.
        """
        valid_headers = {}

        for header, value in headers:
            logger.debug(f"header: {header}, value: {value}")
            if header.lower() == "ai-sentry-backend-pool":
                if value == "":
                    logger.info("ai-sentry-backend-pool cannot be an empty string")
                    raise ValueError("ai-sentry-backend-pool cannot be an empty string")
                else:
                    valid_headers[header.lower()] = value
            if header.lower() == "ai-sentry-consumer":
                if value == "":
                    raise ValueError("ai-sentry-consumer cannot be an empty string")
                else:
                    valid_headers[header.lower()] = value
            if header.lower() == "ai-sentry-adapters":
                if value == "":
                    logger.info("ai-sentry-backend-adapters cannot be an empty string")
                    raise ValueError("ai-sentry-adapters cannot be an empty string")
                else:
                    valid_headers[header.lower()] = value
            # The elif below pairs with this check only: the closed-set
            # validation runs for any header that is not x-aisentry-correlation.
            if header.lower() == "x-aisentry-correlation":
                if value == "":
                    logger.info("Assigning default value to x-aisentry-correlation")
                    valid_headers[header.lower()] = "00000000-0000-0000-0000-000000000000"
                else:
                    valid_headers[header.lower()] = value
            elif header.lower() in self.header_values:
                if value not in self.header_values[header.lower()]:
                    # Fix: message previously advertised "FULL", but the accepted
                    # set is COMPLETE / PII_STRIPPING_ENABLED / DISABLED.
                    raise ValueError(f"Invalid value {value} for header {header}, accepted values COMPLETE, PII_STRIPPING_ENABLED, DISABLED")
                else:
                    valid_headers[header.lower()] = value

        return valid_headers


class openAISummaryLogObject:
    """Flattened per-request usage record destined for the summary log store."""

    def __init__(self, id, LogId, timestamp, product_id, usage, model, month_year):
        self.id = id
        self.LogId = LogId
        self.timestamp = timestamp
        self.product_id = product_id
        self.usage = usage          # dict with prompt/completion/total token counts, or None
        self.model = model
        self.month_year = month_year

    def to_dict(self):
        """Serialise to the summary-log schema (token fields None when usage is absent)."""
        return {
            'id': self.id,
            'LogId': self.LogId,
            'model': self.model,
            'timestamp': self.timestamp,
            'ProductId': self.product_id,
            # Fix: use .get like the sibling fields so a usage dict missing
            # 'prompt_tokens' cannot raise KeyError mid-logging.
            'promptTokens': self.usage.get('prompt_tokens', None) if self.usage is not None else None,
            'responseTokens': self.usage.get('completion_tokens', None) if self.usage is not None else None,
            'totalTokens': self.usage.get('total_tokens', None) if self.usage is not None else None,
            'month_year': self.month_year
        }


class openAILogObject:
    """Full request/response capture published to the logging pub/sub topic."""

    def __init__(self, date_time_utc, headers, params, request_body, response_body,
                 sentry_ai_headers, is_Streaming, usage, model, openai_response_id):
        self.date_time_utc = date_time_utc
        self.headers = headers
        self.params = params
        self.request_body = request_body
        self.response_body = response_body
        self.sentry_ai_headers = sentry_ai_headers
        self.is_Streaming = is_Streaming
        self.usage = usage
        self.model = model
        self.openai_response_id = openai_response_id

    def to_dict(self):
        """Serialise to the request-log schema."""
        return {
            'date_time_utc': self.date_time_utc,
            'is_Streaming': self.is_Streaming,
            'headers': self.headers,
            'params': self.params,
            'request_body': self.request_body,
            'response_body': self.response_body,
            'sentry_ai_headers': self.sentry_ai_headers,
            'usage': self.usage,
            'model': self.model,
            'openai_response_id': self.openai_response_id
        }


class Usage:
    """Token-usage triple mirroring the OpenAI usage payload shape."""

    def __init__(self, completion_tokens=None, prompt_tokens=None, total_tokens=None):
        self.completion_tokens = completion_tokens
        self.prompt_tokens = prompt_tokens
        self.total_tokens = total_tokens

    def to_dict(self):
        return {
            'completion_tokens': self.completion_tokens,
            'prompt_tokens': self.prompt_tokens,
            'total_tokens': self.total_tokens
        }


# ------------------------------------------------------------------------------
# /aisentry/utils/analyze_pii.py
# ------------------------------------------------------------------------------
import asyncio
import logging
import os
import json
from typing import List
from dotenv import load_dotenv

from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics.aio import TextAnalyticsClient
from azure.ai.textanalytics import RecognizePiiEntitiesAction
import re


async def analyze_pii_async(input_text: List[str]) -> List[str]:
    """
    Processes a list of JSON strings, redacts PII, and returns valid JSON strings.

    Each input item is parsed as JSON, every string value is scanned with the
    Azure Text Analytics PII action, and high-confidence entities (score >= 0.8,
    DateTime excluded) are replaced with "PII_REDACTED". Items that cannot be
    parsed or redacted are returned unmodified.
    """
    log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
    logger = logging.getLogger(__name__)
    logging.basicConfig(level=getattr(logging, log_level),
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        datefmt='%d-%m-%Y %H:%M:%S'
                        )
    load_dotenv(".env", override=True)
    logger.debug(f"input_text: {input_text}")

    # NOTE(review): this batches 1250 *documents* per service call; the Azure
    # per-request document limit is much lower - confirm against the service tier.
    chunk_size = 1250
    endpoint = os.environ["AI-SENTRY-LANGUAGE-ENDPOINT"]
    key = os.environ["AI-SENTRY-LANGUAGE-KEY"]
    logger.info(f"Using cognitive endpoint: {endpoint}")

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    def smart_json_loads(s: str):
        """
        Attempts to load a JSON string, unescaping double backslashes if necessary.
        """
        try:
            # Try normal loading first
            return json.loads(s)
        except json.JSONDecodeError:
            # Heuristic: if there are many double backslashes, try unescaping
            if re.search(r'\\\\', s):
                try:
                    unescaped = s.encode().decode('unicode_escape')
                    return json.loads(unescaped)
                except Exception:
                    pass
            raise  # Re-raise if still failing

    async def redact_pii_in_obj(obj, pii_entities):
        """
        Recursively redacts PII entities in a JSON-like Python object.
        """
        if isinstance(obj, dict):
            return {k: await redact_pii_in_obj(v, pii_entities) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [await redact_pii_in_obj(item, pii_entities) for item in obj]
        elif isinstance(obj, str):
            redacted = obj
            for ent in pii_entities:
                # Only redact confident hits; DateTime is excluded because
                # timestamps are needed downstream.
                if ent["confidence_score"] >= 0.8 and ent["category"] != "DateTime":
                    redacted = redacted.replace(ent["text"], "PII_REDACTED")
            return redacted
        else:
            return obj

    output_texts = []

    # Fix: hold ONE client session for all chunks. Entering/exiting the
    # client's async context per chunk closed the underlying transport after
    # the first batch, so any subsequent batch failed on a closed client.
    async with text_analytics_client:
        for i in range(0, len(input_text), chunk_size):
            chunk = input_text[i:i+chunk_size]
            logger.info(f"Processing chunk of size {len(chunk)}")

            poller = await text_analytics_client.begin_analyze_actions(
                chunk,
                display_name="PII Analysis",
                actions=[RecognizePiiEntitiesAction()]
            )
            pages = await poller.result()
            document_results = []
            async for page in pages:
                document_results.append(page)

            # For each document, parse as JSON, redact PII, then dump as JSON string
            for doc_str, action_results in zip(chunk, document_results):
                pii_entities = []
                for result in action_results:
                    if result.kind == "PiiEntityRecognition" and not result.is_error:
                        for entity in result.entities:
                            logger.debug(f"......Entity: {entity.text}")
                            logger.debug(f".........Category: {entity.category}")
                            logger.debug(f".........Confidence Score: {entity.confidence_score}")
                            pii_entities.append({
                                "text": entity.text,
                                "category": entity.category,
                                "confidence_score": entity.confidence_score
                            })
                    elif getattr(result, "is_error", False):
                        logger.error(f'PII-Processing: An error with code {result.error.code} and message {result.error.message}')
                try:
                    data = smart_json_loads(doc_str)
                    redacted_data = await redact_pii_in_obj(data, pii_entities)
                    redacted_json = json.dumps(redacted_data, ensure_ascii=False, separators=(',', ':'))
                    output_texts.append(redacted_json)
                except Exception as e:
                    logger.error(f"Error redacting or serializing JSON: {e}")
                    # Best-effort: fall back to the unredacted document so the
                    # pipeline keeps one output per input.
                    output_texts.append(doc_str)

    logger.info(f"PII stripping completed")
    return output_texts


# ------------------------------------------------------------------------------
# /aisentry/utils/analyze_pii_chunked_ta.py
# ------------------------------------------------------------------------------
import asyncio
import logging
from typing import List


batch_size = 1250  # Azure has a limit of 1250 characters per document


def chunk_string(string, chunk_size):
    """Split *string* into consecutive pieces of at most *chunk_size* characters."""
    return [string[i:i+chunk_size] for i in range(0, len(string), chunk_size)]


async def analyze_pii_async(input_text: str) -> None:
    """Run Azure PII entity recognition over each document, in character chunks,
    printing redacted text and recognised entities.
    """
    # [START analyze_async]
    import os
    from azure.core.credentials import AzureKeyCredential
    from azure.ai.textanalytics.aio import TextAnalyticsClient
    from azure.ai.textanalytics import (
        RecognizeEntitiesAction,
        RecognizeLinkedEntitiesAction,
        RecognizePiiEntitiesAction,
        ExtractKeyPhrasesAction,
        AnalyzeSentimentAction,
    )

    endpoint = os.environ["AI-SENTRY-LANGUAGE-ENDPOINT"]
    key = os.environ["AI-SENTRY-LANGUAGE-KEY"]
    print(f"inputData: {input_text}")

    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key),
    )

    for doc in input_text:
        logging.info(f"CHUNKING DOC: {doc}")
        chunks = chunk_string(doc, batch_size)
        for chunk in chunks:
            actions = [RecognizePiiEntitiesAction()]
            poller = await text_analytics_client.begin_analyze_actions([chunk], actions, language="en")
            result = await poller.result()

            # NOTE(review): poller.result() yields one page of action results per
            # document - confirm the yielded item shape against the installed SDK.
            async for doc_result in result:
                print("Redacted Text: {}".format(doc_result.redacted_text))
                # Fix: entities is a plain list, so iterate synchronously;
                # 'async for' over it raised TypeError.
                for entity in doc_result.entities:
                    print("Entity: {}".format(entity.text))
                    print("Category: {}".format(entity.category))
                    print("Confidence Score: {}\n".format(entity.confidence_score))
34 | 35 | PII includes but is not limited to: 36 | Full Names: First and last names 37 | Addresses: Street address, city, state, zip code 38 | Phone Numbers: Any format of telephone numbers 39 | Email Addresses: Any format of email addresses 40 | Social Security Numbers (SSNs): XXX-XX-XXXX or similar formats 41 | Credit Card Numbers: Any format of credit/debit card numbers 42 | Bank Account Numbers: Any format of bank account numbers 43 | Driver's License Numbers: Any format of driver's license numbers 44 | Passport Numbers: Any format of passport numbers 45 | Date of Birth: Full date of birth (MM/DD/YYYY or similar formats) 46 | IP Addresses: Any format of IPv4 or IPv6 addresses 47 | API-KEY or Token: Any format of API keys or tokens 48 | Medical Information: Any health-related information that can identify an individual 49 | Biometric Data: Fingerprints, facial recognition data, etc. 50 | 51 | Instructions for the System: 52 | Input: Accept text data for analysis. 53 | Processing: 54 | Use pattern matching, regular expressions, and machine learning algorithms to identify potential PII. 55 | Cross-reference detected patterns with known PII formats. 56 | Output: 57 | Flag detected PII and categorize it. 58 | Provide a confidence score for each detected PII item. 59 | Highlight the specific text containing PII. 60 | 61 | Example: 62 | 63 | Input Text: 64 | 65 | John Doe lives at 123 Maple Street, Springfield, IL 62704. His email is john.doe@example.com, and his phone number is (555) 123-4567. He was born on 01/15/1985 and his SSN is 123-45-6789. 
async def get_chat_pii_stripped_completion(prompt):
    """Ask Azure OpenAI to rewrite *prompt* with all PII replaced.

    Sends the module-level ``pii_stripping_system_prompt`` as the system
    message and returns the model's rewritten text.

    Raises whatever ``openai`` raises on transport/auth failures.
    """
    # Send the request to Azure OpenAI
    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": pii_stripping_system_prompt},
            {"role": "user", "content": f"Rewrite the input and Strip out PII information as per the system message from following input: {prompt}"}
        ]
    )

    # openai>=1.x (this repo pins 1.14.2) returns typed pydantic objects, not
    # dicts: response['choices'] raises TypeError. Use attribute access.
    message_content = response.choices[0].message.content

    logger.info(f"PII Stripped Completion Text: {message_content}")
    return message_content
# The base class in approach.py is LoadBalancingApproach; the original
# imported a nonexistent name ``Approach``.
from approach import LoadBalancingApproach
import random


class RandomAllocation(LoadBalancingApproach):
    """Load-balancing approach that picks an Azure OpenAI endpoint at random."""

    def __init__(self, aoai_endpoints):
        # Original __init__ was missing ``self``; delegate endpoint storage
        # to the base class.
        super().__init__(aoai_endpoints)

    def find_available_aoai(self):
        """Return one endpoint chosen uniformly at random.

        The original decorated this with ``typing.overload`` (which is for
        type-checking stubs, not overriding) and called the ``random`` module
        itself — a TypeError at runtime. ``random.choice`` is the intended call.
        """
        return random.choice(self.aoai_endpoints)
the Authorization Header 27 | # """ 28 | # auth = request.headers.get("Authorization", None) 29 | # if not auth: 30 | # raise AuthError({"code": "authorization_header_missing", 31 | # "description": 32 | # "Authorization header is expected"}, 401) 33 | 34 | # parts = auth.split() 35 | 36 | # if parts[0].lower() != "bearer": 37 | # raise AuthError({"code": "invalid_header", 38 | # "description": 39 | # "Authorization header must start with" 40 | # " Bearer"}, 401) 41 | # elif len(parts) == 1: 42 | # raise AuthError({"code": "invalid_header", 43 | # "description": "Token not found"}, 401) 44 | # elif len(parts) > 2: 45 | # raise AuthError({"code": "invalid_header", 46 | # "description": 47 | # "Authorization header must be" 48 | # " Bearer token"}, 401) 49 | 50 | # token = parts[1] 51 | # return token 52 | 53 | 54 | 55 | 56 | # def verify_jwt(tenant_id: str, audience: str = None): 57 | # try: 58 | 59 | # bearer_token = get_token_auth_header() 60 | # #unverified_header = jwt.get_unverified_header(bearer_token) 61 | 62 | # signing_key = jwks_client.get_signing_key_from_jwt(bearer_token) 63 | 64 | 65 | # data = jwt.api_jwt.decode_complete( 66 | # bearer_token, 67 | # signing_key.key, 68 | # algorithms=["RS256"], 69 | # audience=AUDIENCE, 70 | # issuer="https://sts.windows.net/" + tenant_id + "/" 71 | # ) 72 | 73 | # payload, header = data["payload"], data["header"] 74 | 75 | # print(f"payload: {payload}") 76 | # print(f"header: {header}") 77 | # #alg_obj = jwt.get_algorithm_by_name(header["alg"]) 78 | # return True 79 | # except Exception as e: 80 | # raise AuthError({"code": "invalid_header", 81 | # "description": 82 | # "Unable to verify authentication" 83 | # f" token with error {e}"}, 401) 84 | 85 | 86 | # raise AuthError({"code": "invalid_header", 87 | # "description": "Unable to find appropriate key"}, 401) 88 | 89 | # #jwt_keys[tenant_id] = jwks 90 | 91 | 92 | 93 | # @app.route('/helloauth', methods=['POST']) 94 | # async def hello_auth() -> Tuple[dict, str]: 95 
| # bearer_token = get_token_auth_header() 96 | # verify_jwt("d3edbe6c-8eda-4e97-8370-86961098c24c") 97 | # jwt_token = jwt.get_unverified_header(bearer_token) 98 | 99 | # return jsonify(message=f"Hello, World {name}! and token {jwt_token}") -------------------------------------------------------------------------------- /aisentry/utils/combined-cert.crt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/aisentry/utils/combined-cert.crt -------------------------------------------------------------------------------- /aisentry/utils/combined_cert.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIHKTCCBhGgAwIBAgIQBXCHG2zHQ6+7D2T6NMnDgzANBgkqhkiG9w0BAQsFADBN 3 | MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMScwJQYDVQQDEx5E 4 | aWdpQ2VydCBTSEEyIFNlY3VyZSBTZXJ2ZXIgQ0EwHhcNMjQwMjI1MDAwMDAwWhcN 5 | MjUwMjI1MjM1OTU5WjB4MQswCQYDVQQGEwJVUzETMBEGA1UECBMKV2FzaGluZ3Rv 6 | bjEQMA4GA1UEBxMHUmVkbW9uZDEeMBwGA1UEChMVTWljcm9zb2Z0IENvcnBvcmF0 7 | aW9uMSIwIAYDVQQDDBkqLmNvZ25pdGl2ZS5taWNyb3NvZnQuY29tMIIBIjANBgkq 8 | hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwOt7yazkYYtbZqCoXhWHUbr/qT9nlnvG 9 | ucYmFDPiGRM8RSrrVKcNsBVD5MR7SuZUy9H8lT5v3jFbhZDEITgyqOgR9GFSJ2m8 10 | jWZG+YhPtCM+uHB9djFwj8O2GeBO176y151XRYVmVqf6igN3XrqNcNUSGGHjTeAK 11 | VwQSwRY11DxvtxmBhLVbwdXL7yD71/uJfau6a5jYUsXZQXmb8ABG2hpmXG0jEf/E 12 | LL3a4jXwTT/p+GhegdqF1/Mv8YvSnf5e0Df7l6+Nm3l9DWHHg+mHauXCmIlebZWo 13 | gkgd54IG+eqkZM5emyzGhq+AK5mg7Vp0GUGtLP0P3b8YY4gDzFOYzQIDAQABo4ID 14 | 2DCCA9QwHwYDVR0jBBgwFoAUD4BhHIIxYdUvKOeNRji0LOHG2eIwHQYDVR0OBBYE 15 | FOfeu5mW3U8S+pWU95vLuU11NhJPMIGBBgNVHREEejB4ghkqLmNvZ25pdGl2ZS5t 16 | aWNyb3NvZnQuY29tgh0qLmFwaS5jb2duaXRpdmUubWljcm9zb2Z0LmNvbYIdKi5j 17 | b2duaXRpdmVzZXJ2aWNlcy5henVyZS5jb22CHSouZGV2LmNvZ25pdGl2ZS5taWNy 18 | b3NvZnQuY29tMD4GA1UdIAQ3MDUwMwYGZ4EMAQICMCkwJwYIKwYBBQUHAgEWG2h0 19 | 
dHA6Ly93d3cuZGlnaWNlcnQuY29tL0NQUzAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0l 20 | BBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMIGNBgNVHR8EgYUwgYIwP6A9oDuGOWh0 21 | dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpY2VydFNIQTJTZWN1cmVTZXJ2ZXJD 22 | QS0xLmNybDA/oD2gO4Y5aHR0cDovL2NybDQuZGlnaWNlcnQuY29tL0RpZ2ljZXJ0 23 | U0hBMlNlY3VyZVNlcnZlckNBLTEuY3JsMH4GCCsGAQUFBwEBBHIwcDAkBggrBgEF 24 | BQcwAYYYaHR0cDovL29jc3AuZGlnaWNlcnQuY29tMEgGCCsGAQUFBzAChjxodHRw 25 | Oi8vY2FjZXJ0cy5kaWdpY2VydC5jb20vRGlnaUNlcnRTSEEyU2VjdXJlU2VydmVy 26 | Q0EtMi5jcnQwDAYDVR0TAQH/BAIwADCCAX8GCisGAQQB1nkCBAIEggFvBIIBawFp 27 | AHUATnWjJ1yaEMM4W2zU3z9S6x3w4I4bjWnAsfpksWKaOd8AAAGN3apYgQAABAMA 28 | RjBEAiAKD2RDCeXYaUHu3eyYlzxZWRpUr4Dxyu6FVuhqMqEllgIgRMsNK3OF28gC 29 | RCCzmKBnzcLRWdXle8s0zIhCSKQeC8EAdwB9WR4S4XgqexxhZ3xe/fjQh1wUoE6V 30 | nrkDL9kOjC55uAAAAY3dqliIAAAEAwBIMEYCIQC/cn2ZQiz1u93mR/3Zmo2hgXS/ 31 | ZvUyGlsgYpdvzOmgAwIhAOBXToAYj4TbG7vzRjl7//L1aSb+hgZolmYEk+2mNk0d 32 | AHcA5tIxY0B3jMEQQQbXcbnOwdJA9paEhvu6hzId/R43jlAAAAGN3apYuwAABAMA 33 | SDBGAiEAheIGgEAGUvtRnMbeMehCU1zYCgoRkSQC4aHb+q/Vl6kCIQCLWypyI5Lz 34 | 0f07mu3L714TCF2OUy6ioesfQV+fS7uHnzANBgkqhkiG9w0BAQsFAAOCAQEAtyyc 35 | uNwfq9q9pEitic7QisFpOVVBV721T9D6ase4BXQd0woeFPJoyDiZImBp+jmw5W9m 36 | pym/WSpIxxHZUAd0IvxYuRwXBdMnZX2sTCNRjY5rOSK4AFfm2OMW1o52Xqjgi+XG 37 | 4IhX3oWyHBiZXeyOT0TXuWUWq45t9/MY5Usl/72GSPxixGieUA4r6BdZ/zoHzB8K 38 | t8V/cVwYm11TenQN9BXYQXQ0o66Vjt1Zqgadx68fz+hPclNVxeZiXeKbknMiguBs 39 | gub46tAgZ2m+F9/vDIgmBjw7VzFLUAJZOPeNc7rAwWBO+eXpiKwfHOnn0yet6/np 40 | vpayI9kPWD0dNsWycA== 41 | -----END CERTIFICATE----- 42 | -----BEGIN CERTIFICATE----- 43 | MIIE6DCCA9CgAwIBAgIQAnQuqhfKjiHHF7sf/P0MoDANBgkqhkiG9w0BAQsFADBh 44 | MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 45 | d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD 46 | QTAeFw0yMDA5MjMwMDAwMDBaFw0zMDA5MjIyMzU5NTlaME0xCzAJBgNVBAYTAlVT 47 | MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxJzAlBgNVBAMTHkRpZ2lDZXJ0IFNIQTIg 48 | U2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB 49 | 
ANyuWJBNwcQwFZA1W248ghX1LFy949v/cUP6ZCWA1O4Yok3wZtAKc24RmDYXZK83 50 | nf36QYSvx6+M/hpzTc8zl5CilodTgyu5pnVILR1WN3vaMTIa16yrBvSqXUu3R0bd 51 | KpPDkC55gIDvEwRqFDu1m5K+wgdlTvza/P96rtxcflUxDOg5B6TXvi/TC2rSsd9f 52 | /ld0Uzs1gN2ujkSYs58O09rg1/RrKatEp0tYhG2SS4HD2nOLEpdIkARFdRrdNzGX 53 | kujNVA075ME/OV4uuPNcfhCOhkEAjUVmR7ChZc6gqikJTvOX6+guqw9ypzAO+sf0 54 | /RR3w6RbKFfCs/mC/bdFWJsCAwEAAaOCAa4wggGqMB0GA1UdDgQWBBQPgGEcgjFh 55 | 1S8o541GOLQs4cbZ4jAfBgNVHSMEGDAWgBQD3lA1VtFMu2bwo+IbG8OXsj3RVTAO 56 | BgNVHQ8BAf8EBAMCAYYwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMBIG 57 | A1UdEwEB/wQIMAYBAf8CAQAwdgYIKwYBBQUHAQEEajBoMCQGCCsGAQUFBzABhhho 58 | dHRwOi8vb2NzcC5kaWdpY2VydC5jb20wQAYIKwYBBQUHMAKGNGh0dHA6Ly9jYWNl 59 | cnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbFJvb3RDQS5jcnQwewYDVR0f 60 | BHQwcjA3oDWgM4YxaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0R2xv 61 | YmFsUm9vdENBLmNybDA3oDWgM4YxaHR0cDovL2NybDQuZGlnaWNlcnQuY29tL0Rp 62 | Z2lDZXJ0R2xvYmFsUm9vdENBLmNybDAwBgNVHSAEKTAnMAcGBWeBDAEBMAgGBmeB 63 | DAECATAIBgZngQwBAgIwCAYGZ4EMAQIDMA0GCSqGSIb3DQEBCwUAA4IBAQB3MR8I 64 | l9cSm2PSEWUIpvZlubj6kgPLoX7hyA2MPrQbkb4CCF6fWXF7Ef3gwOOPWdegUqHQ 65 | S1TSSJZI73fpKQbLQxCgLzwWji3+HlU87MOY7hgNI+gH9bMtxKtXc1r2G1O6+x/6 66 | vYzTUVEgR17vf5irF0LKhVyfIjc0RXbyQ14AniKDrN+v0ebHExfppGlkTIBn6rak 67 | f4994VH6npdn6mkus5CkHBXIrMtPKex6XF2firjUDLuU7tC8y7WlHgjPxEEDDb0G 68 | w6D0yDdVSvG/5XlCNatBmO/8EznDu1vr72N8gJzISUZwa6CCUD7QBLbKJcXBBVVf 69 | 8nwvV9GvlW+sbXlr 70 | -----END CERTIFICATE----- 71 | -----BEGIN CERTIFICATE----- 72 | MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh 73 | MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3 74 | d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD 75 | QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT 76 | MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j 77 | b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG 78 | 9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB 79 | 
CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97 80 | nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt 81 | 43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P 82 | T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4 83 | gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO 84 | BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR 85 | TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw 86 | DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr 87 | hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg 88 | 06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF 89 | PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls 90 | YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk 91 | CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4= 92 | -----END CERTIFICATE----- 93 | -------------------------------------------------------------------------------- /aisentry/worker/cosmos_logger/__init__.py: -------------------------------------------------------------------------------- 1 | from .cosmos_logger import app 2 | 3 | __all__ = [ 4 | "cosmos_logger" 5 | ] -------------------------------------------------------------------------------- /aisentry/worker/cosmos_logger/cosmos_logger.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | from cloudevents.http import from_http 3 | from requests.exceptions import HTTPError 4 | from azure.core.exceptions import AzureError 5 | from dapr.clients import DaprClient 6 | from utils.analyze_pii import analyze_pii_async 7 | from utils.analyze_pii_openai import get_chat_pii_stripped_completion 8 | import logging 9 | import datetime 10 | import asyncio 11 | from dotenv import load_dotenv 12 | import json 13 | import requests 14 | import os 15 | import httpx 16 | import uuid 17 | from typing import List 18 | 19 | # 
# initial setup for logging / env variable loading
log_level = os.getenv('LOG_LEVEL', 'INFO').upper()

logger = logging.getLogger(__name__)
logging.basicConfig(level=getattr(logging, log_level),
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%d-%m-%Y %H:%M:%S'
                    )
load_dotenv(".env", override=True)

app = Flask(__name__)

app_port = os.getenv('APP_PORT', '7000')

# This can be either OPENAI or TEXTANALYTICS
pii_stripping_service = os.getenv('PII_STRIPPING_SERVICE', 'OPENAI')


# Register Dapr pub/sub subscriptions
@app.route('/dapr/subscribe', methods=['GET'])
def subscribe():
    """Tell the Dapr sidecar which pub/sub topic this worker consumes."""
    subscriptions = [{
        'pubsubname': 'openaipubsub',
        'topic': 'requests-logging',
        'route': 'requestslogging'
    }]
    logger.info('Dapr pub/sub is subscribed to: ' + json.dumps(subscriptions))
    return jsonify(subscriptions)


# Dapr subscription in /dapr/subscribe sets up this route
@app.route('/requestslogging', methods=['POST'])
async def oairequests_subscriber():
    """Consume one request-logging event and persist it to CosmosDB.

    Honors the ``ai-sentry-log-level`` header on the event:
    ``PII_STRIPPING_ENABLED`` redacts via Text Analytics or Azure OpenAI
    (per ``PII_STRIPPING_SERVICE``), ``COMPLETE`` logs the payload verbatim,
    anything else skips persistence and returns immediately.
    """
    event = from_http(request.headers, request.get_data())
    data = json.loads(event.data)

    logger.debug(f"Received a request to log data{data}")

    response_body = data.get('response_body', None)

    consumer_id = data['sentry_ai_headers'].get('ai-sentry-consumer', None)
    log_level = data['sentry_ai_headers'].get('ai-sentry-log-level', None)
    model_name = data.get('model', None)
    openai_response_id = data.get('openai_response_id', None)
    date = datetime.datetime.fromisoformat(data.get('date_time_utc'))
    month_date = date.strftime("%m_%Y")

    logger.info('Consumer Product Id: %s', consumer_id)
    logger.info('LogId: %s', model_name)
    logger.info('LogLevel: %s', log_level)

    # Composite partition key: <model>_<consumer>_<MM_YYYY>
    data['LogId'] = f"{model_name}_{consumer_id}_{month_date}"
    data['id'] = openai_response_id
    data['logLevel'] = log_level

    output_binding_data = json.dumps(data)

    data = {key.lower(): value for key, value in data.items()}

    headers = data.get('sentry_ai_headers', None)

    if headers['ai-sentry-log-level'] == 'PII_STRIPPING_ENABLED':
        logger.info('Logging is set to PII Stripping mode')

        input_data: List[str] = [output_binding_data]
        if pii_stripping_service == 'TEXTANALYTICS':
            logger.debug(f"PII stripping service: {pii_stripping_service}")
            # NOTE(review): analyze_pii_async returns a list of redacted JSON
            # strings; confirm invoke_binding accepts that payload shape.
            output_binding_data = await analyze_pii_async(input_data)

        # OPENAI BASED PII Stripping
        else:
            logger.debug(f"PII stripping service: {pii_stripping_service}")
            # We are already inside a running event loop (async Flask view),
            # so simply await the coroutine. The original spun up/fetched an
            # event loop and called loop.run_until_complete(), which raises
            # RuntimeError on a running loop — and then discarded the result,
            # so the stripped text never reached the output binding.
            output_binding_data = await get_chat_pii_stripped_completion(input_data)

        logger.debug(f"PII stripped data: {output_binding_data}")

    elif headers['ai-sentry-log-level'] == 'COMPLETE':
        logger.info('Logging is set to complete mode')

    else:
        # Disabled mode: acknowledge the event without persisting anything.
        logger.info('Logging is set to disabled mode')
        return json.dumps(output_binding_data), 200

    # Send the output to CosmosDB using Dapr binding
    with DaprClient() as d:
        resp = d.invoke_binding(
            binding_name='cosmosdb-log',
            operation='create',
            binding_metadata={
                'partitionKey': 'LogId',
            },
            data=output_binding_data
        )

    return json.dumps(output_binding_data), 200


# NOTE(review): app.run() executes at import time; since the container runs
# this module under hypercorn (asgi_worker:app), consider guarding with
# ``if __name__ == "__main__":``.
app.run(port=app_port)
# Dapr subscription in /dapr/subscribe sets up this route
@app.route('/requestslogging', methods=['POST'])
async def oairequests_subscriber():
    """Acknowledge a request-logging CloudEvent from Dapr pub/sub.

    Log Analytics has no Dapr output binding, so for now the event is only
    parsed and echoed back; the actual forwarding logic is still to be added.
    """
    cloud_event = from_http(request.headers, request.get_data())
    payload = json.loads(cloud_event.data)

    # Implement log analytics logging logic here. DAPR has no native support
    # for log analytics output binding
    return json.dumps(payload), 200
python-dateutil==2.9.0.post0 59 | python-dotenv==1.0.1 60 | Quart==0.19.4 61 | requests==2.32.2 62 | six==1.16.0 63 | sniffio==1.3.1 64 | starlette==0.36.3 65 | tqdm==4.66.3 66 | typing_extensions==4.10.0 67 | urllib3==2.2.2 68 | Werkzeug==3.0.3 69 | wsproto==1.2.0 70 | yarl==1.9.4 71 | tiktoken==0.7.0 72 | 73 | -------------------------------------------------------------------------------- /aisentry/worker/usage_summary/__init__.py: -------------------------------------------------------------------------------- 1 | from .usage_logger import app 2 | 3 | __all__ = [ 4 | "usage_logger" 5 | ] -------------------------------------------------------------------------------- /aisentry/worker/usage_summary/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | aiohttp==3.9.4 3 | aiosignal==1.3.1 4 | annotated-types==0.6.0 5 | anyio==4.3.0 6 | asgiref==3.8.1 7 | async==0.6.2 8 | asyncio==3.4.3 9 | attrs==23.2.0 10 | azure-ai-textanalytics==5.3.0 11 | azure-common==1.1.28 12 | azure-core==1.30.1 13 | azure-identity==1.16.1 14 | blinker==1.7.0 15 | certifi==2024.2.2 16 | cffi==1.16.0 17 | charset-normalizer==3.3.2 18 | click==8.1.7 19 | cloudevents==1.10.1 20 | cryptography==42.0.5 21 | dapr==1.13.0 22 | dapr-client==0.4.0b1 23 | deprecation==2.1.0 24 | distro==1.9.0 25 | fastapi==0.110.0 26 | fastapi-responses==0.2.1 27 | Flask==3.0.2 28 | frozenlist==1.4.1 29 | googleapis-common-protos==1.63.0 30 | grpcio==1.62.1 31 | grpcio-status==1.62.1 32 | h11==0.14.0 33 | h2==4.1.0 34 | hpack==4.0.0 35 | httpcore==1.0.4 36 | httpx==0.27.0 37 | Hypercorn==0.16.0 38 | hyperframe==6.0.1 39 | idna==3.7 40 | isodate==0.6.1 41 | itsdangerous==2.1.2 42 | Jinja2==3.1.4 43 | jwt==1.3.1 44 | MarkupSafe==2.1.5 45 | msal==1.28.0 46 | msal-extensions==1.1.0 47 | multidict==6.0.5 48 | openai==1.14.2 49 | packaging==24.0 50 | portalocker==2.8.2 51 | priority==2.0.0 52 | protobuf==4.25.3 53 | pycparser==2.21 54 | pydantic==2.6.4 
55 | pydantic_core==2.16.3 56 | PyJWT==2.8.0 57 | python-dateutil==2.9.0.post0 58 | python-dotenv==1.0.1 59 | Quart==0.19.4 60 | requests==2.32.2 61 | six==1.16.0 62 | sniffio==1.3.1 63 | starlette==0.36.3 64 | tqdm==4.66.3 65 | typing_extensions==4.10.0 66 | urllib3==2.2.1 67 | Werkzeug==3.0.3 68 | wsproto==1.2.0 69 | yarl==1.9.4 70 | tiktoken==0.7.0 71 | 72 | -------------------------------------------------------------------------------- /aisentry/worker/usage_summary/usage_logger.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | from cloudevents.http import from_http 3 | from requests.exceptions import HTTPError 4 | from azure.identity import DefaultAzureCredential 5 | from azure.core.exceptions import AzureError 6 | from dapr.clients import DaprClient 7 | from utils.analyze_pii import analyze_pii_async 8 | from utils.ai_sentry_helpers import openAISummaryLogObject 9 | from utils.analyze_pii_chunked_ta import analyze_pii_async 10 | import logging 11 | import datetime 12 | import asyncio 13 | from dotenv import load_dotenv 14 | import json 15 | import requests 16 | import os 17 | import httpx 18 | import uuid 19 | from typing import List 20 | 21 | # Get log level from environment variable 22 | log_level = os.getenv('LOG-LEVEL', 'INFO').upper() 23 | logger = logging.getLogger(__name__) 24 | app = Flask(__name__) 25 | 26 | # Set up the logger 27 | logging.basicConfig(level=getattr(logging, log_level), 28 | format='%(asctime)s - %(levelname)s - %(message)s', 29 | datefmt='%d-%m-%Y %H:%M:%S' 30 | ) 31 | logger.info(f"Log level is set to {log_level}") 32 | 33 | load_dotenv(".env", override=True) 34 | 35 | app_port = os.getenv('APP_PORT', '7001') 36 | 37 | 38 | # Register Dapr pub/sub subscriptions 39 | @app.route('/dapr/subscribe', methods=['GET']) 40 | def subscribe(): 41 | subscriptions = [{ 42 | 'pubsubname': 'openaipubsub', 43 | 'topic': 'requests-logging', 44 | 'route': 
# Dapr subscription in /dapr/subscribe sets up this route
@app.route('/requestslogging', methods=['POST'])
async def oairequests_subscriber():
    """Build a token-usage summary from one request-logging event and write it
    to CosmosDB via the Dapr ``summary-log`` output binding.

    The summary is keyed by ``<model>_<consumer>_<MM_YYYY>`` so usage rolls up
    per model/consumer/month.
    """
    event = from_http(request.headers, request.get_data())
    data = json.loads(event.data)

    logger.debug(f"Received a request to log data{data}")

    consumer_id = data['sentry_ai_headers'].get('ai-sentry-consumer', None)
    model_name = data.get('model', None)
    usage_info = data.get('usage', None)
    openai_response_id = data.get('openai_response_id', None)
    # NOTE(review): fromisoformat(None) raises TypeError if the event lacks
    # date_time_utc — confirm upstream always sets it.
    date = datetime.datetime.fromisoformat(data.get('date_time_utc', None))
    month_date = date.strftime("%m_%Y")

    logger.info('Consumer Product Id: %s', consumer_id)
    logger.info('ModelName: %s', model_name)
    logger.info('Usage Info: %s', usage_info)
    log_id = f"{model_name}_{consumer_id}_{month_date}"

    summary_info = openAISummaryLogObject(
        id=openai_response_id,
        LogId=log_id,
        timestamp=data['date_time_utc'],
        model=model_name,
        product_id=consumer_id,
        usage=usage_info,
        month_year=month_date,
    )
    summary_info_dict = summary_info.to_dict()

    output_binding_data = json.dumps(summary_info_dict)

    # Send the output to CosmosDB using Dapr binding
    with DaprClient() as d:
        resp = d.invoke_binding(
            binding_name='summary-log',
            operation='create',
            binding_metadata={
                'partitionKey': 'LogId',
            },
            data=output_binding_data
        )

    # output_binding_data is already a serialized JSON document; the original
    # wrapped it in a second json.dumps(), responding with a quoted/escaped
    # string instead of the JSON object itself.
    return output_binding_data, 200


app.run(port=app_port)
/app 8 | 9 | RUN python -m venv /app/venv 10 | ENV PATH="/app/venv/bin:$PATH" 11 | 12 | COPY requirements.txt . 13 | RUN pip install --upgrade pip 14 | RUN pip install -r requirements.txt 15 | 16 | FROM python:3.11.3-slim-buster 17 | 18 | RUN groupadd -g 999 python && \ 19 | useradd -r -u 999 -g python python 20 | 21 | RUN mkdir /app && chown python:python /app 22 | WORKDIR /app 23 | 24 | RUN mkdir /utils 25 | COPY --chown=python:python utils/ /app/utils/ 26 | 27 | COPY /asgi_facade.py /app/asgi_facade.py 28 | 29 | COPY --chown=python:python --from=build /app/venv ./venv 30 | 31 | ENV PATH="/app/venv/bin:$PATH" 32 | COPY --chown=python:python . . 33 | 34 | USER 999 35 | 36 | ENV PATH="/app/venv/bin:$PATH" 37 | 38 | 39 | CMD ["hypercorn", "--bind", "0.0.0.0:5000", "asgi_facade:app"] -------------------------------------------------------------------------------- /build/Dockerfile.worker: -------------------------------------------------------------------------------- 1 | # pull official base image 2 | FROM python:3.11.3-slim-buster as build 3 | 4 | RUN apt-get update 5 | RUN apt-get install -y --no-install-recommends gcc build-essential bash 6 | 7 | WORKDIR /app 8 | 9 | RUN python -m venv /app/venv 10 | ENV PATH="/app/venv/bin:$PATH" 11 | 12 | COPY worker/requirements.txt . 
13 | RUN pip install --upgrade pip 14 | RUN pip install -r requirements.txt 15 | 16 | FROM python:3.11.3-slim-buster 17 | 18 | RUN groupadd -g 999 python && \ 19 | useradd -r -u 999 -g python python 20 | 21 | RUN mkdir /app && chown python:python /app 22 | WORKDIR /app 23 | 24 | COPY --chown=python:python --from=build /app/venv ./venv 25 | 26 | RUN mkdir /worker 27 | COPY --chown=python:python worker/ /app/worker/ 28 | 29 | RUN mkdir /utils 30 | COPY --chown=python:python utils/ /app/utils/ 31 | 32 | COPY /asgi_worker.py /app/asgi_worker.py 33 | WORKDIR /app 34 | 35 | 36 | # Copy the certificate from your host to your docker image 37 | COPY /utils/combined-cert.crt /usr/local/share/ca-certificates/combined_cert.crt 38 | 39 | # Update the CA certificates 40 | RUN apt-get update && \ 41 | apt-get install -y ca-certificates && \ 42 | update-ca-certificates 43 | 44 | USER 999 45 | 46 | ENV PATH="/app/venv/bin:$PATH" 47 | 48 | CMD ["hypercorn", "--bind", "0.0.0.0:7000", "asgi_worker:app"] -------------------------------------------------------------------------------- /build/DockerfileSummary.worker: -------------------------------------------------------------------------------- 1 | # pull official base image 2 | FROM python:3.11.3-slim-buster as build 3 | 4 | RUN apt-get update 5 | RUN apt-get install -y --no-install-recommends gcc build-essential bash 6 | 7 | WORKDIR /app 8 | 9 | RUN python -m venv /app/venv 10 | ENV PATH="/app/venv/bin:$PATH" 11 | 12 | COPY worker/usage_summary/requirements.txt . 
13 | RUN pip install --upgrade pip 14 | RUN pip install -r requirements.txt 15 | 16 | FROM python:3.11.3-slim-buster 17 | 18 | RUN groupadd -g 999 python && \ 19 | useradd -r -u 999 -g python python 20 | 21 | RUN mkdir /app && chown python:python /app 22 | WORKDIR /app 23 | 24 | COPY --chown=python:python --from=build /app/venv ./venv 25 | 26 | RUN mkdir /worker 27 | COPY --chown=python:python worker/ /app/worker/ 28 | 29 | RUN mkdir /utils 30 | COPY --chown=python:python utils/ /app/utils/ 31 | 32 | COPY /asgi_summary_worker.py /app/asgi_summary_worker.py 33 | WORKDIR /app 34 | 35 | 36 | # Copy the certificate from your host to your docker image 37 | COPY /utils/combined-cert.crt /usr/local/share/ca-certificates/combined_cert.crt 38 | 39 | # Update the CA certificates 40 | RUN apt-get update && \ 41 | apt-get install -y ca-certificates && \ 42 | update-ca-certificates 43 | 44 | USER 999 45 | 46 | ENV PATH="/app/venv/bin:$PATH" 47 | 48 | CMD ["hypercorn", "--bind", "0.0.0.0:7001", "asgi_summary_worker:app"] 49 | #CMD ["sleep", "600"] -------------------------------------------------------------------------------- /build/build-ai-sentry-containers.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [string]$version = "0.1.1", 3 | [string]$containerRegistry = "anevjesacrdevtest.azurecr.io" 4 | ) 5 | #Uncomment first line for very first time to set to the right acr context 6 | # az acr login --name anevjesacrdevtest 7 | Write-Host "Building AI-Sentry Facade:$version" 8 | docker build --platform linux/amd64 -t ai-sentry-facadeapp:$version -f Dockerfile.facade ../aisentry/ 9 | docker tag ai-sentry-facadeapp:$version $containerRegistry/ai-sentry-facadeapp:$version 10 | docker push $containerRegistry/ai-sentry-facadeapp:$version 11 | 12 | 13 | Write-Host "Building summary logger worker:$version" 14 | docker build --platform linux/amd64 -t ai-sentry-summary-logger:$version -f DockerfileSummary.worker ../aisentry/ 15 | docker 
tag ai-sentry-summary-logger:$version $containerRegistry/ai-sentry-summary-logger:$version 16 | docker push $containerRegistry/ai-sentry-summary-logger:$version 17 | 18 | Write-Host "Building cosmosdb logger worker:$version" 19 | docker build --platform linux/amd64 -t ai-sentry-cosmosdblogger:$version -f Dockerfile.worker ../aisentry/ 20 | docker tag ai-sentry-cosmosdblogger:$version $containerRegistry/ai-sentry-cosmosdblogger:$version 21 | docker push $containerRegistry/ai-sentry-cosmosdblogger:$version -------------------------------------------------------------------------------- /content/documentation/ACADeployment.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/documentation/ACADeployment.md -------------------------------------------------------------------------------- /content/documentation/AI-Sentry-config-settings.md: -------------------------------------------------------------------------------- 1 | Each pool is represented as an object within the "pools" array. Each pool object has the following properties: 2 | 3 | - name: The name of the pool. 4 | - description: A description of the pool. 5 | - endpoints: An array of endpoint objects that the pool can connect to. 6 | Each endpoint object has the following properties: 7 | 8 | - url: The URL of the OpenAI instance. You're expected to replace with the actual instance name. You need to append 9 | - api-key: The API key for accessing the OpenAI instance. You're expected to replace your-api-key with the actual API key. 10 | 11 | Please note: We also support JWT auth to backend openAI instances. 
If you simply set "api-Key": null within the property bags inside the facade layer config, you will leverage AKS workload identity to connect to OpenAI backends - however, you will need workload identity federated with a managed identity set up against your AKS cluster, and of course grant the required RBAC to that managed identity across all the backend OpenAI instances.
28 | 29 | ### Install DAPR Extension on AKS 30 | 31 | ```bash 32 | az extension add --name k8s-extension 33 | az extension update --name k8s-extension 34 | 35 | az provider list --query "[?contains(namespace,'Microsoft.KubernetesConfiguration')]" -o table 36 | az provider register --namespace Microsoft.KubernetesConfiguration 37 | 38 | 39 | az k8s-extension create --cluster-type managedClusters --cluster-name anevjes-aks --resource-group aks --name dapr --extension-type Microsoft.Dapr --auto-upgrade-minor-version false 40 | 41 | More info here: https://docs.dapr.io/operations/hosting/kubernetes/kubernetes-deploy/ 42 | ``` 43 | 44 | ### Install REDIS - (if using REDIS as your PUB/SUB DAPR component) 45 | ``` 46 | az aks get-credentials --admin --name anevjes-aks --resource-group aks 47 | helm repo add bitnami https://charts.bitnami.com/bitnami 48 | helm install sentry-redis bitnami/redis-cluster 49 | export REDIS_PASSWORD=$(kubectl get secret --namespace "default" sentry-redis-redis-cluster -o jsonpath="{.data.redis-password}" | base64 --decode) 50 | 51 | kubectl create namespace ai-sentry 52 | kubectl create secret generic redis --from-literal=redis-password=$REDIS_PASSWORD -n ai-sentry 53 | 54 | ``` 55 | 56 | 57 | 58 | # Build and publish container images 59 | 60 | Prepare the facade and worker images and push them to your image registry. 
61 | 62 | ``` bash 63 | #Browse to build directory 64 | cd build 65 | ``` 66 | 67 | When you list source you should see the following powershell script 68 | 69 | ```bash 70 | ls 71 | 72 | Dockerfile.facade Dockerfile.worker DockerfileSummary.worker build-ai-sentry-containers.ps1 73 | ``` 74 | 75 | Login into az cli in the terminal session 76 | ``` 77 | az login 78 | ``` 79 | 80 | Now Login to your target container registry - in my example this is an Azure Container Registry 81 | 82 | ```bash 83 | az acr login -n 84 | ``` 85 | 86 | Now run the build-ai-sentry-containers.ps1 script 87 | 88 | ```bash 89 | build-ai-sentry-containers.ps1 -version "0.1.1" -containerRegistry "your-ACR-name" 90 | ``` 91 | 92 | You can confirm against your ACR that the images are there with correct tags. 93 | 94 | ```bash 95 | az acr repository show-tags --name --repository --output table 96 | ``` 97 | 98 | 99 | # Deploy AI-Sentry 100 | 101 | Now its time to kick off the Ai-Sentry deployment. 102 | 103 | - Browse to *deploy* directory at the root of the project level 104 | 105 | - Now browse into the *AKS* directory 106 | 107 | 108 | Currently I have not configured an end to end helm chart so for the inital deployment please update and use the ai-sentry-deployment.yaml file. 109 | 110 | 111 | If you crack open the ai-sentry-deployment.yaml file you will notice it is broken up into several sections: 112 | 113 | 1. **Service**: Two services are defined, `facadeapp` and `facadeapp-headless`. The `facadeapp` service is a LoadBalancer type, which exposes the `facadeapp` to network traffic. The `facadeapp-headless` service is a headless service (since `clusterIP` is set to `None`), used to expose pods directly without a load balancer. Both services select pods with the label `app: facadeapp` and expose the `facadeapp-http` port of those pods on their own port 80. 114 | 115 | 2. **Ingress**: This manages external access to the services in a cluster, typically HTTP. 
The Ingress `facadeapp-ingress` is routing traffic from the root path ("/") to the `facadeapp` service on port 80. The annotation `nginx.ingress.kubernetes.io/rewrite-target: /` indicates that the path in the request URL from the client is rewritten to "/" regardless of what the original path is. 116 | 117 | 3. **StatefulSet**: Three StatefulSets are defined for `facadeapp`, `cosmosdbloggerw`, and `cosmosdb-summary-loggerw`. StatefulSets are used for deploying, scaling, and managing stateful applications. Each StatefulSet has its own number of replicas, selectors, and container specifications. The `facadeapp` StatefulSet has Dapr annotations enabled, which means it's using DAPR for distributed application runtime. 118 | 119 | 4. **Component**: Three DAPR components are defined for `openaipubsub`, `cosmosdb-log`, and `summary-log`. These components are used for pub-sub messaging with Redis, and bindings with Azure Cosmos DB for logging and summary logging. The `openaipubsub` component is a Redis-based pub-sub component. The `cosmosdb-log` and `summary-log` components are bindings to Azure Cosmos DB for logging purposes. 120 | 121 | 5. **Images**: The images for the containers in the StatefulSets are pulled from a private Azure Container Registry (ACR) as indicated by the `youracr.azurecr.io` prefix in the image names. 122 | 123 | 6. **Environment Variables**: Various environment variables are set in the containers, including AI-SENTRY-ENDPOINT-CONFIG, LOG-LEVEL, AI-SENTRY-LANGUAGE-ENDPOINT, and AI-SENTRY-LANGUAGE-KEY. See [here](/content/documentation/ai-sentry-config.json) for example AI-SENTRY-ENDPOINT-CONFIG. You can leverage the following [script](scripts/create-escaped-json.ps1) to help you generate JSON escaped string of this JSON. 124 | 125 | 7. **Probes**: Liveness probes are defined for the `facadeapp` container to check the health of the application. If the application fails the liveness check, Kubernetes will restart the container. 126 | 127 | 8. 
**Commented Code**: There are commented sections for an Event Hub component for pub sub Dapr annotations. These can be uncommented and filled in as needed for additional functionality. 128 | 129 | 130 | ## Create ai-sentry namespace 131 | 132 | Create the ai-sentry namespace by running the following command: 133 | 134 | ```kubectl 135 | kuebctl apply -f namespace.yaml 136 | ``` 137 | 138 | ## Ai-Sentry deployment into namespace 139 | 140 | ```bash 141 | kubectl apply -f ai-sentry-deployment.yaml -n ai-sentry 142 | 143 | service/facadeapp created 144 | service/facadeapp-headless created 145 | ingress.networking.k8s.io/facadeapp-ingress created 146 | statefulset.apps/facadeapp created 147 | statefulset.apps/cosmosdbloggerw created 148 | statefulset.apps/cosmosdb-summary-loggerw created 149 | component.dapr.io/openaipubsub created 150 | component.dapr.io/cosmosdb-log created 151 | component.dapr.io/summary-log created 152 | ``` 153 | 154 | Now lets confirm the pods exist - it should look like the following: 155 | 156 | ```bash 157 | kubectl get pods -n ai-sentry 158 | 159 | AME READY STATUS RESTARTS AGEaks> kubectl get pods -n ai-sentry 160 | cosmosdb-summary-loggerw-0 2/2 Running 0 82s 161 | cosmosdb-summary-loggerw-1 2/2 Running 0 75s 162 | cosmosdbloggerw-0 2/2 Running 0 82s 163 | cosmosdbloggerw-1 2/2 Running 0 75s 164 | facadeapp-0 2/2 Running 0 82s 165 | ``` 166 | 167 | For ease of testing and getting the intial version up and running I've used a LoadBalancer service and an external IP. You will need to change this service type to meet your Cyber/Enterprise security posture -i.e. it could become ingress on a private IP address. 
168 | 169 | 170 | -------------------------------------------------------------------------------- /content/documentation/AzureInfrastrcuture.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/documentation/AzureInfrastrcuture.md -------------------------------------------------------------------------------- /content/documentation/ComsosDB-LoggingSchema.md: -------------------------------------------------------------------------------- 1 | # Logging Schema 2 | 3 | Below you will find the output schema for CosmosDB logging. This is usefull for long term retention, legal requirements as well as model re-evaluation input/output to test how well other models are performing before cutting over. 4 | 5 | | Property | Description | 6 | | --- | --- | 7 | | `date_time_utc` | The date and time in UTC when the event occurred. | 8 | | `is_Streaming` | A boolean indicating whether the event is streaming or not. | 9 | | `headers` | An object containing various headers related to the event. | 10 | | `params` | An object containing parameters related to the event. | 11 | | `request_body` | An object containing the request body of the event. | 12 | | `response_body` | An object containing the response body of the event. For streaming responses we truncate the repsonse body purely down to the response text| 13 | | `sentry_ai_headers` | An object containing headers related to Sentry AI. | 14 | | `usage` | An object containing usage statistics.3 properties are included here: completion_tokens, prompt_tokens, and total_tokens.| 15 | | `model` | The model used for the event. | 16 | | `openai_response_id` | The ID of the OpenAI response. | 17 | | `LogId` | The log ID of the event. | 18 | | `id` | The ID of the event. | 19 | | `_rid` | The resource ID of the event. | 20 | | `_self` | The self-reference of the event. 
| 21 | | `_etag` | The ETag of the event. | 22 | | `_attachments` | The attachments of the event. | 23 | | `_ts` | The timestamp of the event. | 24 | 25 | JSON definition example: 26 | 27 | ```json 28 | { 29 | "date_time_utc": "2024-05-23T08:17:03.106257+00:00", 30 | "is_Streaming": false, 31 | "headers": { 32 | "Remote-Addr": "10.224.0.5", 33 | "Api-Key": "", 34 | "Arianwashere": "dfsdbgsdgsdgsdgs", 35 | "Ai-Sentry-Consumer": "Product1", 36 | "Ai-Sentry-Log-Level": "COMPLETE", 37 | "Ai-Sentry-Backend-Pool": "pool1", 38 | "Content-Type": "application/json" 39 | }, 40 | "params": { 41 | "api-version": "2023-07-01-preview" 42 | }, 43 | "request_body": { 44 | "messages": [ 45 | { 46 | "role": "system", 47 | "content": "you are a Financial analyst employee studying tech companies" 48 | }, 49 | { 50 | "role": "user", 51 | "content": "What are microsoft's most profitable products?" 52 | } 53 | ], 54 | "stream": false, 55 | "max_tokens": 800, 56 | "temperature": 0.7, 57 | "frequency_penalty": 0, 58 | "presence_penalty": 0, 59 | "top_p": 0.95, 60 | "stop": null 61 | }, 62 | "response_body": { 63 | "choices": [ 64 | { 65 | "content_filter_results": { 66 | "hate": { 67 | "filtered": false, 68 | "severity": "safe" 69 | }, 70 | "self_harm": { 71 | "filtered": false, 72 | "severity": "safe" 73 | }, 74 | "sexual": { 75 | "filtered": false, 76 | "severity": "safe" 77 | }, 78 | "violence": { 79 | "filtered": false, 80 | "severity": "safe" 81 | } 82 | }, 83 | "finish_reason": "stop", 84 | "index": 0, 85 | "message": { 86 | "content": "Microsoft's most profitable products include:\n\n1. Microsoft Office Suite: This includes popular software like Microsoft Word, Excel, PowerPoint, and Outlook, which are widely used by individuals and businesses for productivity and communication purposes.\n\n2. Windows Operating System: Microsoft's flagship operating system, Windows, is used by millions of individuals and businesses worldwide. 
It generates significant revenue through licensing fees, upgrades, and sales of pre-installed Windows on new devices.\n\n3. Azure Cloud Services: Microsoft's cloud computing platform, Azure, is one of the fastest-growing segments for the company. It provides infrastructure, platform, and software as a service (IaaS, PaaS, and SaaS) solutions, generating substantial revenue from businesses' cloud adoption.\n\n4. Xbox Gaming Console and Services: Microsoft's gaming division, including Xbox consoles and related services like Xbox Live and Xbox Game Pass, contributes significantly to the company's profits. Xbox hardware sales, game sales, and subscriptions generate substantial revenue.\n\n5. Surface Devices: Microsoft's range of Surface devices, including laptops, tablets, and accessories, has gained popularity among consumers and professionals. These products generate revenue through sales and contribute to Microsoft's profitability.\n\nIt is important to note that the profitability of these products may vary over time, and Microsoft's overall financial performance is influenced by various factors, including market conditions and competition.", 87 | "role": "assistant" 88 | } 89 | } 90 | ], 91 | "created": 1716452216, 92 | "id": "chatcmpl-9Rxv6geWpNxX0w00S9pQmIjxWK43p", 93 | "model": "gpt-35-turbo", 94 | "object": "chat.completion", 95 | "prompt_filter_results": [ 96 | { 97 | "prompt_index": 0, 98 | "content_filter_results": { 99 | "hate": { 100 | "filtered": false, 101 | "severity": "safe" 102 | }, 103 | "self_harm": { 104 | "filtered": false, 105 | "severity": "safe" 106 | }, 107 | "sexual": { 108 | "filtered": false, 109 | "severity": "safe" 110 | }, 111 | "violence": { 112 | "filtered": false, 113 | "severity": "safe" 114 | } 115 | } 116 | } 117 | ], 118 | "system_fingerprint": null, 119 | "usage": { 120 | "completion_tokens": 268, 121 | "prompt_tokens": 28, 122 | "total_tokens": 296 123 | } 124 | }, 125 | "sentry_ai_headers": { 126 | "Ai-Sentry-Consumer": 
"Product1", 127 | "Ai-Sentry-Log-Level": "COMPLETE" 128 | }, 129 | "usage": { 130 | "completion_tokens": 268, 131 | "prompt_tokens": 28, 132 | "total_tokens": 296 133 | }, 134 | "model": "gpt-35-turbo", 135 | "openai_response_id": "chatcmpl-9Rxv6geWpNxX0w00S9pQmIjxWK43p", 136 | "LogId": "gpt-35-turbo_Product1_05_2024", 137 | "id": "chatcmpl-9Rxv6geWpNxX0w00S9pQmIjxWK43p", 138 | "_rid": "vUpsAOlVEEuBAQAAAAAAAA==", 139 | "_self": "dbs/vUpsAA==/colls/vUpsAOlVEEs=/docs/vUpsAOlVEEuBAQAAAAAAAA==/", 140 | "_etag": "\"0200bb04-0000-1a00-0000-664efb7f0000\"", 141 | "_attachments": "attachments/", 142 | "_ts": 1716452223 143 | } 144 | ``` -------------------------------------------------------------------------------- /content/documentation/CosmosDBSetup.md: -------------------------------------------------------------------------------- 1 | # CosmosDB Setup 2 | -------------------------------------------------------------------------------- /content/documentation/LocalDebugging.md: -------------------------------------------------------------------------------- 1 | ``` 2 | cd aisentry 3 | python3 -m venv env 4 | 5 | source env/bin/activate 6 | 7 | pip install -r requirements.txt 8 | ``` -------------------------------------------------------------------------------- /content/documentation/LocalDeployment.md: -------------------------------------------------------------------------------- 1 | ## Running locally 2 | ``` 3 | # Need to move this logic into dockerfile scripts for workers using cognitive services 4 | openssl x509 -in "/Users/ariannevjestic/Downloads/_.cognitive.microsoft.com.cer" -out "/Users/ariannevjestic/Downloads/cogcerts/_.cognitive.microsoft.com.pem" -outform PEM 5 | openssl x509 -in "/Users/ariannevjestic/Downloads/2.cer" -out "/Users/ariannevjestic/Downloads/cogcerts/DigiCert_SHA2 Secure_ServerCA.pem" -outform PEM 6 | openssl x509 -in "/Users/ariannevjestic/Downloads/1.cer" -out "/Users/ariannevjestic/Downloads/1.pem" -outform PEM 7 | cat 
_.cognitive.microsoft.com.pem middle.pem cDigiCertGlobalRootCA.pem > combined_cert.pem 8 | openssl x509 -outform der -in combined_cert -out combined-cert.crt 9 | 10 | REQUESTS_CA_BUNDLE=/Users/ariannevjestic/Downloads/cogservices.pem 11 | 12 | #check for python env variable 13 | 14 | cd ./aoaifacadeapp 15 | dapr run -f dapr.yaml 16 | ``` 17 | 18 | 19 | ## Running individual apps: 20 | 21 | ### FacadeApp API: 22 | ``` 23 | dapr run --app-id facade-entry --app-port 6124 python3 aoaifacadeapp.py --resources-path components --log-level info 24 | ``` 25 | 26 | ### CosmosDB Worker: 27 | 28 | ``` 29 | dapr run --app-id oai-cosmosdb-logging-processor --app-port 3001 python3 ./workers/CosmosDBLogging/app.py --resources-path components --log-level info 30 | ``` 31 | -------------------------------------------------------------------------------- /content/documentation/SummaryLog-schema.md: -------------------------------------------------------------------------------- 1 | # Summary Logging Schema 2 | 3 | 4 | | Key | Description | 5 | | -------------- | --------------------------------------------------- | 6 | | `id` | Unique identifier for the chat completion | 7 | | `LogId` | Identifier for the log entry | 8 | | `model` | Model used for the AI operation | 9 | | `timestamp` | Time when the log was created | 10 | | `ProductId` | Identifier for the product | 11 | | `promptTokens` | Number of tokens used in the prompt | 12 | | `responseTokens` | Number of tokens used in the response | 13 | | `totalTokens` | Total number of tokens used (prompt + response) | 14 | | `month_year` | Month and year when the log was created | 15 | | `_rid` | Resource ID in Azure Cosmos DB | 16 | | `_self` | Self-link in Azure Cosmos DB | 17 | | `_etag` | Entity tag used for optimistic concurrency control | 18 | | `_attachments` | Link to attachments associated with the document | 19 | | `_ts` | Timestamp of the last update operation on the document | 20 | 21 | 22 | ## Example JSON 23 | 24 | ```json 
25 | { 26 | "id": "chatcmpl-9XKn2H3CSlg91b6b3OuQBj6PdmVje", 27 | "LogId": "gpt-35-turbo_Product13_06_2024", 28 | "model": "gpt-35-turbo", 29 | "timestamp": "2024-06-07T03:42:59.400583+00:00", 30 | "ProductId": "Product13", 31 | "promptTokens": 42, 32 | "responseTokens": 645, 33 | "totalTokens": 687, 34 | "month_year": "06_2024", 35 | "_rid": "SxcAAOBkD+gIAAAAAAAAAA==", 36 | "_self": "dbs/SxcAAA==/colls/SxcAAOBkD+g=/docs/SxcAAOBkD+gIAAAAAAAAAA==/", 37 | "_etag": "\"a400f2ed-0000-1a00-0000-666281c30000\"", 38 | "_attachments": "attachments/", 39 | "_ts": 1717731779 40 | } 41 | ``` -------------------------------------------------------------------------------- /content/documentation/Workload-identity-config.md: -------------------------------------------------------------------------------- 1 | # AKS Workload Identity setup 2 | 3 | ## Enable Workload identity against existing AKS cluster 4 | ```powershell 5 | az aks update --resource-group "aks-devtest-rg" --name "anevjes-aks-dev" --enable-oidc-issuer --enable-workload-identity 6 | ``` 7 | 8 | ## MI creation 9 | ```powershell 10 | az account set --subscription "subscriptionID" 11 | ``` 12 | 13 | ```powershell 14 | az identity create --name "ai-sentry-be-mi" --resource-group "ai-sentry" --location "australiaeast" --subscription "879bb272-07db-4784-816a-a9fac90f49ae" 15 | ``` 16 | 17 | ```bash 18 | export USER_ASSIGNED_CLIENT_ID="$(az identity show --resource-group "ai-sentry" --name "ai-sentry-be-mi" --query 'clientId' -otsv)" 19 | ``` 20 | ## Grant MI access to openAI resources 21 | 22 | ![alt text](..\images\openai_rbac.png) 23 | 24 | and assign your newly built managed identity to above role: 25 | 26 | ![alt text](..\images\openai_rbac2.png) 27 | 28 | 29 | ## Env variables for service account in AKS 30 | 31 | ```bash 32 | export SERVICE_ACCOUNT_NAME="default" 33 | export SERVICE_ACCOUNT_NAMESPACE="ai-sentry" 34 | ``` 35 | 36 | ## OIDC Issuer url 37 | 38 | ```bash 39 | export AKS_OIDC_ISSUER="$(az aks show --name 
anevjes-aks-dev --resource-group aks-devtest-rg --query "oidcIssuerProfile.issuerUrl" -o tsv)" 40 | ``` 41 | 42 | ## Create AKS Service Account 43 | 44 | ```bash 45 | cat <.openai.azure.com/openai", 9 | "api-key": null, //If you simply set "api-Key": null the facade layer will leverage aks workload identity to connect to openAi backends., 10 | "x-ratelimit-remaining-requests": 1000, 11 | "x-ratelimit-remaining-tokens": 1000 12 | } 13 | ] 14 | }, 15 | { 16 | "name": "pool2", 17 | "description": "pool2 description", 18 | "endpoints": [ 19 | { 20 | "url": "https://.openai.azure.com/openai", 21 | "api-key": "your-api-key", //If you simply set "api-Key": null the facade layer will leverage aks workload identity to connect to openAi backends. 22 | "x-ratelimit-remaining-requests": 1000, 23 | "x-ratelimit-remaining-tokens": 1000 24 | }, 25 | { 26 | "url": "https://.openai.azure.com/openai", 27 | "api-key": "your-api-key",//If you simply set "api-Key": null the facade layer will leverage aks workload identity to connect to openAi backends. 
28 | "x-ratelimit-remaining-requests": 1000, 29 | "x-ratelimit-remaining-tokens": 1000 30 | } 31 | ] 32 | } 33 | ] 34 | } -------------------------------------------------------------------------------- /content/images/AI-Sentry-AKS-view.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/AI-Sentry-AKS-view.drawio.png -------------------------------------------------------------------------------- /content/images/AI-Sentry-HighLevel.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/AI-Sentry-HighLevel.drawio.png -------------------------------------------------------------------------------- /content/images/AI-Sentry-features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/AI-Sentry-features.png -------------------------------------------------------------------------------- /content/images/cosmosdb_request_logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/cosmosdb_request_logs.png -------------------------------------------------------------------------------- /content/images/cosmosdb_summary_logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/cosmosdb_summary_logs.png -------------------------------------------------------------------------------- /content/images/openai_rbac.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/openai_rbac.png -------------------------------------------------------------------------------- /content/images/openai_rbac2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/openai_rbac2.png -------------------------------------------------------------------------------- /deploy/aks/ai-sentry-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: facadeapp 5 | labels: 6 | app: facadeapp 7 | spec: 8 | ports: 9 | - name: http 10 | port: 80 11 | targetPort: facadeapp-http 12 | type: LoadBalancer 13 | selector: 14 | app: facadeapp 15 | 16 | --- 17 | apiVersion: v1 18 | kind: Service 19 | metadata: 20 | name: facadeapp-headless 21 | labels: 22 | app: facadeapp-headless 23 | spec: 24 | clusterIP: None 25 | selector: 26 | app: facadeapp 27 | ports: 28 | - name: http 29 | port: 80 30 | targetPort: facadeapp-http 31 | --- 32 | # Ingress: 33 | 34 | apiVersion: networking.k8s.io/v1 35 | kind: Ingress 36 | metadata: 37 | name: facadeapp-ingress 38 | annotations: 39 | nginx.ingress.kubernetes.io/rewrite-target: / 40 | spec: 41 | rules: 42 | - http: 43 | paths: 44 | - pathType: Prefix 45 | path: "/" 46 | backend: 47 | service: 48 | name: facadeapp 49 | port: 50 | number: 80 51 | --- 52 | 53 | kind: StatefulSet 54 | apiVersion: apps/v1 55 | metadata: 56 | name: facadeapp 57 | spec: 58 | serviceName: facadeapp 59 | replicas: 1 60 | selector: 61 | matchLabels: 62 | app: facadeapp 63 | template: 64 | metadata: 65 | labels: 66 | app: facadeapp 67 | azure.workload.identity/use: "true" 68 | annotations: 69 | dapr.io/enabled: "true" 70 | 
dapr.io/app-id: "facadeapp" 71 | dapr.io/app-port: "5000" 72 | dapr.io/log-as-json: "true" 73 | dapr.io/app-protocol: "http" 74 | dapr.io/enable-app-health-check: "true" 75 | dapr.io/app-health-check-path: "/dapr/health" 76 | dapr.io/app-health-probe-interval: "3" 77 | dapr.io/app-health-probe-timeout: "200" 78 | dapr.io/app-health-threshold: "2" 79 | azure.workload.identity/inject-proxy-sidecar: "true" 80 | azure.workload.identity/proxy-sidecar-port: "8000" 81 | 82 | spec: 83 | containers: 84 | - name: sentryai-facade 85 | image: youracr.azurecr.io/ai-sentry-facadeapp:0.1.1 86 | imagePullPolicy: Always 87 | ports: 88 | - name: facadeapp-http 89 | containerPort: 5000 90 | protocol: TCP 91 | livenessProbe: 92 | httpGet: 93 | path: /liveness 94 | port: 5000 95 | initialDelaySeconds: 15 96 | periodSeconds: 5 97 | failureThreshold: 3 98 | env: 99 | - name: "AI-SENTRY-ENDPOINT-CONFIG" 100 | value: "{\"pools\":[{\"name\":\"pool1\",\"description\":\"pool1 description\",\"endpoints\":[{\"url\":\"https://youropenaiendpoint.openai.azure.com/openai\",\"api-key\":\"yourkey or simply null\"}]},{\"name\":\"pool2\",\"description\":\"pool2 description\",\"endpoints\":[{\"url\":\"https://youropenai.openai.azure.com/openai\",\"api-key or simply null\":\"yourkey or simply null\"},{\"url\":\"https://youropenai.openai.azure.com/openai\",\"api-key or simply null\":\"yourkey\"}]}]}" 101 | - name: "LOG_LEVEL" 102 | value: "INFO" 103 | ### Required by Workload Identity. Take the output of the clientID for the managed idenitty built and federated with AKS. 104 | - name: "AZURE_CLIENT_ID" 105 | value: "your managed identity clientid if not using api keys to hit the backend openai apis." 
106 | --- 107 | #CosmosDB Logger 108 | kind: StatefulSet 109 | apiVersion: apps/v1 110 | metadata: 111 | name: cosmosdbloggerw 112 | spec: 113 | serviceName: cosmosdblogger 114 | replicas: 2 115 | selector: 116 | matchLabels: 117 | app: cosmosdbloggerw 118 | template: 119 | metadata: 120 | labels: 121 | app: cosmosdbloggerw 122 | annotations: 123 | dapr.io/enabled: "true" 124 | dapr.io/app-id: "cosmosdbloggerw" 125 | dapr.io/app-port: "7000" 126 | dapr.io/log-as-json: "true" 127 | #dapr.io/app-protocol: "http" 128 | #dapr.io/enable-app-health-check: "true" 129 | #dapr.io/app-health-check-path: "/dapr/health" 130 | #dapr.io/app-health-probe-interval: "3" 131 | #dapr.io/app-health-probe-timeout: "200" 132 | #dapr.io/app-health-threshold: "2" 133 | 134 | spec: 135 | containers: 136 | - name: sentry-ai-cosmosdblogger 137 | image: youracr.azurecr.io/ai-sentry-cosmosdblogger:0.1.1 138 | imagePullPolicy: Always 139 | ports: 140 | - name: cosmoslogger 141 | containerPort: 7000 142 | protocol: TCP 143 | env: 144 | - name: "AI-SENTRY-LANGUAGE-ENDPOINT" 145 | value: "https://yourlanguageendpoint.cognitiveservices.azure.com/" 146 | - name: "AI-SENTRY-LANGUAGE-KEY" 147 | value: "your-key" 148 | - name: "LOG_LEVEL" 149 | value: "DEBUG" 150 | - name: "PII_STRIPPING_SERVICE" 151 | value: "OPENAI" 152 | - name: PII_STRIPPING_OPENAI_ENDPOINT 153 | value: "https://ptuopendeployment.openai.azure.com/" 154 | - name: "PII_STRIPPING_OPENAI_API_KEY" 155 | value: "yourapikeytoopenai / apim subscription key" 156 | --- 157 | #Summary Logger Logger 158 | kind: StatefulSet 159 | apiVersion: apps/v1 160 | metadata: 161 | name: cosmosdb-summary-loggerw 162 | spec: 163 | serviceName: cosmosdb-summary-loggerw 164 | replicas: 2 165 | selector: 166 | matchLabels: 167 | app: cosmosdb-summary-loggerw 168 | template: 169 | metadata: 170 | labels: 171 | app: cosmosdb-summary-loggerw 172 | annotations: 173 | dapr.io/enabled: "true" 174 | dapr.io/app-id: "cosmosdb-summary-loggerw" 175 | dapr.io/app-port: 
"7001" 176 | dapr.io/log-as-json: "true" 177 | #dapr.io/app-protocol: "http" 178 | #dapr.io/enable-app-health-check: "true" 179 | #dapr.io/app-health-check-path: "/dapr/health" 180 | #dapr.io/app-health-probe-interval: "3" 181 | #dapr.io/app-health-probe-timeout: "200" 182 | #dapr.io/app-health-threshold: "2" 183 | 184 | spec: 185 | containers: 186 | - name: sentry-ai-summary-logger 187 | image: youracr.azurecr.io/ai-sentry-summary-logger:0.1.1 188 | imagePullPolicy: Always 189 | ports: 190 | - name: summarylogger 191 | containerPort: 7001 192 | protocol: TCP 193 | env: 194 | - name: "LOG_LEVEL" 195 | value: "INFO" 196 | --- 197 | # Components - REDIS based pub-sub 198 | apiVersion: dapr.io/v1alpha1 199 | kind: Component 200 | metadata: 201 | name: openaipubsub 202 | spec: 203 | type: pubsub.redis 204 | version: v1 205 | metadata: 206 | # These settings will work out of the box if you use `helm install 207 | # bitnami/redis`. If you have your own setup, replace 208 | # `redis-master:6379` with your own Redis master address, and the 209 | # Redis password with your own Secret's name. For more information, 210 | # see https://docs.dapr.io/operations/components/component-secrets . 
211 | - name: redisHost 212 | value: :6379 213 | - name: redisPassword 214 | value: your-redis-password 215 | # secretKeyRef: 216 | # name: redis 217 | # key: redis-password 218 | # auth: 219 | # secretStore: kubernetes 220 | --- 221 | apiVersion: dapr.io/v1alpha1 222 | kind: Component 223 | metadata: 224 | name: cosmosdb-log 225 | spec: 226 | type: bindings.azure.cosmosdb 227 | version: v1 228 | initTimeout: 5m 229 | metadata: 230 | - name: url 231 | value: https://.documents.azure.com:443/ 232 | - name: masterKey 233 | value: your-key 234 | - name: database 235 | value: request-logs 236 | - name: collection 237 | value: logs 238 | - name: partitionKey 239 | value: "LogId" 240 | --- 241 | apiVersion: dapr.io/v1alpha1 242 | kind: Component 243 | metadata: 244 | name: summary-log 245 | spec: 246 | type: bindings.azure.cosmosdb 247 | version: v1 248 | initTimeout: 5m 249 | metadata: 250 | - name: url 251 | value: https://.documents.azure.com:443/ 252 | - name: masterKey 253 | value: your-key 254 | - name: database 255 | value: summary-logs 256 | - name: collection 257 | value: logs 258 | - name: partitionKey 259 | value: "LogId" 260 | --- 261 | # IF you want to use Event hub as pub sub layer uncomment and fill in below 262 | 263 | # apiVersion: dapr.io/v1alpha1 264 | # kind: Component 265 | # metadata: 266 | # name: openaipubsub 267 | # spec: 268 | # type: pubsub.azure.eventhubs 269 | # version: v1 270 | # metadata: 271 | # # Either connectionString or eventHubNamespace is required 272 | # # Use connectionString when *not* using Microsoft Entra ID 273 | # - name: connectionString 274 | # value: "" 275 | # # Use eventHubNamespace when using Microsoft Entra ID 276 | # # - name: eventHubNamespace 277 | # # value: "namespace" 278 | # - name: consumerID # Optional. If not supplied, the runtime will create one. 
279 | # value: "ai-sentry-logger" 280 | # - name: enableEntityManagement 281 | # value: "false" 282 | # # The following four properties are needed only if enableEntityManagement is set to true 283 | # # - name: resourceGroupName 284 | # # value: "test-rg" 285 | # # - name: subscriptionID 286 | # # value: "value of Azure subscription ID" 287 | # # - name: partitionCount 288 | # # value: "1" 289 | # # - name: messageRetentionInDays 290 | # # value: "3" 291 | # # Checkpoint store attributes 292 | # - name: storageAccountName 293 | # value: "" 294 | # - name: storageAccountKey 295 | # value: "" 296 | # - name: storageContainerName 297 | # value: "ai-sen-checkpoints" 298 | # # Alternative to passing storageAccountKey 299 | # # - name: storageConnectionString 300 | # # value: "DefaultEndpointsProtocol=https;AccountName=;AccountKey=" 301 | 302 | 303 | -------------------------------------------------------------------------------- /deploy/aks/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: ai-sentry 5 | labels: 6 | name: ai-sentry -------------------------------------------------------------------------------- /deploy/local/components/cosmosdb.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dapr.io/v1alpha1 2 | kind: Component 3 | metadata: 4 | name: cosmosdb-log 5 | spec: 6 | type: bindings.azure.cosmosdb 7 | version: v1 8 | initTimeout: 5m 9 | metadata: 10 | - name: url 11 | value: https://.documents.azure.com:443/ 12 | - name: masterKey 13 | value: 14 | - name: database 15 | value: request-logs 16 | - name: collection 17 | value: logs 18 | - name: partitionKey 19 | value: "LogId" 20 | --- 21 | apiVersion: dapr.io/v1alpha1 22 | kind: Component 23 | metadata: 24 | name: summary-log 25 | spec: 26 | type: bindings.azure.cosmosdb 27 | version: v1 28 | initTimeout: 5m 29 | metadata: 30 | - name: url 31 | value: 
https://.documents.azure.com:443/ 32 | - name: masterKey 33 | value: 34 | - name: database 35 | value: summary-logs 36 | - name: collection 37 | value: logs 38 | - name: partitionKey 39 | value: "openai_response_id" 40 | # --- 41 | -------------------------------------------------------------------------------- /deploy/local/components/pubsub.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dapr.io/v1alpha1 2 | kind: Component 3 | metadata: 4 | name: openaipubsub 5 | spec: 6 | type: pubsub.redis 7 | version: v1 8 | metadata: 9 | - name: redisHost 10 | value: localhost:6379 11 | - name: redisPassword 12 | value: "" 13 | - name: queueDepth 14 | value: "5" 15 | - name: concurrency 16 | value: "3" 17 | 18 | --- 19 | apiVersion: dapr.io/v1alpha1 20 | kind: Component 21 | metadata: 22 | name: openaipubsub-old 23 | spec: 24 | type: pubsub.azure.eventhubs 25 | version: v1 26 | metadata: 27 | # Either connectionString or eventHubNamespace is required 28 | # Use connectionString when *not* using Microsoft Entra ID 29 | - name: connectionString 30 | value: "" 31 | # Use eventHubNamespace when using Microsoft Entra ID 32 | # - name: eventHubNamespace 33 | # value: "namespace" 34 | - name: consumerID # Optional. If not supplied, the runtime will create one. 
35 | value: "ai-sentry-logger" 36 | - name: enableEntityManagement 37 | value: "false" 38 | # The following four properties are needed only if enableEntityManagement is set to true 39 | # - name: resourceGroupName 40 | # value: "test-rg" 41 | # - name: subscriptionID 42 | # value: "value of Azure subscription ID" 43 | # - name: partitionCount 44 | # value: "1" 45 | # - name: messageRetentionInDays 46 | # value: "3" 47 | # Checkpoint store attributes 48 | - name: storageAccountName 49 | value: "" 50 | - name: storageAccountKey 51 | value: "" 52 | - name: storageContainerName 53 | value: "ai-sen-checkpoints" 54 | # Alternative to passing storageAccountKey 55 | # - name: storageConnectionString 56 | # value: "DefaultEndpointsProtocol=https;AccountName=;AccountKey=" 57 | 58 | 59 | -------------------------------------------------------------------------------- /infrastructure/APIM/ai-sentry-policy.xml: -------------------------------------------------------------------------------- 1 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 19 | 20 | 21 | @((string)context.Variables.GetValueOrDefault("ai-sentry-callling-user")) 22 | 23 | 24 | @(String.Format("{0} | {1}", context.Api.Name, context.Variables["correlation-id"])) 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | arian-test-caller 34 | 35 | 36 | COMPLETE 37 | 38 | 39 | pool1 40 | 41 | 42 | ["SampleApiRequestTransformer"] 43 | 44 | 45 | @((String)context.Variables["correlation-id"]) 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | * 57 | 58 | 59 | GET 60 | POST 61 | 62 | 63 |
api-key
64 |
content-type
65 |
66 |
67 |
68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 |
-------------------------------------------------------------------------------- /infrastructure/bicepconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "experimentalFeaturesEnabled": { 3 | "userDefinedTypes": true 4 | } 5 | } -------------------------------------------------------------------------------- /infrastructure/deploy.ps1: -------------------------------------------------------------------------------- 1 | param([Parameter(mandatory=$true, HelpMessage="Subscirption ID")] 2 | [string] 3 | $subscriptionId='eb51c20e-72d0-49ea-b7fc-2b0a23c73341', 4 | 5 | [Parameter(mandatory=$true, HelpMessage="Resource group name")] 6 | [string] 7 | $resourceGroupName='ai-sentry-rg', 8 | 9 | [Parameter(mandatory=$true, HelpMessage="Location of resource group")] 10 | [string] 11 | $location='australiaeast') 12 | 13 | $ProgressPreference = "SilentlyContinue" 14 | 15 | # Login to Azure 16 | #az login 17 | #Connect-AzAccount 18 | Write-Host "Setting context to subscriptionId: $($subscriptionId)..." 19 | az account set --subscription $subscriptionId 20 | 21 | Write-Host "Deploying AI Sentry resources..." 22 | 23 | # Check if the resource group exists 24 | $resourceGroupExists = az group exists --name $resourceGroupName 25 | 26 | if ($resourceGroupExists -eq 'false') { 27 | # Create the resource group 28 | az group create --name $resourceGroupName --location $location 29 | } 30 | 31 | $deploymentName = "deployment-" + (Get-Date -Format "yyyyMMddHHmmss") 32 | 33 | az deployment group create --resource-group $resourceGroupName --template-file ./main.bicep --parameters ./main.param.json -n $deploymentName 34 | 35 | Write-Host "Deployment complete." 
-------------------------------------------------------------------------------- /infrastructure/main.bicep: -------------------------------------------------------------------------------- 1 | // -- PARAMETERS -- 2 | @description('Required - Environment Type') 3 | @allowed([ 4 | 'dev' 5 | 'test' 6 | 'prod' 7 | ] ) 8 | param environmentType string = 'dev' 9 | 10 | @description('Required - tags') 11 | param tags object = { 12 | Environment: environmentType 13 | Role: 'ai-sentry' 14 | } 15 | 16 | @description('Required - AKS name') 17 | param aksName string 18 | 19 | param aksPrimaryAgentPoolProfile array = [ 20 | { 21 | count: 3 22 | mode: 'System' 23 | name: 'systempool' 24 | vmSize: 'Standard_DS2_v2' 25 | } 26 | ] 27 | 28 | @description('Required - Azure Container Registry Name') 29 | param containerRegistryName string 30 | 31 | @description('Required - Cosmos DB Name') 32 | param cosmosDbName string 33 | 34 | @description('Optional. List of Cosmos DB capabilities for the account. Some capabilities are not compatible with each other.') 35 | @allowed([ 36 | 'EnableCassandra' 37 | 'EnableTable' 38 | 'EnableGremlin' 39 | 'EnableMongo' 40 | 'DisableRateLimitingResponses' 41 | 'EnableServerless' 42 | ]) 43 | param capabilitiesToAdd string[] = [ 44 | 'EnableServerless' 45 | 'EnableGremlin' 46 | 'DisableRateLimitingResponses' 47 | ] 48 | 49 | @description('Required - Cosmos data locations') 50 | param cosmosDbLocations object[] = [ 51 | { 52 | failoverPriority: 0 53 | isZoneRedundant: false 54 | locationName: resourceGroup().location // where the data is located 55 | } 56 | ] 57 | 58 | @description('Required - Open AI Name') 59 | param openAiName string = 'ai-sentry-openai' 60 | 61 | @description('Required - Open AI Location') 62 | param openAiLocation string = 'australiaeast' 63 | 64 | @description('Required - Open AI SKU') 65 | param openAiSku string = 'S0' 66 | 67 | @description('Required - OpenAI Model Config') 68 | param modelDeploymentConfig array = [ 69 | { 70 | 
name: 'text-embedding-ada-002' 71 | version: '2' 72 | capacity: 1 73 | scaleType: 'Standard' 74 | raiPolicyName: '' 75 | } 76 | { 77 | name: 'gpt-35-turbo' 78 | version: '0613' 79 | capacity: 1 80 | scaleType: 'Standard' 81 | raiPolicyName: '' 82 | } 83 | ] 84 | 85 | @description('Required - API Management Name') 86 | param apimName string = 'ai-sentry-apim001' 87 | 88 | // @description('The name of the virtual network subnet to be associated with the API Management service.') 89 | // param apimSubnetName string = 'apim-subnet' 90 | 91 | // @description('The name of the public ip to be associated with the API Management service.') 92 | // param apimPublicIp string = 'apim-pip-001' 93 | 94 | @description('The name of the publisher email to be associated with the API Management service.') 95 | param apimPublisherEmail string = 'publisher@email.com' 96 | 97 | @description('The name of the publisher name to be associated with the API Management service.') 98 | param apimPublisherName string = 'APIM Publisher' 99 | 100 | @description('The sku to be associated with the API Management service.') 101 | param apimSku string = 'Developer' 102 | 103 | // @description('The desired Availability Zones for the API Management service. e.g. [1,2] for deployment in Availability Zones 1 and 2') 104 | // param apimZones []? 105 | 106 | // @description('The desired custom hostnames for the API Management service endpoints') 107 | // param apimHostnameConfigurations object[] = [] 108 | 109 | 110 | // -- RESOURCE CREATION ORCHESTRATOR -- 111 | 112 | // 1. 
Create Azure Kubernetes Managed Cluster with defaults via AVM module 113 | module managedCluster 'br/public:avm/res/container-service/managed-cluster:0.1.7' = { 114 | name: 'managedClusterDeployment' 115 | params: { 116 | // Required parameters 117 | name: aksName 118 | primaryAgentPoolProfile: aksPrimaryAgentPoolProfile 119 | // Non-required parameters 120 | location: resourceGroup().location 121 | enableContainerInsights: false 122 | managedIdentities: { 123 | systemAssigned: true 124 | } 125 | } 126 | } 127 | 128 | // 2. Create Container Registry resources 129 | module registry 'br/public:avm/res/container-registry/registry:0.2.0' = { 130 | name: '${uniqueString(deployment().name)}-containerRegistryCreation' 131 | params: { 132 | // Required parameters 133 | name: containerRegistryName 134 | // Non-required parameters 135 | publicNetworkAccess: 'Enabled' 136 | acrAdminUserEnabled: false 137 | zoneRedundancy: 'Disabled' 138 | managedIdentities: { 139 | systemAssigned: true 140 | } 141 | acrSku: 'Standard' 142 | location: resourceGroup().location 143 | tags: tags 144 | } 145 | } 146 | 147 | // 3. 
Create CosmosDB resources 148 | module databaseAccount 'br/public:avm/res/document-db/database-account:0.5.4' = { 149 | name: '${uniqueString(deployment().name)}-cosmosDbCreation' 150 | params: { 151 | // Required parameters 152 | locations: cosmosDbLocations 153 | name: cosmosDbName 154 | // Non-required parameters 155 | capabilitiesToAdd: capabilitiesToAdd 156 | managedIdentities: { 157 | systemAssigned: true 158 | } 159 | location: resourceGroup().location 160 | tags: tags 161 | } 162 | } 163 | 164 | // 3.1 Add CosmosDB Containers 165 | 166 | module cosmosDbFulllogsContainer 'br/public:avm/res/document-db/container:0.5.4' = { 167 | name: '${uniqueString(deployment().name)}-cosmosDbFullLogsContainerCreation' 168 | params: { 169 | // Required parameters 170 | databaseAccountName: databaseAccount.outputs.databaseAccountName 171 | databaseName: 'ai-sentry' 172 | containerName: 'request-logs' 173 | partitionKeyPath: '/LogId' 174 | throughput: 400 175 | // Non-required parameters 176 | indexingPolicy: { 177 | automatic: true 178 | includedPaths: [ 179 | { 180 | path: '/*' 181 | indexes: [ 182 | { 183 | kind: 'Range' 184 | dataType: 'Number' 185 | } 186 | { 187 | kind: 'Range' 188 | dataType: 'String' 189 | } 190 | ] 191 | } 192 | ] 193 | } 194 | tags: tags 195 | } 196 | } 197 | 198 | module cosmosDbSummaryContainer 'br/public:avm/res/document-db/container:0.5.4' = { 199 | name: '${uniqueString(deployment().name)}-cosmosDbContainersCreation' 200 | params: { 201 | // Required parameters 202 | databaseAccountName: databaseAccount.outputs.databaseAccountName 203 | databaseName: 'ai-sentry' 204 | containerName: 'summary-logs' 205 | partitionKeyPath: '/LogId' 206 | throughput: 400 207 | // Non-required parameters 208 | indexingPolicy: { 209 | automatic: true 210 | includedPaths: [ 211 | { 212 | path: '/*' 213 | indexes: [ 214 | { 215 | kind: 'Range' 216 | dataType: 'Number' 217 | } 218 | { 219 | kind: 'Range' 220 | dataType: 'String' 221 | } 222 | ] 223 | } 224 | ] 225 | } 
226 | tags: tags 227 | } 228 | } 229 | 230 | // 4. Create OpenAI resources 231 | module openAI 'open-ai/main.bicep' = { 232 | name: '${uniqueString(deployment().name)}-openAICreation' 233 | params:{ 234 | // vnetName:vnetName 235 | // peSubnetName:peSubnetName 236 | name: openAiName 237 | location: openAiLocation 238 | sku: openAiSku 239 | managedIdentities: { 240 | systemAssigned: true 241 | } 242 | tags: tags 243 | publicNetworkAccess: 'Enabled' 244 | deploymentConfig: modelDeploymentConfig 245 | // workspaceId:logAnalytics.outputs.logAnalyticsWorkspaceId 246 | } 247 | } 248 | 249 | // 5. Create API Management Service resources 250 | module apimService 'br/public:avm/res/api-management/service:0.1.7' = { 251 | name: '${uniqueString(deployment().name)}-ApiManagementCreation' 252 | params: { 253 | // Required parameters 254 | name: apimName 255 | sku: apimSku 256 | // apimPublicIpName: apimPublicIp 257 | // virtualNetworkType: 'External' 258 | // subnetResourceId: '/subscriptions/${vnetSubscriptionId}/resourceGroups/${vnetResourceGroupName}/providers/Microsoft.Network/virtualNetworks/${vnetName}/subnets/${apimSubnetName}' 259 | publisherEmail: apimPublisherEmail 260 | publisherName: apimPublisherName 261 | managedIdentities: { 262 | systemAssigned: true 263 | } 264 | // Non-required parameters 265 | // hostnameConfigurations: apimHostnameConfigurations 266 | // zones: apimZones 267 | location: resourceGroup().location 268 | minApiVersion: '2021-08-01' 269 | tags: tags 270 | } 271 | } 272 | 273 | -------------------------------------------------------------------------------- /infrastructure/main.param.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "environmentType": { 6 | "value": "dev" 7 | }, 8 | "tags": { 9 | "value": { 10 | "Environment": "dev", 11 | "Role": 
"ai-sentry" 12 | } 13 | }, 14 | "aksName": { 15 | "value": "aisentryaks" 16 | }, 17 | "aksPrimaryAgentPoolProfile": { 18 | "value": [ 19 | { 20 | "count": 1, 21 | "mode": "System", 22 | "name": "systempool", 23 | "vmSize": "Standard_B2s" 24 | } 25 | ] 26 | }, 27 | "containerRegistryName": { 28 | "value": "aisentryacr001" 29 | }, 30 | "cosmosDbName": { 31 | "value": "aisentrycosmodb001" 32 | }, 33 | "capabilitiesToAdd": { 34 | "value": [ 35 | "EnableServerless", 36 | "EnableGremlin", 37 | "DisableRateLimitingResponses" 38 | ] 39 | }, 40 | "cosmosDbLocations": { 41 | "value": [ 42 | { 43 | "failoverPriority": 0, 44 | "isZoneRedundant": false, 45 | "locationName": "australiaeast" 46 | } 47 | ] 48 | }, 49 | "openAiName": { 50 | "value": "aisentryopenai" 51 | }, 52 | "openAiLocation": { 53 | "value": "australiaeast" 54 | }, 55 | "openAiSku": { 56 | "value": "S0" 57 | }, 58 | "modelDeploymentConfig": { 59 | "value": [ 60 | { 61 | "name": "text-embedding-ada-002", 62 | "version": "2", 63 | "sku": "Standard", 64 | "capacity": 1, 65 | "raiPolicyName": "" 66 | }, 67 | { 68 | "name": "gpt-35-turbo", 69 | "version": "0613", 70 | "sku": "Standard", 71 | "capacity": 1, 72 | "raiPolicyName": "" 73 | } 74 | ] 75 | }, 76 | "apimName": { 77 | "value": "aisentryapim001" 78 | }, 79 | "apimPublisherEmail": { 80 | "value": "publisher@email.com" 81 | }, 82 | "apimPublisherName": { 83 | "value": "APIM Publisher" 84 | }, 85 | "apimSku": { 86 | "value": "Developer" 87 | } 88 | } 89 | } -------------------------------------------------------------------------------- /infrastructure/open-ai/main.bicep: -------------------------------------------------------------------------------- 1 | // Parameters 2 | 3 | @description('Specifies the name of the vnet to injeciton private endpoint to') 4 | param vnetName string? 5 | 6 | @description('private endpoint for openAI isntances') 7 | param peSubnetName string? 
8 | 9 | param name string 10 | param location string 11 | param sku string 12 | param customSubDomainName string? 13 | 14 | @description('Optional. The managed identity definition for this resource.') 15 | param managedIdentities managedIdentitiesType 16 | 17 | @description('Specifies the openAI model deployment config required') 18 | param deploymentConfig array 19 | 20 | 21 | @description('Specifies the resource tags.') 22 | param tags object 23 | 24 | 25 | @description('Specifies whether or not public endpoint access is allowed for this account..') 26 | @allowed([ 27 | 'Enabled' 28 | 'Disabled' 29 | ]) 30 | param publicNetworkAccess string = 'Enabled' 31 | 32 | 33 | @description('Specifies the workspace id of the Log Analytics used to monitor the Application Gateway.') 34 | param workspaceId string? 35 | 36 | // Variables 37 | var diagnosticSettingsName = 'diagnosticSettings' 38 | var openAiLogCategories = [ 39 | 'Audit' 40 | 'RequestResponse' 41 | 'Trace' 42 | ] 43 | var openAiMetricCategories = [ 44 | 'AllMetrics' 45 | ] 46 | var openAiLogs = [for category in openAiLogCategories: { 47 | category: category 48 | enabled: true 49 | }] 50 | var openAiMetrics = [for category in openAiMetricCategories: { 51 | category: category 52 | enabled: true 53 | }] 54 | 55 | var formattedUserAssignedIdentities = reduce( 56 | map((managedIdentities.?userAssignedResourceIds ?? []), (id) => { '${id}': {} }), 57 | {}, 58 | (cur, next) => union(cur, next) 59 | ) // Converts the flat array to an object like { '${id1}': {}, '${id2}': {} } 60 | 61 | var identity = !empty(managedIdentities) 62 | ? { 63 | type: (managedIdentities.?systemAssigned ?? false) 64 | ? (!empty(managedIdentities.?userAssignedResourceIds ?? {}) ? 'SystemAssigned,UserAssigned' : 'SystemAssigned') 65 | : (!empty(managedIdentities.?userAssignedResourceIds ?? {}) ? 'UserAssigned' : null) 66 | userAssignedIdentities: !empty(formattedUserAssignedIdentities) ? 
formattedUserAssignedIdentities : null 67 | } 68 | : null 69 | 70 | // Resources 71 | 72 | resource openAiDeployment 'Microsoft.CognitiveServices/accounts@2023-05-01' = { 73 | name: name 74 | location: location 75 | sku: { 76 | name: sku 77 | } 78 | kind: 'OpenAI' 79 | identity: identity 80 | tags: tags 81 | properties: { 82 | customSubDomainName: customSubDomainName 83 | publicNetworkAccess: publicNetworkAccess 84 | } 85 | } 86 | 87 | // module privateendpoints '../core/network/privateendpoint/main.bicep' = [for (openAi, i) in openAIConfig: { 88 | // name: 'openAI-privateendpoint-${i}' 89 | // params:{ 90 | // privateEndpointName:'openAi-${openAi.name}-pe' 91 | // location:resourceGroup().location 92 | // subnetName:peSubnetName 93 | // vnetName:vnetName 94 | // privateLinkServiceId:openAiDeployment[i].id 95 | // groupIds:['account'] 96 | // } 97 | // dependsOn:[ 98 | // openAiDeployment 99 | // ] 100 | // }] 101 | 102 | // What we want to do is iterate through all the models 103 | // And deploy 104 | // modelDeployments is an array of objects that contain the model name and version 105 | @batchSize(1) 106 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deploymentConfig: { 107 | parent: openAiDeployment 108 | sku: { 109 | capacity: deployment.capacity 110 | name: deployment.sku 111 | } 112 | name: deployment.name 113 | properties: { 114 | model: { 115 | format: 'OpenAI' 116 | name: deployment.name 117 | version: deployment.version 118 | } 119 | raiPolicyName: contains(deployment, 'raiPolicyName') ? 
deployment.raiPolicyName : null 120 | } 121 | }] 122 | 123 | // @batchSize(1) 124 | // resource model 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for (openAi, i) in openAIConfig: { 125 | // name: openAi.Name 126 | // parent: openAiDeployment[i] 127 | // sku: { 128 | // name:'Standard' 129 | // capacity: openAi.ModelDeployments[i].capacity 130 | // } 131 | // properties: { 132 | // model: { 133 | // format: 'OpenAI' 134 | // name: openAi.ModelDeployments[i].name 135 | // version: openAi.ModelDeployments[i].version 136 | // } 137 | // //raiPolicyName: openAi.ModelDeployments.raiPolicyName 138 | // } 139 | // }] 140 | 141 | 142 | // resource openAiDiagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = [for (openAi, i) in openAIConfig: { 143 | // name: diagnosticSettingsName 144 | // scope: openAiDeployment[i] 145 | // properties: { 146 | // workspaceId: workspaceId 147 | // logs: openAiLogs 148 | // metrics: openAiMetrics 149 | // } 150 | // }] 151 | 152 | // Outputs 153 | output deployedopenAIAccount object = { 154 | openAIName: name 155 | } 156 | 157 | 158 | // =============== // 159 | // Definitions // 160 | // =============== // 161 | 162 | type managedIdentitiesType = { 163 | @description('Optional. Enables system assigned managed identity on the resource.') 164 | systemAssigned: bool? 165 | 166 | @description('Optional. The resource ID(s) to assign to the resource.') 167 | userAssignedResourceIds: string[]? 168 | }? 
169 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/requirements.txt -------------------------------------------------------------------------------- /scripts/create-escaped-json.ps1: -------------------------------------------------------------------------------- 1 | param ( 2 | [Parameter(Mandatory=$false)] 3 | [string]$jsonFilePath 4 | ) 5 | 6 | # Read the JSON file 7 | $json = Get-Content -Raw -Path $jsonFilePath | ConvertFrom-Json -Depth 99 8 | 9 | # Convert the JSON object to a string 10 | $jsonString = ConvertTo-Json -InputObject $json -Compress -Depth 99 11 | $escapedJsonString = $jsonString | ConvertTo-Json -Depth 99 12 | 13 | # Print the escaped JSON string 14 | Write-Output $escapedJsonString -------------------------------------------------------------------------------- /scripts/setup-env.ps1: -------------------------------------------------------------------------------- 1 | $envFile = Get-Content -Path .\..\.env 2 | 3 | # Loop through each line in the file 4 | foreach ($line in $envFile) { 5 | # Split the line into name and value 6 | $parts = $line -split '=', 2 7 | 8 | # Set the environment variable 9 | [Environment]::SetEnvironmentVariable($parts[0], $parts[1], 'User') 10 | } -------------------------------------------------------------------------------- /scripts/setup-env.sh: -------------------------------------------------------------------------------- 1 | source ..\.env -------------------------------------------------------------------------------- /tests/http/.env.sample: -------------------------------------------------------------------------------- 1 | AOAI_ENDPOINT=___.openai.azure.com 2 | AOAI_API_KEY= 3 | BEARER_TOKEN= -------------------------------------------------------------------------------- 
/tests/http/adapter_test-ai-sentry.http: -------------------------------------------------------------------------------- 1 | POST http://20.167.97.191/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-15-preview HTTP/1.1 2 | Ai-Sentry-Consumer: Product-car-review 3 | Ai-Sentry-Log-Level: COMPLETE 4 | Ai-Sentry-Backend-Pool: pool1 5 | Ai-Sentry-Adapters: [] 6 | ArianWasHere: aaaa 7 | Content-Type: application/json 8 | 9 | { 10 | "messages": [ 11 | { 12 | "role": "system", 13 | "content": "you are a car reviewer studying japenese cars" 14 | }, 15 | { 16 | "role":"user", 17 | "content":"Write a 3 sentence review on honda integra type r DC2" 18 | } 19 | ], 20 | "stream":true, 21 | "max_tokens": 800, 22 | "temperature": 0.7, 23 | "frequency_penalty": 0, 24 | "presence_penalty": 0, 25 | "top_p": 0.95, 26 | "stop": null 27 | } -------------------------------------------------------------------------------- /tests/http/adapter_test.http: -------------------------------------------------------------------------------- 1 | POST http://4.147.64.25/openai/deployments/gpt-4o/chat/completions?api-version=2023-07-01-preview HTTP/1.1 2 | ai-sentry-consumer: Product-car-review 3 | ai-sentry-log-level: PII_STRIPPING_ENABLED 4 | ai-sentry-backend-pool: pool1 5 | ai-sentry-adapters: ["SampleApiRequestTransformer"] 6 | Content-Type: application/json 7 | 8 | { 9 | "messages": [ 10 | { 11 | "role": "system", 12 | "content": "you are a car reviewer studying japenese cars" 13 | }, 14 | { 15 | "role":"user", 16 | "content":"Write a review on toyota yaris gr" 17 | } 18 | ], 19 | "stream":true, 20 | "max_tokens": 800, 21 | "temperature": 0.7, 22 | "frequency_penalty": 0, 23 | "presence_penalty": 0, 24 | "top_p": 0.95, 25 | "stop": null 26 | } -------------------------------------------------------------------------------- /tests/http/get-thread.http: -------------------------------------------------------------------------------- 1 | GET 
class EmbeddingTestUser(HttpUser):
    """Locust user that drives the AI-Sentry embeddings endpoint.

    Sends a fixed embedding request through the facade so the pool routing,
    PII-stripping log level, and adapter headers are exercised under load.
    """

    # Each simulated user pauses 1-5 seconds between consecutive tasks.
    wait_time = between(1, 5)

    @task
    def post_embedding(self):
        """POST one embedding request to the ada-002 deployment via AI-Sentry."""
        request_headers = {
            "Content-Type": "application/json",
            "api-key": "your_api_key_here",  # Replace with your actual API key
            "ai-sentry-backend-pool": "pool1",
            "ai-sentry-consumer": "embedding-automated-test1",
            "ai-sentry-log-level": "PII_STRIPPING_ENABLED",
            "ai-sentry-adapters": "[]",
        }
        body = {"input": "Sample Document goes here"}
        self.client.post(
            "/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-06-01",
            json=body,
            headers=request_headers,
        )

# Note: Ensure you replace "your_api_key_here" with the actual API key.
# You might also need to adjust the host in the Locust command-line or within the script if it's dynamic.
"api-key": "\"{}\"".format(api_key) 27 | 28 | } 29 | 30 | body = { 31 | "messages": [ 32 | { 33 | "role": "system", 34 | "content": "You are an AI assistant that helps people find information." 35 | }, 36 | { 37 | "role": "user", 38 | "content": "What is Microsoft's most profitable business?" 39 | } 40 | ], 41 | "max_tokens": 800, 42 | "temperature": 0.7, 43 | "frequency_penalty": 0, 44 | "presence_penalty": 0, 45 | "top_p": 0.95, 46 | "stop": None 47 | } 48 | 49 | @task 50 | def post_openai(self): 51 | self.client.post(f"openai/deployments/chat/chat/completions?api-version=2024-02-15-preview", data=json.dumps(self.body), headers=self.headers) -------------------------------------------------------------------------------- /tests/loadTests/streaming/locustfile.py: -------------------------------------------------------------------------------- 1 | from locust import HttpUser, task, between 2 | import json, os 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv(".env", override=True) 6 | 7 | azure_endpoint = os.getenv("azure_endpoint", "http://4.195.10.69/"), 8 | #api_key=os.getenv("api_key", "defaultkey"), 9 | api_version=os.getenv("api_version", "2023-07-01-preview"), 10 | aoai_deployment_name = os.getenv("aoai_deployment_name", "gpt4o") 11 | 12 | # Non-Streaming Load Test 13 | class OpenAIUser(HttpUser): 14 | host = azure_endpoint 15 | wait_time = between(1, 2.5) 16 | 17 | headers = { 18 | "Content-Type": "application/json", 19 | "ai-sentry-consumer": "locustloadtest", 20 | "ai-sentry-log-level": "PII_STRIPPING_ENABLED", 21 | "ai-sentry-backend-pool": "pool1", 22 | "ai-sentry-adapters": "[]" 23 | #"api-key": "\"{}\"".format(api_key) 24 | } 25 | 26 | body = { 27 | "messages": [ 28 | { 29 | "role": "system", 30 | "content": "You are an AI assistant that helps people find information." 31 | }, 32 | { 33 | "role": "user", 34 | "content": "What does Microsoft do, and what is its most profitiable business division?" 
35 | } 36 | ], 37 | "stream": True, 38 | "max_tokens": 800, 39 | "temperature": 0.7, 40 | "frequency_penalty": 0, 41 | "presence_penalty": 0, 42 | "top_p": 0.95, 43 | "stop": None 44 | } 45 | 46 | @task 47 | def post_openai(self): 48 | self.client.post(f"/openai/deployments/{aoai_deployment_name}/chat/completions?api-version=2024-02-15-preview", data=json.dumps(self.body), headers=self.headers) -------------------------------------------------------------------------------- /tests/sdk/embedding.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai import AzureOpenAI 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv(".env", override=True) 6 | 7 | client = AzureOpenAI( 8 | api_key = os.getenv("api_key"), 9 | api_version = "2024-02-01", 10 | azure_endpoint = os.getenv("azure_endpoint", "http://4.147.128.191/openai"), 11 | ) 12 | 13 | response = client.embeddings.create( 14 | input = "Your text string goes here", 15 | model= "text-embedding-ada-002" 16 | ) 17 | 18 | print(response.model_dump_json(indent=2)) -------------------------------------------------------------------------------- /tests/sdk/stream_completion.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import asyncio 4 | from openai import AsyncAzureOpenAI 5 | from dotenv import load_dotenv 6 | from timeit import default_timer 7 | 8 | load_dotenv(".env", override=True) 9 | 10 | azure_openai_client = AsyncAzureOpenAI( 11 | azure_endpoint = os.getenv("azure_endpoint", "http://localhost:6124/"), 12 | api_key=os.getenv("api_key", "yourapikey"), 13 | api_version=os.getenv("api_version", "2023-07-01-preview") 14 | ) 15 | 16 | 17 | async def get_response(message): 18 | start = default_timer() 19 | end_first_chunk = None 20 | end = None 21 | response = await azure_openai_client.chat.completions.create( 22 | model = os.getenv("aoai_deployment", "chat"), 23 | temperature = 0.4, 24 | messages = [ 
25 | {"role": "user", "content": message} 26 | ], 27 | stream=True 28 | ) 29 | #print(response.model_dump_json(indent=2)) - > not response 30 | 31 | async for chunk in response: 32 | if not end_first_chunk: 33 | end_first_chunk = default_timer() 34 | print(chunk .model_dump_json(indent=2)) 35 | 36 | end = default_timer() 37 | 38 | print(f"Elapse: {end-start}, First Chunk: {end_first_chunk-start}, Last Chunk: {end-end_first_chunk}") 39 | 40 | def main(): 41 | print("Azure OpenAI SDK Stream Completion Test") 42 | asyncio.run(get_response('What does Microsoft do?')) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() --------------------------------------------------------------------------------