├── .gitignore
├── .vscode
    ├── launch.json
    └── tasks.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── aisentry
    ├── adapters
    │   ├── Api_Request_Transformer.py
    │   ├── SampleAdapter
    │   │   └── sample_api_request_transformer.py
    │   └── adapters.py
    ├── asgi_facade.py
    ├── asgi_summary_worker.py
    ├── asgi_worker.py
    ├── dapr.yaml
    ├── facade
    │   ├── __init__.py
    │   ├── app.py
    │   └── req.txt
    ├── requirements.txt
    ├── utils
    │   ├── __init__.py
    │   ├── ai_sentry_helpers.py
    │   ├── analyze_pii.py
    │   ├── analyze_pii_chunked_ta.py
    │   ├── analyze_pii_openai.py
    │   ├── aoai_streaming_response.py
    │   ├── approaches
    │   │   ├── approach.py
    │   │   ├── headerselector.py
    │   │   └── randomallocation.py
    │   ├── auth_helpers.py
    │   ├── combined-cert.crt
    │   └── combined_cert.pem
    └── worker
    │   ├── cosmos_logger
    │       ├── __init__.py
    │       └── cosmos_logger.py
    │   ├── loganalytics_logger
    │       ├── __init__.py
    │       └── loganalytics_logger.py
    │   ├── requirements.txt
    │   └── usage_summary
    │       ├── __init__.py
    │       ├── requirements.txt
    │       └── usage_logger.py
├── build
    ├── Dockerfile.facade
    ├── Dockerfile.worker
    ├── DockerfileSummary.worker
    └── build-ai-sentry-containers.ps1
├── content
    ├── documentation
    │   ├── ACADeployment.md
    │   ├── AI-Sentry-config-settings.md
    │   ├── AKSDeployment.md
    │   ├── AzureInfrastrcuture.md
    │   ├── ComsosDB-LoggingSchema.md
    │   ├── CosmosDBSetup.md
    │   ├── LocalDebugging.md
    │   ├── LocalDeployment.md
    │   ├── SummaryLog-schema.md
    │   ├── Workload-identity-config.md
    │   └── ai-sentry-config.json
    └── images
    │   ├── AI-Sentry-AKS-view.drawio.png
    │   ├── AI-Sentry-HighLevel.drawio.png
    │   ├── AI-Sentry-features.png
    │   ├── AI-Sentry.drawio
    │   ├── cosmosdb_request_logs.png
    │   ├── cosmosdb_summary_logs.png
    │   ├── openai_rbac.png
    │   └── openai_rbac2.png
├── deploy
    ├── aks
    │   ├── ai-sentry-deployment.yaml
    │   └── namespace.yaml
    └── local
    │   └── components
    │       ├── cosmosdb.yaml
    │       └── pubsub.yaml
├── infrastructure
    ├── APIM
    │   └── ai-sentry-policy.xml
    ├── bicepconfig.json
    ├── deploy.ps1
    ├── main.bicep
    ├── main.param.json
    └── open-ai
    │   └── main.bicep
├── requirements.txt
├── scripts
    ├── create-escaped-json.ps1
    ├── setup-env.ps1
    └── setup-env.sh
└── tests
    ├── http
        ├── .env.sample
        ├── adapter_test-ai-sentry.http
        ├── adapter_test-apim.http
        ├── adapter_test.http
        ├── get-thread.http
        ├── list_assistants.http
        └── non_streaming_embedding.http
    ├── loadTests
        ├── embeddings
        │   └── locust.py
        ├── non-streaming
        │   └── locustfile.py
        └── streaming
        │   └── locustfile.py
    └── sdk
        ├── embedding.py
        └── stream_completion.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | 
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | #dapr logs
163 | .log
164 | .DS_Store
165 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // Use IntelliSense to learn about possible attributes.
 3 |     // Hover to view descriptions of existing attributes.
 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |     "version": "0.2.0",
 6 |     "configurations": [
 7 |           {
 8 |             "type": "debugpy",
 9 |             "request": "launch",
10 |             "name": "AI-SENTRY Facade",
11 |             "program": "${workspaceFolder}/aisentry/asgi_facade.py",
12 |             "console": "integratedTerminal",
13 |             "preLaunchTask": "facadeapp-up",
14 |             "postDebugTask": "facadeapp-down"
15 |           },
16 |           {
17 |             "type": "debugpy",
18 |             "request": "launch",
19 |             "name": "comsosDB logger with Dapr",
20 |             "program": "${workspaceFolder}/aisentry/worker/cosmos_logger.py",
21 |             "console": "integratedTerminal",
22 |             "preLaunchTask": "logger-up",
23 |             "postDebugTask": "logger-down"
24 |           }
25 |     ]
26 | }


--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "2.0.0",
 3 |     "tasks": [
 4 |         {
 5 |             "label": "facadeapp-up",
 6 |             "type": "daprd",
 7 |             "appId": "facadeapp",
 8 |             "metricsPort": 9090,
 9 |             "enableHealthCheck": false
10 |         },
11 |         {
12 |             "label": "facadeapp-down",
13 |             "type": "daprd-down",
14 |             "appId": "facadeapp"
15 |         },
16 |         {
17 |             "label": "logger-up",
18 |             "type": "daprd",
19 |             "appId": "aoai-cosmosdb-logging-processor",
20 |             "metricsPort": 9091
21 |         },
22 |         {
23 |             "label": "logger-down",
24 |             "type": "daprd-down",
25 |             "appId": "aoai-cosmosdb-logging-processor"
26 |         }
27 |    ]
28 | }
29 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Microsoft Open Source Code of Conduct
 2 | 
 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
 4 | 
 5 | Resources:
 6 | 
 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to contribute
 2 | 
 3 | If you would like to contribute to the solution solution accelerator repository there are many ways you can help.
 4 | 
 5 | ## Reporting issues
 6 | 
 7 | We use [GitHub issues](https://github.com/microsoft/ai-sentry/issues) as an issue tracker for the repository. Firstly, please search in open issues and try to make sure your problem doesn't exist. If there is an issue, add your comments to this issue.
 8 | If there are no issues yet, please open a new one.
 9 | 
10 | ## Contributing Code
11 | 
12 | If you would like to contribute an improvement or a fix please create a Pull Request using the steps below. Ensure to describe the feature or bug as part of the pull request.
13 | 
14 | ## Sending a Pull Request
15 | 
16 | Before submitting a pull request please make sure the following is done:
17 | 
18 | 1. Fork [the repository](https://github.com/microsoft/aicentral)
19 | 2. Create a branch from the `main`
20 | 3. Ensure that the code build and runs without any errors
21 | 4. If required update the README.md
22 | 5. Complete the [CLA](#contributor-license-agreement-cla)
23 | 
24 | ### Code of Conduct
25 | 
26 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
27 | 
28 | ### Contributor License Agreement (CLA)
29 | 
30 | You will need to complete a Contributor License Agreement (CLA). Briefly, this agreement testifies that you are granting us permission to use the submitted change according to the terms of the project's license, and that the work being submitted is under appropriate copyright.
31 | 
32 | Please submit a Contributor License Agreement (CLA) before submitting a pull request. You may visit [https://cla.microsoft.com](https://cla.microsoft.com) to sign digitally. Alternatively, download the agreement ([Microsoft Contribution License Agreement.docx](https://www.codeplex.com/Download?ProjectName=typescript&DownloadId=822190)), sign, scan, and email it back to <cla@microsoft.com>. Be sure to include your github user name along with the agreement. Once we have received the signed CLA, we'll review the request.
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |     MIT License
 2 | 
 3 |     Copyright (c) Microsoft Corporation.
 4 | 
 5 |     Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |     of this software and associated documentation files (the "Software"), to deal
 7 |     in the Software without restriction, including without limitation the rights
 8 |     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |     copies of the Software, and to permit persons to whom the Software is
10 |     furnished to do so, subject to the following conditions:
11 | 
12 |     The above copyright notice and this permission notice shall be included in all
13 |     copies or substantial portions of the Software.
14 | 
15 |     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |     SOFTWARE
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AI-Sentry Facade
 2 | 
 3 | 
 4 | ![AI-Sentry-features image](/content/images/AI-Sentry-features.png)
 5 | 
 6 | *Ai-Sentry* is transparent python + DAPR based pluggable Generative AI Facade layer, designed to support the following features for large enterprises developing and operating Generative AI solutions:
 7 | 
 8 | - Cross charge back on token usage across different openAI consumers
 9 | - Request/Response async based logging with ability to toggle PII stripping of information. This level of logging is useful for many things such as legal compliance as well as assessing and replaying back request/responses against newer models to help you deal with model upgrades without affecting your existing users.
10 | - Smarter load balancing by taking into account Azure openAI's response header load metrics and pooling of multi backends with same model capabilities
11 | - Support streaming and non streaming responses (including logging of these)
12 | - Extensibility of custom adapters to help you deal with SDK / API deprecations from client side - so you can provide backwards compatibility if needed.
13 | 
14 | 
15 | AI-Sentry is not designed to replace existing API Gateway solutions such as Azure APIM - rather it is designed to sit between API Gateway and the openAI endpoints - providing ultimate control for your openAI solutions.
16 | 
17 | We try to perform heavy processing outside of the direct HTTP calls pipeline to minimise latency to the consumers and rely on DAPR side cars and Pub / Sub patterns to perform the work asynchronously.
18 | 
19 | Because AI-Sentry uses DAPR; the technology choices for log persistence, and message brokers is swappable out via DAPR's native [components](https://docs.dapr.io/concepts/components-concept/). Our example uses REDIS and Event Hubs as the message broker for PUB/SUB, and CosmosDB as the Log persistence store.
20 | 
21 | ## High Level Design
22 | 
23 | ![ISentryHighLevel image](/content/images/AI-Sentry-HighLevel.drawio.png)
24 | 
25 | 
26 | 
27 | ## Backend Configuration
28 | 
29 | The following environment variables need to exist. How you feed them in is up to you - i.e. Kubernetes secrets, configmaps, etc...
30 | 
31 | | Name | Value | Component |
32 | | -------- | -------- | -------- |
33 | |  AI-SENTRY-ENDPOINT-CONFIG  | Example JSON value is located [here](/content/documentation/ai-sentry-config.json). This is used to map openai endpoints / deployments - so that when we are load balancing we are hitting group of same openAI models from the pool.  Make sure to include /openai in your endpoint url configuration. You can leverage the following [script](scripts/create-escaped-json.ps1) to help you generate JSON escaped string of this JSON.|Facade App |
34 | |AI-SENTRY-LANGUAGE-KEY| your Congnitive Services General API Key| CosmosDB Worker |
35 | |AI-SENTRY-LANGUAGE-ENDPOINT| your language text anlaytics or general service endpoint url| CosmosDB Worker |
36 | 
37 | 
38 | ## Consumer Configuration
39 | 
40 | Whatever you front AI-Sentry with e.g. Azure APIM, some other API gateway technology - you will need to supply some mandatory HTTP headers.
41 | 
42 | |HTTP HEADER NAME| HTTP HEADER VALUE|
43 | | -------- | --------|
44 | |ai-sentry-consumer| this can be any string - it is used to represent a consumer or a product that uses generative ai backend. We use this for logging purposes|
45 | | ai-sentry-log-level | This toggles logging level for the actual consumer. Accepted values are: COMPLETE, PII_STRIPPING_ENABLED or DISABLED |
46 | |ai-sentry-backend-pool| Provide the name of the pool from the AI-SENTRY-ENDPOINT-CONFIG configuration. E.g. Pool1|
47 | |ai-sentry-adapters| Provide list of adapter names you want to run through prior to sending out the request to openai endpoint. Example: ```["SampleApiRequestTransformer","adapter2..."]```
48 | 
49 | ## Getting started
50 | 
51 | For more information on setting up AI-Sentry in your environment please follow the following detailed sections.
52 | 
53 | - [Setting up CosmosDB dbs/table](/content/documentation/CosmosDBSetup.md)
54 | 
55 | - [Setting up AI-Sentry on AKS](/content/documentation/AKSDeployment.md)
56 | 
57 | - [CosmosDB Logging Schema](/content/documentation/ComsosDB-LoggingSchema.md)
58 | 
59 | - [Summary Logging Schema](/content/documentation/SummaryLog-schema.md)
60 | 
61 | - [Setting up workload identity - if you want to auth to openai backends via JWT instead of api keys](/content/documentation/Workload-identity-config.md)
62 | 
63 | 
64 | ## Looking for dotnet based solution?
65 | 
66 | Thankfully our colleauge Graeme Foster has published a dotnet version with similar feature sets. Please go and check it out: https://github.com/microsoft/aicentral
67 | 
68 | ## Contributing
69 | 
70 | This project welcomes contributions and suggestions.  Most contributions require you to agree to a
71 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
72 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
73 | 
74 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide
75 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
76 | provided by the bot. You will only need to do this once across all repos using our CLA.
77 | 
78 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
79 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
80 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
81 | 
82 | ## Trademarks
83 | 
84 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 
85 | trademarks or logos is subject to and must follow 
86 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
87 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
88 | Any use of third-party trademarks or logos are subject to those third-party's policies.
89 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->
42 | 


--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
 1 | # Support
 2 | 
 3 | ## How to file issues and get help
 4 | 
 5 | We use [GitHub issues](https://github.com/microsoft/ai-sentry/issues) as an issue tracker for the repository. Firstly, please search in open issues and try to make sure your problem doesn't exist. If there is an issue, add your comments to this issue.
 6 | If there are no issues yet, please open a new one.
 7 | 
 8 | ## Microsoft Support Policy
 9 | 
10 | Support for AICentral is limited to the resources listed above.
11 | 


--------------------------------------------------------------------------------
/aisentry/adapters/Api_Request_Transformer.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | class ApiRequestTransformer(ABC):
 4 |     @abstractmethod
 5 |     def transform_path(self, path):
 6 |         pass
 7 | 
 8 |     @abstractmethod
 9 |     def transform_method(self, method):
10 |         pass
11 | 
12 |     @abstractmethod
13 |     def transform_body(self, body):
14 |         pass
15 | 
16 |     @abstractmethod
17 |     def transform_query_string(self, query_string):
18 |         pass
19 | 
20 |     @abstractmethod
21 |     def transform_headers(self, headers):
22 |         pass
23 | 
24 |     # @abstractmethod
25 |     # def perform_transformations(self, path, method, body, query_string, headers):
26 |     #     pass


--------------------------------------------------------------------------------
/aisentry/adapters/SampleAdapter/sample_api_request_transformer.py:
--------------------------------------------------------------------------------
 1 | from adapters.Api_Request_Transformer import ApiRequestTransformer
 2 | import httpx
 3 | from typing import Optional
 4 | 
 5 | class SampleApiRequestTransformer(ApiRequestTransformer):
 6 |     def __init__(self, request: httpx.Request):
 7 |         self.request = request
 8 | 
 9 |     def transform_body(self, body: Optional[str] = None):
10 |         # Implement your transformation logic here
11 |         return body
12 | 
13 |     def transform_query_string(self, query_string: Optional[str] = None):
14 |         # Implement your transformation logic here
15 |         return query_string
16 | 
17 |     def transform_headers(self, headers: Optional[dict] = None):
18 |         # Implement your transformation logic here
19 |         if headers is None:
20 |             headers = {}
21 |         headers['Sample-Api-Request-Header'] = 'SAMPLE VALUE'
22 |         return headers
23 | 
24 |     def transform_method(self, method: Optional[str] = None):
25 |         # Implement your transformation logic here
26 |         return method
27 | 
28 |     def transform_path(self, path: Optional[str] = None):
29 |         # Implement your transformation logic here
30 |         return path


--------------------------------------------------------------------------------
/aisentry/adapters/adapters.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from httpx import Request
 3 | from adapters.SampleAdapter.sample_api_request_transformer import SampleApiRequestTransformer
 4 | 
 5 | 
 6 | class AdapterEnum(Enum):
 7 |     SampleApiRequestTransformer = 'SampleApiRequestTransformer'
 8 |     Adapter2 = 'Adapter2'
 9 |     Adapter3 = 'Adapter3'
10 | 
11 | def return_adapter(request: Request, adapter):
12 |     adapter_enum = adapter
13 |     if adapter_enum == AdapterEnum.SampleApiRequestTransformer.value:
14 |         return SampleApiRequestTransformer(request)
15 |     # elif adapter_enum == AdapterEnum.Adapter2.value:
16 |     #     return Adapter2()
17 |     # elif adapter_enum == AdapterEnum.Adapter3.value:
18 |     #     return Adapter3()
19 |     else:
20 |         raise ValueError(f"Invalid adapter enum: {adapter_enum}")


--------------------------------------------------------------------------------
/aisentry/asgi_facade.py:
--------------------------------------------------------------------------------
1 | from facade import app
2 | 
3 | if __name__ == "__main__":
4 |     #app.run()
5 |     app.run(host='0.0.0.0', port=6124)


--------------------------------------------------------------------------------
/aisentry/asgi_summary_worker.py:
--------------------------------------------------------------------------------
1 | import os
2 | from worker.usage_summary import usage_logger
3 | 
4 | 
5 | app_port = os.getenv('USAGE_WORKER_PORT', '7001')
6 | if __name__ == "__main__":
7 |     usage_logger.run(host='0.0.0.0', port=7001)


--------------------------------------------------------------------------------
/aisentry/asgi_worker.py:
--------------------------------------------------------------------------------
1 | import os
2 | from worker.cosmos_logger import cosmos_logger
3 | 
4 | app_port = os.getenv('COSMOSWORKER_PORT', '7000')
5 | if __name__ == "__main__":
6 |     cosmos_logger.run(host='0.0.0.0', port=7000)


--------------------------------------------------------------------------------
/aisentry/dapr.yaml:
--------------------------------------------------------------------------------
 1 | version: 1
 2 | common:
 3 |   resourcesPath: ../deploy/local/components
 4 | apps:
 5 |   - appID: ai-sentry-facade-entry
 6 |     appDirPath: ./
 7 |     enableAppHealthCheck: false
 8 |     appPort: 6124
 9 |     command: ["python3", "asgi_facade.py"]
10 | 
11 |   - appID: aoai-cosmosdb-logging-processor
12 |     appDirPath: ./
13 |     appPort: 6001
14 |     command: ["python3", "asgi_worker.py"]
15 | 
16 |   - appID: aoai-cosmosdb-summary-logging-processor22
17 |     appDirPath: ./
18 |     appPort: 6002
19 |     command: ["python3", "asgi_summary_worker.py"]


--------------------------------------------------------------------------------
/aisentry/facade/__init__.py:
--------------------------------------------------------------------------------
1 | from .app import app
2 | 
3 | __all__ = [
4 |     "app"
5 | ]
6 | 
7 | 


--------------------------------------------------------------------------------
/aisentry/facade/app.py:
--------------------------------------------------------------------------------
  1 | \
  2 | import logging
  3 | import uuid
  4 | from datetime import datetime
  5 | from dapr.clients import DaprClient
  6 | from azure.identity import DefaultAzureCredential
  7 | import httpcore
  8 | from enum import Enum
  9 | from typing import Tuple
 10 | from quart import Quart, jsonify, request, make_response
 11 | from quart.helpers import stream_with_context
 12 | from urllib.request import urlopen
 13 | from urllib.parse import urljoin
 14 | from datetime import datetime, timezone
 15 | import httpx
 16 | from requests.exceptions import HTTPError
 17 | import jwt
 18 | import json
 19 | from dotenv import load_dotenv
 20 | import os
 21 | import tiktoken
 22 | from utils.ai_sentry_helpers import select_pool, init_endpoint_stats, getNextAvailableEndpointInfo, AISentryHeaders, openAILogObject,Usage, num_tokens_from_string
 23 | from adapters.adapters import return_adapter
 24 | 
 25 | # initial setup for logging / env variable loading
 26 | log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
 27 | 
 28 | logger = logging.getLogger(__name__)
 29 | logging.basicConfig(level=getattr(logging, log_level),
 30 |                     format='%(asctime)s - %(levelname)s - %(message)s',
 31 |                     datefmt='%d-%m-%Y %H:%M:%S'
 32 |                     )
 33 | load_dotenv(".env", override=True)
 34 | 
 35 | logger.info("Starting Ai-Sentry Facade app")
 36 | app = Quart(__name__)
 37 | 
 38 | 
 39 | # Setup openAI Endpoints
 40 | endpoint_info = os.getenv('AI-SENTRY-ENDPOINT-CONFIG')
 41 | if endpoint_info is None:
 42 |     raise ValueError("AI-SENTRY-ENDPOINT-CONFIG environment variable is not set")
 43 | 
 44 | 
 45 | logger.debug(f"AI-SENTRY-ENDPOINT-CONFIG value: {endpoint_info}")
 46 | 
 47 | # Convert the JSON string back to a Python object
 48 | endpoint_data = json.loads(endpoint_info)
 49 | 
 50 | 
 51 | 
 52 | open_ai_endpoint_availability_stats = init_endpoint_stats(endpoint_data)
 53 | logger.info(f"Configured the following openAiEndpoints: {open_ai_endpoint_availability_stats}")
 54 | 
 55 | # Initialise DaprClient globally
 56 | daprClient = DaprClient()
 57 | 
 58 | streaming_completion_token_count=0
 59 | streaming_prompt_token_count=0
 60 | model_name=""
 61 | openai_response_id=""
 62 | 
 63 | @app.route('/liveness', methods=['GET'])
 64 | async def kubeliveness():
 65 |   return jsonify(message="Kubernetes Liveness check")
 66 | 
 67 | @app.route('/dapr/health', methods=['GET'])
 68 | async def dapr_health_check():
 69 |     return '', 200
 70 | 
 71 |     # Service unavailable
 72 |     # return '', 503
 73 | 
 74 | 
 75 | @app.route('/dapr/config', methods=['GET'])
 76 | async def dapr_config():
 77 |     return '', 200
 78 | 
 79 | 
 80 | 
 81 | @app.route('/openai/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
 82 | async def catch_all(path):
 83 |         
 84 |         # Get the original request method, headers, body and parameters
 85 |         method = request.method
 86 |         original_headers = request.headers
 87 |         params = request.args
 88 |         body = None
 89 |         body = await request.get_data()
 90 | 
 91 |         # Request Processed variable
 92 |         request_processed = False
 93 |         max_retries = 3
 94 |         current_retry = 0
 95 |         
 96 |         # pull out the ai-sentry specific headers - we use them further for worker processing options.
 97 |         ai_sentry_headers = AISentryHeaders()
 98 |         ai_sentry_headers_used = ai_sentry_headers.validate_headers(original_headers.items())
 99 |         logger.info(f"ai-sentry headers used: {ai_sentry_headers_used}")
100 | 
101 |         pool_name = ai_sentry_headers_used.get('ai-sentry-backend-pool', None)
102 |         ai_sentry_adapters = ai_sentry_headers_used.get('ai-sentry-adapters', None)
103 |         x_aisentry_correlation = ai_sentry_headers_used.get('x-aisentry-correlation', "00000000-0000-0000-0000-000000000000")
104 |         logger.info(f"correlation id used: {x_aisentry_correlation}")
105 | 
106 |         logger.info(f"ai-sentry adapters used: {ai_sentry_adapters}")
107 | 
108 |         ai_sentry_adapters_json = json.loads(ai_sentry_adapters)
109 |         logger.info(f"Selected pool name: {pool_name}")
110 |         
111 |         # Create a new set of headers that exclude the ai-sentry specific headers which we will forward onto openAI endpoints
112 |         exclude_headers = ['host', 'content-length']+list(ai_sentry_headers_used)
113 |         openAI_request_headers = {k: v for k, v in original_headers.items() if k.lower() not in exclude_headers}
114 | 
115 |         pool_endpoints = select_pool(open_ai_endpoint_availability_stats, pool_name)
116 |         
117 |         #strip api-key value if it is in use
118 |         pool_endpoints_without_api_key = [{k: v for k, v in endpoint.items() if k != 'api-key'} for endpoint in pool_endpoints]
119 |         logger.info(f"Selected pool: {pool_endpoints_without_api_key}")
120 | 
121 |         while not request_processed and current_retry <= max_retries:
122 |             logger.info(f"Processing request retry#: {current_retry}")
123 |             endpoint_info = await getNextAvailableEndpointInfo(pool_endpoints)
124 |             client = endpoint_info["client"]
125 | 
126 | 
127 |             # if openAI_request_headers.get('Api-Key') is not None:
128 |             #     logger.info("detected use of api-key header - will use this for authentication")
129 |             #     logger.debug(f"Swapping out api-key inside header with {endpoint_info['api-key']} value")
130 |             #     openAI_request_headers['Api-Key'] = endpoint_info['api-key']
131 | 
132 |             if endpoint_info['api-key'] is not None:
133 |                 logger.info("No api-key header detected - will use the default api-key for authentication")
134 |                 openAI_request_headers['Api-Key'] = endpoint_info['api-key']
135 | 
136 |             else:
137 |                 logger.info("No api-key config detected - will use oAuth to talk to openAI backend services.")
138 |                 #Get Access Token from workload identity
139 |                 credential = DefaultAzureCredential()
140 |                 token = credential.get_token("https://cognitiveservices.azure.com/.default")
141 |                 openAI_request_headers['Authorization'] = f"Bearer {token.token}"
142 | 
143 |             
144 |             decoded_body = body.decode('UTF-8').strip()
145 |             json_body = None
146 | 
147 |             if not decoded_body:
148 |                 logger.info("Received an empty or None body")
149 |                 # Handle the empty or None body case here
150 |                 json_body = {}
151 |             else:
152 |                 json_body = json.loads(decoded_body)
153 | 
154 |             
155 |             object_value = json_body.get("object")
156 | 
157 |             if object_value == "assistant":
158 |                 logger.info("Detected assistant request")
159 |                 prompt_contents = None
160 |                 prompt_contents_string = None
161 | 
162 |             if 'messages' in json_body:
163 |                 prompt_contents = json_body['messages']
164 |                 prompt_contents_string = json.dumps(prompt_contents)
165 |             
166 |             else:
167 |                 logger.info("Messages not found in json_body, assuming the request is an embedding request")
168 |                 prompt_contents= json_body.get('input')
169 |                 prompt_contents_string = json.dumps(prompt_contents)
170 |             
171 |             
172 |             
173 |             is_stream_request = False
174 |             if json_body.get('stream') is True:
175 |                 logger.info("Detected stream request")
176 |                 is_stream_request = True
177 | 
178 |                 
179 | 
180 |             # Create a httpx Request object
181 |             timeout = httpx.Timeout(timeout=5.0, read=60.0)
182 | 
183 |             # Apply the adapter transformation logic one by one
184 |             for adapter in ai_sentry_adapters_json:
185 |                 logger.info(f"Applying transformation logic for adapter: {adapter}")
186 |                 try:
187 |                     adapter_instance = return_adapter(request, adapter) 
188 |                 except Exception as e:
189 |                     logger.error(f"({x_aisentry_correlation}) #1 Error loading adapter: {adapter} - {e}")
190 |                     return jsonify(error=str(e)), 500
191 |                 path = adapter_instance.transform_path(path)
192 |                 method = adapter_instance.transform_method(method)
193 |                 body = adapter_instance.transform_body(body)
194 |                 params = adapter_instance.transform_query_string(params)
195 |                 openAI_request_headers = adapter_instance.transform_headers(openAI_request_headers)
196 |                 logger.info(f"Transformation logic applied for adapter: {adapter}")
197 | 
198 |             req = client.build_request(method, path, content=body, headers=openAI_request_headers, params=params, timeout=timeout)
199 | 
200 |             logger.info(f"Forwarding {method} request to {req.url}")
201 | 
202 |             # Handle streaming and non-streaming responses
203 |             if is_stream_request:
204 |                     try:
205 |                         response = await client.send(req, stream=True)
206 |                         # potentially recieve a timeout or a  HTTP > 499
207 |                         response.raise_for_status()
208 |                         current_retry += 1
209 |                     
210 |                     except httpcore.ConnectTimeout as timeout_err:
211 |                         logger.error(f"({x_aisentry_correlation}) #2 Connection timed out: {timeout_err}")
212 |                         return jsonify(error=str(timeout_err)), 500
213 |                     
214 |                     except HTTPError as http_err:
215 |                         logger.info(f"HTTP error occurred: {http_err}")
216 |                         if http_err.response.status_code == 429:  # 429 is the status code for Too Many Requests
217 |                             logger.info(f"Received 429 response from endpoint, retrying next available endpoint")
218 |                             current_retry += 1
219 |                             endpoint_info["connection_errors_count"]+=1
220 |                             request_processed = False
221 |                             continue
222 | 
223 |                     except Exception as e:
224 |                             # Connection Failures
225 |                             logger.error(f"({x_aisentry_correlation}) #3 An unexpected error occurred: {e}")
226 | 
227 |                             if "429 Too Many Requests" in str(e):
228 |                                 logger.info(f"Received 429 response from endpoint, retrying next available endpoint")
229 |                                 current_retry += 1
230 |                                 endpoint_info["connection_errors_count"]+=1
231 |                                 request_processed = False
232 |                                 continue
233 | 
234 |                             return jsonify(error=str(e)), 500
235 | 
236 |                     
237 |                     @stream_with_context
238 |                     async def stream_response(response):
239 |                         logger.info("Streaming response")
240 | 
241 |                         complete_buffered_response = []
242 |                         global content_buffered_string
243 |                         content_buffered = []
244 |                         response_stream = []
245 |                         global model_name
246 |                         global openai_response_id
247 |                         
248 |                         async for line in response.aiter_lines():                        
249 |                             yield f"{line}\r\n"
250 |                             if line.startswith("data: "):
251 |                                 data=line[6:]
252 |                                 if data!= "[DONE]":
253 |                                     # buffer the response - so we can calculate the token count using tiktok library
254 |                                     complete_buffered_response.append(data)
255 |                                     streaming_content_json = json.loads(data)
256 |                                     logger.debug(f"Streaming content: {streaming_content_json}")
257 |                                     model_name = streaming_content_json['model']
258 |                                     openai_response_id= streaming_content_json['id']
259 |                                     if streaming_content_json['choices']:
260 |                                         delta = streaming_content_json['choices'][0]['delta']
261 |                                         response_stream.append(streaming_content_json)
262 |                                         
263 |                                         if delta.get('content') is not None:
264 |                                             content_buffered.append(delta['content'])
265 | 
266 | 
267 |                         content_buffered_string = "".join(content_buffered)
268 | 
269 |                         # Calculate the token count using tiktok library
270 |                         global streaming_completion_token_count
271 |                         global streaming_prompt_token_count
272 | 
273 |                         streaming_completion_token_count = num_tokens_from_string(content_buffered_string, model_name)
274 |                         streaming_prompt_token_count = num_tokens_from_string(prompt_contents_string, model_name)
275 | 
276 |                         logger.info(f"Streamed completion total Token count: {streaming_completion_token_count}")
277 |                         logger.info(f"Streamed prompt total Token count: {streaming_prompt_token_count}")
278 |                     try:
279 |                         proxy_streaming_response = await make_response( stream_response(response))
280 |                         proxy_streaming_response_body = await proxy_streaming_response.data
281 |                         proxy_streaming_response.timeout = None
282 |                         proxy_streaming_response.status_code = response.status_code
283 |                         proxy_streaming_response.headers = {k: str(v) for k, v in response.headers.items()}
284 |                     except Exception as e:
285 |                             logger.error(f"({x_aisentry_correlation}) #4 An error occurred while streaming response: {e}")
286 |                             return jsonify(error=str(e)), 500
287 | 
288 |                     # Record the stats for openAi endpoints
289 |                     if proxy_streaming_response.headers.get("x-ratelimit-remaining-tokens") is not None:
290 |                         endpoint_info["x-ratelimit-remaining-tokens"]=response.headers["x-ratelimit-remaining-tokens"]
291 |                     else:
292 |                         endpoint_info["x-ratelimit-remaining-tokens"]=0
293 | 
294 |                     if proxy_streaming_response.headers.get("x-ratelimit-remaining-requests") is not None:
295 |                         endpoint_info["x-ratelimit-remaining-requests"]=response.headers["x-ratelimit-remaining-requests"]
296 |                     else:
297 |                         endpoint_info["x-ratelimit-remaining-tokens"]=0
298 |                         endpoint_info["x-ratelimit-remaining-requests"]=0
299 | 
300 |                     utc_now = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
301 |                     request_body = json.loads(body) 
302 | 
303 |                     global model_name
304 |                     global openai_response_id
305 |                     global content_buffered_string
306 | 
307 |                     streamed_token_usage = Usage(streaming_completion_token_count, streaming_prompt_token_count, streaming_completion_token_count+streaming_prompt_token_count)
308 | 
309 |                     openAi_response_object = openAILogObject(date_time_utc=utc_now, 
310 |                                                     headers=openAI_request_headers, 
311 |                                                     params=params, 
312 |                                                     request_body=request_body, 
313 |                                                     response_body=json.dumps(content_buffered_string),
314 |                                                     sentry_ai_headers=ai_sentry_headers_used,
315 |                                                     is_Streaming=is_stream_request,
316 |                                                     usage=streamed_token_usage.to_dict(),
317 |                                                     model=model_name,
318 |                                                     openai_response_id=openai_response_id
319 |                                                     )
320 |             
321 |                     logger.debug(f"OpenAI response: {json.dumps(openAi_response_object.to_dict())}")
322 |             
323 |                     # Publish response payload to background queue for further processing (i.e. logging, PII stripping, etc)
324 |                     try:
325 |                         logger.info("Publishing to Dapr pub/sub")
326 | 
327 |                         dapr_pub = daprClient.publish_event(
328 |                                     pubsub_name='openaipubsub',
329 |                                     topic_name='requests-logging',
330 |                                     data = json.dumps(json.dumps(openAi_response_object.to_dict())),
331 |                                     data_content_type='application/json'
332 |                         )
333 | 
334 |                         logger.info(f"Published to Dapr pub/sub: {dapr_pub}")
335 |                         request_processed = True
336 | 
337 |                     except Exception as e:
338 |                         logger.error(f"({x_aisentry_correlation}) #5 Error publishing to Dapr pub/sub: {e}")
339 | 
340 | 
341 |                     return proxy_streaming_response
342 | 
343 |             else:
344 |                     try:
345 |                         response = await client.send(req, stream=False)
346 |                         response.raise_for_status()
347 | 
348 |                     except Exception as e:
349 |                         # Connection Failures
350 |                         if response is None or response.status_code > 499:
351 | 
352 |                             logger.error(f"({x_aisentry_correlation}) #6 An unexpected error occurred: {e}")
353 |                             # increment connection errors count for the endpoint
354 |                             endpoint_info["connection_errors_count"]+=1
355 |                             current_retry += 1
356 |                             request_processed = False
357 |                             continue
358 |                         else:
359 |                             logger.error(f"({x_aisentry_correlation}) #7 An unexpected error occured: {e}")
360 | 
361 |                     # If response is a 429 Increment retry count - to pick next aviable endpoint
362 |                     if response.status_code == 429:
363 | 
364 |                         logger.info(f"Received 429 response from endpoint, retrying next available endpoint")
365 |                         #endpoint_info["x-retry-after-ms"]=response.headers["x-retry-after-ms"]
366 |                         current_retry += 1
367 |                         endpoint_info["connection_errors_count"]+=1
368 |                         request_processed = False
369 |                         continue
370 | 
371 |                     response_body = await response.aread()
372 |                     response_headers = {k: str(v) for k, v in response.headers.items()}
373 |                     proxy_response = await make_response( (response_body, response.status_code, response_headers) )
374 | 
375 |                     # Process in-line workers
376 |                     # Record the stats for openAi endpoints
377 |                     if response.headers.get("x-ratelimit-remaining-tokens") is not None:
378 |                         endpoint_info["x-ratelimit-remaining-tokens"]=response.headers["x-ratelimit-remaining-tokens"]
379 |                     else:
380 |                         endpoint_info["x-ratelimit-remaining-tokens"]=0
381 | 
382 |                     if response.headers.get("x-ratelimit-remaining-requests") is not None:
383 |                         endpoint_info["x-ratelimit-remaining-requests"]=response.headers["x-ratelimit-remaining-requests"]
384 |                     else:
385 |                         endpoint_info["x-ratelimit-remaining-tokens"]=0
386 | 
387 |                     utc_now = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
388 |                     request_body = json_body   
389 |                     response_json = json.loads(response_body)
390 | 
391 |                     #Extract the token count from the response
392 |                     non_streaming_token_count = response_json.get('usage')
393 |                     non_streaming_model = response_json.get('model')
394 |                     non_streaming_openai_id = response_json.get('id', str(uuid.uuid4()))
395 | 
396 |                     logger.info(f"non_streaming_token_count: {non_streaming_token_count}")
397 |                     logger.info(f"non_streaming_model: {non_streaming_model}")
398 |         
399 | 
400 |                     openAi_response_object = openAILogObject(date_time_utc=utc_now, 
401 |                                                     headers=openAI_request_headers, 
402 |                                                     params=params, 
403 |                                                     request_body=request_body, 
404 |                                                     response_body=response_json,
405 |                                                     sentry_ai_headers=ai_sentry_headers_used,
406 |                                                     is_Streaming=is_stream_request,
407 |                                                     usage=non_streaming_token_count,
408 |                                                     model=non_streaming_model,
409 |                                                     openai_response_id=non_streaming_openai_id)
410 |             
411 |                     logger.debug(f"OpenAI response: {json.dumps(openAi_response_object.to_dict())}")
412 |             
413 |                     # Publish response payload to background queue for further processing (i.e. logging, PII stripping, etc)
414 |                     try:
415 |                         logger.info("Publishing to Dapr pub/sub")
416 |                 
417 |                         dapr_pub = daprClient.publish_event(
418 |                                     pubsub_name='openaipubsub',
419 |                                     topic_name='requests-logging',
420 |                                     data = json.dumps(json.dumps(openAi_response_object.to_dict())),
421 |                                     data_content_type='application/json'
422 |                         )
423 | 
424 |                         logger.info(f"Published to Dapr pub/sub: {dapr_pub}")
425 |                         request_processed = True
426 | 
427 |                     except Exception as e:
428 |                         logger.error(f"({x_aisentry_correlation}) #8 Error publishing to Dapr pub/sub: {e}")
429 | 
430 |                     return proxy_response
431 |         
432 |         return jsonify(message=f"Request failed to process. Attempted to run: {current_retry},  against AI endpoint configuration unsucessfully"), 500
433 | 


--------------------------------------------------------------------------------
/aisentry/facade/req.txt:
--------------------------------------------------------------------------------
  1 | aiofiles==23.2.1
  2 | aiohttp==3.9.3
  3 | aiosignal==1.3.1
  4 | annotated-types==0.6.0
  5 | anyio==4.3.0
  6 | asgiref==3.8.1
  7 | async==0.6.2
  8 | asyncio==3.4.3
  9 | attrs==23.2.0
 10 | azure-ai-textanalytics==5.3.0
 11 | azure-common==1.1.28
 12 | azure-core==1.30.1
 13 | azure-core-tracing-opentelemetry==1.0.0b11
 14 | azure-functions==1.18.0
 15 | azure-functions-durable==1.2.9
 16 | azure-identity==1.16.1
 17 | azure-keyvault-secrets==4.7.0
 18 | azure-monitor-opentelemetry==1.1.0
 19 | azure-monitor-opentelemetry-exporter==1.0.0b19
 20 | azure-search-documents==11.4.0b11
 21 | azure-storage-blob==12.19.0
 22 | blinker==1.7.0
 23 | Brotli==1.1.0
 24 | certifi==2024.2.2
 25 | cffi==1.16.0
 26 | charset-normalizer==3.3.2
 27 | click==8.1.7
 28 | cloudevents==1.10.1
 29 | colorama==0.4.6
 30 | ConfigArgParse==1.7
 31 | cryptography==42.0.5
 32 | dapr==1.13.0
 33 | dapr-client==0.4.0b1
 34 | dapr-ext-fastapi==1.10.0
 35 | dapr-ext-grpc==1.12.0
 36 | Deprecated==1.2.14
 37 | deprecation==2.1.0
 38 | distro==1.9.0
 39 | fastapi==0.110.0
 40 | fastapi-responses==0.2.1
 41 | fixedint==0.1.6
 42 | flasgger==0.9.7.1
 43 | Flask==3.0.2
 44 | Flask-Cors==4.0.0
 45 | flask-dapr==1.12.0
 46 | Flask-Login==0.6.3
 47 | frozenlist==1.4.1
 48 | furl==2.1.3
 49 | gevent==24.2.1
 50 | geventhttpclient==2.0.12
 51 | googleapis-common-protos==1.63.0
 52 | greenlet==3.0.3
 53 | grpcio==1.62.1
 54 | grpcio-status==1.62.1
 55 | gunicorn==21.2.0
 56 | h11==0.14.0
 57 | h2==4.1.0
 58 | hpack==4.0.0
 59 | httpcore==1.0.4
 60 | httpx==0.27.0
 61 | Hypercorn==0.16.0
 62 | hyperframe==6.0.1
 63 | idna==3.7
 64 | importlib-metadata==6.8.0
 65 | isodate==0.6.1
 66 | itsdangerous==2.1.2
 67 | Jinja2==3.1.4
 68 | jsonschema==4.20.0
 69 | jsonschema-specifications==2023.11.2
 70 | jwt==1.3.1
 71 | locust==2.24.1
 72 | MarkupSafe==2.1.5
 73 | mistune==3.0.2
 74 | msal==1.28.0
 75 | msal-extensions==1.1.0
 76 | msgpack==1.0.8
 77 | msrest==0.7.1
 78 | multidict==6.0.5
 79 | numpy==1.26.2
 80 | oauthlib==3.2.2
 81 | openai==1.14.2
 82 | opentelemetry-api==1.21.0
 83 | opentelemetry-instrumentation==0.42b0
 84 | opentelemetry-instrumentation-aiohttp-client==0.42b0
 85 | opentelemetry-instrumentation-asgi==0.42b0
 86 | opentelemetry-instrumentation-dbapi==0.42b0
 87 | opentelemetry-instrumentation-django==0.42b0
 88 | opentelemetry-instrumentation-fastapi==0.42b0
 89 | opentelemetry-instrumentation-flask==0.42b0
 90 | opentelemetry-instrumentation-httpx==0.42b0
 91 | opentelemetry-instrumentation-psycopg2==0.42b0
 92 | opentelemetry-instrumentation-requests==0.42b0
 93 | opentelemetry-instrumentation-urllib==0.42b0
 94 | opentelemetry-instrumentation-urllib3==0.42b0
 95 | opentelemetry-instrumentation-wsgi==0.42b0
 96 | opentelemetry-resource-detector-azure==0.1.0
 97 | opentelemetry-sdk==1.21.0
 98 | opentelemetry-semantic-conventions==0.42b0
 99 | opentelemetry-util-http==0.42b0
100 | orderedmultidict==1.0.1
101 | packaging==24.0
102 | pandas==2.1.3
103 | pandas-stubs==2.1.1.230928
104 | portalocker==2.8.2
105 | priority==2.0.0
106 | protobuf==4.25.3
107 | psutil==5.9.8
108 | pycparser==2.21
109 | pydantic==2.6.4
110 | pydantic_core==2.16.3
111 | PyJWT==2.8.0
112 | python-dateutil==2.9.0.post0
113 | python-dotenv==1.0.1
114 | pytz==2023.3.post1
115 | PyYAML==6.0.1
116 | pyzmq==25.1.2
117 | Quart==0.19.4
118 | quart-cors==0.7.0
119 | referencing==0.32.0
120 | regex==2023.10.3
121 | requests==2.31.0
122 | requests-oauthlib==1.3.1
123 | roundrobin==0.0.4
124 | rpds-py==0.13.2
125 | six==1.16.0
126 | sniffio==1.3.1
127 | starlette==0.36.3
128 | <<<<<<< HEAD
129 | tiktoken==0.5.2
130 | tqdm==4.66.3
131 | =======
132 | tiktoken==0.7.0
133 |  
134 | tqdm==4.66.2
135 | >>>>>>> f4291a2 (adding in utc time, increasing httpx conneciton limit to None)
136 | types-pytz==2023.3.1.1
137 | typing_extensions==4.10.0
138 | tzdata==2023.3
139 | urllib3==2.2.2
140 | uvicorn==0.24.0.post1
141 | Werkzeug==3.0.1
142 | wrapt==1.16.0
143 | wsproto==1.2.0
144 | yarl==1.9.4
145 | zipp==3.17.0
146 | zope.event==5.0
147 | zope.interface==6.2
148 | 


--------------------------------------------------------------------------------
/aisentry/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==23.2.1
 2 | aiohttp==3.9.3
 3 | aiosignal==1.3.1
 4 | annotated-types==0.6.0
 5 | anyio==4.3.0
 6 | asgiref==3.8.1
 7 | async==0.6.2
 8 | asyncio==3.4.3
 9 | attrs==23.2.0
10 | azure-ai-textanalytics==5.3.0
11 | azure-common==1.1.28
12 | azure-core==1.30.1
13 | azure-identity==1.16.1
14 | blinker==1.7.0
15 | certifi==2024.2.2
16 | cffi==1.16.0
17 | charset-normalizer==3.3.2
18 | click==8.1.7
19 | cloudevents==1.10.1
20 | cryptography==42.0.5
21 | dapr==1.13.0
22 | dapr-client==0.4.0b1
23 | deprecation==2.1.0
24 | distro==1.9.0
25 | fastapi==0.110.0
26 | fastapi-responses==0.2.1
27 | Flask==3.0.2
28 | frozenlist==1.4.1
29 | googleapis-common-protos==1.63.0
30 | grpcio==1.62.1
31 | grpcio-status==1.62.1
32 | h11==0.14.0
33 | h2==4.1.0
34 | hpack==4.0.0
35 | httpcore==1.0.4
36 | httpx==0.27.0
37 | Hypercorn==0.16.0
38 | hyperframe==6.0.1
39 | idna==3.7
40 | isodate==0.6.1
41 | itsdangerous==2.1.2
42 | Jinja2==3.1.4
43 | jwt==1.3.1
44 | MarkupSafe==2.1.5
45 | msal==1.28.0
46 | msal-extensions==1.1.0
47 | multidict==6.0.5
48 | openai==1.14.2
49 | packaging==24.0
50 | portalocker==2.8.2
51 | priority==2.0.0
52 | protobuf==4.25.3
53 | pycparser==2.21
54 | pydantic==2.6.4
55 | pydantic_core==2.16.3
56 | PyJWT==2.8.0
57 | python-dateutil==2.9.0.post0
58 | python-dotenv==1.0.1
59 | Quart==0.19.4
60 | requests==2.31.0
61 | six==1.16.0
62 | sniffio==1.3.1
63 | starlette==0.36.3
64 | tqdm==4.66.3
65 | typing_extensions==4.10.0
66 | urllib3==2.2.2
67 | Werkzeug==3.0.1
68 | wsproto==1.2.0
69 | yarl==1.9.4
70 | tiktoken==0.7.0


--------------------------------------------------------------------------------
/aisentry/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/aisentry/utils/__init__.py


--------------------------------------------------------------------------------
/aisentry/utils/ai_sentry_helpers.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import logging
  4 | from dotenv import load_dotenv
  5 | import httpx
  6 | import tiktoken
  7 | 
  8 | 
  9 | # initial setup for logging / env variable loading
 10 | log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | logging.basicConfig(level=getattr(logging, log_level),
 14 |                     format='%(asctime)s - %(levelname)s - %(message)s',
 15 |                     datefmt='%d-%m-%Y %H:%M:%S'
 16 |                     )
 17 | load_dotenv(".env", override=True)
 18 | 
 19 | 
 20 | def get_endpoints_from_poolname(poolname, json_data):
 21 |     for pool in json_data['pools']:
 22 |         if pool['name'] == poolname:
 23 |             return pool['endpoints']
 24 |     return None
 25 | 
 26 | 
 27 | def num_tokens_from_string(string: str, encoding_name: str) -> int:
 28 |     """Returns the number of tokens in a text string."""
 29 |     logger.info(f"encoding_name: {encoding_name}")
 30 |     
 31 |     encoding = tiktoken.encoding_for_model(encoding_name)
 32 |     num_tokens = len(encoding.encode(string))
 33 |     return num_tokens
 34 | 
 35 | 
 36 | 
 37 | def init_endpoint_stats(pools_info):
 38 |     pool_endpoints = {}
 39 |     for pool in pools_info["pools"]:
 40 |         transformed_endpoints = []
 41 |         for endpoint in pool["endpoints"]:
 42 |             transformed_endpoints.append({
 43 |                 "url": endpoint["url"],
 44 |                 "x-ratelimit-remaining-requests": endpoint["x-ratelimit-remaining-requests"],
 45 |                 "x-ratelimit-remaining-tokens": endpoint["x-ratelimit-remaining-tokens"],
 46 |                 "x-retry-after-ms": '0',
 47 |                 "api-key": endpoint["api-key"],
 48 |                 "client": httpx.AsyncClient(base_url=endpoint["url"],limits=httpx.Limits(max_keepalive_connections=None, max_connections=None)),
 49 |                 "connection_errors_count": 0
 50 |             })
 51 |         pool_endpoints[pool["name"]] = transformed_endpoints
 52 |     return pool_endpoints
 53 | 
 54 | def select_pool(pool_endpoints, pool_name):
 55 |     return pool_endpoints.get(pool_name, None)
 56 | 
 57 | async def getNextAvailableEndpointInfo(open_ai_endpoint_availability_stats):
 58 |     logger.debug(f"open_ai_endpoint_availability_stats: {open_ai_endpoint_availability_stats}")
 59 |     remaining_requests = sorted(open_ai_endpoint_availability_stats ,key=lambda x: int(x['x-ratelimit-remaining-requests']), reverse=True)[0]
 60 |     remaining_tokens = sorted(open_ai_endpoint_availability_stats ,key=lambda x: int(x['x-ratelimit-remaining-tokens']), reverse=True)[0]
 61 |     logger.info(f"Next available endpoint: {remaining_requests['url']}")
 62 | 
 63 |     # Add a new key 'max_limit' to each dictionary that is the maximum of 'x-ratelimit-remaining-requests' and 'x-ratelimit-remaining-tokens'
 64 |     # for endpoint in open_ai_endpoint_availability_stats:
 65 |     #     endpoint['max_limit'] = max(endpoint['x-ratelimit-remaining-requests'], endpoint['x-ratelimit-remaining-tokens'])
 66 | 
 67 |     # Sort based on 'max_limit'
 68 |     #sorted_endpoints = sorted(open_ai_endpoint_availability_stats ,key=lambda x: x['max_limit'], reverse=True)
 69 | 
 70 |     # # Select the first endpoint with 'max_limit' greater than zero
 71 |     # highest_endpoint = next((endpoint for endpoint in sorted_endpoints if endpoint['max_limit'] > 0), None)
 72 | 
 73 |     # if highest_endpoint is not None:
 74 |     #     logger.info(highest_endpoint)
 75 |     # else:
 76 |         # logger.info("No endpoint has a max_limit greater than zero.")
 77 | 
 78 |     return remaining_requests
 79 | 
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 
 85 | class AISentryHeaders:
 86 |     # we will check for following ai-sentry specific http headers
 87 |     # ai-sentry-consumer - just needs to be non-empty
 88 |     # ai-sentry-log-level - need to validate specific values - 
 89 | 
 90 |     def __init__(self):
 91 |         # Define acceptable values for each header
 92 |         self.header_values = {
 93 |             "ai-sentry-log-level": {"COMPLETE", "PII_STRIPPING_ENABLED", "DISABLED"}
 94 |         }
 95 | 
 96 |     def validate_headers(self, headers):
 97 |         valid_headers = {}
 98 |         required_headers = ["Ai-Sentry-Backend-Pool", "Ai-Sentry-Consumer", "Ai-Sentry-Log-Level","Ai-Sentry-Adapters"]
 99 | 
100 |         # for required_header in required_headers:
101 |         #     logger.info(f"required_header: {required_header}")
102 |         #     if required_header not in headers:
103 |         #         raise ValueError(f"Missing required header {required_header}")
104 | 
105 |         for header, value in headers:
106 |             logger.debug(f"header: {header}, value: {value}")
107 |             if header.lower() == "ai-sentry-backend-pool":
108 |                 if value == "":
109 |                     logger.info("ai-sentry-backend-pool cannot be an empty string")
110 |                     raise ValueError("ai-sentry-backend-pool cannot be an empty string")
111 |                 else:
112 |                     valid_headers[header.lower()] = value
113 |             if header.lower() == "ai-sentry-consumer":
114 |                 if value == "":
115 |                     raise ValueError("ai-sentry-consumer cannot be an empty string")
116 |                 else:
117 |                     valid_headers[header.lower()] = value
118 |             if header.lower() == "ai-sentry-adapters":
119 |                 if value == "":
120 |                     logger.info("ai-sentry-backend-adapters cannot be an empty string")
121 |                     raise ValueError("ai-sentry-adapters cannot be an empty string")
122 |                 else:
123 |                     valid_headers[header.lower()] = value
124 |             if header.lower() == "x-aisentry-correlation":
125 |                 if value == "":
126 |                     logger.info("Assigning default value to x-aisentry-correlation")
127 |                     valid_headers[header.lower()] = "00000000-0000-0000-0000-000000000000"
128 |                 else:
129 |                     valid_headers[header.lower()] = value
130 |             elif header.lower() in self.header_values:
131 |                 if value not in self.header_values[header.lower()]:
132 |                     raise ValueError(f"Invalid value {value} for header {header}, accepted values FULL, PII_STRIPPING_ENABLED, DISABLED")
133 |                 else:
134 |                     valid_headers[header.lower()] = value
135 |             # valid_headers[header] = value
136 | 
137 |         return valid_headers
138 | 
139 | 
140 |   
141 | class openAISummaryLogObject:
142 |     def __init__(self,id, LogId, timestamp, product_id, usage, model, month_year):
143 |         self.id = id
144 |         self.LogId = LogId
145 |         self.timestamp = timestamp
146 |         self.product_id = product_id
147 |         self.usage = usage
148 |         self.model = model
149 |         self.month_year = month_year
150 |     
151 |     def to_dict(self):
152 |         return {
153 |             'id': self.id,
154 |             'LogId': self.LogId,
155 |             'model': self.model,
156 |             'timestamp': self.timestamp,
157 |             'ProductId': self.product_id,
158 |             'promptTokens': self.usage['prompt_tokens'] if self.usage is not None else None,
159 |             'responseTokens': self.usage.get('completion_tokens', None) if self.usage is not None else None,
160 |             'totalTokens': self.usage.get('total_tokens', None) if self.usage is not None else None ,
161 |             'month_year': self.month_year
162 |         }   
163 | 
164 | 
165 | class openAILogObject:
166 |     def __init__(self, date_time_utc, headers, params, request_body, response_body, sentry_ai_headers, is_Streaming, usage, model, openai_response_id):
167 |         self.date_time_utc = date_time_utc
168 |         self.headers = headers
169 |         self.params = params
170 |         self.request_body = request_body
171 |         self.response_body = response_body
172 |         self.sentry_ai_headers = sentry_ai_headers
173 |         self.is_Streaming = is_Streaming
174 |         self.usage = usage
175 |         self.model = model
176 |         self.openai_response_id = openai_response_id
177 | 
178 | 
179 |     def to_dict(self):
180 |         return {
181 |             'date_time_utc': self.date_time_utc,
182 |             'is_Streaming': self.is_Streaming,
183 |             'headers': self.headers,
184 |             'params': self.params,
185 |             'request_body': self.request_body,
186 |             'response_body': self.response_body,
187 |             'sentry_ai_headers': self.sentry_ai_headers,
188 |             'usage': self.usage,
189 |             'model': self.model,
190 |             'openai_response_id': self.openai_response_id
191 |         }
192 |     
193 | class Usage:
194 |     def __init__(self, completion_tokens=None, prompt_tokens=None, total_tokens=None):
195 |         self.completion_tokens = completion_tokens
196 |         self.prompt_tokens = prompt_tokens
197 |         self.total_tokens = total_tokens
198 | 
199 |     def to_dict(self):
200 |         return {
201 |             'completion_tokens': self.completion_tokens,
202 |             'prompt_tokens': self.prompt_tokens,
203 |             'total_tokens': self.total_tokens
204 |         }
205 | 
206 | 
207 | 
208 | 


--------------------------------------------------------------------------------
/aisentry/utils/analyze_pii.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import logging
  3 | import os
  4 | import json
  5 | from typing import List
  6 | from dotenv import load_dotenv
  7 | 
  8 | from azure.core.credentials import AzureKeyCredential
  9 | from azure.ai.textanalytics.aio import TextAnalyticsClient
 10 | from azure.ai.textanalytics import RecognizePiiEntitiesAction
 11 | import re
 12 | 
 13 | async def analyze_pii_async(input_text: List[str]) -> List[str]:
 14 |     """
 15 |     Processes a list of JSON strings, redacts PII, and returns valid JSON strings.
 16 |     """
 17 |     log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
 18 |     logger = logging.getLogger(__name__)
 19 |     logging.basicConfig(level=getattr(logging, log_level),
 20 |                         format='%(asctime)s - %(levelname)s - %(message)s',
 21 |                         datefmt='%d-%m-%Y %H:%M:%S'
 22 |                         )
 23 |     load_dotenv(".env", override=True)
 24 |     logger.debug(f"input_text: {input_text}")
 25 | 
 26 |     chunk_size = 1250
 27 |     endpoint = os.environ["AI-SENTRY-LANGUAGE-ENDPOINT"]
 28 |     key = os.environ["AI-SENTRY-LANGUAGE-KEY"]
 29 |     logger.info(f"Using cognitive endpoint: {endpoint}")
 30 | 
 31 |     text_analytics_client = TextAnalyticsClient(
 32 |         endpoint=endpoint,
 33 |         credential=AzureKeyCredential(key),
 34 |     )
 35 | 
 36 | 
 37 | 
 38 | def smart_json_loads(s: str):
 39 |     """
 40 |     Attempts to load a JSON string, unescaping double backslashes if necessary.
 41 |     """
 42 |     try:
 43 |         # Try normal loading first
 44 |         return json.loads(s)
 45 |     except json.JSONDecodeError:
 46 |         # Heuristic: if there are many double backslashes, try unescaping
 47 |         if re.search(r'\\\\', s):
 48 |             try:
 49 |                 unescaped = s.encode().decode('unicode_escape')
 50 |                 return json.loads(unescaped)
 51 |             except Exception:
 52 |                 pass
 53 |         raise  # Re-raise if still failing
 54 | 
 55 |     async def redact_pii_in_obj(obj, pii_entities):
 56 |         """
 57 |         Recursively redacts PII entities in a JSON-like Python object.
 58 |         """
 59 |         if isinstance(obj, dict):
 60 |             return {k: await redact_pii_in_obj(v, pii_entities) for k, v in obj.items()}
 61 |         elif isinstance(obj, list):
 62 |             return [await redact_pii_in_obj(item, pii_entities) for item in obj]
 63 |         elif isinstance(obj, str):
 64 |             redacted = obj
 65 |             for ent in pii_entities:
 66 |                 if ent["confidence_score"] >= 0.8 and ent["category"] != "DateTime":
 67 |                     redacted = redacted.replace(ent["text"], "PII_REDACTED")
 68 |             return redacted
 69 |         else:
 70 |             return obj
 71 | 
 72 |     output_texts = []
 73 | 
 74 |     for i in range(0, len(input_text), chunk_size):
 75 |         chunk = input_text[i:i+chunk_size]
 76 |         logger.info(f"Processing chunk of size {len(chunk)}")
 77 | 
 78 |         async with text_analytics_client:
 79 |             poller = await text_analytics_client.begin_analyze_actions(
 80 |                 chunk,
 81 |                 display_name="PII Analysis",
 82 |                 actions=[RecognizePiiEntitiesAction()]
 83 |             )
 84 |             pages = await poller.result()
 85 |             document_results = []
 86 |             async for page in pages:
 87 |                 document_results.append(page)
 88 | 
 89 |         # For each document, parse as JSON, redact PII, then dump as JSON string
 90 |         for doc_str, action_results in zip(chunk, document_results):
 91 |             pii_entities = []
 92 |             for result in action_results:
 93 |                 if result.kind == "PiiEntityRecognition" and not result.is_error:
 94 |                     for entity in result.entities:
 95 |                         logger.debug(f"......Entity: {entity.text}")
 96 |                         logger.debug(f".........Category: {entity.category}")
 97 |                         logger.debug(f".........Confidence Score: {entity.confidence_score}")
 98 |                         pii_entities.append({
 99 |                             "text": entity.text,
100 |                             "category": entity.category,
101 |                             "confidence_score": entity.confidence_score
102 |                         })
103 |                 elif getattr(result, "is_error", False):
104 |                     logger.error(f'PII-Processing: An error with code {result.error.code} and message {result.error.message}')
105 |             try:
106 |                 #data = json.loads(doc_str)
107 |                 data = smart_json_loads(doc_str)
108 |                 redacted_data = await redact_pii_in_obj(data, pii_entities)
109 |                 redacted_json = json.dumps(redacted_data, ensure_ascii=False, separators=(',', ':'))
110 |                 output_texts.append(redacted_json)
111 |             except Exception as e:
112 |                 logger.error(f"Error redacting or serializing JSON: {e}")
113 |                 # Optionally, append the unredacted doc or an empty string
114 |                 output_texts.append(doc_str)
115 | 
116 |     logger.info(f"PII stripping completed")
117 |     return output_texts


--------------------------------------------------------------------------------
/aisentry/utils/analyze_pii_chunked_ta.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import logging
 3 | from typing import List
 4 | 
 5 | 
 6 | batch_size = 1250 # Azure has a limit of 1250 characters per document
 7 | def chunk_string(string, chunk_size):
 8 |     return [string[i:i+chunk_size] for i in range(0, len(string), chunk_size)]
 9 | 
10 | 
11 | #def chunk_reponse_body(response_body: str) -> List[str]:
12 |     
13 | 
14 | 
15 | async def analyze_pii_async(input_text: str) -> None:
16 |     # [START analyze_async]
17 |     import os
18 |     from azure.core.credentials import AzureKeyCredential
19 |     from azure.ai.textanalytics.aio import TextAnalyticsClient
20 |     from azure.ai.textanalytics import (
21 |         RecognizeEntitiesAction,
22 |         RecognizeLinkedEntitiesAction,
23 |         RecognizePiiEntitiesAction,
24 |         ExtractKeyPhrasesAction,
25 |         AnalyzeSentimentAction,
26 |     )
27 | 
28 |     endpoint = os.environ["AI-SENTRY-LANGUAGE-ENDPOINT"]
29 |     key = os.environ["AI-SENTRY-LANGUAGE-KEY"]
30 |     print(f"inputData: {input_text}")
31 | 
32 |     text_analytics_client = TextAnalyticsClient(
33 |         endpoint=endpoint,
34 |         credential=AzureKeyCredential(key),
35 |     )
36 | 
37 | 
38 |     for doc in input_text:
39 |         logging.info(f"CHUNKING DOC: {doc}")
40 |         chunks = chunk_string(doc, batch_size)  
41 |         for chunk in chunks:
42 |             actions = [RecognizePiiEntitiesAction()]
43 |             poller = await text_analytics_client.begin_analyze_actions([chunk], actions, language="en")
44 |             result = await poller.result()
45 | 
46 |             async for doc in result:
47 |                 print("Redacted Text: {}".format(doc.redacted_text))
48 |                 async for entity in doc.entities:
49 |                     print("Entity: {}".format(entity.text))
50 |                     print("Category: {}".format(entity.category))
51 |                     print("Confidence Score: {}\n".format(entity.confidence_score))
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/aisentry/utils/analyze_pii_openai.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging
 3 | import asyncio
 4 | from dotenv import load_dotenv
 5 | from openai import AsyncAzureOpenAI
 6 | 
 7 | 
 8 | 
 9 | # initial setup for logging / env variable loading
10 | log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
11 | 
12 | logger = logging.getLogger(__name__)
13 | logging.basicConfig(level=getattr(logging, log_level),
14 |                     format='%(asctime)s - %(levelname)s - %(message)s',
15 |                     datefmt='%d-%m-%Y %H:%M:%S'
16 |                     )
17 | load_dotenv(".env", override=True)
18 | 
19 | # Initialize the OpenAI client with your key and endpoint
20 | 
21 | load_dotenv(".env", override=True)
22 | 
23 | openai_key = os.environ.get("PII_STRIPPING_OPENAI_API_KEY")
24 | openai_endpoint = os.environ.get("PII_STRIPPING_OPENAI_ENDPOINT")
25 | 
26 | 
27 | client = AsyncAzureOpenAI(
28 |     api_key=openai_key,  
29 |     api_version="2023-12-01-preview",
30 |     azure_endpoint=openai_endpoint
31 | )
32 | 
33 | pii_stripping_system_prompt = """Objective: Identify and flag any Personally Identifiable Information (PII) within text data to ensure data privacy and compliance with regulations such as GDPR, CCPA, etc.
34 | 
35 | PII includes but is not limited to:
36 | Full Names: First and last names
37 | Addresses: Street address, city, state, zip code
38 | Phone Numbers: Any format of telephone numbers
39 | Email Addresses: Any format of email addresses
40 | Social Security Numbers (SSNs): XXX-XX-XXXX or similar formats
41 | Credit Card Numbers: Any format of credit/debit card numbers
42 | Bank Account Numbers: Any format of bank account numbers
43 | Driver's License Numbers: Any format of driver's license numbers
44 | Passport Numbers: Any format of passport numbers
45 | Date of Birth: Full date of birth (MM/DD/YYYY or similar formats)
46 | IP Addresses: Any format of IPv4 or IPv6 addresses
47 | API-KEY or Token: Any format of API keys or tokens
48 | Medical Information: Any health-related information that can identify an individual
49 | Biometric Data: Fingerprints, facial recognition data, etc.
50 | 
51 | Instructions for the System:
52 | Input: Accept text data for analysis.
53 | Processing:
54 | Use pattern matching, regular expressions, and machine learning algorithms to identify potential PII.
55 | Cross-reference detected patterns with known PII formats.
56 | Output:
57 | Flag detected PII and categorize it.
58 | Provide a confidence score for each detected PII item.
59 | Highlight the specific text containing PII.
60 | 
61 | Example:
62 | 
63 | Input Text:
64 | 
65 | John Doe lives at 123 Maple Street, Springfield, IL 62704. His email is john.doe@example.com, and his phone number is (555) 123-4567. He was born on 01/15/1985 and his SSN is 123-45-6789.  
66 |  
67 | Output:
68 | 
69 | Keep the same text structure but replace the PII with placeholders: [PII-Redacted]
70 |  
71 | Compliance Note: The system must handle all detected PII with strict confidentiality and in accordance with applicable data protection regulations."""
72 | 
73 | 
74 | async def get_chat_pii_stripped_completion(prompt):
75 |     # Send the request to Azure OpenAI
76 |     response = await client.chat.completions.create(
77 |         model="gpt-4",
78 |         messages=[
79 |     {"role": "system", "content": pii_stripping_system_prompt},
80 |     {"role": "user", "content": f"Rewrite the input and Strip out PII information as per the system message from following input: {prompt}"}
81 |   ]
82 |     )
83 | 
84 |     # Extract the text from the response
85 |     #completion_text = response.completions[0].data.get("text", "")
86 |     message_content = response['choices'][0]['message']['content']
87 | 
88 |     logger.info(f"PII Stripped Completion Text: {message_content}")
89 |     return message_content
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/aisentry/utils/aoai_streaming_response.py:
--------------------------------------------------------------------------------
1 | class AOAI_Streaming_Response:
2 |     def __init__(self, response):
3 |         self.choices = response['choices']
4 |         self.created = response['created']
5 |         self.id = response['id']
6 |         self.model = response['model']
7 |         self.object = response['object']
8 |         self.system_fingerprint = response['system_fingerprint']
9 | 


--------------------------------------------------------------------------------
/aisentry/utils/approaches/approach.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import List
 3 | 
 4 | class LoadBalancingApproach(ABC):
 5 |     
 6 |     def __init__(
 7 |             self,
 8 |             aoai_endpoints: List[str]
 9 |     ):
10 |         self.aoai_endpoints = aoai_endpoints
11 |     
12 |     @abstractmethod
13 |     def find_available_aoai(self):
14 |         pass


--------------------------------------------------------------------------------
/aisentry/utils/approaches/headerselector.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | @dataclass
 4 | class AzureOpenAIDeployment:
 5 |     url: str
 6 |     retry_ms: int
 7 |     ratelimit_remaining_tokens: int
 8 |     ratelimit_remaining_requests: int
 9 | 
10 | class HeaderSelector:
11 | 
12 |     def __init__(
13 |             self,
14 |             openai_urls: [str]
15 |     ):
16 |         self.openai_urls = openai_urls
17 | 
18 |     def get_next_aoai(prompt: str):
19 |         token_estimate = len(prompt)/4
20 | 


--------------------------------------------------------------------------------
/aisentry/utils/approaches/randomallocation.py:
--------------------------------------------------------------------------------
 1 | from approach import Approach
 2 | from typing import overload
 3 | import random
 4 | 
 5 | class RandomAllocation(Approach):
 6 |     def __init__():
 7 |         pass
 8 | 
 9 |     @overload   
10 |     def find_available_aoai(self):
11 |         aoai = random(self.aoai_endpoints)
12 |         return aoai


--------------------------------------------------------------------------------
/aisentry/utils/auth_helpers.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask
 2 | 
 3 | # Error handler
 4 | class AuthError(Exception):
 5 |     def __init__(self, error, status_code):
 6 |         self.error = error
 7 |         self.status_code = status_code
 8 | 
 9 | class OriginalHttpRequest:
10 |     def __init__(self, request_headers, params, response_body):
11 |         self.request_headers = request_headers
12 |         self.params = params
13 |         self.response_body = response_body
14 | 
15 |     
16 | 
17 | 
18 | # @app.errorhandler(AuthError)
19 | # def handle_auth_error(ex):
20 | #     print('handling error')
21 | #     response = jsonify(ex.error)
22 | #     response.status_code = ex.status_code
23 | #     return response
24 | 
25 | # def get_token_auth_header():
26 | #     """Obtains the Access Token from the Authorization Header
27 | #     """
28 | #     auth = request.headers.get("Authorization", None)
29 | #     if not auth:
30 | #         raise AuthError({"code": "authorization_header_missing",
31 | #                         "description":
32 | #                         "Authorization header is expected"}, 401)
33 | 
34 | #     parts = auth.split()
35 | 
36 | #     if parts[0].lower() != "bearer":
37 | #         raise AuthError({"code": "invalid_header",
38 | #                         "description":
39 | #                         "Authorization header must start with"
40 | #                         " Bearer"}, 401)
41 | #     elif len(parts) == 1:
42 | #         raise AuthError({"code": "invalid_header",
43 | #                         "description": "Token not found"}, 401)
44 | #     elif len(parts) > 2:
45 | #         raise AuthError({"code": "invalid_header",
46 | #                         "description":
47 | #                         "Authorization header must be"
48 | #                         " Bearer token"}, 401)
49 | 
50 | #     token = parts[1]
51 | #     return token
52 | 
53 | 
54 | 
55 | 
56 | # def verify_jwt(tenant_id: str, audience: str = None):
57 | #     try:
58 | 
59 | #         bearer_token = get_token_auth_header()
60 | #         #unverified_header = jwt.get_unverified_header(bearer_token)
61 | 
62 | #         signing_key = jwks_client.get_signing_key_from_jwt(bearer_token)
63 | 
64 | 
65 | #         data = jwt.api_jwt.decode_complete(
66 | #                     bearer_token,
67 | #                     signing_key.key,
68 | #                     algorithms=["RS256"],
69 | #                     audience=AUDIENCE,
70 | #                     issuer="https://sts.windows.net/" + tenant_id + "/"
71 | #                 )
72 |         
73 | #         payload, header = data["payload"], data["header"]
74 | 
75 | #         print(f"payload: {payload}")
76 | #         print(f"header: {header}")
77 | #         #alg_obj = jwt.get_algorithm_by_name(header["alg"])
78 | #         return True
79 | #     except Exception as e:
80 | #         raise AuthError({"code": "invalid_header",
81 | #                                 "description":
82 | #                                 "Unable to verify authentication"
83 | #                                 f" token with error {e}"}, 401)
84 | 
85 |    
86 | #     raise AuthError({"code": "invalid_header",
87 | #                          "description": "Unable to find appropriate key"}, 401)
88 |     
89 | #         #jwt_keys[tenant_id] = jwks
90 |     
91 | 
92 | 
93 | # @app.route('/helloauth', methods=['POST'])
94 | # async def hello_auth() -> Tuple[dict, str]:
95 | #     bearer_token = get_token_auth_header()
96 | #     verify_jwt("d3edbe6c-8eda-4e97-8370-86961098c24c")
97 | #     jwt_token = jwt.get_unverified_header(bearer_token)
98 | 
99 | #     return jsonify(message=f"Hello, World {name}! and token {jwt_token}")


--------------------------------------------------------------------------------
/aisentry/utils/combined-cert.crt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/aisentry/utils/combined-cert.crt


--------------------------------------------------------------------------------
/aisentry/utils/combined_cert.pem:
--------------------------------------------------------------------------------
 1 | -----BEGIN CERTIFICATE-----
 2 | MIIHKTCCBhGgAwIBAgIQBXCHG2zHQ6+7D2T6NMnDgzANBgkqhkiG9w0BAQsFADBN
 3 | MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMScwJQYDVQQDEx5E
 4 | aWdpQ2VydCBTSEEyIFNlY3VyZSBTZXJ2ZXIgQ0EwHhcNMjQwMjI1MDAwMDAwWhcN
 5 | MjUwMjI1MjM1OTU5WjB4MQswCQYDVQQGEwJVUzETMBEGA1UECBMKV2FzaGluZ3Rv
 6 | bjEQMA4GA1UEBxMHUmVkbW9uZDEeMBwGA1UEChMVTWljcm9zb2Z0IENvcnBvcmF0
 7 | aW9uMSIwIAYDVQQDDBkqLmNvZ25pdGl2ZS5taWNyb3NvZnQuY29tMIIBIjANBgkq
 8 | hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwOt7yazkYYtbZqCoXhWHUbr/qT9nlnvG
 9 | ucYmFDPiGRM8RSrrVKcNsBVD5MR7SuZUy9H8lT5v3jFbhZDEITgyqOgR9GFSJ2m8
10 | jWZG+YhPtCM+uHB9djFwj8O2GeBO176y151XRYVmVqf6igN3XrqNcNUSGGHjTeAK
11 | VwQSwRY11DxvtxmBhLVbwdXL7yD71/uJfau6a5jYUsXZQXmb8ABG2hpmXG0jEf/E
12 | LL3a4jXwTT/p+GhegdqF1/Mv8YvSnf5e0Df7l6+Nm3l9DWHHg+mHauXCmIlebZWo
13 | gkgd54IG+eqkZM5emyzGhq+AK5mg7Vp0GUGtLP0P3b8YY4gDzFOYzQIDAQABo4ID
14 | 2DCCA9QwHwYDVR0jBBgwFoAUD4BhHIIxYdUvKOeNRji0LOHG2eIwHQYDVR0OBBYE
15 | FOfeu5mW3U8S+pWU95vLuU11NhJPMIGBBgNVHREEejB4ghkqLmNvZ25pdGl2ZS5t
16 | aWNyb3NvZnQuY29tgh0qLmFwaS5jb2duaXRpdmUubWljcm9zb2Z0LmNvbYIdKi5j
17 | b2duaXRpdmVzZXJ2aWNlcy5henVyZS5jb22CHSouZGV2LmNvZ25pdGl2ZS5taWNy
18 | b3NvZnQuY29tMD4GA1UdIAQ3MDUwMwYGZ4EMAQICMCkwJwYIKwYBBQUHAgEWG2h0
19 | dHA6Ly93d3cuZGlnaWNlcnQuY29tL0NQUzAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0l
20 | BBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMIGNBgNVHR8EgYUwgYIwP6A9oDuGOWh0
21 | dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpY2VydFNIQTJTZWN1cmVTZXJ2ZXJD
22 | QS0xLmNybDA/oD2gO4Y5aHR0cDovL2NybDQuZGlnaWNlcnQuY29tL0RpZ2ljZXJ0
23 | U0hBMlNlY3VyZVNlcnZlckNBLTEuY3JsMH4GCCsGAQUFBwEBBHIwcDAkBggrBgEF
24 | BQcwAYYYaHR0cDovL29jc3AuZGlnaWNlcnQuY29tMEgGCCsGAQUFBzAChjxodHRw
25 | Oi8vY2FjZXJ0cy5kaWdpY2VydC5jb20vRGlnaUNlcnRTSEEyU2VjdXJlU2VydmVy
26 | Q0EtMi5jcnQwDAYDVR0TAQH/BAIwADCCAX8GCisGAQQB1nkCBAIEggFvBIIBawFp
27 | AHUATnWjJ1yaEMM4W2zU3z9S6x3w4I4bjWnAsfpksWKaOd8AAAGN3apYgQAABAMA
28 | RjBEAiAKD2RDCeXYaUHu3eyYlzxZWRpUr4Dxyu6FVuhqMqEllgIgRMsNK3OF28gC
29 | RCCzmKBnzcLRWdXle8s0zIhCSKQeC8EAdwB9WR4S4XgqexxhZ3xe/fjQh1wUoE6V
30 | nrkDL9kOjC55uAAAAY3dqliIAAAEAwBIMEYCIQC/cn2ZQiz1u93mR/3Zmo2hgXS/
31 | ZvUyGlsgYpdvzOmgAwIhAOBXToAYj4TbG7vzRjl7//L1aSb+hgZolmYEk+2mNk0d
32 | AHcA5tIxY0B3jMEQQQbXcbnOwdJA9paEhvu6hzId/R43jlAAAAGN3apYuwAABAMA
33 | SDBGAiEAheIGgEAGUvtRnMbeMehCU1zYCgoRkSQC4aHb+q/Vl6kCIQCLWypyI5Lz
34 | 0f07mu3L714TCF2OUy6ioesfQV+fS7uHnzANBgkqhkiG9w0BAQsFAAOCAQEAtyyc
35 | uNwfq9q9pEitic7QisFpOVVBV721T9D6ase4BXQd0woeFPJoyDiZImBp+jmw5W9m
36 | pym/WSpIxxHZUAd0IvxYuRwXBdMnZX2sTCNRjY5rOSK4AFfm2OMW1o52Xqjgi+XG
37 | 4IhX3oWyHBiZXeyOT0TXuWUWq45t9/MY5Usl/72GSPxixGieUA4r6BdZ/zoHzB8K
38 | t8V/cVwYm11TenQN9BXYQXQ0o66Vjt1Zqgadx68fz+hPclNVxeZiXeKbknMiguBs
39 | gub46tAgZ2m+F9/vDIgmBjw7VzFLUAJZOPeNc7rAwWBO+eXpiKwfHOnn0yet6/np
40 | vpayI9kPWD0dNsWycA==
41 | -----END CERTIFICATE-----
42 | -----BEGIN CERTIFICATE-----
43 | MIIE6DCCA9CgAwIBAgIQAnQuqhfKjiHHF7sf/P0MoDANBgkqhkiG9w0BAQsFADBh
44 | MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
45 | d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD
46 | QTAeFw0yMDA5MjMwMDAwMDBaFw0zMDA5MjIyMzU5NTlaME0xCzAJBgNVBAYTAlVT
47 | MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxJzAlBgNVBAMTHkRpZ2lDZXJ0IFNIQTIg
48 | U2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB
49 | ANyuWJBNwcQwFZA1W248ghX1LFy949v/cUP6ZCWA1O4Yok3wZtAKc24RmDYXZK83
50 | nf36QYSvx6+M/hpzTc8zl5CilodTgyu5pnVILR1WN3vaMTIa16yrBvSqXUu3R0bd
51 | KpPDkC55gIDvEwRqFDu1m5K+wgdlTvza/P96rtxcflUxDOg5B6TXvi/TC2rSsd9f
52 | /ld0Uzs1gN2ujkSYs58O09rg1/RrKatEp0tYhG2SS4HD2nOLEpdIkARFdRrdNzGX
53 | kujNVA075ME/OV4uuPNcfhCOhkEAjUVmR7ChZc6gqikJTvOX6+guqw9ypzAO+sf0
54 | /RR3w6RbKFfCs/mC/bdFWJsCAwEAAaOCAa4wggGqMB0GA1UdDgQWBBQPgGEcgjFh
55 | 1S8o541GOLQs4cbZ4jAfBgNVHSMEGDAWgBQD3lA1VtFMu2bwo+IbG8OXsj3RVTAO
56 | BgNVHQ8BAf8EBAMCAYYwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMBIG
57 | A1UdEwEB/wQIMAYBAf8CAQAwdgYIKwYBBQUHAQEEajBoMCQGCCsGAQUFBzABhhho
58 | dHRwOi8vb2NzcC5kaWdpY2VydC5jb20wQAYIKwYBBQUHMAKGNGh0dHA6Ly9jYWNl
59 | cnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEdsb2JhbFJvb3RDQS5jcnQwewYDVR0f
60 | BHQwcjA3oDWgM4YxaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0R2xv
61 | YmFsUm9vdENBLmNybDA3oDWgM4YxaHR0cDovL2NybDQuZGlnaWNlcnQuY29tL0Rp
62 | Z2lDZXJ0R2xvYmFsUm9vdENBLmNybDAwBgNVHSAEKTAnMAcGBWeBDAEBMAgGBmeB
63 | DAECATAIBgZngQwBAgIwCAYGZ4EMAQIDMA0GCSqGSIb3DQEBCwUAA4IBAQB3MR8I
64 | l9cSm2PSEWUIpvZlubj6kgPLoX7hyA2MPrQbkb4CCF6fWXF7Ef3gwOOPWdegUqHQ
65 | S1TSSJZI73fpKQbLQxCgLzwWji3+HlU87MOY7hgNI+gH9bMtxKtXc1r2G1O6+x/6
66 | vYzTUVEgR17vf5irF0LKhVyfIjc0RXbyQ14AniKDrN+v0ebHExfppGlkTIBn6rak
67 | f4994VH6npdn6mkus5CkHBXIrMtPKex6XF2firjUDLuU7tC8y7WlHgjPxEEDDb0G
68 | w6D0yDdVSvG/5XlCNatBmO/8EznDu1vr72N8gJzISUZwa6CCUD7QBLbKJcXBBVVf
69 | 8nwvV9GvlW+sbXlr
70 | -----END CERTIFICATE-----
71 | -----BEGIN CERTIFICATE-----
72 | MIIDrzCCApegAwIBAgIQCDvgVpBCRrGhdWrJWZHHSjANBgkqhkiG9w0BAQUFADBh
73 | MQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3
74 | d3cuZGlnaWNlcnQuY29tMSAwHgYDVQQDExdEaWdpQ2VydCBHbG9iYWwgUm9vdCBD
75 | QTAeFw0wNjExMTAwMDAwMDBaFw0zMTExMTAwMDAwMDBaMGExCzAJBgNVBAYTAlVT
76 | MRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5j
77 | b20xIDAeBgNVBAMTF0RpZ2lDZXJ0IEdsb2JhbCBSb290IENBMIIBIjANBgkqhkiG
78 | 9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4jvhEXLeqKTTo1eqUKKPC3eQyaKl7hLOllsB
79 | CSDMAZOnTjC3U/dDxGkAV53ijSLdhwZAAIEJzs4bg7/fzTtxRuLWZscFs3YnFo97
80 | nh6Vfe63SKMI2tavegw5BmV/Sl0fvBf4q77uKNd0f3p4mVmFaG5cIzJLv07A6Fpt
81 | 43C/dxC//AH2hdmoRBBYMql1GNXRor5H4idq9Joz+EkIYIvUX7Q6hL+hqkpMfT7P
82 | T19sdl6gSzeRntwi5m3OFBqOasv+zbMUZBfHWymeMr/y7vrTC0LUq7dBMtoM1O/4
83 | gdW7jVg/tRvoSSiicNoxBN33shbyTApOB6jtSj1etX+jkMOvJwIDAQABo2MwYTAO
84 | BgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUA95QNVbR
85 | TLtm8KPiGxvDl7I90VUwHwYDVR0jBBgwFoAUA95QNVbRTLtm8KPiGxvDl7I90VUw
86 | DQYJKoZIhvcNAQEFBQADggEBAMucN6pIExIK+t1EnE9SsPTfrgT1eXkIoyQY/Esr
87 | hMAtudXH/vTBH1jLuG2cenTnmCmrEbXjcKChzUyImZOMkXDiqw8cvpOp/2PV5Adg
88 | 06O/nVsJ8dWO41P0jmP6P6fbtGbfYmbW0W5BjfIttep3Sp+dWOIrWcBAI+0tKIJF
89 | PnlUkiaY4IBIqDfv8NZ5YBberOgOzW6sRBc4L0na4UU+Krk2U886UAb3LujEV0ls
90 | YSEY1QSteDwsOoBrp+uvFRTp2InBuThs4pFsiv9kuXclVzDAGySj4dzp30d8tbQk
91 | CAUw7C29C79Fv1C5qfPrmAESrciIxpg0X40KPMbp1ZWVbd4=
92 | -----END CERTIFICATE-----
93 | 


--------------------------------------------------------------------------------
/aisentry/worker/cosmos_logger/__init__.py:
--------------------------------------------------------------------------------
1 | from .cosmos_logger import app
2 | 
3 | __all__ = [
4 |     "cosmos_logger"
5 | ]


--------------------------------------------------------------------------------
/aisentry/worker/cosmos_logger/cosmos_logger.py:
--------------------------------------------------------------------------------
  1 | from flask import Flask, request, jsonify
  2 | from cloudevents.http import from_http
  3 | from requests.exceptions import HTTPError
  4 | from azure.core.exceptions import AzureError
  5 | from dapr.clients import DaprClient
  6 | from utils.analyze_pii import analyze_pii_async
  7 | from utils.analyze_pii_openai import get_chat_pii_stripped_completion
  8 | import logging
  9 | import datetime
 10 | import asyncio
 11 | from dotenv import load_dotenv
 12 | import json
 13 | import requests
 14 | import os
 15 | import httpx
 16 | import uuid
 17 | from typing import List
 18 | 
 19 | # initial setup for logging / env variable loading
 20 | log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
 21 | 
 22 | logger = logging.getLogger(__name__)
 23 | logging.basicConfig(level=getattr(logging, log_level),
 24 |                     format='%(asctime)s - %(levelname)s - %(message)s',
 25 |                     datefmt='%d-%m-%Y %H:%M:%S'
 26 |                     )
 27 | load_dotenv(".env", override=True)
 28 | 
 29 | app = Flask(__name__)
 30 | 
 31 | app_port = os.getenv('APP_PORT', '7000')
 32 | 
 33 | # This can be either OPENAI or TEXTANALYTICS
 34 | pii_stripping_service = os.getenv('PII_STRIPPING_SERVICE', 'OPENAI')
 35 | 
 36 | # Register Dapr pub/sub subscriptions
 37 | @app.route('/dapr/subscribe', methods=['GET'])
 38 | def subscribe():
 39 |     subscriptions = [{
 40 |         'pubsubname': 'openaipubsub',
 41 |         'topic': 'requests-logging',
 42 |         'route': 'requestslogging'
 43 |     }]
 44 |     logger.info('Dapr pub/sub is subscribed to: ' + json.dumps(subscriptions))
 45 |     return jsonify(subscriptions)
 46 | 
 47 | 
 48 | # Dapr subscription in /dapr/subscribe sets up this route
 49 | @app.route('/requestslogging', methods=['POST'])
 50 | async def oairequests_subscriber():
 51 |     event = from_http(request.headers, request.get_data())
 52 |     data = json.loads(event.data)
 53 | 
 54 |     logger.debug(f"Received a request to log data{data}")
 55 | 
 56 |     response_body = data.get('response_body', None)
 57 |     
 58 |     consumer_id=data['sentry_ai_headers'].get('ai-sentry-consumer', None)
 59 |     log_level=data['sentry_ai_headers'].get('ai-sentry-log-level', None)
 60 |     model_name=data.get('model',None)
 61 |     openai_response_id=data.get('openai_response_id',None)
 62 |     date = datetime.datetime.fromisoformat(data.get('date_time_utc'))
 63 |     month_date = date.strftime("%m_%Y")
 64 |     
 65 |     logger.info('Consumer Product Id: %s', consumer_id)
 66 |     logger.info('LogId: %s', model_name)
 67 |     logger.info('LogLevel: %s', log_level)
 68 | 
 69 |     data['LogId']=f"{model_name}_{consumer_id}_{month_date}"
 70 |     data['id']=openai_response_id
 71 |     data['logLevel']=log_level
 72 |     
 73 |     output_binding_data = json.dumps(data)
 74 | 
 75 |     data = {key.lower(): value for key, value in data.items()}
 76 |    
 77 |     headers = data.get('sentry_ai_headers', None)
 78 |     
 79 |     
 80 |     if headers['ai-sentry-log-level'] == 'PII_STRIPPING_ENABLED':
 81 |         logger.info('Logging is set to PII Stripping mode')
 82 | 
 83 |         input_data: List[str] = []
 84 |         input_data.append(output_binding_data)
 85 |         if pii_stripping_service == 'TEXTANALYTICS':
 86 |             logger.debug(f"PII stripping service: {pii_stripping_service}")
 87 |             output_binding_data = await analyze_pii_async(input_data)
 88 |         
 89 |         #OPENAI BASED PII Stripping
 90 |         else:
 91 |             logger.debug(f"PII stripping service: {pii_stripping_service}")
 92 | 
 93 |             # Ensure a new event loop is created if the current one is closed
 94 |             try:
 95 |                 loop = asyncio.get_event_loop()
 96 |                 if loop.is_closed():
 97 |                     loop = asyncio.new_event_loop()
 98 |                     asyncio.set_event_loop(loop)
 99 |             except RuntimeError:
100 |                 loop = asyncio.new_event_loop()
101 |                 asyncio.set_event_loop(loop)
102 | 
103 |             if loop.is_running():
104 |                 task = loop.create_task(get_chat_pii_stripped_completion(input_data))
105 |                 result = loop.run_until_complete(task)
106 |             else:
107 |                 result = loop.run_until_complete(get_chat_pii_stripped_completion(input_data))
108 | 
109 |             print(result)
110 |             # output_binding_data = await get_chat_pii_stripped_completion(input_data)
111 |         
112 |         logger.debug(f"PII stripped data: {output_binding_data}")
113 | 
114 | 
115 |     elif headers['ai-sentry-log-level'] == 'COMPLETE':
116 |         logger.info('Logging is set to complete mode')
117 |     
118 |     else:
119 |         logger.info('Logging is set to disabled mode')
120 |         return json.dumps(output_binding_data), 200
121 | 
122 |     
123 |     # Send the output to CosmosDB using Dapr binding
124 |     with DaprClient() as d:
125 |         resp = d.invoke_binding(
126 |             binding_name='cosmosdb-log',
127 |             operation='create',
128 |             binding_metadata={
129 |                 'partitionKey': 'LogId',
130 |             },
131 |             data=output_binding_data
132 |         )
133 |     
134 |     # return json.dumps(requestPayload), 200, {
135 |     #     'ContentType': 'application/json'}
136 |     return json.dumps(output_binding_data), 200
137 | 
138 | app.run(port=app_port)
139 | 


--------------------------------------------------------------------------------
/aisentry/worker/loganalytics_logger/__init__.py:
--------------------------------------------------------------------------------
1 | from .loganalytics_logger import app
2 | 
3 | __all__ = [
4 |     "loganalytics_logger"
5 | ]


--------------------------------------------------------------------------------
/aisentry/worker/loganalytics_logger/loganalytics_logger.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, request, jsonify
 2 | from cloudevents.http import from_http
 3 | from requests.exceptions import HTTPError
 4 | from azure.identity import DefaultAzureCredential
 5 | from azure.core.exceptions import AzureError
 6 | from dapr.clients import DaprClient
 7 | from utils.analyze_pii import analyze_pii_async
 8 | from utils.analyze_pii_chunked_ta import analyze_pii_async
 9 | import logging
10 | import asyncio
11 | from dotenv import load_dotenv
12 | import json
13 | import requests
14 | import os
15 | import httpx
16 | import uuid
17 | from typing import List
18 | 
19 | 
20 | # initial setup for logging / env variable loading
21 | log_level = os.getenv('LOG_LEVEL', 'INFO').upper()
22 | 
23 | logger = logging.getLogger(__name__)
24 | logging.basicConfig(level=getattr(logging, log_level),
25 |                     format='%(asctime)s - %(levelname)s - %(message)s',
26 |                     datefmt='%d-%m-%Y %H:%M:%S'
27 |                     )
28 | load_dotenv(".env", override=True)
29 | 
30 | app = Flask(__name__)
31 | app_port = os.getenv('APP_PORT', '7000')
32 | 
33 | 
34 | # Register Dapr pub/sub subscriptions
35 | @app.route('/dapr/subscribe', methods=['GET'])
36 | def subscribe():
37 |     subscriptions = [{
38 |         'pubsubname': 'openaipubsub',
39 |         'topic': 'requests-logging',
40 |         'route': 'requestslogging'
41 |     }]
42 |     print('Dapr pub/sub is subscribed to: ' + json.dumps(subscriptions))
43 |     return jsonify(subscriptions)
44 | 
45 | 
46 | # Dapr subscription in /dapr/subscribe sets up this route
47 | @app.route('/requestslogging', methods=['POST'])
48 | async def oairequests_subscriber():
49 |     event = from_http(request.headers, request.get_data())
50 |     data = json.loads(event.data)
51 | 
52 | 
53 |     # Implement log analytics logging logic here. DAPR has no native support for log analytics output binding
54 |    
55 |     return json.dumps(data), 200
56 | 
57 | app.run(port=app_port)
58 | 


--------------------------------------------------------------------------------
/aisentry/worker/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==23.2.1
 2 | aiohttp==3.9.4
 3 | aiosignal==1.3.1
 4 | annotated-types==0.6.0
 5 | anyio==4.3.0
 6 | asgiref==3.8.1
 7 | async==0.6.2
 8 | asyncio==3.4.3
 9 | attrs==23.2.0
10 | azure-ai-textanalytics==5.3.0
11 | azure-common==1.1.28
12 | azure-core==1.30.1
13 | azure-identity==1.16.1
14 | blinker==1.7.0
15 | certifi==2024.2.2
16 | cffi==1.16.0
17 | charset-normalizer==3.3.2
18 | click==8.1.7
19 | cloudevents==1.10.1
20 | cryptography==42.0.5
21 | dapr==1.13.0
22 | dapr-client==0.4.0b1
23 | deprecation==2.1.0
24 | distro==1.9.0
25 | fastapi==0.110.0
26 | fastapi-responses==0.2.1
27 | Flask==3.0.2
28 | frozenlist==1.4.1
29 | googleapis-common-protos==1.63.0
30 | grpcio==1.62.1
31 | grpcio-status==1.62.1
32 | h11==0.14.0
33 | h2==4.1.0
34 | hpack==4.0.0
35 | httpcore==1.0.4
36 | httpx==0.27.0
37 | Hypercorn==0.16.0
38 | hyperframe==6.0.1
39 | idna==3.7
40 | isodate==0.6.1
41 | itsdangerous==2.1.2
42 | Jinja2==3.1.4
43 | jwt==1.3.1
44 | MarkupSafe==2.1.5
45 | msal==1.28.0
46 | msal-extensions==1.1.0
47 | multidict==6.0.5
48 | openai==1.14.2
49 | nest_asyncio==1.6.0
50 | packaging==24.0
51 | portalocker==2.8.2
52 | priority==2.0.0
53 | protobuf==4.25.3
54 | pycparser==2.21
55 | pydantic==2.6.4
56 | pydantic_core==2.16.3
57 | PyJWT==2.8.0
58 | python-dateutil==2.9.0.post0
59 | python-dotenv==1.0.1
60 | Quart==0.19.4
61 | requests==2.32.2
62 | six==1.16.0
63 | sniffio==1.3.1
64 | starlette==0.36.3
65 | tqdm==4.66.3
66 | typing_extensions==4.10.0
67 | urllib3==2.2.2
68 | Werkzeug==3.0.3
69 | wsproto==1.2.0
70 | yarl==1.9.4
71 | tiktoken==0.7.0
72 |  
73 | 


--------------------------------------------------------------------------------
/aisentry/worker/usage_summary/__init__.py:
--------------------------------------------------------------------------------
1 | from .usage_logger import app
2 | 
3 | __all__ = [
4 |     "usage_logger"
5 | ]


--------------------------------------------------------------------------------
/aisentry/worker/usage_summary/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==23.2.1
 2 | aiohttp==3.9.4
 3 | aiosignal==1.3.1
 4 | annotated-types==0.6.0
 5 | anyio==4.3.0
 6 | asgiref==3.8.1
 7 | async==0.6.2
 8 | asyncio==3.4.3
 9 | attrs==23.2.0
10 | azure-ai-textanalytics==5.3.0
11 | azure-common==1.1.28
12 | azure-core==1.30.1
13 | azure-identity==1.16.1
14 | blinker==1.7.0
15 | certifi==2024.2.2
16 | cffi==1.16.0
17 | charset-normalizer==3.3.2
18 | click==8.1.7
19 | cloudevents==1.10.1
20 | cryptography==42.0.5
21 | dapr==1.13.0
22 | dapr-client==0.4.0b1
23 | deprecation==2.1.0
24 | distro==1.9.0
25 | fastapi==0.110.0
26 | fastapi-responses==0.2.1
27 | Flask==3.0.2
28 | frozenlist==1.4.1
29 | googleapis-common-protos==1.63.0
30 | grpcio==1.62.1
31 | grpcio-status==1.62.1
32 | h11==0.14.0
33 | h2==4.1.0
34 | hpack==4.0.0
35 | httpcore==1.0.4
36 | httpx==0.27.0
37 | Hypercorn==0.16.0
38 | hyperframe==6.0.1
39 | idna==3.7
40 | isodate==0.6.1
41 | itsdangerous==2.1.2
42 | Jinja2==3.1.4
43 | jwt==1.3.1
44 | MarkupSafe==2.1.5
45 | msal==1.28.0
46 | msal-extensions==1.1.0
47 | multidict==6.0.5
48 | openai==1.14.2
49 | packaging==24.0
50 | portalocker==2.8.2
51 | priority==2.0.0
52 | protobuf==4.25.3
53 | pycparser==2.21
54 | pydantic==2.6.4
55 | pydantic_core==2.16.3
56 | PyJWT==2.8.0
57 | python-dateutil==2.9.0.post0
58 | python-dotenv==1.0.1
59 | Quart==0.19.4
60 | requests==2.32.2
61 | six==1.16.0
62 | sniffio==1.3.1
63 | starlette==0.36.3
64 | tqdm==4.66.3
65 | typing_extensions==4.10.0
66 | urllib3==2.2.1
67 | Werkzeug==3.0.3
68 | wsproto==1.2.0
69 | yarl==1.9.4
70 | tiktoken==0.7.0
71 |  
72 | 


--------------------------------------------------------------------------------
/aisentry/worker/usage_summary/usage_logger.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, request, jsonify
 2 | from cloudevents.http import from_http
 3 | from requests.exceptions import HTTPError
 4 | from azure.identity import DefaultAzureCredential
 5 | from azure.core.exceptions import AzureError
 6 | from dapr.clients import DaprClient
 7 | from utils.analyze_pii import analyze_pii_async
 8 | from utils.ai_sentry_helpers import openAISummaryLogObject
 9 | from utils.analyze_pii_chunked_ta import analyze_pii_async
10 | import logging
11 | import datetime
12 | import asyncio
13 | from dotenv import load_dotenv
14 | import json
15 | import requests
16 | import os
17 | import httpx
18 | import uuid
19 | from typing import List
20 | 
21 | # Get log level from environment variable
22 | log_level = os.getenv('LOG-LEVEL', 'INFO').upper()
23 | logger = logging.getLogger(__name__)
24 | app = Flask(__name__)
25 | 
26 | # Set up the logger
27 | logging.basicConfig(level=getattr(logging, log_level),
28 |                     format='%(asctime)s - %(levelname)s - %(message)s',
29 |                     datefmt='%d-%m-%Y %H:%M:%S'
30 |                     )
31 | logger.info(f"Log level is set to {log_level}")
32 | 
33 | load_dotenv(".env", override=True)
34 | 
35 | app_port = os.getenv('APP_PORT', '7001')
36 | 
37 | 
38 | # Register Dapr pub/sub subscriptions
39 | @app.route('/dapr/subscribe', methods=['GET'])
40 | def subscribe():
41 |     subscriptions = [{
42 |         'pubsubname': 'openaipubsub',
43 |         'topic': 'requests-logging',
44 |         'route': 'requestslogging'
45 |     }]
46 | 
47 |     return jsonify(subscriptions)
48 | 
49 | 
50 | # Dapr subscription in /dapr/subscribe sets up this route
51 | @app.route('/requestslogging', methods=['POST'])
52 | async def oairequests_subscriber():
53 |     event = from_http(request.headers, request.get_data())
54 |     data = json.loads(event.data)
55 | 
56 |     logger.debug(f"Received a request to log data{data}")
57 |     
58 |     consumer_id=data['sentry_ai_headers'].get('ai-sentry-consumer', None)
59 |     model_name = data.get('model', None)
60 |     usage_info=data.get('usage', None)
61 |     openai_response_id=data.get('openai_response_id', None)
62 |     date = datetime.datetime.fromisoformat(data.get('date_time_utc', None))
63 |     month_date = date.strftime("%m_%Y")
64 |     
65 |     logger.info('Consumer Product Id: %s', consumer_id)
66 |     logger.info('ModelName: %s', model_name)
67 |     logger.info('Usage Info: %s', usage_info)   
68 |     log_id=f"{model_name}_{consumer_id}_{month_date}"
69 | 
70 |     summary_info = openAISummaryLogObject(id=openai_response_id, LogId=log_id, timestamp=data['date_time_utc'], model=model_name, product_id=consumer_id, usage=usage_info, month_year=month_date)
71 |     summary_info_dict = summary_info.to_dict()
72 | 
73 |     output_binding_data = json.dumps(summary_info_dict)
74 |        
75 |     # Send the output to CosmosDB using Dapr binding
76 |     with DaprClient() as d:
77 |         resp = d.invoke_binding(
78 |             binding_name='summary-log',
79 |             operation='create',
80 |             binding_metadata={
81 |                 'partitionKey': 'LogId',
82 |             },
83 |             data=output_binding_data
84 |         )
85 |    
86 |     return json.dumps(output_binding_data), 200
87 | 
88 | app.run(port=app_port)
89 | 


--------------------------------------------------------------------------------
/build/Dockerfile.facade:
--------------------------------------------------------------------------------
 1 | # pull official base image
 2 | FROM python:3.11.3-slim-buster as build
 3 | 
 4 | RUN apt-get update
 5 | RUN apt-get install -y --no-install-recommends gcc build-essential
 6 | 
 7 | WORKDIR /app
 8 | 
 9 | RUN python -m venv /app/venv
10 | ENV PATH="/app/venv/bin:$PATH"
11 | 
12 | COPY requirements.txt .
13 | RUN pip install --upgrade pip 
14 | RUN pip install -r requirements.txt
15 | 
16 | FROM python:3.11.3-slim-buster
17 | 
18 | RUN groupadd -g 999 python && \
19 |     useradd -r -u 999 -g python python
20 | 
21 | RUN mkdir /app && chown python:python /app
22 | WORKDIR /app
23 | 
24 | RUN mkdir /utils
25 | COPY --chown=python:python utils/ /app/utils/
26 | 
27 | COPY /asgi_facade.py /app/asgi_facade.py
28 | 
29 | COPY --chown=python:python --from=build /app/venv ./venv
30 | 
31 | ENV PATH="/app/venv/bin:$PATH"
32 | COPY --chown=python:python . .
33 | 
34 | USER 999
35 | 
36 | ENV PATH="/app/venv/bin:$PATH"
37 | 
38 | 
39 | CMD ["hypercorn", "--bind", "0.0.0.0:5000", "asgi_facade:app"]


--------------------------------------------------------------------------------
/build/Dockerfile.worker:
--------------------------------------------------------------------------------
 1 | # pull official base image
 2 | FROM python:3.11.3-slim-buster as build
 3 | 
 4 | RUN apt-get update
 5 | RUN apt-get install -y --no-install-recommends gcc build-essential bash
 6 | 
 7 | WORKDIR /app
 8 | 
 9 | RUN python -m venv /app/venv
10 | ENV PATH="/app/venv/bin:$PATH"
11 | 
12 | COPY worker/requirements.txt .
13 | RUN pip install --upgrade pip 
14 | RUN pip install -r requirements.txt
15 | 
16 | FROM python:3.11.3-slim-buster
17 | 
18 | RUN groupadd -g 999 python && \
19 |     useradd -r -u 999 -g python python
20 | 
21 | RUN mkdir /app && chown python:python /app
22 | WORKDIR /app
23 | 
24 | COPY --chown=python:python --from=build /app/venv ./venv
25 | 
26 | RUN mkdir /worker
27 | COPY --chown=python:python worker/ /app/worker/
28 | 
29 | RUN mkdir /utils
30 | COPY --chown=python:python utils/ /app/utils/
31 | 
32 | COPY /asgi_worker.py /app/asgi_worker.py
33 | WORKDIR /app
34 | 
35 | 
36 | # Copy the certificate from your host to your docker image
37 | COPY /utils/combined-cert.crt /usr/local/share/ca-certificates/combined_cert.crt
38 | 
39 | # Update the CA certificates
40 | RUN apt-get update && \
41 |     apt-get install -y ca-certificates && \
42 |     update-ca-certificates
43 | 
44 | USER 999
45 | 
46 | ENV PATH="/app/venv/bin:$PATH"
47 | 
48 | CMD ["hypercorn", "--bind", "0.0.0.0:7000", "asgi_worker:app"]


--------------------------------------------------------------------------------
/build/DockerfileSummary.worker:
--------------------------------------------------------------------------------
 1 | # pull official base image
 2 | FROM python:3.11.3-slim-buster as build
 3 | 
 4 | RUN apt-get update
 5 | RUN apt-get install -y --no-install-recommends gcc build-essential bash
 6 | 
 7 | WORKDIR /app
 8 | 
 9 | RUN python -m venv /app/venv
10 | ENV PATH="/app/venv/bin:$PATH"
11 | 
12 | COPY worker/usage_summary/requirements.txt .
13 | RUN pip install --upgrade pip 
14 | RUN pip install -r requirements.txt
15 | 
16 | FROM python:3.11.3-slim-buster
17 | 
18 | RUN groupadd -g 999 python && \
19 |     useradd -r -u 999 -g python python
20 | 
21 | RUN mkdir /app && chown python:python /app
22 | WORKDIR /app
23 | 
24 | COPY --chown=python:python --from=build /app/venv ./venv
25 | 
26 | RUN mkdir /worker
27 | COPY --chown=python:python worker/ /app/worker/
28 | 
29 | RUN mkdir /utils
30 | COPY --chown=python:python utils/ /app/utils/
31 | 
32 | COPY /asgi_summary_worker.py /app/asgi_summary_worker.py
33 | WORKDIR /app
34 | 
35 | 
36 | # Copy the certificate from your host to your docker image
37 | COPY /utils/combined-cert.crt /usr/local/share/ca-certificates/combined_cert.crt
38 | 
39 | # Update the CA certificates
40 | RUN apt-get update && \
41 |     apt-get install -y ca-certificates && \
42 |     update-ca-certificates
43 | 
44 | USER 999
45 | 
46 | ENV PATH="/app/venv/bin:$PATH"
47 | 
48 | CMD ["hypercorn", "--bind", "0.0.0.0:7001", "asgi_summary_worker:app"]
49 | #CMD ["sleep", "600"]


--------------------------------------------------------------------------------
/build/build-ai-sentry-containers.ps1:
--------------------------------------------------------------------------------
 1 | param(
 2 |     [string]$version = "0.1.1",
 3 |     [string]$containerRegistry = "anevjesacrdevtest.azurecr.io"
 4 | )
 5 | #Uncomment first line for very first time to set to the right acr context
 6 | # az acr login --name anevjesacrdevtest
 7 | Write-Host "Building AI-Sentry Facade:$version"
 8 | docker build --platform linux/amd64 -t ai-sentry-facadeapp:$version -f Dockerfile.facade ../aisentry/
 9 | docker tag ai-sentry-facadeapp:$version $containerRegistry/ai-sentry-facadeapp:$version
10 | docker push $containerRegistry/ai-sentry-facadeapp:$version
11 | 
12 | 
13 | Write-Host "Building summary logger worker:$version"
14 | docker build --platform linux/amd64 -t ai-sentry-summary-logger:$version -f DockerfileSummary.worker ../aisentry/
15 | docker tag ai-sentry-summary-logger:$version $containerRegistry/ai-sentry-summary-logger:$version
16 | docker push $containerRegistry/ai-sentry-summary-logger:$version
17 | 
18 | Write-Host "Building cosmosdb logger worker:$version"
19 | docker build --platform linux/amd64 -t ai-sentry-cosmosdblogger:$version -f Dockerfile.worker ../aisentry/
20 | docker tag ai-sentry-cosmosdblogger:$version $containerRegistry/ai-sentry-cosmosdblogger:$version
21 | docker push $containerRegistry/ai-sentry-cosmosdblogger:$version


--------------------------------------------------------------------------------
/content/documentation/ACADeployment.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/documentation/ACADeployment.md


--------------------------------------------------------------------------------
/content/documentation/AI-Sentry-config-settings.md:
--------------------------------------------------------------------------------
 1 | Each pool is represented as an object within the "pools" array. Each pool object has the following properties:
 2 | 
 3 | - name: The name of the pool.
 4 | - description: A description of the pool.
 5 | - endpoints: An array of endpoint objects that the pool can connect to.
 6 | Each endpoint object has the following properties:
 7 | 
 8 | - url: The URL of the OpenAI instance. You're expected to replace <your-open-ai-instance> with the actual instance name. You need to append
 9 | - api-key: The API key for accessing the OpenAI instance. You're expected to replace your-api-key with the actual API key. 
10 | 
11 | Please note: We also support JWT auth to backend openAI instances. If you simply set "api-Key": null within the property bags inside the facade layer config; you will leverage aks workload identity to connect to openAi backends - however you will need worklaod identity federated out with managed identity stood up with your AKS cluster - and ofcourse grant the RBAC to the managed identity across all the required openAI instances in the backend.


--------------------------------------------------------------------------------
/content/documentation/AKSDeployment.md:
--------------------------------------------------------------------------------
  1 | # AKS Deployment
  2 | 
  3 | 
  4 | 
  5 | ![AKS view](/content/images/AI-Sentry-AKS-view.drawio.png)
  6 | 
  7 | 
  8 | # Prerequisites
  9 | 
 10 | ## Setup infrastructure
 11 | 
 12 | 
 13 | ## Client Tooling:
 14 | 
 15 | Make sure to have current version of below tools on your client/ci/cd server which will be used to build/publish the docker images as well as execute the helm chart deployment to your kubernetes cluster.
 16 | 
 17 | - az cli 
 18 | - docker
 19 | - helm
 20 | - kubectl
 21 | 
 22 | 
 23 | ## Kuberenetes Cluster:
 24 | 
 25 | Please ensure that you have the following installed in your kuberenetes cluster:
 26 | 
 27 |  - DAPR extension if using AKS - otherwise pull in DAPR from open source.
 28 | 
 29 |  ### Install DAPR Extension on AKS
 30 | 
 31 | ```bash
 32 | az extension add --name k8s-extension
 33 | az extension update --name k8s-extension
 34 | 
 35 | az provider list --query "[?contains(namespace,'Microsoft.KubernetesConfiguration')]" -o table
 36 | az provider register --namespace Microsoft.KubernetesConfiguration
 37 | 
 38 | 
 39 | az k8s-extension create --cluster-type managedClusters --cluster-name anevjes-aks --resource-group aks --name dapr --extension-type Microsoft.Dapr --auto-upgrade-minor-version false
 40 | 
 41 | More info here: https://docs.dapr.io/operations/hosting/kubernetes/kubernetes-deploy/
 42 | ```
 43 | 
 44 | ### Install REDIS - (if using REDIS as your PUB/SUB DAPR component)
 45 | ```
 46 | az aks get-credentials --admin --name anevjes-aks --resource-group aks
 47 | helm repo add bitnami https://charts.bitnami.com/bitnami
 48 | helm install sentry-redis bitnami/redis-cluster
 49 | export REDIS_PASSWORD=$(kubectl get secret --namespace "default" sentry-redis-redis-cluster -o jsonpath="{.data.redis-password}" | base64 --decode)
 50 | 
 51 | kubectl create namespace ai-sentry
 52 | kubectl create secret generic redis --from-literal=redis-password=$REDIS_PASSWORD -n ai-sentry
 53 | 
 54 | ```
 55 | 
 56 | 
 57 | 
 58 | # Build and publish container images
 59 | 
 60 | Prepare the facade and worker images and push them to your image registry.
 61 | 
 62 | ``` bash
 63 | #Browse to build directory
 64 | cd build
 65 | ```
 66 | 
 67 | When you list source you should see the following powershell script
 68 | 
 69 | ```bash
 70 | ls
 71 | 
 72 | Dockerfile.facade               Dockerfile.worker               DockerfileSummary.worker        build-ai-sentry-containers.ps1
 73 | ```
 74 | 
 75 | Login into az cli in the terminal session
 76 | ```
 77 | az login
 78 | ```
 79 | 
 80 | Now Login to your target container registry - in my example this is an Azure Container Registry
 81 | 
 82 | ```bash
 83 | az acr login -n <your-ACR-name>
 84 | ```
 85 | 
 86 | Now run the build-ai-sentry-containers.ps1 script
 87 | 
 88 | ```bash
 89 | build-ai-sentry-containers.ps1 -version "0.1.1" -containerRegistry "your-ACR-name"
 90 | ```
 91 | 
 92 | You can confirm against your ACR that the images are there with correct tags.
 93 | 
 94 | ```bash
 95 | az acr repository show-tags --name <your-ACR-name> --repository <your-image-name> --output table
 96 | ```
 97 | 
 98 | 
 99 | # Deploy AI-Sentry
100 | 
101 | Now its time to kick off the Ai-Sentry deployment.
102 | 
103 | - Browse to *deploy* directory at the root of the project level
104 | 
105 | - Now browse into the *AKS* directory
106 | 
107 | 
108 | Currently I have not configured an end to end helm chart so for the inital deployment please update and use the ai-sentry-deployment.yaml file.
109 | 
110 | 
111 | If you crack open the ai-sentry-deployment.yaml file you will notice it is broken up into several sections:
112 | 
113 | 1. **Service**: Two services are defined, `facadeapp` and `facadeapp-headless`. The `facadeapp` service is a LoadBalancer type, which exposes the `facadeapp` to network traffic. The `facadeapp-headless` service is a headless service (since `clusterIP` is set to `None`), used to expose pods directly without a load balancer. Both services select pods with the label `app: facadeapp` and expose the `facadeapp-http` port of those pods on their own port 80.
114 | 
115 | 2. **Ingress**: This manages external access to the services in a cluster, typically HTTP. The Ingress `facadeapp-ingress` is routing traffic from the root path ("/") to the `facadeapp` service on port 80. The annotation `nginx.ingress.kubernetes.io/rewrite-target: /` indicates that the path in the request URL from the client is rewritten to "/" regardless of what the original path is.
116 | 
117 | 3. **StatefulSet**: Three StatefulSets are defined for `facadeapp`, `cosmosdbloggerw`, and `cosmosdb-summary-loggerw`. StatefulSets are used for deploying, scaling, and managing stateful applications. Each StatefulSet has its own number of replicas, selectors, and container specifications. The `facadeapp` StatefulSet has Dapr annotations enabled, which means it's using DAPR for distributed application runtime.
118 | 
119 | 4. **Component**: Three DAPR components are defined for `openaipubsub`, `cosmosdb-log`, and `summary-log`. These components are used for pub-sub messaging with Redis, and bindings with Azure Cosmos DB for logging and summary logging. The `openaipubsub` component is a Redis-based pub-sub component. The `cosmosdb-log` and `summary-log` components are bindings to Azure Cosmos DB for logging purposes.
120 | 
121 | 5. **Images**: The images for the containers in the StatefulSets are pulled from a private Azure Container Registry (ACR) as indicated by the `youracr.azurecr.io` prefix in the image names.
122 | 
123 | 6. **Environment Variables**: Various environment variables are set in the containers, including AI-SENTRY-ENDPOINT-CONFIG, LOG-LEVEL, AI-SENTRY-LANGUAGE-ENDPOINT, and AI-SENTRY-LANGUAGE-KEY. See [here](/content/documentation/ai-sentry-config.json) for example AI-SENTRY-ENDPOINT-CONFIG. You can leverage the following [script](scripts/create-escaped-json.ps1) to help you generate JSON escaped string of this JSON.
124 | 
125 | 7. **Probes**: Liveness probes are defined for the `facadeapp` container to check the health of the application. If the application fails the liveness check, Kubernetes will restart the container.
126 | 
127 | 8. **Commented Code**: There are commented sections for an Event Hub component for pub sub Dapr annotations. These can be uncommented and filled in as needed for additional functionality.
128 | 
129 | 
130 | ## Create ai-sentry namespace
131 | 
132 | Create the ai-sentry namespace by running the following command:
133 | 
134 | ```kubectl
135 | kuebctl apply -f namespace.yaml
136 | ```
137 | 
138 | ## Ai-Sentry deployment into namespace
139 | 
140 | ```bash
141 | kubectl apply -f ai-sentry-deployment.yaml -n ai-sentry
142 | 
143 | service/facadeapp created
144 | service/facadeapp-headless created
145 | ingress.networking.k8s.io/facadeapp-ingress created
146 | statefulset.apps/facadeapp created
147 | statefulset.apps/cosmosdbloggerw created
148 | statefulset.apps/cosmosdb-summary-loggerw created
149 | component.dapr.io/openaipubsub created
150 | component.dapr.io/cosmosdb-log created
151 | component.dapr.io/summary-log created
152 | ```
153 | 
154 | Now lets confirm the pods exist - it should look like the following:
155 | 
156 | ```bash
157 | kubectl get pods -n ai-sentry
158 | 
159 | AME                         READY   STATUS    RESTARTS   AGEaks> kubectl get pods -n ai-sentry
160 | cosmosdb-summary-loggerw-0   2/2     Running   0          82s
161 | cosmosdb-summary-loggerw-1   2/2     Running   0          75s
162 | cosmosdbloggerw-0            2/2     Running   0          82s
163 | cosmosdbloggerw-1            2/2     Running   0          75s
164 | facadeapp-0                  2/2     Running   0          82s
165 | ```
166 | 
167 | For ease of testing and getting the intial version up and running I've used a LoadBalancer service and an external IP. You will need to change this service type to meet your Cyber/Enterprise security posture -i.e. it could become ingress on a private IP address.
168 | 
169 | 
170 | 


--------------------------------------------------------------------------------
/content/documentation/AzureInfrastrcuture.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/documentation/AzureInfrastrcuture.md


--------------------------------------------------------------------------------
/content/documentation/ComsosDB-LoggingSchema.md:
--------------------------------------------------------------------------------
  1 | # Logging Schema
  2 | 
  3 | Below you will find the output schema for CosmosDB logging. This is usefull for long term retention, legal requirements as well as model re-evaluation input/output to test how well other models are performing before cutting over.
  4 | 
  5 | | Property | Description |
  6 | | --- | --- |
  7 | | `date_time_utc` | The date and time in UTC when the event occurred. |
  8 | | `is_Streaming` | A boolean indicating whether the event is streaming or not. |
  9 | | `headers` | An object containing various headers related to the event. |
 10 | | `params` | An object containing parameters related to the event. |
 11 | | `request_body` | An object containing the request body of the event. |
 12 | | `response_body` | An object containing the response body of the event. For streaming responses we truncate the repsonse body purely down to the response text|
 13 | | `sentry_ai_headers` | An object containing headers related to Sentry AI. |
 14 | | `usage` | An object containing usage statistics.3 properties are included here: completion_tokens, prompt_tokens, and total_tokens.|
 15 | | `model` | The model used for the event. |
 16 | | `openai_response_id` | The ID of the OpenAI response. |
 17 | | `LogId` | The log ID of the event. |
 18 | | `id` | The ID of the event. |
 19 | | `_rid` | The resource ID of the event. |
 20 | | `_self` | The self-reference of the event. |
 21 | | `_etag` | The ETag of the event. |
 22 | | `_attachments` | The attachments of the event. |
 23 | | `_ts` | The timestamp of the event. |
 24 | 
 25 | JSON definition example:
 26 | 
 27 | ```json
 28 | {
 29 |         "date_time_utc": "2024-05-23T08:17:03.106257+00:00",
 30 |         "is_Streaming": false,
 31 |         "headers": {
 32 |             "Remote-Addr": "10.224.0.5",
 33 |             "Api-Key": "",
 34 |             "Arianwashere": "dfsdbgsdgsdgsdgs",
 35 |             "Ai-Sentry-Consumer": "Product1",
 36 |             "Ai-Sentry-Log-Level": "COMPLETE",
 37 |             "Ai-Sentry-Backend-Pool": "pool1",
 38 |             "Content-Type": "application/json"
 39 |         },
 40 |         "params": {
 41 |             "api-version": "2023-07-01-preview"
 42 |         },
 43 |         "request_body": {
 44 |             "messages": [
 45 |                 {
 46 |                     "role": "system",
 47 |                     "content": "you are a Financial analyst employee studying tech companies"
 48 |                 },
 49 |                 {
 50 |                     "role": "user",
 51 |                     "content": "What are microsoft's most profitable products?"
 52 |                 }
 53 |             ],
 54 |             "stream": false,
 55 |             "max_tokens": 800,
 56 |             "temperature": 0.7,
 57 |             "frequency_penalty": 0,
 58 |             "presence_penalty": 0,
 59 |             "top_p": 0.95,
 60 |             "stop": null
 61 |         },
 62 |         "response_body": {
 63 |             "choices": [
 64 |                 {
 65 |                     "content_filter_results": {
 66 |                         "hate": {
 67 |                             "filtered": false,
 68 |                             "severity": "safe"
 69 |                         },
 70 |                         "self_harm": {
 71 |                             "filtered": false,
 72 |                             "severity": "safe"
 73 |                         },
 74 |                         "sexual": {
 75 |                             "filtered": false,
 76 |                             "severity": "safe"
 77 |                         },
 78 |                         "violence": {
 79 |                             "filtered": false,
 80 |                             "severity": "safe"
 81 |                         }
 82 |                     },
 83 |                     "finish_reason": "stop",
 84 |                     "index": 0,
 85 |                     "message": {
 86 |                         "content": "Microsoft's most profitable products include:\n\n1. Microsoft Office Suite: This includes popular software like Microsoft Word, Excel, PowerPoint, and Outlook, which are widely used by individuals and businesses for productivity and communication purposes.\n\n2. Windows Operating System: Microsoft's flagship operating system, Windows, is used by millions of individuals and businesses worldwide. It generates significant revenue through licensing fees, upgrades, and sales of pre-installed Windows on new devices.\n\n3. Azure Cloud Services: Microsoft's cloud computing platform, Azure, is one of the fastest-growing segments for the company. It provides infrastructure, platform, and software as a service (IaaS, PaaS, and SaaS) solutions, generating substantial revenue from businesses' cloud adoption.\n\n4. Xbox Gaming Console and Services: Microsoft's gaming division, including Xbox consoles and related services like Xbox Live and Xbox Game Pass, contributes significantly to the company's profits. Xbox hardware sales, game sales, and subscriptions generate substantial revenue.\n\n5. Surface Devices: Microsoft's range of Surface devices, including laptops, tablets, and accessories, has gained popularity among consumers and professionals. These products generate revenue through sales and contribute to Microsoft's profitability.\n\nIt is important to note that the profitability of these products may vary over time, and Microsoft's overall financial performance is influenced by various factors, including market conditions and competition.",
 87 |                         "role": "assistant"
 88 |                     }
 89 |                 }
 90 |             ],
 91 |             "created": 1716452216,
 92 |             "id": "chatcmpl-9Rxv6geWpNxX0w00S9pQmIjxWK43p",
 93 |             "model": "gpt-35-turbo",
 94 |             "object": "chat.completion",
 95 |             "prompt_filter_results": [
 96 |                 {
 97 |                     "prompt_index": 0,
 98 |                     "content_filter_results": {
 99 |                         "hate": {
100 |                             "filtered": false,
101 |                             "severity": "safe"
102 |                         },
103 |                         "self_harm": {
104 |                             "filtered": false,
105 |                             "severity": "safe"
106 |                         },
107 |                         "sexual": {
108 |                             "filtered": false,
109 |                             "severity": "safe"
110 |                         },
111 |                         "violence": {
112 |                             "filtered": false,
113 |                             "severity": "safe"
114 |                         }
115 |                     }
116 |                 }
117 |             ],
118 |             "system_fingerprint": null,
119 |             "usage": {
120 |                 "completion_tokens": 268,
121 |                 "prompt_tokens": 28,
122 |                 "total_tokens": 296
123 |             }
124 |         },
125 |         "sentry_ai_headers": {
126 |             "Ai-Sentry-Consumer": "Product1",
127 |             "Ai-Sentry-Log-Level": "COMPLETE"
128 |         },
129 |         "usage": {
130 |             "completion_tokens": 268,
131 |             "prompt_tokens": 28,
132 |             "total_tokens": 296
133 |         },
134 |         "model": "gpt-35-turbo",
135 |         "openai_response_id": "chatcmpl-9Rxv6geWpNxX0w00S9pQmIjxWK43p",
136 |         "LogId": "gpt-35-turbo_Product1_05_2024",
137 |         "id": "chatcmpl-9Rxv6geWpNxX0w00S9pQmIjxWK43p",
138 |         "_rid": "vUpsAOlVEEuBAQAAAAAAAA==",
139 |         "_self": "dbs/vUpsAA==/colls/vUpsAOlVEEs=/docs/vUpsAOlVEEuBAQAAAAAAAA==/",
140 |         "_etag": "\"0200bb04-0000-1a00-0000-664efb7f0000\"",
141 |         "_attachments": "attachments/",
142 |         "_ts": 1716452223
143 |     }
144 | ```


--------------------------------------------------------------------------------
/content/documentation/CosmosDBSetup.md:
--------------------------------------------------------------------------------
1 | # CosmosDB Setup
2 | 


--------------------------------------------------------------------------------
/content/documentation/LocalDebugging.md:
--------------------------------------------------------------------------------
1 | ```
2 | cd aisentry
3 | python3 -m venv env
4 | 
5 | source env/bin/activate
6 | 
7 | pip install -r requirements.txt
8 | ```


--------------------------------------------------------------------------------
/content/documentation/LocalDeployment.md:
--------------------------------------------------------------------------------
 1 | ## Running locally
 2 | ```
 3 | # Need to move this logic into dockerfile scripts for workers using cognitive services
 4 | openssl x509 -in "/Users/ariannevjestic/Downloads/_.cognitive.microsoft.com.cer" -out "/Users/ariannevjestic/Downloads/cogcerts/_.cognitive.microsoft.com.pem" -outform PEM
 5 | openssl x509 -in "/Users/ariannevjestic/Downloads/2.cer" -out "/Users/ariannevjestic/Downloads/cogcerts/DigiCert_SHA2 Secure_ServerCA.pem" -outform PEM
 6 | openssl x509 -in "/Users/ariannevjestic/Downloads/1.cer" -out "/Users/ariannevjestic/Downloads/1.pem" -outform PEM
 7 | cat _.cognitive.microsoft.com.pem middle.pem cDigiCertGlobalRootCA.pem > combined_cert.pem
 8 | openssl x509 -outform der -in combined_cert -out combined-cert.crt
 9 | 
10 | REQUESTS_CA_BUNDLE=/Users/ariannevjestic/Downloads/cogservices.pem
11 | 
12 | #check for python env variable
13 | 
14 | cd ./aoaifacadeapp
15 | dapr run -f dapr.yaml
16 | ```
17 | 
18 | 
19 | ## Running individual apps:
20 | 
21 | ### FacadeApp API:
22 | ```
23 | dapr run --app-id facade-entry --app-port 6124 python3 aoaifacadeapp.py --resources-path components --log-level info
24 | ```
25 | 
26 | ### CosmosDB Worker:
27 | 
28 | ```
29 | dapr run --app-id oai-cosmosdb-logging-processor --app-port 3001 python3 ./workers/CosmosDBLogging/app.py --resources-path components --log-level info
30 | ```
31 | 


--------------------------------------------------------------------------------
/content/documentation/SummaryLog-schema.md:
--------------------------------------------------------------------------------
 1 | # Summary Logging Schema
 2 | 
 3 | 
 4 | | Key            | Description                                         |
 5 | | -------------- | --------------------------------------------------- |
 6 | | `id`           | Unique identifier for the chat completion           |
 7 | | `LogId`        | Identifier for the log entry                        |
 8 | | `model`        | Model used for the AI operation                     |
 9 | | `timestamp`    | Time when the log was created                       |
10 | | `ProductId`    | Identifier for the product                          |
11 | | `promptTokens` | Number of tokens used in the prompt                 |
12 | | `responseTokens` | Number of tokens used in the response            |
13 | | `totalTokens`  | Total number of tokens used (prompt + response)     |
14 | | `month_year`   | Month and year when the log was created             |
15 | | `_rid`         | Resource ID in Azure Cosmos DB                      |
16 | | `_self`        | Self-link in Azure Cosmos DB                        |
17 | | `_etag`        | Entity tag used for optimistic concurrency control  |
18 | | `_attachments` | Link to attachments associated with the document    |
19 | | `_ts`          | Timestamp of the last update operation on the document |
20 | 
21 | 
22 | ## Example JSON
23 | 
24 | ```json
25 | {
26 |     "id": "chatcmpl-9XKn2H3CSlg91b6b3OuQBj6PdmVje",
27 |     "LogId": "gpt-35-turbo_Product13_06_2024",
28 |     "model": "gpt-35-turbo",
29 |     "timestamp": "2024-06-07T03:42:59.400583+00:00",
30 |     "ProductId": "Product13",
31 |     "promptTokens": 42,
32 |     "responseTokens": 645,
33 |     "totalTokens": 687,
34 |     "month_year": "06_2024",
35 |     "_rid": "SxcAAOBkD+gIAAAAAAAAAA==",
36 |     "_self": "dbs/SxcAAA==/colls/SxcAAOBkD+g=/docs/SxcAAOBkD+gIAAAAAAAAAA==/",
37 |     "_etag": "\"a400f2ed-0000-1a00-0000-666281c30000\"",
38 |     "_attachments": "attachments/",
39 |     "_ts": 1717731779
40 | }
41 | ```


--------------------------------------------------------------------------------
/content/documentation/Workload-identity-config.md:
--------------------------------------------------------------------------------
 1 | # AKS Workload Identity setup
 2 | 
 3 | ## Enable Workload identity against existing AKS cluster
 4 | ```powershell
 5 | az aks update --resource-group "aks-devtest-rg" --name "anevjes-aks-dev" --enable-oidc-issuer --enable-workload-identity
 6 | ```
 7 | 
 8 | ## MI creation
 9 | ```powershell
10 | az account set --subscription "subscriptionID"
11 | ```
12 | 
13 | ```powershell
14 | az identity create --name "ai-sentry-be-mi" --resource-group "ai-sentry" --location "australiaeast" --subscription "879bb272-07db-4784-816a-a9fac90f49ae"
15 | ```
16 | 
17 | ```bash
18 | export USER_ASSIGNED_CLIENT_ID="$(az identity show --resource-group "ai-sentry" --name "ai-sentry-be-mi" --query 'clientId' -otsv)"
19 | ```
20 | ## Grant MI access to openAI resources
21 | 
22 | ![alt text](..\images\openai_rbac.png)
23 | 
24 | and assign your newly built managed identity to above role:
25 | 
26 | ![alt text](..\images\openai_rbac2.png)
27 | 
28 | 
29 | ## Env variables for service account in AKS
30 | 
31 | ```bash
32 | export SERVICE_ACCOUNT_NAME="default"
33 | export SERVICE_ACCOUNT_NAMESPACE="ai-sentry"
34 | ```
35 | 
36 | ## OIDC Issuer url
37 | 
38 | ```bash
39 | export AKS_OIDC_ISSUER="$(az aks show --name anevjes-aks-dev --resource-group aks-devtest-rg --query "oidcIssuerProfile.issuerUrl" -o tsv)"
40 | ```
41 | 
42 | ## Create AKS Service Account
43 | 
44 | ```bash
45 | cat <<EOF | kubectl apply -f -
46 | apiVersion: v1
47 | kind: ServiceAccount
48 | metadata:
49 |   annotations:
50 |     azure.workload.identity/client-id: ${USER_ASSIGNED_CLIENT_ID}
51 |   name: ${SERVICE_ACCOUNT_NAME}
52 |   namespace: ${SERVICE_ACCOUNT_NAMESPACE}
53 | EOF
54 | ```
55 | ## Establish federated identity credential trust
56 | 
57 | ```powershell
58 | az identity federated-credential create --name ai-sentry-be-fed --identity-name ai-sentry-be-mi --resource-group ai-sentry --issuer ${AKS_OIDC_ISSUER} --subject system:serviceaccount:${SERVICE_ACCOUNT_NAMESPACE}:${SERVICE_ACCOUNT_NAME} --audience api://AzureADTokenExchange
59 | ```
60 | 


--------------------------------------------------------------------------------
/content/documentation/ai-sentry-config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "pools": [
 3 |         {
 4 |             "name": "pool1",
 5 |             "description": "pool1 description",
 6 |             "endpoints": [
 7 |                 {
 8 |                     "url": "https://<your-open-ai-instance>.openai.azure.com/openai",
 9 |                     "api-key": null, //If you simply set "api-Key": null the facade layer will leverage aks workload identity to connect to openAi backends.,
10 |                     "x-ratelimit-remaining-requests": 1000,
11 |                     "x-ratelimit-remaining-tokens": 1000
12 |                 }
13 |             ]
14 |         },
15 |         {
16 |             "name": "pool2",
17 |             "description": "pool2 description",
18 |             "endpoints": [
19 |                 {
20 |                     "url": "https://<your-open-ai-instance>.openai.azure.com/openai",
21 |                     "api-key": "your-api-key", //If you simply set "api-Key": null the facade layer will leverage aks workload identity to connect to openAi backends.
22 |                     "x-ratelimit-remaining-requests": 1000,
23 |                     "x-ratelimit-remaining-tokens": 1000
24 |                 },
25 |                 {
26 |                     "url": "https://<your-open-ai-instance>.openai.azure.com/openai",
27 |                     "api-key": "your-api-key",//If you simply set "api-Key": null the facade layer will leverage aks workload identity to connect to openAi backends.
28 |                     "x-ratelimit-remaining-requests": 1000,
29 |                     "x-ratelimit-remaining-tokens": 1000
30 |                 }
31 |             ]
32 |         }
33 |     ]
34 | }


--------------------------------------------------------------------------------
/content/images/AI-Sentry-AKS-view.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/AI-Sentry-AKS-view.drawio.png


--------------------------------------------------------------------------------
/content/images/AI-Sentry-HighLevel.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/AI-Sentry-HighLevel.drawio.png


--------------------------------------------------------------------------------
/content/images/AI-Sentry-features.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/AI-Sentry-features.png


--------------------------------------------------------------------------------
/content/images/cosmosdb_request_logs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/cosmosdb_request_logs.png


--------------------------------------------------------------------------------
/content/images/cosmosdb_summary_logs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/cosmosdb_summary_logs.png


--------------------------------------------------------------------------------
/content/images/openai_rbac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/openai_rbac.png


--------------------------------------------------------------------------------
/content/images/openai_rbac2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/content/images/openai_rbac2.png


--------------------------------------------------------------------------------
/deploy/aks/ai-sentry-deployment.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: v1
  2 | kind: Service
  3 | metadata:
  4 |   name: facadeapp
  5 |   labels:
  6 |     app: facadeapp
  7 | spec:
  8 |   ports:
  9 |     - name: http
 10 |       port: 80
 11 |       targetPort: facadeapp-http
 12 |   type: LoadBalancer
 13 |   selector:
 14 |     app: facadeapp
 15 | 
 16 | ---
 17 | apiVersion: v1
 18 | kind: Service
 19 | metadata:
 20 |   name: facadeapp-headless
 21 |   labels:
 22 |     app: facadeapp-headless
 23 | spec:
 24 |   clusterIP: None
 25 |   selector:
 26 |     app: facadeapp
 27 |   ports:
 28 |     - name: http
 29 |       port: 80
 30 |       targetPort: facadeapp-http
 31 | ---
 32 | # Ingress:
 33 | 
 34 | apiVersion: networking.k8s.io/v1
 35 | kind: Ingress
 36 | metadata:
 37 |   name: facadeapp-ingress
 38 |   annotations:
 39 |     nginx.ingress.kubernetes.io/rewrite-target: /
 40 | spec:
 41 |   rules:
 42 |   - http:
 43 |       paths:
 44 |       - pathType: Prefix
 45 |         path: "/"
 46 |         backend:
 47 |           service:
 48 |             name: facadeapp
 49 |             port:
 50 |               number: 80
 51 | ---
 52 | 
 53 | kind: StatefulSet
 54 | apiVersion: apps/v1
 55 | metadata:
 56 |   name: facadeapp
 57 | spec:
 58 |   serviceName: facadeapp
 59 |   replicas: 1
 60 |   selector:
 61 |     matchLabels:
 62 |       app: facadeapp
 63 |   template:
 64 |     metadata:
 65 |       labels:
 66 |         app: facadeapp
 67 |         azure.workload.identity/use: "true"
 68 |       annotations:
 69 |         dapr.io/enabled: "true"
 70 |         dapr.io/app-id: "facadeapp"
 71 |         dapr.io/app-port: "5000"
 72 |         dapr.io/log-as-json: "true"
 73 |         dapr.io/app-protocol: "http"
 74 |         dapr.io/enable-app-health-check: "true"
 75 |         dapr.io/app-health-check-path: "/dapr/health"
 76 |         dapr.io/app-health-probe-interval: "3"
 77 |         dapr.io/app-health-probe-timeout: "200"
 78 |         dapr.io/app-health-threshold: "2"
 79 |         azure.workload.identity/inject-proxy-sidecar: "true"
 80 |         azure.workload.identity/proxy-sidecar-port: "8000"
 81 | 
 82 |     spec:
 83 |       containers:
 84 |         - name: sentryai-facade
 85 |           image: youracr.azurecr.io/ai-sentry-facadeapp:0.1.1
 86 |           imagePullPolicy: Always
 87 |           ports:
 88 |             - name: facadeapp-http
 89 |               containerPort: 5000
 90 |               protocol: TCP
 91 |           livenessProbe:
 92 |             httpGet:
 93 |               path: /liveness
 94 |               port: 5000
 95 |             initialDelaySeconds: 15
 96 |             periodSeconds: 5
 97 |             failureThreshold: 3
 98 |           env:
 99 |           - name: "AI-SENTRY-ENDPOINT-CONFIG"
100 |             value: "{\"pools\":[{\"name\":\"pool1\",\"description\":\"pool1 description\",\"endpoints\":[{\"url\":\"https://youropenaiendpoint.openai.azure.com/openai\",\"api-key\":\"yourkey or simply null\"}]},{\"name\":\"pool2\",\"description\":\"pool2 description\",\"endpoints\":[{\"url\":\"https://youropenai.openai.azure.com/openai\",\"api-key or simply null\":\"yourkey or simply null\"},{\"url\":\"https://youropenai.openai.azure.com/openai\",\"api-key or simply null\":\"yourkey\"}]}]}"
101 |           - name: "LOG_LEVEL"
102 |             value: "INFO"
103 |           ### Required by Workload Identity. Take the output of the clientID for the managed idenitty built and federated with AKS.
104 |           - name: "AZURE_CLIENT_ID"
105 |             value: "your managed identity clientid if not using api keys to hit the backend openai apis."
106 | ---
107 | #CosmosDB Logger
108 | kind: StatefulSet
109 | apiVersion: apps/v1
110 | metadata:
111 |   name: cosmosdbloggerw
112 | spec:
113 |   serviceName: cosmosdblogger
114 |   replicas: 2
115 |   selector:
116 |     matchLabels:
117 |       app: cosmosdbloggerw
118 |   template:
119 |     metadata:
120 |       labels:
121 |         app: cosmosdbloggerw
122 |       annotations:
123 |         dapr.io/enabled: "true"
124 |         dapr.io/app-id: "cosmosdbloggerw"
125 |         dapr.io/app-port: "7000"
126 |         dapr.io/log-as-json: "true"
127 |         #dapr.io/app-protocol: "http"
128 |         #dapr.io/enable-app-health-check: "true"
129 |         #dapr.io/app-health-check-path: "/dapr/health"
130 |         #dapr.io/app-health-probe-interval: "3"
131 |         #dapr.io/app-health-probe-timeout: "200"
132 |         #dapr.io/app-health-threshold: "2"
133 | 
134 |     spec:
135 |       containers:
136 |         - name: sentry-ai-cosmosdblogger
137 |           image: youracr.azurecr.io/ai-sentry-cosmosdblogger:0.1.1
138 |           imagePullPolicy: Always
139 |           ports:
140 |             - name: cosmoslogger
141 |               containerPort: 7000
142 |               protocol: TCP
143 |           env:
144 |             - name: "AI-SENTRY-LANGUAGE-ENDPOINT"
145 |               value: "https://yourlanguageendpoint.cognitiveservices.azure.com/"
146 |             - name: "AI-SENTRY-LANGUAGE-KEY"
147 |               value: "your-key"
148 |             - name: "LOG_LEVEL"
149 |               value: "DEBUG"
150 |             - name: "PII_STRIPPING_SERVICE"
151 |               value: "OPENAI"
152 |             - name: PII_STRIPPING_OPENAI_ENDPOINT
153 |               value: "https://ptuopendeployment.openai.azure.com/"
154 |             - name: "PII_STRIPPING_OPENAI_API_KEY"
155 |               value: "yourapikeytoopenai / apim subscription key"
156 | ---
157 | #Summary Logger Logger
158 | kind: StatefulSet
159 | apiVersion: apps/v1
160 | metadata:
161 |   name: cosmosdb-summary-loggerw
162 | spec:
163 |   serviceName: cosmosdb-summary-loggerw
164 |   replicas: 2
165 |   selector:
166 |     matchLabels:
167 |       app: cosmosdb-summary-loggerw
168 |   template:
169 |     metadata:
170 |       labels:
171 |         app: cosmosdb-summary-loggerw
172 |       annotations:
173 |         dapr.io/enabled: "true"
174 |         dapr.io/app-id: "cosmosdb-summary-loggerw"
175 |         dapr.io/app-port: "7001"
176 |         dapr.io/log-as-json: "true"
177 |         #dapr.io/app-protocol: "http"
178 |         #dapr.io/enable-app-health-check: "true"
179 |         #dapr.io/app-health-check-path: "/dapr/health"
180 |         #dapr.io/app-health-probe-interval: "3"
181 |         #dapr.io/app-health-probe-timeout: "200"
182 |         #dapr.io/app-health-threshold: "2"
183 | 
184 |     spec:
185 |       containers:
186 |         - name: sentry-ai-summary-logger
187 |           image: youracr.azurecr.io/ai-sentry-summary-logger:0.1.1
188 |           imagePullPolicy: Always
189 |           ports:
190 |             - name: summarylogger
191 |               containerPort: 7001
192 |               protocol: TCP
193 |           env:
194 |             - name: "LOG_LEVEL"
195 |               value: "INFO"
196 | ---
197 | # Components - REDIS based pub-sub
198 | apiVersion: dapr.io/v1alpha1
199 | kind: Component
200 | metadata:
201 |   name: openaipubsub
202 | spec:
203 |   type: pubsub.redis
204 |   version: v1
205 |   metadata:
206 |   # These settings will work out of the box if you use `helm install
207 |   # bitnami/redis`.  If you have your own setup, replace
208 |   # `redis-master:6379` with your own Redis master address, and the
209 |   # Redis password with your own Secret's name. For more information,
210 |   # see https://docs.dapr.io/operations/components/component-secrets .
211 |   - name: redisHost
212 |     value: <your-redis-ip>:6379
213 |   - name: redisPassword
214 |     value: your-redis-password
215 | #     secretKeyRef:
216 | #       name: redis
217 | #       key: redis-password
218 | # auth:
219 | #   secretStore: kubernetes      
220 | ---
221 | apiVersion: dapr.io/v1alpha1
222 | kind: Component
223 | metadata:
224 |   name: cosmosdb-log
225 | spec:
226 |   type: bindings.azure.cosmosdb
227 |   version: v1
228 |   initTimeout: 5m
229 |   metadata:
230 |   - name: url
231 |     value: https://<your-cosmos-endpoint>.documents.azure.com:443/
232 |   - name: masterKey
233 |     value: your-key
234 |   - name: database
235 |     value: request-logs
236 |   - name: collection
237 |     value: logs
238 |   - name: partitionKey
239 |     value: "LogId"
240 | ---
241 | apiVersion: dapr.io/v1alpha1
242 | kind: Component
243 | metadata:
244 |   name: summary-log
245 | spec:
246 |   type: bindings.azure.cosmosdb
247 |   version: v1
248 |   initTimeout: 5m
249 |   metadata:
250 |   - name: url
251 |     value: https://<your-cosmos-endpoint>.documents.azure.com:443/
252 |   - name: masterKey
253 |     value: your-key
254 |   - name: database
255 |     value: summary-logs
256 |   - name: collection
257 |     value: logs
258 |   - name: partitionKey
259 |     value: "LogId"
260 | ---
261 | # IF you want to use Event hub as pub sub layer uncomment and fill in below
262 | 
263 | # apiVersion: dapr.io/v1alpha1
264 | # kind: Component
265 | # metadata:
266 | #   name: openaipubsub
267 | # spec:
268 | #   type: pubsub.azure.eventhubs
269 | #   version: v1
270 | #   metadata:
271 | #     # Either connectionString or eventHubNamespace is required
272 | #     # Use connectionString when *not* using Microsoft Entra ID
273 | #     - name: connectionString
274 | #       value: "<your-eh-connection-string>"
275 | #     # Use eventHubNamespace when using Microsoft Entra ID
276 | #     # - name: eventHubNamespace
277 | #     #   value: "namespace"
278 | #     - name: consumerID # Optional. If not supplied, the runtime will create one.
279 | #       value: "ai-sentry-logger"
280 | #     - name: enableEntityManagement
281 | #       value: "false"
282 | #     # The following four properties are needed only if enableEntityManagement is set to true
283 | #     # - name: resourceGroupName
284 | #     #   value: "test-rg"
285 | #     # - name: subscriptionID
286 | #     #   value: "value of Azure subscription ID"
287 | #     # - name: partitionCount
288 | #     #   value: "1"
289 | #     # - name: messageRetentionInDays
290 | #     #   value: "3"
291 | #     # Checkpoint store attributes
292 | #     - name: storageAccountName
293 | #       value: "<your-storage-account-name>"
294 | #     - name: storageAccountKey
295 | #       value: "<your-storage-account-key>"
296 | #     - name: storageContainerName
297 | #       value: "ai-sen-checkpoints"
298 | #     # Alternative to passing storageAccountKey
299 | #     # - name: storageConnectionString
300 | #     #   value: "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<account-key>"
301 | 
302 | 
303 | 


--------------------------------------------------------------------------------
/deploy/aks/namespace.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 |   name: ai-sentry
5 |   labels:
6 |     name: ai-sentry


--------------------------------------------------------------------------------
/deploy/local/components/cosmosdb.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: dapr.io/v1alpha1
 2 | kind: Component
 3 | metadata:
 4 |   name: cosmosdb-log
 5 | spec:
 6 |   type: bindings.azure.cosmosdb
 7 |   version: v1
 8 |   initTimeout: 5m
 9 |   metadata:
10 |   - name: url
11 |     value: https://<your-cosmos-db>.documents.azure.com:443/
12 |   - name: masterKey
13 |     value: 
14 |   - name: database
15 |     value: request-logs
16 |   - name: collection
17 |     value: logs
18 |   - name: partitionKey
19 |     value: "LogId"
20 | ---
21 | apiVersion: dapr.io/v1alpha1
22 | kind: Component
23 | metadata:
24 |   name: summary-log
25 | spec:
26 |   type: bindings.azure.cosmosdb
27 |   version: v1
28 |   initTimeout: 5m
29 |   metadata:
30 |   - name: url
31 |     value: https://<your-cosmos-db>.documents.azure.com:443/
32 |   - name: masterKey
33 |     value: 
34 |   - name: database
35 |     value: summary-logs
36 |   - name: collection
37 |     value: logs
38 |   - name: partitionKey
39 |     value: "openai_response_id"
40 | # ---
41 | 


--------------------------------------------------------------------------------
/deploy/local/components/pubsub.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: dapr.io/v1alpha1
 2 | kind: Component
 3 | metadata:
 4 |   name: openaipubsub
 5 | spec:
 6 |   type: pubsub.redis
 7 |   version: v1
 8 |   metadata:
 9 |   - name: redisHost
10 |     value: localhost:6379
11 |   - name: redisPassword
12 |     value: ""
13 |   - name: queueDepth
14 |     value: "5"
15 |   - name: concurrency
16 |     value: "3"
17 | 
18 | ---
19 | apiVersion: dapr.io/v1alpha1
20 | kind: Component
21 | metadata:
22 |   name: openaipubsub-old
23 | spec:
24 |   type: pubsub.azure.eventhubs
25 |   version: v1
26 |   metadata:
27 |     # Either connectionString or eventHubNamespace is required
28 |     # Use connectionString when *not* using Microsoft Entra ID
29 |     - name: connectionString
30 |       value: ""
31 |     # Use eventHubNamespace when using Microsoft Entra ID
32 |     # - name: eventHubNamespace
33 |     #   value: "namespace"
34 |     - name: consumerID # Optional. If not supplied, the runtime will create one.
35 |       value: "ai-sentry-logger"
36 |     - name: enableEntityManagement
37 |       value: "false"
38 |     # The following four properties are needed only if enableEntityManagement is set to true
39 |     # - name: resourceGroupName
40 |     #   value: "test-rg"
41 |     # - name: subscriptionID
42 |     #   value: "value of Azure subscription ID"
43 |     # - name: partitionCount
44 |     #   value: "1"
45 |     # - name: messageRetentionInDays
46 |     #   value: "3"
47 |     # Checkpoint store attributes
48 |     - name: storageAccountName
49 |       value: ""
50 |     - name: storageAccountKey
51 |       value: ""
52 |     - name: storageContainerName
53 |       value: "ai-sen-checkpoints"
54 |     # Alternative to passing storageAccountKey
55 |     # - name: storageConnectionString
56 |     #   value: "DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<account-key>"
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------
/infrastructure/APIM/ai-sentry-policy.xml:
--------------------------------------------------------------------------------
 1 | <!--
 2 |     - Policies are applied in the order they appear.
 3 |     - Position <base/> inside a section to inherit policies from the outer scope.
 4 |     - Comments within policies are not preserved.
 5 | -->
 6 | <!-- Add policies as children to the <inbound>, <outbound>, <backend>, and <on-error> elements -->
 7 | <policies>
 8 |     <!-- Throttle, authorize, validate, cache, or transform the requests -->
 9 |     <inbound>
10 |         <base />
11 |         <set-variable name="route-to-ai-sentry" value="{{route-to-ai-sentry}}" />
12 |         <set-variable name="ai-sentry" value="@((string)context.Variables["route-to-ai-sentry"])" />
13 |         <set-variable name="ai-sentry-callling-user" value="@{
14 |         var jwtToken = (context.Request.Headers != null) ? context.Request.Headers.GetValueOrDefault("Authorization","").AsJwt() : null;
15 |         var unique_name = jwtToken?.Claims.GetValueOrDefault("unique_name") ?? "unknown";
16 |         string first10Chars = originalString.Substring(0, 10);
17 |         return first10Chars;
18 |      }" />
19 |         <set-variable name="correlation-id" value="@(context.Request.Headers.GetValueOrDefault("x-correlation-id", Guid.NewGuid().ToString()))" />
20 |         <set-header name="ai-sentry-calling-user" exists-action="override">
21 |             <value>@((string)context.Variables.GetValueOrDefault("ai-sentry-callling-user"))</value>
22 |         </set-header>
23 |         <trace source="Global APIM Policy" severity="information">
24 |             <message>@(String.Format("{0} | {1}", context.Api.Name, context.Variables["correlation-id"]))</message>
25 |             <metadata name="correlation-id" value="@((string)context.Variables["correlation-id"])" />
26 |         </trace>
27 |         <rate-limit-by-key calls="300" renewal-period="15" counter-key="@(context.Request.Headers.GetValueOrDefault("api-key", "default-value"))" />
28 |         <choose>
29 |             <when condition="@((String)context.Variables["ai-sentry"]=="true")">
30 |                 <!-- Route to openai via AI-Sentry -->
31 |                 <!-- Add Ai-sentry specific header -->
32 |                 <set-header name="ai-sentry-consumer" exists-action="override">
33 |                     <value>arian-test-caller</value>
34 |                 </set-header>
35 |                 <set-header name="ai-sentry-log-level" exists-action="override">
36 |                     <value>COMPLETE</value>
37 |                 </set-header>
38 |                 <set-header name="Ai-Sentry-Backend-Pool" exists-action="override">
39 |                     <value>pool1</value>
40 |                 </set-header>
41 |                 <set-header name="ai-sentry-adapters" exists-action="override">
42 |                     <value>["SampleApiRequestTransformer"]</value>
43 |                 </set-header>
44 |                 <set-header name="x-aisentry-correlation" exists-action="override">
45 |                     <value>@((String)context.Variables["correlation-id"])</value>
46 |                 </set-header>
47 |                 <set-backend-service backend-id="be-aisentry" />
48 |             </when>
49 |             <otherwise>
50 |                 <!-- bypass Ai-Sentry and go straight to openAI backend -->
51 |                 <set-backend-service backend-id="be-openai" />
52 |             </otherwise>
53 |         </choose>
54 |         <cors allow-credentials="false">
55 |             <allowed-origins>
56 |                 <origin>*</origin>
57 |             </allowed-origins>
58 |             <allowed-methods>
59 |                 <method>GET</method>
60 |                 <method>POST</method>
61 |             </allowed-methods>
62 |             <allowed-headers>
63 |                 <header>api-key</header>
64 |                 <header>content-type</header>
65 |             </allowed-headers>
66 |         </cors>
67 |     </inbound>
68 |     <!-- Control if and how the requests are forwarded to services  -->
69 |     <backend>
70 |         <base />
71 |     </backend>
72 |     <!-- Customize the responses -->
73 |     <outbound>
74 |         <base />
75 |     </outbound>
76 |     <!-- Handle exceptions and customize error responses  -->
77 |     <on-error>
78 |         <base />
79 |     </on-error>
80 | </policies>


--------------------------------------------------------------------------------
/infrastructure/bicepconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |     "experimentalFeaturesEnabled": {
3 |         "userDefinedTypes": true
4 |     }
5 | }


--------------------------------------------------------------------------------
/infrastructure/deploy.ps1:
--------------------------------------------------------------------------------
 1 | param([Parameter(mandatory=$true, HelpMessage="Subscirption ID")]
 2 | [string]
 3 | $subscriptionId='eb51c20e-72d0-49ea-b7fc-2b0a23c73341',    
 4 | 
 5 | [Parameter(mandatory=$true, HelpMessage="Resource group name")]
 6 | [string]
 7 | $resourceGroupName='ai-sentry-rg',
 8 | 
 9 | [Parameter(mandatory=$true, HelpMessage="Location of resource group")]
10 | [string]
11 | $location='australiaeast')
12 | 
13 | $ProgressPreference = "SilentlyContinue"
14 | 
15 | # Login to Azure
16 | #az login
17 | #Connect-AzAccount
18 | Write-Host "Setting context to subscriptionId:  $($subscriptionId)..."
19 | az account set --subscription $subscriptionId
20 | 
21 | Write-Host "Deploying AI Sentry resources..."
22 | 
23 | # Check if the resource group exists
24 | $resourceGroupExists = az group exists --name $resourceGroupName
25 | 
26 | if ($resourceGroupExists -eq 'false') {
27 |     # Create the resource group
28 |     az group create --name $resourceGroupName --location $location
29 | }
30 | 
31 | $deploymentName = "deployment-" + (Get-Date -Format "yyyyMMddHHmmss")
32 | 
33 | az deployment group create --resource-group $resourceGroupName --template-file ./main.bicep --parameters ./main.param.json -n $deploymentName
34 | 
35 | Write-Host "Deployment complete."


--------------------------------------------------------------------------------
/infrastructure/main.bicep:
--------------------------------------------------------------------------------
  1 | // -- PARAMETERS -- 
  2 | @description('Required - Environment Type')
  3 | @allowed([
  4 |   'dev'
  5 |   'test'
  6 |   'prod'
  7 | ] )
  8 | param environmentType string = 'dev'
  9 | 
 10 | @description('Required - tags')
 11 | param tags object = {
 12 |   Environment: environmentType
 13 |   Role: 'ai-sentry'
 14 | }
 15 | 
 16 | @description('Required - AKS name')
 17 | param aksName string
 18 | 
 19 | param aksPrimaryAgentPoolProfile array = [
 20 |   {
 21 |     count: 3
 22 |     mode: 'System'
 23 |     name: 'systempool'
 24 |     vmSize: 'Standard_DS2_v2'
 25 |   }
 26 | ]
 27 | 
 28 | @description('Required - Azure Container Registry Name')
 29 | param containerRegistryName string
 30 | 
 31 | @description('Required - Cosmos DB Name')
 32 | param cosmosDbName string
 33 | 
 34 | @description('Optional. List of Cosmos DB capabilities for the account. Some capabilities are not compatible with each other.')
 35 | @allowed([
 36 |   'EnableCassandra'
 37 |   'EnableTable'
 38 |   'EnableGremlin'
 39 |   'EnableMongo'
 40 |   'DisableRateLimitingResponses'
 41 |   'EnableServerless'
 42 | ])
 43 | param capabilitiesToAdd string[] = [
 44 |   'EnableServerless'
 45 |   'EnableGremlin'
 46 |   'DisableRateLimitingResponses'
 47 | ]
 48 | 
 49 | @description('Required - Cosmos data locations')
 50 | param cosmosDbLocations object[] = [
 51 |   {
 52 |     failoverPriority: 0
 53 |     isZoneRedundant: false
 54 |     locationName: resourceGroup().location // where the data is located
 55 |   }
 56 | ]
 57 | 
 58 | @description('Required - Open AI Name')
 59 | param openAiName string = 'ai-sentry-openai'
 60 | 
 61 | @description('Required - Open AI Location')
 62 | param openAiLocation string = 'australiaeast'
 63 | 
 64 | @description('Required - Open AI SKU')
 65 | param openAiSku string = 'S0'
 66 | 
 67 | @description('Required - OpenAI Model Config')
 68 | param modelDeploymentConfig array = [
 69 |   {
 70 |     name: 'text-embedding-ada-002'
 71 |     version: '2'
 72 |     capacity: 1
 73 |     scaleType: 'Standard'
 74 |     raiPolicyName: ''
 75 |   }
 76 |   {
 77 |     name: 'gpt-35-turbo'
 78 |     version: '0613'
 79 |     capacity: 1
 80 |     scaleType: 'Standard'
 81 |     raiPolicyName: ''
 82 |   }
 83 | ]
 84 | 
 85 | @description('Required - API Management Name')
 86 | param apimName string = 'ai-sentry-apim001'
 87 | 
 88 | // @description('The name of the virtual network subnet to be associated with the API Management service.')
 89 | // param apimSubnetName string = 'apim-subnet'
 90 | 
 91 | // @description('The name of the public ip to be associated with the API Management service.')
 92 | // param apimPublicIp string = 'apim-pip-001'
 93 | 
 94 | @description('The name of the publisher email to be associated with the API Management service.')
 95 | param apimPublisherEmail string = 'publisher@email.com'
 96 | 
 97 | @description('The name of the publisher name to be associated with the API Management service.')
 98 | param apimPublisherName string = 'APIM Publisher'
 99 | 
100 | @description('The sku to be associated with the API Management service.')
101 | param apimSku string = 'Developer'
102 | 
103 | // @description('The desired Availability Zones for the API Management service. e.g. [1,2] for deployment in Availability Zones 1 and 2')
104 | // param apimZones []?
105 | 
106 | // @description('The desired custom hostnames for the API Management service endpoints')
107 | // param apimHostnameConfigurations object[] = []
108 | 
109 | 
110 | // -- RESOURCE CREATION ORCHESTRATOR -- 
111 | 
112 | // 1. Create Azure Kubernetes Managed Cluster with defaults via AVM module
113 | module managedCluster 'br/public:avm/res/container-service/managed-cluster:0.1.7' = {
114 |   name: 'managedClusterDeployment'
115 |   params: {
116 |     // Required parameters
117 |     name: aksName
118 |     primaryAgentPoolProfile: aksPrimaryAgentPoolProfile
119 |     // Non-required parameters
120 |     location: resourceGroup().location
121 |     enableContainerInsights: false
122 |     managedIdentities: {
123 |       systemAssigned: true
124 |     }
125 |   }
126 | }
127 | 
128 | // 2. Create Container Registry resources
129 | module registry 'br/public:avm/res/container-registry/registry:0.2.0' = {
130 |   name: '${uniqueString(deployment().name)}-containerRegistryCreation'
131 |   params: {
132 |     // Required parameters
133 |     name: containerRegistryName
134 |     // Non-required parameters
135 |     publicNetworkAccess: 'Enabled'
136 |     acrAdminUserEnabled: false
137 |     zoneRedundancy: 'Disabled'
138 |     managedIdentities: {
139 |       systemAssigned: true
140 |     }
141 |     acrSku: 'Standard'
142 |     location: resourceGroup().location
143 |     tags: tags
144 |   }
145 | }
146 | 
147 | // 3. Create CosmosDB resources
148 | module databaseAccount 'br/public:avm/res/document-db/database-account:0.5.4' = {
149 |   name: '${uniqueString(deployment().name)}-cosmosDbCreation'
150 |   params: {
151 |     // Required parameters
152 |     locations: cosmosDbLocations
153 |     name: cosmosDbName
154 |     // Non-required parameters
155 |     capabilitiesToAdd: capabilitiesToAdd
156 |     managedIdentities: {
157 |       systemAssigned: true
158 |     }
159 |     location: resourceGroup().location
160 |     tags: tags
161 |   }
162 | }
163 | 
164 | // 3.1 Add CosmosDB Containers
165 | 
166 | module cosmosDbFulllogsContainer 'br/public:avm/res/document-db/container:0.5.4' = {
167 |   name: '${uniqueString(deployment().name)}-cosmosDbFullLogsContainerCreation'
168 |   params: {
169 |     // Required parameters
170 |     databaseAccountName: databaseAccount.outputs.databaseAccountName
171 |     databaseName: 'ai-sentry'
172 |     containerName: 'request-logs'
173 |     partitionKeyPath: '/LogId'
174 |     throughput: 400
175 |     // Non-required parameters
176 |     indexingPolicy: {
177 |       automatic: true
178 |       includedPaths: [
179 |         {
180 |           path: '/*'
181 |           indexes: [
182 |             {
183 |               kind: 'Range'
184 |               dataType: 'Number'
185 |             }
186 |             {
187 |               kind: 'Range'
188 |               dataType: 'String'
189 |             }
190 |           ]
191 |         }
192 |       ]
193 |     }
194 |     tags: tags
195 |   }
196 | }
197 | 
198 | module cosmosDbSummaryContainer 'br/public:avm/res/document-db/container:0.5.4' = {
199 |   name: '${uniqueString(deployment().name)}-cosmosDbContainersCreation'
200 |   params: {
201 |     // Required parameters
202 |     databaseAccountName: databaseAccount.outputs.databaseAccountName
203 |     databaseName: 'ai-sentry'
204 |     containerName: 'summary-logs'
205 |     partitionKeyPath: '/LogId'
206 |     throughput: 400
207 |     // Non-required parameters
208 |     indexingPolicy: {
209 |       automatic: true
210 |       includedPaths: [
211 |         {
212 |           path: '/*'
213 |           indexes: [
214 |             {
215 |               kind: 'Range'
216 |               dataType: 'Number'
217 |             }
218 |             {
219 |               kind: 'Range'
220 |               dataType: 'String'
221 |             }
222 |           ]
223 |         }
224 |       ]
225 |     }
226 |     tags: tags
227 |   }
228 | }
229 | 
230 | // 4. Create OpenAI resources
231 | module openAI 'open-ai/main.bicep' = {
232 |   name: '${uniqueString(deployment().name)}-openAICreation'
233 |   params:{
234 |     // vnetName:vnetName
235 |     // peSubnetName:peSubnetName
236 |     name: openAiName
237 |     location: openAiLocation
238 |     sku: openAiSku
239 |     managedIdentities: {
240 |       systemAssigned: true
241 |     }
242 |     tags: tags
243 |     publicNetworkAccess: 'Enabled'
244 |     deploymentConfig: modelDeploymentConfig
245 |     // workspaceId:logAnalytics.outputs.logAnalyticsWorkspaceId
246 |   }
247 | }
248 | 
249 | // 5. Create API Management Service resources
250 | module apimService 'br/public:avm/res/api-management/service:0.1.7' = {
251 |   name: '${uniqueString(deployment().name)}-ApiManagementCreation'
252 |   params: {
253 |     // Required parameters
254 |     name: apimName
255 |     sku: apimSku
256 |     // apimPublicIpName: apimPublicIp
257 |     // virtualNetworkType: 'External'
258 |     // subnetResourceId:  '/subscriptions/${vnetSubscriptionId}/resourceGroups/${vnetResourceGroupName}/providers/Microsoft.Network/virtualNetworks/${vnetName}/subnets/${apimSubnetName}'
259 |     publisherEmail: apimPublisherEmail
260 |     publisherName: apimPublisherName
261 |     managedIdentities: {
262 |       systemAssigned: true
263 |     }
264 |     // Non-required parameters
265 |     // hostnameConfigurations: apimHostnameConfigurations
266 |     // zones: apimZones
267 |     location: resourceGroup().location
268 |     minApiVersion: '2021-08-01'
269 |     tags: tags
270 |   }
271 | }
272 | 
273 | 


--------------------------------------------------------------------------------
/infrastructure/main.param.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#",
 3 |     "contentVersion": "1.0.0.0",
 4 |     "parameters": {
 5 |       "environmentType": {
 6 |         "value": "dev"
 7 |       },
 8 |       "tags": {
 9 |         "value": {
10 |           "Environment": "dev",
11 |           "Role": "ai-sentry"
12 |         }
13 |       },
14 |       "aksName": {
15 |         "value": "aisentryaks"
16 |       },
17 |       "aksPrimaryAgentPoolProfile": {
18 |         "value": [
19 |           {
20 |             "count": 1,
21 |             "mode": "System",
22 |             "name": "systempool",
23 |             "vmSize": "Standard_B2s"
24 |           }
25 |         ]
26 |       },
27 |       "containerRegistryName": {
28 |         "value": "aisentryacr001"
29 |       },
30 |       "cosmosDbName": {
31 |         "value": "aisentrycosmodb001"
32 |       },
33 |       "capabilitiesToAdd": {
34 |         "value": [
35 |           "EnableServerless",
36 |           "EnableGremlin",
37 |           "DisableRateLimitingResponses"
38 |         ]
39 |       },
40 |       "cosmosDbLocations": {
41 |         "value": [
42 |           {
43 |             "failoverPriority": 0,
44 |             "isZoneRedundant": false,
45 |             "locationName": "australiaeast"
46 |           }
47 |         ]
48 |       },
49 |       "openAiName": {
50 |         "value": "aisentryopenai"
51 |       },
52 |       "openAiLocation": {
53 |         "value": "australiaeast"
54 |       },
55 |       "openAiSku": {
56 |         "value": "S0"
57 |       },
58 |       "modelDeploymentConfig": {
59 |         "value": [
60 |           {
61 |             "name": "text-embedding-ada-002",
62 |             "version": "2",
63 |             "sku": "Standard",
64 |             "capacity": 1,
65 |             "raiPolicyName": ""
66 |           },
67 |           {
68 |             "name": "gpt-35-turbo",
69 |             "version": "0613",
70 |             "sku": "Standard",
71 |             "capacity": 1,
72 |             "raiPolicyName": ""
73 |           }
74 |         ]
75 |       },
76 |       "apimName": {
77 |         "value": "aisentryapim001"
78 |       },
79 |       "apimPublisherEmail": {
80 |         "value": "publisher@email.com"
81 |       },
82 |       "apimPublisherName": {
83 |         "value": "APIM Publisher"
84 |       },
85 |       "apimSku": {
86 |         "value": "Developer"
87 |       }
88 |     }
89 |   }


--------------------------------------------------------------------------------
/infrastructure/open-ai/main.bicep:
--------------------------------------------------------------------------------
  1 | // Parameters
  2 | 
  3 | @description('Specifies the name of the vnet to injeciton private endpoint to')
  4 | param vnetName string?
  5 | 
  6 | @description('private endpoint for openAI isntances')
  7 | param peSubnetName string?
  8 | 
  9 | param name string
 10 | param location string
 11 | param sku string
 12 | param customSubDomainName string?
 13 | 
 14 | @description('Optional. The managed identity definition for this resource.')
 15 | param managedIdentities managedIdentitiesType
 16 | 
 17 | @description('Specifies the openAI model deployment config required')
 18 | param deploymentConfig array
 19 | 
 20 | 
 21 | @description('Specifies the resource tags.')
 22 | param tags object
 23 | 
 24 | 
 25 | @description('Specifies whether or not public endpoint access is allowed for this account..')
 26 | @allowed([
 27 |   'Enabled'
 28 |   'Disabled'
 29 | ])
 30 | param publicNetworkAccess string = 'Enabled'
 31 | 
 32 | 
 33 | @description('Specifies the workspace id of the Log Analytics used to monitor the Application Gateway.')
 34 | param workspaceId string?
 35 | 
 36 | // Variables
 37 | var diagnosticSettingsName = 'diagnosticSettings'
 38 | var openAiLogCategories = [
 39 |   'Audit'
 40 |   'RequestResponse'
 41 |   'Trace'
 42 | ]
 43 | var openAiMetricCategories = [
 44 |   'AllMetrics'
 45 | ]
 46 | var openAiLogs = [for category in openAiLogCategories: {
 47 |   category: category
 48 |   enabled: true
 49 | }]
 50 | var openAiMetrics = [for category in openAiMetricCategories: {
 51 |   category: category
 52 |   enabled: true
 53 | }]
 54 | 
 55 | var formattedUserAssignedIdentities = reduce(
 56 |   map((managedIdentities.?userAssignedResourceIds ?? []), (id) => { '${id}': {} }),
 57 |   {},
 58 |   (cur, next) => union(cur, next)
 59 | ) // Converts the flat array to an object like { '${id1}': {}, '${id2}': {} }
 60 | 
 61 | var identity = !empty(managedIdentities)
 62 |   ? {
 63 |       type: (managedIdentities.?systemAssigned ?? false)
 64 |         ? (!empty(managedIdentities.?userAssignedResourceIds ?? {}) ? 'SystemAssigned,UserAssigned' : 'SystemAssigned')
 65 |         : (!empty(managedIdentities.?userAssignedResourceIds ?? {}) ? 'UserAssigned' : null)
 66 |       userAssignedIdentities: !empty(formattedUserAssignedIdentities) ? formattedUserAssignedIdentities : null
 67 |     }
 68 |   : null
 69 | 
 70 | // Resources
 71 | 
 72 | resource openAiDeployment 'Microsoft.CognitiveServices/accounts@2023-05-01' = {
 73 |   name: name
 74 |   location: location
 75 |   sku: {
 76 |     name: sku
 77 |   }
 78 |   kind: 'OpenAI'
 79 |   identity: identity
 80 |   tags: tags
 81 |   properties: {
 82 |     customSubDomainName: customSubDomainName
 83 |     publicNetworkAccess: publicNetworkAccess
 84 |   }
 85 | }
 86 | 
 87 | // module privateendpoints '../core/network/privateendpoint/main.bicep' = [for (openAi, i) in openAIConfig: {
 88 | //   name: 'openAI-privateendpoint-${i}'
 89 | //   params:{
 90 | //     privateEndpointName:'openAi-${openAi.name}-pe'
 91 | //     location:resourceGroup().location
 92 | //     subnetName:peSubnetName
 93 | //     vnetName:vnetName
 94 | //     privateLinkServiceId:openAiDeployment[i].id
 95 | //     groupIds:['account']
 96 | //   }
 97 | //   dependsOn:[
 98 | //     openAiDeployment
 99 | //   ] 
100 | // }]
101 | 
102 | // What we want to do is iterate through all the models
103 | // And deploy
104 | // modelDeployments is an array of objects that contain the model name and version
105 | @batchSize(1)
106 | resource deployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for deployment in deploymentConfig: {
107 |   parent: openAiDeployment
108 |   sku: {
109 |     capacity: deployment.capacity
110 |     name: deployment.sku
111 |   }
112 |   name: deployment.name
113 |   properties: {
114 |     model: {
115 |       format: 'OpenAI'
116 |       name: deployment.name
117 |       version: deployment.version
118 |     }
119 |     raiPolicyName: contains(deployment, 'raiPolicyName') ? deployment.raiPolicyName : null
120 |   }
121 | }]
122 | 
123 | // @batchSize(1)
124 | // resource model 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = [for (openAi, i) in openAIConfig: {
125 | //   name: openAi.Name
126 | //   parent: openAiDeployment[i]
127 | //   sku: {
128 | //     name:'Standard'
129 | //     capacity: openAi.ModelDeployments[i].capacity
130 | //   }
131 | //   properties: {
132 | //     model: {
133 | //       format: 'OpenAI'
134 | //       name: openAi.ModelDeployments[i].name
135 | //       version: openAi.ModelDeployments[i].version
136 | //     }
137 | //     //raiPolicyName: openAi.ModelDeployments.raiPolicyName
138 | //   }
139 | // }]
140 | 
141 | 
142 | // resource openAiDiagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = [for (openAi, i) in openAIConfig: {
143 | //   name: diagnosticSettingsName
144 | //   scope: openAiDeployment[i]
145 | //   properties: {
146 | //     workspaceId: workspaceId
147 | //     logs: openAiLogs
148 | //     metrics: openAiMetrics
149 | //   }
150 | // }]
151 | 
152 | // Outputs
153 | output deployedopenAIAccount object = {
154 |   openAIName: name
155 | }
156 | 
157 | 
158 | // =============== //
159 | //   Definitions   //
160 | // =============== //
161 | 
162 | type managedIdentitiesType = {
163 |   @description('Optional. Enables system assigned managed identity on the resource.')
164 |   systemAssigned: bool?
165 | 
166 |   @description('Optional. The resource ID(s) to assign to the resource.')
167 |   userAssignedResourceIds: string[]?
168 | }?
169 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/ai-sentry/0cba2505a47d93c6d34ebef0b7d44074cc3c49f2/requirements.txt


--------------------------------------------------------------------------------
/scripts/create-escaped-json.ps1:
--------------------------------------------------------------------------------
 1 | param (
 2 |     [Parameter(Mandatory=$false)]
 3 |     [string]$jsonFilePath
 4 | )
 5 | 
 6 | # Read the JSON file
 7 | $json = Get-Content -Raw -Path $jsonFilePath | ConvertFrom-Json -Depth 99
 8 | 
 9 | # Convert the JSON object to a string
10 | $jsonString = ConvertTo-Json -InputObject $json -Compress -Depth 99
11 | $escapedJsonString = $jsonString | ConvertTo-Json -Depth 99
12 | 
13 | # Print the escaped JSON string
14 | Write-Output $escapedJsonString


--------------------------------------------------------------------------------
/scripts/setup-env.ps1:
--------------------------------------------------------------------------------
 1 | $envFile = Get-Content -Path .\..\.env
 2 | 
 3 | # Loop through each line in the file
 4 | foreach ($line in $envFile) {
 5 |     # Split the line into name and value
 6 |     $parts = $line -split '=', 2
 7 | 
 8 |     # Set the environment variable
 9 |     [Environment]::SetEnvironmentVariable($parts[0], $parts[1], 'User')
10 | }


--------------------------------------------------------------------------------
/scripts/setup-env.sh:
--------------------------------------------------------------------------------
1 | source ..\.env


--------------------------------------------------------------------------------
/tests/http/.env.sample:
--------------------------------------------------------------------------------
1 | AOAI_ENDPOINT=___.openai.azure.com
2 | AOAI_API_KEY=
3 | BEARER_TOKEN=


--------------------------------------------------------------------------------
/tests/http/adapter_test-ai-sentry.http:
--------------------------------------------------------------------------------
 1 | POST http://20.167.97.191/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-15-preview HTTP/1.1
 2 | Ai-Sentry-Consumer: Product-car-review
 3 | Ai-Sentry-Log-Level: COMPLETE
 4 | Ai-Sentry-Backend-Pool: pool1
 5 | Ai-Sentry-Adapters: []
 6 | ArianWasHere: aaaa
 7 | Content-Type: application/json
 8 | 
 9 | {
10 |     "messages": [
11 |         {
12 |             "role": "system",
13 |             "content": "you are a car reviewer studying japenese cars"
14 |         },
15 |         {
16 |             "role":"user",
17 |             "content":"Write a 3 sentence review on honda integra type r DC2"
18 |         }
19 |     ],
20 |     "stream":true,
21 |     "max_tokens": 800,
22 |     "temperature": 0.7,
23 |     "frequency_penalty": 0,
24 |     "presence_penalty": 0,
25 |     "top_p": 0.95,
26 |     "stop": null
27 | }


--------------------------------------------------------------------------------
/tests/http/adapter_test.http:
--------------------------------------------------------------------------------
 1 | POST http://4.147.64.25/openai/deployments/gpt-4o/chat/completions?api-version=2023-07-01-preview HTTP/1.1
 2 | ai-sentry-consumer: Product-car-review
 3 | ai-sentry-log-level: PII_STRIPPING_ENABLED
 4 | ai-sentry-backend-pool: pool1
 5 | ai-sentry-adapters: ["SampleApiRequestTransformer"]
 6 | Content-Type: application/json
 7 | 
 8 | {
 9 |     "messages": [
10 |         {
11 |             "role": "system",
12 |             "content": "you are a car reviewer studying japenese cars"
13 |         },
14 |         {
15 |             "role":"user",
16 |             "content":"Write a review on toyota yaris gr"
17 |         }
18 |     ],
19 |     "stream":true,
20 |     "max_tokens": 800,
21 |     "temperature": 0.7,
22 |     "frequency_penalty": 0,
23 |     "presence_penalty": 0,
24 |     "top_p": 0.95,
25 |     "stop": null
26 | }


--------------------------------------------------------------------------------
/tests/http/get-thread.http:
--------------------------------------------------------------------------------
1 | GET http://20.167.97.191/openai/threads/thread_58vrUKZ2LYSoGdVYk6WfXzxQ?api-version=2024-05-01-preview
2 | Ai-Sentry-Consumer: test-assistant
3 | Ai-Sentry-Log-Level: DISABLED
4 | Ai-Sentry-Backend-Pool: pool1
5 | Ai-Sentry-Adapters: []


--------------------------------------------------------------------------------
/tests/http/list_assistants.http:
--------------------------------------------------------------------------------
 1 | POST http://20.167.97.191/openai/assistants?api-version=2024-05-01-preview
 2 | Content-Type: application/json
 3 | Ai-Sentry-Consumer: Product-car-review
 4 | Ai-Sentry-Log-Level: PII_STRIPPING_ENABLED
 5 | Ai-Sentry-Backend-Pool: pool1
 6 | Ai-Sentry-Adapters: []
 7 | 
 8 | {
 9 | 
10 |   "instructions": "You are an AI assistant that can write code to help answer math questions.",
11 |   "tools": [
12 |     { "type": "code_interpreter" }
13 |   ],
14 |   "model": "gpt-4o"
15 | }


--------------------------------------------------------------------------------
/tests/http/non_streaming_embedding.http:
--------------------------------------------------------------------------------
 1 | @resource_name = 20.167.97.191
 2 | @deployment_name = text-embedding-ada-002
 3 | @api_key = {{$dotenv AOAI_API_KEY}}ct
 4 | 
 5 | ###‰
 6 | POST http://{{resource_name}}/openai/deployments/{{deployment_name}}/embeddings?api-version=2024-06-01 HTTP/1.1
 7 | Content-Type: application/json
 8 | #api-key: {{api_key}}
 9 | ai-sentry-backend-pool:pool1
10 | ai-sentry-consumer:embedding-automated-test1
11 | ai-sentry-log-level:PII_STRIPPING_ENABLED
12 | ai-sentry-adapters: []
13 | 
14 | {
15 |   "input": "Sample Document goes here"
16 | }


--------------------------------------------------------------------------------
/tests/loadTests/embeddings/locust.py:
--------------------------------------------------------------------------------
 1 | from locust import HttpUser, task, between
 2 | 
 3 | class EmbeddingTestUser(HttpUser):
 4 |     wait_time = between(1, 5)  # Simulated users will wait 1-5 seconds between tasks
 5 |     
 6 |     @task
 7 |     def post_embedding(self):
 8 |         headers = {
 9 |             "Content-Type": "application/json",
10 |             "api-key": "your_api_key_here",  # Replace with your actual API key
11 |             "ai-sentry-backend-pool": "pool1",
12 |             "ai-sentry-consumer": "embedding-automated-test1",
13 |             "ai-sentry-log-level": "PII_STRIPPING_ENABLED",
14 |             "ai-sentry-adapters": "[]"
15 |         }
16 |         payload = {
17 |             "input": "Sample Document goes here"
18 |         }
19 |         self.client.post("/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-06-01", json=payload, headers=headers)
20 | 
21 | # Note: Ensure you replace "your_api_key_here" with the actual API key.
22 | # You might also need to adjust the host in the Locust command-line or within the script if it's dynamic.


--------------------------------------------------------------------------------
/tests/loadTests/non-streaming/locustfile.py:
--------------------------------------------------------------------------------
 1 | from locust import HttpUser, task, between
 2 | import json, os
 3 | from dotenv import load_dotenv
 4 | 
 5 | load_dotenv(".env", override=True)
 6 | 
 7 | azure_endpoint = os.getenv("azure_endpoint", "http://20.11.111.242/"), 
 8 | api_key=os.getenv("api_key", "defaultKey"),  
 9 | api_version=os.getenv("api_version", "2023-07-01-preview"),
10 | aoai_deployment_name = os.getenv("aoai_deployment_name", "chat"),
11 | ai_sentry_consumer = os.getenv("ai-sentry-consumer", "locustloadtest"),
12 | ai_sentry_backend_pool = os.getenv("ai-sentry-backend-pool", "pool1"),
13 | ai_sentry_log_level = os.getenv("ai-sentry-log-level", "COMPLETE")
14 | 
15 | # Non-Streaming Load Test
16 | class OpenAIUser(HttpUser):
17 |     host = azure_endpoint
18 |     wait_time = between(1, 2.5)
19 | 
20 |     headers = {
21 |         "Content-Type": "application/json",
22 |         "ai-sentry-consumer": "locustloadtest",
23 |         "ai-sentry-backend-pool":"pool1",
24 |         "ai-sentry-adapters":"[\"SampleApiRequestTransformer\"]",
25 |         "ai-sentry-log-level": ai_sentry_log_level,
26 |         "api-key": "\"{}\"".format(api_key)
27 | 
28 |     }
29 | 
30 |     body = {
31 |         "messages": [
32 |             {
33 |                 "role": "system",
34 |                 "content": "You are an AI assistant that helps people find information."
35 |             },
36 |             {
37 |                 "role": "user",
38 |                 "content": "What is Microsoft's most profitable business?"
39 |             }
40 |         ],
41 |         "max_tokens": 800,
42 |         "temperature": 0.7,
43 |         "frequency_penalty": 0,
44 |         "presence_penalty": 0,
45 |         "top_p": 0.95,
46 |         "stop": None
47 |     }
48 | 
49 |     @task
50 |     def post_openai(self):
51 |         self.client.post(f"openai/deployments/chat/chat/completions?api-version=2024-02-15-preview", data=json.dumps(self.body), headers=self.headers)


--------------------------------------------------------------------------------
/tests/loadTests/streaming/locustfile.py:
--------------------------------------------------------------------------------
 1 | from locust import HttpUser, task, between
 2 | import json, os
 3 | from dotenv import load_dotenv
 4 | 
 5 | load_dotenv(".env", override=True)
 6 | 
 7 | azure_endpoint = os.getenv("azure_endpoint", "http://4.195.10.69/"), 
 8 | #api_key=os.getenv("api_key", "defaultkey"),  
 9 | api_version=os.getenv("api_version", "2023-07-01-preview"),
10 | aoai_deployment_name = os.getenv("aoai_deployment_name", "gpt4o")
11 | 
12 | # Non-Streaming Load Test
13 | class OpenAIUser(HttpUser):
14 |     host = azure_endpoint
15 |     wait_time = between(1, 2.5)
16 | 
17 |     headers = {
18 |         "Content-Type": "application/json",
19 |         "ai-sentry-consumer": "locustloadtest",
20 |         "ai-sentry-log-level": "PII_STRIPPING_ENABLED",
21 |         "ai-sentry-backend-pool": "pool1",
22 |         "ai-sentry-adapters": "[]"
23 |         #"api-key": "\"{}\"".format(api_key)
24 |     }
25 | 
26 |     body = {
27 |         "messages": [
28 |             {
29 |                 "role": "system",
30 |                 "content": "You are an AI assistant that helps people find information."
31 |             },
32 |                         {
33 |                 "role": "user",
34 |                 "content": "What does Microsoft do, and what is its most profitiable business division?"
35 |             }
36 |         ],
37 |         "stream": True,
38 |         "max_tokens": 800,
39 |         "temperature": 0.7,
40 |         "frequency_penalty": 0,
41 |         "presence_penalty": 0,
42 |         "top_p": 0.95,
43 |         "stop": None
44 |     }
45 | 
46 |     @task
47 |     def post_openai(self):
48 |         self.client.post(f"/openai/deployments/{aoai_deployment_name}/chat/completions?api-version=2024-02-15-preview", data=json.dumps(self.body), headers=self.headers)


--------------------------------------------------------------------------------
/tests/sdk/embedding.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from openai import AzureOpenAI
 3 | from dotenv import load_dotenv
 4 | 
 5 | load_dotenv(".env", override=True)
 6 | 
 7 | client = AzureOpenAI(
 8 |   api_key = os.getenv("api_key"),  
 9 |   api_version = "2024-02-01",
10 |   azure_endpoint = os.getenv("azure_endpoint", "http://4.147.128.191/openai"), 
11 | )
12 | 
13 | response = client.embeddings.create(
14 |     input = "Your text string goes here",
15 |     model= "text-embedding-ada-002"
16 | )
17 | 
18 | print(response.model_dump_json(indent=2))


--------------------------------------------------------------------------------
/tests/sdk/stream_completion.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | import asyncio
 4 | from openai import AsyncAzureOpenAI
 5 | from dotenv import load_dotenv
 6 | from timeit import default_timer
 7 | 
 8 | load_dotenv(".env", override=True)
 9 | 
10 | azure_openai_client = AsyncAzureOpenAI(
11 |   azure_endpoint = os.getenv("azure_endpoint", "http://localhost:6124/"), 
12 |   api_key=os.getenv("api_key", "yourapikey"),  
13 |   api_version=os.getenv("api_version", "2023-07-01-preview")
14 | )
15 | 
16 | 
17 | async def get_response(message):
18 |     start = default_timer()
19 |     end_first_chunk = None
20 |     end = None
21 |     response = await azure_openai_client.chat.completions.create(
22 |         model = os.getenv("aoai_deployment", "chat"),
23 |         temperature = 0.4,
24 |         messages = [
25 |             {"role": "user", "content": message}
26 |         ],
27 |         stream=True
28 |     )
29 |     #print(response.model_dump_json(indent=2)) - > not response
30 | 
31 |     async for chunk in response:
32 |         if not end_first_chunk:
33 |             end_first_chunk = default_timer()
34 |         print(chunk .model_dump_json(indent=2))
35 | 
36 |     end = default_timer()
37 | 
38 |     print(f"Elapse: {end-start}, First Chunk: {end_first_chunk-start}, Last Chunk: {end-end_first_chunk}")
39 | 
40 | def main():
41 |     print("Azure OpenAI SDK Stream Completion Test")
42 |     asyncio.run(get_response('What does Microsoft do?'))
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     main()


--------------------------------------------------------------------------------