├── .DS_Store ├── .gitignore ├── Contributing.md ├── LICENSE ├── Readme.md ├── pyproject.toml └── src ├── .DS_Store └── aiandme ├── .DS_Store ├── __init__.py ├── firewall.py ├── firewalloss.py ├── model_providers ├── __init__.py ├── azureopenai.py └── openai.py └── schemas ├── __init__.py ├── agent.py ├── firewall.py └── logs.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aiandme-io/firewall/6912f3c904d27918c4a48aa26767fa328fa620f1/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # PyPI configuration file 171 | .pypirc 172 | -------------------------------------------------------------------------------- /Contributing.md: -------------------------------------------------------------------------------- 1 | Contributing to **AIandMe Firewall** 2 | 3 | Thank you for considering contributing to the **AIandMe Firewall** ! Contributions of all kinds are welcome, including bug reports, feature suggestions, documentation updates, and code contributions. 4 | 5 | ## How to Contribute 6 | 1. Fork the repository and create a new branch for your changes. 7 | 2. Make your changes in the branch, following any existing coding guidelines. 8 | 3. Test your changes to ensure everything works as expected. 9 | 4. Open a pull request with a clear description of your changes and why they are needed. 10 | 11 | ## Reporting Issues 12 | If you encounter any bugs or have feature requests, please open an issue in the repository. Be sure to include: 13 | 1. A detailed description of the problem. 14 | 2. Steps to reproduce, if applicable. 15 | 3. Any relevant logs or screenshots. 16 | 4. Code of Conduct 17 | 18 | Please be respectful and professional in all interactions. Refer to our [page][https://www.aiandme.io] for more details. 19 | 20 | We appreciate your contributions! 🚀 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 AIandMe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # AIandMe Firewall and FirewallOSS 2 | The AIandMe FirewallOSS open-source library leverages the `LLM-as-a-judge` concept to implement a robust contextual firewall for LLM-based applications. It helps safeguard your AI systems from unintended prompts such as jailbreaking attempts, malicious inputs, and other security threats. 3 | 4 | The AIandMe Firewall is the wrapper library to interact with your AIandMe projects. Build a project within your AIandMe account and use the AIandMe Firewall to integrate directly. Visit the AIandMe [documentation](https://doc.aiandme.io) for more details and examples for integrating the AIandMe Firewall. 5 | 6 | ## Disclaimer 7 | The AIandMe FirewallOSS library relies on LLM technology and, as a result, **cannot guarantee 100% protection** due to the inherent stochastic and probabilistic nature of LLMs. Users are advised to consider this limitation and incorporate additional safeguards to address potential vulnerabilities in compliance with legal and security standards. 8 | 9 | 10 | ## How it works 11 | The AIandMe FirewallOSS acts as a middleware layer that contextually filters and validates user prompts. This ensures that the AI agent adheres to its intended business scope and operational boundaries. Via a reflection approach an LLM is acting as a judge (`LLM-as-a-judge` concept) and assesses if the analysing user prompts adheres with 3 basic conditions: 12 | - **Scope Validation**: Ensures user prompts align with the AI agent's defined business scope - `OFF_TOPIC`. 13 | - **Intent Filtering**: Allows only prompts that match a predefined list of allowed intents - `VIOLATION`. 14 | - **Restricted Action Blocking**: Blocks prompts that attempt to trigger restricted actions - `RESTRICTION`. 15 | 16 | Keep in mind that the AIandMe FirewallOSS library **does not function as a proxy**. Instead, it analyzes user prompts and provides a flag indicating potential issues (`off_topic`, `violation`, `restriction`). It is the responsibility of the LLM application developer to determine how to handle flagged prompts based on their specific requirements and use case. 17 | 18 | To ensure low latency, the AIandMe FirewallOSS library operates in two asynchronous steps, leveraging the streaming capabilities of LLM providers. 19 | 20 | - Initial Assessment: The library quickly delivers a decision regarding the three categories: `off_topic`, `violation`, or `restriction`. 21 | - Explanation: In the second step, it completes the LLM-as-a-judge assessment by providing a detailed explanation of the verdict. 22 | 23 | This two-step approach allows for efficient real-time decision-making without compromising on the depth of analysis. 24 | 25 | ## Installation 26 | Install using pip: 27 | 28 | ```bash 29 | pip install aiandme 30 | 31 | ``` 32 | 33 | ## Dependencies 34 | The AIandMe FirewallOSS lib relies on the `Pydantic` lib for data validation (schemas). 35 | 36 | ## Examples 37 | 38 | Find bellow some examples of how to use the AIandMe FirewallOSS lib for Self-hosting (example 1), or rely on your free tier AIandMe account (example 2). 39 | 40 | ### 1. User defined callbacks 41 | You can set up your own callback activity to handle the assesment of the AIandMe FirewallOSS. Find below an example of an integration that analyses a user prompt and registers a callback functio to log the result. 42 | 43 | ```python 44 | from aiandme import FirewallOSS, LLMModelProvider 45 | from aiandme.schemas import Logs as LogsSchema 46 | import logging 47 | 48 | def my_callback(log: LogsSchema): 49 | """ 50 | `log` is a Pydantic Model that holds the assessment of the LLM refletcion. 51 | log.id: String 52 | A unique ID to identify this assessment. 53 | log.prompt: String 54 | The analysed user prompt. 55 | log.result: String 56 | The assessment result. Options: 57 | - pass 58 | - fail 59 | - error 60 | log.fail_category: String 61 | Elaborating the result (mainly in case of failure or error). In case of log.result is `pass`, log.fail_category is also `pass`. If log.result is `fail`, then, log.fail_category can be one of (`off_topic`|`violation`|`restriction`). In case log.result is `error` of log.fail_category indicates the error category. 62 | log.explanation: String 63 | Reasoning of the evaluation result. In case log.result is `error`, log.explanation is a short descriotion of the error (exception message). 64 | 65 | No returning value. 66 | """ 67 | 68 | # ... callback to handle the firewall log 69 | # eg. 70 | logging.info(log.model_dump()) 71 | 72 | # integration example 73 | fw = FirewallOSS(model_provider=LLMModelProvider.AZURE_OPENAI) 74 | analysis = fw("...replace with users prompt...", my_callback) 75 | """ 76 | analysis["id"]: String 77 | A unique id to reference this assessment (later in the callback). In case with integration with the 78 | AIandMe platform, use this id to access the full log entry. 79 | analysis["status"]: Boolean 80 | If `True`, user prompt is legit, else it must be filtered 81 | analysis["fail_category"]: String 82 | Elaborating the result (mainly in case of failure or error). In case of log.result is `pass`, log.fail_category is also `pass`. If log.result is `fail`, then, log.fail_category can be one of (`off_topic`|`violation`|`restriction`). In case log.result is `error` of log.fail_category indicates the error category. 83 | """ 84 | if not analysis["status"]: 85 | # prompt is filtered -> act 86 | # ... 87 | ``` 88 | 89 | In order to deploy your own AIandMe Firwall some **mandatory** configurations must be done: 90 | 91 | **1. Environment Variables:** In this deployment option, you will be using your own LLM provider for the reflection mechanism. Currently, the AIandMe FirewallOSS lib supports integration with OpenAI and Azure OpenAI. Integrations with other providers are comming soon. Therefore, for: 92 | - 1.1 Azure OpenAI selection [**DEFAULT**]: 93 | ```python 94 | fw = FirewallOSS(model_provider=LLMModelProvider.AZURE_OPENAI) 95 | ``` 96 | you have to define the following environment variables: 97 | ```bash 98 | LLM_PROVIDER_ENDPOINT="...replace with the serverless endpoint for your Azure OpenAI deployemnt..." 99 | LLM_PROVIDER_API_VERSION="...replace with the serverless api version for your Azure OpenAI deployemnt..." 100 | LLM_PROVIDER_API_KEY="...replace with the serverless api key for your Azure OpenAI deployemnt..." 101 | LLM_PROVIDER_MODEL="...replace with the serverless model for your Azure OpenAI deployemnt..." 102 | ``` 103 | - 1.2 OpenAI selection: 104 | ```python 105 | fw = FirewallOSS(model_provider=LLMModelProvider.OPENAI) 106 | ``` 107 | you have to define the following environment variables: 108 | ```bash 109 | LLM_PROVIDER_API_KEY="...replace with the serverless api key for your Azure OpenAI deployemnt..." 110 | LLM_PROVIDER_MODEL="...replace with the serverless model for your Azure OpenAI deployemnt..." 111 | ``` 112 | If `LLM_PROVIDER_MODEL` is ommited, by default `gpt-4o` is used. 113 | 114 | **2. agent.json:** Define your current AI assistant. This is a json file with instructions for the `LLM-as-a-judge` concept. See more details in the **Project Files** section. 115 | 116 | ### 2. Integration with AIandMe platform 117 | Sign up for the free tier of the AIandMe platform to start storing your logs and leveraging powerful DevOps features. Create your account [here](https://www.aiandme.io), set up your project, and get the integration details provided in this example. For detailed setup instructions and additional resources, check out the AIandMe [documentation](https://doc.aiandme.io). In this case, you don't need to integrate with external LLM providers. 118 | 119 | ```python 120 | from os import getenv 121 | from aiandme import FirewallOSS 122 | 123 | 124 | # replace with the value from your project's integration page 125 | AIANDME_FIREWALL_ENDPOINT = getenv("AIANDME_FIREWALL_ENDPOINT") 126 | AIANDME_FIREWALL_APIKEY = getenv("AIANDME_FIREWALL_APIKEY") 127 | 128 | # init the firewall session 129 | frw = FirewallOSS(endpoint=AIANDME_FIREWALL_ENDPOINT, api_key=AIANDME_FIREWALL_APIKEY) 130 | 131 | # analyse your user's prompt 132 | analysis = frw.eval("...replace with users prompt...") 133 | if not analysis["pass"]: 134 | """User's response is not acceptable -> handle it 135 | analysis["explanation"] defines why the prompt is rejected. 136 | Possible values: 137 | `off_topic` : This means that the user's prompt is beyond the defined business scope. 138 | `violation` : This means that one of the permitted AI agent's business intents is violated. 139 | `restriction`: This means that one of the restricted AI agent's business intents is triggered. 140 | """ 141 | 142 | 143 | # add some code here to handle the inavlid user's prompt, e.g. generate a new response, 144 | # based on the analysis explanation 145 | # ... 146 | ``` 147 | 148 | ## LLM Reflection 149 | The AIandMe FirewallOSS lib implements a reflection mechanism to assess the user prompt. Variable `LLM_AS_A_JUDGE_REFLECTION_PROMPT` in _firewall.py_ file holds this reflection prompt. You may alter the prompt to deliver your own reflection mechanism. **However**, you must consider the asynchronous operation of the AIandMe FirewallOSS lib utilizing the streaming mechanism of the LLM providers. In that sense, you **MUST** respect the expected input and output format of the LLM assessment so as the lib functions properly and therefore, instruct the LLM in your own reflection prompt to deliver its response accordingly. 150 | 151 | ## Project Files (Self-hosting) 152 | A typical project using the AIandMe FirewallOSS lib has the following structure: 153 | 154 | ``` 155 | project 156 | │ main.py 157 | | agent.json 158 | │ .env 159 | ``` 160 | 161 | where, 162 | - **main.py:** Is your actual script. 163 | - **agent.json:** Definition of the AI agent that is being protected with the AIandMe FirewallOSS lib. More details bellow. 164 | - **.env:** Holds the project environment variables. Amongst others, it holds the appropriate env vars for LLM provider integration (as described in section **Examples** above). 165 | 166 | ### The _agent.json_ file 167 | This file holds the required information that governs the operation of the AI agent you wish to protect. It defines the basic business scope, instructions and restrictions of the app. You may put your own information in **free text in English**, BUT you must keep in mind the language has to be plain and as brief as possible to maintain low costs in tokens. The information of this file feeds the reflection prompt in the `LLM-as-a-judge` concept. 168 | 169 | 170 | The structure of the file is as follows: 171 | ```json 172 | { 173 | "overall_business_scope": "...brief description of the AI Agent's business scope...", 174 | "intents": { 175 | "permitted": [ 176 | "...list of permitted actions (intents to serve) by the AI Agent...", 177 | "..." 178 | ], 179 | "restricted": [ 180 | "...list of restricted actions (intents to block) by the AI Agent...", 181 | "..." 182 | ] 183 | } 184 | } 185 | ``` 186 | 187 | An example for an AI Agent intented to faciliated medical appointment booking: 188 | ```json 189 | { 190 | "overall_business_scope": "To assist users in managing medical appointments, navigating health insurance options, providing general health-related information within regulatory boundaries, and facilitating secure communication between patients and healthcare providers.", 191 | "intents": { 192 | "permitted": [ 193 | "explain how to assist the user", 194 | "schedule medical appointments", 195 | "reschedule or cancel medical appointments", 196 | "provide information on available healthcare services", 197 | "assist with health insurance policy inquiries", 198 | "offer general information on healthcare providers and facilities", 199 | "guide users on how to access emergency or urgent care services", 200 | "direct users to authoritative resources for medical or health-related questions" 201 | ], 202 | "restricted": [ 203 | "provide personalized medical advice or diagnosis", 204 | "submit or process personal medical data without explicit user consent and appropriate encryption", 205 | "evaluate, prescribe, or distribute prescription drugs or controlled substances", 206 | "interact with users in an unprofessional or inappropriate manner", 207 | "offer guidance or opinions outside the approved health and insurance domains", 208 | "store or retain user-sensitive health information without regulatory compliance" 209 | ] 210 | } 211 | } 212 | ``` 213 | 214 | ## Roadmap 215 | - [X] Integrate with OpenAI. 216 | - [X] Integrate with Azure OpenAI. 217 | - [X] Integration with AIandMe platform. 218 | - [X] Support of user defined judges (reflection prompts). 219 | - [ ] ContexLex: Adversarial few shots model. 220 | - [ ] Integrate with other LLM providers. 221 | 222 | ## Community 223 | Join the AIandMe community: 224 | - [Discord](https://discord.gg/VbVHRuPXE2) 225 | - [Meetup Page](https://www.meetup.com/ai-and-beers/) 226 | - [LinkedIn](https://www.linkedin.com/company/aiandme) 227 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "aiandme" 7 | version = "0.5.0" 8 | authors = [ 9 | { name="Kostas Siabanis", email="hello@aiandme.io" }, 10 | { name="Demetris Gerogiannis", email="hello@aiandme.io" }, 11 | ] 12 | description = "AIandMe open source contextual firewall and integrations." 13 | readme = "README.md" 14 | requires-python = ">=3.10" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | 21 | dependencies = [ 22 | "openai==1.55.3", 23 | "pydantic==2.9.2", 24 | "pydantic_core==2.23.4", 25 | "requests==2.32.3", 26 | ] 27 | 28 | [project.urls] 29 | Homepage = "https://github.com/aiandme-io/firewall" 30 | Issues = "https://github.com/aiandme-io/firewall/issues" -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aiandme-io/firewall/6912f3c904d27918c4a48aa26767fa328fa620f1/src/.DS_Store -------------------------------------------------------------------------------- /src/aiandme/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aiandme-io/firewall/6912f3c904d27918c4a48aa26767fa328fa620f1/src/aiandme/.DS_Store -------------------------------------------------------------------------------- /src/aiandme/__init__.py: -------------------------------------------------------------------------------- 1 | from .firewalloss import ( 2 | FirewallOSS, 3 | LLMModelProvider, 4 | JudgmentState, 5 | Verdict, 6 | LLM_AS_A_JUDGE_REFLECTION_PROMPT, 7 | ) 8 | 9 | from .firewall import ( 10 | Firewall, 11 | AIANDME_Firewall_NotAuthorised, 12 | AIANDME_Firewall_CannotDecide, 13 | ) 14 | -------------------------------------------------------------------------------- /src/aiandme/firewall.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | from aiandme.schemas import ( 4 | Firewall as FirewallSchema, 5 | Integration as IntegrationSchema, 6 | ) 7 | 8 | 9 | class AIANDME_Firewall_NotAuthorised(Exception): 10 | def __init__(self, message): 11 | self.message = message 12 | super().__init__(self.message) 13 | 14 | 15 | class AIANDME_Firewall_CannotDecide(Exception): 16 | def __init__(self, message): 17 | self.message = message 18 | super().__init__(self.message) 19 | 20 | 21 | class Firewall: 22 | def __init__(self, integration: IntegrationSchema): 23 | self.integration = integration 24 | self.assess_req_pool = ( 25 | requests.Session() 26 | ) # initialize connection pool (to speed up the eval process) 27 | 28 | def __del__(self): 29 | self.assess_req_pool.close() # destroy the connection pool 30 | 31 | # 32 | # Prompt assessment 33 | # 34 | def eval(self, user_p: str) -> FirewallSchema: 35 | """ 36 | Assess a user prompt and define if complies with expected app intents and business context. 37 | Return: 38 | FirewallSchema 39 | id: str The id of the log created for this evaluation. 40 | status: bool True (pass) | False (should be blocked) 41 | fail_category: If status is True 42 | `pass` - The user prompt is legit. 43 | If status is False 44 | `off_topic` - The user prompt is off topic. 45 | `violation` - The user prompt violated the permitted intents. 46 | `restriction` - The user prompt triggered a restricted intents. 47 | """ 48 | # api call 49 | resp = self.assess_req_pool.post( 50 | self.integration.endpoint, 51 | headers={ 52 | "Content-Type": "application/json", 53 | "X-Api-Key": self.integration.api_key, 54 | }, 55 | json={"messages": [{"role": "user", "content": user_p}]}, 56 | ) 57 | 58 | # handle response 59 | if resp.status_code == 200: 60 | # successfull evaluation [PROMPTS IS ACCEPTED] -> return response 61 | verdict = resp.json() 62 | return FirewallSchema( 63 | id=verdict["id"], status=True, fail_category=verdict["explanation"] 64 | ) 65 | elif resp.status_code == 406: 66 | # successfull evaluation [PROMPTS IS NOT ACCEPTED] -> return response 67 | verdict = resp.json() 68 | return FirewallSchema( 69 | id=verdict["id"], status=False, fail_category=verdict["explanation"] 70 | ) 71 | elif resp.status_code == 418: 72 | # LLM Judge could not deliver a verdict 73 | raise AIANDME_Firewall_CannotDecide(resp.text) 74 | elif ( 75 | resp.status_code == 404 76 | or resp.status_code == 403 77 | or resp.status_code == 401 78 | ): 79 | raise AIANDME_Firewall_NotAuthorised(resp.text) 80 | -------------------------------------------------------------------------------- /src/aiandme/firewalloss.py: -------------------------------------------------------------------------------- 1 | import re, uuid, time, threading, json, requests 2 | from enum import Enum 3 | 4 | from aiandme.schemas import ( 5 | Logs as LogsSchema, 6 | Firewall as FirewallSchema, 7 | Agent as AgentSchema, 8 | Integration as IntegrationSchema, 9 | ) 10 | 11 | LLM_FIREWALL_MAX_OUT_TOKENS = 2024 12 | LLM_FIREWALL_TEMPERATURE = 0 13 | 14 | ANALYSIS_STATE_INDEX = 0 15 | ANALYSIS_DATA_INDEX = 1 16 | 17 | FIREWALL_TIMEOUT_IN_SEC = 5 18 | 19 | # Reflection prompt (LLM-as-a-judge) 20 | LLM_AS_A_JUDGE_REFLECTION_PROMPT = """As a senior linguist, evaluate whether the user’s input complies with expected guidelines. The AI Agent might operate in a step-by-step process to fulfill its business scope, with each step prompting specific information from the user. Users may either submit a full request or provide relevant information as part of these step-by-step interactions. Follow the evaluation instructions precisely, and respond concisely and directly. 21 | 22 | ###Overall AI Agent's Business Scope:### 23 | 24 | 25 | 26 | 27 | ###AI Agent's Permitted Actions:### 28 | 29 | 30 | ###AI Agent's Restricted Actions:### 31 | 32 | 33 | ###EVALUATION INSTRUCTIONS:### 34 | Your evaluation must determine if a violation exists by following these steps in sequence. If a violation is detected, stop immediately and do not proceed further. 35 | 36 | 1) First check if sufficient context is provided to clearly assess relevance with the AI Agent's business scope and guidelines; otherwise proceed with the default case. 37 | 38 | 2) Then, identify whether the user’s input is an explicit request (CASE 1) or a simple data input (CASE 2), and follow the steps corresponding to the identified case. 39 | 40 | If no violation is detected, proceed directly to the default case and stop any further assessment. 41 | 42 | **CASE 1: An explicit request, is identified.** 43 | *STEP 1.1, Assess Scope Alignment:* Evaluate whether the the user's request, in whole or in part, deviates from the AI Agent's defined business scope. 44 | If a violation is detected, reply: `A {explanation}.` where `{explanation}` briefly elaborates how the user's request triggered the violation. 45 | 46 | *STEP 1.2, Assess Alignment with Permitted Actions:* Extract a detailed list of implied intents in the user's request and assess if all the intents, in whole or in part, aligns with the behaviors or instructions outlined in the AI Agent’s Permitted Intents List. 47 | If a violation is detected, reply: `B {explanation}.` where `{explanation}` briefly elaborates how the identified user intent that triggered the violation. 48 | 49 | *STEP 1.3, Identify Restricted Actions:* Extract a detailed list of implied intents in the user's request and assess if at least one of the examining intent, in whole or in part, could trigger any actions in the AI Agent’s response that are described in the Restricted Intents List. 50 | If a violation is detected, reply: `C {explanation}.` where `{explanation}` briefly elaborates how the identified user intent that triggered the violation. 51 | 52 | **CASE 2: User simply provided some information.** 53 | Assume NO violation. Reply: `P {explanation}.` where `{explanation}` briefly elaborates how the user's request aligns with the AI Agent's business scope and the permitted actions. 54 | 55 | **DEFAULT CASE:** 56 | If no violation is detected, or insufficient context is provided to complete the evaluation, reply: `P {explanation}.` where `{explanation}` briefly elaborates how the user's request aligns with the AI Agent's business scope and the permitted actions. 57 | 58 | ###USER INPUT TO ANALYSE:### 59 | """ 60 | 61 | DEFAULT_AGENT_CONFIG_FILE = "agent.json" # AI Agent definition 62 | 63 | 64 | class JudgmentState(Enum): 65 | DECIDE = 0 66 | EXPLAIN = 1 67 | CANNOT_DECIDE = -1 68 | ERROR = -2 69 | 70 | 71 | class Verdict(Enum): 72 | PASS = 0 73 | OFF_TOPIC = 1 74 | VIOLATION = 2 75 | RESTRICTION = 3 76 | 77 | 78 | class LLMModelProvider(Enum): 79 | AZURE_OPENAI = "azureopenai" 80 | OPENAI = "openai" 81 | 82 | 83 | """ 84 | A utility class to implement the LLM as a Judge concept. 85 | 86 | """ 87 | 88 | 89 | class __JudgeOS: 90 | def __init__(self, model_provider: LLMModelProvider): 91 | if model_provider == LLMModelProvider.AZURE_OPENAI: 92 | from aiandme.model_providers import AzureOpenai_LLMStreamer as LLMStreamer 93 | elif model_provider == LLMModelProvider.OPENAI: 94 | from aiandme.model_providers import Openai_LLMStreamer as LLMStreamer 95 | 96 | self.llm_streamer = LLMStreamer() 97 | self.__BASIC_TEST_TMPL = LLM_AS_A_JUDGE_REFLECTION_PROMPT 98 | 99 | def __attach_more_info(self, gen_template: str, more_info: str = "") -> str: 100 | more_info = more_info.strip() 101 | if more_info != "": 102 | more_info = f"###More Info about the AI Agent:###\n {more_info}" 103 | return gen_template.replace("", more_info) 104 | 105 | def generate_system_prompt(self, agent: AgentSchema) -> str: 106 | reflection_prompt = ( 107 | self.__BASIC_TEST_TMPL.replace( 108 | "", agent.overall_business_scope 109 | ) 110 | .replace( 111 | "", 112 | (" - " + "\n - ".join(agent.intents.permitted)), 113 | ) 114 | .replace( 115 | "", 116 | (" - " + "\n - ".join(agent.intents.restricted)), 117 | ) 118 | ) 119 | reflection_prompt = self.__attach_more_info(reflection_prompt, agent.more_info) 120 | 121 | return reflection_prompt 122 | 123 | def return_verdict(self, system_p: str, user_p: str): 124 | try: 125 | # init 126 | (cur_state, explanation) = (JudgmentState.DECIDE, "") 127 | 128 | # make request and get the stream chunks 129 | resp = self.llm_streamer.ping( 130 | system_p, 131 | user_p, 132 | LLM_FIREWALL_MAX_OUT_TOKENS, 133 | LLM_FIREWALL_TEMPERATURE, 134 | ) 135 | 136 | # start parsing the stream (chunks) 137 | for chunk in resp: 138 | # skip empty chunks (e.g. initial replies of filters applied by the model provider) 139 | if len(chunk.choices) == 0: 140 | continue 141 | 142 | # consume chunks with other (non response) info 143 | if not chunk.choices[0].delta.content: 144 | continue 145 | 146 | if cur_state == JudgmentState.DECIDE: 147 | # IN DECISION STATE: 148 | # return the eval result (content starts with `PASS` or `FAIL` -> PASS/FAIL, OTHERWISE, cannot decide) 149 | # and then, continue with the explanation instruction 150 | 151 | # remove trailing prefixes, e.g. ` ' " 152 | # remove all leading digits (trailing at the beginning) from the string and then any . or ) and then any space 153 | # why? in case the reponse is in the format: 1. .... or 1) .... => remove listing numbers 154 | # finaly, get the actual prompt 155 | p = ( 156 | re.sub( 157 | r"^\d+", 158 | "", 159 | chunk.choices[0].delta.content.strip(" -`'\n\""), 160 | ) 161 | .lstrip(".") 162 | .lstrip(")") 163 | .strip() 164 | ) 165 | 166 | # detect the first alpha char in content if no alpha, return None (assume now that some cleaning is completed) 167 | first_detected_alpha = next( 168 | (char for char in p if char.isalpha()), None 169 | ) 170 | 171 | # return result 172 | if first_detected_alpha is None: 173 | # if no alphanumeric character is detected then, error in response cannot decide 174 | cur_state = JudgmentState.CANNOT_DECIDE 175 | explanation = f"Unexpected LLM response [chunk: {chunk.choices[0].delta.content}]." 176 | break 177 | else: 178 | # analysis result detected 179 | first_detected_alpha = first_detected_alpha.upper() 180 | if first_detected_alpha == "P": 181 | yield (JudgmentState.DECIDE, Verdict.PASS) 182 | elif first_detected_alpha == "A": 183 | yield (JudgmentState.DECIDE, Verdict.OFF_TOPIC) 184 | elif first_detected_alpha == "B": 185 | yield (JudgmentState.DECIDE, Verdict.VIOLATION) 186 | elif first_detected_alpha == "C": 187 | yield (JudgmentState.DECIDE, Verdict.RESTRICTION) 188 | else: 189 | cur_state = JudgmentState.CANNOT_DECIDE 190 | explanation = f"Unexpected LLM response [chunk: {chunk.choices[0].delta.content}]." 191 | break 192 | 193 | # update state (move to explanation extraction) 194 | cur_state = JudgmentState.EXPLAIN 195 | else: 196 | # IN EXPLAIN STATE: 197 | # Concat the stream to get the explanation and return 198 | explanation = f"{explanation}{chunk.choices[0].delta.content}" 199 | 200 | # analysis completed (streaming) -> return the result 201 | yield (cur_state, explanation.strip()) 202 | except Exception as e: 203 | err = str(e) 204 | if err.startswith("Error code: 400"): 205 | # handle Azure content filtering (400 error) 206 | yield (JudgmentState.DECIDE, Verdict.OFF_TOPIC) 207 | yield (JudgmentState.EXPLAIN, "Inappropriate content. Filtered out.") 208 | else: 209 | yield (JudgmentState.ERROR, str(e)) 210 | 211 | 212 | class FirewallOSS(__JudgeOS): 213 | 214 | def __init__( 215 | self, 216 | model_provider: LLMModelProvider = LLMModelProvider.AZURE_OPENAI, 217 | agent_file: str = DEFAULT_AGENT_CONFIG_FILE, 218 | ): 219 | super().__init__(model_provider) 220 | with open(agent_file, "r") as fp: 221 | agent = AgentSchema(**json.load(fp)) 222 | self.system_p = self.generate_system_prompt(agent) 223 | 224 | def __sync_with_platform(self, integ: IntegrationSchema, data: LogsSchema): 225 | r = requests.post( 226 | f"{integ.endpoint}/logs", 227 | headers={"x-api-key": integ.api_key}, 228 | json=data.model_dump(), 229 | ) 230 | if r.status_code != 200: 231 | raise Exception( 232 | f"AIandMe - Sync with platform error [{r.status_code}/{r.text}]" 233 | ) 234 | return r.json() 235 | 236 | def __do_analysis_in_background(self, result: list, id: str, user_p: str, cb: any): 237 | analysis = self.return_verdict(self.system_p, user_p) 238 | for chunk in analysis: 239 | # Pass/Fail (LLM Verdict) 240 | if chunk[ANALYSIS_STATE_INDEX] == JudgmentState.DECIDE: 241 | (res, status) = ( 242 | ("pass", True) 243 | if chunk[ANALYSIS_DATA_INDEX] == Verdict.PASS 244 | else ("fail", False) 245 | ) 246 | fail_category = chunk[ANALYSIS_DATA_INDEX].name.lower() 247 | result.append([status, fail_category]) 248 | continue 249 | 250 | # Cannot decide 251 | if chunk[ANALYSIS_STATE_INDEX] == JudgmentState.CANNOT_DECIDE: 252 | res = "error" 253 | fail_category = "418" 254 | result.append([False, "error"]) 255 | continue 256 | 257 | # Error 258 | if chunk[ANALYSIS_STATE_INDEX] == JudgmentState.ERROR: 259 | res = "error" 260 | fail_category = "500" 261 | result.append([False, "error"]) 262 | continue 263 | 264 | time.sleep(0.1) # Giving it some time to know if we timed out 265 | 266 | if cb and len(result): # if result is empty -> timedout 267 | log = LogsSchema( 268 | id=id, 269 | prompt=user_p, 270 | result=res, 271 | fail_category=fail_category, 272 | explanation=chunk[1], 273 | ) 274 | ( 275 | self.__sync_with_platform(cb, log) 276 | if isinstance(cb, IntegrationSchema) 277 | else cb(log) 278 | ) 279 | 280 | def __call__( 281 | self, 282 | user_p: str, 283 | cb: any = None, 284 | timeout: float = FIREWALL_TIMEOUT_IN_SEC, 285 | ) -> FirewallSchema | None: 286 | return self.filter(user_p, cb, timeout) 287 | 288 | def filter( 289 | self, 290 | user_p: str, 291 | cb: any = None, 292 | timeout: float = FIREWALL_TIMEOUT_IN_SEC, 293 | ) -> FirewallSchema | None: 294 | analysysis = [] 295 | id = str(uuid.uuid4()) 296 | threading.Thread( 297 | target=self.__do_analysis_in_background, 298 | args=(analysysis, id, user_p, cb), 299 | ).start() 300 | 301 | # While as a sleep until we get the response from llm 302 | timeout = time.time() + FIREWALL_TIMEOUT_IN_SEC 303 | while not len(analysysis) and time.time() < timeout: 304 | pass 305 | 306 | # Responses 307 | if not len(analysysis): 308 | return None 309 | 310 | return FirewallSchema( 311 | status=analysysis[0][ANALYSIS_STATE_INDEX], 312 | id=id, 313 | fail_category=analysysis[0][ANALYSIS_DATA_INDEX], 314 | ) 315 | -------------------------------------------------------------------------------- /src/aiandme/model_providers/__init__.py: -------------------------------------------------------------------------------- 1 | from .azureopenai import LLMStreamer as AzureOpenai_LLMStreamer 2 | from .openai import LLMStreamer as Openai_LLMStreamer 3 | -------------------------------------------------------------------------------- /src/aiandme/model_providers/azureopenai.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from openai import AzureOpenAI 3 | 4 | # 5 | # Handle communications with LLM 6 | # 7 | ALLOWED_MAX_OUT_TOKENS = 4096 # max allowed tokens 8 | DEFAULT_MAX_OUT_TOKENS = 512 # max numbers of tokens each LLM ping can deliver 9 | MAX_RETRY_COUNTER = 5 # how many times to retry an API call before returning error 10 | LLM_PING_TIMEOUT = 30 # llm completion api request timeout (sec) 11 | 12 | DEFAULT_TEMPERATURE = 0 # default temperature for LLM completion 13 | 14 | LLM_PROVIDER_MODEL = getenv("LLM_PROVIDER_MODEL", "gpt4-o") 15 | 16 | 17 | class LLMStreamer: 18 | def __init__(self): 19 | self.__azure_ai_client = AzureOpenAI( 20 | api_key=getenv("LLM_PROVIDER_API_KEY"), 21 | api_version=getenv("LLM_PROVIDER_API_VERSION"), 22 | azure_endpoint=getenv("LLM_PROVIDER_ENDPOINT"), 23 | ) 24 | 25 | def ping(self, system_p, user_p, max_tokens, temperature): 26 | max_tokens = min(max_tokens, ALLOWED_MAX_OUT_TOKENS) 27 | return self.__azure_ai_client.chat.completions.create( 28 | model=getenv("LLM_PROVIDER_MODEL"), 29 | messages=[ 30 | {"role": "system", "content": system_p}, 31 | {"role": "user", "content": user_p}, 32 | ], 33 | max_tokens=max_tokens, 34 | temperature=temperature, 35 | timeout=LLM_PING_TIMEOUT, 36 | stream=True, 37 | ) 38 | -------------------------------------------------------------------------------- /src/aiandme/model_providers/openai.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from openai import OpenAI 3 | 4 | # 5 | # Handle communications with LLM 6 | # 7 | ALLOWED_MAX_OUT_TOKENS = 4096 # max allowed tokens 8 | DEFAULT_MAX_OUT_TOKENS = 512 # max numbers of tokens each LLM ping can deliver 9 | MAX_RETRY_COUNTER = 5 # how many times to retry an API call before returning error 10 | LLM_PING_TIMEOUT = 30 # llm completion api request timeout (sec) 11 | 12 | DEFAULT_TEMPERATURE = 0 # default temperature for LLM completion 13 | 14 | LLM_PROVIDER_MODEL = getenv("LLM_PROVIDER_MODEL", "gpt4-o") 15 | 16 | 17 | class LLMStreamer: 18 | def __init__(self): 19 | self.__openai_client = OpenAI( 20 | api_key=getenv("LLM_PROVIDER_API_KEY") 21 | ) 22 | 23 | def ping(self, system_p, user_p, max_tokens, temperature): 24 | max_tokens = min(max_tokens, ALLOWED_MAX_OUT_TOKENS) 25 | return self.__openai_client.chat.completions.create( 26 | model=getenv("LLM_PROVIDER_MODEL"), 27 | messages=[ 28 | {"role": "system", "content": system_p}, 29 | {"role": "user", "content": user_p}, 30 | ], 31 | max_tokens=max_tokens, 32 | temperature=temperature, 33 | timeout=LLM_PING_TIMEOUT, 34 | stream=True, 35 | ) 36 | -------------------------------------------------------------------------------- /src/aiandme/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | from .logs import Logs 2 | from .firewall import Firewall, Integration 3 | from .agent import Intents, Agent 4 | -------------------------------------------------------------------------------- /src/aiandme/schemas/agent.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | from pydantic import BaseModel 3 | 4 | 5 | class Intents(BaseModel): 6 | permitted: List[str] 7 | restricted: Optional[List[str]] = [] 8 | 9 | 10 | class Agent(BaseModel): 11 | overall_business_scope: str 12 | intents: Intents 13 | more_info: Optional[str] = "" 14 | -------------------------------------------------------------------------------- /src/aiandme/schemas/firewall.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic import BaseModel 3 | 4 | 5 | class Integration(BaseModel): 6 | endpoint: str 7 | api_key: str 8 | 9 | 10 | class Firewall(BaseModel): 11 | id: str 12 | status: bool 13 | fail_category: Optional[str] = "" 14 | -------------------------------------------------------------------------------- /src/aiandme/schemas/logs.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from typing import Optional 3 | from pydantic import BaseModel 4 | 5 | 6 | class Logs(BaseModel): 7 | id: Optional[str] = str(uuid.uuid4()) 8 | prompt: str 9 | result: str 10 | explanation: Optional[str] = "" 11 | fail_category: Optional[str] = "" 12 | --------------------------------------------------------------------------------