├── .DS_Store
├── .gitignore
├── Contributing.md
├── LICENSE
├── Readme.md
├── pyproject.toml
└── src
    ├── .DS_Store
    └── aiandme
        ├── .DS_Store
        ├── __init__.py
        ├── firewall.py
        ├── firewalloss.py
        ├── model_providers
            ├── __init__.py
            ├── azureopenai.py
            └── openai.py
        └── schemas
            ├── __init__.py
            ├── agent.py
            ├── firewall.py
            └── logs.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aiandme-io/firewall/6912f3c904d27918c4a48aa26767fa328fa620f1/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # UV
 98 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #uv.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 | 
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 | 
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 | 
127 | # SageMath parsed files
128 | *.sage.py
129 | 
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 | 
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 | 
143 | # Rope project settings
144 | .ropeproject
145 | 
146 | # mkdocs documentation
147 | /site
148 | 
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 | 
154 | # Pyre type checker
155 | .pyre/
156 | 
157 | # pytype static type analyzer
158 | .pytype/
159 | 
160 | # Cython debug symbols
161 | cython_debug/
162 | 
163 | # PyCharm
164 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
167 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 | 
170 | # PyPI configuration file
171 | .pypirc
172 | 


--------------------------------------------------------------------------------
/Contributing.md:
--------------------------------------------------------------------------------
 1 | Contributing to **AIandMe Firewall**
 2 | 
 3 | Thank you for considering contributing to the **AIandMe Firewall** ! Contributions of all kinds are welcome, including bug reports, feature suggestions, documentation updates, and code contributions.
 4 | 
 5 | ## How to Contribute
 6 | 1. Fork the repository and create a new branch for your changes.
 7 | 2. Make your changes in the branch, following any existing coding guidelines.
 8 | 3. Test your changes to ensure everything works as expected.
 9 | 4. Open a pull request with a clear description of your changes and why they are needed.
10 | 
11 | ## Reporting Issues
12 | If you encounter any bugs or have feature requests, please open an issue in the repository. Be sure to include:
13 | 1. A detailed description of the problem.
14 | 2. Steps to reproduce, if applicable.
15 | 3. Any relevant logs or screenshots.
16 | 4. Code of Conduct
17 | 
18 | Please be respectful and professional in all interactions. Refer to our [page][https://www.aiandme.io] for more details.
19 | 
20 | We appreciate your contributions! 🚀


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 AIandMe
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
  1 | # AIandMe Firewall and FirewallOSS
  2 | The AIandMe FirewallOSS open-source library leverages the `LLM-as-a-judge` concept to implement a robust contextual firewall for LLM-based applications. It helps safeguard your AI systems from unintended prompts such as jailbreaking attempts, malicious inputs, and other security threats.
  3 | 
  4 | The AIandMe Firewall is the wrapper library to interact with your AIandMe projects. Build a project within your AIandMe account and use the AIandMe Firewall to integrate directly.  Visit the AIandMe [documentation](https://doc.aiandme.io) for more details and examples for integrating the AIandMe Firewall.
  5 | 
  6 | ## Disclaimer
  7 | The AIandMe FirewallOSS library relies on LLM technology and, as a result, **cannot guarantee 100% protection** due to the inherent stochastic and probabilistic nature of LLMs. Users are advised to consider this limitation and incorporate additional safeguards to address potential vulnerabilities in compliance with legal and security standards.
  8 | 
  9 | 
 10 | ## How it works
 11 | The AIandMe FirewallOSS acts as a middleware layer that contextually filters and validates user prompts. This ensures that the AI agent adheres to its intended business scope and operational boundaries. Via a reflection approach an LLM is acting as a judge (`LLM-as-a-judge` concept) and assesses if the analysing user prompts adheres with 3 basic conditions:
 12 | - **Scope Validation**: Ensures user prompts align with the AI agent's defined business scope - `OFF_TOPIC`.
 13 | - **Intent Filtering**: Allows only prompts that match a predefined list of allowed intents - `VIOLATION`.
 14 | - **Restricted Action Blocking**: Blocks prompts that attempt to trigger restricted actions - `RESTRICTION`.
 15 | 
 16 | Keep in mind that the AIandMe FirewallOSS library **does not function as a proxy**. Instead, it analyzes user prompts and provides a flag indicating potential issues (`off_topic`, `violation`, `restriction`). It is the responsibility of the LLM application developer to determine how to handle flagged prompts based on their specific requirements and use case.
 17 | 
 18 | To ensure low latency, the AIandMe FirewallOSS library operates in two asynchronous steps, leveraging the streaming capabilities of LLM providers.
 19 | 
 20 | - Initial Assessment: The library quickly delivers a decision regarding the three categories: `off_topic`, `violation`, or `restriction`.
 21 | - Explanation: In the second step, it completes the LLM-as-a-judge assessment by providing a detailed explanation of the verdict.
 22 | 
 23 | This two-step approach allows for efficient real-time decision-making without compromising on the depth of analysis.
 24 | 
 25 | ## Installation
 26 | Install using pip:
 27 | 
 28 | ```bash
 29 | pip install aiandme
 30 | 
 31 | ```
 32 | 
 33 | ## Dependencies
 34 | The AIandMe FirewallOSS lib relies on the `Pydantic` lib for data validation (schemas).
 35 | 
 36 | ## Examples
 37 | 
 38 | Find bellow some examples of how to use the AIandMe FirewallOSS lib for Self-hosting (example 1), or rely on your free tier AIandMe account (example 2).
 39 | 
 40 | ### 1. User defined callbacks
 41 | You can set up your own callback activity to handle the assesment of the AIandMe FirewallOSS. Find below an example of an integration that analyses a user prompt and registers a callback functio to log the result.
 42 | 
 43 | ```python 
 44 | from aiandme import FirewallOSS, LLMModelProvider
 45 | from aiandme.schemas import Logs as LogsSchema
 46 | import logging
 47 | 
 48 | def my_callback(log: LogsSchema):
 49 |     """
 50 |     `log` is a Pydantic Model that holds the assessment of the LLM refletcion.
 51 |     log.id: String
 52 |         A unique ID to identify this assessment.
 53 |     log.prompt: String
 54 |         The analysed user prompt.
 55 |     log.result: String
 56 |         The assessment result. Options:
 57 |         - pass
 58 |         - fail
 59 |         - error
 60 |     log.fail_category: String
 61 |         Elaborating the result (mainly in case of failure or error). In case of log.result is `pass`, log.fail_category is also `pass`. If log.result is `fail`, then, log.fail_category can be one of (`off_topic`|`violation`|`restriction`). In case log.result is `error` of log.fail_category indicates the error category.
 62 |     log.explanation: String
 63 |         Reasoning of the evaluation result. In case  log.result is `error`, log.explanation is a short descriotion of the error (exception message).
 64 |     
 65 |     No returning value.
 66 |     """
 67 | 
 68 |     # ... callback to handle the firewall log
 69 |     # eg.
 70 |     logging.info(log.model_dump())
 71 | 
 72 | # integration example
 73 | fw = FirewallOSS(model_provider=LLMModelProvider.AZURE_OPENAI)
 74 | analysis = fw("...replace with users prompt...", my_callback)
 75 | """
 76 |   analysis["id"]: String
 77 |     A unique id to reference this assessment (later in the callback). In case with integration with the
 78 |     AIandMe platform, use this id to access the full log entry.
 79 |   analysis["status"]: Boolean
 80 |     If `True`, user prompt is legit, else it must be filtered
 81 |   analysis["fail_category"]: String
 82 |     Elaborating the result (mainly in case of failure or error). In case of log.result is `pass`, log.fail_category is also `pass`. If log.result is `fail`, then, log.fail_category can be one of (`off_topic`|`violation`|`restriction`). In case log.result is `error` of log.fail_category indicates the error category.
 83 | """
 84 | if not analysis["status"]:
 85 |   # prompt is filtered -> act
 86 |   # ...
 87 | ```
 88 | 
 89 | In order to deploy your own AIandMe Firwall some **mandatory** configurations must be done:
 90 | 
 91 | **1. Environment Variables:** In this deployment option, you will be using your own LLM provider for the reflection mechanism. Currently, the AIandMe FirewallOSS lib supports integration with OpenAI and Azure OpenAI. Integrations with other providers are comming soon. Therefore, for:
 92 | - 1.1 Azure OpenAI selection [**DEFAULT**]:
 93 | ```python
 94 |     fw = FirewallOSS(model_provider=LLMModelProvider.AZURE_OPENAI)
 95 | ```
 96 | you have to define the following environment variables:
 97 | ```bash
 98 | LLM_PROVIDER_ENDPOINT="...replace with the serverless endpoint for your Azure OpenAI deployemnt..."
 99 | LLM_PROVIDER_API_VERSION="...replace with the serverless api version for your Azure OpenAI deployemnt..."
100 | LLM_PROVIDER_API_KEY="...replace with the serverless api key for your Azure OpenAI deployemnt..."
101 | LLM_PROVIDER_MODEL="...replace with the serverless model for your Azure OpenAI deployemnt..."
102 | ```
103 | - 1.2 OpenAI selection:
104 | ```python
105 |     fw = FirewallOSS(model_provider=LLMModelProvider.OPENAI)
106 | ```
107 | you have to define the following environment variables:
108 | ```bash
109 | LLM_PROVIDER_API_KEY="...replace with the serverless api key for your Azure OpenAI deployemnt..."
110 | LLM_PROVIDER_MODEL="...replace with the serverless model for your Azure OpenAI deployemnt..."
111 | ```
112 | If `LLM_PROVIDER_MODEL` is ommited, by default `gpt-4o` is used.
113 | 
114 | **2. agent.json:** Define your current AI assistant. This is a json file with instructions for the `LLM-as-a-judge` concept. See more details in the **Project Files** section.
115 | 
116 | ### 2. Integration with AIandMe platform
117 | Sign up for the free tier of the AIandMe platform to start storing your logs and leveraging powerful DevOps features. Create your account [here](https://www.aiandme.io), set up your project, and get the integration details provided in this example. For detailed setup instructions and additional resources, check out the AIandMe [documentation](https://doc.aiandme.io). In this case, you don't need to integrate with external LLM providers.
118 | 
119 | ```python 
120 | from os import getenv
121 | from aiandme import FirewallOSS
122 | 
123 | 
124 | # replace with the value from your project's integration page
125 | AIANDME_FIREWALL_ENDPOINT = getenv("AIANDME_FIREWALL_ENDPOINT")
126 | AIANDME_FIREWALL_APIKEY = getenv("AIANDME_FIREWALL_APIKEY")
127 | 
128 | # init the firewall session
129 | frw = FirewallOSS(endpoint=AIANDME_FIREWALL_ENDPOINT, api_key=AIANDME_FIREWALL_APIKEY)
130 | 
131 | # analyse your user's prompt
132 | analysis = frw.eval("...replace with users prompt...")
133 | if not analysis["pass"]:
134 |     """User's response is not acceptable -> handle it
135 |     analysis["explanation"] defines why the prompt is rejected.
136 |     Possible values:
137 |       `off_topic`  : This means that the user's prompt is beyond the defined business scope.
138 |       `violation`  : This means that one of the permitted AI agent's business intents is violated.
139 |       `restriction`: This means that one of the restricted AI agent's business intents is triggered.
140 |     """
141 | 
142 | 
143 |     # add some code here to handle the inavlid user's prompt, e.g. generate a new response,
144 |     # based on the analysis explanation
145 |     # ...
146 | ```
147 | 
148 | ## LLM Reflection
149 | The AIandMe FirewallOSS lib implements a reflection mechanism to assess the user prompt. Variable `LLM_AS_A_JUDGE_REFLECTION_PROMPT` in _firewall.py_ file holds this reflection prompt. You may alter the prompt to deliver your own reflection mechanism. **However**, you must consider the asynchronous operation of the AIandMe FirewallOSS lib utilizing the streaming mechanism of the LLM providers. In that sense, you **MUST** respect the expected input and output format of the LLM assessment so as the lib functions properly and therefore, instruct the LLM in your own reflection prompt to deliver its response accordingly.
150 | 
151 | ## Project Files (Self-hosting)
152 | A typical project using the AIandMe FirewallOSS lib has the following structure:
153 | 
154 | ```
155 | project
156 | │   main.py
157 | |   agent.json
158 | │   .env
159 | ```
160 | 
161 | where,
162 | - **main.py:** Is your actual script.
163 | - **agent.json:** Definition of the AI agent that is being protected with the AIandMe FirewallOSS lib. More details bellow.
164 | - **.env:** Holds the project environment variables. Amongst others, it holds the appropriate env vars for LLM provider integration (as described in section **Examples** above).
165 | 
166 | ### The _agent.json_ file
167 | This file holds the required information that governs the operation of the AI agent you wish to protect. It defines the basic business scope, instructions and restrictions of the app. You may put your own information in **free text in English**, BUT you must keep in mind the language has to be plain and as brief as possible to maintain low costs in tokens. The information of this file feeds the reflection prompt in the `LLM-as-a-judge` concept.
168 | 
169 | 
170 | The structure of the file is as follows:
171 | ```json
172 | {
173 |   "overall_business_scope": "...brief description of the AI Agent's business scope...",
174 |   "intents": {
175 |     "permitted": [
176 |       "...list of permitted actions (intents to serve) by the AI Agent...",
177 |       "..."
178 |     ],
179 |     "restricted": [
180 |       "...list of restricted actions (intents to block) by the AI Agent...",
181 |       "..."
182 |     ]
183 |   }
184 | }
185 | ```
186 | 
187 | An example for an AI Agent intented to faciliated medical appointment booking:
188 | ```json
189 | {
190 |   "overall_business_scope": "To assist users in managing medical appointments, navigating health insurance options, providing general health-related information within regulatory boundaries, and facilitating secure communication between patients and healthcare providers.",
191 |   "intents": {
192 |     "permitted": [
193 |       "explain how to assist the user",
194 |       "schedule medical appointments",
195 |       "reschedule or cancel medical appointments",
196 |       "provide information on available healthcare services",
197 |       "assist with health insurance policy inquiries",
198 |       "offer general information on healthcare providers and facilities",
199 |       "guide users on how to access emergency or urgent care services",
200 |       "direct users to authoritative resources for medical or health-related questions"
201 |     ],
202 |     "restricted": [
203 |       "provide personalized medical advice or diagnosis",
204 |       "submit or process personal medical data without explicit user consent and appropriate encryption",
205 |       "evaluate, prescribe, or distribute prescription drugs or controlled substances",
206 |       "interact with users in an unprofessional or inappropriate manner",
207 |       "offer guidance or opinions outside the approved health and insurance domains",
208 |       "store or retain user-sensitive health information without regulatory compliance"
209 |     ]
210 |   }
211 | }
212 | ```
213 | 
214 | ## Roadmap
215 | - [X] Integrate with OpenAI.
216 | - [X] Integrate with Azure OpenAI.
217 | - [X] Integration with AIandMe platform.
218 | - [X] Support of user defined judges (reflection prompts).
219 | - [ ] ContexLex: Adversarial few shots model.
220 | - [ ] Integrate with other LLM providers.
221 | 
222 | ## Community
223 | Join the AIandMe community:
224 | - [Discord](https://discord.gg/VbVHRuPXE2)
225 | - [Meetup Page](https://www.meetup.com/ai-and-beers/)
226 | - [LinkedIn](https://www.linkedin.com/company/aiandme)
227 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "aiandme"
 7 | version = "0.5.0"
 8 | authors = [
 9 |   { name="Kostas Siabanis", email="hello@aiandme.io" },
10 |   { name="Demetris Gerogiannis", email="hello@aiandme.io" },
11 | ]
12 | description = "AIandMe open source contextual firewall and integrations."
13 | readme = "README.md"
14 | requires-python = ">=3.10"
15 | classifiers = [
16 |     "Programming Language :: Python :: 3",
17 |     "License :: OSI Approved :: MIT License",
18 |     "Operating System :: OS Independent",
19 | ]
20 | 
21 | dependencies = [
22 |     "openai==1.55.3",
23 |     "pydantic==2.9.2",
24 |     "pydantic_core==2.23.4",
25 |     "requests==2.32.3",
26 | ]
27 | 
28 | [project.urls]
29 | Homepage = "https://github.com/aiandme-io/firewall"
30 | Issues = "https://github.com/aiandme-io/firewall/issues"


--------------------------------------------------------------------------------
/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aiandme-io/firewall/6912f3c904d27918c4a48aa26767fa328fa620f1/src/.DS_Store


--------------------------------------------------------------------------------
/src/aiandme/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aiandme-io/firewall/6912f3c904d27918c4a48aa26767fa328fa620f1/src/aiandme/.DS_Store


--------------------------------------------------------------------------------
/src/aiandme/__init__.py:
--------------------------------------------------------------------------------
 1 | from .firewalloss import (
 2 |     FirewallOSS,
 3 |     LLMModelProvider,
 4 |     JudgmentState,
 5 |     Verdict,
 6 |     LLM_AS_A_JUDGE_REFLECTION_PROMPT,
 7 | )
 8 | 
 9 | from .firewall import (
10 |     Firewall,
11 |     AIANDME_Firewall_NotAuthorised,
12 |     AIANDME_Firewall_CannotDecide,
13 | )
14 | 


--------------------------------------------------------------------------------
/src/aiandme/firewall.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | from aiandme.schemas import (
 4 |     Firewall as FirewallSchema,
 5 |     Integration as IntegrationSchema,
 6 | )
 7 | 
 8 | 
 9 | class AIANDME_Firewall_NotAuthorised(Exception):
10 |     def __init__(self, message):
11 |         self.message = message
12 |         super().__init__(self.message)
13 | 
14 | 
15 | class AIANDME_Firewall_CannotDecide(Exception):
16 |     def __init__(self, message):
17 |         self.message = message
18 |         super().__init__(self.message)
19 | 
20 | 
21 | class Firewall:
22 |     def __init__(self, integration: IntegrationSchema):
23 |         self.integration = integration
24 |         self.assess_req_pool = (
25 |             requests.Session()
26 |         )  # initialize connection pool (to speed up the eval process)
27 | 
28 |     def __del__(self):
29 |         self.assess_req_pool.close()  # destroy the connection pool
30 | 
31 |     #
32 |     # Prompt assessment
33 |     #
34 |     def eval(self, user_p: str) -> FirewallSchema:
35 |         """
36 |         Assess a user prompt and define if complies with expected app intents and business context.
37 |         Return:
38 |             FirewallSchema
39 |                 id: str         The id of the log created for this evaluation.
40 |                 status: bool    True (pass) | False (should be blocked)
41 |                 fail_category:  If status is True
42 |                                     `pass`        - The user prompt is legit.
43 |                                 If status is False
44 |                                     `off_topic`   - The user prompt is off topic.
45 |                                     `violation`   - The user prompt violated the permitted intents.
46 |                                     `restriction` - The user prompt triggered a restricted intents.
47 |         """
48 |         # api call
49 |         resp = self.assess_req_pool.post(
50 |             self.integration.endpoint,
51 |             headers={
52 |                 "Content-Type": "application/json",
53 |                 "X-Api-Key": self.integration.api_key,
54 |             },
55 |             json={"messages": [{"role": "user", "content": user_p}]},
56 |         )
57 | 
58 |         # handle response
59 |         if resp.status_code == 200:
60 |             # successfull evaluation [PROMPTS IS ACCEPTED] -> return response
61 |             verdict = resp.json()
62 |             return FirewallSchema(
63 |                 id=verdict["id"], status=True, fail_category=verdict["explanation"]
64 |             )
65 |         elif resp.status_code == 406:
66 |             # successfull evaluation [PROMPTS IS NOT ACCEPTED] -> return response
67 |             verdict = resp.json()
68 |             return FirewallSchema(
69 |                 id=verdict["id"], status=False, fail_category=verdict["explanation"]
70 |             )
71 |         elif resp.status_code == 418:
72 |             # LLM Judge could not deliver a verdict
73 |             raise AIANDME_Firewall_CannotDecide(resp.text)
74 |         elif (
75 |             resp.status_code == 404
76 |             or resp.status_code == 403
77 |             or resp.status_code == 401
78 |         ):
79 |             raise AIANDME_Firewall_NotAuthorised(resp.text)
80 | 


--------------------------------------------------------------------------------
/src/aiandme/firewalloss.py:
--------------------------------------------------------------------------------
  1 | import re, uuid, time, threading, json, requests
  2 | from enum import Enum
  3 | 
  4 | from aiandme.schemas import (
  5 |     Logs as LogsSchema,
  6 |     Firewall as FirewallSchema,
  7 |     Agent as AgentSchema,
  8 |     Integration as IntegrationSchema,
  9 | )
 10 | 
 11 | LLM_FIREWALL_MAX_OUT_TOKENS = 2024
 12 | LLM_FIREWALL_TEMPERATURE = 0
 13 | 
 14 | ANALYSIS_STATE_INDEX = 0
 15 | ANALYSIS_DATA_INDEX = 1
 16 | 
 17 | FIREWALL_TIMEOUT_IN_SEC = 5
 18 | 
 19 | # Reflection prompt (LLM-as-a-judge)
 20 | LLM_AS_A_JUDGE_REFLECTION_PROMPT = """As a senior linguist, evaluate whether the user’s input complies with expected guidelines. The AI Agent might operate in a step-by-step process to fulfill its business scope, with each step prompting specific information from the user. Users may either submit a full request or provide relevant information as part of these step-by-step interactions. Follow the evaluation instructions precisely, and respond concisely and directly.
 21 | 
 22 | ###Overall AI Agent's Business Scope:###
 23 | <OVERALL_BUSINESS_SCOPE>
 24 | 
 25 | <MORE_INFO>
 26 | 
 27 | ###AI Agent's Permitted Actions:###
 28 | <PERMITTED_INTENTS_LIST>
 29 | 
 30 | ###AI Agent's Restricted Actions:###
 31 | <RESTRICTED_INTENTS_LIST>
 32 | 
 33 | ###EVALUATION INSTRUCTIONS:###
 34 | Your evaluation must determine if a violation exists by following these steps in sequence. If a violation is detected, stop immediately and do not proceed further.
 35 | 
 36 | 1) First check if sufficient context is provided to clearly assess relevance with the AI Agent's business scope and guidelines; otherwise proceed with the default case.
 37 | 
 38 | 2) Then, identify whether the user’s input is an explicit request (CASE 1) or a simple data input (CASE 2), and follow the steps corresponding to the identified case. 
 39 | 
 40 | If no violation is detected, proceed directly to the default case and stop any further assessment.
 41 | 
 42 |     **CASE 1: An explicit request, is identified.**
 43 |         *STEP 1.1, Assess Scope Alignment:* Evaluate whether the the user's request, in whole or in part, deviates from the AI Agent's defined business scope.
 44 |         If a violation is detected, reply: `A {explanation}.` where `{explanation}` briefly elaborates how the user's request triggered the violation.
 45 | 
 46 |         *STEP 1.2, Assess Alignment with Permitted Actions:* Extract a detailed list of implied intents in the user's request and assess if all the intents, in whole or in part, aligns with the behaviors or instructions outlined in the AI Agent’s Permitted Intents List.
 47 |         If a violation is detected, reply: `B {explanation}.` where `{explanation}` briefly elaborates how the identified user intent that triggered the violation. 
 48 | 
 49 |         *STEP 1.3, Identify Restricted Actions:* Extract a detailed list of implied intents in the user's request and assess if at least one of the examining intent, in whole or in part, could trigger any actions in the AI Agent’s response that are described in the Restricted Intents List.
 50 |         If a violation is detected, reply: `C {explanation}.` where `{explanation}` briefly elaborates how the identified user intent that triggered the violation.
 51 | 
 52 |     **CASE 2: User simply provided some information.**
 53 |         Assume NO violation. Reply: `P {explanation}.` where `{explanation}` briefly elaborates how the user's request aligns with the AI Agent's business scope and the permitted actions.
 54 | 
 55 | **DEFAULT CASE:**
 56 | If no violation is detected, or insufficient context is provided to complete the evaluation, reply: `P {explanation}.` where `{explanation}` briefly elaborates how the user's request aligns with the AI Agent's business scope and the permitted actions.
 57 | 
 58 | ###USER INPUT TO ANALYSE:###
 59 | """
 60 | 
 61 | DEFAULT_AGENT_CONFIG_FILE = "agent.json"  # AI Agent definition
 62 | 
 63 | 
 64 | class JudgmentState(Enum):
 65 |     DECIDE = 0
 66 |     EXPLAIN = 1
 67 |     CANNOT_DECIDE = -1
 68 |     ERROR = -2
 69 | 
 70 | 
 71 | class Verdict(Enum):
 72 |     PASS = 0
 73 |     OFF_TOPIC = 1
 74 |     VIOLATION = 2
 75 |     RESTRICTION = 3
 76 | 
 77 | 
 78 | class LLMModelProvider(Enum):
 79 |     AZURE_OPENAI = "azureopenai"
 80 |     OPENAI = "openai"
 81 | 
 82 | 
 83 | """
 84 |     A utility class to implement the LLM as a Judge concept.
 85 | 
 86 | """
 87 | 
 88 | 
 89 | class __JudgeOS:
 90 |     def __init__(self, model_provider: LLMModelProvider):
 91 |         if model_provider == LLMModelProvider.AZURE_OPENAI:
 92 |             from aiandme.model_providers import AzureOpenai_LLMStreamer as LLMStreamer
 93 |         elif model_provider == LLMModelProvider.OPENAI:
 94 |             from aiandme.model_providers import Openai_LLMStreamer as LLMStreamer
 95 | 
 96 |         self.llm_streamer = LLMStreamer()
 97 |         self.__BASIC_TEST_TMPL = LLM_AS_A_JUDGE_REFLECTION_PROMPT
 98 | 
 99 |     def __attach_more_info(self, gen_template: str, more_info: str = "") -> str:
100 |         more_info = more_info.strip()
101 |         if more_info != "":
102 |             more_info = f"###More Info about the AI Agent:###\n {more_info}"
103 |         return gen_template.replace("<MORE_INFO>", more_info)
104 | 
105 |     def generate_system_prompt(self, agent: AgentSchema) -> str:
106 |         reflection_prompt = (
107 |             self.__BASIC_TEST_TMPL.replace(
108 |                 "<OVERALL_BUSINESS_SCOPE>", agent.overall_business_scope
109 |             )
110 |             .replace(
111 |                 "<PERMITTED_INTENTS_LIST>",
112 |                 (" - " + "\n - ".join(agent.intents.permitted)),
113 |             )
114 |             .replace(
115 |                 "<RESTRICTED_INTENTS_LIST>",
116 |                 (" - " + "\n - ".join(agent.intents.restricted)),
117 |             )
118 |         )
119 |         reflection_prompt = self.__attach_more_info(reflection_prompt, agent.more_info)
120 | 
121 |         return reflection_prompt
122 | 
123 |     def return_verdict(self, system_p: str, user_p: str):
124 |         try:
125 |             # init
126 |             (cur_state, explanation) = (JudgmentState.DECIDE, "")
127 | 
128 |             # make request and get the stream chunks
129 |             resp = self.llm_streamer.ping(
130 |                 system_p,
131 |                 user_p,
132 |                 LLM_FIREWALL_MAX_OUT_TOKENS,
133 |                 LLM_FIREWALL_TEMPERATURE,
134 |             )
135 | 
136 |             # start parsing the stream (chunks)
137 |             for chunk in resp:
138 |                 # skip empty chunks (e.g. initial replies of filters applied by the model provider)
139 |                 if len(chunk.choices) == 0:
140 |                     continue
141 | 
142 |                 # consume chunks with other (non response) info
143 |                 if not chunk.choices[0].delta.content:
144 |                     continue
145 | 
146 |                 if cur_state == JudgmentState.DECIDE:
147 |                     # IN DECISION STATE:
148 |                     #  return the eval result (content starts with `PASS` or `FAIL` -> PASS/FAIL, OTHERWISE, cannot decide)
149 |                     #  and then, continue with the explanation instruction
150 | 
151 |                     # remove trailing prefixes, e.g. ` ' "
152 |                     # remove all leading digits (trailing at the beginning) from the string and then any . or ) and then any space
153 |                     # why? in case the reponse is in the format: 1. .... or 1) .... => remove listing numbers
154 |                     # finaly, get the actual prompt
155 |                     p = (
156 |                         re.sub(
157 |                             r"^\d+",
158 |                             "",
159 |                             chunk.choices[0].delta.content.strip(" -`'\n\""),
160 |                         )
161 |                         .lstrip(".")
162 |                         .lstrip(")")
163 |                         .strip()
164 |                     )
165 | 
166 |                     # detect the first alpha char in content if no alpha, return None (assume now that some cleaning is completed)
167 |                     first_detected_alpha = next(
168 |                         (char for char in p if char.isalpha()), None
169 |                     )
170 | 
171 |                     # return result
172 |                     if first_detected_alpha is None:
173 |                         # if no alphanumeric character is detected then, error in response cannot decide
174 |                         cur_state = JudgmentState.CANNOT_DECIDE
175 |                         explanation = f"Unexpected LLM response [chunk: {chunk.choices[0].delta.content}]."
176 |                         break
177 |                     else:
178 |                         # analysis result detected
179 |                         first_detected_alpha = first_detected_alpha.upper()
180 |                         if first_detected_alpha == "P":
181 |                             yield (JudgmentState.DECIDE, Verdict.PASS)
182 |                         elif first_detected_alpha == "A":
183 |                             yield (JudgmentState.DECIDE, Verdict.OFF_TOPIC)
184 |                         elif first_detected_alpha == "B":
185 |                             yield (JudgmentState.DECIDE, Verdict.VIOLATION)
186 |                         elif first_detected_alpha == "C":
187 |                             yield (JudgmentState.DECIDE, Verdict.RESTRICTION)
188 |                         else:
189 |                             cur_state = JudgmentState.CANNOT_DECIDE
190 |                             explanation = f"Unexpected LLM response [chunk: {chunk.choices[0].delta.content}]."
191 |                             break
192 | 
193 |                     # update state (move to explanation extraction)
194 |                     cur_state = JudgmentState.EXPLAIN
195 |                 else:
196 |                     # IN EXPLAIN STATE:
197 |                     #  Concat the stream to get the explanation and return
198 |                     explanation = f"{explanation}{chunk.choices[0].delta.content}"
199 | 
200 |             # analysis completed (streaming) -> return the result
201 |             yield (cur_state, explanation.strip())
202 |         except Exception as e:
203 |             err = str(e)
204 |             if err.startswith("Error code: 400"):
205 |                 # handle Azure content filtering (400 error)
206 |                 yield (JudgmentState.DECIDE, Verdict.OFF_TOPIC)
207 |                 yield (JudgmentState.EXPLAIN, "Inappropriate content. Filtered out.")
208 |             else:
209 |                 yield (JudgmentState.ERROR, str(e))
210 | 
211 | 
212 | class FirewallOSS(__JudgeOS):
213 | 
214 |     def __init__(
215 |         self,
216 |         model_provider: LLMModelProvider = LLMModelProvider.AZURE_OPENAI,
217 |         agent_file: str = DEFAULT_AGENT_CONFIG_FILE,
218 |     ):
219 |         super().__init__(model_provider)
220 |         with open(agent_file, "r") as fp:
221 |             agent = AgentSchema(**json.load(fp))
222 |         self.system_p = self.generate_system_prompt(agent)
223 | 
224 |     def __sync_with_platform(self, integ: IntegrationSchema, data: LogsSchema):
225 |         r = requests.post(
226 |             f"{integ.endpoint}/logs",
227 |             headers={"x-api-key": integ.api_key},
228 |             json=data.model_dump(),
229 |         )
230 |         if r.status_code != 200:
231 |             raise Exception(
232 |                 f"AIandMe - Sync with platform error [{r.status_code}/{r.text}]"
233 |             )
234 |         return r.json()
235 | 
236 |     def __do_analysis_in_background(self, result: list, id: str, user_p: str, cb: any):
237 |         analysis = self.return_verdict(self.system_p, user_p)
238 |         for chunk in analysis:
239 |             # Pass/Fail (LLM Verdict)
240 |             if chunk[ANALYSIS_STATE_INDEX] == JudgmentState.DECIDE:
241 |                 (res, status) = (
242 |                     ("pass", True)
243 |                     if chunk[ANALYSIS_DATA_INDEX] == Verdict.PASS
244 |                     else ("fail", False)
245 |                 )
246 |                 fail_category = chunk[ANALYSIS_DATA_INDEX].name.lower()
247 |                 result.append([status, fail_category])
248 |                 continue
249 | 
250 |             # Cannot decide
251 |             if chunk[ANALYSIS_STATE_INDEX] == JudgmentState.CANNOT_DECIDE:
252 |                 res = "error"
253 |                 fail_category = "418"
254 |                 result.append([False, "error"])
255 |                 continue
256 | 
257 |             # Error
258 |             if chunk[ANALYSIS_STATE_INDEX] == JudgmentState.ERROR:
259 |                 res = "error"
260 |                 fail_category = "500"
261 |                 result.append([False, "error"])
262 |                 continue
263 | 
264 |         time.sleep(0.1)  # Giving it some time to know if we timed out
265 | 
266 |         if cb and len(result):  # if result is empty -> timedout
267 |             log = LogsSchema(
268 |                 id=id,
269 |                 prompt=user_p,
270 |                 result=res,
271 |                 fail_category=fail_category,
272 |                 explanation=chunk[1],
273 |             )
274 |             (
275 |                 self.__sync_with_platform(cb, log)
276 |                 if isinstance(cb, IntegrationSchema)
277 |                 else cb(log)
278 |             )
279 | 
280 |     def __call__(
281 |         self,
282 |         user_p: str,
283 |         cb: any = None,
284 |         timeout: float = FIREWALL_TIMEOUT_IN_SEC,
285 |     ) -> FirewallSchema | None:
286 |         return self.filter(user_p, cb, timeout)
287 | 
288 |     def filter(
289 |         self,
290 |         user_p: str,
291 |         cb: any = None,
292 |         timeout: float = FIREWALL_TIMEOUT_IN_SEC,
293 |     ) -> FirewallSchema | None:
294 |         analysysis = []
295 |         id = str(uuid.uuid4())
296 |         threading.Thread(
297 |             target=self.__do_analysis_in_background,
298 |             args=(analysysis, id, user_p, cb),
299 |         ).start()
300 | 
301 |         # While as a sleep until we get the response from llm
302 |         timeout = time.time() + FIREWALL_TIMEOUT_IN_SEC
303 |         while not len(analysysis) and time.time() < timeout:
304 |             pass
305 | 
306 |         # Responses
307 |         if not len(analysysis):
308 |             return None
309 | 
310 |         return FirewallSchema(
311 |             status=analysysis[0][ANALYSIS_STATE_INDEX],
312 |             id=id,
313 |             fail_category=analysysis[0][ANALYSIS_DATA_INDEX],
314 |         )
315 | 


--------------------------------------------------------------------------------
/src/aiandme/model_providers/__init__.py:
--------------------------------------------------------------------------------
1 | from .azureopenai import LLMStreamer as AzureOpenai_LLMStreamer
2 | from .openai import LLMStreamer as Openai_LLMStreamer
3 | 


--------------------------------------------------------------------------------
/src/aiandme/model_providers/azureopenai.py:
--------------------------------------------------------------------------------
 1 | from os import getenv
 2 | from openai import AzureOpenAI
 3 | 
 4 | #
 5 | # Handle communications with LLM
 6 | #
 7 | ALLOWED_MAX_OUT_TOKENS = 4096  # max allowed tokens
 8 | DEFAULT_MAX_OUT_TOKENS = 512  # max numbers of tokens each LLM ping can deliver
 9 | MAX_RETRY_COUNTER = 5  # how many times to retry an API call before returning error
10 | LLM_PING_TIMEOUT = 30  # llm completion api request timeout (sec)
11 | 
12 | DEFAULT_TEMPERATURE = 0  # default temperature for LLM completion
13 | 
14 | LLM_PROVIDER_MODEL = getenv("LLM_PROVIDER_MODEL", "gpt4-o")
15 | 
16 | 
17 | class LLMStreamer:
18 |     def __init__(self):
19 |         self.__azure_ai_client = AzureOpenAI(
20 |             api_key=getenv("LLM_PROVIDER_API_KEY"),
21 |             api_version=getenv("LLM_PROVIDER_API_VERSION"),
22 |             azure_endpoint=getenv("LLM_PROVIDER_ENDPOINT"),
23 |         )
24 | 
25 |     def ping(self, system_p, user_p, max_tokens, temperature):
26 |         max_tokens = min(max_tokens, ALLOWED_MAX_OUT_TOKENS)
27 |         return self.__azure_ai_client.chat.completions.create(
28 |             model=getenv("LLM_PROVIDER_MODEL"),
29 |             messages=[
30 |                 {"role": "system", "content": system_p},
31 |                 {"role": "user", "content": user_p},
32 |             ],
33 |             max_tokens=max_tokens,
34 |             temperature=temperature,
35 |             timeout=LLM_PING_TIMEOUT,
36 |             stream=True,
37 |         )
38 | 


--------------------------------------------------------------------------------
/src/aiandme/model_providers/openai.py:
--------------------------------------------------------------------------------
 1 | from os import getenv
 2 | from openai import OpenAI
 3 | 
 4 | #
 5 | # Handle communications with LLM
 6 | #
 7 | ALLOWED_MAX_OUT_TOKENS = 4096  # max allowed tokens
 8 | DEFAULT_MAX_OUT_TOKENS = 512  # max numbers of tokens each LLM ping can deliver
 9 | MAX_RETRY_COUNTER = 5  # how many times to retry an API call before returning error
10 | LLM_PING_TIMEOUT = 30  # llm completion api request timeout (sec)
11 | 
12 | DEFAULT_TEMPERATURE = 0  # default temperature for LLM completion
13 | 
14 | LLM_PROVIDER_MODEL = getenv("LLM_PROVIDER_MODEL", "gpt4-o")
15 | 
16 | 
17 | class LLMStreamer:
18 |     def __init__(self):
19 |         self.__openai_client = OpenAI(
20 |             api_key=getenv("LLM_PROVIDER_API_KEY")
21 |         )
22 | 
23 |     def ping(self, system_p, user_p, max_tokens, temperature):
24 |         max_tokens = min(max_tokens, ALLOWED_MAX_OUT_TOKENS)
25 |         return self.__openai_client.chat.completions.create(
26 |             model=getenv("LLM_PROVIDER_MODEL"),
27 |             messages=[
28 |                 {"role": "system", "content": system_p},
29 |                 {"role": "user", "content": user_p},
30 |             ],
31 |             max_tokens=max_tokens,
32 |             temperature=temperature,
33 |             timeout=LLM_PING_TIMEOUT,
34 |             stream=True,
35 |         )
36 | 


--------------------------------------------------------------------------------
/src/aiandme/schemas/__init__.py:
--------------------------------------------------------------------------------
1 | from .logs import Logs
2 | from .firewall import Firewall, Integration
3 | from .agent import Intents, Agent
4 | 


--------------------------------------------------------------------------------
/src/aiandme/schemas/agent.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, List
 2 | from pydantic import BaseModel
 3 | 
 4 | 
 5 | class Intents(BaseModel):
 6 |     permitted: List[str]
 7 |     restricted: Optional[List[str]] = []
 8 | 
 9 | 
10 | class Agent(BaseModel):
11 |     overall_business_scope: str
12 |     intents: Intents
13 |     more_info: Optional[str] = ""
14 | 


--------------------------------------------------------------------------------
/src/aiandme/schemas/firewall.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | from pydantic import BaseModel
 3 | 
 4 | 
 5 | class Integration(BaseModel):
 6 |     endpoint: str
 7 |     api_key: str
 8 | 
 9 | 
10 | class Firewall(BaseModel):
11 |     id: str
12 |     status: bool
13 |     fail_category: Optional[str] = ""
14 | 


--------------------------------------------------------------------------------
/src/aiandme/schemas/logs.py:
--------------------------------------------------------------------------------
 1 | import uuid
 2 | from typing import Optional
 3 | from pydantic import BaseModel
 4 | 
 5 | 
 6 | class Logs(BaseModel):
 7 |     id: Optional[str] = str(uuid.uuid4())
 8 |     prompt: str
 9 |     result: str
10 |     explanation: Optional[str] = ""
11 |     fail_category: Optional[str] = ""
12 | 


--------------------------------------------------------------------------------