├── .flake8 ├── .gitignore ├── LICENSE ├── README.md ├── examples ├── chat_client │ ├── async_chat_client.py │ ├── async_stream_chat_client.py │ ├── chat_client.py │ ├── standard_logger_chat_client.py │ └── stream_chat_client.py ├── legacy_chat_completion │ ├── chat_completion.py │ ├── chat_completion_async.py │ ├── chat_completion_async_stream.py │ ├── chat_completion_functions.py │ ├── chat_completion_functions_rest.py │ ├── chat_completion_rest.py │ └── chat_completion_stream.py └── legacy_completion │ ├── completion.py │ ├── completion_async.py │ ├── completion_async_stream.py │ ├── completion_langchain.py │ ├── completion_rest.py │ ├── completion_rest_async.py │ ├── completion_stream.py │ ├── in_memory_logging.py │ ├── log_to_file.py │ └── standard_logging.py ├── mona_openai ├── __init__.py ├── analysis │ ├── analyzer.py │ ├── privacy.py │ ├── profanity.py │ ├── textual.py │ └── util.py ├── endpoints │ ├── chat_completion.py │ ├── completion.py │ ├── endpoint_wrapping.py │ └── wrapping_getter.py ├── exceptions.py ├── loggers │ ├── __init__.py │ ├── file_logger.py │ ├── in_memory_logging.py │ ├── logger.py │ ├── mona_logger │ │ ├── mona_client.py │ │ └── mona_logger.py │ └── standard_logging.py ├── mona_openai_client.py ├── mona_openai_create.py ├── mona_openai_legacy.py ├── mona_openai_logging.py └── util │ ├── async_util.py │ ├── func_util.py │ ├── general_consts.py │ ├── object_util.py │ ├── openai_util.py │ ├── stream_util.py │ ├── tokens_util.py │ ├── typing_util.py │ └── validation_util.py ├── pyproject.toml ├── requirements.txt └── tests ├── __init__.py ├── mocks ├── mock_mona_client.py └── mock_openai.py ├── test_chat_completion.py ├── test_completion.py ├── test_privacy_analyzer.py └── test_textual_analyzer.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | per-file-ignores = __init__.py:F401,F403 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | **/__pycache__/ 3 | 4 | .pytest_cache 5 | 6 | .vscode 7 | 8 | *.ipynb 9 | 10 | dist 11 | mona_openai.egg-info 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mona-OpenAI Integration Client 2 |

3 | Mona's logo

7 | 8 | 9 | Use one line of code to get instant live monitoring for your OpenAI usage, including: 10 | * Token usage 11 | * Hallucination alerts 12 | * Profanity and privacy analyses 13 | * Behavioral drifts and anomalies 14 | * LangChain support 15 | * Much much more 16 | 17 | ## Setting Up 18 | 19 | ```console 20 | $ pip install mona_openai 21 | ``` 22 | 23 | ## Quick Start 24 | 25 | You can find boilerplate code for many use cases under [the "examples" folder](https://github.com/monalabs/mona-openai/tree/main/examples). 26 | 27 | ### With Mona 28 | 29 | [Sign up for a free Mona account here](https://www.monalabs.io/openai-gpt-monitoring). 30 | 31 | ```py 32 | from openai import OpenAI 33 | from os import environ 34 | from mona_openai import monitor_client 35 | 36 | MONA_API_KEY = environ.get("MONA_API_KEY") 37 | MONA_SECRET = environ.get("MONA_SECRET") 38 | MONA_CREDS = { 39 | "key": MONA_API_KEY, 40 | "secret": MONA_SECRET, 41 | } 42 | 43 | # This is the name of the monitoring class on Mona 44 | MONITORING_CONTEXT_NAME = "NEW_CHAT_CLIENT_CONTEXT" 45 | 46 | openAI_client = monitor_client(OpenAI(api_key=environ.get("OPEN_AI_KEY")), MONA_CREDS, MONITORING_CONTEXT_NAME) 47 | 48 | response = openAI_client.chat.completions.create( 49 | model="gpt-3.5-turbo", 50 | messages=[ 51 | {"role": "system", "content": "You are a helpful assistant."}, 52 | {"role": "user", "content": "Who won the world series in 2020?"}, 53 | {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 54 | {"role": "user", "content": "Where was it played?"} 55 | ] 56 | ) 57 | print(response.choices[0].message.content) 58 | ``` 59 | 60 | ### With Standard Logging 61 | 62 | ```py 63 | from openai import OpenAI 64 | from os import environ 65 | 66 | from mona_openai import monitor_client_with_logger 67 | 68 | from mona_openai.loggers import StandardLogger 69 | from logging import WARNING 70 | 71 | MONA_API_KEY = environ.get("MONA_API_KEY") 72 | MONA_SECRET = environ.get("MONA_SECRET") 73 | MONA_CREDS = { 74 | "key": MONA_API_KEY, 75 | "secret": MONA_SECRET, 76 | } 77 | 78 | # This is the name of the monitoring class on Mona 79 | MONITORING_CONTEXT_NAME = "NEW_CHAT_CLIENT_CONTEXT" 80 | 81 | openAI_client = monitor_client_with_logger(OpenAI(api_key=environ.get("OPEN_AI_KEY")), StandardLogger(WARNING)) 82 | 83 | response = openAI_client.chat.completions.create( 84 | model="gpt-3.5-turbo", 85 | messages=[ 86 | {"role": "system", "content": "You are a helpful assistant."}, 87 | {"role": "user", "content": "Who won the world series in 2020?"}, 88 | {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 89 | {"role": "user", "content": "Where was it played?"} 90 | ] 91 | ) 92 | print(response.choices[0].message.content) 93 | ``` 94 | 95 | ## Supported OpenAI APIs 96 | Currently, this client supports only the Chat Completion API. Mona itself, used without this client, can support processes based on other APIs as well as non-OpenAI-based apps. 97 | If you have a different use case, we'd love to hear about it! Please email us at support@monalabs.io. 98 | 99 | ## Usage 100 | ### Initialization 101 | 102 | The main functions exposed in this package are `monitor_client` and `monitor_client_with_logger`. 103 | 104 | These functions return an OpenAI client that wraps the original chat completion method with an equivalent API that also logs relevant metrics for monitoring behind the scenes. 105 | 106 | See the quick start examples above for usage.
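For example, the wrapped client works the same way with any of the loggers bundled under `mona_openai.loggers`. The sketch below pairs `monitor_client_with_logger` with `FileLogger`; the file path, model, and message are placeholder choices, and it assumes `FileLogger` can be passed wherever `StandardLogger` is used in the quick start above:

```py
from os import environ

from openai import OpenAI

from mona_openai import monitor_client_with_logger
from mona_openai.loggers import FileLogger

# Write the monitoring records to a local file instead of sending them to Mona.
openAI_client = monitor_client_with_logger(
    OpenAI(api_key=environ.get("OPEN_AI_KEY")),
    FileLogger("mona_monitoring_log.txt"),
)

response = openAI_client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response.choices[0].message.content)
```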
107 | 108 | #### Specs 109 | The specs arg allows you to configure what should be monitored. It expects a Python dict with the following possible keys: 110 | * sampling_ratio (1): A number between 0 and 1 determining how often the call should be logged. 111 | * avoid_monitoring_exceptions (False): Whether or not to avoid logging to Mona when there is an OpenAI exception. The default is to track exceptions, and Mona will alert you on things like a jump in the number of exceptions. 112 | * export_prompt (False): Whether Mona should export the actual prompt text. Set to False by default to avoid privacy concerns. 113 | * export_response_texts (False): Whether Mona should export the actual response texts. Set to False by default to avoid privacy concerns. 114 | * analysis: A dictionary mapping each analysis type to a boolean value telling the client whether or not to run said analysis and log it to Mona. Possible options currently are "privacy", "profanity", and "textual". By default, all analyses take place and are logged out to Mona. 115 | 116 | ### Using custom loggers 117 | You don't have to have a Mona account to use this package. You can define specific loggers to log the data out to a file, to memory, or to a given Python logger. 118 | 119 | This SDK provides a simple interface for implementing your own loggers by inheriting from Logger under loggers/logger.py. 120 | Alternatively, by using the standard Python logging library as in the example, you can create logging handlers to log the data out to any mechanism you choose (e.g., Kafka, Logstash, etc.). 121 | 122 | ### Mona arguments you can add to the API call 123 | 124 | * MONA_context_id: The unique id of the context in which the call is made. By using this ID you can export more data to the same context in Mona from other places. If not supplied, the "id" field of the OpenAI endpoint's response will be used as the Mona context ID automatically. 125 | * MONA_export_timestamp: Can be used to simulate as if the current call was made at a different time, as far as Mona is concerned. 126 | * MONA_additional_data: A JSON-serializable dict with any other data you want to add to the monitoring context. This comes in handy if you want to add more information to the monitoring context that isn't part of the basic OpenAI API call information. For example, if you are using a specific template ID or if this call is being made for a specific customer ID, these are fields you can add there to help get full context when monitoring with Mona. 127 | 128 | 129 | ### Using OpenAI with REST calls instead of OpenAI's Python client 130 | 131 | See the REST examples in the legacy examples folder. 132 | 133 | ### Stream support 134 | 135 | OpenAI allows receiving responses as a stream of tokens using the "stream" parameter. When this is done, Mona will collect all the tokens in memory (without interrupting the streaming process) and will create the analysis and log out the data the moment the stream is over. You don't need to do anything to make this happen. 136 | 137 | Since OpenAI doesn't supply the full usage token summary for streaming responses, Mona uses the tiktoken package to calculate the prompt and completion token counts and log them for monitoring, as sketched below. 138 | 139 | NOTE: Stream is currently only supported with SDK usage, and not with using REST directly.
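To give a sense of that token accounting, here is a minimal, illustrative sketch using tiktoken. It is not mona_openai's internal code — the helper name and the returned field names are assumptions made for illustration only:

```py
import tiktoken


def count_stream_tokens(
    prompt: str, streamed_chunks: list[str], model: str = "gpt-3.5-turbo"
) -> dict:
    # Pick the tokenizer that matches the model used for the call.
    encoding = tiktoken.encoding_for_model(model)
    prompt_tokens = len(encoding.encode(prompt))
    # Join the streamed chunks to recover the full completion text.
    completion_tokens = len(encoding.encode("".join(streamed_chunks)))
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }
```

Counting the prompt side of a chat call is slightly more involved in practice (each message carries a few tokens of formatting overhead), so treat numbers produced this way as an approximation rather than an exact reproduction of OpenAI's usage report.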
140 | 141 | ## Legacy LangChain support 142 | 143 | You can use the exported `monitor_langchain_llm` to wrap a LangChain OpenAI LLM (chat or normal) with Mona's monitoring capabilities: 144 | 145 | ```py 146 | from mona_openai import monitor_langchain_llm 147 | 148 | from langchain.llms import OpenAI 149 | 150 | # Wrap the LLM object with Mona monitoring. 151 | llm = monitor_langchain_llm( 152 | OpenAI(openai_api_key=OPEN_AI_KEY), 153 | MONA_CREDS, 154 | MONITORING_CONTEXT_NAME) 155 | ``` 156 | 157 | See the full example in completion_langchain.py in the examples folder. 158 | 159 | ## Mona SDK 160 | 161 | This package uses the mona_sdk package to export the relevant data to Mona. There are several environment variables you can use to configure the SDK's behavior. For example, you can set it up to raise exceptions when exporting data to Mona fails (it doesn't do that by default). 162 | 163 | ## Monitoring for profanity 164 | 165 | Mona uses the alt-profanity-check package (https://pypi.org/project/alt-profanity-check/) to create both boolean predictions and probability scores for the existence of profanity, both in the prompt and in the responses. We use the built-in package methods for that. If you want, for example, to use a different probability threshold for the boolean prediction, you can do that by changing your Mona config on the Mona dashboard. 166 | 167 | ## Using nest-asyncio 168 | 169 | In environments in which there's a forever-running event loop (e.g., Jupyter notebooks), the client might use [nest_asyncio.apply()](https://pypi.org/project/nest-asyncio/) to run joint sync and async code. -------------------------------------------------------------------------------- /examples/chat_client/async_chat_client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from os import environ 3 | 4 | from openai import AsyncOpenAI 5 | 6 | from mona_openai import monitor_client 7 | 8 | MONA_API_KEY = environ.get("MONA_API_KEY") 9 | MONA_SECRET = environ.get("MONA_SECRET") 10 | MONA_CREDS = { 11 | "key": MONA_API_KEY, 12 | "secret": MONA_SECRET, 13 | } 14 | 15 | # This is the name of the monitoring class on Mona 16 | MONITORING_CONTEXT_NAME = "NEW_CHAT_CLIENT_CONTEXT" 17 | 18 | openAI_client = monitor_client(AsyncOpenAI(api_key=environ.get("OPEN_AI_KEY")), MONA_CREDS, MONITORING_CONTEXT_NAME) 19 | 20 | 21 | 22 | response = asyncio.run(openAI_client.chat.completions.create( 23 | model="gpt-3.5-turbo", 24 | messages=[ 25 | {"role": "system", "content": "You are a helpful assistant."}, 26 | {"role": "user", "content": "Who won the world series in 2020?"}, 27 | {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 28 | {"role": "user", "content": "Where was it played?"} 29 | ], 30 | max_tokens=20, 31 | n=1, 32 | temperature=0.2, 33 | # Adding additional information for monitoring purposes, unrelated to 34 | # internal OpenAI call.
35 | MONA_additional_data={"customer_id": "A531251"}, 36 | )) 37 | print(response.choices[0].message.content) 38 | -------------------------------------------------------------------------------- /examples/chat_client/async_stream_chat_client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from os import environ 3 | 4 | from openai import AsyncOpenAI 5 | 6 | from mona_openai import monitor_client 7 | 8 | MONA_API_KEY = environ.get("MONA_API_KEY") 9 | MONA_SECRET = environ.get("MONA_SECRET") 10 | MONA_CREDS = { 11 | "key": MONA_API_KEY, 12 | "secret": MONA_SECRET, 13 | } 14 | 15 | # This is the name of the monitoring class on Mona 16 | MONITORING_CONTEXT_NAME = "NEW_CHAT_CLIENT_CONTEXT" 17 | 18 | openAI_client = monitor_client(AsyncOpenAI(api_key=environ.get("OPEN_AI_KEY")), MONA_CREDS, MONITORING_CONTEXT_NAME) 19 | 20 | 21 | async def openAI_logic(): 22 | response = await openAI_client.chat.completions.create( 23 | model="gpt-3.5-turbo", 24 | stream=True, 25 | messages=[ 26 | {"role": "system", "content": "You are a helpful assistant."}, 27 | {"role": "user", "content": "Who won the world series in 2020?"}, 28 | {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 29 | {"role": "user", "content": "Where was it played?"} 30 | ], 31 | max_tokens=20, 32 | n=1, 33 | temperature=0.2, 34 | # Adding additional information for monitoring purposes, unrelated to 35 | # internal OpenAI call. 36 | MONA_additional_data={"customer_id": "A531251"}, 37 | ) 38 | async for event in response: 39 | print(event.choices[0].delta.content) 40 | 41 | asyncio.run(openAI_logic()) -------------------------------------------------------------------------------- /examples/chat_client/chat_client.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | from openai import OpenAI 4 | 5 | from mona_openai import monitor_client 6 | 7 | MONA_API_KEY = environ.get("MONA_API_KEY") 8 | MONA_SECRET = environ.get("MONA_SECRET") 9 | MONA_CREDS = { 10 | "key": MONA_API_KEY, 11 | "secret": MONA_SECRET, 12 | } 13 | 14 | # This is the name of the monitoring class on Mona 15 | MONITORING_CONTEXT_NAME = "NEW_CHAT_CLIENT_CONTEXT" 16 | 17 | openAI_client = monitor_client(OpenAI(api_key=environ.get("OPEN_AI_KEY")), MONA_CREDS, MONITORING_CONTEXT_NAME) 18 | 19 | response = openAI_client.chat.completions.create( 20 | model="gpt-3.5-turbo", 21 | messages=[ 22 | {"role": "system", "content": "You are a helpful assistant."}, 23 | {"role": "user", "content": "Who won the world series in 2020?"}, 24 | {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 25 | {"role": "user", "content": "Where was it played?"} 26 | ], 27 | max_tokens=20, 28 | n=1, 29 | temperature=0.2, 30 | # Adding additional information for monitoring purposes, unrelated to 31 | # internal OpenAI call. 
32 | MONA_additional_data={"customer_id": "A531251"}, 33 | ) 34 | print(response.choices[0].message.content) 35 | -------------------------------------------------------------------------------- /examples/chat_client/standard_logger_chat_client.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from os import environ 3 | 4 | from mona_openai import monitor_client_with_logger 5 | 6 | from mona_openai.loggers import StandardLogger 7 | from logging import WARNING 8 | 9 | MONA_API_KEY = environ.get("MONA_API_KEY") 10 | MONA_SECRET = environ.get("MONA_SECRET") 11 | MONA_CREDS = { 12 | "key": MONA_API_KEY, 13 | "secret": MONA_SECRET, 14 | } 15 | 16 | # This is the name of the monitoring class on Mona 17 | MONITORING_CONTEXT_NAME = "NEW_CHAT_CLIENT_CONTEXT" 18 | 19 | openAI_client = monitor_client_with_logger(OpenAI(api_key=environ.get("OPEN_AI_KEY")), StandardLogger(WARNING)) 20 | 21 | response = openAI_client.chat.completions.create( 22 | model="gpt-3.5-turbo", 23 | messages=[ 24 | {"role": "system", "content": "You are a helpful assistant."}, 25 | {"role": "user", "content": "Who won the world series in 2020?"}, 26 | {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 27 | {"role": "user", "content": "Where was it played?"} 28 | ] 29 | ) 30 | print(response.choices[0].message.content) -------------------------------------------------------------------------------- /examples/chat_client/stream_chat_client.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | from openai import OpenAI 4 | 5 | from mona_openai import monitor_client 6 | 7 | MONA_API_KEY = environ.get("MONA_API_KEY") 8 | MONA_SECRET = environ.get("MONA_SECRET") 9 | MONA_CREDS = { 10 | "key": MONA_API_KEY, 11 | "secret": MONA_SECRET, 12 | } 13 | 14 | # This is the name of the monitoring class on Mona 15 | MONITORING_CONTEXT_NAME = "NEW_CHAT_CLIENT_CONTEXT" 16 | 17 | openAI_client = monitor_client(OpenAI(api_key=environ.get("OPEN_AI_KEY")), MONA_CREDS, MONITORING_CONTEXT_NAME) 18 | 19 | 20 | response = openAI_client.chat.completions.create( 21 | model="gpt-3.5-turbo", 22 | stream=True, 23 | messages=[ 24 | {"role": "system", "content": "You are a helpful assistant."}, 25 | {"role": "user", "content": "Who won the world series in 2020?"}, 26 | {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 27 | {"role": "user", "content": "Where was it played?"} 28 | ], 29 | max_tokens=20, 30 | n=1, 31 | temperature=0.2, 32 | # Adding additional information for monitoring purposes, unrelated to 33 | # internal OpenAI call. 
34 | MONA_additional_data={"customer_id": "A531251"}, 35 | ) 36 | 37 | for event in response: 38 | print(event.choices[0].delta.content) -------------------------------------------------------------------------------- /examples/legacy_chat_completion/chat_completion.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | 5 | from mona_openai import monitor 6 | 7 | openai.api_key = environ.get("OPEN_AI_KEY") 8 | 9 | MONA_API_KEY = environ.get("MONA_API_KEY") 10 | MONA_SECRET = environ.get("MONA_SECRET") 11 | MONA_CREDS = { 12 | "key": MONA_API_KEY, 13 | "secret": MONA_SECRET, 14 | } 15 | 16 | # This is the name of the monitoring class on Mona 17 | MONITORING_CONTEXT_NAME = "MONITORED_CHAT_COMPLETION_USE_CASE_NAME" 18 | 19 | monitored_chat_completion = monitor( 20 | openai.ChatCompletion, 21 | MONA_CREDS, 22 | MONITORING_CONTEXT_NAME, 23 | ) 24 | 25 | response = monitored_chat_completion.create( 26 | model="gpt-3.5-turbo", 27 | messages=[ 28 | {"role": "user", "content": "I want to generate some text about "} 29 | ], 30 | max_tokens=20, 31 | n=1, 32 | temperature=0.2, 33 | # Adding additional information for monitoring purposes, unrelated to 34 | # internal OpenAI call. 35 | MONA_additional_data={"customer_id": "A531251"}, 36 | ) 37 | print(response.choices[0].message.content) 38 | -------------------------------------------------------------------------------- /examples/legacy_chat_completion/chat_completion_async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from os import environ 3 | 4 | import openai 5 | 6 | from mona_openai import monitor 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | MONA_API_KEY = environ.get("MONA_API_KEY") 11 | MONA_SECRET = environ.get("MONA_SECRET") 12 | MONA_CREDS = { 13 | "key": MONA_API_KEY, 14 | "secret": MONA_SECRET, 15 | } 16 | 17 | # This is the name of the monitoring class on Mona 18 | MONITORING_CONTEXT_NAME = "MONITORED_CHAT_COMPLETION_USE_CASE_NAME" 19 | 20 | monitored_chat_completion = monitor( 21 | openai.ChatCompletion, 22 | MONA_CREDS, 23 | MONITORING_CONTEXT_NAME, 24 | ) 25 | 26 | 27 | response = asyncio.run( 28 | monitored_chat_completion.acreate( 29 | model="gpt-3.5-turbo", 30 | messages=[ 31 | {"role": "user", "content": "I want to generate some text about "} 32 | ], 33 | max_tokens=20, 34 | n=1, 35 | temperature=0.2, 36 | # Adding additional information for monitoring purposes, unrelated to 37 | # internal OpenAI call. 
38 | MONA_additional_data={"customer_id": "A531251"}, 39 | ) 40 | ) 41 | 42 | print(response.choices[0].message.content) 43 | -------------------------------------------------------------------------------- /examples/legacy_chat_completion/chat_completion_async_stream.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from os import environ 3 | 4 | import openai 5 | 6 | from mona_openai import monitor 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | MONA_API_KEY = environ.get("MONA_API_KEY") 11 | MONA_SECRET = environ.get("MONA_SECRET") 12 | MONA_CREDS = { 13 | "key": MONA_API_KEY, 14 | "secret": MONA_SECRET, 15 | } 16 | 17 | # This is the name of the monitoring class on Mona 18 | MONITORING_CONTEXT_NAME = "MONITORED_CHAT_COMPLETION_USE_CASE_NAME" 19 | 20 | monitored_chat_completion = monitor( 21 | openai.ChatCompletion, 22 | MONA_CREDS, 23 | MONITORING_CONTEXT_NAME, 24 | ) 25 | 26 | 27 | async def openAI_logic(): 28 | response = await monitored_chat_completion.acreate( 29 | stream=True, 30 | model="gpt-3.5-turbo", 31 | messages=[ 32 | {"role": "user", "content": "I want to generate some text about "} 33 | ], 34 | max_tokens=20, 35 | n=1, 36 | temperature=0.2, 37 | # Adding additional information for monitoring purposes, unrelated to 38 | # internal OpenAI call. 39 | MONA_additional_data={"customer_id": "A531251"}, 40 | ) 41 | async for event in response: 42 | print(event.choices[0].delta.get("content", "")) 43 | 44 | 45 | asyncio.run(openAI_logic()) 46 | -------------------------------------------------------------------------------- /examples/legacy_chat_completion/chat_completion_functions.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os import environ 3 | 4 | import openai 5 | 6 | from mona_openai import monitor 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | MONA_API_KEY = environ.get("MONA_API_KEY") 11 | MONA_SECRET = environ.get("MONA_SECRET") 12 | MONA_CREDS = { 13 | "key": MONA_API_KEY, 14 | "secret": MONA_SECRET, 15 | } 16 | 17 | # This is the name of the monitoring class on Mona. 18 | MONITORING_CONTEXT_NAME = "MONITORED_CHAT_COMPLETION_USE_CASE_NAME" 19 | 20 | monitored_chat_completion = monitor( 21 | openai.ChatCompletion, 22 | MONA_CREDS, 23 | MONITORING_CONTEXT_NAME, 24 | ) 25 | 26 | 27 | def get_current_weather(location, unit="fahrenheit"): 28 | """Get the current weather in a given location""" 29 | weather_info = { 30 | "location": location, 31 | "temperature": "72", 32 | "unit": unit, 33 | "forecast": ["sunny", "windy"], 34 | } 35 | return json.dumps(weather_info) 36 | 37 | 38 | def run_conversation(): 39 | messages = [ 40 | {"role": "user", "content": "What's the weather like in Boston?"} 41 | ] 42 | functions = [ 43 | { 44 | "name": "get_current_weather", 45 | "description": "Get the current weather in a given location", 46 | "parameters": { 47 | "type": "object", 48 | "properties": { 49 | "location": { 50 | "type": "string", 51 | "description": "The city and state, e.g. 
San " 52 | "Francisco, CA", 53 | }, 54 | "unit": { 55 | "type": "string", 56 | "enum": ["celsius", "fahrenheit"], 57 | }, 58 | }, 59 | "required": ["location"], 60 | }, 61 | } 62 | ] 63 | 64 | response = monitored_chat_completion.create( 65 | model="gpt-3.5-turbo-0613", 66 | messages=messages, 67 | functions=functions, 68 | function_call="auto", 69 | ) 70 | response_message = response["choices"][0]["message"] 71 | 72 | if response_message.get("function_call"): 73 | available_functions = { 74 | "get_current_weather": get_current_weather, 75 | } 76 | function_name = response_message["function_call"]["name"] 77 | fuction_to_call = available_functions[function_name] 78 | function_args = json.loads( 79 | response_message["function_call"]["arguments"] 80 | ) 81 | function_response = fuction_to_call( 82 | location=function_args.get("location"), 83 | unit=function_args.get("unit"), 84 | ) 85 | 86 | messages.append(response_message) 87 | messages.append( 88 | { 89 | "role": "function", 90 | "name": function_name, 91 | "content": function_response, 92 | } 93 | ) 94 | second_response = monitored_chat_completion.create( 95 | model="gpt-3.5-turbo-0613", 96 | messages=messages, 97 | ) 98 | return second_response 99 | 100 | 101 | print(run_conversation()) 102 | -------------------------------------------------------------------------------- /examples/legacy_chat_completion/chat_completion_functions_rest.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os import environ 3 | 4 | import requests 5 | 6 | from mona_openai import get_rest_monitor 7 | 8 | MONA_API_KEY = environ.get("MONA_API_KEY") 9 | MONA_SECRET = environ.get("MONA_SECRET") 10 | MONA_CREDS = { 11 | "key": MONA_API_KEY, 12 | "secret": MONA_SECRET, 13 | } 14 | 15 | # This is the name of the monitoring class on Mona 16 | MONITORING_CONTEXT_NAME = "MONITORED_CHAT_COMPLETION_USE_CASE_NAME" 17 | # Get Mona logger 18 | mona_logger = get_rest_monitor( 19 | "ChatCompletion", 20 | MONA_CREDS, 21 | MONITORING_CONTEXT_NAME, 22 | ) 23 | 24 | # Set up the API endpoint URL and authentication headers 25 | url = "https://api.openai.com/v1/chat/completions" 26 | headers = { 27 | "Content-Type": "application/json", 28 | "Authorization": f"Bearer {environ.get('OPEN_AI_KEY')}", 29 | } 30 | 31 | 32 | def get_current_weather(location, unit="fahrenheit"): 33 | """Get the current weather in a given location""" 34 | weather_info = { 35 | "location": location, 36 | "temperature": "72", 37 | "unit": unit, 38 | "forecast": ["sunny", "windy"], 39 | } 40 | return json.dumps(weather_info) 41 | 42 | 43 | def run_conversation(): 44 | messages = [ 45 | {"role": "user", "content": "What's the weather like in Boston?"} 46 | ] 47 | functions = [ 48 | { 49 | "name": "get_current_weather", 50 | "description": "Get the current weather in a given location", 51 | "parameters": { 52 | "type": "object", 53 | "properties": { 54 | "location": { 55 | "type": "string", 56 | "description": "The city and state, e.g. San " 57 | "Francisco, CA", 58 | }, 59 | "unit": { 60 | "type": "string", 61 | "enum": ["celsius", "fahrenheit"], 62 | }, 63 | }, 64 | "required": ["location"], 65 | }, 66 | } 67 | ] 68 | 69 | # Set up the request data 70 | data = { 71 | "messages": messages, 72 | "functions": functions, 73 | "max_tokens": 20, 74 | "temperature": 0.2, 75 | "model": "gpt-3.5-turbo-0613", 76 | "n": 1, 77 | } 78 | 79 | # The log_request function returns two other function for later logging 80 | # the response or the exception. 
When we later do that, the logger will 81 | # actually calculate all the relevant metrics and will send them to 82 | # Mona. 83 | response_logger, exception_logger = mona_logger.log_request( 84 | data, additional_data={"customer_id": "A531251"} 85 | ) 86 | 87 | try: 88 | # Send the request to the API 89 | response = requests.post(url, headers=headers, json=data) 90 | 91 | # Check for HTTP errors 92 | response.raise_for_status() 93 | 94 | # Log response to Mona 95 | response_logger(response.json()) 96 | print(response.json()["choices"][0]["message"]) 97 | 98 | except Exception: 99 | # Log exception to Mona 100 | exception_logger() 101 | 102 | response_message = response.json()["choices"][0]["message"] 103 | 104 | if response_message.get("function_call"): 105 | available_functions = { 106 | "get_current_weather": get_current_weather, 107 | } 108 | function_name = response_message["function_call"]["name"] 109 | fuction_to_call = available_functions[function_name] 110 | function_args = json.loads( 111 | response_message["function_call"]["arguments"] 112 | ) 113 | function_response = fuction_to_call( 114 | location=function_args.get("location"), 115 | unit=function_args.get("unit"), 116 | ) 117 | 118 | messages.append(response_message) 119 | messages.append( 120 | { 121 | "role": "function", 122 | "name": function_name, 123 | "content": function_response, 124 | } 125 | ) 126 | 127 | data = { 128 | "messages": messages, 129 | "functions": functions, 130 | "max_tokens": 20, 131 | "temperature": 0.2, 132 | "model": "gpt-3.5-turbo-0613", 133 | "n": 1, 134 | } 135 | 136 | # The log_request function returns two other function for later logging 137 | # the response or the exception. When we later do that, the logger will 138 | # actually calculate all the relevant metrics and will send them to 139 | # Mona. 
140 | response_logger, exception_logger = mona_logger.log_request( 141 | data, additional_data={"customer_id": "A531251"} 142 | ) 143 | 144 | try: 145 | # Send the request to the API 146 | second_response = requests.post(url, headers=headers, json=data) 147 | 148 | # Check for HTTP errors 149 | second_response.raise_for_status() 150 | 151 | # Log response to Mona 152 | response_logger(second_response.json()) 153 | print(second_response.json()["choices"][0]["message"]) 154 | return second_response.json() 155 | 156 | except Exception: 157 | # Log exception to Mona 158 | exception_logger() 159 | 160 | 161 | print(run_conversation()) 162 | -------------------------------------------------------------------------------- /examples/legacy_chat_completion/chat_completion_rest.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | import requests 5 | 6 | from mona_openai import get_rest_monitor 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | MONA_API_KEY = environ.get("MONA_API_KEY") 11 | MONA_SECRET = environ.get("MONA_SECRET") 12 | MONA_CREDS = { 13 | "key": MONA_API_KEY, 14 | "secret": MONA_SECRET, 15 | } 16 | 17 | # This is the name of the monitoring class on Mona 18 | MONITORING_CONTEXT_NAME = "MONITORED_CHAT_COMPLETION_USE_CASE_NAME" 19 | 20 | 21 | # Direct REST usage, without OpenAI client 22 | 23 | # Get Mona logger 24 | mona_logger = get_rest_monitor( 25 | "ChatCompletion", 26 | MONA_CREDS, 27 | MONITORING_CONTEXT_NAME, 28 | ) 29 | 30 | # Set up the API endpoint URL and authentication headers 31 | url = "https://api.openai.com/v1/chat/completions" 32 | headers = { 33 | "Content-Type": "application/json", 34 | "Authorization": f"Bearer {environ.get('OPEN_AI_KEY')}", 35 | } 36 | 37 | # Set up the request data 38 | data = { 39 | "messages": [ 40 | {"role": "user", "content": "I want to generate some text about "} 41 | ], 42 | "max_tokens": 20, 43 | "temperature": 0.2, 44 | "model": "gpt-3.5-turbo", 45 | "n": 1, 46 | } 47 | 48 | # The log_request function returns two other function for later logging 49 | # the response or the exception. When we later do that, the logger will 50 | # actually calculate all the relevant metrics and will send them to 51 | # Mona. 
52 | response_logger, exception_logger = mona_logger.log_request( 53 | data, additional_data={"customer_id": "A531251"} 54 | ) 55 | 56 | try: 57 | # Send the request to the API 58 | response = requests.post(url, headers=headers, json=data) 59 | 60 | # Check for HTTP errors 61 | response.raise_for_status() 62 | 63 | # Log response to Mona 64 | response_logger(response.json()) 65 | print(response.json()["choices"][0]["message"]["content"]) 66 | 67 | except Exception: 68 | # Log exception to Mona 69 | exception_logger() 70 | -------------------------------------------------------------------------------- /examples/legacy_chat_completion/chat_completion_stream.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | 5 | from mona_openai import monitor 6 | 7 | openai.api_key = environ.get("OPEN_AI_KEY") 8 | 9 | MONA_API_KEY = environ.get("MONA_API_KEY") 10 | MONA_SECRET = environ.get("MONA_SECRET") 11 | MONA_CREDS = { 12 | "key": MONA_API_KEY, 13 | "secret": MONA_SECRET, 14 | } 15 | 16 | # This is the name of the monitoring class on Mona 17 | MONITORING_CONTEXT_NAME = "MONITORED_CHAT_COMPLETION_USE_CASE_NAME" 18 | 19 | monitored_chat_completion = monitor( 20 | openai.ChatCompletion, 21 | MONA_CREDS, 22 | MONITORING_CONTEXT_NAME, 23 | ) 24 | 25 | response = monitored_chat_completion.create( 26 | stream=True, 27 | model="gpt-3.5-turbo", 28 | messages=[ 29 | {"role": "user", "content": "I want to generate some text about "} 30 | ], 31 | max_tokens=20, 32 | n=1, 33 | temperature=0.2, 34 | # Adding additional information for monitoring purposes, unrelated to 35 | # internal OpenAI call. 36 | MONA_additional_data={"customer_id": "A531251"}, 37 | ) 38 | 39 | for event in response: 40 | print(event.choices[0].delta.get("content", "")) 41 | -------------------------------------------------------------------------------- /examples/legacy_completion/completion.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | 5 | from mona_openai import monitor 6 | 7 | openai.api_key = environ.get("OPEN_AI_KEY") 8 | 9 | MONA_API_KEY = environ.get("MONA_API_KEY") 10 | MONA_SECRET = environ.get("MONA_SECRET") 11 | MONA_CREDS = { 12 | "key": MONA_API_KEY, 13 | "secret": MONA_SECRET, 14 | } 15 | 16 | # This is the name of the monitoring class on Mona 17 | MONITORING_CONTEXT_NAME = "MONITORED_COMPLETION_USE_CASE_NAME" 18 | 19 | monitored_completion = monitor( 20 | openai.Completion, 21 | MONA_CREDS, 22 | MONITORING_CONTEXT_NAME, 23 | ) 24 | 25 | response = monitored_completion.create( 26 | model="text-ada-001", 27 | prompt="I want to generate some text about ", 28 | max_tokens=20, 29 | n=1, 30 | temperature=0.2, 31 | # Adding additional information for monitoring purposes, unrelated to 32 | # internal OpenAI call. 
33 | MONA_additional_data={"customer_id": "A531251"}, 34 | ) 35 | print(response.choices[0].text) 36 | -------------------------------------------------------------------------------- /examples/legacy_completion/completion_async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from os import environ 3 | 4 | import openai 5 | 6 | from mona_openai import monitor 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | MONA_API_KEY = environ.get("MONA_API_KEY") 11 | MONA_SECRET = environ.get("MONA_SECRET") 12 | MONA_CREDS = { 13 | "key": MONA_API_KEY, 14 | "secret": MONA_SECRET, 15 | } 16 | 17 | # This is the name of the monitoring class on Mona 18 | MONITORING_CONTEXT_NAME = "MONITORED_COMPLETION_USE_CASE_NAME" 19 | 20 | monitored_completion = monitor( 21 | openai.Completion, 22 | MONA_CREDS, 23 | MONITORING_CONTEXT_NAME, 24 | ) 25 | 26 | response = asyncio.run( 27 | monitored_completion.acreate( 28 | model="text-ada-001", 29 | prompt="I want to generate some text about ", 30 | max_tokens=20, 31 | n=1, 32 | temperature=0.2, 33 | # Adding additional information for monitoring purposes, unrelated to 34 | # internal OpenAI call. 35 | MONA_additional_data={"customer_id": "A531251"}, 36 | ) 37 | ) 38 | 39 | print(response.choices[0].text) 40 | -------------------------------------------------------------------------------- /examples/legacy_completion/completion_async_stream.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from os import environ 3 | 4 | import openai 5 | 6 | from mona_openai import monitor 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | MONA_API_KEY = environ.get("MONA_API_KEY") 11 | MONA_SECRET = environ.get("MONA_SECRET") 12 | MONA_CREDS = { 13 | "key": MONA_API_KEY, 14 | "secret": MONA_SECRET, 15 | } 16 | 17 | # This is the name of the monitoring class on Mona 18 | MONITORING_CONTEXT_NAME = "MONITORED_COMPLETION_USE_CASE_NAME" 19 | 20 | monitored_completion = monitor( 21 | openai.Completion, 22 | MONA_CREDS, 23 | MONITORING_CONTEXT_NAME, 24 | ) 25 | 26 | 27 | async def openAI_logic(): 28 | response = await monitored_completion.acreate( 29 | stream=True, 30 | model="text-ada-001", 31 | prompt="I want to generate some text about ", 32 | max_tokens=20, 33 | n=1, 34 | temperature=0.2, 35 | # Adding additional information for monitoring purposes, unrelated to 36 | # internal OpenAI call. 37 | MONA_additional_data={"customer_id": "A531251"}, 38 | ) 39 | async for event in response: 40 | print(event.choices[0].text) 41 | 42 | 43 | asyncio.run(openAI_logic()) 44 | -------------------------------------------------------------------------------- /examples/legacy_completion/completion_langchain.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | from langchain.chains import LLMChain 4 | from langchain.llms import OpenAI 5 | from langchain.prompts import PromptTemplate 6 | 7 | from mona_openai import monitor_langchain_llm 8 | 9 | MONA_API_KEY = environ.get("MONA_API_KEY") 10 | MONA_SECRET = environ.get("MONA_SECRET") 11 | MONA_CREDS = { 12 | "key": MONA_API_KEY, 13 | "secret": MONA_SECRET, 14 | } 15 | 16 | # This is the name of the monitoring class on Mona. 17 | MONITORING_CONTEXT_NAME = "MONITORED_LANGCHAIN_LLM" 18 | 19 | # Wrap the LLM object with Mona monitoring. 
20 | llm = monitor_langchain_llm( 21 | OpenAI(openai_api_key=environ.get("OPEN_AI_KEY")), 22 | MONA_CREDS, 23 | MONITORING_CONTEXT_NAME, 24 | ) 25 | 26 | # Now you can use the llm directly along with additional Mona data. 27 | 28 | print( 29 | llm.predict( 30 | "What would be a good company name for a company that makes " 31 | "colorful socks?", 32 | MONA_additional_data={"customer_id": "A531251"}, 33 | MONA_context_id="some_random_id", 34 | ) 35 | ) 36 | 37 | # Or you can use the llm as part of a chain or agent. 38 | 39 | prompt = PromptTemplate.from_template( 40 | "What is a good name for a company that makes {product}?" 41 | ) 42 | 43 | chain = LLMChain( 44 | llm=llm, 45 | prompt=prompt, 46 | llm_kwargs={ 47 | "MONA_additional_data": {"customer_id": "A531251"}, 48 | "MONA_context_id": "fkljdaslfkjasl", 49 | }, 50 | ) 51 | 52 | print(chain.run("colorful socks")) 53 | -------------------------------------------------------------------------------- /examples/legacy_completion/completion_rest.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | import requests 5 | 6 | from mona_openai import get_rest_monitor 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | MONA_API_KEY = environ.get("MONA_API_KEY") 11 | MONA_SECRET = environ.get("MONA_SECRET") 12 | MONA_CREDS = { 13 | "key": MONA_API_KEY, 14 | "secret": MONA_SECRET, 15 | } 16 | 17 | # This is the name of the monitoring class on Mona 18 | MONITORING_CONTEXT_NAME = "MONITORED_COMPLETION_USE_CASE_NAME" 19 | 20 | 21 | # Direct REST usage, without OpenAI client 22 | 23 | # Get Mona logger 24 | mona_logger = get_rest_monitor( 25 | "Completion", 26 | MONA_CREDS, 27 | MONITORING_CONTEXT_NAME, 28 | ) 29 | 30 | # Set up the API endpoint URL and authentication headers 31 | url = "https://api.openai.com/v1/completions" 32 | headers = { 33 | "Content-Type": "application/json", 34 | "Authorization": f"Bearer {environ.get('OPEN_AI_KEY')}", 35 | } 36 | 37 | # Set up the request data 38 | data = { 39 | "prompt": "I want to generate some text about ", 40 | "max_tokens": 20, 41 | "temperature": 0.2, 42 | "model": "text-ada-001", 43 | "n": 1, 44 | } 45 | 46 | # The log_request function returns two other function for later logging 47 | # the response or the exception. When we later do that, the logger will 48 | # actually calculate all the relevant metrics and will send them to 49 | # Mona. 
50 | response_logger, exception_logger = mona_logger.log_request( 51 | data, additional_data={"customer_id": "A531251"} 52 | ) 53 | 54 | try: 55 | # Send the request to the API 56 | response = requests.post(url, headers=headers, json=data) 57 | 58 | # Check for HTTP errors 59 | response.raise_for_status() 60 | 61 | # Log response to Mona 62 | response_logger(response.json(), additional_data={"some_other_data": True}) 63 | print(response.json()["choices"][0]["text"]) 64 | 65 | except Exception: 66 | # Log exception to Mona 67 | exception_logger() 68 | -------------------------------------------------------------------------------- /examples/legacy_completion/completion_rest_async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from os import environ 3 | 4 | import openai 5 | import requests 6 | 7 | from mona_openai import get_rest_monitor 8 | 9 | openai.api_key = environ.get("OPEN_AI_KEY") 10 | 11 | MONA_API_KEY = environ.get("MONA_API_KEY") 12 | MONA_SECRET = environ.get("MONA_SECRET") 13 | MONA_CREDS = { 14 | "key": MONA_API_KEY, 15 | "secret": MONA_SECRET, 16 | } 17 | 18 | # This is the name of the monitoring class on Mona 19 | MONITORING_CONTEXT_NAME = "MONITORED_COMPLETION_USE_CASE_NAME" 20 | 21 | 22 | # Direct REST usage, without OpenAI client 23 | 24 | # Get Mona logger 25 | mona_logger = get_rest_monitor( 26 | "Completion", 27 | MONA_CREDS, 28 | MONITORING_CONTEXT_NAME, 29 | ) 30 | 31 | # Set up the API endpoint URL and authentication headers 32 | url = "https://api.openai.com/v1/completions" 33 | headers = { 34 | "Content-Type": "application/json", 35 | "Authorization": f"Bearer {environ.get('OPEN_AI_KEY')}", 36 | } 37 | 38 | # Set up the request data 39 | data = { 40 | "prompt": "I want to generate some text about ", 41 | "max_tokens": 20, 42 | "temperature": 0.2, 43 | "model": "text-ada-001", 44 | "n": 1, 45 | } 46 | 47 | # The async_log_request function returns two other function for later logging 48 | # the response or the exception. When we later do that, the logger will 49 | # actually calculate all the relevant metrics and will send them to 50 | # Mona. 
51 | response_logger, exception_logger = mona_logger.async_log_request( 52 | data, additional_data={"customer_id": "A531251"} 53 | ) 54 | 55 | try: 56 | # Send the request to the API 57 | response = requests.post(url, headers=headers, json=data) 58 | 59 | # Check for HTTP errors 60 | response.raise_for_status() 61 | 62 | # Log response to Mona 63 | asyncio.run(response_logger(response.json())) 64 | print(response.json()["choices"][0]["text"]) 65 | 66 | except Exception: 67 | # Log exception to Mona 68 | asyncio.run(exception_logger()) 69 | -------------------------------------------------------------------------------- /examples/legacy_completion/completion_stream.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | 5 | from mona_openai import monitor 6 | 7 | openai.api_key = environ.get("OPEN_AI_KEY") 8 | 9 | MONA_API_KEY = environ.get("MONA_API_KEY") 10 | MONA_SECRET = environ.get("MONA_SECRET") 11 | MONA_CREDS = { 12 | "key": MONA_API_KEY, 13 | "secret": MONA_SECRET, 14 | } 15 | 16 | # This is the name of the monitoring class on Mona 17 | MONITORING_CONTEXT_NAME = "MONITORED_COMPLETION_USE_CASE_NAME" 18 | 19 | monitored_completion = monitor( 20 | openai.Completion, 21 | MONA_CREDS, 22 | MONITORING_CONTEXT_NAME, 23 | ) 24 | 25 | response = monitored_completion.create( 26 | stream=True, 27 | model="text-ada-001", 28 | prompt="I want to generate some text about ", 29 | max_tokens=20, 30 | n=1, 31 | temperature=0.2, 32 | # Adding additional information for monitoring purposes, unrelated to 33 | # internal OpenAI call. 34 | MONA_additional_data={"customer_id": "A531251"}, 35 | ) 36 | 37 | for event in response: 38 | print(event.choices[0].text) 39 | -------------------------------------------------------------------------------- /examples/legacy_completion/in_memory_logging.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | 5 | from mona_openai import monitor_with_logger 6 | from mona_openai.loggers import InMemoryLogger 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | logger = InMemoryLogger() 11 | monitored_completion = monitor_with_logger( 12 | openai.Completion, 13 | logger, 14 | ) 15 | 16 | response = monitored_completion.create( 17 | model="text-ada-001", 18 | prompt="I want to generate some text about ", 19 | max_tokens=20, 20 | n=1, 21 | temperature=0.2, 22 | # Adding additional information for monitoring purposes, unrelated to 23 | # internal OpenAI call. 24 | MONA_additional_data={"customer_id": "A531251"}, 25 | ) 26 | 27 | print(logger.latest_messages) 28 | -------------------------------------------------------------------------------- /examples/legacy_completion/log_to_file.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | import openai 4 | 5 | from mona_openai import monitor_with_logger 6 | from mona_openai.loggers import FileLogger 7 | 8 | openai.api_key = environ.get("OPEN_AI_KEY") 9 | 10 | logger = FileLogger("temp.txt") 11 | monitored_completion = monitor_with_logger( 12 | openai.Completion, 13 | logger, 14 | ) 15 | 16 | response = monitored_completion.create( 17 | model="text-ada-001", 18 | prompt="I want to generate some text about ", 19 | max_tokens=20, 20 | n=1, 21 | temperature=0.2, 22 | # Adding additional information for monitoring purposes, unrelated to 23 | # internal OpenAI call. 
24 | MONA_additional_data={"customer_id": "A531251"}, 25 | ) 26 | -------------------------------------------------------------------------------- /examples/legacy_completion/standard_logging.py: -------------------------------------------------------------------------------- 1 | from logging import WARNING 2 | from os import environ 3 | 4 | import openai 5 | 6 | from mona_openai import monitor_with_logger 7 | from mona_openai.loggers import StandardLogger 8 | 9 | openai.api_key = environ.get("OPEN_AI_KEY") 10 | 11 | monitored_completion = monitor_with_logger( 12 | openai.Completion, 13 | StandardLogger(WARNING), 14 | ) 15 | 16 | response = monitored_completion.create( 17 | model="text-ada-001", 18 | prompt="I want to generate some text about ", 19 | max_tokens=20, 20 | n=1, 21 | temperature=0.2, 22 | # Adding additional information for monitoring purposes, unrelated to 23 | # internal OpenAI call. 24 | MONA_additional_data={"customer_id": "A531251"}, 25 | ) 26 | -------------------------------------------------------------------------------- /mona_openai/__init__.py: -------------------------------------------------------------------------------- 1 | from .exceptions import * 2 | from .loggers import * 3 | from .mona_openai_legacy import ( 4 | get_rest_monitor, 5 | get_rest_monitor_with_logger, 6 | monitor, 7 | monitor_langchain_llm, 8 | monitor_langchain_llm_with_logger, 9 | monitor_with_logger, 10 | ) 11 | from .mona_openai_client import monitor_client_with_logger, monitor_client 12 | -------------------------------------------------------------------------------- /mona_openai/analysis/analyzer.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | 3 | 4 | class Analyzer(metaclass=ABCMeta): 5 | """ 6 | A parent analyzer class for typing purposes and basic shared logic. 7 | """ 8 | 9 | def is_none_analyzer(self) -> bool: 10 | return False 11 | 12 | def _none_init(self) -> None: 13 | """ 14 | Child classes can override this to allow specific logic when 15 | converting an Analyzer to a NoneAnalyzer. 16 | """ 17 | pass 18 | -------------------------------------------------------------------------------- /mona_openai/analysis/privacy.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable, Iterable 2 | from typing import Optional 3 | 4 | from .analyzer import Analyzer 5 | from .util import get_analyzers 6 | 7 | """ 8 | Functionality for extracting privacy information from GAI responses 9 | when comparing to given input prompts. 10 | 11 | TODO(itai): Add many more functions to extract information such as: 12 | Bank accounts, full names, SSN, passport numbers, etc... 13 | """ 14 | import re 15 | 16 | from phonenumbers import PhoneNumberMatcher 17 | 18 | # TODO(itai): Add module-level tests for this module, specifically for email 19 | # extraction, since this is our own logic and not using an external library. 20 | 21 | EMAIL_RE_PATTERN = ( 22 | r"([-!#-'*+/-9=?A-Z^-~]+(\.[-!#-'*+/-9=?A-Z^-~]+)*|\"([]!#-[^-~" 23 | r" \t]|(\\[\t -~]))+\")@([-!#-'*+/-9=?A-Z^-~]+(\.[-!#-'*+/-9=?A" 24 | r"-Z^-~]+)*|\[[\t -Z^-~]*])" 25 | ) 26 | 27 | 28 | def _extract_phone_numbers(text: str) -> set[str]: 29 | """ 30 | Extract phone numbers from a prompt string and return as a set. 
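    Illustrative example (matching is delegated to the phonenumbers library,
    so exact results depend on its metadata):
        _extract_phone_numbers("Call me at (415) 555-0100")
        # -> {"+14155550100"}  (normalized as "+<country code><national number>")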
31 | """ 32 | phone_numbers = set() 33 | # We use "US" just as a default region in case there are no country codes 34 | # since we don't care about the formatting of the found number, but just 35 | # whether it is a phone number or not, this has no consequences. 36 | for match in PhoneNumberMatcher(text, "US"): 37 | number_string = "+{}{}".format( 38 | match.number.country_code, match.number.national_number 39 | ) 40 | phone_numbers.add(number_string) 41 | return phone_numbers 42 | 43 | 44 | def _extract_all_emails(text: str) -> set[str]: 45 | """ 46 | returns all email addresses found in the given prompt. 47 | """ 48 | return set(re.findall(EMAIL_RE_PATTERN, text)) 49 | 50 | 51 | class PrivacyAnalyzer(Analyzer): 52 | """ 53 | An analyzer class that takes a text and provides functionality to extract 54 | privacy-related metrics from that text. 55 | """ 56 | 57 | def __init__(self, text: str) -> None: 58 | self._text = text 59 | self._phone_numbers = _extract_phone_numbers(text) 60 | self._emails = _extract_all_emails(text) 61 | 62 | def _none_init(self) -> None: 63 | self._text = None 64 | self._phone_numbers = set() 65 | self._emails = set() 66 | 67 | def get_phone_numbers_count(self) -> int: 68 | """ 69 | Returns the number of phone numbers in the initially given text. 70 | """ 71 | return len(self._phone_numbers) 72 | 73 | def get_emails_count(self) -> int: 74 | """ 75 | Returns the number of email addresses in the initially given text. 76 | """ 77 | return len(self._emails) 78 | 79 | @classmethod 80 | def _get_phone_numbers_from_instance(cls, instance) -> set[str]: 81 | return instance._phone_numbers 82 | 83 | @classmethod 84 | def _get_emails_from_instance(cls, instance) -> set[str]: 85 | return instance._emails 86 | 87 | def _get_previously_unseen_x_count( 88 | self, 89 | others: Iterable["PrivacyAnalyzer"], 90 | extraction_function: Callable, 91 | ) -> int: 92 | return len( 93 | extraction_function(self) 94 | - set().union( 95 | *tuple(extraction_function(other) for other in others) 96 | ) 97 | ) 98 | 99 | def get_previously_unseen_phone_numbers_count( 100 | self, others: Iterable["PrivacyAnalyzer"] 101 | ) -> int: 102 | """ 103 | Returns the number of phone numbers in the initially given text, that 104 | don't also appear in any of the given other analyzers. 105 | """ 106 | return self._get_previously_unseen_x_count( 107 | others, self._get_phone_numbers_from_instance 108 | ) 109 | 110 | def get_previously_unseen_emails_count( 111 | self, others: Iterable["PrivacyAnalyzer"] 112 | ) -> int: 113 | """ 114 | Returns the number of email addresses in the initially given text, 115 | that don't also appear in any of the given other analyzers. 116 | """ 117 | return self._get_previously_unseen_x_count( 118 | others, self._get_emails_from_instance 119 | ) 120 | 121 | 122 | def get_privacy_analyzers( 123 | texts: Iterable[Optional[str]], 124 | ) -> tuple[PrivacyAnalyzer, ...]: 125 | """ 126 | Returns a tuple of PrivacyAnalyzer objects, one for each text in the given 127 | iterable. 128 | """ 129 | return get_analyzers(texts, PrivacyAnalyzer) 130 | -------------------------------------------------------------------------------- /mona_openai/analysis/profanity.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logic to create profanity analysis. 
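Illustrative usage (the probability value is made up; real values come from the
profanity_check model and are rounded to two decimal places):
    get_profanity_prob(("hello there", None))  ->  (0.03,)
    get_has_profanity(("hello there",))        ->  (False,)
None entries are dropped before analysis, hence the single-element results.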
3 | """ 4 | from collections.abc import Iterable 5 | from typing import Optional 6 | 7 | from profanity_check import predict, predict_prob 8 | 9 | _DECIMAL_PLACES = 2 10 | 11 | 12 | def _clear_nones(texts): 13 | return tuple(x for x in texts if x is not None) 14 | 15 | 16 | def get_profanity_prob(texts: Iterable[Optional[str]]) -> tuple[float, ...]: 17 | texts = _clear_nones(texts) 18 | if not texts: 19 | return () 20 | return tuple(round(x, _DECIMAL_PLACES) for x in predict_prob(texts)) 21 | 22 | 23 | def get_has_profanity(texts: Iterable[Optional[str]]) -> tuple[bool, ...]: 24 | texts = _clear_nones(texts) 25 | if not texts: 26 | return () 27 | return tuple(bool(x) for x in predict(texts)) 28 | -------------------------------------------------------------------------------- /mona_openai/analysis/textual.py: -------------------------------------------------------------------------------- 1 | """ 2 | A module to derive text-related metrics such as text length, usage of 3 | specific grammatical words, text repetition, etc... 4 | 5 | These analyses can be used to detect significant drifts that could be 6 | caused by hallucinations or bugs. 7 | 8 | NOTE: There are many more analyses that can be added here. 9 | """ 10 | 11 | from collections.abc import Iterable 12 | from typing import Optional 13 | 14 | from .analyzer import Analyzer 15 | from .util import get_analyzers 16 | 17 | PREPOSITIONS = set( 18 | ( 19 | "aboard", 20 | "about", 21 | "above", 22 | "across", 23 | "after", 24 | "against", 25 | "along", 26 | "amid", 27 | "among", 28 | "around", 29 | "as", 30 | "at", 31 | "before", 32 | "behind", 33 | "below", 34 | "beneath", 35 | "beside", 36 | "between", 37 | "beyond", 38 | "but", 39 | "by", 40 | "concerning", 41 | "considering", 42 | "despite", 43 | "down", 44 | "during", 45 | "except", 46 | "for", 47 | "from", 48 | "in", 49 | "inside", 50 | "into", 51 | "like", 52 | "near", 53 | "of", 54 | "off", 55 | "on", 56 | "onto", 57 | "out", 58 | "outside", 59 | "over", 60 | "past", 61 | "regarding", 62 | "round", 63 | "since", 64 | "through", 65 | "throughout", 66 | "till", 67 | "to", 68 | "toward", 69 | "under", 70 | "underneath", 71 | "until", 72 | "unto", 73 | "up", 74 | "upon", 75 | "with", 76 | "within", 77 | "without", 78 | ) 79 | ) 80 | 81 | 82 | class TextualAnalyzer(Analyzer): 83 | """ 84 | An analyzer class that takes a text and provides methods to get analysis 85 | on that text such as length, word count, etc... 86 | """ 87 | 88 | def __init__(self, text: str) -> None: 89 | self._text = text 90 | self._splitted_text = text.split() 91 | self._prepositions = tuple( 92 | x for x in self._splitted_text if x in PREPOSITIONS 93 | ) 94 | 95 | def _none_init(self) -> None: 96 | self._text = None 97 | self._splitted_text = () 98 | self._prepositions = () 99 | 100 | def get_length(self) -> int: 101 | """ 102 | Returns the length of the text. 103 | """ 104 | return len(self._text) 105 | 106 | def get_word_count(self) -> int: 107 | """ 108 | Returns the number of the words in the text. 109 | """ 110 | return len(self._splitted_text) 111 | 112 | def get_preposition_count(self) -> int: 113 | """ 114 | Returns the number of prepositions in the text. 115 | """ 116 | return len(self._prepositions) 117 | 118 | def get_preposition_ratio(self) -> float: 119 | """ 120 | Returns the ratio of prepositions in the text. 
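        Illustrative example: for the text "the cat sat on the mat" there are
        6 words and one preposition ("on"), so the ratio is 1/6 (~0.17).
        An empty text yields 0 rather than raising a division error.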
121 | """ 122 | word_count = self.get_word_count() 123 | return self.get_preposition_count() / word_count if word_count else 0 124 | 125 | def get_words_not_in_others_count( 126 | self, others: Iterable["TextualAnalyzer"] 127 | ) -> int: 128 | """ 129 | Returns the number of the words in the text that do not appear in the 130 | given other texts. 131 | """ 132 | others_words_set = set().union( 133 | *tuple(other._splitted_text for other in others) 134 | ) 135 | return len( 136 | [ 137 | word 138 | for word in self._splitted_text 139 | if word not in others_words_set 140 | ] 141 | ) 142 | 143 | 144 | def get_textual_analyzers( 145 | texts: Iterable[Optional[str]], 146 | ) -> tuple[TextualAnalyzer, ...]: 147 | """ 148 | Returns a tuple of TextualAnalyzers for all the given texts. 149 | """ 150 | return get_analyzers(texts, TextualAnalyzer) 151 | -------------------------------------------------------------------------------- /mona_openai/analysis/util.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from collections.abc import Iterable 3 | from functools import wraps 4 | from typing import Optional 5 | 6 | from .analyzer import Analyzer 7 | 8 | 9 | def _get_none_analyzer_class(base_class: type(Analyzer)): 10 | """ 11 | This introduces the concept of a "None"-typed analyzer, which emulates a 12 | regular analyzer but returns None for all functions. This is helpful in 13 | cases where a message isn't textual, and hence requires no analysis and we 14 | want the relevant metrics to be "None". Specifically it is relevant when 15 | the message describes a function call. 16 | 17 | In short, these None analyzers allow us to keep the code flow as if all 18 | messages are the same. 19 | 20 | TODO(itai): In the future we might want to introduce specific analyses for 21 | function calls, which can be done within existing analyzers or with 22 | new analyzers. 23 | """ 24 | 25 | # Add is_none_analyzer method that always returns True, after the loop to 26 | # make sure it overrides any such original method. 27 | def is_none_analyzer(self): 28 | return True 29 | 30 | attrs = { 31 | "__init__": base_class._none_init, 32 | "is_none_analyzer": is_none_analyzer, 33 | } 34 | 35 | for attr_name, attr_value in base_class.__dict__.items(): 36 | if callable(attr_value) and not attr_name.startswith("_"): 37 | 38 | @wraps(attr_value) 39 | def make_none_func(attr_value): 40 | def none_func(*args, **kwargs): 41 | return None 42 | 43 | return none_func 44 | 45 | attrs[attr_name] = make_none_func(attr_value) 46 | 47 | return type(f"None{base_class.__name__}", (base_class,), attrs) 48 | 49 | 50 | def create_combined_analyzer(instances: Iterable[Analyzer]): 51 | """ 52 | Create a new analyzer that has the same methods as the given analyzers. 53 | 54 | This function takes an iterable of analyzers of a given class and returns 55 | a new object that has the same methods as the given instances. When calling 56 | these methods, it returns a tuple containing all the results of running 57 | that method for all instances. It disregards "None" typed analyzers. 58 | 59 | Args: 60 | instances: An iterable of instances of a given class. 61 | 62 | Returns: 63 | A new object that has the same methods as the given instances. 
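    Example (illustrative):
        analyzers = get_textual_analyzers(("one two", "three", None))
        combined = create_combined_analyzer(analyzers)
        combined.get_word_count()  # -> (2, 1)
    The None text yields a "None" analyzer, which is skipped, so only two
    results are returned.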
64 | """ 65 | 66 | class CombinedObject: 67 | def __init__(self, instances: Iterable[Analyzer]): 68 | self._instances = tuple( 69 | instance 70 | for instance in instances 71 | if not instance.is_none_analyzer() 72 | ) 73 | 74 | def __getattr__(self, name): 75 | def method(*args, **kwargs): 76 | results = [] 77 | for instance in self._instances: 78 | func = getattr(instance, name) 79 | if inspect.ismethod(func) or inspect.isfunction(func): 80 | results.append(func(*args, **kwargs)) 81 | return tuple(results) 82 | 83 | return method 84 | 85 | return CombinedObject(instances) 86 | 87 | 88 | def get_analyzers(texts: Iterable[Optional[str]], AnalyzerClass: Analyzer): 89 | """ 90 | Returns a tuple of regular_class objects, one for each text in the given 91 | iterable, or none_class objects where the text is a None. 92 | """ 93 | NoneClass = _get_none_analyzer_class(AnalyzerClass) 94 | return tuple( 95 | AnalyzerClass(text) if text is not None else NoneClass() 96 | for text in texts 97 | ) 98 | -------------------------------------------------------------------------------- /mona_openai/endpoints/chat_completion.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Mona wrapping code for OpenAI's ChatCompletion API. 3 | """ 4 | from collections.abc import Callable, Iterable, Mapping 5 | from copy import deepcopy 6 | from functools import wraps 7 | 8 | from ..analysis.privacy import PrivacyAnalyzer, get_privacy_analyzers 9 | from ..analysis.profanity import get_has_profanity, get_profanity_prob 10 | from ..analysis.textual import TextualAnalyzer, get_textual_analyzers 11 | from ..analysis.util import create_combined_analyzer 12 | from .endpoint_wrapping import OpenAIEndpointWrappingLogic 13 | 14 | CHAT_COMPLETION_CLASS_NAME = "ChatCompletion" 15 | 16 | 17 | def _get_choices_texts(response: Mapping) -> tuple: 18 | return tuple( 19 | choice["message"].get("content") for choice in response["choices"] 20 | ) 21 | 22 | 23 | def _get_prompt_texts(request: Mapping) -> tuple: 24 | return tuple(message["content"] for message in request["messages"]) 25 | 26 | 27 | def _get_texts(func: Callable) -> Callable: 28 | def wrapper(self, input: Mapping, response: Mapping): 29 | return func( 30 | self, 31 | input["messages"][-1]["content"] 32 | if input["messages"][-1]["role"] == "user" 33 | else None, 34 | _get_prompt_texts(input), 35 | _get_choices_texts(response), 36 | ) 37 | 38 | return wrapper 39 | 40 | 41 | def _get_analyzers(analyzers_getter: Callable) -> Callable: 42 | def decorator(func: Callable) -> Callable: 43 | @wraps(func) 44 | def wrapper( 45 | self, 46 | last_user_message: str, 47 | messages: Iterable[str], 48 | answers: Iterable[str], 49 | ): 50 | return func( 51 | self, 52 | analyzers_getter((last_user_message,))[0] 53 | if last_user_message is not None 54 | else None, 55 | analyzers_getter(messages), 56 | analyzers_getter(answers), 57 | ) 58 | 59 | return wrapper 60 | 61 | return decorator 62 | 63 | 64 | class ChatCompletionWrapping(OpenAIEndpointWrappingLogic): 65 | def _get_endpoint_name(self): 66 | return CHAT_COMPLETION_CLASS_NAME 67 | 68 | def _internal_get_clean_message(self, message: Mapping) -> Mapping: 69 | """ 70 | Returns a copy of the given message with relevant data removed, for 71 | example the actual texts, to avoid sending such information, that 72 | is sometimes sensitive, to Mona. 
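        For example (illustrative), with the default specs the "content" of
        every input message and of every response choice is removed; setting
        the "export_prompt" / "export_response_texts" specs to True keeps the
        corresponding texts.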
73 | """ 74 | new_message = deepcopy(message) 75 | if not self._specs.get("export_prompt", False): 76 | for input_message in new_message["input"]["messages"]: 77 | input_message.pop("content", None) 78 | 79 | if "response" in message and not self._specs.get( 80 | "export_response_texts", False 81 | ): 82 | for choice in new_message["response"]["choices"]: 83 | choice["message"].pop("content", None) 84 | 85 | return new_message 86 | 87 | @_get_texts 88 | @_get_analyzers(get_privacy_analyzers) 89 | def _get_full_privacy_analysis( 90 | self, 91 | last_user_message_analyzer: PrivacyAnalyzer, 92 | messages_privacy_analyzers: Iterable[PrivacyAnalyzer], 93 | answers_privacy_analyzers: Iterable[PrivacyAnalyzer], 94 | ) -> dict: 95 | combined_messages = create_combined_analyzer( 96 | messages_privacy_analyzers 97 | ) 98 | combined_answers = create_combined_analyzer(answers_privacy_analyzers) 99 | ret = { 100 | "total_prompt_phone_number_count": sum( 101 | combined_messages.get_phone_numbers_count() 102 | ), 103 | "answer_unknown_phone_number_count": ( 104 | combined_answers.get_previously_unseen_phone_numbers_count( 105 | messages_privacy_analyzers 106 | ) 107 | ), 108 | "total_prompt_email_count": sum( 109 | combined_messages.get_emails_count() 110 | ), 111 | "answer_unknown_email_count": ( 112 | combined_answers.get_previously_unseen_emails_count( 113 | messages_privacy_analyzers 114 | ) 115 | ), 116 | } 117 | if last_user_message_analyzer is not None: 118 | ret.update( 119 | { 120 | "last_user_message_phone_number_count": ( 121 | last_user_message_analyzer.get_phone_numbers_count() 122 | ), 123 | "last_user_message_emails_count": ( 124 | last_user_message_analyzer.get_emails_count() 125 | ), 126 | } 127 | ) 128 | return ret 129 | 130 | @_get_texts 131 | @_get_analyzers(get_textual_analyzers) 132 | def _get_full_textual_analysis( 133 | self, 134 | last_user_message_analyzer: TextualAnalyzer, 135 | messages_textual_analyzers: Iterable[TextualAnalyzer], 136 | answers_textual_analyzers: Iterable[TextualAnalyzer], 137 | ) -> dict: 138 | combined_messages = create_combined_analyzer( 139 | messages_textual_analyzers 140 | ) 141 | combined_answers = create_combined_analyzer(answers_textual_analyzers) 142 | total_prompt_word_count = sum(combined_messages.get_word_count()) 143 | total_prompt_preposition_count = sum( 144 | combined_messages.get_preposition_count() 145 | ) 146 | 147 | ret = { 148 | "total_prompt_length": sum(combined_messages.get_length()), 149 | "answer_length": combined_answers.get_length(), 150 | "total_prompt_word_count": total_prompt_word_count, 151 | "answer_word_count": combined_answers.get_word_count(), 152 | "total_prompt_preposition_count": total_prompt_preposition_count, 153 | "total_prompt_preposition_ratio": total_prompt_preposition_count 154 | / total_prompt_word_count 155 | if total_prompt_word_count != 0 156 | else None, 157 | "answer_preposition_count": ( 158 | combined_answers.get_preposition_count() 159 | ), 160 | "answer_preposition_ratio": ( 161 | combined_answers.get_preposition_ratio() 162 | ), 163 | "answer_words_not_in_prompt_count": ( 164 | combined_answers.get_words_not_in_others_count( 165 | messages_textual_analyzers 166 | ) 167 | ), 168 | "answer_words_not_in_prompt_ratio": tuple( 169 | analyzer.get_words_not_in_others_count( 170 | messages_textual_analyzers 171 | ) 172 | / analyzer.get_word_count() 173 | if analyzer.get_word_count() > 0 174 | else 0.0 175 | for analyzer in answers_textual_analyzers 176 | if not analyzer.is_none_analyzer() 177 | ), 178 | } 179 
| 180 | if last_user_message_analyzer is not None: 181 | ret.update( 182 | { 183 | "last_user_message_length": ( 184 | last_user_message_analyzer.get_length() 185 | ), 186 | "last_user_message_word_count": ( 187 | last_user_message_analyzer.get_word_count() 188 | ), 189 | "last_user_message_preposition_count": ( 190 | last_user_message_analyzer.get_preposition_count() 191 | ), 192 | "last_user_message_preposition_ratio": ( 193 | last_user_message_analyzer.get_preposition_ratio() 194 | ), 195 | } 196 | ) 197 | 198 | return ret 199 | 200 | @_get_texts 201 | def _get_full_profainty_analysis( 202 | self, 203 | last_user_message: str, 204 | messages: Iterable[str], 205 | answers: Iterable[str], 206 | ) -> dict: 207 | ret: dict = { 208 | "prompt_profanity_prob": get_profanity_prob(messages), 209 | "prompt_has_profanity": get_has_profanity(messages), 210 | "answer_profanity_prob": get_profanity_prob(answers), 211 | "answer_has_profanity": get_has_profanity(answers), 212 | } 213 | 214 | if last_user_message is not None: 215 | ret.update( 216 | { 217 | "last_user_message_profanity_prob": get_profanity_prob( 218 | (last_user_message,) 219 | )[0], 220 | "last_user_message_has_profanity": get_has_profanity( 221 | (last_user_message,) 222 | )[0], 223 | } 224 | ) 225 | 226 | return ret 227 | 228 | def get_stream_delta_text_from_choice(self, choice: Mapping) -> str: 229 | return choice["delta"].get("content", "") 230 | 231 | def get_final_choice(self, text: str) -> dict: 232 | return {"message": {"role": "assistant", "content": text}} 233 | 234 | def get_all_prompt_texts(self, request: Mapping) -> Iterable[str]: 235 | return _get_prompt_texts(request) 236 | 237 | def get_all_response_texts(self, response: Mapping) -> Iterable[str]: 238 | return _get_choices_texts(response) 239 | -------------------------------------------------------------------------------- /mona_openai/endpoints/completion.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Mona wrapping code for OpenAI's Completion API. 
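Note that the request's "prompt" field may be either a single string or a list
of prompts; both are analyzed the same way, e.g. {"prompt": "hello"} is treated
like {"prompt": ["hello"]} (see _get_prompts below).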
3 | """ 4 | from collections.abc import Callable, Iterable, Mapping 5 | from copy import deepcopy 6 | from functools import wraps 7 | 8 | from ..analysis.privacy import PrivacyAnalyzer, get_privacy_analyzers 9 | from ..analysis.profanity import get_has_profanity, get_profanity_prob 10 | from ..analysis.textual import TextualAnalyzer, get_textual_analyzers 11 | from ..analysis.util import create_combined_analyzer 12 | from .endpoint_wrapping import OpenAIEndpointWrappingLogic 13 | 14 | COMPLETION_CLASS_NAME = "Completion" 15 | 16 | 17 | def _get_prompts(request: Mapping) -> Iterable[str]: 18 | prompts = request.get("prompt", ()) 19 | return (prompts,) if isinstance(prompts, str) else prompts 20 | 21 | 22 | def _get_choices_texts(response: Mapping) -> Iterable[str]: 23 | return tuple((choice["text"] for choice in response["choices"])) 24 | 25 | 26 | def _get_texts(func: Callable) -> Callable: 27 | def wrapper(self, input: Mapping, response: Mapping): 28 | return func(self, _get_prompts(input), _get_choices_texts(response)) 29 | 30 | return wrapper 31 | 32 | 33 | def _get_analyzers(analyzers_getter: Callable) -> Callable: 34 | def decorator(func: Callable) -> Callable: 35 | @wraps(func) 36 | def wrapper(self, prompts: Iterable[str], answers: Iterable[str]): 37 | return func( 38 | self, analyzers_getter(prompts), analyzers_getter(answers) 39 | ) 40 | 41 | return wrapper 42 | 43 | return decorator 44 | 45 | 46 | class CompletionWrapping(OpenAIEndpointWrappingLogic): 47 | def _get_endpoint_name(self) -> str: 48 | return COMPLETION_CLASS_NAME 49 | 50 | def _internal_get_clean_message(self, message: Mapping) -> Mapping: 51 | """ 52 | Returns a copy of the given message with relevant data removed, for 53 | example the actual texts, to avoid sending such information, that 54 | is sometimes sensitive, to Mona. 
55 | """ 56 | new_message = deepcopy(message) 57 | if not self._specs.get("export_prompt", False): 58 | new_message["input"].pop("prompt", None) 59 | 60 | if "response" in message and not self._specs.get( 61 | "export_response_texts", False 62 | ): 63 | for choice in new_message["response"]["choices"]: 64 | choice.pop("text", None) 65 | 66 | return new_message 67 | 68 | @_get_texts 69 | @_get_analyzers(get_privacy_analyzers) 70 | def _get_full_privacy_analysis( 71 | self, 72 | prompts_privacy_analyzers: Iterable[PrivacyAnalyzer], 73 | answers_privacy_analyzers: Iterable[PrivacyAnalyzer], 74 | ) -> dict: 75 | combined_prompts = create_combined_analyzer(prompts_privacy_analyzers) 76 | combined_answers = create_combined_analyzer(answers_privacy_analyzers) 77 | return { 78 | "prompt_phone_number_count": ( 79 | combined_prompts.get_phone_numbers_count() 80 | ), 81 | "answer_unknown_phone_number_count": ( 82 | combined_answers.get_previously_unseen_phone_numbers_count( 83 | prompts_privacy_analyzers 84 | ) 85 | ), 86 | "prompt_email_count": combined_prompts.get_emails_count(), 87 | "answer_unknown_email_count": ( 88 | combined_answers.get_previously_unseen_emails_count( 89 | prompts_privacy_analyzers 90 | ) 91 | ), 92 | } 93 | 94 | @_get_texts 95 | @_get_analyzers(get_textual_analyzers) 96 | def _get_full_textual_analysis( 97 | self, 98 | prompts_textual_analyzers: Iterable[TextualAnalyzer], 99 | answers_textual_analyzers: Iterable[TextualAnalyzer], 100 | ) -> dict: 101 | combined_prompts = create_combined_analyzer(prompts_textual_analyzers) 102 | combined_answers = create_combined_analyzer(answers_textual_analyzers) 103 | return { 104 | "prompt_length": combined_prompts.get_length(), 105 | "answer_length": combined_answers.get_length(), 106 | "prompt_word_count": combined_prompts.get_word_count(), 107 | "answer_word_count": combined_answers.get_word_count(), 108 | "prompt_preposition_count": ( 109 | combined_prompts.get_preposition_count() 110 | ), 111 | "prompt_preposition_ratio": ( 112 | combined_prompts.get_preposition_ratio() 113 | ), 114 | "answer_preposition_count": ( 115 | combined_answers.get_preposition_count() 116 | ), 117 | "answer_preposition_ratio": ( 118 | combined_answers.get_preposition_ratio() 119 | ), 120 | "answer_words_not_in_prompt_count": ( 121 | combined_answers.get_words_not_in_others_count( 122 | prompts_textual_analyzers 123 | ) 124 | ), 125 | "answer_words_not_in_prompt_ratio": tuple( 126 | analyzer.get_words_not_in_others_count( 127 | prompts_textual_analyzers 128 | ) 129 | / analyzer.get_word_count() 130 | if analyzer.get_word_count() > 0 131 | else 0.0 132 | for analyzer in answers_textual_analyzers 133 | ), 134 | } 135 | 136 | @_get_texts 137 | def _get_full_profainty_analysis( 138 | self, prompts: Iterable[str], answers: Iterable[str] 139 | ) -> dict: 140 | return { 141 | "prompt_profanity_prob": get_profanity_prob(prompts), 142 | "prompt_has_profanity": get_has_profanity(prompts), 143 | "answer_profanity_prob": get_profanity_prob(answers), 144 | "answer_has_profanity": get_has_profanity(answers), 145 | } 146 | 147 | def get_stream_delta_text_from_choice(self, choice: Mapping) -> str: 148 | return choice["text"] 149 | 150 | def get_final_choice(self, text: str) -> dict: 151 | return {"text": text} 152 | 153 | def get_all_prompt_texts(self, request: Mapping) -> Iterable[str]: 154 | return _get_prompts(request) 155 | 156 | def get_all_response_texts(self, response: Mapping) -> Iterable[str]: 157 | return _get_choices_texts(response) 158 | 
-------------------------------------------------------------------------------- /mona_openai/endpoints/endpoint_wrapping.py: -------------------------------------------------------------------------------- 1 | """ 2 | A module for general logic for wrapping OpenAI endpoints. 3 | """ 4 | import abc 5 | from collections.abc import Iterable, Mapping 6 | from copy import deepcopy 7 | 8 | from ..util.typing_util import SupportedOpenAIClassesType 9 | from ..util.validation_util import validate_openai_class 10 | 11 | # These are keys that might exist in the OpenAI request object that we don't 12 | # want to send to Mona. A sort of blacklist of keys. 13 | INPUT_KEYS_TO_POP = ("api_key", "organization") 14 | 15 | 16 | class OpenAIEndpointWrappingLogic(metaclass=abc.ABCMeta): 17 | """ 18 | An abstract class used for wrapping OpenAI endpoints. Each child of this 19 | class must implement several key logics for wrapping specific OpenAI 20 | endpoints' logic. 21 | 22 | Note that subclasses are not the actual wrappers of the OpenAI endpoint 23 | classes, but provide functions to create the actual wrapper classes. 24 | """ 25 | 26 | def __init__(self, specs): 27 | self._specs = specs 28 | self._analysis_functions = { 29 | "privacy": self._get_full_privacy_analysis, 30 | "textual": self._get_full_textual_analysis, 31 | "profanity": self._get_full_profainty_analysis, 32 | } 33 | 34 | def wrap_class(self, openai_class) -> SupportedOpenAIClassesType: 35 | """ 36 | Returns a monitored class wrapping the given openai class, enriching it 37 | with specific capabilities to be used by an inhereting monitored class. 38 | """ 39 | validate_openai_class(openai_class, self._get_endpoint_name()) 40 | 41 | class WrapperClass(openai_class): 42 | # TODO(itai): Have a smarter way to "import" all the methods to 43 | # this class instead of just copying them. 44 | @classmethod 45 | def _get_full_analysis( 46 | cls, input: Mapping, response: Mapping 47 | ) -> dict: 48 | return self.get_full_analysis(input, response) 49 | 50 | @classmethod 51 | def _get_clean_message(cls, message: Mapping) -> Mapping: 52 | return self.get_clean_message(message) 53 | 54 | @classmethod 55 | def _get_stream_delta_text_from_choice( 56 | cls, choice: Mapping 57 | ) -> str: 58 | return self.get_stream_delta_text_from_choice(choice) 59 | 60 | @classmethod 61 | def _get_final_choice(cls, text: str) -> dict: 62 | return self.get_final_choice(text) 63 | 64 | @classmethod 65 | def _get_all_prompt_texts(cls, request: Mapping) -> Iterable[str]: 66 | return self.get_all_prompt_texts(request) 67 | 68 | @classmethod 69 | def _get_all_response_texts( 70 | cls, response: Mapping 71 | ) -> Iterable[str]: 72 | return self.get_all_response_texts(response) 73 | 74 | return type( 75 | f"Monitored{self._get_endpoint_name()}", (WrapperClass,), {} 76 | ) 77 | 78 | @abc.abstractmethod 79 | def _get_endpoint_name(self) -> str: 80 | """ 81 | Returns the name of the OpenAI endpoint that is being wrapped. 82 | """ 83 | pass 84 | 85 | @abc.abstractmethod 86 | def _internal_get_clean_message(self, message: Mapping) -> Mapping: 87 | """ 88 | This method will be called in child classes for specific message 89 | cleaning logic. 90 | """ 91 | pass 92 | 93 | def get_clean_message(self, message: Mapping) -> Mapping: 94 | """ 95 | Given a mona message, returns a "clean" message in the sense that it 96 | will not hold any information that shouldn't be exported to Mona 97 | (e.g., actual prompts). 
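        For example (illustrative), the "api_key" and "organization" keys are
        always dropped from the message's "input" here, and the
        endpoint-specific _internal_get_clean_message then removes prompt and
        response texts according to the specs.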
98 | """ 99 | new_message = deepcopy(message) 100 | for key in INPUT_KEYS_TO_POP: 101 | new_message["input"].pop(key, None) 102 | return self._internal_get_clean_message(new_message) 103 | 104 | def get_full_analysis(self, input: Mapping, response: Mapping) -> dict: 105 | """ 106 | Returns a dict mapping each analysis type to all related analysis 107 | fields for the given prompt and answers according to the given 108 | specs (if no "analysis" spec is given - return result for all 109 | analysis types). 110 | 111 | TODO(itai): Consider propogating the specs to allow the user to 112 | choose specific anlyses to be made from within each analysis 113 | category. 114 | """ 115 | return { 116 | x: self._analysis_functions[x](input, response) 117 | for x in self._analysis_functions 118 | if self._specs.get("analysis", {}).get(x, True) 119 | } 120 | 121 | @abc.abstractmethod 122 | def _get_full_privacy_analysis( 123 | self, input: Mapping, response: Mapping 124 | ) -> dict: 125 | """ 126 | Returns a dictionary with all calculated privacy analysis params. 127 | """ 128 | pass 129 | 130 | @abc.abstractmethod 131 | def _get_full_textual_analysis( 132 | self, input: Mapping, response: Mapping 133 | ) -> dict: 134 | """ 135 | Returns a dictionary with all calculated textual analysis params. 136 | """ 137 | pass 138 | 139 | @abc.abstractmethod 140 | def _get_full_profainty_analysis( 141 | self, input: Mapping, response: Mapping 142 | ) -> dict: 143 | """ 144 | Returns a dictionary with all calculated profanity analysis params. 145 | """ 146 | pass 147 | 148 | @abc.abstractmethod 149 | def get_stream_delta_text_from_choice(self, choice: Mapping) -> str: 150 | """ 151 | Given a stream response "choice", returns the text from that choice. 152 | """ 153 | pass 154 | 155 | @abc.abstractmethod 156 | def get_final_choice(self, text: str) -> dict: 157 | """ 158 | Returns a dictionary for a "choice" object as it would have been 159 | received from OpenAI's API that holds the given text as the content. 160 | """ 161 | pass 162 | 163 | @abc.abstractmethod 164 | def get_all_prompt_texts(self, request: Mapping) -> Iterable[str]: 165 | """ 166 | Given a request object, returns all the prompt texts from that 167 | request. 168 | """ 169 | pass 170 | 171 | @abc.abstractmethod 172 | def get_all_response_texts(self, response: Mapping) -> Iterable[str]: 173 | """ 174 | Given a response object, returns all the possible response texts. 175 | """ 176 | pass 177 | -------------------------------------------------------------------------------- /mona_openai/endpoints/wrapping_getter.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | 3 | from ..exceptions import WrongOpenAIClassException 4 | from .chat_completion import CHAT_COMPLETION_CLASS_NAME, ChatCompletionWrapping 5 | from .completion import COMPLETION_CLASS_NAME, CompletionWrapping 6 | 7 | # TODO(Itai): This is essetially a nice-looking "switch" statement. We should 8 | # try to use the name to find the exact monitoring-enrichment function and 9 | # filename instead of listing all options here. 
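# An illustrative sketch of that idea (not used here; camel_to_snake would be a
# new helper and the module/class naming convention is an assumption):
#
#   module = importlib.import_module(
#       f".{camel_to_snake(endpoint_name)}", package=__package__
#   )
#   wrapping_class = getattr(module, f"{endpoint_name}Wrapping")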
10 | ENDPOINT_NAME_TO_WRAPPING = { 11 | COMPLETION_CLASS_NAME: CompletionWrapping, 12 | CHAT_COMPLETION_CLASS_NAME: ChatCompletionWrapping, 13 | } 14 | 15 | 16 | def get_endpoint_wrapping(endpoint_name: str, specs: Mapping): 17 | try: 18 | return ENDPOINT_NAME_TO_WRAPPING[endpoint_name](specs) 19 | except KeyError: 20 | raise WrongOpenAIClassException( 21 | f"Not a supported class name: {endpoint_name}" 22 | ) 23 | -------------------------------------------------------------------------------- /mona_openai/exceptions.py: -------------------------------------------------------------------------------- 1 | class WrongOpenAIClassException(Exception): 2 | pass 3 | 4 | 5 | class InvalidSamplingRatioException(Exception): 6 | pass 7 | 8 | 9 | class InvalidMonaCredsException(Exception): 10 | pass 11 | 12 | 13 | class InvalidLagnchainLLMException(Exception): 14 | pass 15 | -------------------------------------------------------------------------------- /mona_openai/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .file_logger import FileLogger 2 | from .in_memory_logging import InMemoryLogger 3 | from .logger import Logger 4 | from .mona_logger.mona_logger import MonaLogger 5 | from .standard_logging import StandardLogger 6 | -------------------------------------------------------------------------------- /mona_openai/loggers/file_logger.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import json 3 | from collections.abc import Mapping 4 | from typing import Optional 5 | 6 | from .logger import Logger 7 | 8 | 9 | class FileLogger(Logger): 10 | """ 11 | A simple logging class that saves monitored data in a file. 12 | """ 13 | 14 | def __init__(self, file_name): 15 | self.file = open(file_name, "w") 16 | 17 | atexit.register(self.close_file) 18 | 19 | def close_file(self) -> None: 20 | if not self.file.closed: 21 | self.file.close() 22 | 23 | def log( 24 | self, 25 | message: Mapping, 26 | context_id: Optional[str] = None, 27 | export_timestamp: Optional[float] = None, 28 | ) -> None: 29 | self.file.writelines( 30 | [ 31 | json.dumps( 32 | { 33 | "message": message, 34 | "context_id": context_id, 35 | "export_timestamp": export_timestamp, 36 | } 37 | ) 38 | ] 39 | ) 40 | 41 | async def alog( 42 | self, 43 | message: Mapping, 44 | context_id: Optional[str] = None, 45 | export_timestamp: Optional[float] = None, 46 | ) -> None: 47 | # TODO: Imlement actual asyncio usage. 48 | return self.log(message, context_id, export_timestamp) 49 | -------------------------------------------------------------------------------- /mona_openai/loggers/in_memory_logging.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | from collections.abc import Mapping 3 | from typing import Optional 4 | 5 | from .logger import Logger 6 | 7 | DEFAULT_MAX_LEN = 1000 8 | 9 | 10 | class InMemoryLogger(Logger): 11 | """ 12 | A simple logging class that saves monitored data in an in-memory list 13 | under self.latest_messages. 
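    Example (illustrative):
        logger = InMemoryLogger(max_len=100)
        logger.log({"answer_length": 42}, context_id="ctx-1")
        logger.latest_messages[-1]["message"]  # -> {"answer_length": 42}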
14 | """ 15 | 16 | def __init__(self, max_len=DEFAULT_MAX_LEN): 17 | self.latest_messages = deque(maxlen=max_len) 18 | 19 | def log( 20 | self, 21 | message: Mapping, 22 | context_id: Optional[str] = None, 23 | export_timestamp: Optional[float] = None, 24 | ) -> None: 25 | self.latest_messages.append( 26 | { 27 | "message": message, 28 | "context_id": context_id, 29 | "export_timestamp": export_timestamp, 30 | } 31 | ) 32 | 33 | async def alog( 34 | self, 35 | message: Mapping, 36 | context_id: Optional[str] = None, 37 | export_timestamp: Optional[float] = None, 38 | ) -> None: 39 | self.log(message, context_id, export_timestamp) 40 | -------------------------------------------------------------------------------- /mona_openai/loggers/logger.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from collections.abc import Mapping 3 | from typing import Optional 4 | 5 | 6 | class Logger(metaclass=abc.ABCMeta): 7 | """ 8 | An abstract class/interface for logging messages containing OpenAI calls 9 | analysis data. 10 | """ 11 | 12 | def start_monitoring(self, openai_class_name: str): 13 | """ 14 | This function will be called once this logger is used for wrapping 15 | an OpenAI class. 16 | Child classes may choose to use this hook in order to run some logic 17 | preliminary to actual logging. 18 | """ 19 | pass 20 | 21 | @abc.abstractmethod 22 | def log( 23 | self, 24 | message: Mapping, 25 | context_id: Optional[str] = None, 26 | export_timestamp: Optional[float] = None, 27 | ): 28 | """ 29 | Every child class must implement this basic function which gets a 30 | dictionary to be logged. 31 | 32 | The interface here allows the logger to get two other parameters: 33 | - context_id: used to trace different logs related to the same 34 | context, in logging mechanisms where such a capability is 35 | relevant. 36 | - export_timestamp: Used to simulate logging of historical data, 37 | allowing the caller to specify when the message was created, in 38 | logging mechanisms where such a capability is relevant. 39 | """ 40 | pass 41 | 42 | @abc.abstractmethod 43 | async def alog( 44 | self, 45 | message: Mapping, 46 | context_id: Optional[str] = None, 47 | export_timestamp: Optional[float] = None, 48 | ): 49 | """ 50 | Child classes should implement this async version of the "log" 51 | function. 52 | """ 53 | pass 54 | -------------------------------------------------------------------------------- /mona_openai/loggers/mona_logger/mona_client.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from mona_sdk.async_client import AsyncClient 4 | from mona_sdk.client import Client 5 | 6 | from ...exceptions import InvalidMonaCredsException 7 | 8 | MONA_API_KEY_KEY = "key" 9 | MONA_API_SECRET_KEY = "secret" 10 | 11 | MonaCredsType = Union[dict, list, tuple] 12 | 13 | 14 | def get_mona_clients(creds): 15 | """ 16 | Returns both a sync and an async mona client for the given 17 | credentials. 18 | 19 | creds: Either a tuple or a dict containing API key and secret for 20 | Mona's API. 21 | """ 22 | if len(creds) != 2: 23 | raise InvalidMonaCredsException( 24 | "There should be exactly two parts to Mona creds. API key and" 25 | " secret." 26 | ) 27 | 28 | # Creds could be in a dict. 
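# For example (illustrative), both forms are accepted and yield the same
# (sync client, async client) pair:
#   get_mona_clients({"key": "MY_MONA_API_KEY", "secret": "MY_MONA_SECRET"})
#   get_mona_clients(("MY_MONA_API_KEY", "MY_MONA_SECRET"))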
29 | if isinstance(creds, dict): 30 | if MONA_API_KEY_KEY not in creds or MONA_API_SECRET_KEY not in creds: 31 | raise InvalidMonaCredsException( 32 | f"Mona creds dict should hold keys:" 33 | f"{MONA_API_KEY_KEY}, {MONA_API_SECRET_KEY}" 34 | ) 35 | return Client( 36 | creds[MONA_API_KEY_KEY], creds[MONA_API_SECRET_KEY] 37 | ), AsyncClient(creds[MONA_API_KEY_KEY], creds[MONA_API_SECRET_KEY]) 38 | 39 | # If creds are not in a dict, they are in a tuple (pair). 40 | if not isinstance(creds[0], str) or not isinstance(creds[1], str): 41 | raise InvalidMonaCredsException( 42 | "Mona API key and secret should both be strings." 43 | ) 44 | 45 | return Client(creds[0], creds[1]), AsyncClient(creds[0], creds[1]) 46 | -------------------------------------------------------------------------------- /mona_openai/loggers/mona_logger/mona_logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections.abc import Callable, Mapping 3 | from typing import Optional 4 | 5 | from mona_sdk import MonaSingleMessage 6 | 7 | from ..logger import Logger 8 | from .mona_client import MonaCredsType, get_mona_clients 9 | 10 | 11 | class MonaLogger(Logger): 12 | def __init__( 13 | self, 14 | mona_creds: MonaCredsType, 15 | context_class: str, 16 | mona_clients_getter: Callable = get_mona_clients, 17 | ): 18 | self.client, self.async_client = mona_clients_getter(mona_creds) 19 | self.context_class = context_class 20 | 21 | def start_monitoring(self, openai_class_name) -> dict: 22 | """ 23 | Calls Mona's server to init the given context class specifically for 24 | the given OpenAI class name. 25 | """ 26 | response = self.client.create_openai_context_class( 27 | self.context_class, openai_class_name 28 | ) 29 | error_message = response.get("error_message") 30 | if error_message: 31 | logging.warning( 32 | f"Problem initializing Mona context class" 33 | f" '{self.context_class}': {error_message}" 34 | ) 35 | else: 36 | logging.info( 37 | f"Made sure Mona context class '{self.context_class}' " 38 | "is initialised" 39 | ) 40 | return response 41 | 42 | def log( 43 | self, 44 | message: Mapping, 45 | context_id: Optional[str] = None, 46 | export_timestamp: Optional[float] = None, 47 | ) -> None: 48 | """ 49 | Logs the given message to Mona. 50 | """ 51 | return self.client.export( 52 | MonaSingleMessage( 53 | message=message, 54 | contextClass=self.context_class, 55 | contextId=context_id, 56 | exportTimestamp=export_timestamp, 57 | ) 58 | ) 59 | 60 | async def alog( 61 | self, 62 | message: Mapping, 63 | context_id: Optional[str] = None, 64 | export_timestamp: Optional[float] = None, 65 | ) -> None: 66 | """ 67 | Async logs the given message to Mona. 68 | """ 69 | return await self.async_client.export_async( 70 | MonaSingleMessage( 71 | message=message, 72 | contextClass=self.context_class, 73 | contextId=context_id, 74 | exportTimestamp=export_timestamp, 75 | ) 76 | ) 77 | -------------------------------------------------------------------------------- /mona_openai/loggers/standard_logging.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from logging import INFO, getLogger 3 | from typing import Optional 4 | 5 | from .logger import Logger 6 | 7 | 8 | class StandardLogger(Logger): 9 | """ 10 | A simple logging class that logs monitored data using python's logging 11 | package. 
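    Example (illustrative):
        StandardLogger(WARNING) emits each monitored message as a dict through
        logging.getLogger("Mona").log(WARNING, {...}); pass underlying_logger
        to route the output through a logger of your own instead.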
12 | """ 13 | 14 | def __init__( 15 | self, 16 | logging_level=INFO, 17 | underlying_logger=None, 18 | ): 19 | self.underlying_logger = underlying_logger or getLogger("Mona") 20 | self.level = logging_level 21 | 22 | def log( 23 | self, 24 | message: Mapping, 25 | context_id: Optional[str] = None, 26 | export_timestamp: Optional[float] = None, 27 | ) -> None: 28 | self.underlying_logger.log( 29 | self.level, 30 | { 31 | "message": message, 32 | "context_id": context_id, 33 | "export_timestamp": export_timestamp, 34 | }, 35 | ) 36 | 37 | async def alog( 38 | self, 39 | message: Mapping, 40 | context_id: Optional[str] = None, 41 | export_timestamp: Optional[float] = None, 42 | ) -> None: 43 | return self.log(message, context_id, export_timestamp) 44 | 45 | def start_monitoring(self, openai_class_name: str) -> None: 46 | self.underlying_logger.log( 47 | self.level, f"Started monitoring for OpenAI's {openai_class_name}" 48 | ) 49 | -------------------------------------------------------------------------------- /mona_openai/mona_openai_client.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from collections.abc import Mapping, Callable 3 | from .loggers.mona_logger.mona_client import MonaCredsType, get_mona_clients 4 | from .loggers.mona_logger.mona_logger import MonaLogger 5 | from .util.general_consts import EMPTY_DICT 6 | from .util.validation_util import validate_and_get_sampling_ratio 7 | from .endpoints.chat_completion import CHAT_COMPLETION_CLASS_NAME 8 | from .mona_openai_create import create_logic 9 | from .util.async_util import ( 10 | run_in_an_event_loop, 11 | ) 12 | from .mona_openai_logging import get_logging_message_for_create 13 | from .endpoints.wrapping_getter import get_endpoint_wrapping 14 | 15 | 16 | def monitor_client_with_logger(openai_client, logger, specs=EMPTY_DICT): 17 | sampling_ratio = validate_and_get_sampling_ratio(specs) 18 | 19 | # TODO(itai): We currently support only chat completion and use the legacy 20 | # class name. This should be changed and the library refactored to 21 | # support the different opneAI endpoints. 22 | logger.start_monitoring(CHAT_COMPLETION_CLASS_NAME) 23 | 24 | original_create = openai_client.chat.completions.create 25 | 26 | wrapping_logic = get_endpoint_wrapping(CHAT_COMPLETION_CLASS_NAME, specs) 27 | 28 | def _get_logging_message( 29 | kwargs_param: Mapping, 30 | start_time: float, 31 | is_exception: bool, 32 | is_async: bool, 33 | stream_start_time: float, 34 | response: Mapping, 35 | ) -> dict: 36 | """ 37 | Returns a dict to be used for data logging. 
38 | """ 39 | return get_logging_message_for_create( 40 | CHAT_COMPLETION_CLASS_NAME, 41 | wrapping_logic.get_full_analysis, 42 | wrapping_logic.get_clean_message, 43 | kwargs_param, 44 | start_time, 45 | is_exception, 46 | is_async, 47 | stream_start_time, 48 | response, 49 | ) 50 | 51 | def wrapped_create(*args, **kwargs): 52 | return run_in_an_event_loop( 53 | create_logic( 54 | False, 55 | logger.log, 56 | original_create, 57 | _get_logging_message, 58 | sampling_ratio, 59 | wrapping_logic.get_all_prompt_texts, 60 | wrapping_logic.get_all_response_texts, 61 | wrapping_logic.get_stream_delta_text_from_choice, 62 | wrapping_logic.get_final_choice, 63 | specs, 64 | args, 65 | kwargs, 66 | ) 67 | ) 68 | 69 | async def async_wrapped_create(*args, **kwargs): 70 | return await create_logic( 71 | True, 72 | logger.alog, 73 | original_create, 74 | _get_logging_message, 75 | sampling_ratio, 76 | wrapping_logic.get_all_prompt_texts, 77 | wrapping_logic.get_all_response_texts, 78 | wrapping_logic.get_stream_delta_text_from_choice, 79 | wrapping_logic.get_final_choice, 80 | specs, 81 | args, 82 | kwargs, 83 | ) 84 | 85 | openai_client.chat.completions.create = ( 86 | async_wrapped_create 87 | if isinstance(openai_client, openai.AsyncOpenAI) 88 | else wrapped_create 89 | ) 90 | return openai_client 91 | 92 | 93 | def monitor_client( 94 | openai_client, 95 | mona_creds: MonaCredsType, 96 | context_class: str, 97 | specs: Mapping = EMPTY_DICT, 98 | mona_clients_getter: Callable = get_mona_clients, 99 | ): 100 | """ 101 | A simple wrapper around "monitor_with_logger" to use with a Mona logger. 102 | See "monitor_with_logger" for full documentation. 103 | 104 | Args: 105 | openai_class: An OpenAI API class to wrap with monitoring 106 | capabilties. 107 | mona_creds: Either a dict or pair of Mona API key and secret to 108 | set up Mona's clients from its SDK 109 | context_class: The Mona context class name to use for 110 | monitoring. Use a name of your choice. 111 | specs: A dictionary of specifications such as monitoring 112 | sampling ratio. 113 | mona_clients_getter: Used only for testing purposes. 
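    Example (illustrative; the model name and the OpenAI v1 client usage are
    assumptions of this sketch):
        client = monitor_client(
            OpenAI(), ("MONA_API_KEY", "MONA_SECRET"), "MY_CONTEXT_CLASS"
        )
        client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hi"}],
        )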
114 | """ 115 | return monitor_client_with_logger( 116 | openai_client, 117 | MonaLogger(mona_creds, context_class, mona_clients_getter), 118 | specs, 119 | ) 120 | -------------------------------------------------------------------------------- /mona_openai/mona_openai_create.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections.abc import Callable 3 | from .util.async_util import call_non_blocking_sync_or_async 4 | from .util.func_util import add_conditional_sampling 5 | from .util.openai_util import get_model_param 6 | from .util.stream_util import ResponseGatheringIterator 7 | from .util.tokens_util import get_usage 8 | from .util.object_util import get_subscriptable_obj 9 | 10 | MONA_ARGS_PREFIX = "MONA_" 11 | CONTEXT_ID_ARG_NAME = MONA_ARGS_PREFIX + "context_id" 12 | EXPORT_TIMESTAMP_ARG_NAME = MONA_ARGS_PREFIX + "export_timestamp" 13 | ADDITIONAL_DATA_ARG_NAME = MONA_ARGS_PREFIX + "additional_data" 14 | 15 | 16 | async def create_logic( 17 | is_async, 18 | export_function: Callable, 19 | super_function: Callable, 20 | logging_message_getter, 21 | sampling_ratio, 22 | all_prompt_texts_getter, 23 | all_response_texts_getter, 24 | stream_delta_text_from_choice_getter, 25 | final_choice_getter, 26 | specs, 27 | args, 28 | kwargs, 29 | ): 30 | is_stream = kwargs.get("stream", False) 31 | 32 | response = None 33 | 34 | # will be used only when stream is enabled 35 | stream_start_time = None 36 | 37 | async def _inner_log_message(is_exception): 38 | subscriptable_response = get_subscriptable_obj(response) 39 | 40 | return await call_non_blocking_sync_or_async( 41 | export_function, 42 | ( 43 | logging_message_getter( 44 | kwargs, 45 | start_time, 46 | is_exception, 47 | is_async, 48 | stream_start_time, 49 | subscriptable_response, 50 | ), 51 | kwargs.get( 52 | CONTEXT_ID_ARG_NAME, 53 | subscriptable_response.get("id", None) 54 | if subscriptable_response 55 | else None, 56 | ), 57 | kwargs.get(EXPORT_TIMESTAMP_ARG_NAME, start_time), 58 | ), 59 | ) 60 | 61 | log_message = add_conditional_sampling(_inner_log_message, sampling_ratio) 62 | 63 | start_time = time.time() 64 | 65 | async def inner_super_function(): 66 | # Call the actual openai create function without the Mona 67 | # specific arguments. 68 | return await call_non_blocking_sync_or_async( 69 | super_function, 70 | args, 71 | { 72 | x: kwargs[x] 73 | for x in kwargs 74 | if not x.startswith(MONA_ARGS_PREFIX) 75 | }, 76 | ) 77 | 78 | async def inner_handle_exception(): 79 | if not specs.get("avoid_monitoring_exceptions", False): 80 | await log_message(True) 81 | 82 | if not is_stream: 83 | try: 84 | response = await inner_super_function() 85 | except Exception: 86 | await inner_handle_exception() 87 | raise 88 | 89 | await log_message(False) 90 | 91 | return response 92 | 93 | async def _stream_done_callback(final_response, actual_stream_start_time): 94 | nonlocal response 95 | nonlocal stream_start_time 96 | # There is no usage data in returned stream responses, so 97 | # we add it here. 98 | response = final_response | { 99 | "usage": get_usage( 100 | model=get_model_param(kwargs), 101 | prompt_texts=all_prompt_texts_getter(kwargs), 102 | response_texts=all_response_texts_getter(final_response), 103 | ) 104 | } 105 | stream_start_time = actual_stream_start_time 106 | await log_message(False) 107 | 108 | try: 109 | # Call the actual openai create function without the Mona 110 | # specific arguments. 
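# For streamed responses logging is deferred: the iterator returned below
# yields the stream to the caller while gathering the deltas, and only once
# the stream is exhausted does it invoke _stream_done_callback above, which
# rebuilds the final response (including token usage) and logs it.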
111 | return ResponseGatheringIterator( 112 | original_iterator=await inner_super_function(), 113 | delta_choice_text_getter=(stream_delta_text_from_choice_getter), 114 | final_choice_getter=final_choice_getter, 115 | callback=_stream_done_callback, 116 | ) 117 | 118 | except Exception: 119 | await inner_handle_exception() 120 | raise 121 | -------------------------------------------------------------------------------- /mona_openai/mona_openai_legacy.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections.abc import Callable, Mapping 3 | from typing import Optional 4 | 5 | from .endpoints.wrapping_getter import get_endpoint_wrapping 6 | from .exceptions import InvalidLagnchainLLMException 7 | from .loggers.logger import Logger 8 | from .loggers.mona_logger.mona_client import MonaCredsType, get_mona_clients 9 | from .loggers.mona_logger.mona_logger import MonaLogger 10 | from .util.async_util import ( 11 | run_in_an_event_loop, 12 | ) 13 | from .util.func_util import add_conditional_sampling 14 | from .util.general_consts import EMPTY_DICT 15 | from .util.typing_util import SupportedOpenAIClassesType 16 | from .util.validation_util import validate_and_get_sampling_ratio 17 | from .mona_openai_create import create_logic 18 | from .mona_openai_logging import ( 19 | get_logging_message, 20 | get_logging_message_for_create, 21 | ) 22 | 23 | MONA_ARGS_PREFIX = "MONA_" 24 | CONTEXT_ID_ARG_NAME = MONA_ARGS_PREFIX + "context_id" 25 | EXPORT_TIMESTAMP_ARG_NAME = MONA_ARGS_PREFIX + "export_timestamp" 26 | ADDITIONAL_DATA_ARG_NAME = MONA_ARGS_PREFIX + "additional_data" 27 | 28 | 29 | # TODO(itai): Consider creating some sturct (as NamedTuple or dataclass) for 30 | # the specs param. 31 | 32 | 33 | def monitor( 34 | openai_class: SupportedOpenAIClassesType, 35 | mona_creds: MonaCredsType, 36 | context_class: str, 37 | specs: Mapping = EMPTY_DICT, 38 | mona_clients_getter: Callable = get_mona_clients, 39 | ) -> SupportedOpenAIClassesType: 40 | """ 41 | A simple wrapper around "monitor_with_logger" to use with a Mona logger. 42 | See "monitor_with_logger" for full documentation. 43 | 44 | Args: 45 | openai_class: An OpenAI API class to wrap with monitoring 46 | capabilties. 47 | mona_creds: Either a dict or pair of Mona API key and secret to 48 | set up Mona's clients from its SDK 49 | context_class: The Mona context class name to use for 50 | monitoring. Use a name of your choice. 51 | specs: A dictionary of specifications such as monitoring 52 | sampling ratio. 53 | mona_clients_getter: Used only for testing purposes. 54 | """ 55 | return monitor_with_logger( 56 | openai_class, 57 | MonaLogger(mona_creds, context_class, mona_clients_getter), 58 | specs, 59 | ) 60 | 61 | 62 | def monitor_with_logger(openai_class, logger, specs=EMPTY_DICT): 63 | """ 64 | Returns a Wrapped version of a given OpenAI class with monitoring logic. 65 | 66 | You can use the returned class' "create" and "acreate" functions 67 | exactly as you would the original class, and monitoring will be 68 | taken care of for you. 69 | 70 | This client will automatically monitor for you things like latency, 71 | prompt and response lengths, number of tokens, etc., along with any 72 | endpoint parameter usage (e.g., it tracks the "temperature" and 73 | "max_tokens" params you use). 74 | 75 | The logic for what to do with the calculated analysis data is set by the 76 | given logger object. 
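    Example (illustrative):
        monitored_completion = monitor_with_logger(
            openai.Completion, StandardLogger()
        )
        monitored_completion.create(
            model="text-ada-001", prompt="Hello ", max_tokens=5
        )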
77 | 78 | You can also add other named args when calling "create" or 79 | "acreate" by using a new named argument called 80 | "MONA_additional_data" and set it to any JSON serializable 81 | dictionary. 82 | This allows you to add metadata about the call such as a prompt 83 | template ID, information about the context in which the API call is 84 | made, etc... 85 | 86 | Furthermore, you can add to create/acreate functions mona specific 87 | arguments: 88 | MONA_context_id: The unique id of the context in which the call 89 | is made. By using this ID you can export more data to Mona 90 | to the same context from other places. If not used, the 91 | "id" field of the OpenAI Endpoint's response will be used. 92 | MONA_export_timestamp: Can be used to simulate as if the 93 | current call was made in a different time, as far as Mona 94 | is concerned. 95 | 96 | Args: 97 | openai_class: An OpenAI API class to wrap with monitoring 98 | capabilties. 99 | logger: A logger object used to log out the calculated analysis. 100 | specs: A dictionary of specifications such as monitoring 101 | sampling ratio. 102 | """ 103 | 104 | sampling_ratio = validate_and_get_sampling_ratio(specs) 105 | 106 | base_class = get_endpoint_wrapping( 107 | openai_class.__name__, specs 108 | ).wrap_class(openai_class) 109 | 110 | logger.start_monitoring(openai_class.__name__) 111 | 112 | class MonitoredOpenAI(base_class): 113 | """ 114 | A monitored version of an openai API class. 115 | """ 116 | 117 | @classmethod 118 | def _get_logging_message( 119 | cls, 120 | kwargs_param: Mapping, 121 | start_time: float, 122 | is_exception: bool, 123 | is_async: bool, 124 | stream_start_time: float, 125 | response: Mapping, 126 | ) -> dict: 127 | """ 128 | Returns a dict to be used for data logging. 129 | """ 130 | return get_logging_message_for_create( 131 | openai_class.__name__, 132 | super()._get_full_analysis, 133 | super()._get_clean_message, 134 | kwargs_param, 135 | start_time, 136 | is_exception, 137 | is_async, 138 | stream_start_time, 139 | response, 140 | ) 141 | 142 | @classmethod 143 | async def _inner_create( 144 | cls, 145 | export_function: Callable, 146 | super_function: Callable, 147 | args, 148 | kwargs, 149 | ): 150 | """ 151 | The main logic for wrapping create functions with monitoring data 152 | logging. 153 | This internal function porovides a template for both sync 154 | and async activations (helps with wrapping both "create" 155 | and "acreate"). 156 | """ 157 | return await create_logic( 158 | super_function.__name__ == "acreate", 159 | export_function, 160 | super_function, 161 | cls._get_logging_message, 162 | sampling_ratio, 163 | base_class._get_all_prompt_texts, 164 | base_class._get_all_response_texts, 165 | base_class._get_stream_delta_text_from_choice, 166 | base_class._get_final_choice, 167 | specs, 168 | args, 169 | kwargs, 170 | ) 171 | 172 | @classmethod 173 | def create(cls, *args, **kwargs) -> dict: 174 | """ 175 | A monitored version of the openai base class' "create" 176 | function. 177 | """ 178 | return run_in_an_event_loop( 179 | cls._inner_create(logger.log, super().create, args, kwargs) 180 | ) 181 | 182 | @classmethod 183 | async def acreate(cls, *args, **kwargs) -> dict: 184 | """ 185 | An async monitored version of the openai base class' 186 | "acreate" function. 
187 | """ 188 | return await cls._inner_create( 189 | logger.alog, super().acreate, args, kwargs 190 | ) 191 | 192 | return type(base_class.__name__, (MonitoredOpenAI,), {}) 193 | 194 | 195 | def get_rest_monitor( 196 | openai_endpoint_name: str, 197 | mona_creds: MonaCredsType, 198 | context_class: str, 199 | specs: Mapping = EMPTY_DICT, 200 | mona_clients_getter: Callable = get_mona_clients, 201 | ) -> type: 202 | """ 203 | A wrapper around get_rest_monitor_with_logger that automatically uses 204 | a Mona logger. 205 | """ 206 | return get_rest_monitor_with_logger( 207 | openai_endpoint_name, 208 | MonaLogger(mona_creds, context_class, mona_clients_getter), 209 | specs, 210 | ) 211 | 212 | 213 | def get_rest_monitor_with_logger( 214 | # TODO(itai): Consider understanding endpoint name from complete url. 215 | openai_endpoint_name: str, 216 | logger: Logger, 217 | specs: Mapping = EMPTY_DICT, 218 | ) -> type: 219 | """ 220 | Returns a client class for monitoring OpenAI REST calls not done 221 | using the OpenAI python client (e.g., for Azure users using their 222 | endpoints directly). This isn't a wrapper for any http requesting 223 | library and doesn't call the OpenAI API for you - it's just an easy 224 | logging client to log requests, responses and exceptions. 225 | """ 226 | 227 | logger.start_monitoring(openai_endpoint_name) 228 | 229 | sampling_ratio = validate_and_get_sampling_ratio(specs) 230 | 231 | wrapping_logic = get_endpoint_wrapping(openai_endpoint_name, specs) 232 | 233 | class RestClient: 234 | """ 235 | This will be the returned monitoring class. We follow 236 | OpenAI's way of doing things by using a static classe with 237 | relevant class methods. 238 | """ 239 | 240 | @classmethod 241 | def _inner_log_request( 242 | cls, 243 | message_logging_function: Callable, 244 | request_dict: Mapping, 245 | additional_data: Mapping = EMPTY_DICT, 246 | context_id: Optional[str] = None, 247 | export_timestamp: Optional[float] = None, 248 | ) -> tuple[Callable, Callable]: 249 | """ 250 | Actual logic for logging requests, responses and exceptions. 251 | """ 252 | start_time = time.time() 253 | 254 | if additional_data is None: 255 | additional_data = EMPTY_DICT 256 | 257 | def _inner_log_message( 258 | is_exception: bool, 259 | more_additional_data: Mapping, 260 | response: Optional[Mapping] = None, 261 | ): 262 | return message_logging_function( 263 | get_logging_message( 264 | api_name=openai_endpoint_name, 265 | request_input=request_dict, 266 | start_time=start_time, 267 | is_exception=is_exception, 268 | is_async=False, 269 | # TODO(itai): Support stream in REST as well. 270 | stream_start_time=None, 271 | response=response, 272 | analysis_getter=wrapping_logic.get_full_analysis, 273 | message_cleaner=wrapping_logic.get_clean_message, 274 | additional_data={ 275 | **additional_data, 276 | **more_additional_data, 277 | }, 278 | ), 279 | context_id, 280 | export_timestamp, 281 | ) 282 | 283 | log_message = add_conditional_sampling( 284 | _inner_log_message, sampling_ratio 285 | ) 286 | 287 | def log_response( 288 | response: Mapping, additional_data: Mapping = EMPTY_DICT 289 | ): 290 | """ 291 | Only when this function is called, will data be logged 292 | out. This function should be called with a 293 | response object from the OpenAI API as close as 294 | possible to when it is received to allow accurate 295 | latency logging. 
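A rough sketch of the full request/response logging flow (the
"requests" call, url, headers, request_dict and my_logger below are
placeholders and not part of this package):

    import requests

    rest_client = get_rest_monitor_with_logger("Completion", my_logger)
    log_response, log_exception = rest_client.log_request(request_dict)
    try:
        response = requests.post(url, json=request_dict, headers=headers)
        log_response(response.json())
    except Exception:
        log_exception()
        raise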
296 | """ 297 | return log_message( 298 | False, 299 | more_additional_data=additional_data, 300 | response=response, 301 | ) 302 | 303 | def log_exception(additional_data: Mapping = EMPTY_DICT): 304 | return log_message(True, more_additional_data=additional_data) 305 | 306 | return log_response, log_exception 307 | 308 | @classmethod 309 | def log_request( 310 | cls, 311 | request_dict: Mapping, 312 | additional_data: Mapping = EMPTY_DICT, 313 | context_id: Optional[str] = None, 314 | export_timestamp: Optional[float] = None, 315 | ): 316 | """ 317 | Sets up logging for OpenAI request/response objects. 318 | 319 | This function should be called with a request data dict, 320 | for example, what you would use as "json" when using 321 | "requests" to post. 322 | 323 | It returns a response logging function to be used with the 324 | response object, as well as an exception logging function in case 325 | of exceptions. 326 | 327 | Note that this call does not log anything until one of the 328 | returned callbacks is called. 329 | """ 330 | return cls._inner_log_request( 331 | logger.log, 332 | request_dict, 333 | additional_data, 334 | context_id, 335 | export_timestamp, 336 | ) 337 | 338 | @classmethod 339 | def async_log_request( 340 | cls, 341 | request_dict: Mapping, 342 | additional_data: Mapping = EMPTY_DICT, 343 | context_id: Optional[str] = None, 344 | export_timestamp: Optional[float] = None, 345 | ): 346 | """ 347 | Async version of "log_request". See function's docstring for more 348 | details. 349 | """ 350 | return cls._inner_log_request( 351 | logger.alog, 352 | request_dict, 353 | additional_data, 354 | context_id, 355 | export_timestamp, 356 | ) 357 | 358 | return RestClient 359 | 360 | 361 | def _validate_langchain_llm(llm) -> None: 362 | if not hasattr(llm, "client"): 363 | raise InvalidLagnchainLLMException( 364 | "LLM has no client attribute - must be an OpenAI LLM" 365 | ) 366 | 367 | 368 | def monitor_langchain_llm( 369 | llm, 370 | mona_creds: MonaCredsType, 371 | context_class: str, 372 | specs: Mapping = EMPTY_DICT, 373 | mona_clients_getter: Callable = get_mona_clients, 374 | ): 375 | """ 376 | Wraps given llm with automatic mona-monitoring logic. 377 | """ 378 | _validate_langchain_llm(llm) 379 | llm.client = monitor( 380 | llm.client, mona_creds, context_class, specs, mona_clients_getter 381 | ) 382 | return llm 383 | 384 | 385 | def monitor_langchain_llm_with_logger( 386 | llm, logger: Logger, specs: Mapping = EMPTY_DICT 387 | ): 388 | """ 389 | Wraps given llm with monitoring logic, logging the analysis with the given 390 | logger. 
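An illustrative sketch, assuming the legacy LangChain OpenAI LLM
wrapper (which exposes the underlying OpenAI class through its
"client" attribute) and a placeholder "my_logger":

    from langchain.llms import OpenAI

    llm = monitor_langchain_llm_with_logger(OpenAI(temperature=0.6), my_logger)
    print(llm("Tell me a short joke"))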
391 | """ 392 | _validate_langchain_llm(llm) 393 | llm.client = monitor_with_logger(llm.client, logger, specs) 394 | return llm 395 | -------------------------------------------------------------------------------- /mona_openai/mona_openai_logging.py: -------------------------------------------------------------------------------- 1 | import time 2 | from copy import deepcopy 3 | from collections.abc import Callable, Mapping 4 | from typing import Optional 5 | from .util.general_consts import EMPTY_DICT 6 | 7 | MONA_ARGS_PREFIX = "MONA_" 8 | ADDITIONAL_DATA_ARG_NAME = MONA_ARGS_PREFIX + "additional_data" 9 | 10 | 11 | def get_logging_message( 12 | api_name: str, 13 | request_input: Mapping, 14 | start_time: float, 15 | is_exception: bool, 16 | is_async: bool, 17 | stream_start_time: Optional[float], 18 | response: Optional[Mapping], 19 | analysis_getter: Callable[[Mapping, Mapping], dict], 20 | message_cleaner: Callable[[Mapping], dict], 21 | additional_data: Mapping, 22 | ) -> dict: 23 | """ 24 | Returns a dict object containing all the monitoring analysis to be used 25 | for data logging. 26 | """ 27 | 28 | message = { 29 | "input": request_input, 30 | "latency": time.time() - start_time, 31 | "stream_start_latency": stream_start_time - start_time 32 | if stream_start_time is not None 33 | else None, 34 | "is_exception": is_exception, 35 | "api_name": api_name, 36 | "is_async": is_async, 37 | } 38 | 39 | if additional_data: 40 | message["additional_data"] = additional_data 41 | 42 | if response: 43 | message["response"] = response 44 | message["analysis"] = analysis_getter(request_input, response) 45 | 46 | return message_cleaner(message) 47 | 48 | 49 | def get_logging_message_for_create( 50 | api_name, 51 | analysis_getter, 52 | message_cleaner, 53 | kwargs_param: Mapping, 54 | start_time: float, 55 | is_exception: bool, 56 | is_async: bool, 57 | stream_start_time: float, 58 | response: Mapping, 59 | ) -> dict: 60 | """ 61 | Returns a dict to be used for data logging. 62 | """ 63 | # Recreate the input dict to avoid manipulating the caller's data, 64 | # and remove Mona-related data. 65 | request_input = deepcopy( 66 | { 67 | x: kwargs_param[x] 68 | for x in kwargs_param 69 | if not x.startswith(MONA_ARGS_PREFIX) 70 | } 71 | ) 72 | 73 | additional_data: Mapping = kwargs_param.get( 74 | ADDITIONAL_DATA_ARG_NAME, EMPTY_DICT 75 | ) 76 | 77 | return get_logging_message( 78 | api_name=api_name, 79 | request_input=request_input, 80 | start_time=start_time, 81 | is_exception=is_exception, 82 | is_async=is_async, 83 | stream_start_time=stream_start_time, 84 | response=response, 85 | analysis_getter=analysis_getter, 86 | message_cleaner=message_cleaner, 87 | additional_data=additional_data, 88 | ) 89 | -------------------------------------------------------------------------------- /mona_openai/util/async_util.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections.abc import Callable, Coroutine, Mapping 3 | from inspect import iscoroutinefunction, ismethod 4 | 5 | import nest_asyncio 6 | 7 | from .general_consts import EMPTY_DICT 8 | 9 | 10 | def run_in_an_event_loop(coroutine: Coroutine): 11 | """ 12 | A light wrapper around asyncio.run to avoid crushing when trying to run a 13 | coroutine in an environment where an event loop is already in place and 14 | asyncio.run doesn't work. 
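A minimal sketch of the intended behavior (it works both in a plain
script and inside an already-running loop such as a notebook, thanks
to the nest_asyncio fallback below):

    async def add(a, b):
        return a + b

    result = run_in_an_event_loop(add(1, 2))  # -> 3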
15 | """ 16 | try: 17 | return asyncio.run(coroutine) 18 | except RuntimeError: 19 | try: 20 | return asyncio.get_event_loop().run_until_complete(coroutine) 21 | except RuntimeError: 22 | # This happens in environments that already have an event loop 23 | # that is "run forever". We therefor must allow a "nested" event 24 | # loop that we can run within the main loop. 25 | nest_asyncio.apply() 26 | return asyncio.run(coroutine) 27 | 28 | 29 | def _is_async_function_or_method(obj): 30 | # Check if it's a coroutine function 31 | if iscoroutinefunction(obj): 32 | return True 33 | 34 | # If it's a bound method, check the function of the method 35 | if ismethod(obj): 36 | return asyncio.iscoroutinefunction(obj.__func__) 37 | 38 | return False 39 | 40 | 41 | async def call_non_blocking_sync_or_async( 42 | function: Callable, func_args=(), func_kwargs: Mapping = EMPTY_DICT 43 | ): 44 | """ 45 | A higher order function that allows calling both sync and async 46 | functions as if they were async, avoid blocking when relevant, and 47 | maintain one code base for both cases. 48 | """ 49 | try: 50 | return await function(*func_args, **func_kwargs) 51 | except TypeError: 52 | return function(*func_args, **func_kwargs) 53 | -------------------------------------------------------------------------------- /mona_openai/util/func_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | A module with utility (usually higher order) functions for enriching 3 | and extending other functions' functionalities. 4 | """ 5 | from collections.abc import Callable 6 | from random import random 7 | 8 | 9 | def add_conditional_sampling( 10 | inner_func: Callable, sampling_ratio: float 11 | ) -> Callable: 12 | """ 13 | A higher order function that returns a "sampled" version of the 14 | given inner function only if needed. This allows for adding 15 | sampling mechanisms while avoiding conditionals or random number 16 | creations when either is not necessary. 17 | """ 18 | 19 | async def _sampled_func(*args, **kwargs): 20 | if random() < sampling_ratio: 21 | return await inner_func(*args, **kwargs) 22 | 23 | return inner_func if sampling_ratio == 1 else _sampled_func 24 | -------------------------------------------------------------------------------- /mona_openai/util/general_consts.py: -------------------------------------------------------------------------------- 1 | from types import MappingProxyType 2 | 3 | EMPTY_DICT: MappingProxyType = MappingProxyType({}) 4 | -------------------------------------------------------------------------------- /mona_openai/util/object_util.py: -------------------------------------------------------------------------------- 1 | def _object_to_dict(obj): 2 | """ 3 | Recursively converts an object's attributes to a dictionary. 
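A small illustrative sketch (the "Choice" class here is made up):

    class Choice:
        def __init__(self):
            self.index = 0
            self.text = "hello"

    _object_to_dict(Choice())  # -> {"index": 0, "text": "hello"}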
4 | """ 5 | if isinstance(obj, (int, float, str, bool, type(None))): 6 | # Base case for simple types 7 | return obj 8 | 9 | if isinstance(obj, (list, tuple)): 10 | # Process each item in list or tuple recursively 11 | return [_object_to_dict(item) for item in obj] 12 | 13 | if hasattr(obj, "__dict__"): 14 | # Process each attribute of the object recursively 15 | return { 16 | attr: _object_to_dict(getattr(obj, attr)) 17 | for attr in dir(obj) 18 | if not attr.startswith("__") and not callable(getattr(obj, attr)) 19 | } 20 | 21 | return str(obj) # Fallback for unhandled types 22 | 23 | 24 | def get_subscriptable_obj(obj): 25 | # Check if the object is already subscriptable (like dict, list, tuple) 26 | if isinstance(obj, (dict, list, tuple)): 27 | return obj 28 | else: 29 | return _object_to_dict(obj) 30 | -------------------------------------------------------------------------------- /mona_openai/util/openai_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Util logic for OpenAI related logic and language (e.g., dealing with API 3 | parameter names). 4 | """ 5 | 6 | 7 | def get_model_param(request: dict) -> str: 8 | """ 9 | Returns the "model" param in the request, the "engine" param if no 10 | "model" param is used, and None if neither exists (which isn't expected 11 | to happen) 12 | """ 13 | return request.get("model", request.get("engine")) 14 | -------------------------------------------------------------------------------- /mona_openai/util/stream_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | A util module for everything related to supporting streams. 3 | """ 4 | import inspect 5 | import time 6 | from collections.abc import Callable, Iterator 7 | from typing import Optional 8 | 9 | from .async_util import run_in_an_event_loop 10 | from .object_util import get_subscriptable_obj 11 | 12 | 13 | class ResponseGatheringIterator: 14 | """ 15 | A generator class that takes an original OpenAI stream response generator 16 | and wraps it with functionality to gather all the stream of responses as 17 | they come, and create from them a singular reponse object as would have 18 | been received in non-stream OpenAI usage. 19 | 20 | Once the original generator is done it creates the full response and calls 21 | a callback with it. 22 | 23 | It acts both as sync and async generator to ease the use of sync/async 24 | joint code. 
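A rough usage sketch (the getters, callback and "openai_stream" below
are placeholders; in this package they are supplied by the endpoint
wrapping logic):

    def on_done(full_response, stream_start_time):
        print(full_response["choices"][0])

    wrapped = ResponseGatheringIterator(
        delta_choice_text_getter=lambda choice: choice["delta"].get("content"),
        final_choice_getter=lambda text: {"message": {"content": text}},
        original_iterator=openai_stream,
        callback=on_done,
    )
    for event in wrapped:
        pass  # consume events exactly as you would the original stream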
25 | """ 26 | 27 | def __init__( 28 | self, 29 | delta_choice_text_getter: Callable, 30 | final_choice_getter: Callable, 31 | original_iterator: Iterator, 32 | callback: Callable, 33 | ) -> None: 34 | self._original_iterator = original_iterator 35 | self._delta_choice_text_getter = delta_choice_text_getter 36 | self._final_choice_getter = final_choice_getter 37 | self._callback = callback 38 | self._initial_event_recieved_time: Optional[float] = None 39 | self._common_response_information: dict = {} 40 | self._choices: dict = {} 41 | 42 | def __iter__(self): 43 | return self 44 | 45 | def __aiter__(self): 46 | return self 47 | 48 | def __next__(self): 49 | try: 50 | return self._add_response(self._original_iterator.__next__()) 51 | except StopIteration: 52 | self._call_callback() 53 | raise 54 | 55 | async def __anext__(self): 56 | try: 57 | return self._add_response( 58 | await self._original_iterator.__anext__() 59 | ) 60 | except StopAsyncIteration: 61 | await self._a_call_callback() 62 | raise 63 | 64 | def _add_response(self, event: dict) -> dict: 65 | """ 66 | The main and only exposed function of the ResponseGatherer class. Use 67 | this function to collect stream events. 68 | """ 69 | subscriptable_event = get_subscriptable_obj(event) 70 | if self._initial_event_recieved_time is None: 71 | self._initial_event_recieved_time = time.time() 72 | self._common_response_information = { 73 | x: subscriptable_event[x] 74 | for x in subscriptable_event 75 | if x != "choices" 76 | } 77 | 78 | # Gather response events by choice index. 79 | self._handle_choice(self._get_only_choice(subscriptable_event)) 80 | 81 | return event 82 | 83 | def _call_callback(self): 84 | # We allow an async function as the callback event if this class is 85 | # used as a sync generator. This code handles this scenario. 86 | callback_args = ( 87 | self._create_singular_response(), 88 | self._initial_event_recieved_time, 89 | ) 90 | if inspect.iscoroutinefunction(self._callback): 91 | run_in_an_event_loop(self._callback(*callback_args)) 92 | return 93 | 94 | self._callback(*callback_args) 95 | 96 | async def _a_call_callback(self): 97 | await self._callback( 98 | self._create_singular_response(), 99 | self._initial_event_recieved_time, 100 | ) 101 | 102 | def _handle_choice(self, choice: dict) -> None: 103 | index = choice["index"] 104 | self._choices[index] = self._choices.get(index, []) + [choice] 105 | 106 | def _get_only_choice(self, event: dict) -> dict: 107 | # Stream response events have only a single choice that specifies 108 | # its own index. 109 | return event["choices"][0] 110 | 111 | def _create_singular_response(self) -> dict: 112 | choices = [ 113 | self._get_full_choice(choice) for choice in self._choices.values() 114 | ] 115 | return {**self._common_response_information, "choices": choices} 116 | 117 | def _get_full_choice(self, choice: dict) -> dict: 118 | all_tokens = list( 119 | self._delta_choice_text_getter(choice_event) 120 | for choice_event in choice 121 | ) 122 | 123 | full_text = "".join(token for token in all_tokens if token is not None) 124 | 125 | return { 126 | **self._final_choice_getter(full_text), 127 | "index": choice[0]["index"], 128 | "finish_reason": choice[-1]["finish_reason"], 129 | } 130 | -------------------------------------------------------------------------------- /mona_openai/util/tokens_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | A utility module for everything realted to encoding tokens. 
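A minimal sketch of the exposed "get_usage" helper (actual counts
depend on the tiktoken encoding for the given model):

    usage = get_usage(
        "gpt-3.5-turbo",
        prompt_texts=["I want to generate some text about "],
        response_texts=["\n\nMy name is"],
    )
    # -> {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...}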
3 | """ 4 | from collections.abc import Iterable 5 | 6 | import tiktoken 7 | 8 | 9 | def _get_number_of_tokens(text: str, enc: tiktoken.Encoding) -> int: 10 | return len(enc.encode(text)) 11 | 12 | 13 | def _get_encoding(model: str) -> tiktoken.Encoding: 14 | return tiktoken.encoding_for_model(model) 15 | 16 | 17 | def get_usage( 18 | model: str, prompt_texts: Iterable[str], response_texts: Iterable[str] 19 | ) -> dict: 20 | """ 21 | Returns a usage dict containing the number of tokens in the prompt, in the 22 | response, and totally. 23 | """ 24 | enc = _get_encoding(model) 25 | 26 | def get_tokens_sum(texts): 27 | return sum(_get_number_of_tokens(text, enc) for text in texts) 28 | 29 | usage = { 30 | "prompt_tokens": get_tokens_sum(prompt_texts), 31 | "completion_tokens": get_tokens_sum(response_texts), 32 | } 33 | usage["total_tokens"] = usage["prompt_tokens"] + usage["completion_tokens"] 34 | 35 | return usage 36 | -------------------------------------------------------------------------------- /mona_openai/util/typing_util.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Type, Union 2 | 3 | if TYPE_CHECKING: 4 | import openai 5 | 6 | CompletionType = Type["openai.api_resources.completion.Completion"] 7 | ChatCompletionType = Type[ 8 | "openai.api_resources.chat_completion.ChatCompletion" 9 | ] 10 | SupportedOpenAIClassesType = Union[CompletionType, ChatCompletionType] 11 | -------------------------------------------------------------------------------- /mona_openai/util/validation_util.py: -------------------------------------------------------------------------------- 1 | from ..exceptions import ( 2 | InvalidSamplingRatioException, 3 | WrongOpenAIClassException, 4 | ) 5 | 6 | 7 | def validate_openai_class(openai_class: type, required_name: str) -> None: 8 | """ 9 | Validates that the given OpenAI API class' name is the given 10 | required name. 11 | """ 12 | class_name = openai_class.__name__ 13 | if class_name != required_name: 14 | raise WrongOpenAIClassException( 15 | f"Name is {class_name} and must be {required_name}" 16 | ) 17 | 18 | 19 | def validate_and_get_sampling_ratio(specs: dict) -> float: 20 | """ 21 | Validates that the sampling ratio in a given specs dict is a valid 22 | number (between 0 and 1). Returns a default value of 1 if no 23 | sampling ratio is mentioned. 
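For example (behavior taken directly from the checks below):

    validate_and_get_sampling_ratio({"sampling_ratio": 0.1})  # -> 0.1
    validate_and_get_sampling_ratio({})                       # -> 1 (default)
    validate_and_get_sampling_ratio({"sampling_ratio": 1.1})  # raises InvalidSamplingRatioException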
24 | """ 25 | sampling_ratio = specs.get("sampling_ratio", 1) 26 | if sampling_ratio < 0 or sampling_ratio > 1: 27 | raise InvalidSamplingRatioException( 28 | f"sampling ratio is {sampling_ratio} but must be a number " 29 | f"between 0 and 1 (inclusive)" 30 | ) 31 | return sampling_ratio 32 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "mona-openai" 7 | version = "0.2.1" 8 | authors = [ 9 | { name="Itai Bar Sinai", email="itai@monalabs.io" }, 10 | ] 11 | description = "Integration client for monitoring OpenAI usage with Mona" 12 | readme = "README.md" 13 | requires-python = ">=3.9" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "License :: OSI Approved :: Apache Software License", 17 | "Operating System :: OS Independent", 18 | ] 19 | keywords = ["OpenAI", "LLMs", "GPT", "Mona", "Monitoring", "AI"] 20 | dynamic = ["dependencies"] 21 | 22 | [project.urls] 23 | "Homepage" = "https://github.com/monalabs/mona-openai" 24 | "Bug Tracker" = "https://github.com/monalabs/mona-openai/issues" 25 | 26 | [tool.setuptools.dynamic] 27 | dependencies = {file = ["requirements.txt"]} 28 | 29 | [tool.black] 30 | line-length = 79 31 | 32 | [tool.mypy] 33 | ignore_missing_imports = true 34 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mona-sdk>=0.0.49 2 | alt-profanity-check>=1.2.2 3 | phonenumberslite>=8.13.7 4 | tiktoken>=0.3.3 5 | nest_asyncio>=1.5.6 -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monalabs/mona-openai/8fd8fbd11cbd55e7153a5f141cf535d3ef951569/tests/__init__.py -------------------------------------------------------------------------------- /tests/mocks/mock_mona_client.py: -------------------------------------------------------------------------------- 1 | from deepdiff import DeepDiff 2 | from mona_sdk.client import Client 3 | 4 | 5 | def _get_clean_dict( 6 | input_dict, keys_to_remove=("latency", "stream_start_latency") 7 | ): 8 | # TODO(itai): While we can't really test latency values, we should try to 9 | # add a test for these fields' existence when relevant. 10 | ret = {x: input_dict[x] for x in input_dict if x not in keys_to_remove} 11 | for choice in ret.get("response", {"choices": ()})["choices"]: 12 | # We don't support monitoring this field so no need to test for it. 13 | choice.pop("logprobs", None) 14 | return ret 15 | 16 | 17 | def _assert_message_equality(message_1, message_2): 18 | print(message_1) 19 | print(message_2) 20 | message_1 = _get_clean_dict(message_1) 21 | message_2 = _get_clean_dict(message_2) 22 | print(DeepDiff(message_1, message_2)) 23 | assert message_1 == message_2 24 | 25 | 26 | def _assert_export_num(expected_messages, export_num, last_message): 27 | assert export_num < len( 28 | expected_messages 29 | ), f"export called more than expected. 
Last message is: {last_message}" 30 | 31 | 32 | def _get_mock_mona_client(expected_export_messages): 33 | class MockMonaClient(Client): 34 | def __init__(self, *args, **kwargs): 35 | self._export_num = 0 36 | 37 | def create_openai_context_class(self, context_class, openai_api_type): 38 | return {} 39 | 40 | def export(self, message, filter_none_fields=None): 41 | _assert_export_num( 42 | expected_export_messages, self._export_num, message 43 | ) 44 | 45 | expected_mona_message = expected_export_messages[self._export_num] 46 | 47 | _assert_message_equality( 48 | message.message, expected_mona_message["message"] 49 | ) 50 | 51 | assert ( 52 | message.contextClass == expected_mona_message["context_class"] 53 | ) 54 | 55 | if "context_id" in expected_mona_message: 56 | assert message.contextId == expected_mona_message["context_id"] 57 | 58 | if "export_timestamp" in expected_mona_message: 59 | assert ( 60 | message.exportTimestamp 61 | == expected_mona_message["export_timestamp"] 62 | ) 63 | 64 | self._export_num += 1 65 | 66 | # We combine the mock sync and async clients as this property has no 67 | # relevance in testing. 68 | async def export_async(self, message, filter_none_fields=None): 69 | return self.export(message, filter_none_fields) 70 | 71 | return MockMonaClient() 72 | 73 | 74 | def get_mock_mona_clients_getter( 75 | expected_export_messages, async_expected_export_messages 76 | ): 77 | """ 78 | Returns a getter function that can be used to get a pair of a mock 79 | "sync" and a mock "async" Mona clients. The given expected export 80 | messages in the params will be used to assert that relevant 81 | exporting is done and not more than that by each mock client. 82 | """ 83 | 84 | def mock_get_mona_client(creds): 85 | return _get_mock_mona_client( 86 | expected_export_messages 87 | ), _get_mock_mona_client(async_expected_export_messages) 88 | 89 | return mock_get_mona_client 90 | -------------------------------------------------------------------------------- /tests/mocks/mock_openai.py: -------------------------------------------------------------------------------- 1 | class mockCreateExceptionCommand: 2 | pass 3 | 4 | 5 | class mockCreateException(Exception): 6 | pass 7 | 8 | 9 | def get_mock_openai_class(original_class, create_responses, acreate_responses): 10 | """ 11 | Unlike the mock mona clients, the class returned from this function 12 | makes no assertions about the data being sent, only about the 13 | number of calls. 
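A short sketch mirroring how the tests use this factory (the
"some_response" dict is a placeholder):

    from openai import Completion

    MockCompletion = get_mock_openai_class(
        Completion, create_responses=(some_response,), acreate_responses=()
    )
    assert MockCompletion.create(prompt="hi") == some_response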
14 | """ 15 | 16 | class MockWrapper(original_class): 17 | _create_count = 0 18 | _acreate_count = 0 19 | 20 | @classmethod 21 | def _maybe_raise_exception(cls, current_response): 22 | if isinstance(current_response, mockCreateExceptionCommand): 23 | raise mockCreateException( 24 | "Some mock exception for testing purposes" 25 | ) 26 | 27 | @classmethod 28 | def _handle_current_response(cls, responses, count): 29 | current_response = responses[count - 1] 30 | cls._maybe_raise_exception(current_response) 31 | return current_response 32 | 33 | @classmethod 34 | def create(cls, *args, **kwargs): 35 | cls._create_count += 1 36 | assert cls._create_count <= len( 37 | create_responses 38 | ), "Too many create calls" 39 | return cls._handle_current_response( 40 | create_responses, cls._create_count 41 | ) 42 | 43 | @classmethod 44 | async def acreate(cls, *args, **kwargs): 45 | cls._acreate_count += 1 46 | assert cls._acreate_count <= len( 47 | acreate_responses 48 | ), "Too many acreate calls" 49 | return cls._handle_current_response( 50 | acreate_responses, cls._acreate_count 51 | ) 52 | 53 | return type(original_class.__name__, (MockWrapper,), {}) 54 | -------------------------------------------------------------------------------- /tests/test_chat_completion.py: -------------------------------------------------------------------------------- 1 | """ 2 | TODO: These are legacy tests. Need to recreate for new openAI client mechanism 3 | Tests for ChatCompletion api Mona wrapping. 4 | """ 5 | from copy import deepcopy 6 | 7 | from openai import ChatCompletion 8 | 9 | from mona_openai.mona_openai_create import get_rest_monitor, monitor 10 | 11 | from .mocks.mock_mona_client import get_mock_mona_clients_getter 12 | from .mocks.mock_openai import get_mock_openai_class 13 | 14 | _DEFAULT_CONTEXT_CLASS = "TEST_CLASS" 15 | 16 | _DEFAULT_RESPONSE_TEXT = "\n\nMy name is" 17 | 18 | _DEFAULT_RESPONSE_COMMON_VARIABLES = { 19 | "created": 1684827250, 20 | "id": "chatcmpl-7JGp0PUipNwDQeja4P7SwSLa1I19H", 21 | "model": "gpt-3.5-turbo-0301", 22 | "object": "chat.completion", 23 | } 24 | 25 | _DEFAULT_RESPONSE = { 26 | "choices": [ 27 | { 28 | "finish_reason": "length", 29 | "index": 0, 30 | "message": { 31 | "role": "assistant", 32 | "content": _DEFAULT_RESPONSE_TEXT, 33 | }, 34 | } 35 | ], 36 | "usage": { 37 | "completion_tokens": 4, 38 | "prompt_tokens": 8, 39 | "total_tokens": 12, 40 | }, 41 | } | _DEFAULT_RESPONSE_COMMON_VARIABLES 42 | 43 | 44 | def _get_response_without_texts(response): 45 | new_response = deepcopy(response) 46 | for choice in new_response["choices"]: 47 | choice["message"].pop("content") 48 | return new_response 49 | 50 | 51 | _DEFAULT_EXPORTED_RESPONSE = _get_response_without_texts(_DEFAULT_RESPONSE) 52 | 53 | _DEFAULT_INPUT = { 54 | "model": "gpt-3.5-turbo", 55 | "messages": [ 56 | {"role": "user", "content": "I want to generate some text about "} 57 | ], 58 | "max_tokens": 20, 59 | "n": 1, 60 | "temperature": 0.2, 61 | "api_key": "some_key_that_should_be_removed", 62 | "organization": "some_organization_that_should_be_removed", 63 | } 64 | 65 | 66 | # By default we don't export the prompt to Mona 67 | def _remove_texts_from_input(input): 68 | new_input = deepcopy(input) 69 | for message in new_input["messages"]: 70 | message.pop("content", None) 71 | 72 | return new_input 73 | 74 | 75 | def _remove_irrelevant_input_keys(input): 76 | new_input = deepcopy(input) 77 | new_input.pop("api_key", None) 78 | new_input.pop("organization", None) 79 | return new_input 80 | 81 | 82 | def 
_get_clean_input(input): 83 | return _remove_irrelevant_input_keys(_remove_texts_from_input(input)) 84 | 85 | 86 | _DEFAULT_EXPORTED_INPUT = _get_clean_input(_DEFAULT_INPUT) 87 | 88 | _DEFAULT_ANALYSIS = { 89 | "privacy": { 90 | "total_prompt_phone_number_count": 0, 91 | "answer_unknown_phone_number_count": (0,), 92 | "total_prompt_email_count": 0, 93 | "answer_unknown_email_count": (0,), 94 | "last_user_message_phone_number_count": 0, 95 | "last_user_message_emails_count": 0, 96 | }, 97 | "textual": { 98 | "total_prompt_length": 35, 99 | "answer_length": (12,), 100 | "total_prompt_word_count": 7, 101 | "answer_word_count": (3,), 102 | "total_prompt_preposition_count": 2, 103 | "total_prompt_preposition_ratio": 0.2857142857142857, 104 | "answer_preposition_count": (0,), 105 | "answer_preposition_ratio": (0,), 106 | "answer_words_not_in_prompt_count": (3,), 107 | "answer_words_not_in_prompt_ratio": (1,), 108 | "last_user_message_length": 35, 109 | "last_user_message_word_count": 7, 110 | "last_user_message_preposition_count": 2, 111 | "last_user_message_preposition_ratio": 0.2857142857142857, 112 | }, 113 | "profanity": { 114 | "prompt_profanity_prob": (0.05,), 115 | "prompt_has_profanity": (False,), 116 | "answer_profanity_prob": (0.05,), 117 | "answer_has_profanity": (False,), 118 | "last_user_message_profanity_prob": 0.05, 119 | "last_user_message_has_profanity": False, 120 | }, 121 | } 122 | 123 | 124 | def _remove_none_values(dict): 125 | return {x: y for x, y in dict.items() if y is not None} 126 | 127 | 128 | def _get_mock_openai_class(*args, **kwargs): 129 | return get_mock_openai_class(ChatCompletion, *args, **kwargs) 130 | 131 | 132 | def _get_mona_message( 133 | input=_DEFAULT_EXPORTED_INPUT, 134 | is_exception=False, 135 | is_async=False, 136 | is_stream=None, 137 | response=_DEFAULT_EXPORTED_RESPONSE, 138 | analysis=_DEFAULT_ANALYSIS, 139 | context_class=_DEFAULT_CONTEXT_CLASS, 140 | context_id=None, 141 | export_timestamp=None, 142 | additional_data=None, 143 | ): 144 | message = { 145 | "message": { 146 | "input": input, 147 | "is_exception": is_exception, 148 | "api_name": "ChatCompletion", 149 | "is_async": is_async, 150 | "response": response, 151 | "analysis": analysis, 152 | "additional_data": additional_data, 153 | }, 154 | "context_class": context_class, 155 | "context_id": context_id, 156 | "export_timestamp": export_timestamp, 157 | } 158 | 159 | message["message"] = _remove_none_values(message["message"]) 160 | return _remove_none_values(message) 161 | 162 | 163 | # TODO(itai): Consider using named argunments in test function calls across 164 | # the board. 
165 | 166 | 167 | def test_basic(): 168 | monitor( 169 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 170 | (), 171 | _DEFAULT_CONTEXT_CLASS, 172 | mona_clients_getter=get_mock_mona_clients_getter( 173 | (_get_mona_message(),), () 174 | ), 175 | ).create(**_DEFAULT_INPUT) 176 | 177 | 178 | def test_multiple_messages_not_ending_with_user_message(): 179 | new_input = deepcopy(_DEFAULT_INPUT) 180 | new_input["messages"] = ( 181 | [{"role": "system", "content": "you are an assistant"}] 182 | + new_input["messages"] 183 | + [{"role": "assistant", "content": "some initial answer"}] 184 | ) 185 | 186 | expected_input = _get_clean_input(new_input) 187 | 188 | new_analysis = { 189 | "privacy": { 190 | "total_prompt_phone_number_count": 0, 191 | "answer_unknown_phone_number_count": (0,), 192 | "total_prompt_email_count": 0, 193 | "answer_unknown_email_count": (0,), 194 | }, 195 | "textual": { 196 | "total_prompt_length": 74, 197 | "answer_length": (12,), 198 | "total_prompt_word_count": 14, 199 | "answer_word_count": (3,), 200 | "total_prompt_preposition_count": 2, 201 | "total_prompt_preposition_ratio": 0.14285714285714285, 202 | "answer_preposition_count": (0,), 203 | "answer_preposition_ratio": (0,), 204 | "answer_words_not_in_prompt_count": (3,), 205 | "answer_words_not_in_prompt_ratio": (1,), 206 | }, 207 | "profanity": { 208 | "prompt_profanity_prob": (0.05, 0.05, 0.02), 209 | "prompt_has_profanity": (False, False, False), 210 | "answer_profanity_prob": (0.05,), 211 | "answer_has_profanity": (False,), 212 | }, 213 | } 214 | 215 | monitor( 216 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 217 | (), 218 | _DEFAULT_CONTEXT_CLASS, 219 | mona_clients_getter=get_mock_mona_clients_getter( 220 | ( 221 | _get_mona_message( 222 | input=expected_input, 223 | analysis=new_analysis, 224 | ), 225 | ), 226 | (), 227 | ), 228 | ).create(**new_input) 229 | 230 | 231 | def test_multiple_messages(): 232 | new_input = deepcopy(_DEFAULT_INPUT) 233 | new_input["messages"] = ( 234 | [{"role": "system", "content": "you are an assistant"}] 235 | + new_input["messages"] 236 | + [ 237 | {"role": "assistant", "content": "some initial answer"}, 238 | {"role": "user", "content": "some user new prompt"}, 239 | ] 240 | ) 241 | 242 | expected_input = _get_clean_input(new_input) 243 | 244 | new_analysis = { 245 | "privacy": { 246 | "total_prompt_phone_number_count": 0, 247 | "answer_unknown_phone_number_count": (0,), 248 | "total_prompt_email_count": 0, 249 | "answer_unknown_email_count": (0,), 250 | "last_user_message_phone_number_count": 0, 251 | "last_user_message_emails_count": 0, 252 | }, 253 | "textual": { 254 | "total_prompt_length": 94, 255 | "answer_length": (12,), 256 | "total_prompt_word_count": 18, 257 | "answer_word_count": (3,), 258 | "total_prompt_preposition_count": 2, 259 | "total_prompt_preposition_ratio": 0.1111111111111111, 260 | "answer_preposition_count": (0,), 261 | "answer_preposition_ratio": (0,), 262 | "answer_words_not_in_prompt_count": (3,), 263 | "answer_words_not_in_prompt_ratio": (1,), 264 | "last_user_message_length": 20, 265 | "last_user_message_word_count": 4, 266 | "last_user_message_preposition_count": 0, 267 | "last_user_message_preposition_ratio": 0, 268 | }, 269 | "profanity": { 270 | "prompt_profanity_prob": (0.05, 0.05, 0.02, 0.03), 271 | "prompt_has_profanity": (False, False, False, False), 272 | "answer_profanity_prob": (0.05,), 273 | "answer_has_profanity": (False,), 274 | "last_user_message_profanity_prob": 0.03, 275 | "last_user_message_has_profanity": False, 276 | }, 
277 | } 278 | 279 | monitor( 280 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 281 | (), 282 | _DEFAULT_CONTEXT_CLASS, 283 | mona_clients_getter=get_mock_mona_clients_getter( 284 | ( 285 | _get_mona_message( 286 | input=expected_input, 287 | analysis=new_analysis, 288 | ), 289 | ), 290 | (), 291 | ), 292 | ).create(**new_input) 293 | 294 | 295 | def test_rest(): 296 | get_rest_monitor( 297 | ChatCompletion.__name__, 298 | (), 299 | _DEFAULT_CONTEXT_CLASS, 300 | mona_clients_getter=get_mock_mona_clients_getter( 301 | (_get_mona_message(),), () 302 | ), 303 | ).log_request(_DEFAULT_INPUT)[0](_DEFAULT_RESPONSE) 304 | 305 | 306 | def test_rest_exception(): 307 | get_rest_monitor( 308 | ChatCompletion.__name__, 309 | (), 310 | _DEFAULT_CONTEXT_CLASS, 311 | mona_clients_getter=get_mock_mona_clients_getter( 312 | ( 313 | _get_mona_message( 314 | is_exception=True, response=None, analysis=None 315 | ), 316 | ), 317 | (), 318 | ), 319 | ).log_request(_DEFAULT_INPUT)[1]() 320 | 321 | 322 | def test_export_response_text(): 323 | monitor( 324 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 325 | (), 326 | _DEFAULT_CONTEXT_CLASS, 327 | {"export_response_texts": True}, 328 | mona_clients_getter=get_mock_mona_clients_getter( 329 | (_get_mona_message(response=_DEFAULT_RESPONSE),), () 330 | ), 331 | ).create(**_DEFAULT_INPUT) 332 | 333 | 334 | def test_export_prompt(): 335 | monitor( 336 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 337 | (), 338 | _DEFAULT_CONTEXT_CLASS, 339 | {"export_prompt": True}, 340 | mona_clients_getter=get_mock_mona_clients_getter( 341 | ( 342 | _get_mona_message( 343 | input=_remove_irrelevant_input_keys(_DEFAULT_INPUT) 344 | ), 345 | ), 346 | (), 347 | ), 348 | ).create(**_DEFAULT_INPUT) 349 | 350 | 351 | def test_multiple_answers(): 352 | new_input = deepcopy(_DEFAULT_INPUT) 353 | new_input["n"] = 3 354 | expected_input = _get_clean_input(new_input) 355 | 356 | new_response = deepcopy(_DEFAULT_RESPONSE) 357 | new_response["choices"] = [ 358 | { 359 | "finish_reason": "length", 360 | "index": 0, 361 | "logprobs": None, 362 | "message": { 363 | "role": "assistant", 364 | "content": "\n\nMy name is", 365 | }, 366 | }, 367 | { 368 | "finish_reason": "length", 369 | "index": 1, 370 | "logprobs": None, 371 | "message": { 372 | "role": "assistant", 373 | "content": "\n\nMy thing is", 374 | }, 375 | }, 376 | { 377 | "finish_reason": "length", 378 | "index": 2, 379 | "logprobs": None, 380 | "message": { 381 | "role": "assistant", 382 | "content": "\n\nbladf", 383 | }, 384 | }, 385 | ] 386 | 387 | new_expected_response = _get_response_without_texts(new_response) 388 | 389 | new_analysis = { 390 | "privacy": { 391 | "total_prompt_phone_number_count": 0, 392 | "answer_unknown_phone_number_count": (0, 0, 0), 393 | "total_prompt_email_count": 0, 394 | "answer_unknown_email_count": (0, 0, 0), 395 | "last_user_message_phone_number_count": 0, 396 | "last_user_message_emails_count": 0, 397 | }, 398 | "textual": { 399 | "total_prompt_length": 35, 400 | "answer_length": (12, 13, 7), 401 | "total_prompt_word_count": 7, 402 | "answer_word_count": (3, 3, 1), 403 | "total_prompt_preposition_count": 2, 404 | "total_prompt_preposition_ratio": 0.2857142857142857, 405 | "answer_preposition_count": (0, 0, 0), 406 | "answer_preposition_ratio": (0, 0, 0), 407 | "answer_words_not_in_prompt_count": (3, 3, 1), 408 | "answer_words_not_in_prompt_ratio": (1.0, 1.0, 1.0), 409 | "last_user_message_length": 35, 410 | "last_user_message_word_count": 7, 411 | "last_user_message_preposition_count": 2, 
412 | "last_user_message_preposition_ratio": 0.2857142857142857, 413 | }, 414 | "profanity": { 415 | "prompt_profanity_prob": (0.05,), 416 | "prompt_has_profanity": (False,), 417 | "answer_profanity_prob": (0.05, 0.01, 0.05), 418 | "answer_has_profanity": (False, False, False), 419 | "last_user_message_profanity_prob": 0.05, 420 | "last_user_message_has_profanity": False, 421 | }, 422 | } 423 | 424 | monitor( 425 | _get_mock_openai_class((new_response,), ()), 426 | (), 427 | _DEFAULT_CONTEXT_CLASS, 428 | mona_clients_getter=get_mock_mona_clients_getter( 429 | ( 430 | _get_mona_message( 431 | response=new_expected_response, 432 | input=expected_input, 433 | analysis=new_analysis, 434 | ), 435 | ), 436 | (), 437 | ), 438 | ).create(**new_input) 439 | 440 | 441 | def test_stream(): 442 | def response_generator(): 443 | words = _DEFAULT_RESPONSE_TEXT.split(" ") 444 | last_index = len(words) - 1 445 | for i, word in enumerate(words): 446 | choice = { 447 | "delta": {"content": (word + " ") if i < last_index else word}, 448 | "index": 0, 449 | "logprobs": None, 450 | "finish_reason": None if i < last_index else "length", 451 | } 452 | yield _DEFAULT_RESPONSE_COMMON_VARIABLES | {"choices": [choice]} 453 | 454 | input = deepcopy(_DEFAULT_INPUT) 455 | input["stream"] = True 456 | 457 | expected_input = _get_clean_input(input) 458 | 459 | for _ in monitor( 460 | _get_mock_openai_class((response_generator(),), ()), 461 | (), 462 | _DEFAULT_CONTEXT_CLASS, 463 | mona_clients_getter=get_mock_mona_clients_getter( 464 | (_get_mona_message(is_stream=True, input=expected_input),), () 465 | ), 466 | ).create(**input): 467 | pass 468 | 469 | 470 | def test_stream_multiple_answers(): 471 | def response_generator(): 472 | words = _DEFAULT_RESPONSE_TEXT.split(" ") 473 | for i, word in enumerate(words): 474 | yield _DEFAULT_RESPONSE_COMMON_VARIABLES | { 475 | "choices": [ 476 | { 477 | "delta": { 478 | "content": (word + " ") 479 | if i < len(words) - 1 480 | else word 481 | }, 482 | "index": 0, 483 | "logprobs": None, 484 | "finish_reason": None 485 | if i < len(words) - 1 486 | else "length", 487 | } 488 | ] 489 | } 490 | yield _DEFAULT_RESPONSE_COMMON_VARIABLES | { 491 | "choices": [ 492 | { 493 | "delta": { 494 | "content": (word + " ") 495 | if i < len(words) - 1 496 | else word 497 | }, 498 | "index": 1, 499 | "logprobs": None, 500 | "finish_reason": None 501 | if i < len(words) - 1 502 | else "length", 503 | } 504 | ] 505 | } 506 | 507 | input = deepcopy(_DEFAULT_INPUT) 508 | input["stream"] = True 509 | input["n"] = 2 510 | 511 | expected_input = _get_clean_input(input) 512 | 513 | expected_response = deepcopy(_DEFAULT_EXPORTED_RESPONSE) 514 | expected_response["choices"] += deepcopy(expected_response["choices"]) 515 | expected_response["choices"][1]["index"] = 1 516 | expected_response["usage"] = { 517 | "completion_tokens": 8, 518 | "prompt_tokens": 8, 519 | "total_tokens": 16, 520 | } 521 | 522 | new_analysis = { 523 | "privacy": { 524 | "total_prompt_phone_number_count": 0, 525 | "answer_unknown_phone_number_count": (0, 0), 526 | "total_prompt_email_count": 0, 527 | "answer_unknown_email_count": (0, 0), 528 | "last_user_message_phone_number_count": 0, 529 | "last_user_message_emails_count": 0, 530 | }, 531 | "textual": { 532 | "total_prompt_length": 35, 533 | "answer_length": (12, 12), 534 | "total_prompt_word_count": 7, 535 | "answer_word_count": (3, 3), 536 | "total_prompt_preposition_count": 2, 537 | "total_prompt_preposition_ratio": 0.2857142857142857, 538 | "answer_preposition_count": (0, 0), 539 
| "answer_preposition_ratio": (0, 0), 540 | "answer_words_not_in_prompt_count": (3, 3), 541 | "answer_words_not_in_prompt_ratio": (1.0, 1.0), 542 | "last_user_message_length": 35, 543 | "last_user_message_word_count": 7, 544 | "last_user_message_preposition_count": 2, 545 | "last_user_message_preposition_ratio": 0.2857142857142857, 546 | }, 547 | "profanity": { 548 | "prompt_profanity_prob": (0.05,), 549 | "prompt_has_profanity": (False,), 550 | "answer_profanity_prob": (0.05, 0.05), 551 | "answer_has_profanity": (False, False), 552 | "last_user_message_profanity_prob": 0.05, 553 | "last_user_message_has_profanity": False, 554 | }, 555 | } 556 | 557 | for _ in monitor( 558 | _get_mock_openai_class((response_generator(),), ()), 559 | (), 560 | _DEFAULT_CONTEXT_CLASS, 561 | mona_clients_getter=get_mock_mona_clients_getter( 562 | ( 563 | _get_mona_message( 564 | is_stream=True, 565 | input=expected_input, 566 | response=expected_response, 567 | analysis=new_analysis, 568 | ), 569 | ), 570 | (), 571 | ), 572 | ).create(**input): 573 | pass 574 | -------------------------------------------------------------------------------- /tests/test_completion.py: -------------------------------------------------------------------------------- 1 | """ 2 | TODO: These are legacy tests. Need to recreate for new openAI client mechanism 3 | Tests for Completion api Mona wrapping. 4 | 5 | NOTE: Many of these tests can be considered more generic than just for 6 | "Completion", since they test general mona-openai capabilities 7 | (e.g., sampling ratio, basic flows). We should either keep them 8 | here (since they do use the "Completion" API after all) or create a 9 | more generic test module in addition to this one 10 | """ 11 | import asyncio 12 | from copy import deepcopy 13 | 14 | import pytest 15 | from openai import Completion 16 | 17 | from mona_openai.exceptions import InvalidSamplingRatioException 18 | from mona_openai.mona_openai_legacy import ( 19 | CONTEXT_ID_ARG_NAME, 20 | EXPORT_TIMESTAMP_ARG_NAME, 21 | get_rest_monitor, 22 | monitor, 23 | ) 24 | 25 | from .mocks.mock_mona_client import get_mock_mona_clients_getter 26 | from .mocks.mock_openai import ( 27 | get_mock_openai_class, 28 | mockCreateException, 29 | mockCreateExceptionCommand, 30 | ) 31 | 32 | _DEFAULT_CONTEXT_CLASS = "TEST_CLASS" 33 | 34 | _DEFAULT_RESPONSE_TEXT = "\n\nMy name is" 35 | 36 | _DEFAULT_RESPONSE_COMMON_VARIABLES = { 37 | "created": 1679231055, 38 | "id": "cmpl-6vmzn6DUc2ZNjkyEvAyTf2tAgPl3A", 39 | "model": "text-ada-001", 40 | "object": "text_completion", 41 | } 42 | 43 | _DEFAULT_RESPONSE = { 44 | "choices": [ 45 | { 46 | "finish_reason": "length", 47 | "index": 0, 48 | "text": _DEFAULT_RESPONSE_TEXT, 49 | } 50 | ], 51 | "usage": {"completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 13}, 52 | } | _DEFAULT_RESPONSE_COMMON_VARIABLES 53 | 54 | 55 | def _get_response_without_texts(response): 56 | new_response = deepcopy(response) 57 | for choice in new_response["choices"]: 58 | choice.pop("text") 59 | return new_response 60 | 61 | 62 | _DEFAULT_EXPORTED_RESPONSE = _get_response_without_texts(_DEFAULT_RESPONSE) 63 | 64 | _DEFAULT_INPUT = { 65 | "prompt": "I want to generate some text about ", 66 | "model": "text-ada-001", 67 | "temperature": 0.6, 68 | "n": 1, 69 | "max_tokens": 5, 70 | "api_key": "some_key_that_should_be_removed", 71 | "organization": "some_organization_that_should_be_removed", 72 | } 73 | 74 | 75 | # By default we don't export the prompt to Mona 76 | def _remove_texts_from_input(input): 77 | new_input = 
deepcopy(input) 78 | new_input.pop("prompt", None) 79 | 80 | return new_input 81 | 82 | 83 | def _remove_irrelevant_input_keys(input): 84 | new_input = deepcopy(input) 85 | new_input.pop("api_key", None) 86 | new_input.pop("organization", None) 87 | return new_input 88 | 89 | 90 | def _get_clean_input(input): 91 | return _remove_irrelevant_input_keys(_remove_texts_from_input(input)) 92 | 93 | 94 | # By default we don't export the prompt to Mona 95 | _DEFAULT_EXPORTED_INPUT = _get_clean_input(_DEFAULT_INPUT) 96 | 97 | _DEFAULT_ANALYSIS = { 98 | "privacy": { 99 | "prompt_phone_number_count": (0,), 100 | "answer_unknown_phone_number_count": (0,), 101 | "prompt_email_count": (0,), 102 | "answer_unknown_email_count": (0,), 103 | }, 104 | "textual": { 105 | "prompt_length": (35,), 106 | "answer_length": (12,), 107 | "prompt_word_count": (7,), 108 | "answer_word_count": (3,), 109 | "prompt_preposition_count": (2,), 110 | "prompt_preposition_ratio": (0.2857142857142857,), 111 | "answer_preposition_count": (0,), 112 | "answer_preposition_ratio": (0.0,), 113 | "answer_words_not_in_prompt_count": (3,), 114 | "answer_words_not_in_prompt_ratio": (1.0,), 115 | }, 116 | "profanity": { 117 | "prompt_profanity_prob": (0.05,), 118 | "answer_profanity_prob": (0.05,), 119 | "prompt_has_profanity": (False,), 120 | "answer_has_profanity": (False,), 121 | }, 122 | } 123 | 124 | 125 | def _remove_none_values(dict): 126 | return {x: y for x, y in dict.items() if y is not None} 127 | 128 | 129 | def _get_mock_openai_class(*args, **kwargs): 130 | return get_mock_openai_class(Completion, *args, **kwargs) 131 | 132 | 133 | def _get_mona_message( 134 | input=_DEFAULT_EXPORTED_INPUT, 135 | is_exception=False, 136 | is_async=False, 137 | is_stream=None, 138 | response=_DEFAULT_EXPORTED_RESPONSE, 139 | analysis=_DEFAULT_ANALYSIS, 140 | context_class=_DEFAULT_CONTEXT_CLASS, 141 | context_id=None, 142 | export_timestamp=None, 143 | additional_data=None, 144 | ): 145 | message = { 146 | "message": { 147 | "input": input, 148 | "is_exception": is_exception, 149 | "api_name": "Completion", 150 | "is_async": is_async, 151 | "response": response, 152 | "analysis": analysis, 153 | "additional_data": additional_data, 154 | }, 155 | "context_class": context_class, 156 | "context_id": context_id, 157 | "export_timestamp": export_timestamp, 158 | } 159 | 160 | message["message"] = _remove_none_values(message["message"]) 161 | return _remove_none_values(message) 162 | 163 | 164 | # TODO(itai): Consider using named argunments in test function calls across 165 | # the board. 
166 | 167 | 168 | def test_basic(): 169 | monitor( 170 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 171 | (), 172 | _DEFAULT_CONTEXT_CLASS, 173 | mona_clients_getter=get_mock_mona_clients_getter( 174 | (_get_mona_message(),), () 175 | ), 176 | ).create(**_DEFAULT_INPUT) 177 | 178 | 179 | def test_multiple_prompts(): 180 | new_input = deepcopy(_DEFAULT_INPUT) 181 | new_input["prompt"] = ( 182 | "I want to generate some text about ", 183 | "I also want to generate some text about ", 184 | ) 185 | expected_input = _get_clean_input(new_input) 186 | 187 | new_response = deepcopy(_DEFAULT_RESPONSE) 188 | new_response["choices"] = [ 189 | { 190 | "finish_reason": "length", 191 | "index": 0, 192 | "logprobs": None, 193 | "text": "\n\nMy name is", 194 | }, 195 | { 196 | "finish_reason": "length", 197 | "index": 1, 198 | "logprobs": None, 199 | "text": "\n\nMy thing is", 200 | }, 201 | ] 202 | new_expected_response = _get_response_without_texts(new_response) 203 | 204 | new_analysis = { 205 | "privacy": { 206 | "prompt_phone_number_count": (0, 0), 207 | "answer_unknown_phone_number_count": (0, 0), 208 | "prompt_email_count": (0, 0), 209 | "answer_unknown_email_count": (0, 0), 210 | }, 211 | "textual": { 212 | "prompt_length": (35, 40), 213 | "answer_length": (12, 13), 214 | "prompt_word_count": (7, 8), 215 | "answer_word_count": (3, 3), 216 | "prompt_preposition_count": (2, 2), 217 | "prompt_preposition_ratio": (0.2857142857142857, 0.25), 218 | "answer_preposition_count": (0, 0), 219 | "answer_preposition_ratio": (0.0, 0.0), 220 | "answer_words_not_in_prompt_count": (3, 3), 221 | "answer_words_not_in_prompt_ratio": (1.0, 1.0), 222 | }, 223 | "profanity": { 224 | "prompt_profanity_prob": (0.05, 0.05), 225 | "answer_profanity_prob": (0.05, 0.01), 226 | "prompt_has_profanity": (False, False), 227 | "answer_has_profanity": (False, False), 228 | }, 229 | } 230 | 231 | monitor( 232 | _get_mock_openai_class((new_response,), ()), 233 | (), 234 | _DEFAULT_CONTEXT_CLASS, 235 | mona_clients_getter=get_mock_mona_clients_getter( 236 | ( 237 | _get_mona_message( 238 | response=new_expected_response, 239 | input=expected_input, 240 | analysis=new_analysis, 241 | ), 242 | ), 243 | (), 244 | ), 245 | ).create(**new_input) 246 | 247 | 248 | def test_rest(): 249 | get_rest_monitor( 250 | Completion.__name__, 251 | (), 252 | _DEFAULT_CONTEXT_CLASS, 253 | mona_clients_getter=get_mock_mona_clients_getter( 254 | (_get_mona_message(),), () 255 | ), 256 | ).log_request(_DEFAULT_INPUT)[0](_DEFAULT_RESPONSE) 257 | 258 | 259 | def test_rest_more_additional_data(): 260 | additional_data = {"foo": "bar"} 261 | more_additional_data = {"foo2": "bar2"} 262 | total_additional_data = {**additional_data, **more_additional_data} 263 | get_rest_monitor( 264 | Completion.__name__, 265 | (), 266 | _DEFAULT_CONTEXT_CLASS, 267 | mona_clients_getter=get_mock_mona_clients_getter( 268 | (_get_mona_message(additional_data=total_additional_data),), () 269 | ), 270 | ).log_request(_DEFAULT_INPUT, additional_data=additional_data)[0]( 271 | _DEFAULT_RESPONSE, additional_data=more_additional_data 272 | ) 273 | 274 | 275 | def test_rest_async(): 276 | asyncio.run( 277 | get_rest_monitor( 278 | Completion.__name__, 279 | (), 280 | _DEFAULT_CONTEXT_CLASS, 281 | mona_clients_getter=get_mock_mona_clients_getter( 282 | (), (_get_mona_message(),) 283 | ), 284 | ).async_log_request(_DEFAULT_INPUT)[0](_DEFAULT_RESPONSE) 285 | ) 286 | 287 | 288 | def test_rest_exception(): 289 | get_rest_monitor( 290 | Completion.__name__, 291 | (), 292 | 
_DEFAULT_CONTEXT_CLASS, 293 | mona_clients_getter=get_mock_mona_clients_getter( 294 | ( 295 | _get_mona_message( 296 | is_exception=True, response=None, analysis=None 297 | ), 298 | ), 299 | (), 300 | ), 301 | ).log_request(_DEFAULT_INPUT)[1]() 302 | 303 | 304 | def test_rest_exception_async(): 305 | asyncio.run( 306 | get_rest_monitor( 307 | Completion.__name__, 308 | (), 309 | _DEFAULT_CONTEXT_CLASS, 310 | mona_clients_getter=get_mock_mona_clients_getter( 311 | (), 312 | ( 313 | _get_mona_message( 314 | is_exception=True, response=None, analysis=None 315 | ), 316 | ), 317 | ), 318 | ).async_log_request(_DEFAULT_INPUT)[1]() 319 | ) 320 | 321 | 322 | def test_export_response_text(): 323 | monitor( 324 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 325 | (), 326 | _DEFAULT_CONTEXT_CLASS, 327 | {"export_response_texts": True}, 328 | mona_clients_getter=get_mock_mona_clients_getter( 329 | (_get_mona_message(response=_DEFAULT_RESPONSE),), () 330 | ), 331 | ).create(**_DEFAULT_INPUT) 332 | 333 | 334 | def test_export_prompt(): 335 | monitor( 336 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 337 | (), 338 | _DEFAULT_CONTEXT_CLASS, 339 | {"export_prompt": True}, 340 | mona_clients_getter=get_mock_mona_clients_getter( 341 | ( 342 | _get_mona_message( 343 | input=_remove_irrelevant_input_keys(_DEFAULT_INPUT) 344 | ), 345 | ), 346 | (), 347 | ), 348 | ).create(**_DEFAULT_INPUT) 349 | 350 | 351 | def test_bad_sampling_ratios(): 352 | with pytest.raises(InvalidSamplingRatioException): 353 | monitor( 354 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 355 | (), 356 | _DEFAULT_CONTEXT_CLASS, 357 | {"sampling_ratio": 1.1}, 358 | mona_clients_getter=get_mock_mona_clients_getter( 359 | (_get_mona_message(),), () 360 | ), 361 | ) 362 | 363 | with pytest.raises(InvalidSamplingRatioException): 364 | monitor( 365 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 366 | (), 367 | _DEFAULT_CONTEXT_CLASS, 368 | {"sampling_ratio": -1}, 369 | mona_clients_getter=get_mock_mona_clients_getter( 370 | (_get_mona_message(),), () 371 | ), 372 | ) 373 | 374 | 375 | def test_async(): 376 | monitored_completion = monitor( 377 | _get_mock_openai_class((), (_DEFAULT_RESPONSE,)), 378 | (), 379 | _DEFAULT_CONTEXT_CLASS, 380 | mona_clients_getter=get_mock_mona_clients_getter( 381 | (), (_get_mona_message(is_async=True),) 382 | ), 383 | ) 384 | 385 | asyncio.run(monitored_completion.acreate(**_DEFAULT_INPUT)) 386 | 387 | 388 | def test_exception(): 389 | monitored_completion = monitor( 390 | _get_mock_openai_class((mockCreateExceptionCommand(),), ()), 391 | (), 392 | _DEFAULT_CONTEXT_CLASS, 393 | mona_clients_getter=get_mock_mona_clients_getter( 394 | ( 395 | _get_mona_message( 396 | is_exception=True, response=None, analysis=None 397 | ), 398 | ), 399 | (), 400 | ), 401 | ) 402 | 403 | with pytest.raises(mockCreateException): 404 | monitored_completion.create(**_DEFAULT_INPUT) 405 | 406 | 407 | def test_exception_without_monitoring(): 408 | monitored_completion = monitor( 409 | _get_mock_openai_class((mockCreateExceptionCommand(),), ()), 410 | (), 411 | _DEFAULT_CONTEXT_CLASS, 412 | {"avoid_monitoring_exceptions": True}, 413 | mona_clients_getter=get_mock_mona_clients_getter((), ()), 414 | ) 415 | 416 | with pytest.raises(mockCreateException): 417 | monitored_completion.create(**_DEFAULT_INPUT) 418 | 419 | 420 | def test_context_id(): 421 | context_id = "some_context_id" 422 | monitor( 423 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 424 | (), 425 | _DEFAULT_CONTEXT_CLASS, 426 | 
mona_clients_getter=get_mock_mona_clients_getter( 427 | (_get_mona_message(context_id=context_id),), () 428 | ), 429 | ).create(**{**_DEFAULT_INPUT, CONTEXT_ID_ARG_NAME: context_id}) 430 | 431 | 432 | def test_export_timestamp(): 433 | export_timestamp = 1679244447 434 | monitor( 435 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 436 | (), 437 | _DEFAULT_CONTEXT_CLASS, 438 | mona_clients_getter=get_mock_mona_clients_getter( 439 | (_get_mona_message(export_timestamp=export_timestamp),), () 440 | ), 441 | ).create(**{**_DEFAULT_INPUT, EXPORT_TIMESTAMP_ARG_NAME: export_timestamp}) 442 | 443 | 444 | def test_no_profanity(): 445 | expected_analysis = deepcopy(_DEFAULT_ANALYSIS) 446 | expected_analysis.pop("profanity") 447 | monitor( 448 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 449 | (), 450 | _DEFAULT_CONTEXT_CLASS, 451 | {"analysis": {"profanity": False}}, 452 | mona_clients_getter=get_mock_mona_clients_getter( 453 | (_get_mona_message(analysis=expected_analysis),), () 454 | ), 455 | ).create(**_DEFAULT_INPUT) 456 | 457 | 458 | def test_no_textual_or_privacy(): 459 | expected_analysis = deepcopy(_DEFAULT_ANALYSIS) 460 | expected_analysis.pop("privacy") 461 | expected_analysis.pop("textual") 462 | monitor( 463 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 464 | (), 465 | _DEFAULT_CONTEXT_CLASS, 466 | {"analysis": {"privacy": False, "textual": False}}, 467 | mona_clients_getter=get_mock_mona_clients_getter( 468 | (_get_mona_message(analysis=expected_analysis),), () 469 | ), 470 | ).create(**_DEFAULT_INPUT) 471 | 472 | 473 | def test_multiple_answers(): 474 | new_input = deepcopy(_DEFAULT_INPUT) 475 | new_input["n"] = 3 476 | expected_input = _get_clean_input(new_input) 477 | 478 | new_response = deepcopy(_DEFAULT_RESPONSE) 479 | new_response["choices"] = [ 480 | { 481 | "finish_reason": "length", 482 | "index": 0, 483 | "logprobs": None, 484 | "text": "\n\nMy name is", 485 | }, 486 | { 487 | "finish_reason": "length", 488 | "index": 1, 489 | "logprobs": None, 490 | "text": "\n\nMy thing is", 491 | }, 492 | { 493 | "finish_reason": "length", 494 | "index": 2, 495 | "logprobs": None, 496 | "text": "\n\nbladf", 497 | }, 498 | ] 499 | 500 | new_expected_response = _get_response_without_texts(new_response) 501 | 502 | new_analysis = { 503 | "privacy": { 504 | "prompt_phone_number_count": (0,), 505 | "answer_unknown_phone_number_count": (0, 0, 0), 506 | "prompt_email_count": (0,), 507 | "answer_unknown_email_count": (0, 0, 0), 508 | }, 509 | "textual": { 510 | "prompt_length": (35,), 511 | "answer_length": (12, 13, 7), 512 | "prompt_word_count": (7,), 513 | "answer_word_count": (3, 3, 1), 514 | "prompt_preposition_count": (2,), 515 | "prompt_preposition_ratio": (0.2857142857142857,), 516 | "answer_preposition_count": (0, 0, 0), 517 | "answer_preposition_ratio": (0.0, 0.0, 0.0), 518 | "answer_words_not_in_prompt_count": (3, 3, 1), 519 | "answer_words_not_in_prompt_ratio": (1.0, 1.0, 1.0), 520 | }, 521 | "profanity": { 522 | "prompt_profanity_prob": (0.05,), 523 | "answer_profanity_prob": (0.05, 0.01, 0.05), 524 | "prompt_has_profanity": (False,), 525 | "answer_has_profanity": (False, False, False), 526 | }, 527 | } 528 | 529 | monitor( 530 | _get_mock_openai_class((new_response,), ()), 531 | (), 532 | _DEFAULT_CONTEXT_CLASS, 533 | mona_clients_getter=get_mock_mona_clients_getter( 534 | ( 535 | _get_mona_message( 536 | response=new_expected_response, 537 | input=expected_input, 538 | analysis=new_analysis, 539 | ), 540 | ), 541 | (), 542 | ), 543 | ).create(**new_input) 544 | 545 | 
546 | def test_additional_data(): 547 | new_input = deepcopy(_DEFAULT_INPUT) 548 | additional_data = {"foo": "bar", "foo2": 2} 549 | new_input["MONA_additional_data"] = additional_data 550 | 551 | monitor( 552 | _get_mock_openai_class((_DEFAULT_RESPONSE,), ()), 553 | (), 554 | _DEFAULT_CONTEXT_CLASS, 555 | mona_clients_getter=get_mock_mona_clients_getter( 556 | (_get_mona_message(additional_data=additional_data),), () 557 | ), 558 | ).create(**new_input) 559 | 560 | 561 | def test_stream(): 562 | def response_generator(): 563 | words = _DEFAULT_RESPONSE_TEXT.split(" ") 564 | last_index = len(words) - 1 565 | for i, word in enumerate(words): 566 | choice = { 567 | "text": (word + " ") if i < last_index else word, 568 | "index": 0, 569 | "logprobs": None, 570 | "finish_reason": None if i < last_index else "length", 571 | } 572 | yield _DEFAULT_RESPONSE_COMMON_VARIABLES | {"choices": [choice]} 573 | 574 | input = deepcopy(_DEFAULT_INPUT) 575 | input["stream"] = True 576 | 577 | expected_input = _get_clean_input(input) 578 | 579 | for _ in monitor( 580 | _get_mock_openai_class((response_generator(),), ()), 581 | (), 582 | _DEFAULT_CONTEXT_CLASS, 583 | mona_clients_getter=get_mock_mona_clients_getter( 584 | (_get_mona_message(is_stream=True, input=expected_input),), () 585 | ), 586 | ).create(**input): 587 | pass 588 | 589 | 590 | def test_stream_multiple_answers(): 591 | def response_generator(): 592 | words = _DEFAULT_RESPONSE_TEXT.split(" ") 593 | for i, word in enumerate(words): 594 | yield _DEFAULT_RESPONSE_COMMON_VARIABLES | { 595 | "choices": [ 596 | { 597 | "text": (word + " ") if i < len(words) - 1 else word, 598 | "index": 0, 599 | "logprobs": None, 600 | "finish_reason": None 601 | if i < len(words) - 1 602 | else "length", 603 | } 604 | ] 605 | } 606 | yield _DEFAULT_RESPONSE_COMMON_VARIABLES | { 607 | "choices": [ 608 | { 609 | "text": (word + " ") if i < len(words) - 1 else word, 610 | "index": 1, 611 | "logprobs": None, 612 | "finish_reason": None 613 | if i < len(words) - 1 614 | else "length", 615 | } 616 | ] 617 | } 618 | 619 | input = deepcopy(_DEFAULT_INPUT) 620 | input["stream"] = True 621 | input["n"] = 2 622 | 623 | expected_input = _get_clean_input(input) 624 | 625 | expected_response = deepcopy(_DEFAULT_EXPORTED_RESPONSE) 626 | expected_response["choices"] += deepcopy(expected_response["choices"]) 627 | expected_response["choices"][1]["index"] = 1 628 | expected_response["usage"] = { 629 | "completion_tokens": 10, 630 | "prompt_tokens": 8, 631 | "total_tokens": 18, 632 | } 633 | 634 | new_analysis = { 635 | "privacy": { 636 | "prompt_phone_number_count": (0,), 637 | "answer_unknown_phone_number_count": (0, 0), 638 | "prompt_email_count": (0,), 639 | "answer_unknown_email_count": (0, 0), 640 | }, 641 | "textual": { 642 | "prompt_length": (35,), 643 | "answer_length": (12, 12), 644 | "prompt_word_count": (7,), 645 | "answer_word_count": (3, 3), 646 | "prompt_preposition_count": (2,), 647 | "prompt_preposition_ratio": (0.2857142857142857,), 648 | "answer_preposition_count": (0, 0), 649 | "answer_preposition_ratio": (0.0, 0.0), 650 | "answer_words_not_in_prompt_count": (3, 3), 651 | "answer_words_not_in_prompt_ratio": (1.0, 1.0), 652 | }, 653 | "profanity": { 654 | "prompt_profanity_prob": (0.05,), 655 | "answer_profanity_prob": (0.05, 0.05), 656 | "prompt_has_profanity": (False,), 657 | "answer_has_profanity": (False, False), 658 | }, 659 | } 660 | 661 | for _ in monitor( 662 | _get_mock_openai_class((response_generator(),), ()), 663 | (), 664 | _DEFAULT_CONTEXT_CLASS, 665 
| mona_clients_getter=get_mock_mona_clients_getter( 666 | ( 667 | _get_mona_message( 668 | is_stream=True, 669 | input=expected_input, 670 | response=expected_response, 671 | analysis=new_analysis, 672 | ), 673 | ), 674 | (), 675 | ), 676 | ).create(**input): 677 | pass 678 | 679 | 680 | def test_stream_async(): 681 | async def response_generator(): 682 | words = _DEFAULT_RESPONSE_TEXT.split(" ") 683 | for i, word in enumerate(words): 684 | choice = { 685 | "text": (word + " ") if i < len(words) - 1 else word, 686 | "index": 0, 687 | "logprobs": None, 688 | "finish_reason": None if i < len(words) - 1 else "length", 689 | } 690 | yield _DEFAULT_RESPONSE_COMMON_VARIABLES | {"choices": [choice]} 691 | 692 | input = deepcopy(_DEFAULT_INPUT) 693 | input["stream"] = True 694 | 695 | expected_input = _get_clean_input(input) 696 | 697 | async def iterate_gen(): 698 | async for _ in await monitor( 699 | _get_mock_openai_class((), (response_generator(),)), 700 | (), 701 | _DEFAULT_CONTEXT_CLASS, 702 | mona_clients_getter=get_mock_mona_clients_getter( 703 | (), 704 | ( 705 | _get_mona_message( 706 | is_stream=True, is_async=True, input=expected_input 707 | ), 708 | ), 709 | ), 710 | ).acreate(**input): 711 | pass 712 | 713 | asyncio.run(iterate_gen()) 714 | -------------------------------------------------------------------------------- /tests/test_privacy_analyzer.py: -------------------------------------------------------------------------------- 1 | from mona_openai.analysis.privacy import PrivacyAnalyzer 2 | 3 | 4 | def test_phone_numbers_count(): 5 | assert ( 6 | PrivacyAnalyzer( 7 | "Here's a phone number: +972584932014 and another one: " 8 | "(212)456-7890" 9 | ).get_phone_numbers_count() 10 | == 2 11 | ) 12 | 13 | 14 | def test_emails_count(): 15 | assert ( 16 | PrivacyAnalyzer( 17 | "Here's an email: itai@monalabs.io and another one: " 18 | "flsdakjflkjsa@gmail.com" 19 | ).get_emails_count() 20 | == 2 21 | ) 22 | 23 | 24 | def test_other_phone_numbers(): 25 | assert ( 26 | PrivacyAnalyzer( 27 | "Here's a phone number: +972584932014 and another one: " 28 | "(212)456-7890" 29 | ).get_previously_unseen_phone_numbers_count( 30 | ( 31 | PrivacyAnalyzer( 32 | "Here's a different phone number: +972584332014 an " 33 | "existing one: (212)456-7890" 34 | ), 35 | PrivacyAnalyzer(" a different phone number: +972584332015"), 36 | ) 37 | ) 38 | == 1 39 | ) 40 | 41 | 42 | def test_other_emails(): 43 | assert ( 44 | PrivacyAnalyzer( 45 | "Here's an email: itai@monalabs.io and another one: " 46 | "flsdakjflkjsa@gmail.com" 47 | ).get_previously_unseen_emails_count( 48 | ( 49 | PrivacyAnalyzer( 50 | "Here's a different email: flasdkjfg@gmail.com an " 51 | "existing one: flsdakjflkjsa@gmail.com" 52 | ), 53 | PrivacyAnalyzer( 54 | " a different email: f4234lsdakjflkjsa@gmail.com" 55 | ), 56 | ) 57 | ) 58 | == 1 59 | ) 60 | -------------------------------------------------------------------------------- /tests/test_textual_analyzer.py: -------------------------------------------------------------------------------- 1 | from mona_openai.analysis.textual import TextualAnalyzer 2 | 3 | 4 | def test_get_length(): 5 | text = "bla balskdf nblaskd" 6 | assert TextualAnalyzer(text).get_length() == len(text) 7 | 8 | 9 | def test_get_word_count(): 10 | text = "bla balskdf nblaskd" 11 | assert TextualAnalyzer(text).get_word_count() == 3 12 | 13 | 14 | def test_get_preposition_count(): 15 | text = "bla balskdf of nblaskd from there" 16 | assert TextualAnalyzer(text).get_preposition_count() == 2 17 | 18 | 19 | def 
test_get_preposition_ratio(): 20 | text = "bla balskdf of nblaskd from there" 21 | assert TextualAnalyzer(text).get_preposition_ratio() == 2 / 6 22 | 23 | 24 | def test_get_preposition_ratio_no_words(): 25 | text = " " 26 | assert TextualAnalyzer(text).get_preposition_ratio() == 0 27 | 28 | 29 | def test_new_words(): 30 | assert ( 31 | TextualAnalyzer("this is a word").get_words_not_in_others_count( 32 | ( 33 | TextualAnalyzer("has this word"), 34 | TextualAnalyzer("no bla bla is"), 35 | ) 36 | ) 37 | == 1 38 | ) 39 | --------------------------------------------------------------------------------
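The two analyzer test files above are also the clearest documentation of the PrivacyAnalyzer and TextualAnalyzer surface: a constructor that takes a raw string, count/ratio getters, and comparison methods that take a tuple of other analyzers. The sketch below is not part of the repository; it is a minimal usage example assembled only from the calls exercised in those tests, and the prompt/answer strings and variable names are invented for illustration.

# Minimal usage sketch -- not part of the repo; uses only calls shown in the tests above.
from mona_openai.analysis.privacy import PrivacyAnalyzer
from mona_openai.analysis.textual import TextualAnalyzer

prompt = "Email me at itai@monalabs.io or call (212)456-7890"  # invented example text
answer = "Sure, I will reach out to itai@monalabs.io shortly"  # invented example text

prompt_privacy = PrivacyAnalyzer(prompt)
answer_privacy = PrivacyAnalyzer(answer)

print(prompt_privacy.get_phone_numbers_count())  # 1 -- the (212)456-7890 format is covered by the tests
print(prompt_privacy.get_emails_count())         # 1
# Emails in the answer that never appeared in the prompt (0 here, since the address repeats):
print(answer_privacy.get_previously_unseen_emails_count((prompt_privacy,)))

prompt_text = TextualAnalyzer(prompt)
answer_text = TextualAnalyzer(answer)

print(answer_text.get_length())             # character length, per test_get_length
print(answer_text.get_word_count())         # whitespace-separated word count
print(answer_text.get_preposition_ratio())  # prepositions / words, 0 for empty text
# Words in the answer that don't appear in the prompt:
print(answer_text.get_words_not_in_others_count((prompt_text,)))

Comparing an answer analyzer against a tuple containing the prompt analyzer is presumably how metrics such as answer_words_not_in_prompt_count and answer_unknown_email_count in the monitor tests above are produced.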