├── .gitattributes
├── .github
│   └── workflows
│       └── python-publish.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── async_openai
│   ├── __init__.py
│   ├── client.py
│   ├── external_client.py
│   ├── loadbalancer.py
│   ├── manager.py
│   ├── meta.py
│   ├── routes.py
│   ├── schemas
│   │   ├── __init__.py
│   │   ├── chat.py
│   │   ├── completions.py
│   │   ├── edits.py
│   │   ├── embeddings.py
│   │   ├── external
│   │   │   ├── __init__.py
│   │   │   ├── fireworks
│   │   │   │   ├── __init__.py
│   │   │   │   └── chat.py
│   │   │   └── together
│   │   │       ├── __init__.py
│   │   │       ├── chat.py
│   │   │       └── embeddings.py
│   │   ├── images.py
│   │   └── models.py
│   ├── types
│   │   ├── __init__.py
│   │   ├── context.py
│   │   ├── errors.py
│   │   ├── functions.py
│   │   ├── options.py
│   │   ├── pricing.yaml
│   │   ├── resources.py
│   │   ├── responses.py
│   │   └── routes.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── embedding.py
│   │   ├── external_config.py
│   │   ├── fixjson.py
│   │   ├── helpers.py
│   │   ├── logs.py
│   │   ├── presets
│   │   │   ├── fireworks.yaml
│   │   │   ├── together.yaml
│   │   │   └── together_proxy.yaml
│   │   ├── resolvers.py
│   │   └── tokenization.py
│   └── version.py
├── setup.py
└── tests
    ├── chat.py
    ├── chat_functions.py
    ├── client.py
    ├── client_rotate.py
    ├── completion.py
    └── external_provider.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 | 
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 | 
9 | name: Upload Python Package
10 | 
11 | on:
12 |   workflow_dispatch:
13 |   push:
14 |     paths:
15 |       # - 'setup.py'
16 |       - 'async_openai/version.py'
17 |   release:
18 |     types: [created]
19 | 
20 | jobs:
21 |   build-python-package:
22 | 
23 |     runs-on: ubuntu-latest
24 | 
25 |     steps:
26 |     - uses: actions/checkout@v2
27 |     - name: Set up Python
28 |       uses: actions/setup-python@v2
29 |       with:
30 |         python-version: '3.x'
31 |     - name: Install dependencies
32 |       run: |
33 |         python -m pip install --upgrade pip
34 |         pip install build
35 |     - name: Build package
36 |       run: python -m build
37 |     - name: Publish package
38 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
39 |       with:
40 |         user: __token__
41 |         password: ${{ secrets.pypi_api_token }}
42 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | cache_**
3 | ***_cache
4 | ***cache*
5 | **cache/**
6 | *.DS_Store
7 | tests*
8 | __pycache__*
9 | *logs
10 | *dist
11 | *build
12 | **build.sh
13 | **build_lib.sh
14 | **build_docker.sh
15 | **run_test.sh
16 | *test.py
17 | *.egg-info*
18 | *.vscode
19 | **test
20 | **.ipynb**
21 | **test.sh
22 | /.idea/
23 | async_openai/v1*
24 | tests/private_*
25 | !tests/
26 | tests/v2/private_*
27 | tests/v2/fireworks*
28 | tests/v2/together*
29 | async_openai/schemas/external/huggingface
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | #### v0.0.53 (2024-05-31)
4 | 
5 | - Added support for the new `gpt-4o` model
6 | - Added support for external providers
7 | - Updated certain validation behaviors of Functions
8 | 
9 | - TODO: Add batch create support
10 | 
11 | #### v0.0.52 (2024-02-28)
12 | 
13 | - Added support for the following parameters in `model_configurations` in `OpenAIManager`:
14 | 
15 |   - `ping_timeout` - allows for custom timeouts for each client.
16 | 
17 |   - `included_models` - allows for more flexible setting of models in Azure.
18 | 
19 |   - `weight` - allows for weighted selection of clients.
20 | 
21 | - Improved Healthcheck behavior to cache the result if successful for a period of time, and always recheck if not.
22 | 
23 | - Added `dimension` parameter for `embedding` models.
24 | 
25 | #### v0.0.51rc (2024-02-07)
26 | 
27 | - Modification of `async_openai.types.context.ModelContextHandler` to a proxied object singleton.
28 | 
29 | - Began adding support for external providers, such as `together`, to allow usage in conjunction with `OpenAI` models. WIP.
30 | 
31 | - Rework of `api_resource` and `root_name` in `Route` objects to be settable during initialization. This allows flexibility for external providers.
32 | 
33 | - Added capability to have multi-api-key support for external providers, allowing for automatic rotation between api keys.
34 | 
35 | #### v0.0.50 (2024-02-01)
36 | 
37 | **Breaking Changes**
38 | 
39 | - The `OpenAI` client has been refactored to be a singleton `ProxyObject` vs a `Type` object.
40 | 
41 |   Currently, this API is accessible with `async_openai.OpenAIManager`, which provides all the existing functionality of the `OpenAI` client, with a few additional features.
42 | 
43 | - `OpenAIManager` supports automatic proxy rotation and client selection based on available models.
44 | 
45 | - `OpenAIManager` supports automatic retrying of failed requests, as well as enabling automatic healthchecking prior to each request to ensure the endpoint is available with `auto_healthcheck_enabled`; otherwise, it will rotate to another endpoint. This is useful for ensuring high availability and reliability of the API (see the sketch at the end of this entry).
46 | 
47 |   Future versions will deprecate the `OpenAI` client in favor of the `OpenAIManager` object.
48 | 
49 | - Added a new `OpenAIFunctions` class which provides a robust interface for creating and running functions. This class is also a singleton `ProxyObject`.
50 | 
51 |   This can be accessed through the `OpenAIManager.functions` object.
52 | 
53 | 
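As a minimal sketch of the rotation and retry behavior described above (assuming `OpenAIManager` mirrors the `OpenAI.configure(...)` / `async_chat_create(...)` interface documented in the README; treating `auto_healthcheck_enabled` as a configure flag is an assumption, and all credential values are placeholders):

```python
import asyncio
from async_openai import OpenAIManager

OpenAIManager.configure(
    api_key = "sk-XXXX",                 # placeholder credentials
    azure_api_base = "https://....openai.azure.com/",
    azure_api_key = "....",
    enable_rotating_clients = True,      # rotate between configured clients on failure
    auto_healthcheck_enabled = True,     # assumption: ping each endpoint before use
)

async def main():
    # Failed requests are retried, and unhealthy endpoints are rotated away from.
    result = await OpenAIManager.async_chat_create(
        model = "gpt-3.5-turbo",
        messages = [{"role": "user", "content": "Hello!"}],
        auto_retry = True,
    )
    print(result.messages)

asyncio.run(main())
```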
54 | #### v0.0.41 (2023-11-06)
55 | 
56 | **Update to Latest OpenAI API**
57 | 
58 | This version updates the API to the latest version of OpenAI's API, which includes the following changes:
59 | 
60 | - Addition of `gpt-4-turbo` models
61 | 
62 | - Added additional supported parameters to the `chat` endpoint. We maintain v1 parameters for `azure` endpoints, but pass through the new parameters for `openai` endpoints.
63 | 
64 | - Added gradual support for `tools`
65 | 
66 | **Updates**
67 | 
68 | - Rework of validating `models`: validation is no longer performed, and the user is expected to pass the correct model name.
69 | 
70 | - No longer supporting `validate_max_tokens`, as there are now many different schemas for `max_tokens` depending on the model.
71 | 
72 | 
73 | 
74 | #### v0.0.40 (2023-10-18)
75 | 
76 | **Potentially Breaking Changes**
77 | 
78 | This version introduces full compatibility with `pydantic v1/v2`, where previous versions would only work with `pydantic v1`. Auto-detection and handling of deprecated methods of `pydantic` models are handled by `lazyops`, and require `lazyops >= 0.2.60`.
79 | 
80 | With `pydantic v2` support, there should be a slight performance increase in parsing `pydantic` objects, although the majority of the time is spent waiting for the API to respond.
81 | 
82 | Additionally, support is added for handling the response like a `dict` object, so you can access the response like `response['choices']` rather than `response.choices`.
83 | 
84 | #### v0.0.36 (2023-10-11)
85 | 
86 | **Additions**
87 | 
88 | - Added auto-parsing of `pydantic` objects in `function_call` parameters, returning the same object schema in `chat_response.function_result_objects`.
89 | 
90 | 
91 | #### v0.0.35 (2023-10-06)
92 | 
93 | **Additions**
94 | 
95 | - Added `auto_retry` option to the `OpenAI` client, which will automatically retry failed requests.
96 | - Added `RotatingClients` class which handles the rotation of multiple clients. This can be enabled by passing `rotating_clients=True` to the `OpenAI` client while configuring.
97 | - Added `OpenAI.chat_create` and `OpenAI.async_chat_create` methods which automatically handle rotating clients and retrying failed requests.
98 | - Added `azure_model_mapping`, which allows automatic mapping of Azure models to OpenAI models: when passing `openai` model names as a parameter, they are automatically converted to the Azure model. This is only done in the `chat` implementation.
99 | 
100 | **Fixes**
101 | 
102 | - Fixed `api_version` Configuration handling.
103 | - Fixed parsing of `function_call` in the streaming implementation.
104 | 
105 | 
106 | 
107 | #### v0.0.34 (2023-10-06)
108 | 
109 | **Changes**
110 | 
111 | - Updated default `api_version` to `2023-07-01-preview`
112 | - Added `__getitem__` attributes to completion and chat objects, allowing them to act like `dict` objects (see the sketch below).
113 | - Added `functions` and `function_call` to `Chat` completion routes.
114 | - `function.properties` can pass through a `pydantic` object, which will automatically be converted to a `dict` JSON schema.
115 | - Added `function_call` attribute in `ChatMessage` objects, allowing for easy access to the function call.
116 | - Streaming is not supported for `functions` at this time.
117 | 
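As a minimal sketch of the dict-style access described in v0.0.40 and enabled here by `__getitem__` (the model and prompt values are placeholders):

```python
from async_openai import OpenAI

result = OpenAI.chat.create(
    model = "gpt-3.5-turbo",
    messages = [{"role": "user", "content": "Hello!"}],
)

# Attribute access and dict-style access return the same field:
assert result.choices == result['choices']
```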
118 | #### v0.0.33 (2023-08-24)
119 | 
120 | **Changes**
121 | 
122 | - Updated the auto-configured `httpx` logger to be disabled if `debug_enabled` is set to `False`.
123 | 
124 | 
125 | #### v0.0.32 (2023-08-23)
126 | 
127 | **Changes**
128 | 
129 | - Updated `headers` behavior and parameter, allowing it to be passed to each API call.
130 | - Updated `auth` behavior, which now utilizes `httpx.Auth` rather than injecting into the header directly.
131 | - Added `custom_headers` configuration that can be passed to the `OpenAI` client during initialization.
132 | - Added customization of `connection_pool`, controlling the number of concurrent connections to the API.
133 | 
134 | - Reworked `streaming` implementations, which previously did not work properly.
135 | - Added `parse_stream` parameter (default: true) which defers parsing of the stream until it is called with `result.stream` or `result.astream`, rather than parsing the stream as it is received.
136 | 
137 | 
138 | #### v0.0.31 (2023-08-11)
139 | 
140 | 
141 | **Changes**
142 | 
143 | - Updated some behaviors of the `OpenAI` Client:
144 |   * allow for customization of retry behavior or completely disabling it.
145 | 
146 | - Routes now take the `is_azure` parameter during init rather than using `@property` to determine the route.
147 | - The `_send` method is better optimized for retry behaviors.
148 | 
149 | **Fixes**
150 | 
151 | - Resolved the `model` endpoint.
152 | - Resolved handling of `Azure` models.
153 | 
154 | 
155 | 
156 | ---
157 | 
158 | #### v0.0.30 (2023-08-10)
159 | 
160 | _Potentially breaking changes in this version_
161 | 
162 | **Changes**
163 | 
164 | - Refactored the architecture of the `OpenAI` Client to accommodate multi-client initialization, i.e. `OpenAI` can now be initialized with multiple API keys and will automatically rotate between them, as well as switch back and forth between Azure and OpenAI.
165 | 
166 | - Settings are initialized after the first call, rather than globally.
167 | 
168 | - Routes and Clients are configured after the first call, rather than during initialization.
169 | 
170 | 
171 | **Fixes**
172 | 
173 | - Resolved `embedding` endpoints.
174 | 
175 | **Updates**
176 | 
177 | - Changed default `api-version` to `2023-03-15-preview`
178 | 
179 | ---
180 | 
181 | #### v0.0.22 (2023-06-14)
182 | - Updated pricing to reflect OpenAI's new pricing model
183 |   - `gpt-3.5-turbo`
184 |   - `text-embedding-ada-002`
185 | - Bugfix for consumption and usage validation in `chat` models
186 | - Added support for `gpt-3.5-turbo-16k`
187 | - Modified handling of `gpt-3.5-turbo`'s consumption pricing to reflect `prompt` and `completion` usage
188 | - Modified default `Embedding` model to be `ada`
189 | 
190 | ---
191 | #### 0.0.17 (2023-04-12)
192 | - Added better support for chatgpt models and `gpt-4`
193 | - Better validation of `max_tokens`
194 | 
195 | ---
196 | #### 0.0.11 (2023-03-07)
197 | - Added support for GPT-3.5 Turbo through `async_openai.OpenAI.chat`
198 | - Refactored `async_openai.OpenAI` to utilize a `metaclass` rather than initializing directly
199 | 
200 | #### 0.0.7 (2023-02-02)
201 | - Refactored `async_openai.types.options.OpenAIModel` to handle more robust parsing of model names.
202 | 
203 | #### 0.0.3 (2022-12-21)
204 | - Fixed incorrect charges for `babbage` and `ada` models.
205 | 
206 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Growth Engine Inc.
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include async_openai *
2 | recursive-exclude * __pycache__
3 | recursive-exclude * *.py[co]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # async-openai
2 | Unofficial Async Python client library for the [OpenAI](https://openai.com) API, based on the [Documented Specs](https://beta.openai.com/docs/api-reference/making-requests)
3 | 
4 | **Latest Version**: [![PyPI version](https://badge.fury.io/py/async-openai.svg)](https://badge.fury.io/py/async-openai)
5 | 
6 | **[Official Client](https://github.com/openai/openai-python)**
7 | 
8 | ## Features
9 | 
10 | - [x] Asyncio-based, with Sync and Async support via `httpx`
11 | 
12 | - [ ] Supports all API endpoints
13 | 
14 |   - [x] `Completions`: [Docs](https://beta.openai.com/docs/api-reference/completions)
15 | 
16 |     - [x] Supports Streaming
17 | 
18 |   - [x] `Chat`: [Docs](https://beta.openai.com/docs/api-reference/chat)
19 | 
20 |     - [x] Supports Streaming
21 | 
22 |     - [x] Supports `Functions`
23 | 
24 |   - [x] `Edits`: [Docs](https://beta.openai.com/docs/api-reference/edits)
25 | 
26 |   - [x] `Embeddings`: [Docs](https://beta.openai.com/docs/api-reference/embeddings)
27 | 
28 |   - [x] `Models`: [Docs](https://beta.openai.com/docs/api-reference/models)
29 | 
30 | - [x] Strongly typed validation of requests and responses with `Pydantic` models, with transparent
31 | access to the raw response and object-based results.
32 | 
33 | - [x] Handles Retries automatically through `backoff` and custom retry logic.
34 | 
35 | - [x] Handles `rate_limit` errors and retries automatically (when passing `auto_retry = True`).
36 | 
37 | - [x] Supports Multiple Clients and Auto-Rotation of Clients
38 | 
39 | - [x] Supports the `Azure` API
40 | 
41 | - [x] Supports Local and Remote Cloud Object Storage File Handling Asynchronously through `file-io`
42 | 
43 |   - [x] Supports `S3`: `s3://bucket/path/to/file.txt`
44 | 
45 |   - [x] Supports `GCS`: `gs://bucket/path/to/file.txt`
46 | 
47 |   - [x] Supports `Minio`: `minio://bucket/path/to/file.txt`
48 | 
49 | - [x] Supports `limited` cost tracking for `Completions` and `Edits` requests (when stream is not enabled)
50 | 
51 | - [x] Parallelization-safe with ThreadPools or any `asyncio`-compatible event loop. Can handle 100s of requests per second (if you don't run into rate limits).
52 | 
53 | 
54 | ---
55 | 
56 | ## Installation
57 | 
58 | ```bash
59 | # Install from stable
60 | pip install async-openai
61 | 
62 | # Install from dev/latest
63 | pip install git+https://github.com/GrowthEngineAI/async-openai.git
64 | 
65 | ```
66 | 
67 | ### Quick Usage
68 | 
69 | ```python
70 | 
71 | import asyncio
72 | from async_openai import OpenAI, settings, CompletionResponse
73 | 
74 | # Environment variables should pick up the defaults;
75 | # however, you can also set them explicitly. See below for more details.
76 | 
77 | # `api_key` - Your OpenAI API key. Env: [`OPENAI_API_KEY`]
78 | # `url` - The URL of the OpenAI API. Env: [`OPENAI_URL`]
79 | # `api_type` - The OpenAI API type. Env: [`OPENAI_API_TYPE`]
80 | # `api_version` - The OpenAI API version. Env: [`OPENAI_API_VERSION`]
81 | # `organization` - The OpenAI organization. Env: [`OPENAI_ORGANIZATION`]
82 | # `proxies` - A dictionary of proxies to be used. Env: [`OPENAI_PROXIES`]
83 | # `timeout` - The timeout in seconds to be used. Env: [`OPENAI_TIMEOUT`]
84 | # `max_retries` - The number of retries to be used. Env: [`OPENAI_MAX_RETRIES`]
85 | 
86 | OpenAI.configure(
87 |     api_key = "sk-XXXX",
88 |     organization = "org-XXXX",
89 |     debug_enabled = False,
90 | )
91 | 
92 | # Alternatively, you can configure the settings through environment variables
93 | # settings.configure(
94 | #     api_key = "sk-XXXX",
95 | #     organization = "org-XXXX",
96 | # )
97 | 
98 | 
99 | # [Sync] create a completion
100 | # Results return a CompletionResponse object
101 | result: CompletionResponse = OpenAI.completions.create(
102 |     prompt = 'say this is a test',
103 |     max_tokens = 4,
104 |     stream = True
105 | )
106 | 
107 | # print the completion text,
108 | # which is concatenated together from result['choices'][n]['text']
109 | 
110 | print(result.text)
111 | 
112 | # print the number of choices returned
113 | print(len(result))
114 | 
115 | # get the cost consumption for the request
116 | print(result.consumption)
117 | 
118 | # [Async] create a completion
119 | # All async methods are generally prefixed with `async_`
120 | 
121 | result: CompletionResponse = asyncio.run(
122 |     OpenAI.completions.async_create(
123 |         prompt = 'say this is a test',
124 |         max_tokens = 4,
125 |         stream = True
126 |     )
127 | )
128 | 
129 | ```
130 | 
131 | ### Configuration and Environment Variables
132 | 
133 | The following environment variables can be used to configure the client (a usage sketch follows the table).
134 | 
135 | ```
136 | 
137 | OpenAI Configuration
138 | 
139 | url: The OpenAI API URL | Env: [`OPENAI_API_URL`]
140 | scheme: The OpenAI API Scheme | Env: [`OPENAI_API_SCHEME`]
141 | host: The OpenAI API Host | Env: [`OPENAI_API_HOST`]
142 | port: The OpenAI API Port | Env: [`OPENAI_API_PORT`]
143 | api_base: The OpenAI API Base | Env: [`OPENAI_API_BASE`]
144 | api_key: The OpenAI API Key | Env: [`OPENAI_API_KEY`]
145 | api_path: The OpenAI API Path | Env: [`OPENAI_API_PATH`]
146 | api_type: The OpenAI API Type | Env: [`OPENAI_API_TYPE`]
147 | api_version: The OpenAI API Version | Env: [`OPENAI_API_VERSION`]
148 | api_key_path: The API Key Path | Env: [`OPENAI_API_KEY_PATH`]
149 | organization: Organization | Env: [`OPENAI_ORGANIZATION`]
150 | proxies: The OpenAI Proxies | Env: [`OPENAI_PROXIES`]
151 | timeout: Timeout in Seconds | Env: [`OPENAI_TIMEOUT`]
152 | max_retries: The OpenAI Max Retries | Env: [`OPENAI_MAX_RETRIES`]
153 | ignore_errors: Ignore Errors | Env: [`OPENAI_IGNORE_ERRORS`]
154 | disable_retries: Disable Retries | Env: [`OPENAI_DISABLE_RETRIES`]
155 | max_connections: Max Connections | Env: [`OPENAI_MAX_CONNECTIONS`]
156 | max_keepalive_connections: Max Keepalive Connections | Env: [`OPENAI_MAX_KEEPALIVE_CONNECTIONS`]
157 | keepalive_expiry: Keepalive Expiry | Env: [`OPENAI_KEEPALIVE_EXPIRY`]
158 | custom_headers: Custom Headers | Env: [`OPENAI_CUSTOM_HEADERS`]
159 | 
160 | Azure Configuration
161 | 
162 | azure_url: The OpenAI API URL | Env: [`AZURE_OPENAI_API_URL`]
163 | azure_scheme: The OpenAI API Scheme | Env: [`AZURE_OPENAI_API_SCHEME`]
164 | azure_host: The OpenAI API Host | Env: [`AZURE_OPENAI_API_HOST`]
165 | azure_port: The OpenAI API Port | Env: [`AZURE_OPENAI_API_PORT`]
166 | azure_api_key: The OpenAI API Key | Env: [`AZURE_OPENAI_API_KEY`]
167 | azure_api_base: The OpenAI API Base | Env: [`AZURE_OPENAI_API_BASE`]
168 | azure_api_path: The OpenAI API Path | Env: [`AZURE_OPENAI_API_PATH`]
169 | azure_api_type: The OpenAI API Type | Env: [`AZURE_OPENAI_API_TYPE`]
170 | azure_api_version: The OpenAI API Version | Env: [`AZURE_OPENAI_API_VERSION`]
171 | azure_api_key_path: The API Key Path | Env: [`AZURE_OPENAI_API_KEY_PATH`]
172 | azure_organization: Organization | Env: [`AZURE_OPENAI_ORGANIZATION`]
173 | azure_proxies: The OpenAI Proxies | Env: [`AZURE_OPENAI_PROXIES`]
174 | azure_timeout: Timeout in Seconds | Env: [`AZURE_OPENAI_TIMEOUT`]
175 | azure_max_retries: The OpenAI Max Retries | Env: [`AZURE_OPENAI_MAX_RETRIES`]
176 | 
177 | ```
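For instance, a minimal sketch of configuring the client entirely through environment variables (the variable names come from the table above; the values are placeholders). Since settings are resolved lazily on the first call, the variables must be set before the first request:

```python
import os

os.environ['OPENAI_API_KEY'] = 'sk-XXXX'
os.environ['OPENAI_ORGANIZATION'] = 'org-XXXX'
os.environ['AZURE_OPENAI_API_BASE'] = 'https://....openai.azure.com/'
os.environ['AZURE_OPENAI_API_KEY'] = '....'

from async_openai import OpenAI

# No explicit OpenAI.configure(...) call is required; the settings
# object picks the values up from the environment on first use.
result = OpenAI.completions.create(
    prompt = 'say this is a test',
    max_tokens = 4,
)
```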
178 | 
179 | 
180 | ### Initialize Clients Manually and Work with Multiple Clients
181 | 
182 | ```python
183 | 
184 | from async_openai import OpenAI
185 | 
186 | # Configure your primary client (default)
187 | 
188 | 
189 | OpenAI.configure(
190 |     api_key = "sk-XXXX",
191 |     organization = "org-XXXX",
192 |     debug_enabled = False,
193 | 
194 |     # Azure Configuration
195 |     azure_api_base = 'https://....openai.azure.com/',
196 |     azure_api_version = '2023-07-01-preview',
197 |     azure_api_key = '....',
198 | )
199 | 
200 | # Returns the default client (openai)
201 | oai = OpenAI.init_api_client()
202 | 
203 | # Configure your secondary client (azure) and use it directly
204 | az = OpenAI.init_api_client('az', set_as_default = False, debug_enabled = True)
205 | result = az.completions.create(
206 |     prompt = 'say this is a test',
207 |     max_tokens = 4,
208 |     stream = True
209 | )
210 | 
211 | 
212 | # Use the default client (openai)
213 | result = OpenAI.completions.create(
214 |     prompt = 'say this is a test',
215 |     max_tokens = 4,
216 |     stream = True
217 | )
218 | # Or
219 | result = oai.completions.create(
220 |     prompt = 'say this is a test',
221 |     max_tokens = 4,
222 |     stream = True
223 | )
224 | 
225 | # You can select the different clients by name or index
226 | result = OpenAI['az'].completions.create(
227 |     prompt = 'say this is a test',
228 |     max_tokens = 4,
229 |     stream = True
230 | )
231 | 
232 | # Use the default client (openai)
233 | result = OpenAI['default'].completions.create(
234 |     prompt = 'say this is a test',
235 |     max_tokens = 4,
236 |     stream = True
237 | )
238 | 
239 | # Will use the `default` client since it was initialized first
240 | result = OpenAI[0].completions.create(
241 |     prompt = 'say this is a test',
242 |     max_tokens = 4,
243 |     stream = True
244 | )
245 | 
246 | ```
247 | 
248 | ### Handling Errors, Retries, and Rotations
249 | 
250 | The example below shows how to rotate between multiple clients when you hit an error.
251 | 
252 | **Important:** Auto-rotation is only supported with the `chat_create` and `async_chat_create` methods. Otherwise, you should handle the rotation manually (see the sketch after this example).
253 | 
254 | ```python
255 | 
256 | import asyncio
257 | from async_openai import OpenAI, ChatResponse
258 | from async_openai.utils import logger
259 | 
260 | OpenAI.configure(
261 |     api_key = "sk-XXXX",
262 |     organization = "org-XXXX",
263 |     debug_enabled = False,
264 | 
265 |     # Azure Configuration
266 |     azure_api_base = 'https://....openai.azure.com/',
267 |     azure_api_version = '2023-07-01-preview',
268 |     azure_api_key = '....',
269 | 
270 |     # This will allow you to auto-rotate clients when you hit an error,
271 |     # but only if you have multiple clients configured and are using `OpenAI.chat_create`
272 |     enable_rotating_clients = True,
273 | 
274 |     # This will prioritize Azure over OpenAI when using `OpenAI.chat_create`
275 |     prioritize = "azure",
276 | )
277 | 
278 | # Display the current client
279 | OpenAI.get_current_client_info(verbose = True)
280 | 
281 | # Rotate to the next client
282 | # OpenAI.rotate_client(verbose = True)
283 | 
284 | ###
285 | # [Sync] create a completion with auto-rotation and auto-retry
286 | ###
287 | 
288 | result: ChatResponse = OpenAI.chat_create(
289 |     model = "gpt-3.5-turbo-16k",
290 |     messages = [
291 |         {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"}
292 |     ],
293 |     auto_retry = True,
294 | 
295 | )
296 | 
297 | logger.info(f'Result Chat Message: {result.messages}')
298 | logger.info(f'Result Usage: {result.usage}')
299 | logger.info(f'Result Consumption: {result.consumption}')
300 | 
301 | ###
302 | # [Async] create a completion with auto-rotation and auto-retry
303 | ###
304 | 
305 | result: ChatResponse = asyncio.run(
306 |     OpenAI.async_chat_create(
307 |         model = "gpt-3.5-turbo-16k",
308 |         messages = [
309 |             {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"}
310 |         ],
311 |         auto_retry = True,
312 |     )
313 | )
314 | 
315 | ```
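For the other routes, a minimal sketch of handling rotation manually (the exception types are exported by `async_openai`; the retry loop and its bounds are illustrative assumptions, not library behavior):

```python
from async_openai import OpenAI, RateLimitError, ServiceUnavailableError
from async_openai.utils import logger

def create_completion_with_rotation(prompt: str, max_attempts: int = 3):
    """Try each configured client in turn until one succeeds."""
    for attempt in range(max_attempts):
        try:
            return OpenAI.completions.create(
                prompt = prompt,
                max_tokens = 4,
            )
        except (RateLimitError, ServiceUnavailableError) as e:
            logger.warning(f'Client failed ({e}); rotating [{attempt + 1}/{max_attempts}]')
            # Move to the next configured client before retrying.
            OpenAI.rotate_client(verbose = True)
    raise RuntimeError('All configured clients failed.')
```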
316 | 
317 | ### Function Calls
318 | 
319 | The latest version of the API allows for function calls to be made. This is currently only supported in `Chat`, and requires API version `2023-07-01-preview` for `azure`.
320 | 
321 | Function calls support using `pydantic` models to auto-generate the schemas.
322 | 
323 | ```python
324 | 
325 | import asyncio
326 | from enum import Enum
327 | from async_openai import OpenAI, ChatResponse
328 | from async_openai.utils import logger
329 | from pydantic import BaseModel, Field
330 | 
331 | class Unit(str, Enum):
332 |     celsius = "celsius"
333 |     fahrenheit = "fahrenheit"
334 | 
335 | class Weather(BaseModel):
336 |     location: str = Field(..., description="The city and state, e.g. San Francisco, CA.")
337 |     unit: Unit = Field(Unit.fahrenheit)
338 | 
339 | functions = [
340 |     {
341 |         "name": "get_current_weather",
342 |         "description": "Get the current weather in a given location",
343 |         "parameters": Weather,
344 |     }
345 | ]
346 | 
347 | result: ChatResponse = OpenAI.chat_create(
348 |     model = "gpt-3.5-turbo-16k",
349 |     messages = [
350 |         {"role": "user", "content": "What's the weather like in Boston today?"}
351 |     ],
352 |     functions = functions,
353 |     auto_retry = True,
354 | )
355 | 
356 | logger.info(f'Result Chat Message: {result.messages}')
357 | logger.info(f'Result Chat Function: {result.function_results}')
358 | logger.info(f'Result Usage: {result.usage}')
359 | logger.info(f'Result Consumption: {result.consumption}')
360 | 
361 | """
362 | Result:
363 | 
364 | > Result Chat Message: [ChatMessage(content='', role='assistant', function_call=FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'}), name=None)]
365 | > Result Chat Function: [FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'})]
366 | > Result Usage: prompt_tokens=16 completion_tokens=19 total_tokens=35
367 | > Result Consumption: 0.00012399999999999998
368 | """
369 | 
370 | ```
371 | 
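To act on the returned call, a minimal sketch that dispatches `result.function_results` to a local handler (the `get_current_weather` implementation and the `handlers` table are illustrative assumptions, not part of the library):

```python
def get_current_weather(location: str, unit: str = "fahrenheit") -> str:
    # Placeholder implementation; a real handler would query a weather API.
    return f"It is 72 degrees {unit} and sunny in {location}."

# Map function names returned by the model to local callables.
handlers = {"get_current_weather": get_current_weather}

for call in result.function_results:
    # `call.arguments` is returned as a parsed dict, as shown in the result above.
    output = handlers[call.name](**call.arguments)
    logger.info(f'{call.name} -> {output}')
```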
San Francisco, CA.") 337 | unit: Unit = Field(Unit.fahrenheit) 338 | 339 | functions = [ 340 | { 341 | "name": "get_current_weather", 342 | "description": "Get the current weather in a given location", 343 | "parameters": Weather, 344 | } 345 | ] 346 | 347 | result: ChatResponse = OpenAI.chat_create( 348 | model = "gpt-3.5-turbo-16k", 349 | messages = [ 350 | {"role": "user", "content": "What's the weather like in Boston today?"} 351 | ], 352 | functions = functions, 353 | auto_retry = True, 354 | ) 355 | 356 | logger.info(f'Result Chat Message: {result.messages}') 357 | logger.info(f'Result Chat Function: {result.function_results}') 358 | logger.info(f'Result Usage: {result.usage}') 359 | logger.info(f'Result Consumption: {result.consumption}') 360 | 361 | """ 362 | Result: 363 | 364 | > Result Chat Message: [ChatMessage(content='', role='assistant', function_call=FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'}), name=None)] 365 | > Result Chat Function: [FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'})] 366 | > Result Usage: prompt_tokens=16 completion_tokens=19 total_tokens=35 367 | > Result Consumption: 0.00012399999999999998 368 | """ 369 | 370 | ``` 371 | 372 | ### Configure Azure Model Mapping 373 | 374 | Your azure models may be named differently than the default mapping. By configuring the mapping, you can automatically map the models to the correct azure model (when using openai model names). 375 | 376 | ```python 377 | 378 | from async_openai import OpenAI 379 | 380 | """ 381 | Default Azure Model Mapping 382 | { 383 | 'gpt-3.5-turbo': 'gpt-35-turbo', 384 | 'gpt-3.5-turbo-16k': 'gpt-35-turbo-16k', 385 | 'gpt-3.5-turbo-instruct': 'gpt-35-turbo-instruct', 386 | 'gpt-3.5-turbo-0301': 'gpt-35-turbo-0301', 387 | 'gpt-3.5-turbo-0613': 'gpt-35-turbo-0613', 388 | } 389 | """ 390 | 391 | AzureModelMapping = { 392 | 'gpt-3.5-turbo': 'azure-gpt-35-turbo', 393 | 'gpt-3.5-turbo-16k': 'azure-gpt-35-turbo-16k', 394 | 'gpt-3.5-turbo-instruct': 'azure-gpt-35-turbo-instruct', 395 | 'gpt-3.5-turbo-0301': 'azure-gpt-35-turbo-0301', 396 | 'gpt-3.5-turbo-0613': 'azure-gpt-35-turbo-0613', 397 | } 398 | 399 | OpenAI.configure( 400 | api_key = "sk-XXXX", 401 | organization = "org-XXXX", 402 | debug_enabled = False, 403 | 404 | # Azure Configuration 405 | azure_api_base = 'https://....openai.azure.com/', 406 | azure_api_version = '2023-07-01-preview', 407 | azure_api_key = '....', 408 | azure_model_mapping = AzureModelMapping, 409 | ) 410 | 411 | # This will now use the azure endpoint as the default client 412 | OpenAI.init_api_client('az', set_as_default = True, debug_enabled = True) 413 | 414 | # This will automatically map "gpt-3.5-turbo-16k" -> "azure-gpt-35-turbo-16k" 415 | result: ChatResponse = OpenAI.chat.create( 416 | model = "gpt-3.5-turbo-16k", 417 | messages = [ 418 | {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"} 419 | ], 420 | auto_retry = True, 421 | ) 422 | 423 | 424 | ``` 425 | 426 | 427 | --- 428 | 429 | ### Dependencies 430 | 431 | The aim of this library is to be as lightweight as possible. 
It is built on top of the following libraries: 432 | 433 | - [aiohttpx](https://github.com/GrowthEngineAI/aiohttpx): Unified Async / Sync HTTP Client that wraps around `httpx` 434 | 435 | - [httpx](https://www.python-httpx.org/): Async / Sync HTTP Requests 436 | 437 | - [lazyops](https://github.com/trisongz/lazyops): Provides numerous utility functions for working with Async / Sync code and data structures 438 | 439 | - [pydantic](https://pydantic-docs.helpmanual.io/): Type Support 440 | 441 | - [file-io](https://github.com/trisongz/file-io): Async Cloud-based File Storage I/O 442 | 443 | - [backoff](https://github.com/litl/backoff): Retries with Exponential Backoff 444 | 445 | 446 | -------------------------------------------------------------------------------- /async_openai/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from async_openai.types import errors 4 | from async_openai.utils.config import OpenAISettings, get_settings 5 | from async_openai.types.errors import ( 6 | OpenAIError, 7 | APIError, 8 | AuthenticationError, 9 | InvalidRequestError, 10 | RateLimitError, 11 | APIConnectionError, 12 | Timeout, 13 | TryAgain, 14 | ServiceUnavailableError, 15 | ) 16 | 17 | from async_openai.types.options import ( 18 | ApiType, 19 | CompletionModels, 20 | FilePurpose, 21 | FinetuneModels, 22 | ImageSize, 23 | ImageFormat, 24 | ) 25 | 26 | ## Base Object Models 27 | from async_openai.schemas.completions import CompletionChoice, CompletionObject, CompletionResponse 28 | from async_openai.schemas.chat import ChatMessage, ChatChoice, ChatObject, ChatResponse 29 | from async_openai.schemas.edits import EditChoice, EditObject, EditResponse 30 | from async_openai.schemas.embeddings import EmbeddingData, EmbeddingObject, EmbeddingResponse 31 | # from async_openai.schemas.files import FileChoice, FileObject, FileResponse 32 | from async_openai.schemas.images import ImageData, ImageObject, ImageResponse 33 | from async_openai.schemas.models import ModelData, ModelObject, ModelResponse 34 | 35 | 36 | ## Route Models 37 | from async_openai.schemas.completions import CompletionRoute 38 | from async_openai.schemas.chat import ChatRoute 39 | from async_openai.schemas.edits import EditRoute 40 | from async_openai.schemas.embeddings import EmbeddingRoute 41 | # from async_openai.schemas.files import FileRoute 42 | from async_openai.schemas.images import ImageRoute 43 | from async_openai.schemas.models import ModelRoute 44 | 45 | 46 | 47 | from async_openai.routes import ApiRoutes 48 | from async_openai.client import OpenAIClient, OpenAI, OpenAIManager 49 | 50 | 51 | 52 | # Completions = OpenAI.completions 53 | # Edits = OpenAI.edits 54 | # Embeddings = OpenAI.embeddings 55 | # # Files = OpenAI.files 56 | # Images = OpenAI.images 57 | # Models = OpenAI.models 58 | -------------------------------------------------------------------------------- /async_openai/client.py: -------------------------------------------------------------------------------- 1 | import aiohttpx 2 | import contextlib 3 | from typing import Optional, Callable, Dict, Union, List 4 | from lazyops.utils.helpers import timed_cache 5 | from async_openai.schemas import * 6 | from async_openai.types.options import ApiType 7 | from async_openai.utils.logs import logger 8 | from async_openai.utils.config import get_settings, OpenAISettings, AzureOpenAISettings, OpenAIAuth, ProxyObject 9 | from async_openai.routes import ApiRoutes 10 | from 
async_openai.meta import OpenAIMetaClass 11 | from async_openai.manager import OpenAIManager as OpenAISessionManager 12 | 13 | _update_params = [ 14 | 'url', 15 | 'scheme', 16 | 'host', 17 | 'port', 18 | 'api_path', 19 | 'api_base', 20 | 'api_key', 21 | 'api_type', 22 | 'api_version', 23 | 'organization', 24 | 'proxies', 25 | 'app_info', 26 | 27 | ] 28 | 29 | class OpenAIClient: 30 | """ 31 | Main Client for all the routes in the API. 32 | """ 33 | 34 | api_key: Optional[str] = None 35 | url: Optional[str] = None 36 | scheme: Optional[str] = None 37 | host: Optional[str] = None 38 | port: Optional[int] = None 39 | api_base: Optional[str] = None 40 | api_path: Optional[str] = None 41 | api_type: Optional[ApiType] = None 42 | api_version: Optional[str] = None 43 | organization: Optional[str] = None 44 | proxies: Optional[Union[str, Dict]] = None 45 | app_info: Optional[Dict[str, str]] = None 46 | 47 | headers: Optional[Dict] = None 48 | debug_enabled: Optional[bool] = None 49 | on_error: Optional[Callable] = None 50 | timeout: Optional[int] = None 51 | max_retries: Optional[int] = None 52 | ignore_errors: Optional[bool] = None 53 | disable_retries: Optional[bool] = None 54 | retry_function: Optional[Callable] = None 55 | 56 | api_url: Optional[str] = None 57 | base_url: Optional[str] = None 58 | 59 | settings: Optional[OpenAISettings] = None 60 | name: Optional[str] = None 61 | is_azure: Optional[bool] = None 62 | azure_model_mapping: Optional[Dict[str, str]] = None 63 | 64 | auth: Optional[OpenAIAuth] = None 65 | _client: Optional[aiohttpx.Client] = None 66 | _routes: Optional[ApiRoutes] = None 67 | _kwargs: Optional[Dict] = None 68 | 69 | def __init__( 70 | self, 71 | **kwargs 72 | ): 73 | """ 74 | Lazily Instantiates the OpenAI Client 75 | """ 76 | self.model_rate_limits: Dict[str, Dict[str, int]] = {} 77 | self.client_callbacks: List[Callable] = [] 78 | self.configure_params(**kwargs) 79 | 80 | def response_event_hook(self, response: aiohttpx.Response): 81 | """ 82 | Monitor the rate limits 83 | """ 84 | url = response.url 85 | headers = response.headers 86 | with contextlib.suppress(Exception): 87 | if self.is_azure: 88 | model_name = str(url).split('deployments/', 1)[-1].split('/', 1)[0].strip() 89 | else: 90 | model_name = headers.get('openai-model') 91 | model_name = model_name.lstrip("https:").strip() 92 | if not model_name: return 93 | if model_name not in self.model_rate_limits: 94 | self.model_rate_limits[model_name] = {} 95 | for key, value in { 96 | 'x-ratelimit-remaining-requests': 'remaining', 97 | 'x-ratelimit-remaining-tokens': 'remaining_tokens', 98 | 'x-ratelimit-limit-tokens': 'limit_tokens', 99 | 'x-ratelimit-limit-requests': 'limit_requests', 100 | }.items(): 101 | if key in headers: 102 | self.model_rate_limits[model_name][value] = int(headers[key]) 103 | if self.debug_enabled: 104 | logger.info(f"Rate Limits: {self.model_rate_limits}") 105 | 106 | async def aresponse_event_hook(self, response: aiohttpx.Response): 107 | """ 108 | Monitor the rate limits 109 | """ 110 | return self.response_event_hook(response) 111 | 112 | @property 113 | def client(self) -> aiohttpx.Client: 114 | """ 115 | Returns the aiohttpx client 116 | """ 117 | if self._client is None: 118 | self.configure_client() 119 | return self._client 120 | 121 | @property 122 | def routes(self) -> ApiRoutes: 123 | """ 124 | Returns the routes class 125 | """ 126 | if self._routes is None: 127 | self.configure_routes() 128 | return self._routes 129 | 130 | def configure_params( 131 | self, 132 | api_key: 
Optional[str] = None, 133 | url: Optional[str] = None, 134 | scheme: Optional[str] = None, 135 | host: Optional[str] = None, 136 | port: Optional[int] = None, 137 | api_base: Optional[str] = None, 138 | api_path: Optional[str] = None, 139 | api_type: Optional[ApiType] = None, 140 | api_version: Optional[str] = None, 141 | organization: Optional[str] = None, 142 | proxies: Optional[Union[str, Dict]] = None, 143 | app_info: Optional[Dict[str, str]] = None, 144 | 145 | headers: Optional[Dict] = None, 146 | debug_enabled: Optional[bool] = None, 147 | on_error: Optional[Callable] = None, 148 | timeout: Optional[int] = None, 149 | max_retries: Optional[int] = None, 150 | ignore_errors: Optional[bool] = None, 151 | disable_retries: Optional[bool] = None, 152 | retry_function: Optional[Callable] = None, 153 | 154 | settings: Optional[OpenAISettings] = None, 155 | name: Optional[str] = None, 156 | is_azure: Optional[bool] = None, 157 | azure_model_mapping: Optional[Dict[str, str]] = None, 158 | auth: Optional[OpenAIAuth] = None, 159 | client_callbacks: Optional[List[Callable]] = None, 160 | **kwargs 161 | ): # sourcery skip: low-code-quality 162 | """ 163 | Helper to configure the client 164 | """ 165 | if self.settings is None and settings is None: 166 | settings = get_settings() 167 | if settings is not None: 168 | self.settings = settings.azure if is_azure else settings 169 | if api_key is not None: 170 | self.api_key = api_key 171 | elif self.api_key is None: 172 | self.api_key = self.settings.api_key 173 | if api_type is not None: 174 | self.api_type = api_type 175 | elif self.api_type is None: 176 | self.api_type = self.settings.api_type 177 | if organization is not None: 178 | self.organization = organization 179 | elif self.organization is None: 180 | self.organization = self.settings.organization 181 | if proxies is not None: 182 | self.proxies = proxies 183 | elif self.proxies is None: 184 | self.proxies = self.settings.proxies 185 | if app_info is not None: 186 | self.app_info = app_info 187 | elif self.app_info is None: 188 | self.app_info = self.settings.app_info 189 | if any( 190 | [ 191 | url is not None, 192 | scheme is not None, 193 | host is not None, 194 | port is not None, 195 | api_base is not None, 196 | self.api_url is None, 197 | ] 198 | ): 199 | self.api_url = self.settings.get_api_url(host = host, port = port, scheme = scheme, url = url, api_base = api_base) 200 | if any( 201 | [ 202 | url is not None, 203 | scheme is not None, 204 | host is not None, 205 | port is not None, 206 | api_path is not None, 207 | api_base is not None, 208 | self.base_url is None, 209 | ] 210 | ): 211 | self.base_url = self.settings.get_base_api_url(host = host, port = port, scheme = scheme, url = url, api_path = api_path, api_base = api_base) 212 | 213 | if debug_enabled is not None: 214 | self.debug_enabled = debug_enabled 215 | elif self.debug_enabled is None: 216 | self.debug_enabled = self.settings.debug_enabled 217 | 218 | if timeout is not None: 219 | self.timeout = timeout 220 | elif self.timeout is None: 221 | self.timeout = self.settings.timeout 222 | 223 | if headers is not None: 224 | self.headers = headers 225 | else: 226 | self.headers = self.settings.get_headers(api_version = self.api_version, api_type = self.api_type, organization = self.organization, app_info = self.app_info) 227 | # self.headers = self.settings.get_headers(api_key = self.api_key, api_version = self.api_version, api_type = self.api_type, organization = self.organization, app_info = self.app_info) 228 | 229 | if 
on_error is not None: 230 | self.on_error = on_error 231 | if ignore_errors is not None: 232 | self.ignore_errors = ignore_errors 233 | elif self.ignore_errors is None: 234 | self.ignore_errors = self.settings.ignore_errors 235 | if max_retries is not None: 236 | self.max_retries = max_retries 237 | elif self.max_retries is None: 238 | self.max_retries = self.settings.max_retries 239 | if disable_retries is not None: 240 | self.disable_retries = disable_retries 241 | elif self.disable_retries is None: 242 | self.disable_retries = self.settings.disable_retries 243 | 244 | if retry_function is not None: 245 | self.retry_function = retry_function 246 | 247 | if is_azure is not None: 248 | self.is_azure = is_azure 249 | elif self.is_azure is None: 250 | self.is_azure = isinstance(self.settings, AzureOpenAISettings) 251 | if azure_model_mapping is not None: 252 | self.azure_model_mapping = azure_model_mapping 253 | if name is not None: 254 | self.name = name 255 | elif self.name is None: 256 | self.name = 'default' 257 | if api_version is not None: 258 | self.api_version = api_version 259 | elif self.api_version is None: 260 | self.api_version = self.settings.api_version 261 | 262 | 263 | if auth is not None: 264 | self.auth = auth 265 | elif self.auth is None: 266 | self.auth = self.settings.get_api_client_auth(api_key = self.api_key, api_type = self.api_type) 267 | 268 | if kwargs: self._kwargs = kwargs 269 | self.log_method = logger.info if self.debug_enabled else logger.debug 270 | if not self.debug_enabled: 271 | self.settings.disable_httpx_logger() 272 | 273 | if client_callbacks is not None: 274 | self.client_callbacks = client_callbacks 275 | # if self.debug_enabled: 276 | # logger.info(f"OpenAI Client Configured: {self.client.base_url}") 277 | # logger.debug(f"Debug Enabled: {self.debug_enabled}") 278 | 279 | def configure_client(self, **kwargs): 280 | """ 281 | Helper to configure the client 282 | """ 283 | if self._client is not None: return 284 | # logger.info(f"OpenAI Client Configured: {self.base_url} [{self.name}]") 285 | extra_kwargs = {} 286 | if self.settings.limit_monitor_enabled: 287 | extra_kwargs['event_hooks'] = {'response': [self.response_event_hook]} 288 | extra_kwargs['async_event_hooks'] = {'response': [self.aresponse_event_hook]} 289 | 290 | self._client = aiohttpx.Client( 291 | base_url = self.base_url, 292 | timeout = self.timeout, 293 | limits = self.settings.api_client_limits, 294 | auth = self.auth, 295 | headers = self.headers, 296 | **extra_kwargs, 297 | ) 298 | 299 | def configure_routes(self, **kwargs): 300 | """ 301 | Helper to configure the client routes 302 | """ 303 | if self._routes is not None: return 304 | kwargs = kwargs or {} 305 | if self._kwargs: kwargs.update(self._kwargs) 306 | self._routes = ApiRoutes( 307 | client = self.client, 308 | name = self.name, 309 | # headers = self.headers, 310 | debug_enabled = self.debug_enabled, 311 | on_error = self.on_error, 312 | ignore_errors = self.ignore_errors, 313 | timeout = self.timeout, 314 | max_retries = self.max_retries, 315 | settings = self.settings, 316 | is_azure = self.is_azure, 317 | azure_model_mapping = self.azure_model_mapping, 318 | disable_retries = self.disable_retries, 319 | retry_function = self.retry_function, 320 | client_callbacks = self.client_callbacks, 321 | **kwargs 322 | ) 323 | if self.debug_enabled: 324 | logger.info(f"[{self.name}] OpenAI Client Configured: {self.client.base_url} [Azure: {self.is_azure}]") 325 | logger.debug(f"Debug Enabled: {self.debug_enabled}") 326 | 327 | 
328 | def reset( 329 | self, 330 | **kwargs 331 | ): 332 | """ 333 | Resets the client to the default settings 334 | """ 335 | self._client = None 336 | self._routes = None 337 | self.configure_params(**kwargs) 338 | 339 | 340 | @property 341 | def completions(self) -> CompletionRoute: 342 | """ 343 | Returns the `CompletionRoute` class for interacting with `Completions`. 344 | 345 | Doc: `https://beta.openai.com/docs/api-reference/completions` 346 | """ 347 | return self.routes.completions 348 | 349 | @property 350 | def chat(self) -> ChatRoute: 351 | """ 352 | Returns the `ChatRoute` class for interacting with `Chat` components 353 | 354 | Doc: `https://platform.openai.com/docs/api-reference/chat` 355 | """ 356 | return self.routes.chat 357 | 358 | @property 359 | def edits(self) -> EditRoute: 360 | """ 361 | Returns the `EditRoute` class for interacting with `Edits`. 362 | 363 | Doc: `https://beta.openai.com/docs/api-reference/edits` 364 | """ 365 | return self.routes.edits 366 | 367 | @property 368 | def embeddings(self) -> EmbeddingRoute: 369 | """ 370 | Returns the `EmbeddingRoute` class for interacting with `Embeddings`. 371 | 372 | Doc: `https://beta.openai.com/docs/api-reference/embeddings` 373 | """ 374 | return self.routes.embeddings 375 | 376 | @property 377 | def images(self) -> ImageRoute: 378 | """ 379 | Returns the `ImageRoute` class for interacting with `Images`. 380 | 381 | Doc: `https://beta.openai.com/docs/api-reference/images` 382 | """ 383 | return self.routes.images 384 | 385 | @property 386 | def models(self) -> ModelRoute: 387 | """ 388 | Returns the `ModelRoute` class for interacting with `models`. 389 | 390 | Doc: `https://beta.openai.com/docs/api-reference/models` 391 | """ 392 | return self.routes.models 393 | 394 | """ 395 | Context Managers 396 | """ 397 | 398 | async def async_close(self): 399 | await self.client.aclose() 400 | 401 | def close(self): 402 | self.client.close() 403 | 404 | def __enter__(self): 405 | return self 406 | 407 | def __exit__(self, exc_type, exc_value, traceback): 408 | self.close() 409 | 410 | async def __aenter__(self): 411 | return self 412 | 413 | async def __aexit__(self, exc_type, exc_value, traceback): 414 | await self.async_close() 415 | 416 | @timed_cache(secs = 120, cache_if_result = True) 417 | def ping(self, timeout: Optional[float] = 1.0, base_url: Optional[str] = None) -> bool: 418 | """ 419 | Pings the API Endpoint to check if it's alive. 420 | """ 421 | try: 422 | # with contextlib.suppress(Exception): 423 | response = self.client.get(base_url or '/', timeout = timeout) 424 | try: 425 | data = response.json() 426 | # we should expect a 404 with a json response 427 | # if self.debug_enabled: logger.info(f"API Ping: {data}\n{response.headers}") 428 | if data.get('error'): return True 429 | except Exception as e: 430 | logger.error(f"[{self.name} - {response.status_code}] API Ping Failed: {response.text[:500]}") 431 | except Exception as e: 432 | logger.error(f"[{self.name}] API Ping Failed: {e}") 433 | return False 434 | 435 | @timed_cache(secs = 120, cache_if_result = True) 436 | async def aping(self, timeout: Optional[float] = 1.0, base_url: Optional[str] = None) -> bool: 437 | """ 438 | Pings the API Endpoint to check if it's alive. 
439 | """ 440 | try: 441 | response = await self.client.async_get(base_url or '/', timeout = timeout) 442 | try: 443 | data = response.json() 444 | # we should expect a 404 with a json response 445 | if data.get('error'): return True 446 | except Exception as e: 447 | logger.error(f"[{self.name} - {response.status_code}] API Ping Failed: {response.text[:500]}") 448 | except Exception as e: 449 | logger.error(f"[{self.name}] API Ping Failed: {e}") 450 | return False 451 | 452 | 453 | class OpenAI(metaclass = OpenAIMetaClass): 454 | """ 455 | [V1] Interface for OpenAI 456 | 457 | Deprecating this class in future versions 458 | """ 459 | pass 460 | 461 | OpenAIManager: OpenAISessionManager = ProxyObject(OpenAISessionManager) 462 | 463 | 464 | 465 | -------------------------------------------------------------------------------- /async_openai/external_client.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | OpenAI Client that supports external providers and configurations 5 | that have OpenAI-compatible endpoints. 6 | """ 7 | 8 | import abc 9 | import aiohttpx 10 | import contextlib 11 | from typing import Optional, Callable, Dict, Union, List 12 | 13 | from async_openai.schemas import * 14 | from async_openai.utils.config import get_settings, OpenAISettings 15 | from async_openai.utils.logs import logger 16 | from async_openai.utils.config import ProxyObject 17 | from async_openai.utils.external_config import ExternalProviderSettings, ExternalProviderAuth 18 | from async_openai.routes import ApiRoutes 19 | 20 | 21 | class ExternalOpenAIClient(abc.ABC): 22 | """ 23 | External Client for all the routes in the API. 24 | """ 25 | 26 | is_azure: bool = False 27 | 28 | _client: Optional[aiohttpx.Client] = None 29 | _routes: Optional[ApiRoutes] = None 30 | _kwargs: Optional[Dict] = None 31 | 32 | def __init__( 33 | self, 34 | name: str, 35 | provider: ExternalProviderSettings, 36 | is_proxied: Optional[bool] = None, 37 | **kwargs 38 | ): 39 | """ 40 | Lazily Instantiates the OpenAI Client 41 | """ 42 | self.name = name 43 | self.provider = provider 44 | self.debug_enabled: Optional[bool] = None 45 | self.on_error: Optional[Callable] = None 46 | self.timeout: Optional[int] = None 47 | self.max_retries: Optional[int] = None 48 | self.ignore_errors: Optional[bool] = None 49 | self.disable_retries: Optional[bool] = None 50 | self.retry_function: Optional[Callable] = None 51 | 52 | self.is_proxied = is_proxied if is_proxied is not None else \ 53 | (self.provider.config.has_proxy and '_noproxy' not in self.name) 54 | # logger.info(f"External Provider Configured: {self.name} [Proxied: {self.is_proxied}]") 55 | 56 | self.settings: Optional[OpenAISettings] = kwargs.pop('settings', get_settings()) 57 | self.client_callbacks: List[Callable] = [] 58 | self.auth: Optional[ExternalProviderAuth] = None 59 | self.configure_params(**kwargs) 60 | 61 | 62 | @property 63 | def client(self) -> aiohttpx.Client: 64 | """ 65 | Returns the aiohttpx client 66 | """ 67 | if self._client is None: 68 | self.configure_client() 69 | return self._client 70 | 71 | @property 72 | def routes(self) -> ApiRoutes: 73 | """ 74 | Returns the routes class 75 | """ 76 | if self._routes is None: 77 | self.configure_routes() 78 | return self._routes 79 | 80 | def configure_params( 81 | self, 82 | debug_enabled: Optional[bool] = None, 83 | on_error: Optional[Callable] = None, 84 | timeout: Optional[int] = None, 85 | max_retries: Optional[int] = None, 86 | 
ignore_errors: Optional[bool] = None, 87 | disable_retries: Optional[bool] = None, 88 | retry_function: Optional[Callable] = None, 89 | auth: Optional[ExternalProviderAuth] = None, 90 | client_callbacks: Optional[List[Callable]] = None, 91 | **kwargs 92 | ): # sourcery skip: low-code-quality 93 | """ 94 | Helper to configure the client 95 | """ 96 | 97 | if debug_enabled is not None: 98 | self.debug_enabled = debug_enabled 99 | elif self.debug_enabled is None: 100 | self.debug_enabled = self.settings.debug_enabled 101 | 102 | if timeout is not None: 103 | self.timeout = timeout 104 | elif self.timeout is None: 105 | self.timeout = self.settings.timeout 106 | 107 | if on_error is not None: 108 | self.on_error = on_error 109 | if ignore_errors is not None: 110 | self.ignore_errors = ignore_errors 111 | elif self.ignore_errors is None: 112 | self.ignore_errors = self.settings.ignore_errors 113 | if max_retries is not None: 114 | self.max_retries = max_retries 115 | elif self.max_retries is None: 116 | if self.provider.config.max_retries is not None: 117 | self.max_retries = self.provider.config.max_retries 118 | else: 119 | self.max_retries = self.settings.max_retries 120 | if disable_retries is not None: 121 | self.disable_retries = disable_retries 122 | elif self.disable_retries is None: 123 | self.disable_retries = self.settings.disable_retries 124 | if retry_function is not None: 125 | self.retry_function = retry_function 126 | 127 | if auth is not None: 128 | self.auth = auth 129 | elif self.auth is None: 130 | self.auth = ExternalProviderAuth(config = self.provider.config, is_proxied = self.is_proxied) 131 | 132 | if kwargs: self._kwargs = kwargs 133 | self.log_method = logger.info if self.debug_enabled else logger.debug 134 | if not self.debug_enabled: 135 | self.settings.disable_httpx_logger() 136 | 137 | if client_callbacks is not None: 138 | self.client_callbacks = client_callbacks 139 | 140 | def configure_client(self, **kwargs): 141 | """ 142 | Helper to configure the client 143 | """ 144 | if self._client is not None: return 145 | # logger.info(f"OpenAI Client Configured: {self.base_url} [{self.name}]") 146 | extra_kwargs = {} 147 | 148 | self._client = aiohttpx.Client( 149 | base_url = self.provider.config.proxy_url if self.is_proxied else self.provider.config.api_url, 150 | timeout = self.timeout, 151 | limits = self.settings.api_client_limits, 152 | auth = self.auth, 153 | headers = { 154 | 'content-type': 'application/json', 155 | }, 156 | **extra_kwargs, 157 | ) 158 | # logger.info(f"External Configured: {self._client.base_url} [{self.name}]") 159 | 160 | def configure_routes(self, **kwargs): 161 | """ 162 | Helper to configure the client routes 163 | """ 164 | if self._routes is not None: return 165 | kwargs = kwargs or {} 166 | if self._kwargs: kwargs.update(self._kwargs) 167 | self._routes = ApiRoutes( 168 | client = self.client, 169 | name = self.provider.name, 170 | # headers = self.headers, 171 | debug_enabled = self.debug_enabled, 172 | on_error = self.on_error, 173 | ignore_errors = self.ignore_errors, 174 | timeout = self.timeout, 175 | max_retries = self.max_retries, 176 | settings = self.settings, 177 | disable_retries = self.disable_retries, 178 | retry_function = self.retry_function, 179 | client_callbacks = self.client_callbacks, 180 | route_classes = self.provider.routes.api_route_classes, 181 | is_azure = False, 182 | **kwargs 183 | ) 184 | if self.debug_enabled: 185 | logger.info(f"[{self.name}] External Provider Configured: {self.client.base_url}") 186 | 
logger.debug(f"Debug Enabled: {self.debug_enabled}") 187 | 188 | 189 | def reset( 190 | self, 191 | **kwargs 192 | ): 193 | """ 194 | Resets the client to the default settings 195 | """ 196 | self._client = None 197 | self._routes = None 198 | self.configure_params(**kwargs) 199 | 200 | 201 | @property 202 | def completions(self) -> CompletionRoute: 203 | """ 204 | Returns the `CompletionRoute` class for interacting with `Completions`. 205 | 206 | Doc: `https://beta.openai.com/docs/api-reference/completions` 207 | """ 208 | return self.routes.completions 209 | 210 | @property 211 | def chat(self) -> ChatRoute: 212 | """ 213 | Returns the `ChatRoute` class for interacting with `Chat` components 214 | 215 | Doc: `https://platform.openai.com/docs/api-reference/chat` 216 | """ 217 | return self.routes.chat 218 | 219 | @property 220 | def edits(self) -> EditRoute: 221 | """ 222 | Returns the `EditRoute` class for interacting with `Edits`. 223 | 224 | Doc: `https://beta.openai.com/docs/api-reference/edits` 225 | """ 226 | return self.routes.edits 227 | 228 | @property 229 | def embeddings(self) -> EmbeddingRoute: 230 | """ 231 | Returns the `EmbeddingRoute` class for interacting with `Embeddings`. 232 | 233 | Doc: `https://beta.openai.com/docs/api-reference/embeddings` 234 | """ 235 | return self.routes.embeddings 236 | 237 | @property 238 | def images(self) -> ImageRoute: 239 | """ 240 | Returns the `ImageRoute` class for interacting with `Images`. 241 | 242 | Doc: `https://beta.openai.com/docs/api-reference/images` 243 | """ 244 | return self.routes.images 245 | 246 | @property 247 | def models(self) -> ModelRoute: 248 | """ 249 | Returns the `ModelRoute` class for interacting with `models`. 250 | 251 | Doc: `https://beta.openai.com/docs/api-reference/models` 252 | """ 253 | return self.routes.models 254 | 255 | """ 256 | Context Managers 257 | """ 258 | 259 | async def async_close(self): 260 | await self.client.aclose() 261 | 262 | def close(self): 263 | self.client.close() 264 | 265 | def __enter__(self): 266 | return self 267 | 268 | def __exit__(self, exc_type, exc_value, traceback): 269 | self.close() 270 | 271 | async def __aenter__(self): 272 | return self 273 | 274 | async def __aexit__(self, exc_type, exc_value, traceback): 275 | await self.async_close() 276 | 277 | 278 | def ping(self, timeout: Optional[float] = 1.0) -> bool: 279 | """ 280 | Pings the API Endpoint to check if it's alive. 281 | """ 282 | try: 283 | # with contextlib.suppress(Exception): 284 | response = self.client.get('/', timeout = timeout) 285 | data = response.json() 286 | # we should expect a 404 with a json response 287 | # if self.debug_enabled: logger.info(f"API Ping: {data}\n{response.headers}") 288 | if data.get('error'): return True 289 | except Exception as e: 290 | logger.error(f"API Ping Failed: {e}") 291 | return False 292 | 293 | async def aping(self, timeout: Optional[float] = 1.0) -> bool: 294 | """ 295 | Pings the API Endpoint to check if it's alive. 
296 | """ 297 | try: 298 | response = await self.client.async_get('/', timeout = timeout) 299 | data = response.json() 300 | # we should expect a 404 with a json response 301 | if data.get('error'): return True 302 | except Exception as e: 303 | logger.error(f"[{self.name}] API Ping Failed: {e}") 304 | return False 305 | 306 | 307 | -------------------------------------------------------------------------------- /async_openai/loadbalancer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Client LoadBalancer 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import random 8 | from typing import Optional, List, Dict, Union, TYPE_CHECKING 9 | 10 | from async_openai.schemas import * 11 | from async_openai.utils.config import get_settings, OpenAISettings 12 | from async_openai.utils.logs import logger 13 | 14 | if TYPE_CHECKING: 15 | from async_openai.client import OpenAIClient, OpenAISessionManager 16 | from async_openai.external_client import ExternalOpenAIClient, ExternalProviderSettings 17 | 18 | 19 | class ClientLoadBalancer: 20 | """ 21 | Manages a set of clients that can be rotated. 22 | """ 23 | def __init__( 24 | self, 25 | prioritize: Optional[str] = None, 26 | settings: Optional[OpenAISettings] = None, 27 | azure_model_mapping: Optional[Dict[str, str]] = None, 28 | healthcheck: Optional[bool] = True, 29 | manager: Optional['OpenAISessionManager'] = None, 30 | ): 31 | self.settings = settings or get_settings() 32 | self.clients: Dict[str, 'OpenAIClient'] = {} 33 | self.rotate_index: int = 0 34 | self.rotate_client_names: List[str] = [] 35 | self.azure_model_mapping: Dict[str, str] = azure_model_mapping 36 | self.healthcheck: bool = healthcheck 37 | self.manager: Optional['OpenAISessionManager'] = manager 38 | 39 | assert prioritize in [None, 'azure', 'openai'], f'Invalid `prioritize` value: {prioritize}' 40 | self.prioritize: Optional[str] = prioritize 41 | 42 | @property 43 | def client_names(self) -> List[str]: 44 | """ 45 | Returns the list of client names. 46 | """ 47 | return list(self.clients.keys()) 48 | 49 | def run_client_init(self): 50 | """ 51 | Initializes the Client. 52 | 53 | Can be subclassed to provide custom initialization. 54 | """ 55 | self.init_api_client() 56 | if self.settings.has_valid_azure: 57 | self.init_api_client(client_name = 'az', is_azure = True, set_as_default = self.prioritize == 'azure', set_as_current = self.prioritize == 'azure') 58 | 59 | 60 | @property 61 | def api(self) -> 'OpenAIClient': 62 | """ 63 | Returns the inherited OpenAI client. 
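A minimal usage sketch (illustrative only; clients are lazily initialized on first access via `run_client_init`):

```python
>>> lb = ClientLoadBalancer(prioritize = 'azure')
>>> client = lb.api
```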
64 | """
65 | if not self.clients:
66 | self.run_client_init()
67 | if not self.rotate_client_names or self.rotate_index < len(self.client_names):
68 | return self.clients[self.client_names[self.rotate_index]]
69 | try:
70 | return self.clients[self.rotate_client_names[self.rotate_index]]
71 | except IndexError as e:
72 | logger.error(f'Index Error: {self.rotate_index} - {self.rotate_client_names}')
73 | raise IndexError(f'Index Error: {self.rotate_index} - {self.rotate_client_names} - {self.client_names} ({len(self.clients)})') from e
74 | 
75 | def increase_rotate_index(self):
76 | """
77 | Increases the rotate index
78 | """
79 | if self.rotate_index >= len(self.clients) - 1:
80 | self.rotate_index = 0
81 | else:
82 | self.rotate_index += 1
83 | 
84 | def rotate_client(self, index: Optional[int] = None, require_azure: Optional[bool] = None, verbose: Optional[bool] = False):
85 | """
86 | Rotates the clients
87 | """
88 | if index is not None:
89 | self.rotate_index = index
90 | return
91 | self.increase_rotate_index()
92 | if require_azure:
93 | while not self.api.is_azure:
94 | self.increase_rotate_index()
95 | if verbose:
96 | logger.info(f'Rotated Client: {self.api.name} (Azure: {self.api.is_azure} - {self.api.api_version}) [{self.rotate_index+1}/{len(self.clients)}]')
97 | 
98 | def set_client(self, client_name: Optional[str] = None, verbose: Optional[bool] = False):
99 | """
100 | Sets the client
101 | """
102 | if client_name is None:
103 | raise ValueError('`client_name` is required.')
104 | if client_name not in self.clients:
105 | raise ValueError(f'Client `{client_name}` does not exist.')
106 | self.rotate_index = self.client_names.index(client_name)
107 | if verbose:
108 | logger.info(f'Set Client: {self.api.name} (Azure: {self.api.is_azure} - {self.api.api_version}) [{self.rotate_index+1}/{len(self.clients)}]')
109 | 
110 | def current_client_info(self, verbose: Optional[bool] = False) -> Dict[str, Union[str, int]]:
111 | """
112 | Returns the current client info
113 | """
114 | data = {
115 | 'name': self.api.name,
116 | 'is_azure': self.api.is_azure,
117 | 'api_version': self.api.api_version,
118 | 'index': self.rotate_index,
119 | 'total': len(self.clients),
120 | }
121 | if verbose:
122 | logger.info(f'Current Client: {self.api.name} (Azure: {self.api.is_azure} - {self.api.api_version}) [{self.rotate_index+1}/{len(self.clients)}]')
123 | return data
124 | 
125 | 
126 | def configure_client(self, client_name: Optional[str] = None, priority: Optional[int] = None, **kwargs):
127 | """
128 | Configures an existing client
129 | """
130 | client_name = client_name or 'default'
131 | if client_name not in self.clients:
132 | raise ValueError(f'Client `{client_name}` does not exist.')
133 | self.clients[client_name].reset(**kwargs)
134 | if priority is not None:
135 | if client_name in self.rotate_client_names:
136 | self.rotate_client_names.remove(client_name)
137 | self.rotate_client_names.insert(priority, client_name)
138 | 
139 | def init_api_client(
140 | self,
141 | client_name: Optional[str] = None,
142 | set_as_default: Optional[bool] = False,
143 | is_azure: Optional[bool] = None,
144 | priority: Optional[int] = None,
145 | set_as_current: Optional[bool] = False,
146 | **kwargs
147 | ) -> 'OpenAIClient':
148 | """
149 | Creates a new OpenAI client.
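Usage (a hedged sketch; `lb` stands for a `ClientLoadBalancer` instance and the client name is illustrative):

```python
>>> client = lb.init_api_client(client_name = 'az-east', is_azure = True, priority = 0)
```

`set_as_default = True` pushes the client to the front of the rotation, while `priority` inserts it at an explicit position.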
150 | """
151 | client_name = client_name or 'default'
152 | if client_name in self.clients:
153 | return self.clients[client_name]
154 | 
155 | from async_openai.client import OpenAIClient
156 | if is_azure is None and \
157 | (
158 | 'az' in client_name and self.settings.has_valid_azure
159 | ):
160 | is_azure = True
161 | if 'client_callbacks' not in kwargs and \
162 | self.manager and \
163 | self.manager.client_callbacks:
164 | kwargs['client_callbacks'] = self.manager.client_callbacks
165 | client = OpenAIClient(
166 | name = client_name,
167 | settings = self.settings,
168 | is_azure = is_azure,
169 | azure_model_mapping = self.azure_model_mapping,
170 | **kwargs
171 | )
172 | self.clients[client_name] = client
173 | if set_as_default:
174 | self.rotate_client_names.insert(0, client_name)
175 | elif priority is not None:
176 | if client_name in self.rotate_client_names:
177 | self.rotate_client_names.remove(client_name)
178 | self.rotate_client_names.insert(priority, client_name)
179 | elif self.prioritize:
180 | if (
181 | self.prioritize == 'azure' and is_azure
182 | ) or (
183 | self.prioritize == 'openai'
184 | and not is_azure
185 | 
186 | ):
187 | self.rotate_client_names.insert(0, client_name)
188 | elif self.prioritize in ['azure', 'openai']:
189 | self.rotate_client_names.append(client_name)
190 | if set_as_current:
191 | self.rotate_index = self.rotate_client_names.index(client_name)
192 | return client
193 | 
194 | def get_api_client(self, client_name: Optional[str] = None, require_azure: Optional[bool] = None, **kwargs) -> 'OpenAIClient':
195 | """
196 | Initializes a new OpenAI client or returns an existing one.
197 | """
198 | if not client_name and not self.clients:
199 | client_name = 'default'
200 | if client_name and client_name not in self.clients:
201 | self.clients[client_name] = self.init_api_client(client_name = client_name, **kwargs)
202 | if not client_name and require_azure:
203 | while not self.api.is_azure:
204 | self.increase_rotate_index()
205 | return self.api
206 | return self.clients[client_name] if client_name else self.api
207 | 
208 | 
209 | def get_api_client_from_list(self, client_names: List[str], require_azure: Optional[bool] = None, **kwargs) -> 'OpenAIClient':
210 | """
211 | Initializes a new OpenAI client or returns an existing one from a list of client names.
212 | """
213 | if not self.healthcheck:
214 | name = self.manager.select_client_name_from_weights(client_names) if self.manager.has_client_weights else random.choice(client_names)
215 | return self.get_api_client(client_name = name, require_azure = require_azure, **kwargs)
216 | available = []
217 | for client_name in client_names:
218 | if client_name not in self.clients:
219 | self.clients[client_name] = self.init_api_client(client_name = client_name, **kwargs)
220 | if require_azure and not self.clients[client_name].is_azure:
221 | continue
222 | if not self.clients[client_name].ping(**self.manager.get_client_ping_params(client_name)):
223 | continue
224 | if not self.manager.has_client_weights:
225 | return self.clients[client_name]
226 | available.append(client_name)
227 | # return self.clients[client_name]
228 | if available:
229 | name = self.manager.select_client_name_from_weights(available)
230 | return self.clients[name]
231 | raise ValueError(f'No healthy client found from: {client_names}')
232 | 
233 | async def aget_api_client_from_list(self, client_names: List[str], require_azure: Optional[bool] = None, **kwargs) -> 'OpenAIClient':
234 | """
235 | Initializes a new OpenAI client or returns an existing one from a list of client names.
236 | """
237 | if not self.healthcheck:
238 | name = self.manager.select_client_name_from_weights(client_names) if self.manager.has_client_weights else random.choice(client_names)
239 | return self.get_api_client(client_name = name, require_azure = require_azure, **kwargs)
240 | available = []
241 | for client_name in client_names:
242 | if client_name not in self.clients:
243 | self.clients[client_name] = self.init_api_client(client_name = client_name, **kwargs)
244 | if require_azure and not self.clients[client_name].is_azure:
245 | continue
246 | if not await self.clients[client_name].aping(**self.manager.get_client_ping_params(client_name)):
247 | continue
248 | if not self.manager.has_client_weights:
249 | return self.clients[client_name]
250 | available.append(client_name)
251 | 
252 | if available:
253 | name = self.manager.select_client_name_from_weights(available)
254 | return self.clients[name]
255 | raise ValueError(f'No healthy client found from: {client_names}')
256 | 
257 | def __getitem__(self, key: Union[str, int]) -> 'OpenAIClient':
258 | """
259 | Returns a client by name.
260 | """
261 | if isinstance(key, int):
262 | key = self.rotate_client_names[key] if self.rotate_client_names else self.client_names[key]
263 | return self.clients[key] -------------------------------------------------------------------------------- /async_openai/routes.py: -------------------------------------------------------------------------------- 1 | import aiohttpx
2 | 
3 | from typing import Optional, Dict, Callable, List, Type, TYPE_CHECKING
4 | from async_openai.schemas import *
5 | from async_openai.types.routes import BaseRoute
6 | from async_openai.utils.config import get_settings, OpenAISettings, AzureOpenAISettings
7 | from async_openai.utils.logs import logger
8 | 
9 | 
10 | RouteClasses = {
11 | 'completions': CompletionRoute,
12 | 'chat': ChatRoute,
13 | 'edits': EditRoute,
14 | 'embeddings': EmbeddingRoute,
15 | # 'files': FileRoute,
16 | 'images': ImageRoute,
17 | 'models': ModelRoute,
18 | 
19 | }
20 | 
21 | class ApiRoutes:
22 | 
23 | """
24 | Container for all the routes in the API.
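Routes are attached as attributes by `init_routes`, so a configured instance exposes them directly (an illustrative sketch, assuming `client` is an `aiohttpx.Client`):

```python
>>> routes = ApiRoutes(client = client, name = 'default')
>>> routes.chat  # -> ChatRoute
```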
25 | """ 26 | 27 | completions: CompletionRoute = None 28 | chat: ChatRoute = None 29 | edits: EditRoute = None 30 | embeddings: EmbeddingRoute = None 31 | # files: FileRoute = None 32 | images: ImageRoute = None 33 | models: ModelRoute = None 34 | 35 | def __init__( 36 | self, 37 | client: aiohttpx.Client, 38 | name: str, 39 | # headers: Optional[Dict] = None, 40 | debug_enabled: Optional[bool] = False, 41 | on_error: Optional[Callable] = None, 42 | ignore_errors: Optional[bool] = False, 43 | disable_retries: Optional[bool] = None, 44 | retry_function: Optional[Callable] = None, 45 | 46 | timeout: Optional[int] = None, 47 | max_retries: Optional[int] = None, 48 | settings: Optional[OpenAISettings] = None, 49 | is_azure: Optional[bool] = None, 50 | client_callbacks: Optional[List[Callable]] = None, 51 | route_classes: Optional[Dict[str, Type[BaseRoute]]] = None, 52 | 53 | **kwargs 54 | ): 55 | self.client = client 56 | self.name = name 57 | self.settings = settings or get_settings() 58 | # self.headers = headers or self.settings.get_headers() 59 | self.debug_enabled = debug_enabled 60 | self.on_error = on_error 61 | self.ignore_errors = ignore_errors 62 | self.disable_retries = disable_retries 63 | self.retry_function = retry_function 64 | 65 | self.timeout = timeout 66 | self.max_retries = max_retries 67 | self.route_classes = route_classes or RouteClasses.copy() 68 | self.is_azure = is_azure if is_azure is not None else \ 69 | isinstance(self.settings, AzureOpenAISettings) 70 | self.kwargs = kwargs or {} 71 | if client_callbacks: 72 | self.kwargs['client_callbacks'] = client_callbacks 73 | self.init_routes() 74 | 75 | 76 | 77 | def init_routes(self): 78 | """ 79 | Initializes the routes 80 | """ 81 | for route, route_class in self.route_classes.items(): 82 | try: 83 | setattr(self, route, route_class( 84 | client = self.client, 85 | name = self.name, 86 | # headers = self.headers, 87 | debug_enabled = self.debug_enabled, 88 | on_error = self.on_error, 89 | ignore_errors = self.ignore_errors, 90 | disable_retries = self.disable_retries, 91 | retry_function = self.retry_function, 92 | timeout = self.timeout, 93 | max_retries = self.max_retries, 94 | settings = self.settings, 95 | is_azure = self.is_azure, 96 | **self.kwargs 97 | )) 98 | except Exception as e: 99 | logger.error(f"[{self.name}] Failed to initialize route {route} with error: {e}") 100 | raise e 101 | -------------------------------------------------------------------------------- /async_openai/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | ## Base Object Models 3 | from async_openai.schemas.completions import CompletionChoice, CompletionObject, CompletionResponse 4 | from async_openai.schemas.chat import ChatMessage, ChatChoice, ChatObject, ChatResponse 5 | from async_openai.schemas.edits import EditChoice, EditObject, EditResponse 6 | from async_openai.schemas.embeddings import EmbeddingData, EmbeddingObject, EmbeddingResponse 7 | # from async_openai.schemas.files import FileChoice, FileObject, FileResponse 8 | from async_openai.schemas.images import ImageData, ImageObject, ImageResponse 9 | from async_openai.schemas.models import ModelData, ModelObject, ModelResponse 10 | 11 | ## Route Models 12 | from async_openai.schemas.completions import CompletionRoute 13 | from async_openai.schemas.chat import ChatRoute 14 | from async_openai.schemas.edits import EditRoute 15 | from async_openai.schemas.embeddings import EmbeddingRoute 16 | # from 
async_openai.schemas.files import FileRoute 17 | from async_openai.schemas.images import ImageRoute 18 | 19 | from async_openai.schemas.models import ModelRoute 20 | 21 | 22 | -------------------------------------------------------------------------------- /async_openai/schemas/edits.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type, Any, Union, List, Dict 2 | from lazyops.types import validator, lazyproperty, Field 3 | 4 | from async_openai.types.options import OpenAIModel 5 | from async_openai.types.resources import BaseResource 6 | from async_openai.types.responses import BaseResponse 7 | from async_openai.types.routes import BaseRoute 8 | 9 | 10 | __all__ = [ 11 | 'EditChoice', 12 | 'EditObject', 13 | 'EditResponse', 14 | 'EditRoute', 15 | ] 16 | 17 | 18 | class EditChoice(BaseResource): 19 | text: str 20 | index: int 21 | logprobs: Optional[Any] 22 | finish_reason: Optional[str] 23 | 24 | class EditObject(BaseResource): 25 | model: Optional[Union[str, OpenAIModel, Any]] = "curie" 26 | instruction: Optional[str] 27 | input: Optional[str] = "" 28 | n: Optional[int] = 1 29 | temperature: Optional[float] = 1.0 30 | top_p: Optional[float] = 1.0 31 | user: Optional[str] = None 32 | 33 | @validator('model', pre=True, always=True) 34 | def validate_model(cls, v) -> OpenAIModel: 35 | """ 36 | Validate the model 37 | """ 38 | if isinstance(v, OpenAIModel): 39 | return v 40 | if isinstance(v, dict): 41 | return OpenAIModel(**v) 42 | return OpenAIModel(value = v, mode = 'edit') 43 | 44 | 45 | class EditResponse(BaseResponse): 46 | choices: Optional[List[EditChoice]] 47 | choice_model: Optional[Type[BaseResource]] = EditChoice 48 | 49 | 50 | @lazyproperty 51 | def text(self) -> str: 52 | """ 53 | Returns the text for the edits 54 | """ 55 | if self.choices: 56 | return ''.join([choice.text for choice in self.choices]) 57 | return self.response.text 58 | 59 | 60 | class EditRoute(BaseRoute): 61 | input_model: Optional[Type[BaseResource]] = EditObject 62 | response_model: Optional[Type[BaseResource]] = EditResponse 63 | 64 | api_resource: Optional[str] = Field(default = 'edits') 65 | 66 | # @lazyproperty 67 | # def api_resource(self): 68 | # return 'edits' 69 | 70 | def create( 71 | self, 72 | input_object: Optional[Type[BaseResource]] = None, 73 | **kwargs 74 | ) -> EditResponse: 75 | """ 76 | 77 | """ 78 | return super().create(input_object = input_object, **kwargs) 79 | 80 | async def async_create( 81 | self, 82 | input_object: Optional[Type[BaseResource]] = None, 83 | **kwargs 84 | ) -> EditResponse: 85 | """ 86 | 87 | """ 88 | return await super().async_create(input_object = input_object, **kwargs) 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /async_openai/schemas/embeddings.py: -------------------------------------------------------------------------------- 1 | import time 2 | import asyncio 3 | from typing import Optional, Type, Any, Union, List, Dict, overload 4 | from lazyops.types import validator, lazyproperty, Field 5 | 6 | from async_openai.types.context import ModelContextHandler 7 | from async_openai.types.resources import BaseResource 8 | from async_openai.types.responses import BaseResponse 9 | from async_openai.types.routes import BaseRoute 10 | from async_openai.types.errors import RateLimitError, InvalidMaxTokens, InvalidRequestError, APIError, MaxRetriesExceeded 11 | from async_openai.utils import logger 12 | 13 | __all__ = [ 14 | 
'EmbeddingData',
15 | 'EmbeddingObject',
16 | 'EmbeddingResponse',
17 | 'EmbeddingRoute',
18 | ]
19 | 
20 | 
21 | 
22 | class EmbeddingData(BaseResource):
23 | object: Optional[str] = 'embedding'
24 | embedding: Optional[List[float]] = []
25 | index: Optional[int] = 0
26 | 
27 | class EmbeddingObject(BaseResource):
28 | model: Optional[str] = "text-embedding-ada-002"
29 | input: Optional[Union[List[Any], Any]] = None
30 | user: Optional[str] = None
31 | dimensions: Optional[int] = None
32 | encoding_format: Optional[str] = None
33 | 
34 | 
35 | @validator('model', pre=True, always=True)
36 | def validate_model(cls, v, values: Dict[str, Any]) -> str:
37 | """
38 | Validate the model
39 | """
40 | if not v:
41 | if values.get('engine'):
42 | v = values.get('engine')
43 | elif values.get('deployment'):
44 | v = values.get('deployment')
45 | v = ModelContextHandler.resolve_model_name(v)
46 | # if values.get('validate_model_aliases', False):
47 | # v = ModelContextHandler[v].name
48 | return v
49 | 
50 | 
51 | def dict(self, *args, exclude: Any = None, exclude_unset: bool = True, **kwargs):
52 | """
53 | Returns the dict representation of the object
54 | """
55 | return super().dict(*args, exclude = exclude, exclude_unset = exclude_unset, **kwargs)
56 | 
57 | 
58 | 
59 | class EmbeddingResponse(BaseResponse):
60 | data: Optional[List[EmbeddingData]] = None
61 | data_model: Optional[Type[BaseResource]] = EmbeddingData
62 | input_object: Optional[EmbeddingObject] = None
63 | 
64 | @lazyproperty
65 | def embeddings(self) -> List[List[float]]:
66 | """
67 | Returns the embeddings for the response
68 | object
69 | """
70 | if self.data:
71 | return [data.embedding for data in self.data]
72 | return None
73 | 
74 | @lazyproperty
75 | def openai_model(self):
76 | """
77 | Returns the model for the completions
78 | """
79 | return self.headers.get('openai-model', self.input_object.model)
80 | 
81 | @lazyproperty
82 | def consumption(self) -> int:
83 | """
84 | Returns the consumption for the completions
85 | """
86 | return ModelContextHandler.get_consumption_cost(
87 | model_name = self.openai_model,
88 | usage = self.usage,
89 | )
90 | 
91 | 
92 | 
93 | class EmbeddingRoute(BaseRoute):
94 | input_model: Optional[Type[BaseResource]] = EmbeddingObject
95 | response_model: Optional[Type[BaseResource]] = EmbeddingResponse
96 | api_resource: Optional[str] = Field(default = 'embeddings')
97 | root_name: Optional[str] = Field(default = 'embedding')
98 | 
99 | # @lazyproperty
100 | # def api_resource(self):
101 | # return 'embeddings'
102 | 
103 | # @lazyproperty
104 | # def root_name(self):
105 | # return 'embedding'
106 | 
107 | @overload
108 | def create(
109 | self,
110 | input: Optional[Union[str, List[str], List[List]]] = None,
111 | model: Optional[str] = "text-embedding-ada-002",
112 | dimensions: Optional[int] = None,
113 | encoding_format: Optional[str] = 'float',
114 | user: Optional[str] = None,
115 | auto_retry: Optional[bool] = False,
116 | auto_retry_limit: Optional[int] = None,
117 | **kwargs
118 | ) -> EmbeddingResponse:
119 | """
120 | Creates an embedding response for the provided prompt and parameters
121 | 
122 | Usage:
123 | 
124 | ```python
125 | >>> result = OpenAI.embedding.create(
126 | >>> input = 'say this is a test',
127 | >>> )
128 | ```
129 | 
130 | **Parameters:**
131 | 
132 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays.
Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
133 | 
134 | :model (string, required): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
135 | Default: `text-embedding-ada-002`
136 | 
137 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
138 | Default: `None`
139 | 
140 | :encoding_format (optional): The format of the encoding. If not specified, the model will use the default encoding format for the model. Defaults to `float`
141 | Default: `float`
142 | 
143 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to
144 | monitor and detect abuse.
145 | Default: `None`
146 | 
147 | Returns: `EmbeddingResponse`
148 | """
149 | ...
150 | 
151 | 
152 | def create(
153 | self,
154 | input_object: Optional[EmbeddingObject] = None,
155 | auto_retry: Optional[bool] = False,
156 | auto_retry_limit: Optional[int] = None,
157 | **kwargs
158 | ) -> EmbeddingResponse:
159 | """
160 | Creates an embedding response for the provided prompt and parameters
161 | 
162 | Usage:
163 | 
164 | ```python
165 | >>> result = OpenAI.embedding.create(
166 | >>> input = 'say this is a test',
167 | >>> )
168 | ```
169 | 
170 | **Parameters:**
171 | 
172 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
173 | 
174 | :model (string, required): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
175 | Default: `text-embedding-ada-002`
176 | 
177 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
178 | Default: `None`
179 | 
180 | :encoding_format (optional): The format of the encoding. If not specified, the model will use the default encoding format for the model. Defaults to `float`
181 | Default: `float`
182 | 
183 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to
184 | monitor and detect abuse.
185 | Default: `None`
186 | 
187 | Returns: `EmbeddingResponse`
188 | """
189 | return super().create(
190 | input_object = input_object,
191 | auto_retry = auto_retry,
192 | auto_retry_limit = auto_retry_limit,
193 | **kwargs
194 | )
195 | 
196 | @overload
197 | async def async_create(
198 | self,
199 | input: Optional[Union[str, List[str], List[List]]] = None,
200 | model: Optional[str] = "text-embedding-ada-002",
201 | dimensions: Optional[int] = None,
202 | encoding_format: Optional[str] = 'float',
203 | user: Optional[str] = None,
204 | auto_retry: Optional[bool] = False,
205 | auto_retry_limit: Optional[int] = None,
206 | **kwargs
207 | ) -> EmbeddingResponse:
208 | """
209 | Creates an embedding response for the provided prompt and parameters
210 | 
211 | Usage:
212 | 
213 | ```python
214 | >>> result = await OpenAI.embedding.async_create(
215 | >>> input = 'say this is a test',
216 | >>> )
217 | ```
218 | 
219 | **Parameters:**
220 | 
221 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
222 | 
223 | :model (string, required): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
224 | Default: `text-embedding-ada-002`
225 | 
226 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
227 | Default: `None`
228 | 
229 | :encoding_format (optional): The format of the encoding. If not specified, the model will use the default encoding format for the model. Defaults to `float`
230 | Default: `float`
231 | 
232 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to
233 | monitor and detect abuse.
234 | Default: `None`
235 | 
236 | Returns: `EmbeddingResponse`
237 | """
238 | ...
239 | 
240 | 
241 | async def async_create(
242 | self,
243 | input_object: Optional[EmbeddingObject] = None,
244 | auto_retry: Optional[bool] = False,
245 | auto_retry_limit: Optional[int] = None,
246 | **kwargs
247 | ) -> EmbeddingResponse: # sourcery skip: low-code-quality
248 | """
249 | Usage:
250 | 
251 | ```python
252 | >>> result = await OpenAI.embedding.async_create(
253 | >>> input = 'say this is a test',
254 | >>> )
255 | ```
256 | 
257 | **Parameters:**
258 | 
259 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
260 | 
261 | :model (string): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
262 | Default: `text-embedding-ada-002`
263 | 
264 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
265 | Default: `None`
266 | 
267 | :encoding_format (optional): The format of the encoding.
If not specified, the model will use the default encoding format for the model. Defaults to `float` 268 | Default: `float` 269 | 270 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to 271 | monitor and detect abuse. 272 | Default: `None` 273 | 274 | Returns: `EmbeddingResponse` 275 | """ 276 | return await super().async_create( 277 | input_object = input_object, 278 | auto_retry = auto_retry, 279 | auto_retry_limit = auto_retry_limit, 280 | **kwargs 281 | ) 282 | 283 | -------------------------------------------------------------------------------- /async_openai/schemas/external/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Supporting External Providers 3 | 4 | - together (https://together.ai/) 5 | """ 6 | 7 | -------------------------------------------------------------------------------- /async_openai/schemas/external/fireworks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GrowthEngineAI/async-openai/cbe8525a6b8605729af5f14182dfdea8f656294f/async_openai/schemas/external/fireworks/__init__.py -------------------------------------------------------------------------------- /async_openai/schemas/external/fireworks/chat.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Fireworks.ai Chat Route 5 | """ 6 | import json 7 | from ...chat import ( 8 | ChatRoute as BaseChatRoute, 9 | ChatObject as BaseChatObject, 10 | ChatResponse as BaseChatResponse, 11 | ChatChoice as BaseChatChoice, 12 | ChatMessage as BaseChatMessage, 13 | Function, FunctionCall, Tool, logger 14 | ) 15 | from lazyops.types import validator, root_validator, BaseModel, lazyproperty, Field, PYD_VERSION 16 | from async_openai.types.context import ModelContextHandler 17 | from typing import Any, Dict, List, Optional, Union, Set, Type, TYPE_CHECKING 18 | 19 | 20 | class ChatObject(BaseChatObject): 21 | model: Optional[str] = "accounts/fireworks/models/firefunction-v1" 22 | response_format: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None 23 | is_json_mode: Optional[bool] = Field(None, exclude = True) 24 | is_grammar_mode: Optional[bool] = Field(None, exclude = True) 25 | 26 | @validator('model', pre=True, always=True) 27 | def validate_model(cls, v, values: Dict[str, Any]) -> str: 28 | """ 29 | Validate the model 30 | """ 31 | if not v: 32 | if values.get('engine'): 33 | v = values.get('engine') 34 | elif values.get('deployment'): 35 | v = values.get('deployment') 36 | 37 | v = ModelContextHandler.resolve_external_model_name(v) 38 | return v 39 | 40 | 41 | """ 42 | Handle Validation for JSON Mode 43 | 44 | JSON mode corrals the LLM into outputting JSON conforming to a provided schema. 45 | To activate JSON mode, provide the response_format parameter to the Chat Completions 46 | API with {"type": "json_object"}. The JSON Schema can be specified with the schema 47 | property of response_format. The schema property should be a JSON Schema object. 
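An illustrative request shape (the schema below is a stand-in, not part of this module):

```python
>>> response_format = {
>>>     'type': 'json_object',
>>>     'schema': {'type': 'object', 'properties': {'answer': {'type': 'string'}}},
>>> }
```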
48 | """ 49 | 50 | 51 | @root_validator(pre = True) 52 | def validate_obj(cls, values: Dict[str, Any]) -> Dict[str, Any]: 53 | """ 54 | Validate the object 55 | """ 56 | if values.get('functions'): 57 | if not all(isinstance(f, Function) for f in values['functions']): 58 | values['functions'] = [Function(**f) for f in values['functions']] 59 | if not values.get('function_call'): 60 | values['function_call'] = 'auto' 61 | 62 | 63 | response_format: Dict[str, Any] = values.get('response_format', {}) 64 | if response_format.get('type') == 'json_object': 65 | values['is_json_mode'] = True 66 | if not response_format.get('schema') and values.get('functions'): 67 | func = values['functions'][0] if \ 68 | len(values['functions']) == 1 or \ 69 | values.get('function_call') == 'auto' else \ 70 | next((f for f in values['functions'] if f.name == values['function_call'])) 71 | 72 | assert func, 'No function found' 73 | schema = func.model_json_schema() 74 | values['response_format']['schema'] = schema 75 | 76 | elif response_format.get('type') == 'grammar': 77 | values['is_grammar_mode'] = True 78 | 79 | 80 | # Disable tools if response format is json_object 81 | elif values.get('tools'): 82 | tools = [] 83 | for tool in values['tools']: 84 | if isinstance(tool, Tool): 85 | tools.append(tool) 86 | elif isinstance(tool, dict): 87 | # This should be the correct format 88 | if tool.get('function'): 89 | tools.append(Tool(**tool)) 90 | else: 91 | # This is previously supported format 92 | tools.append(Tool(function = Function(**tool))) 93 | else: 94 | raise ValueError(f'Invalid tool: {tool}') 95 | values['tools'] = tools 96 | if not values.get('tool_choice'): 97 | values['tool_choice'] = 'auto' 98 | return values 99 | 100 | def dict(self, **kwargs) -> Dict[str, Any]: 101 | """ 102 | Return the dict 103 | """ 104 | exclude: Set[str] = kwargs.pop('exclude', None) or set() 105 | if self.is_json_mode or self.is_grammar_mode: 106 | exclude.add('tools') 107 | exclude.add('tool_choice') 108 | exclude.add('functions') 109 | exclude.add('function_call') 110 | 111 | return super().dict(exclude = exclude, **kwargs) 112 | 113 | 114 | class ChatMessage(BaseChatMessage): 115 | 116 | """ 117 | Handle some validation here 118 | """ 119 | 120 | @root_validator(pre = True) 121 | def validate_message(cls, values: Dict[str, Any]) -> Dict[str, Any]: 122 | """ 123 | Validate the object 124 | """ 125 | if values.get('tool_calls'): 126 | for tc in values['tool_calls']: 127 | if tc.get('type') == 'function' and tc.get('function'): 128 | func = FunctionCall(**tc['function']) 129 | values['function_call'] = func 130 | break 131 | return values 132 | 133 | 134 | 135 | class ChatChoice(BaseChatChoice): 136 | message: ChatMessage 137 | 138 | 139 | class ChatResponse(BaseChatResponse): 140 | 141 | input_object: Optional[ChatObject] = None 142 | choice_model: Optional[Type[ChatChoice]] = ChatChoice 143 | 144 | 145 | class ChatRoute(BaseChatRoute): 146 | input_model: Optional[Type[ChatObject]] = ChatObject 147 | response_model: Optional[Type[ChatResponse]] = ChatResponse -------------------------------------------------------------------------------- /async_openai/schemas/external/together/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GrowthEngineAI/async-openai/cbe8525a6b8605729af5f14182dfdea8f656294f/async_openai/schemas/external/together/__init__.py -------------------------------------------------------------------------------- 
/async_openai/schemas/external/together/chat.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Together.xyz Chat Route 5 | """ 6 | import json 7 | from ...chat import ( 8 | ChatRoute as BaseChatRoute, 9 | ChatObject as BaseChatObject, 10 | ChatResponse as BaseChatResponse, 11 | ChatChoice as BaseChatChoice, 12 | ChatMessage as BaseChatMessage, 13 | Function, FunctionCall, Tool, logger 14 | ) 15 | from lazyops.types import validator, root_validator, BaseModel, lazyproperty, Field, PYD_VERSION 16 | from async_openai.types.context import ModelContextHandler 17 | from typing import Any, Dict, List, Optional, Union, Set, Type, TYPE_CHECKING 18 | 19 | # if PYD_VERSION == 2: 20 | # from pydantic import model_validator 21 | # else: 22 | # from lazyops.types.models import root_validator 23 | 24 | class ChatObject(BaseChatObject): 25 | model: Optional[str] = "mistralai/Mistral-7B-Instruct-v0.1" 26 | response_format: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None 27 | is_json_mode: Optional[bool] = Field(None, exclude = True) 28 | 29 | @validator('model', pre=True, always=True) 30 | def validate_model(cls, v, values: Dict[str, Any]) -> str: 31 | """ 32 | Validate the model 33 | """ 34 | if not v: 35 | if values.get('engine'): 36 | v = values.get('engine') 37 | elif values.get('deployment'): 38 | v = values.get('deployment') 39 | 40 | v = ModelContextHandler.resolve_external_model_name(v) 41 | return v 42 | 43 | 44 | """ 45 | Handle Validation for JSON Mode 46 | 47 | JSON mode corrals the LLM into outputting JSON conforming to a provided schema. 48 | To activate JSON mode, provide the response_format parameter to the Chat Completions 49 | API with {"type": "json_object"}. The JSON Schema can be specified with the schema 50 | property of response_format. The schema property should be a JSON Schema object. 
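As a hedged sketch, a single `Function` can also drive the schema, mirroring the validator below (`my_function` is a placeholder for any `Function` instance):

```python
>>> obj = ChatObject(
>>>     messages = [...],
>>>     functions = [my_function],
>>>     response_format = {'type': 'json_object'},
>>> )
>>> # obj.response_format['schema'] is populated from my_function.model_json_schema()
```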
51 | """ 52 | 53 | # if PYD_VERSION == 2: 54 | # @model_validator(mode = 'after') 55 | # def validate_obj(self): 56 | # """ 57 | # Validate the object 58 | # """ 59 | 60 | # else: 61 | 62 | @root_validator(pre = True) 63 | def validate_obj(cls, values: Dict[str, Any]) -> Dict[str, Any]: 64 | """ 65 | Validate the object 66 | """ 67 | if values.get('functions'): 68 | if not all(isinstance(f, Function) for f in values['functions']): 69 | values['functions'] = [Function(**f) for f in values['functions']] 70 | if not values.get('function_call'): 71 | values['function_call'] = 'auto' 72 | 73 | if values.get('response_format', {}).get('type') == 'json_object': 74 | values['is_json_mode'] = True 75 | if not values['response_format'].get('schema') and values.get('functions'): 76 | func = values['functions'][0] if \ 77 | len(values['functions']) == 1 or \ 78 | values.get('function_call') == 'auto' else \ 79 | next((f for f in values['functions'] if f.name == values['function_call'])) 80 | 81 | assert func, 'No function found' 82 | schema = func.model_json_schema() 83 | # _ = schema.pop('additionalProperties', None) 84 | values['response_format']['schema'] = schema 85 | # logger.info(values["response_format"], prefix = 'JSON Mode') 86 | 87 | 88 | # Disable tools if response format is json_object 89 | elif values.get('tools'): 90 | tools = [] 91 | for tool in values['tools']: 92 | if isinstance(tool, Tool): 93 | tools.append(tool) 94 | elif isinstance(tool, dict): 95 | # This should be the correct format 96 | if tool.get('function'): 97 | tools.append(Tool(**tool)) 98 | else: 99 | # This is previously supported format 100 | tools.append(Tool(function = Function(**tool))) 101 | else: 102 | raise ValueError(f'Invalid tool: {tool}') 103 | values['tools'] = tools 104 | if not values.get('tool_choice'): 105 | values['tool_choice'] = 'auto' 106 | return values 107 | 108 | def dict(self, **kwargs) -> Dict[str, Any]: 109 | """ 110 | Return the dict 111 | """ 112 | exclude: Set[str] = kwargs.pop('exclude', None) or set() 113 | if self.is_json_mode: 114 | exclude.add('tools') 115 | exclude.add('tool_choice') 116 | exclude.add('functions') 117 | exclude.add('function_call') 118 | 119 | return super().dict(exclude = exclude, **kwargs) 120 | 121 | 122 | class ChatMessage(BaseChatMessage): 123 | 124 | """ 125 | Handle some validation here 126 | """ 127 | 128 | @root_validator(pre = True) 129 | def validate_message(cls, values: Dict[str, Any]) -> Dict[str, Any]: 130 | """ 131 | Validate the object 132 | """ 133 | # if values.get('content') and '"arguments"' in values['content']: 134 | # content = values.pop('content') 135 | # try: 136 | # data = json.loads(content) 137 | # except Exception as e: 138 | # try: 139 | # content = content.split('\n', 1)[-1].strip() 140 | # data = json.loads(content) 141 | # except Exception as e: 142 | # logger.error(f'Invalid JSON: {content}: {e}') 143 | # raise e 144 | # values['function_call'] = FunctionCall(**data) 145 | if values.get('tool_calls'): 146 | for tc in values['tool_calls']: 147 | if tc.get('type') == 'function' and tc.get('function'): 148 | func = FunctionCall(**tc['function']) 149 | values['function_call'] = func 150 | break 151 | return values 152 | 153 | 154 | 155 | class ChatChoice(BaseChatChoice): 156 | message: ChatMessage 157 | 158 | 159 | class ChatResponse(BaseChatResponse): 160 | 161 | input_object: Optional[ChatObject] = None 162 | choice_model: Optional[Type[ChatChoice]] = ChatChoice 163 | 164 | 165 | class ChatRoute(BaseChatRoute): 166 | input_model: 
Optional[Type[ChatObject]] = ChatObject 167 | response_model: Optional[Type[ChatResponse]] = ChatResponse -------------------------------------------------------------------------------- /async_openai/schemas/external/together/embeddings.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Together.xyz Embedding Route 5 | """ 6 | 7 | 8 | from ...embeddings import ( 9 | EmbeddingRoute as BaseEmbeddingRoute, 10 | EmbeddingObject as BaseEmbeddingObject, 11 | EmbeddingResponse as BaseEmbeddingResponse, 12 | logger 13 | ) 14 | from lazyops.types import validator, lazyproperty, Field 15 | from async_openai.types.context import ModelContextHandler 16 | from async_openai.types.resources import Usage 17 | from typing import Any, Dict, List, Optional, Union, Set, Type, TYPE_CHECKING 18 | 19 | 20 | class EmbeddingObject(BaseEmbeddingObject): 21 | model: Optional[str] = "togethercomputer/m2-bert-80M-32k-retrieval" 22 | 23 | @validator('model', pre=True, always=True) 24 | def validate_model(cls, v, values: Dict[str, Any]) -> str: 25 | """ 26 | Validate the model 27 | """ 28 | if not v: 29 | if values.get('engine'): 30 | v = values.get('engine') 31 | elif values.get('deployment'): 32 | v = values.get('deployment') 33 | v = ModelContextHandler.resolve_external_model_name(v) 34 | return v 35 | 36 | 37 | class EmbeddingResponse(BaseEmbeddingResponse): 38 | 39 | usage: Optional[Usage] = Field(default_factory = Usage) 40 | 41 | 42 | @lazyproperty 43 | def consumption(self) -> int: 44 | """ 45 | Returns the consumption for the completions 46 | """ 47 | try: 48 | if not self.usage.prompt_tokens: 49 | self.usage.prompt_tokens = ModelContextHandler.count_tokens(self.input_object.input, model_name=self.input_object.model) 50 | return ModelContextHandler.get_consumption_cost( 51 | model_name = self.input_object.model, 52 | usage = self.usage, 53 | ) 54 | except Exception as e: 55 | logger.error(f"Error getting consumption: {e}") 56 | return 0 57 | 58 | 59 | class EmbeddingRoute(BaseEmbeddingRoute): 60 | input_model: Optional[Type[EmbeddingObject]] = EmbeddingObject 61 | response_model: Optional[Type[EmbeddingResponse]] = EmbeddingResponse -------------------------------------------------------------------------------- /async_openai/schemas/images.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type, Any, Union, List, Dict 2 | from lazyops.types import validator, lazyproperty, BaseModel, Field 3 | 4 | from async_openai.types.options import ImageSize, ImageFormat 5 | from async_openai.types.resources import BaseResource, File, FileType, _has_fileio 6 | from async_openai.types.responses import BaseResponse 7 | from async_openai.types.routes import BaseRoute 8 | 9 | __all__ = [ 10 | 'ImageData', 11 | 'ImageObject', 12 | 'ImageResponse', 13 | 'ImageRoute', 14 | ] 15 | 16 | class ImageData(BaseModel): 17 | url: Optional[str] = None 18 | data: Optional[bytes] = None 19 | 20 | class ImageObject(BaseResource): 21 | prompt: Optional[str] 22 | mask: Optional[Union[str, FileType, Any]] 23 | image: Optional[Union[str, FileType, Any]] 24 | n: Optional[int] = 1 25 | size: Optional[Union[str, ImageSize]] = ImageSize.large 26 | response_format: Optional[Union[str, ImageFormat]] = ImageFormat.url 27 | user: Optional[str] = None 28 | 29 | @validator("size") 30 | def validate_size(cls, value): 31 | return ImageSize.from_str(value) if isinstance(value, str) else value 32 | 33 | 
@validator("response_format") 34 | def validate_response_format(cls, value): 35 | if isinstance(value, str): 36 | value = ImageFormat(value) 37 | return value 38 | 39 | def get_params(self, **kwargs) -> List: 40 | """ 41 | Transforms the data to the req params 42 | """ 43 | files = [(k, (None, v)) for k, v in self.dict(exclude_none=True, exclude={'mask', 'image'}).items()] 44 | if self.mask: 45 | mask = File(self.mask) 46 | files.append(("mask", ("mask", mask.read_bytes(), "application/octet-stream"))) 47 | if self.image: 48 | image = File(self.image) 49 | files.append(("image", ("image", image.read_bytes(), "application/octet-stream"))) 50 | 51 | return files 52 | 53 | async def async_get_params(self, **kwargs) -> List: 54 | """ 55 | Transforms the data to the req params 56 | """ 57 | files = [(k, (None, v)) for k, v in self.dict(exclude_none=True, exclude={'mask', 'image'}).items()] 58 | if self.mask: 59 | mask = File(self.mask) 60 | files.append(("mask", ("mask", (await mask.async_read_bytes() if _has_fileio else mask.read_bytes()), "application/octet-stream"))) 61 | if self.image: 62 | image = File(self.image) 63 | files.append(("image", ("image", (await image.async_read_bytes() if _has_fileio else image.read_bytes()), "application/octet-stream"))) 64 | 65 | return files 66 | 67 | 68 | class ImageResponse(BaseResponse): 69 | data: Optional[List[ImageData]] 70 | data_model: Optional[Type[ImageData]] = ImageData 71 | 72 | @lazyproperty 73 | def image_urls(self) -> List[str]: 74 | """ 75 | Returns the list of image urls 76 | """ 77 | if self.data: 78 | return [data.url for data in self.data] if self.data else [] 79 | return None 80 | 81 | 82 | 83 | class ImageRoute(BaseRoute): 84 | input_model: Optional[Type[BaseResource]] = ImageObject 85 | response_model: Optional[Type[BaseResource]] = ImageResponse 86 | 87 | api_resource: Optional[str] = Field(default = 'images') 88 | 89 | # @lazyproperty 90 | # def api_resource(self): 91 | # return 'images' 92 | 93 | def create( 94 | self, 95 | input_object: Optional[Type[BaseResource]] = None, 96 | **kwargs 97 | ) -> ImageResponse: 98 | """ 99 | 100 | """ 101 | return super().create(input_object = input_object, **kwargs) 102 | 103 | async def async_create( 104 | self, 105 | input_object: Optional[Type[BaseResource]] = None, 106 | **kwargs 107 | ) -> ImageResponse: 108 | """ 109 | 110 | """ 111 | return await super().async_create(input_object = input_object, **kwargs) 112 | 113 | -------------------------------------------------------------------------------- /async_openai/schemas/models.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import Optional, Type, List, Dict, Any 3 | from lazyops.types import lazyproperty, Field 4 | 5 | from async_openai.types.resources import BaseResource, Permission 6 | from async_openai.types.responses import BaseResponse 7 | from async_openai.types.routes import BaseRoute 8 | 9 | 10 | __all__ = [ 11 | 'ModelData', 12 | 'ModelObject', 13 | 'ModelResponse', 14 | 'ModelRoute', 15 | ] 16 | 17 | 18 | class ModelData(BaseResource): 19 | id: str 20 | status: Optional[str] = None 21 | owned_by: Optional[str] = None 22 | created: Optional[datetime.datetime] = None 23 | updated_at: Optional[datetime.datetime] = None 24 | created_at: Optional[datetime.datetime] = None 25 | permission: Optional[List[Permission]] = Field(default_factory = list) 26 | root: Optional[str] = None 27 | parent: Optional[str] = None 28 | object: Optional[str] = 'model' 29 | 
capabilities: Optional[Dict[str, Any]] = Field(default_factory = dict) 30 | lifecycle_status: Optional[str] = None 31 | deprecation: Optional[Dict[str, Any]] = Field(default_factory = dict) 32 | 33 | 34 | @lazyproperty 35 | def model_age(self) -> Optional[datetime.datetime]: 36 | """ 37 | Returns how long ago the model was created 38 | """ 39 | if self.created: 40 | return datetime.datetime.now(tz = datetime.timezone.utc) - self.created 41 | 42 | 43 | class ModelObject(BaseResource): 44 | model: Optional[str] 45 | 46 | 47 | class ModelResponse(BaseResponse): 48 | data: Optional[List[ModelData]] 49 | data_model: Optional[Type[BaseResource]] = ModelData 50 | 51 | @lazyproperty 52 | def model_list(self) -> List[str]: 53 | """ 54 | Returns a list of model IDs 55 | """ 56 | return [model.id for model in self.data] if self.data and isinstance(self.data, list) else [] 57 | 58 | 59 | class ModelRoute(BaseRoute): 60 | input_model: Optional[Type[BaseResource]] = ModelObject 61 | response_model: Optional[Type[BaseResource]] = ModelResponse 62 | 63 | api_resource: Optional[str] = Field(default = 'models') 64 | 65 | # @lazyproperty 66 | # def api_resource(self): 67 | # return 'models' 68 | 69 | @lazyproperty 70 | def create_enabled(self): 71 | """ 72 | Returns whether the Create Route is Enabled 73 | """ 74 | return True 75 | 76 | 77 | @lazyproperty 78 | def list_enabled(self): 79 | """ 80 | Returns whether the List Route is Enabled 81 | """ 82 | return False 83 | 84 | @lazyproperty 85 | def get_enabled(self): 86 | """ 87 | Returns whether the Get Route is Enabled 88 | """ 89 | return False 90 | 91 | def retrieve( 92 | self, 93 | resource_id: str, 94 | params: Optional[Dict[str, Any]] = None, 95 | **kwargs 96 | ) -> ModelResponse: 97 | """ 98 | Retrieve a Single Model by Resource ID 99 | 100 | :param resource_id: The ID of the Resource to GET 101 | :param params: Optional Query Parameters 102 | """ 103 | return super().retrieve(resource_id = resource_id, params = params, **kwargs) 104 | 105 | async def async_retrieve( 106 | self, 107 | resource_id: str, 108 | params: Optional[Dict[str, Any]] = None, 109 | **kwargs 110 | ) -> ModelResponse: 111 | """ 112 | Retrieve a Single Model by Resource ID 113 | 114 | :param resource_id: The ID of the Resource to GET 115 | :param param 116 | """ 117 | return await super().async_retrieve(resource_id = resource_id, params = params, **kwargs) 118 | 119 | 120 | def list( 121 | self, 122 | params: Optional[Dict[str, Any]] = None, 123 | **kwargs 124 | ) -> ModelResponse: 125 | """ 126 | List all available Models 127 | 128 | :param params: Optional Query Parameters 129 | """ 130 | return super().list(params = params, **kwargs) 131 | 132 | async def async_list( 133 | self, 134 | params: Optional[Dict[str, Any]] = None, 135 | **kwargs 136 | ) -> ModelResponse: 137 | """ 138 | List all available Models 139 | 140 | :param params: Optional Query Parameters 141 | """ 142 | return await super().async_list(params = params, **kwargs) 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /async_openai/types/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from async_openai.types.errors import ( 4 | OpenAIError, 5 | APIError, 6 | AuthenticationError, 7 | InvalidRequestError, 8 | RateLimitError, 9 | APIConnectionError, 10 | Timeout, 11 | TryAgain, 12 | ServiceUnavailableError, 13 | fatal_exception, 14 | error_handler, 15 | ) 16 
| 17 | from async_openai.types.options import ( 18 | ApiType, 19 | CompletionModels, 20 | FilePurpose, 21 | FinetuneModels, 22 | ImageSize, 23 | ImageFormat, 24 | ) 25 | 26 | # from async_openai.types.base import ( 27 | # Usage, 28 | # Permission, 29 | # BaseResource, 30 | # FileObject, 31 | # EventObject, 32 | # FileResource, 33 | # BaseResponse, 34 | # BaseRoute, 35 | 36 | # RESPONSE_SUCCESS_CODES 37 | # ) 38 | 39 | -------------------------------------------------------------------------------- /async_openai/types/context.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | OpenAI Cost Functions and Handler 5 | """ 6 | import abc 7 | import tiktoken 8 | from pathlib import Path 9 | from lazyops.types import BaseModel, validator, Field, lazyproperty 10 | from lazyops.libs.proxyobj import ProxyObject 11 | from typing import Optional, Union, Dict, Any, List, Tuple, Type, TYPE_CHECKING 12 | 13 | if TYPE_CHECKING: 14 | from .resources import Usage 15 | from async_openai.schemas.chat import ChatMessage 16 | from transformers import PreTrainedTokenizer 17 | from async_openai.utils.external_config import ExternalProviderSettings, ProviderModel 18 | 19 | pricing_file_path = Path(__file__).parent.joinpath('pricing.yaml') 20 | 21 | class ModelCosts(BaseModel): 22 | """ 23 | Represents a model's costs 24 | """ 25 | unit: Optional[int] = 1000 26 | input: Optional[float] = 0.0 27 | output: Optional[float] = 0.0 28 | total: Optional[float] = 0.0 29 | 30 | 31 | class ModelCostItem(BaseModel): 32 | """ 33 | Represents a model's Cost Item 34 | """ 35 | name: str 36 | aliases: Optional[List[str]] = None 37 | context_length: Optional[int] = 0 38 | costs: Optional[ModelCosts] = Field(default_factory=ModelCosts) 39 | endpoints: Optional[List[str]] = None 40 | 41 | def get_costs( 42 | self, 43 | input_tokens: Optional[int] = None, 44 | output_tokens: Optional[int] = None, 45 | total_tokens: Optional[int] = None, 46 | usage: Optional['Usage'] = None, 47 | **kwargs 48 | ) -> float: 49 | """ 50 | Gets the costs 51 | """ 52 | if usage is not None: 53 | input_tokens = usage.prompt_tokens 54 | output_tokens = usage.completion_tokens 55 | if kwargs.get('prompt_tokens'): 56 | input_tokens = kwargs['prompt_tokens'] 57 | if kwargs.get('completion_tokens'): 58 | output_tokens = kwargs['completion_tokens'] 59 | 60 | assert input_tokens is not None or output_tokens is not None or total_tokens is not None, "Must provide either input_tokens, output_tokens, or total_tokens" 61 | if self.costs is None: return 0.0 62 | cost = 0.0 63 | if self.costs.input: 64 | cost += self.costs.input * input_tokens / self.costs.unit 65 | if self.costs.output: 66 | cost += self.costs.output * output_tokens / self.costs.unit 67 | if self.costs.total and total_tokens is not None: 68 | cost += self.costs.total * total_tokens / self.costs.unit 69 | return cost 70 | 71 | 72 | class ModelCostHandlerClass(abc.ABC): 73 | """ 74 | The Model Cost Handler 75 | """ 76 | def __init__( 77 | self, 78 | **kwargs, 79 | ): 80 | """ 81 | Initializes the Model Cost Handler 82 | """ 83 | self._models: Optional[Dict[str, ModelCostItem]] = None 84 | self._model_aliases: Optional[Dict[str, str]] = None 85 | self.tokenizers: Optional[Dict[str, tiktoken.Encoding]] = {} 86 | 87 | self.external_models: Optional[Dict[str, 'ProviderModel']] = {} 88 | self.external_model_aliases: Optional[Dict[str, str]] = {} 89 | self.external_tokenizers: Optional[Dict[str, 'PreTrainedTokenizer']] = {} 
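# Worked example for ModelCostItem.get_costs above (the prices are hypothetical):
# with unit = 1000, input = 0.01 and output = 0.03, a Usage of 1200 prompt tokens
# and 400 completion tokens resolves to
#   0.01 * 1200 / 1000 + 0.03 * 400 / 1000 = 0.012 + 0.012 = 0.024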
90 | 91 | @staticmethod 92 | def load_models() -> Dict[str, ModelCostItem]: 93 | """ 94 | Loads the models 95 | """ 96 | import yaml 97 | models: Dict[str, Dict[str, Any]] = yaml.safe_load(pricing_file_path.read_text()) 98 | return {k: ModelCostItem(name = k, **v) for k, v in models.items()} 99 | 100 | @property 101 | def models(self) -> Dict[str, ModelCostItem]: 102 | """ 103 | Gets the models 104 | """ 105 | if self._models is None: self._models = self.load_models() 106 | return self._models 107 | 108 | @property 109 | def model_aliases(self) -> Dict[str, str]: 110 | """ 111 | Gets the model aliases 112 | """ 113 | if self._model_aliases is None: 114 | self._model_aliases = {alias: model for model, item in self.models.items() for alias in item.aliases or []} 115 | return self._model_aliases 116 | 117 | 118 | def get_external_model(self, name: str) -> Optional['ProviderModel']: 119 | """ 120 | Gets the model 121 | """ 122 | if name not in self.external_model_aliases and name not in self.external_models: 123 | raise KeyError(f"Model {name} not found: {self.external_model_aliases} / {list(self.external_models.keys())}") 124 | # print(f"Model {name} not found: {self.external_model_aliases} / {self.external_models}") 125 | # return None 126 | if name in self.external_model_aliases: 127 | name = self.external_model_aliases[name] 128 | return self.external_models[name] 129 | 130 | def resolve_external_model_name(self, model_name: str) -> str: 131 | """ 132 | Resolves the Model Name from the model aliases 133 | """ 134 | # Try to remove the provider name 135 | model = self.get_external_model(model_name) 136 | return model.name 137 | 138 | def resolve_model_name(self, model_name: str) -> str: 139 | """ 140 | Resolves the Model Name from the model aliases 141 | """ 142 | # Try to remove the version number 143 | if model_name in self.models: 144 | return model_name 145 | 146 | # if model_name in self.model_aliases: 147 | return self.model_aliases.get(model_name, model_name) 148 | 149 | # key = model_name.rsplit('-', 1)[0].strip() 150 | # if key in self.model_aliases: 151 | # return self.model_aliases[key] 152 | # elif key in self.models: 153 | # self.model_aliases[model_name] = key 154 | # return key 155 | # raise KeyError(f"Model {key}/{model_name} not found in {self.model_aliases} / {list(self.models.keys())}") 156 | 157 | def __getitem__(self, key: str) -> ModelCostItem: 158 | """ 159 | Gets a model by name 160 | """ 161 | if '/' in key or key in self.external_model_aliases: return self.get_external_model(key) 162 | if key not in self.model_aliases and key not in self.models: 163 | return self.models[self.resolve_model_name(key)] 164 | if key in self.model_aliases: 165 | key = self.model_aliases[key] 166 | return self.models[key] 167 | 168 | def get(self, key: str, default: Optional[str] = None) -> Optional[ModelCostItem]: 169 | """ 170 | Gets a model by name 171 | """ 172 | try: 173 | return self[key] 174 | except KeyError: 175 | if default is None: 176 | raise KeyError(f"Model {key} not found") from None 177 | return self[default] 178 | 179 | def add_provider(self, provider: 'ExternalProviderSettings'): 180 | """ 181 | Adds a provider to the handler 182 | """ 183 | for model in provider.models: 184 | model_name = f'{provider.name}/{model.name}' 185 | self.external_models[model_name] = model 186 | if model.name not in self.external_model_aliases: 187 | self.external_model_aliases[model.name] = model_name 188 | for alias in model.aliases or []: 189 | model_alias = f'{provider.name}/{alias}' 190 
| self.external_model_aliases[model_alias] = model_name 191 | if alias not in self.external_model_aliases: 192 | self.external_model_aliases[alias] = model_name 193 | 194 | def add_model(self, model: str, source_model: str): 195 | """ 196 | Add a model to the handler 197 | 198 | Args: 199 | model (str): The model name 200 | source_model (str): The source model name 201 | """ 202 | if model in self.model_aliases or model in self.models: 203 | return 204 | 205 | src_model = self[source_model] 206 | # Add to the model aliases 207 | self.model_aliases[model] = src_model.name 208 | 209 | def get_external_tokenizer(self, name: str) -> Optional['PreTrainedTokenizer']: 210 | """ 211 | Gets the tokenizer 212 | """ 213 | # Remove the provider name 214 | model = self.get_external_model(name) 215 | tokenizer_name = model.tokenizer or model.name 216 | if tokenizer_name not in self.external_tokenizers: 217 | try: 218 | from transformers.models.auto.tokenization_auto import AutoTokenizer 219 | except ImportError as e: 220 | raise ImportError("transformers is not installed, please install it to use this feature") from e 221 | tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 222 | self.external_tokenizers[tokenizer_name] = tokenizer 223 | return self.external_tokenizers[tokenizer_name] 224 | 225 | def get_tokenizer(self, name: str) -> Optional[Union[tiktoken.Encoding, 'PreTrainedTokenizer']]: 226 | """ 227 | Gets the tokenizer 228 | """ 229 | # Switch the 35 -> 3.5 230 | # OpenAI Models don't have / in the name 231 | if '/' in name or name in self.external_model_aliases \ 232 | or name in self.external_models: return self.get_external_tokenizer(name) 233 | 234 | if '35' in name: name = name.replace('35', '3.5') 235 | if name not in self.tokenizers: 236 | if name in {'text-embedding-3-small', 'text-embedding-3-large'}: 237 | enc_name = 'cl100k_base' 238 | self.tokenizers[name] = tiktoken.get_encoding(enc_name) 239 | else: 240 | self.tokenizers[name] = tiktoken.encoding_for_model(name) 241 | return self.tokenizers[name] 242 | 243 | def count_chat_tokens( 244 | self, 245 | messages: List[Union[Dict[str, str], 'ChatMessage']], 246 | model_name: str, 247 | reply_padding_token_count: Optional[int] = 3, 248 | message_padding_token_count: Optional[int] = 4, 249 | **kwargs 250 | ) -> int: 251 | """ 252 | Returns the number of tokens in the chat. 253 | """ 254 | num_tokens = 0 255 | tokenizer = self.get_tokenizer(model_name) 256 | for message in messages: 257 | if message.get('name'): 258 | num_tokens -= 1 259 | num_tokens += message_padding_token_count + len(tokenizer.encode(message.get('content', ''))) 260 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 261 | return num_tokens 262 | 263 | def count_tokens( 264 | self, 265 | text: Union[str, List[str]], 266 | model_name: str, 267 | **kwargs 268 | ) -> int: 269 | """ 270 | Returns the number of tokens in the text. 
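
Example (a minimal sketch; `ModelContextHandler` is the proxied singleton this module exports):

    from async_openai.types.context import ModelContextHandler
    ModelContextHandler.count_tokens("hello world", model_name = "gpt-3.5-turbo")       # single string
    ModelContextHandler.count_tokens(["hello", "world"], model_name = "gpt-3.5-turbo")  # sums per-item counts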
271 | """
272 | tokenizer = self.get_tokenizer(model_name)
273 | return (
274 | sum(len(tokenizer.encode(t)) for t in text)
275 | if isinstance(text, list)
276 | else len(tokenizer.encode(text))
277 | )
278 |
279 | def get_consumption_cost(self, model_name: str, usage: 'Usage', **kwargs) -> float:
280 | """
281 | Gets the consumption cost for the given token usage
282 | """
283 | # Switch the 35 -> 3.5
284 | if '35' in model_name: model_name = model_name.replace('35', '3.5')
285 | model = self[model_name]
286 | if isinstance(usage, dict):
287 | from .resources import Usage
288 | usage = Usage(**usage)
289 | return model.get_costs(usage = usage, **kwargs)
290 |
291 |
292 | def truncate_to_max_length(self, text: str, model_name: str, context_length: Optional[int] = None, **kwargs) -> str:
293 | """
294 | Truncates the text to the max context length, keeping the trailing tokens
295 | """
296 | tokenizer = self.get_tokenizer(model_name)
297 | if context_length is None:
298 | context_length = self[model_name].context_length
299 | tokens = tokenizer.encode(text)
300 | if len(tokens) > context_length:
301 | tokens = tokens[-context_length:]
302 | decoded = tokenizer.decode(tokens)
303 | text = text[-len(decoded):]
304 | return text
305 |
306 |
307 |
308 |
309 | class ModelContextHandlerMetaClass(type):
310 | """
311 | Metaclass backing the legacy `ModelContextHandlerV1` interface
312 | """
313 |
314 | _models: Optional[Dict[str, ModelCostItem]] = None
315 | _model_aliases: Optional[Dict[str, str]] = None
316 | tokenizers: Optional[Dict[str, tiktoken.Encoding]] = {}
317 |
318 | def load_models(cls) -> Dict[str, ModelCostItem]:
319 | """
320 | Loads the model cost items from the bundled pricing file
321 | """
322 | import yaml
323 | models: Dict[str, Dict[str, Any]] = yaml.safe_load(pricing_file_path.read_text())
324 | return {k: ModelCostItem(name = k, **v) for k, v in models.items()}
325 |
326 | @property
327 | def models(cls) -> Dict[str, ModelCostItem]:
328 | """
329 | Gets the models
330 | """
331 | if cls._models is None:
332 | cls._models = cls.load_models()
333 | return cls._models
334 |
335 | @property
336 | def model_aliases(cls) -> Dict[str, str]:
337 | """
338 | Gets the model aliases
339 | """
340 | if cls._model_aliases is None:
341 | cls._model_aliases = {alias: model for model, item in cls.models.items() for alias in item.aliases or []}
342 | return cls._model_aliases
343 |
357 | def __getitem__(cls, key: str) -> ModelCostItem:
358 | """
359 | Gets a model by name or alias
360 | """
361 | if key not in cls.model_aliases and key not in cls.models:
362 | return cls.models[cls.resolve_model_name(key)]
363 | if key in cls.model_aliases:
364 | key = cls.model_aliases[key]
365 | return cls.models[key]
366 |
367 | def get(cls, key: str, default: Optional[str] = None) -> Optional[ModelCostItem]:
368 | """
369 | Gets a model by name, falling back to the `default` model name; raises a KeyError if neither resolves
370 | """
371 | try:
372 | return cls[key]
373 | except KeyError:
374 | if default is None:
375 | raise KeyError(f"Model {key} not found") from None
376 | return cls[default]
377 |
378 | def add_model(cls, model: str, source_model: str):
379 | """
380 | Add a model to the handler
381 |
382 | Args:
383 | model (str): The model name
384 | source_model (str): The source
model name 385 | """ 386 | if model in cls.model_aliases or model in cls.models: 387 | return 388 | 389 | src_model = cls[source_model] 390 | # Add to the model aliases 391 | cls.model_aliases[model] = src_model.name 392 | 393 | 394 | def get_tokenizer(cls, name: str) -> Optional[tiktoken.Encoding]: 395 | """ 396 | Gets the tokenizer 397 | """ 398 | # Switch the 35 -> 3.5 399 | if '35' in name: name = name.replace('35', '3.5') 400 | if name not in cls.tokenizers: 401 | if name in {'text-embedding-3-small', 'text-embedding-3-large'}: 402 | enc_name = 'cl100k_base' 403 | cls.tokenizers[name] = tiktoken.get_encoding(enc_name) 404 | else: 405 | cls.tokenizers[name] = tiktoken.encoding_for_model(name) 406 | return cls.tokenizers[name] 407 | 408 | def count_chat_tokens( 409 | cls, 410 | messages: List[Union[Dict[str, str], 'ChatMessage']], 411 | model_name: str, 412 | reply_padding_token_count: Optional[int] = 3, 413 | message_padding_token_count: Optional[int] = 4, 414 | **kwargs 415 | ) -> int: 416 | """ 417 | Returns the number of tokens in the chat. 418 | """ 419 | num_tokens = 0 420 | tokenizer = cls.get_tokenizer(model_name) 421 | for message in messages: 422 | if message.get('name'): 423 | num_tokens -= 1 424 | num_tokens += message_padding_token_count + len(tokenizer.encode(message.get('content', ''))) 425 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 426 | return num_tokens 427 | 428 | def count_tokens( 429 | cls, 430 | text: Union[str, List[str]], 431 | model_name: str, 432 | **kwargs 433 | ) -> int: 434 | """ 435 | Returns the number of tokens in the text. 436 | """ 437 | tokenizer = cls.get_tokenizer(model_name) 438 | return ( 439 | sum(len(tokenizer.encode(t)) for t in text) 440 | if isinstance(text, list) 441 | else len(tokenizer.encode(text)) 442 | ) 443 | 444 | def get_consumption_cost(cls, model_name: str, usage: 'Usage', **kwargs) -> float: 445 | """ 446 | Gets the consumption cost 447 | """ 448 | # Switch the 35 -> 3.5 449 | if '35' in model_name: model_name = model_name.replace('35', '3.5') 450 | model = cls[model_name] 451 | if isinstance(usage, dict): 452 | from .resources import Usage 453 | usage = Usage(**usage) 454 | return model.get_costs(usage = usage, **kwargs) 455 | 456 | def resolve_model_name(cls, model_name: str) -> str: 457 | """ 458 | Resolves the Model Name from the model aliases 459 | """ 460 | return cls.model_aliases.get(model_name, model_name) 461 | 462 | def truncate_to_max_length(cls, text: str, model_name: str, context_length: Optional[int] = None, **kwargs) -> str: 463 | """ 464 | Truncates the text to the max length 465 | """ 466 | tokenizer = cls.get_tokenizer(model_name) 467 | if context_length is None: 468 | context_length = cls[model_name].context_length 469 | 470 | tokens = tokenizer.encode(text) 471 | if len(tokens) > context_length: 472 | tokens = tokens[-context_length:] 473 | decoded = tokenizer.decode(tokens) 474 | text = text[-len(decoded):] 475 | 476 | return text 477 | 478 | 479 | 480 | class ModelContextHandlerV1(metaclass = ModelContextHandlerMetaClass): 481 | """ 482 | The Model Cost Handler 483 | """ 484 | pass 485 | 486 | 487 | ModelContextHandler: ModelCostHandlerClass = ProxyObject(ModelCostHandlerClass) -------------------------------------------------------------------------------- /async_openai/types/errors.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import httpx 4 | import aiohttpx 5 | import contextlib 6 | 
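# Usage sketch (illustrative; the real call sites live in the route classes):
# `error_handler` below maps an HTTP status code to a typed exception, raising
# some (e.g. 503, 429, 524) and returning the others for the caller to decide, e.g.:
#
#   exc = error_handler(response = response, data = data)
#   if exc is not None: raise exc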
from typing import Any, Optional, Union, Dict 7 | from lazyops.types import BaseModel, lazyproperty 8 | 9 | __all__ = [ 10 | "OpenAIError", 11 | "ExceptionModel", 12 | "fatal_exception", 13 | "APIError", 14 | "TryAgain", 15 | "APIConnectionError", 16 | "Timeout", 17 | "InvalidRequestError", 18 | "AuthenticationError", 19 | "PermissionError", 20 | "RateLimitError", 21 | "ServiceUnavailableError", 22 | "InvalidAPIType", 23 | "error_handler", 24 | ] 25 | 26 | class ExceptionModel(BaseModel): 27 | response: aiohttpx.Response 28 | data: Optional[Union[Dict, Any]] 29 | message: Optional[str] = None 30 | should_retry: Optional[bool] = False 31 | 32 | @lazyproperty 33 | def headers(self): 34 | """ 35 | Returns the response headers. 36 | """ 37 | return self.response.headers 38 | 39 | @lazyproperty 40 | def stream(self) -> bool: 41 | """ 42 | Returns True if the response is a streaming response. 43 | """ 44 | return "text/event-stream" in self.headers.get("content-type", "") 45 | 46 | @lazyproperty 47 | def response_data(self): 48 | return self.data or self.response.json() 49 | 50 | @lazyproperty 51 | def http_body(self): 52 | body = self.data if self.stream else self.response.content 53 | try: 54 | if hasattr(body, "decode"): 55 | body = body.decode("utf-8") 56 | return body 57 | except (json.JSONDecodeError, UnicodeDecodeError) as e: 58 | raise ValueError( 59 | f"HTTP code {self.status_code} from API ({body})" 60 | ) from e 61 | 62 | @lazyproperty 63 | def response_json(self): 64 | try: 65 | return json.loads(self.http_body) 66 | except json.JSONDecodeError: 67 | return {} 68 | 69 | @lazyproperty 70 | def response_text(self): 71 | return self.response.text 72 | 73 | @lazyproperty 74 | def status_code(self): 75 | return self.response.status_code 76 | 77 | @lazyproperty 78 | def error_data(self) -> Dict: 79 | return self.response_json.get("error", {}) 80 | 81 | @lazyproperty 82 | def request_id(self) -> str: 83 | return self.headers.get("request-id", None) 84 | 85 | @lazyproperty 86 | def organization(self) -> str: 87 | return self.headers.get("openai-organization", None) 88 | 89 | @lazyproperty 90 | def error_message(self) -> str: 91 | msg: str = self.message or ("(Error occurred while streaming.)" if self.stream else "") 92 | if self.error_data.get("message"): 93 | msg += " " + self.error_data.get("message") 94 | if self.error_data.get("internal_message"): 95 | msg += "\n\n" + self.error_data["internal_message"] 96 | return msg.strip() or self.response_text 97 | 98 | 99 | class OpenAIError(Exception): 100 | def __init__( 101 | self, 102 | response: aiohttpx.Response, 103 | data: Optional[Union[Dict, Any]], 104 | message: Optional[str] = None, 105 | should_retry: Optional[bool] = False, 106 | **kwargs 107 | ): 108 | self.status = response.status_code 109 | self.response = response 110 | self.message = message 111 | self.exc = ExceptionModel( 112 | response=response, 113 | message=message, 114 | data=data, 115 | should_retry=should_retry, 116 | **kwargs 117 | ) 118 | self.post_init(**kwargs) 119 | 120 | def post_init(self, **kwargs): 121 | pass 122 | 123 | def __str__(self): 124 | msg = self.exc.error_message or "" 125 | if self.exc.request_id is not None: 126 | return f"Request {self.exc.request_id}: {msg}" 127 | else: 128 | return msg 129 | 130 | @property 131 | def user_message(self): 132 | return self.exc.error_message 133 | 134 | def __repr__(self): 135 | return f"[OpenAI] {self.__class__.__name__} \ 136 | (message={self.exc.error_message}, \ 137 | http_status={self.exc.status_code}, \ 138 | 
request_id={self.exc.request_id})"
139 |
140 |
141 |
142 | class MaxRetriesExhausted(Exception):
143 | """
144 | Max Retries Exhausted
145 | """
146 |
147 | def __init__(self, name: str, func_name: str, model: str, attempts: int, max_attempts: int):
148 | self.name = name
149 | self.func_name = func_name
150 | self.model = model
151 | self.attempts = attempts
152 | self.max_attempts = max_attempts
153 |
154 | def __str__(self):
155 | return f"[{self.name} - {self.model}] All retries exhausted for {self.func_name}. ({self.attempts}/{self.max_attempts})"
156 |
157 | def __repr__(self):
158 | """
159 | Returns the string representation of the error.
160 | """
161 | return f"[{self.name} - {self.model}] (func_name={self.func_name}, attempts={self.attempts}, max_attempts={self.max_attempts})"
162 |
163 |
164 | class APIError(OpenAIError):
165 | pass
166 |
167 |
168 | class TryAgain(OpenAIError):
169 | pass
170 |
171 |
172 | class Timeout(OpenAIError):
173 | pass
174 |
175 |
176 | class APIConnectionError(OpenAIError):
177 | pass
178 |
179 |
180 | class InvalidRequestError(OpenAIError):
181 | pass
182 |
183 |
184 | class AuthenticationError(OpenAIError):
185 | pass
186 |
187 |
188 | class PermissionError(OpenAIError):
189 | pass
190 |
191 |
192 | class RateLimitError(OpenAIError):
193 |
194 | def post_init(self, **kwargs):
195 | """
196 | Parses the rate limit reset time from the error message
197 | """
198 | self.retry_after_seconds: Optional[float] = None
199 | with contextlib.suppress(Exception):
200 | self.retry_after_seconds = float(self.exc.error_message.split("Please retry after", 1)[1].split("second", 1)[0].strip())
201 |
202 | class ServiceTimeoutError(OpenAIError):
203 | pass
204 |
205 |
206 | class ServiceUnavailableError(OpenAIError):
207 | pass
208 |
209 |
210 | class InvalidAPIType(OpenAIError):
211 | pass
212 |
213 |
214 | class InvalidMaxTokens(InvalidRequestError):
215 |
216 |
217 | def post_init(self, **kwargs):
218 | """
219 | Parses the maximum context length and requested max tokens from the error message
220 | """
221 | self.maximum_context_length: Optional[int] = None
222 | self.requested_max_tokens: Optional[int] = None
223 | with contextlib.suppress(Exception):
224 | self.maximum_context_length = int(self.exc.error_message.split("maximum context length is", 1)[1].split(" ", 1)[0].strip())
225 | self.requested_max_tokens = int(self.exc.error_message.split("requested", 1)[1].split(" ", 1)[0].strip())
226 |
227 |
228 | def fatal_exception(exc) -> bool:
229 | """
230 | Returns True if the exception is fatal and should not be retried.
231 | """
233 |
234 | if isinstance(exc, aiohttpx.ReadTimeout):
235 | return True
236 |
237 | if not isinstance(exc, OpenAIError):
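# Usage sketch (an assumption for illustration, not the library's verbatim call site):
# `fatal_exception` is shaped to act as a give-up predicate for a retry helper,
# e.g. with the `backoff` package:
#
#   @backoff.on_exception(backoff.expo, Exception, max_tries = 5, giveup = fatal_exception)
#   def call_api(...): ...
#
238 | # retry on all other errors (eg.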
network) 239 | return False 240 | 241 | # retry on server errors and client errors 242 | # with 429 status code (rate limited), 243 | # with 400, 404, 415 status codes (invalid request), 244 | # 400 can include invalid parameters, such as invalid `max_tokens` 245 | # don't retry on other client errors 246 | if isinstance(exc, (InvalidMaxTokens, InvalidRequestError, MaxRetriesExhausted)): 247 | return True 248 | 249 | return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415, 524] # [429, 400, 404, 415] 250 | 251 | 252 | def error_handler( 253 | response: aiohttpx.Response, 254 | data: Optional[Any] = None, # Line specific for streaming responses 255 | should_retry: Optional[bool] = False, 256 | **kwargs 257 | ): 258 | 259 | if response.status_code == 503: 260 | raise ServiceUnavailableError( 261 | response = response, 262 | message = "The server is overloaded or not ready yet.", 263 | data = data, 264 | should_retry = should_retry, 265 | **kwargs 266 | ) 267 | if response.status_code == 429: 268 | raise RateLimitError( 269 | response = response, 270 | data = data, 271 | should_retry = should_retry, 272 | **kwargs 273 | ) 274 | if response.status_code in [400, 404, 415]: 275 | if 'maximum context length' in response.text: 276 | return InvalidMaxTokens( 277 | response = response, 278 | data = data, 279 | should_retry = False, 280 | **kwargs 281 | ) 282 | return InvalidRequestError( 283 | response = response, 284 | data = data, 285 | should_retry = should_retry, 286 | **kwargs 287 | ) 288 | if response.status_code == 401: 289 | return AuthenticationError( 290 | response = response, 291 | data = data, 292 | should_retry = should_retry, 293 | **kwargs 294 | ) 295 | if response.status_code == 403: 296 | return PermissionError( 297 | response = response, 298 | data = data, 299 | should_retry = should_retry, 300 | **kwargs 301 | ) 302 | if response.status_code == 409: 303 | return TryAgain( 304 | response = response, 305 | data = data, 306 | should_retry = should_retry, 307 | **kwargs 308 | ) 309 | 310 | # Service is likely down. 311 | if response.status_code == 524: 312 | raise ServiceTimeoutError( 313 | response = response, 314 | data = data, 315 | should_retry = False, 316 | **kwargs 317 | ) 318 | 319 | raise APIError( 320 | response = response, 321 | data = data, 322 | should_retry = should_retry, 323 | **kwargs 324 | ) 325 | 326 | 327 | 328 | class MaxRetriesExceeded(Exception): 329 | def __init__( 330 | self, 331 | attempts: int, 332 | base_exception: OpenAIError, 333 | name: Optional[str] = None, 334 | ): 335 | self.name = name 336 | self.attempts = attempts 337 | self.ex = base_exception 338 | 339 | def __str__(self): 340 | return f"[{self.name}] Max {self.attempts} retries exceeded: {str(self.ex)}" 341 | 342 | 343 | @property 344 | def user_message(self): 345 | """ 346 | Returns the error message. 347 | """ 348 | return f"[{self.name}] Max {self.attempts} retries exceeded: {self.ex.user_message}" 349 | 350 | def __repr__(self): 351 | """ 352 | Returns the string representation of the error. 
353 | """ 354 | return f"[{self.name}] {repr(self.ex)} (attempts={self.attempts})" 355 | -------------------------------------------------------------------------------- /async_openai/types/options.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Optional, Union 3 | from lazyops.types import BaseModel, lazyproperty 4 | 5 | """ 6 | Pricing Options 7 | 8 | # https://openai.com/api/pricing/ 9 | """ 10 | 11 | _image_prices = { 12 | 'small': 0.016, 13 | 'medium': 0.018, 14 | 'large': 0.02, 15 | } 16 | 17 | # price per 1k/tokens 18 | _completion_prices = { 19 | 'ada': 0.0004, 20 | 'babbage': 0.0005, 21 | 'curie': 0.002, 22 | 'davinci': 0.02, 23 | } 24 | 25 | _finetune_training_prices = { 26 | 'ada': 0.0004, 27 | 'babbage': 0.0006, 28 | 'curie': 0.003, 29 | 'davinci': 0.03, 30 | } 31 | 32 | _finetune_usage_prices = { 33 | 'ada': 0.0016, 34 | 'babbage': 0.0024, 35 | 'curie': 0.012, 36 | 'davinci': 0.12, 37 | } 38 | 39 | _embedding_prices = { 40 | # 'ada': 0.004, 41 | 'ada': 0.0001, 42 | 'babbage': 0.005, 43 | 'curie': 0.02, 44 | 'davinci': 0.2, 45 | } 46 | 47 | _chat_prices = { 48 | 'gpt-3.5-turbo': 0.002, 49 | 50 | } 51 | 52 | _chat_gpt_prices = { 53 | 'gpt-4-32k': { 54 | 'prompt': 0.06, 55 | 'completion': 0.12, 56 | }, 57 | 'gpt-3.5-turbo-16k': { 58 | 'prompt': 0.003, 59 | 'completion': 0.004, 60 | }, 61 | 'gpt-35-turbo-16k': { 62 | 'prompt': 0.003, 63 | 'completion': 0.004, 64 | }, 65 | 'gpt-3-turbo-16k': { 66 | 'prompt': 0.003, 67 | 'completion': 0.004, 68 | }, 69 | 'gpt-4': { 70 | 'prompt': 0.03, 71 | 'completion': 0.06, 72 | }, 73 | 'gpt-3.5-turbo': { 74 | 'prompt': 0.0015, 75 | 'completion': 0.002, 76 | }, 77 | 'gpt-35-turbo': { 78 | 'prompt': 0.0015, 79 | 'completion': 0.002, 80 | }, 81 | 'gpt-3-turbo': { 82 | 'prompt': 0.0015, 83 | 'completion': 0.002, 84 | }, 85 | 'gpt-3.5-turbo-instruct': { 86 | 'prompt': 0.0015, 87 | 'completion': 0.002, 88 | }, 89 | 'gpt-35-turbo-instruct': { 90 | 'prompt': 0.0015, 91 | 'completion': 0.002, 92 | }, 93 | 'gpt-3-turbo-instruct': { 94 | 'prompt': 0.0015, 95 | 'completion': 0.002, 96 | }, 97 | 98 | } 99 | 100 | # TODO rework this module 101 | 102 | _cost_modes = { 103 | 'embedding': _embedding_prices, 104 | 'train': _finetune_training_prices, 105 | 'finetune': _finetune_usage_prices, 106 | 'completion': _completion_prices, 107 | } 108 | 109 | def get_arch( 110 | model_name: str, 111 | ) -> str: 112 | """ 113 | Get the arch 114 | """ 115 | for arch in { 116 | 'babbage', 117 | 'curie', 118 | 'davinci', 119 | 'ada', 120 | }: 121 | if arch in model_name: 122 | return arch 123 | 124 | def get_consumption_cost( 125 | model_name: str, 126 | total_tokens: int = 1, 127 | default_token_cost: Optional[float] = 0.00001, 128 | prompt_tokens: Optional[int] = None, 129 | completion_tokens: Optional[int] = None, 130 | mode: Optional[str] = None, 131 | ) -> float: 132 | """ 133 | Returns the total cost of the model 134 | usage 135 | """ 136 | if prompt_tokens and completion_tokens: 137 | total_tokens = prompt_tokens + completion_tokens 138 | if (not mode or mode == 'chat') and any( 139 | arch in model_name for arch in { 140 | 'gpt-3.5', 141 | 'gpt-35', 142 | 'gpt-3', 143 | 'gpt-4', 144 | }): 145 | return next( 146 | ( 147 | ( 148 | prompt_tokens * (_chat_gpt_prices[gpt_model]['prompt'] / 1000) 149 | ) 150 | + ( 151 | completion_tokens * (_chat_gpt_prices[gpt_model]['completion'] / 1000) 152 | ) 153 | if prompt_tokens and completion_tokens 154 | else total_tokens 155 | * ( 156 | ( 157 | 
(_chat_gpt_prices[gpt_model]['prompt'] + _chat_gpt_prices[gpt_model]['completion']) / 2 158 | ) 159 | / 1000 160 | ) 161 | for gpt_model in _chat_gpt_prices 162 | if gpt_model in model_name 163 | ), 164 | total_tokens * (_chat_prices['gpt-3.5-turbo'] / 1000), 165 | ) 166 | 167 | arch = get_arch(model_name) 168 | return total_tokens * (_cost_modes[mode][arch] / 1000) if mode in _cost_modes else total_tokens * default_token_cost 169 | 170 | 171 | 172 | class ApiType(str, Enum): 173 | azure = "azure" 174 | openai = "openai" 175 | open_ai = "openai" 176 | azure_ad = "azure_ad" 177 | azuread = "azure_ad" 178 | 179 | def get_version( 180 | self, 181 | version: Optional[str] = None 182 | ): 183 | if self.value in {"azure", "azure_ad", "azuread"} and not version: 184 | return "2023-07-01-preview" 185 | return version 186 | 187 | 188 | class FilePurpose(str, Enum): 189 | """ 190 | File Purpose 191 | """ 192 | 193 | finetune = "fine-tune" 194 | fine_tune = "fine-tune" 195 | train = "fine-tune-train" 196 | search = "search" 197 | batch = "batch" 198 | 199 | @classmethod 200 | def parse_str(cls, value: Union[str, 'FilePurpose'], raise_error: bool = True): 201 | if isinstance(value, cls): return value 202 | if "train" in value: 203 | return cls.train 204 | elif "finetune" in value: 205 | return cls.finetune 206 | elif "fine-tune" in value: 207 | return cls.fine_tune 208 | elif "search" in value: 209 | return cls.search 210 | elif "batch" in value: 211 | return cls.batch 212 | if not raise_error: return None 213 | raise ValueError(f"Cannot convert {value} to FilePurpose") 214 | 215 | 216 | class OpenAIModelType(str, Enum): 217 | """ 218 | OpenAI Model Types 219 | """ 220 | text = "text" 221 | audio = "audio" 222 | code = "code" 223 | chat = "chat" 224 | custom = "custom" 225 | 226 | @classmethod 227 | def parse(cls, value: Union[str, 'OpenAIModelType'], raise_error: bool = True): 228 | if isinstance(value, cls): return value 229 | if "text" in value: 230 | return cls.text 231 | elif "audio" in value: 232 | return cls.audio 233 | elif "code" in value: 234 | return cls.code 235 | elif "gpt-3.5" in value or "gpt-4" in value or "chat" in value: 236 | return cls.chat 237 | return cls.custom 238 | 239 | 240 | class OpenAIModelArch(str, Enum): 241 | """ 242 | OpenAI Model Architectures 243 | """ 244 | 245 | davinci = "davinci" 246 | curie = "curie" 247 | babbage = "babbage" 248 | ada = "ada" 249 | chat = "gpt-3.5" 250 | chat_gpt4 = "gpt-4" 251 | custom = "custom" 252 | 253 | @classmethod 254 | def parse(cls, value: Union[str, 'OpenAIModelArch'], raise_error: bool = True): 255 | if isinstance(value, cls): return value 256 | if "davinci" in value: 257 | return cls.davinci 258 | elif "curie" in value: 259 | return cls.curie 260 | elif "babbage" in value: 261 | return cls.babbage 262 | elif "ada" in value: 263 | return cls.ada 264 | elif "gpt-4" in value: 265 | return cls.chat_gpt4 266 | elif "gpt-3.5" in value or "gpt-3" in value or "chat" in value: 267 | return cls.chat 268 | return cls.custom 269 | 270 | 271 | @lazyproperty 272 | def model_version(self): 273 | return "003" if self.value == "davinci" else "001" 274 | 275 | @lazyproperty 276 | def edit_model(self): 277 | return f"text-{self.value}-edit-{self.model_version}" 278 | 279 | @lazyproperty 280 | def completion_model(self): 281 | return f"text-{self.value}-{self.model_version}" 282 | 283 | @lazyproperty 284 | def embedding_model(self): 285 | return f"text-similarity-{self.value}-{self.model_version}" 286 | 287 | @lazyproperty 288 | def chat_model(self): 
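"""
Returns the chat model name, mapping the `gpt-3.5` arch to `gpt-3.5-turbo`
"""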
289 | return 'gpt-3.5-turbo' if self.value == 'gpt-3.5' else self.value 290 | 291 | @lazyproperty 292 | def finetune_model(self): 293 | return self.value 294 | 295 | 296 | class ModelMode(str, Enum): 297 | """ 298 | Model Mode 299 | """ 300 | 301 | completion = "completion" 302 | edit = "edit" 303 | finetune = "finetune" 304 | fine_tune = "finetune" 305 | train = "train" 306 | embedding = "embedding" 307 | similiarity = "similiarity" 308 | search = "search" 309 | chat = "chat" 310 | 311 | @classmethod 312 | def parse(cls, value: Union[str, 'ModelMode'], raise_error: bool = True): 313 | if isinstance(value, cls): return value 314 | if "completion" in value: 315 | return cls.completion 316 | if "edit" in value: 317 | return cls.edit 318 | if "finetune" in value: 319 | return cls.finetune 320 | if "fine-tune" in value: 321 | return cls.fine_tune 322 | if "train" in value: 323 | return cls.train 324 | if "embedding" in value: 325 | return cls.embedding 326 | if "search" in value: 327 | return cls.search 328 | if "similiarity" in value: 329 | return cls.similiarity 330 | if "gpt-3.5" in value or "gpt-35" in value or 'gpt-4' in value or "chat" in value: 331 | return cls.chat 332 | if "text" in value: 333 | return cls.completion 334 | if not raise_error: return None 335 | raise ValueError(f"Cannot convert {value} to ModelMode") 336 | 337 | @classmethod 338 | def get_text_modes(cls): 339 | return [ 340 | cls.completion, 341 | cls.edit, 342 | cls.embedding, 343 | cls.similiarity, 344 | cls.search, 345 | # cls.chat 346 | ] 347 | 348 | class OpenAIModel(object): 349 | 350 | def __init__( 351 | self, 352 | value: str, 353 | **kwargs 354 | ): 355 | self.src_value = value 356 | self.src_splits = value.split("-") 357 | self.mode: ModelMode = kwargs.get("mode") 358 | self.model_arch: OpenAIModelArch = kwargs.get("model_arch") 359 | self.model_type: OpenAIModelType = kwargs.get("model_type") 360 | self.version: str = kwargs.get("version") 361 | self.parse_values() 362 | 363 | def parse_values(self): 364 | """ 365 | Parse the source values into the correct parts 366 | """ 367 | self.mode = ModelMode.parse((self.mode or self.src_value), raise_error = False) or ModelMode.completion 368 | self.model_arch = OpenAIModelArch.parse((self.model_arch or self.src_value), raise_error = False) 369 | self.model_type = OpenAIModelType.parse( 370 | (self.model_type or \ 371 | ("text" if self.mode in ModelMode.get_text_modes() else self.src_value) 372 | ), raise_error = False) 373 | if not self.version: 374 | ver_values = [x for x in self.src_splits if (x[0].isdigit() and x[-1].isdigit())] 375 | if ver_values: 376 | self.version = '-'.join(ver_values) 377 | if self.mode in {ModelMode.chat}: 378 | if self.version in {'35', '3.5', '4', '3', '16k', '32k'}: 379 | self.version = None 380 | else: 381 | self.version = self.version.rsplit('-', 1)[-1] 382 | 383 | elif self.mode == ModelMode.completion: 384 | self.version = "003" if self.model_arch == "davinci" else "001" 385 | elif self.mode == ModelMode.chat: 386 | pass 387 | elif self.model_type != OpenAIModelType.custom: 388 | self.version = "001" 389 | 390 | 391 | @lazyproperty 392 | def value(self) -> str: 393 | """ 394 | The value of the model 395 | """ 396 | if self.model_arch == OpenAIModelArch.custom or self.model_type == OpenAIModelType.custom: 397 | return self.src_value 398 | if self.mode == ModelMode.chat: 399 | return f'{self.src_value}-{self.version}' if self.version else self.src_value 400 | t = f'{self.model_type.value}' 401 | if self.mode != ModelMode.completion: 
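# completion models omit the mode segment (e.g. 'text-davinci-003'); other modes keep it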
402 | t += f'-{self.mode.value}' 403 | t += f'-{self.model_arch.value}' 404 | if self.version: 405 | t += f'-{self.version}' 406 | return t 407 | 408 | def dict(self, *args, **kwargs): 409 | return { 410 | "value": self.value, 411 | "mode": self.mode.value, 412 | "model_arch": self.model_arch.value, 413 | "model_type": self.model_type.value, 414 | "version": self.version, 415 | } 416 | 417 | def __str__(self): 418 | return f'OpenAIModel(value="{self.value}", mode="{self.mode}", model_arch="{self.model_arch}", model_type="{self.model_type}", version="{self.version}")' 419 | 420 | def __repr__(self) -> str: 421 | return f'OpenAIModel(value="{self.value}", mode="{self.mode}", model_arch="{self.model_arch}", model_type="{self.model_type}", version="{self.version}")' 422 | 423 | def __json__(self): 424 | return self.value 425 | 426 | # def __dict__(self): 427 | # return self.value 428 | 429 | def get_cost( 430 | self, 431 | total_tokens: int = 1, 432 | mode: Optional[str] = None, 433 | raise_error: bool = True, 434 | default_token_cost: Optional[float] = 0.00001, 435 | prompt_tokens: Optional[int] = None, 436 | completion_tokens: Optional[int] = None, 437 | ) -> float: 438 | """ 439 | Returns the total cost of the model 440 | usage 441 | """ 442 | if prompt_tokens and completion_tokens: 443 | total_tokens = prompt_tokens + completion_tokens 444 | 445 | mode = mode or self.mode.value 446 | if mode in ['completion', 'edit']: 447 | return total_tokens * (_completion_prices[self.model_arch.value] / 1000) 448 | if mode in ['chat']: 449 | return next( 450 | ( 451 | ( 452 | prompt_tokens * (_chat_gpt_prices[gpt_model]['prompt'] / 1000) 453 | ) 454 | + ( 455 | completion_tokens * (_chat_gpt_prices[gpt_model]['completion'] / 1000) 456 | ) 457 | if prompt_tokens and completion_tokens 458 | else total_tokens 459 | * ( 460 | ( 461 | (_chat_gpt_prices[gpt_model]['prompt'] + _chat_gpt_prices[gpt_model]['completion']) / 2 462 | ) 463 | / 1000 464 | ) 465 | for gpt_model in _chat_gpt_prices 466 | if gpt_model in self.src_value 467 | ), 468 | total_tokens * (_chat_prices['gpt-3.5-turbo'] / 1000), 469 | ) 470 | if 'embedding' in mode: 471 | return total_tokens * (_embedding_prices[self.model_arch.value] / 1000) 472 | if 'train' in mode: 473 | return total_tokens * (_finetune_training_prices[self.model_arch.value] / 1000) 474 | if 'finetune' in mode or 'fine-tune' in mode: 475 | return total_tokens * (_finetune_usage_prices[self.model_arch.value] / 1000) 476 | if raise_error: raise ValueError(f"Invalid mode {mode}") 477 | return total_tokens * default_token_cost 478 | 479 | 480 | class EditModels(str, Enum): 481 | """ 482 | Just the base models available 483 | """ 484 | davinci = "text-davinci-edit-003" 485 | curie = "text-curie-edit-001" 486 | babbage = "text-babbage-edit-001" 487 | ada = "text-ada-edit-001" 488 | 489 | @lazyproperty 490 | def model_type(self) -> str: 491 | return self.value.split("-")[1] 492 | 493 | class EmbeddingModels(str, Enum): 494 | """ 495 | Just the base models available 496 | """ 497 | davinci = "text-similarity-davinci-003" 498 | curie = "text-similarity-curie-001" 499 | babbage = "text-similarity-babbage-001" 500 | ada = "text-similarity-ada-001" 501 | 502 | @lazyproperty 503 | def model_type(self) -> str: 504 | return self.value.split("-")[2] 505 | 506 | class CompletionModels(str, Enum): 507 | """ 508 | Just the base models available 509 | """ 510 | davinci = "text-davinci-003" 511 | curie = "text-curie-001" 512 | babbage = "text-babbage-001" 513 | ada = "text-ada-001" 514 | 515 | 
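# e.g. CompletionModels.davinci.model_type -> 'davinci'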
@lazyproperty
516 | def model_type(self) -> str:
517 | return self.value.split("-")[1]
518 |
519 |
520 | class FinetuneModels(str, Enum):
521 | """
522 | Supported finetune models.
523 | """
524 | ada = "ada"
525 | babbage = "babbage"
526 | curie = "curie"
527 | davinci = "davinci"
528 |
529 | @lazyproperty
530 | def model_type(self):
531 | return self.value
532 |
533 | class ImageSize(str, Enum):
534 | """
535 | Size of the image
536 | """
537 |
538 | small = "256x256"
539 | medium = "512x512"
540 | large = "1024x1024"
541 |
542 | @lazyproperty
543 | def image_type(self):
544 | if self.value == "256x256":
545 | return 'small'
546 | if self.value == "512x512":
547 | return 'medium'
548 | if self.value == "1024x1024":
549 | return 'large'
550 | raise ValueError(f"Cannot convert {self.value} to an image type")
551 |
552 | @classmethod
553 | def from_str(cls, value: str) -> "ImageSize":
554 | """
555 | :param value: Size of the image
556 | :type value: str
557 | :return: ImageSize
558 | :rtype: ImageSize
559 | """
560 | if value == "256x256":
561 | return cls.small
562 | if value == "512x512":
563 | return cls.medium
564 | if value == "1024x1024":
565 | return cls.large
566 | try:
567 | return cls(value)
568 | except ValueError as e:
569 | raise ValueError(f"Cannot convert {value} to ImageSize") from e
570 |
571 | def get_cost(
572 | self,
573 | total_images: int = 1,
574 | ) -> float:
575 | """
576 | Returns the total cost of generating the given number of images
577 | """
578 | # `_image_prices` is keyed by 'small' / 'medium' / 'large', so use `image_type` rather than the raw size value
579 | return total_images * _image_prices[self.image_type]
580 |
581 | class ImageFormat(str, Enum):
582 | """
583 | Format of the image
584 | """
585 |
586 | url = "url"
587 | b64 = "b64_json"
588 | b64_json = "b64_json"
-------------------------------------------------------------------------------- /async_openai/types/pricing.yaml: --------------------------------------------------------------------------------
1 | # The OpenAI Pricing
2 | # https://openai.com/api/pricing/
3 |
4 | gpt-4-1106-preview:
5 | aliases:
6 | - gpt-4-turbo
7 | context_length: 128000 # 131072 ?
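# `unit` is the token quantity each price refers to: `input` / `output` are USD
# per `unit` tokens (entries in this file use either 1000 or 1_000_000).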
8 | costs: 9 | unit: 1000 10 | input: 0.01 11 | output: 0.03 12 | endpoints: 13 | - chat 14 | 15 | gpt-4-0125-preview: 16 | aliases: 17 | - gpt-4-turbo-preview 18 | - gpt-4-turbo-v 19 | context_length: 128000 20 | costs: 21 | unit: 1000 22 | input: 0.01 23 | output: 0.03 24 | endpoints: 25 | - chat 26 | 27 | gpt-4-turbo-2024-04-09: 28 | aliases: 29 | - gpt-4-turbo-2024 30 | - gpt-4-turbo-2024-04-09 31 | - gpt-4-2024-preview 32 | context_length: 128000 33 | costs: 34 | unit: 1_000_000 35 | input: 10.00 36 | output: 30.00 37 | endpoints: 38 | - chat 39 | 40 | gpt-4o-mini: 41 | aliases: 42 | - gpt-4-o-mini 43 | - gpt4o-mini 44 | - gpt-4o-mini-2024-07-18 45 | context_length: 128000 46 | costs: 47 | unit: 1_000_000 48 | input: 0.150 49 | output: 0.600 50 | batch_costs: 51 | unit: 1_000_000 52 | input: 0.075 53 | output: 0.300 54 | endpoints: 55 | - chat 56 | 57 | gpt-4o: 58 | aliases: 59 | - gpt-4-o 60 | - gpt4o 61 | - gpt-4o-2024-05-13 62 | context_length: 128000 63 | costs: 64 | unit: 1_000_000 65 | input: 5.00 66 | output: 15.00 67 | endpoints: 68 | - chat 69 | 70 | gpt-4: 71 | aliases: 72 | - gpt-4-0613 73 | context_length: 8192 74 | costs: 75 | unit: 1000 76 | input: 0.03 77 | output: 0.06 78 | endpoints: 79 | - chat 80 | 81 | gpt-4-32k: 82 | aliases: 83 | - gpt-4-32k-0613 84 | context_length: 32768 85 | costs: 86 | unit: 1000 87 | input: 0.06 88 | output: 0.12 89 | endpoints: 90 | - chat 91 | 92 | 93 | gpt-3.5-turbo: 94 | aliases: 95 | # - gpt-35 96 | # - gpt-35-turbo 97 | - gpt-3.5 98 | # Support previous 16k context length 99 | # - gpt-3.5-16k 100 | # - gpt-35-16k 101 | # - gpt-35-turbo-16k 102 | # - gpt-3.5-turbo-16k 103 | # - gpt-3.5-turbo-1106 104 | # - gpt-35-turbo-1106 105 | # - gpt-3.5-turbo-0301 106 | # - gpt-35-turbo-0301 107 | # - gpt-3.5-turbo-0613 108 | # - gpt-35-turbo-0613 109 | context_length: 16384 110 | costs: 111 | unit: 1000 112 | input: 0.0010 113 | output: 0.0020 114 | endpoints: 115 | - chat 116 | 117 | gpt-3.5-turbo-16k: 118 | aliases: 119 | # Support previous 16k context length 120 | - gpt-3.5-16k 121 | # - gpt-35-16k 122 | # - gpt-35-turbo-16k 123 | - gpt-3.5-turbo-16k 124 | - gpt-3.5-turbo-16k-0613 125 | context_length: 16384 126 | costs: 127 | unit: 1000 128 | input: 0.0010 129 | output: 0.0020 130 | endpoints: 131 | - chat 132 | 133 | gpt-3.5-turbo-1106: 134 | aliases: 135 | - gpt-3.5-1106 136 | # - gpt-35-1106 137 | # - gpt-35-turbo-1106 138 | context_length: 16384 139 | costs: 140 | unit: 1000 141 | input: 0.0010 142 | output: 0.0020 143 | endpoints: 144 | - chat 145 | 146 | gpt-3.5-turbo-0301: 147 | aliases: 148 | - gpt-3.5-0301 149 | # - gpt-35-0301 150 | # - gpt-35-turbo-0301 151 | context_length: 16384 152 | costs: 153 | unit: 1000 154 | input: 0.0010 155 | output: 0.0020 156 | endpoints: 157 | - chat 158 | 159 | gpt-3.5-turbo-0613: 160 | aliases: 161 | - gpt-3.5-0613 162 | # - gpt-35-0613 163 | # - gpt-35-turbo-0613 164 | context_length: 16384 165 | costs: 166 | unit: 1000 167 | input: 0.0010 168 | output: 0.0020 169 | endpoints: 170 | - chat 171 | 172 | gpt-3.5-turbo-0125: 173 | aliases: 174 | - gpt-3.5-0125 175 | context_length: 16384 176 | costs: 177 | unit: 1000 178 | input: 0.0005 179 | output: 0.0015 180 | endpoints: 181 | - chat 182 | 183 | gpt-3.5-turbo-instruct: 184 | aliases: 185 | - gpt-3.5-instruct 186 | # - gpt-35-instruct 187 | # - gpt-35-turbo-instruct 188 | context_length: 4096 189 | costs: 190 | unit: 1000 191 | input: 0.0015 192 | output: 0.0020 193 | endpoints: 194 | - completion 195 | 196 | text-embedding-ada-002: 197 | aliases: 198 
| - ada 199 | - ada-v2 200 | - text-embedding-ada 201 | context_length: 8191 202 | costs: 203 | unit: 1000 204 | input: 0.0001 205 | endpoints: 206 | - embeddings 207 | 208 | text-embedding-3-small: 209 | aliases: 210 | - t3small 211 | - t3-small 212 | context_length: 8191 213 | costs: 214 | unit: 1000 215 | input: 0.00002 216 | endpoints: 217 | - embeddings 218 | 219 | text-embedding-3-large: 220 | aliases: 221 | - t3large 222 | - t3-large 223 | context_length: 8191 224 | costs: 225 | unit: 1000 226 | input: 0.00013 227 | endpoints: 228 | - embeddings 229 | 230 | -------------------------------------------------------------------------------- /async_openai/types/resources.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import aiohttpx 4 | import datetime 5 | import tempfile 6 | import pathlib 7 | from pydantic import ConfigDict 8 | from pydantic.types import ByteSize 9 | from lazyops.types import BaseModel, validator, lazyproperty 10 | from lazyops.types.models import get_pyd_field_names, pyd_parse_obj, get_pyd_dict, _BaseModel 11 | from lazyops.utils import ObjectDecoder, ObjectEncoder 12 | from async_openai.utils.logs import logger 13 | from async_openai.utils.helpers import aparse_stream, parse_stream 14 | 15 | from async_openai.types.options import FilePurpose 16 | 17 | from typing import Dict, Optional, Any, List, Type, Union, Tuple, Iterator, AsyncIterator, TYPE_CHECKING 18 | 19 | 20 | try: 21 | from fileio import File, FileType 22 | _has_fileio = True 23 | except ImportError: 24 | from pathlib import Path as File 25 | FileType = Union[File, str, os.PathLike] 26 | _has_fileio = False 27 | 28 | 29 | __all__ = [ 30 | 'BaseResource', 31 | 'Permission', 32 | 'Usage', 33 | 'FileObject', 34 | 'EventObject', 35 | 'FileResource', 36 | ] 37 | 38 | VALID_SEND_KWARGS = [ 39 | 'method', 40 | 'url', 41 | 'content', 42 | 'data', 43 | 'files', 44 | 'json', 45 | 'params', 46 | 'headers', 47 | 'cookies', 48 | 'auth', 49 | 'follow_redirects', 50 | 'timeout', 51 | 'extensions', 52 | ] 53 | 54 | 55 | class Usage(BaseModel): 56 | prompt_tokens: Optional[int] = 0 57 | completion_tokens: Optional[int] = 0 58 | total_tokens: Optional[int] = 0 59 | 60 | # @lazyproperty 61 | @property 62 | def consumption(self) -> int: 63 | """ 64 | Gets the consumption 65 | """ 66 | return self.total_tokens 67 | 68 | def update(self, usage: Union['Usage', Dict[str, int]]): 69 | """ 70 | Updates the consumption 71 | """ 72 | if isinstance(usage, Usage): 73 | if usage.prompt_tokens: self.prompt_tokens += usage.prompt_tokens 74 | if usage.completion_tokens: self.completion_tokens += usage.completion_tokens 75 | if usage.total_tokens: self.total_tokens += usage.total_tokens 76 | return 77 | 78 | if usage.get('prompt_tokens'): self.prompt_tokens += usage.get('prompt_tokens') 79 | if usage.get('completion_tokens'): self.completion_tokens += usage.get('completion_tokens') 80 | if usage.get('total_tokens'): self.total_tokens += usage.get('total_tokens') 81 | 82 | def __iadd__(self, other: Union['Usage', Dict[str, int]]): 83 | """ 84 | Adds the usage 85 | """ 86 | self.update(other) 87 | return self.consumption 88 | 89 | 90 | class BaseResource(BaseModel): 91 | 92 | """ 93 | Base Object class for resources to 94 | inherit from 95 | """ 96 | 97 | # model_config = ConfigDict(extra = 'allow', arbitrary_types_allowed = True) 98 | # def get(self, name, default: Any = None): 99 | # """ 100 | # Get an attribute from the model 101 | # """ 102 | # return getattr(self, 
name, default) 103 | 104 | if TYPE_CHECKING: 105 | id: Optional[str] 106 | file_id: Optional[str] 107 | fine_tune_id: Optional[str] 108 | model_id: Optional[str] 109 | completion_id: Optional[str] 110 | openai_id: Optional[str] 111 | model: Optional[str] 112 | 113 | 114 | @lazyproperty 115 | def resource_id(self): 116 | """ 117 | Returns the resource id 118 | """ 119 | if hasattr(self, 'id'): 120 | return self.id 121 | if hasattr(self, 'file_id'): 122 | return self.file_id 123 | if hasattr(self, 'fine_tune_id'): 124 | return self.fine_tune_id 125 | if hasattr(self, 'model_id'): 126 | return self.model_id 127 | if hasattr(self, 'completion_id'): 128 | return self.completion_id 129 | return self.openai_id if hasattr(self, 'openai_id') else None 130 | 131 | @classmethod 132 | def parse_obj( 133 | cls, 134 | obj: Any, 135 | strict: Optional[bool] = False, 136 | from_attributes: Optional[bool] = True, 137 | **kwargs 138 | ) -> 'BaseResource': 139 | """ 140 | Parses an object into the resource 141 | """ 142 | #return cls(**obj) 143 | # logger.info(f"Obj: {cls}: {obj}") 144 | return pyd_parse_obj(cls, obj, strict = strict, from_attributes = from_attributes, **kwargs) 145 | 146 | @staticmethod 147 | def create_resource( 148 | resource: Type['BaseResource'], 149 | **kwargs 150 | ) -> Tuple['BaseResource', Dict]: 151 | """ 152 | Extracts the resource from the kwargs and returns the resource 153 | and the remaining kwargs 154 | """ 155 | resource_fields = get_pyd_field_names(resource) 156 | resource_kwargs = {k: v for k, v in kwargs.items() if k in resource_fields} 157 | return_kwargs = {k: v for k, v in kwargs.items() if k not in resource_fields} 158 | resource_obj = resource.parse_obj(resource_kwargs) 159 | return resource_obj, return_kwargs 160 | 161 | 162 | @staticmethod 163 | def create_batch_resource( 164 | resource: Type['BaseResource'], 165 | batch: List[Union[Dict[str, Any], Any]], 166 | **kwargs 167 | ) -> Tuple[List['BaseResource'], Dict]: 168 | """ 169 | Extracts the resource from the kwargs and returns the resource 170 | and the remaining kwargs 171 | """ 172 | resource_fields = get_pyd_field_names(resource) 173 | resource_kwargs = {k: v for k, v in kwargs.items() if k in resource_fields} 174 | return_kwargs = {k: v for k, v in kwargs.items() if k not in resource_fields} 175 | resource_objs = [] 176 | for item in batch: 177 | if isinstance(item, dict): 178 | item.update(resource_kwargs) 179 | resource_objs.append(resource.parse_obj(item)) 180 | else: 181 | resource_objs.append(item) 182 | return resource_objs, return_kwargs 183 | 184 | @classmethod 185 | def create_many(cls, data: List[Dict]) -> List['BaseResource']: 186 | """ 187 | Creates many resources 188 | """ 189 | return [cls.parse_obj(d) for d in data] 190 | 191 | @staticmethod 192 | def handle_json( 193 | content: Any, 194 | **kwargs 195 | ) -> Union[Dict, List]: 196 | """ 197 | Handles the json response 198 | """ 199 | return json.loads(content, cls = ObjectDecoder, **kwargs) 200 | 201 | 202 | @staticmethod 203 | def handle_stream( 204 | response: aiohttpx.Response, 205 | streaming: Optional[bool] = False, 206 | ) -> Iterator[Dict]: 207 | """ 208 | Handles the stream response 209 | """ 210 | for line in parse_stream(response): 211 | if not line.strip(): continue 212 | try: 213 | yield json.loads(line) 214 | except Exception as e: 215 | logger.error(f'Error: {line}: {e}') 216 | 217 | @staticmethod 218 | async def ahandle_stream( 219 | response: aiohttpx.Response, 220 | streaming: Optional[bool] = False, 221 | ) -> 
AsyncIterator[Dict]: 222 | """ 223 | Handles the stream response 224 | """ 225 | async for line in aparse_stream(response): 226 | if not line.strip(): continue 227 | try: 228 | yield json.loads(line) 229 | except Exception as e: 230 | logger.error(f'Error: {line}: {e}') 231 | 232 | 233 | def __getitem__(self, key: str) -> Any: 234 | """ 235 | Mimic dict 236 | """ 237 | return getattr(self, key) 238 | 239 | 240 | class Permission(BaseResource): 241 | id: str 242 | object: str 243 | created: datetime.datetime 244 | allow_create_engine: bool 245 | allow_sampling: bool 246 | allow_logprobs: bool 247 | allow_search_indices: bool 248 | allow_view: bool 249 | allow_fine_tuning: bool 250 | organization: str 251 | group: Optional[str] 252 | is_blocking: bool 253 | 254 | @property 255 | def since_seconds(self): 256 | return (datetime.datetime.now(datetime.timezone.utc) - self.created).total_seconds() 257 | 258 | 259 | class FileObject(BaseResource): 260 | id: str 261 | object: Optional[str] = 'file' 262 | bytes: Optional[ByteSize] 263 | created_at: Optional[datetime.datetime] 264 | filename: Optional[str] 265 | purpose: Optional[FilePurpose] = FilePurpose.fine_tune 266 | 267 | @validator("created_at") 268 | def validate_created_at(cls, value): 269 | return datetime.datetime.fromtimestamp(value, datetime.timezone.utc) if value else value 270 | 271 | @classmethod 272 | def create_many(cls, data: List[Dict]) -> List['FileObject']: 273 | """ 274 | Creates many resources 275 | """ 276 | return [cls.parse_obj(d) for d in data] 277 | 278 | class EventObject(BaseResource): 279 | object: Optional[str] 280 | created_at: Optional[datetime.datetime] 281 | level: Optional[str] 282 | message: Optional[str] 283 | 284 | @property 285 | def since_seconds(self) -> int: 286 | if self.created_at is None: return -1 287 | return (datetime.datetime.now(datetime.timezone.utc) - self.created_at).total_seconds() 288 | 289 | 290 | class FileResource(BaseResource): 291 | file: Optional[Union[str, FileType, Any]] 292 | file_id: Optional[str] 293 | filename: Optional[str] = None 294 | purpose: FilePurpose = FilePurpose.fine_tune 295 | model: Optional[str] = None 296 | 297 | @validator("purpose") 298 | def validate_purpose(cls, value): 299 | return FilePurpose.parse_str(value) if isinstance(value, str) else value 300 | 301 | def get_params(self, **kwargs) -> List: 302 | """ 303 | Transforms the data to the req params 304 | """ 305 | files = [("purpose", (None, self.purpose.value))] 306 | if self.purpose == FilePurpose.search and self.model: 307 | files.append(("model", (None, self.model))) 308 | if self.file: 309 | file = File(self.file) 310 | files.append( 311 | ("file", (self.filename or file.name, file.read_bytes(), "application/octet-stream")) 312 | ) 313 | return files 314 | 315 | async def async_get_params(self, **kwargs) -> List: 316 | """ 317 | Transforms the data to the req params 318 | """ 319 | files = [("purpose", (None, self.purpose.value))] 320 | if self.purpose == FilePurpose.search and self.model: 321 | files.append(("model", (None, self.model))) 322 | if self.file: 323 | file = File(self.file) 324 | files.append( 325 | ("file", (self.filename or file.name, (await file.async_read_bytes() if _has_fileio else file.read_bytes()), "application/octet-stream")) 326 | ) 327 | return files 328 | 329 | @classmethod 330 | def create_from_batch( 331 | cls, 332 | batch: List[Union[Dict[str, Any], str]], 333 | output_path: Optional[str] = None, 334 | file_id: Optional[str] = None, 335 | filename: Optional[str] = None, 336 | 
purpose: Optional[FilePurpose] = None, 337 | **kwargs, 338 | ) -> Tuple['FileObject', Dict[str, Any]]: 339 | """ 340 | Creates a file object from a batch in jsonl format 341 | """ 342 | for n, b in enumerate(batch): 343 | if isinstance(b, dict): 344 | batch[n] = json.dumps(b, cls = ObjectEncoder) 345 | if output_path: 346 | output = pathlib.Path(output_path) 347 | else: 348 | tmp = tempfile.NamedTemporaryFile(delete = False) 349 | tmp.close() 350 | output = pathlib.Path(tmp.name) 351 | 352 | with output.open('w') as f: 353 | for b in batch: 354 | f.write(f'{b}\n') 355 | resource_fields = get_pyd_field_names(cls) 356 | resource_kwargs = {k: v for k, v in kwargs.items() if k in resource_fields} 357 | return_kwargs = {k: v for k, v in kwargs.items() if k not in resource_fields} 358 | return cls( 359 | file = output, 360 | purpose = purpose, 361 | filename = filename, 362 | file_id = file_id, 363 | **resource_kwargs 364 | ), return_kwargs 365 | 366 | 367 | -------------------------------------------------------------------------------- /async_openai/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from async_openai.utils.logs import logger 4 | from async_openai.utils.helpers import ( 5 | is_naive, 6 | total_seconds, 7 | remove_trailing_slash, 8 | parse_stream, 9 | aparse_stream, 10 | 11 | ) 12 | from async_openai.utils.config import ( 13 | OpenAISettings, 14 | get_settings 15 | ) 16 | 17 | from async_openai.utils.tokenization import ( 18 | modelname_to_contextsize, 19 | get_token_count, 20 | get_max_tokens, 21 | get_chat_tokens_count, 22 | get_max_chat_tokens, 23 | fast_tokenize, 24 | ) 25 | 26 | from async_openai.utils.resolvers import fix_json -------------------------------------------------------------------------------- /async_openai/utils/embedding.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Embedding Utility Helpers 5 | 6 | These are borrowed from the `openai` experiments library 7 | 8 | - We specifically use lazy loading to avoid runtime errors if the user does not have the required dependencies 9 | """ 10 | 11 | from lazyops.libs import lazyload as lz 12 | from lazyops.libs.proxyobj import ProxyObject 13 | from lazyops.types.common import Literal 14 | from typing import Dict, Callable, List, Union, Optional 15 | 16 | if lz.TYPE_CHECKING: 17 | from scipy import spatial 18 | import numpy as np 19 | from numpy import ndarray 20 | else: 21 | spatial = lz.LazyLoad("scipy.spatial") 22 | np = lz.LazyLoad("numpy") 23 | 24 | def _initialize_distance_dict(*args, **kwargs) -> Dict[str, Callable[..., float]]: 25 | """ 26 | Initializes the distance dictionary 27 | """ 28 | return { 29 | "cosine": spatial.distance.cosine, 30 | "euclidean": spatial.distance.euclidean, 31 | "inner_product": lambda x, y: -np.dot(x, y), 32 | "L1": spatial.distance.cityblock, 33 | "L2": spatial.distance.euclidean, 34 | "Linf": spatial.distance.chebyshev, 35 | } 36 | 37 | 38 | distance_metrics: Dict[str, Callable[..., float]] = ProxyObject(obj_getter = _initialize_distance_dict) 39 | 40 | MetricT = Literal["cosine", "L1", "L2", "Linf"] 41 | 42 | 43 | def distances_from_embeddings( 44 | query_embedding: List[float], 45 | embeddings: List[List[float]], 46 | distance_metric: Optional[MetricT] = "cosine", 47 | ) -> List[List]: 48 | """ 49 | Return the distances between a query embedding and a list of embeddings. 
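
Example (a minimal sketch; requires `scipy`/`numpy` at call time):

    distances_from_embeddings([1.0, 0.0], [[1.0, 0.0], [0.0, 1.0]], distance_metric = "cosine")
    # -> [0.0, 1.0]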
50 | """ 51 | return [ 52 | distance_metrics[distance_metric](query_embedding, embedding) 53 | for embedding in embeddings 54 | ] 55 | 56 | 57 | def indices_of_nearest_neighbors_from_distances( 58 | distances: 'ndarray', 59 | reverse: Optional[bool] = False, 60 | ) -> 'ndarray': 61 | """ 62 | Return a list of indices of nearest neighbors from a list of distances. 63 | """ 64 | return np.argsort(distances)[::-1] if reverse else np.argsort(distances) 65 | # if reverse: distances = distances[::-1] 66 | # return np.argsort(distances) -------------------------------------------------------------------------------- /async_openai/utils/helpers.py: -------------------------------------------------------------------------------- 1 | 2 | import random 3 | import inspect 4 | import aiohttpx 5 | import bisect 6 | import itertools 7 | 8 | from datetime import datetime, timedelta 9 | 10 | from typing import Dict, Optional, Iterator, AsyncIterator, Union, List, Tuple 11 | from lazyops.utils.helpers import timed, timer, is_coro_func 12 | 13 | __all__ = [ 14 | 'is_naive', 15 | 'total_seconds', 16 | 'remove_trailing_slash', 17 | 'full_name', 18 | 'merge_dicts', 19 | 'is_coro_func', 20 | 'timed', 21 | 'timer', 22 | 'parse_stream', 23 | 'aparse_stream', 24 | ] 25 | 26 | 27 | def merge_dicts(x: Dict, y: Dict): 28 | z = x.copy() 29 | z.update(y) 30 | return z 31 | 32 | 33 | def full_name(func, follow_wrapper_chains=True): 34 | """ 35 | Return full name of `func` by adding the module and function name. 36 | 37 | If this function is decorated, attempt to unwrap it till the original function to use that 38 | function name by setting `follow_wrapper_chains` to True. 39 | """ 40 | if follow_wrapper_chains: func = inspect.unwrap(func) 41 | return f'{func.__module__}.{func.__qualname__}' 42 | 43 | def is_naive(dt: datetime): 44 | """Determines if a given datetime.datetime is naive.""" 45 | return dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None 46 | 47 | 48 | def total_seconds(delta: timedelta): 49 | """Determines total seconds with python < 2.7 compat.""" 50 | # http://stackoverflow.com/questions/3694835/python-2-6-5-divide-timedelta-with-timedelta 51 | return (delta.microseconds + (delta.seconds + delta.days * 24 * 3600) * 1e6) / 1e6 52 | 53 | 54 | def remove_trailing_slash(host: str): 55 | """ 56 | Removes trailing slash from a host if it exists. 57 | """ 58 | return host[:-1] if host.endswith("/") else host 59 | 60 | 61 | def parse_stream_line_bytes(line: bytes) -> Optional[str]: 62 | """ 63 | Parse a line from a Server-Sent Events stream. 64 | """ 65 | if line: 66 | if line.strip() == b"data: [DONE]": 67 | # return here will cause GeneratorExit exception in urllib3 68 | # and it will close http connection with TCP Reset 69 | return None 70 | if line.startswith(b"data: "): 71 | line = line[len(b"data: "):] 72 | return line.decode("utf-8") 73 | else: 74 | return None 75 | return None 76 | 77 | 78 | def parse_stream_line_string(line: str) -> Optional[str]: 79 | """ 80 | Parse a line from a Server-Sent Events stream. 81 | """ 82 | if line: 83 | if line.strip() == "data: [DONE]": 84 | # return here will cause GeneratorExit exception in urllib3 85 | # and it will close http connection with TCP Reset 86 | return None 87 | return line[len("data: "):] if line.startswith("data: ") else None 88 | return None 89 | 90 | def parse_stream_line(line: Union[str, bytes]) -> Optional[str]: 91 | """ 92 | Parse a line from a Server-Sent Events stream. 
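
Examples (illustrative):

    parse_stream_line('data: {"id": 1}')  # -> '{"id": 1}'
    parse_stream_line('data: [DONE]')     # -> None (end-of-stream sentinel)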
93 | """ 94 | if isinstance(line, bytes): 95 | return parse_stream_line_bytes(line) 96 | elif isinstance(line, str): 97 | return parse_stream_line_string(line) 98 | else: 99 | raise TypeError("line must be str or bytes") 100 | 101 | 102 | def parse_stream(response: aiohttpx.Response) -> Iterator[str]: 103 | """ 104 | Parse a Server-Sent Events stream. 105 | """ 106 | for line in response.iter_lines(): 107 | _line = parse_stream_line(line) 108 | if _line is not None: 109 | yield _line 110 | 111 | async def aparse_stream(response: aiohttpx.Response) -> AsyncIterator[str]: 112 | """ 113 | Parse a Server-Sent Events stream. 114 | """ 115 | async for line in response.aiter_lines(): 116 | _line = parse_stream_line(line) 117 | if _line is not None: 118 | yield _line 119 | 120 | 121 | def weighted_choice(choices: Union[List[Tuple[str, float]], Dict[str, float]]) -> str: 122 | """ 123 | Randomly selects a choice based on the weights provided 124 | """ 125 | if isinstance(choices, dict): 126 | choices = list(choices.items()) 127 | weights = list(zip(*choices))[1] 128 | return choices[bisect.bisect( 129 | list(itertools.accumulate(weights)), 130 | random.uniform(0, sum(weights)) 131 | )][0] 132 | 133 | -------------------------------------------------------------------------------- /async_openai/utils/logs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from lazyops.utils.logs import get_logger, change_logger_level, null_logger 3 | 4 | # to prevent recursive imports, we'll just use os environ here 5 | if os.getenv('DEBUG_ENABLED') == 'True': 6 | logger_level = 'DEV' 7 | else: 8 | logger_level: str = os.getenv('LOGGER_LEVEL', 'INFO').upper() 9 | 10 | logger = get_logger(logger_level) -------------------------------------------------------------------------------- /async_openai/utils/presets/fireworks.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for fireworks.ai External Provider 2 | name: fireworks 3 | config: 4 | api_base: https://api.fireworks.ai 5 | api_path: inference/v1 6 | api_key_header: Authorization 7 | api_key_scheme: Bearer 8 | api_keys: env/FIREWORK_AI_API_KEYS 9 | api_key: env/FIREWORK_AI_API_KEY 10 | hf_compatible: true 11 | routes: 12 | chat: 13 | object_class: async_openai.schemas.external.fireworks.chat.ChatObject 14 | response_class: async_openai.schemas.external.fireworks.chat.ChatResponse 15 | route_class: async_openai.schemas.external.fireworks.chat.ChatRoute 16 | 17 | models: 18 | - name: accounts/fireworks/models/firefunction-v1 19 | aliases: 20 | - firefunction-v1 21 | context_length: 32768 22 | costs: 23 | unit: 1_000_000 24 | input: 0.7 25 | output: 2.8 26 | endpoints: 27 | - chat 28 | - name: accounts/fireworks/models/bleat-adapter 29 | aliases: 30 | - fireworks-bleat 31 | context_length: 4096 32 | costs: 33 | unit: 1_000_000 34 | input: 0.7 35 | output: 2.8 36 | endpoints: 37 | - completions 38 | - name: accounts/fireworks/models/mixtral-8x7b-instruct 39 | aliases: 40 | - mixtral-8x7b 41 | - mixtral-8x7b-instruct 42 | context_length: 32768 43 | costs: 44 | unit: 1_000_000 45 | input: 0.4 46 | output: 1.6 47 | endpoints: 48 | - chat 49 | - completions 50 | -------------------------------------------------------------------------------- /async_openai/utils/presets/together.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for together.xyz External Provider 2 | name: together 3 | config: 4 | api_base: 
https://api.together.xyz 5 | api_path: /v1 6 | api_key_header: Authorization 7 | api_key_scheme: Bearer 8 | api_keys: env/TOGETHER_API_KEYS 9 | api_key: env/TOGETHER_API_KEY 10 | max_retries: env/TOGETHER_MAX_RETRIES 11 | hf_compatible: true 12 | routes: 13 | chat: 14 | object_class: async_openai.schemas.external.together.chat.ChatObject 15 | response_class: async_openai.schemas.external.together.chat.ChatResponse 16 | route_class: async_openai.schemas.external.together.chat.ChatRoute 17 | embeddings: 18 | object_class: async_openai.schemas.external.together.embeddings.EmbeddingObject 19 | response_class: async_openai.schemas.external.together.embeddings.EmbeddingResponse 20 | route_class: async_openai.schemas.external.together.embeddings.EmbeddingRoute 21 | 22 | # routes: 23 | # completion: 24 | # path: /complete 25 | # params: 26 | # model: str 27 | # prompt: str 28 | # max_tokens: int 29 | # ... 30 | models: 31 | - name: mistralai/Mixtral-8x7B-Instruct-v0.1 32 | aliases: 33 | - mixtral-8x7b 34 | - mixtral-8x7b-instruct 35 | context_length: 32768 36 | costs: 37 | unit: 1_000_000 38 | input: 0.6 39 | output: 0.6 40 | endpoints: 41 | - chat 42 | - completions 43 | - name: mistralai/Mistral-7B-Instruct-v0.1 44 | aliases: 45 | - mistral-7b-instruct-v1 46 | context_length: 4096 47 | costs: 48 | unit: 1_000_000 49 | input: 0.2 50 | output: 0.2 51 | endpoints: 52 | - chat 53 | - completions 54 | - name: mistralai/Mistral-7B-Instruct-v0.2 55 | aliases: 56 | - mistral-7b-instruct-v2 57 | context_length: 32768 58 | costs: 59 | unit: 1_000_000 60 | input: 0.2 61 | output: 0.2 62 | endpoints: 63 | - chat 64 | - completions 65 | 66 | # Embedding Models 67 | - name: togethercomputer/m2-bert-80M-2k-retrieval 68 | aliases: 69 | - m2-bert-80M-2k-retrieval 70 | - m2-bert-2k-retrieval 71 | - m2-bert-2k 72 | context_length: 2048 73 | costs: 74 | unit: 1_000_000 75 | input: 0.008 76 | endpoints: 77 | - embeddings 78 | 79 | - name: togethercomputer/m2-bert-80M-8k-retrieval 80 | aliases: 81 | - m2-bert-80M-8k-retrieval 82 | - m2-bert-8k-retrieval 83 | - m2-bert-8k 84 | context_length: 8192 85 | costs: 86 | unit: 1_000_000 87 | input: 0.008 88 | endpoints: 89 | - embeddings 90 | 91 | - name: togethercomputer/m2-bert-80M-32k-retrieval 92 | aliases: 93 | - m2-bert-80M-32k-retrieval 94 | - m2-bert-32k-retrieval 95 | - m2-bert-32k 96 | context_length: 32768 97 | costs: 98 | unit: 1_000_000 99 | input: 0.008 100 | endpoints: 101 | - embeddings -------------------------------------------------------------------------------- /async_openai/utils/presets/together_proxy.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for together.xyz External Provider with Helicone Proxy Support 2 | name: together 3 | config: 4 | api_base: https://api.together.xyz 5 | api_path: /v1 6 | api_key_header: Authorization 7 | api_key_scheme: Bearer 8 | api_keys: env/TOGETHER_API_KEYS 9 | api_key: env/TOGETHER_API_KEY 10 | max_retries: env/TOGETHER_MAX_RETRIES 11 | hf_compatible: true 12 | proxy_url: https://gateway.hconeai.com/v1 13 | proxy_headers: 14 | Helicone-Auth: Bearer env/HELICONE_API_KEY 15 | Helicone-Target-Provider: Together-AI 16 | Helicone-Target-Url: https://api.together.xyz 17 | routes: 18 | chat: 19 | object_class: async_openai.schemas.external.together.chat.ChatObject 20 | response_class: async_openai.schemas.external.together.chat.ChatResponse 21 | route_class: async_openai.schemas.external.together.chat.ChatRoute 22 | embeddings: 23 | object_class: 
async_openai.schemas.external.together.embeddings.EmbeddingObject 24 | response_class: async_openai.schemas.external.together.embeddings.EmbeddingResponse 25 | route_class: async_openai.schemas.external.together.embeddings.EmbeddingRoute 26 | 27 | models: 28 | - name: mistralai/Mixtral-8x7B-Instruct-v0.1 29 | aliases: 30 | - mixtral-8x7b 31 | - mixtral-8x7b-instruct 32 | context_length: 32768 33 | costs: 34 | unit: 1_000_000 35 | input: 0.6 36 | output: 0.6 37 | endpoints: 38 | - chat 39 | - completions 40 | - name: mistralai/Mistral-7B-Instruct-v0.1 41 | aliases: 42 | - mistral-7b-instruct-v1 43 | context_length: 4096 44 | costs: 45 | unit: 1_000_000 46 | input: 0.2 47 | output: 0.2 48 | endpoints: 49 | - chat 50 | - completions 51 | - name: mistralai/Mistral-7B-Instruct-v0.2 52 | aliases: 53 | - mistral-7b-instruct-v2 54 | context_length: 32768 55 | costs: 56 | unit: 1_000_000 57 | input: 0.2 58 | output: 0.2 59 | endpoints: 60 | - chat 61 | - completions 62 | 63 | # Embedding Models 64 | - name: togethercomputer/m2-bert-80M-2k-retrieval 65 | aliases: 66 | - m2-bert-80M-2k-retrieval 67 | - m2-bert-2k-retrieval 68 | - m2-bert-2k 69 | context_length: 2048 70 | costs: 71 | unit: 1_000_000 72 | input: 0.008 73 | endpoints: 74 | - embeddings 75 | 76 | - name: togethercomputer/m2-bert-80M-8k-retrieval 77 | aliases: 78 | - m2-bert-80M-8k-retrieval 79 | - m2-bert-8k-retrieval 80 | - m2-bert-8k 81 | context_length: 8192 82 | costs: 83 | unit: 1_000_000 84 | input: 0.008 85 | endpoints: 86 | - embeddings 87 | 88 | - name: togethercomputer/m2-bert-80M-32k-retrieval 89 | aliases: 90 | - m2-bert-80M-32k-retrieval 91 | - m2-bert-32k-retrieval 92 | - m2-bert-32k 93 | context_length: 32768 94 | costs: 95 | unit: 1_000_000 96 | input: 0.008 97 | endpoints: 98 | - embeddings -------------------------------------------------------------------------------- /async_openai/utils/resolvers.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | import json 4 | from typing import Optional, Callable, Dict, Union, List, Any 5 | from .logs import logger 6 | from .fixjson import fix_json 7 | 8 | 9 | _json_pattern = re.compile(r"({[^}]*$|{.*})", flags=re.DOTALL) 10 | 11 | def build_stack(json_str: str): 12 | stack = [] 13 | fixed_str = "" 14 | open_quotes = False 15 | 16 | # a flag indicating whether we've seen a comma or colon most recently 17 | # since last opening/closing a dict or list 18 | last_seen_comma_or_colon = None 19 | 20 | for i, char in enumerate(json_str): 21 | if not open_quotes: 22 | # opening a new nested structure 23 | if char in "{[": 24 | stack.append(char) 25 | last_seen_comma_or_colon = None 26 | # closing a nested structure 27 | elif char in "}]": 28 | if stack: stack.pop() # guard against unbalanced closing brackets 29 | last_seen_comma_or_colon = None 30 | if char in ",:": 31 | last_seen_comma_or_colon = char 32 | # opening or closing a string, but only if it's not escaped 33 | if char == '"' and i > 0 and json_str[i - 1] != "\\": 34 | open_quotes = not open_quotes 35 | 36 | fixed_str += char 37 | 38 | return (stack, fixed_str, open_quotes, last_seen_comma_or_colon) 39 | 40 | 41 | 42 | def is_truncated(json_str: str): 43 | """ 44 | Check if the json string is truncated by checking if the number of opening 45 | brackets is greater than the number of closing brackets.
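For example (illustrative): is_truncated('{"a": [1, 2') is True, since both the object and the array remain open on the stack, while is_truncated('{"a": [1, 2]}') is False.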
46 | """ 47 | stack, _, _, _ = build_stack(json_str) 48 | return len(stack) > 0 49 | 50 | 51 | def find_json_response(full_response: str, verbose: Optional[bool] = False): 52 | """ 53 | Takes a full response that might contain other strings and attempts to extract the JSON payload. 54 | Has support for truncated JSON where the JSON begins but the token window ends before the json 55 | is properly closed. 56 | """ 57 | # Deal with fully included responses as well as truncated responses that only have an opening "{" 58 | if full_response.startswith("{") and not full_response.endswith("}"): 59 | full_response += "}" 60 | 61 | extracted_responses = list(_json_pattern.finditer(full_response)) 62 | if not extracted_responses: 63 | logger.error( 64 | f"Unable to find a JSON payload in the response: `{full_response}`" 65 | ) 66 | return None 67 | 68 | if len(extracted_responses) > 1 and verbose: 69 | logger.error(f"Found more than one JSON candidate, continuing with the first... {extracted_responses}") 70 | 71 | extracted_response = extracted_responses[0] 72 | 73 | if is_truncated(extracted_response.group(0)): 74 | # Start at the same location and just expand to the end of the message 75 | extracted_str = full_response[extracted_response.start() :] 76 | else: 77 | extracted_str = extracted_response.group(0) 78 | 79 | return extracted_str 80 | 81 | def try_load_json( 82 | text: str, 83 | object_hook: Optional[Callable] = None, 84 | **kwargs, 85 | ): 86 | """ 87 | Attempts to load the text as JSON, falling back to `fix_json` on failure 88 | """ 89 | try: 90 | return json.loads(text, object_hook = object_hook, **kwargs) 91 | except Exception as e1: 92 | try: 93 | return json.loads(fix_json(text), object_hook = object_hook, **kwargs) 94 | except Exception as e2: 95 | logger.error(f"Unable to load JSON. Errors: {e1}, {e2}") 96 | raise e2 97 | 98 | 99 | def extract_json_response( 100 | full_response: str, 101 | verbose: Optional[bool] = False, 102 | raise_exceptions: Optional[bool] = False, 103 | object_hook: Optional[Callable] = None, 104 | ) -> Union[Dict[str, Any], List[Any], Any]: 105 | """ 106 | Returns the extracted JSON response from the full response 107 | """ 108 | extracted_str = find_json_response(full_response, verbose = verbose) 109 | if not extracted_str: 110 | return None 111 | try: 112 | return try_load_json(extracted_str, object_hook = object_hook) 113 | except Exception as e: 114 | if verbose: logger.trace(f"Unable to extract JSON response from {extracted_str}", error = e) 115 | if raise_exceptions: raise e 116 | return None -------------------------------------------------------------------------------- /async_openai/utils/tokenization.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import functools 4 | import tiktoken 5 | import contextlib 6 | from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from async_openai.schemas.chat import ChatMessage 10 | 11 | def modelname_to_contextsize(modelname: str) -> int: 12 | """ 13 | Return the maximum context size, in tokens, for a model. 14 | 15 | text-davinci-003: 4,097 tokens 16 | text-curie-001: 2,048 tokens 17 | text-babbage-001: 2,048 tokens 18 | text-ada-001: 2,048 tokens 19 | code-davinci-002: 8,000 tokens 20 | code-cushman-001: 2,048 tokens 21 | gpt-3.5-turbo: 4,096 tokens 22 | gpt-3.5-turbo-16k: 16,384 tokens 23 | gpt-4: 8,192 tokens 24 | gpt-4-32k: 32,768 tokens 25 | 26 | Args: 27 | modelname: The modelname we want to know the context size for.
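For example, "gpt-4-32k-0613" resolves to 32,768 and "gpt-3.5-turbo-16k" to 16,384; unrecognized names fall through to the davinci-style default of 4,097 below.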
28 | 29 | Returns: 30 | The maximum context size 31 | """ 32 | if modelname == "code-davinci-002": 33 | return 8000 34 | 35 | if modelname in { 36 | "text-curie-001", 37 | "text-babbage-001", 38 | "text-ada-001", 39 | "code-cushman-001", 40 | }: 41 | return 2048 42 | 43 | # Check GPT4 44 | if modelname.startswith("gpt-4") or modelname.startswith("gpt4"): 45 | if "32k" in modelname: 46 | return 32768 47 | return 16384 if "16k" in modelname else 8192 48 | 49 | # Check GPT3.5 50 | if "gpt" in modelname \ 51 | and "turbo" in modelname \ 52 | and ("3.5" in modelname or "35" in modelname): 53 | return 16384 if "16k" in modelname else 4096 54 | 55 | return 4097 56 | 57 | 58 | def get_encoder( 59 | model_name: str, 60 | ) -> tiktoken.Encoding: 61 | """ 62 | Returns the correct encoder for the model name. 63 | """ 64 | if "gpt" in model_name and "2" not in model_name: 65 | 66 | # Likely GPT4 or GPT3.5 67 | return tiktoken.get_encoding("cl100k_base") 68 | encoder = "gpt2" 69 | if model_name in {"text-davinci-003", "text-davinci-002"}: 70 | encoder = "p50k_base" 71 | if model_name.startswith("code"): 72 | encoder = "p50k_base" 73 | 74 | return tiktoken.get_encoding(encoder) 75 | 76 | @functools.lru_cache(maxsize = 2048) 77 | def get_token_count( 78 | text: str, 79 | model_name: str, 80 | ) -> int: 81 | """ 82 | Returns the number of tokens in the text. 83 | """ 84 | return len(get_encoder(model_name).encode(text)) 85 | 86 | 87 | def get_max_tokens( 88 | text: Union[str, List[str]], 89 | model_name: str, 90 | max_tokens: Optional[int] = None, 91 | padding_token_count: Optional[int] = 16 # tokens added to make sure we do not go over the limit 92 | ): 93 | """ 94 | Returns the maximum number of tokens that can be generated for a model. 95 | """ 96 | max_model_tokens = modelname_to_contextsize(model_name) - padding_token_count 97 | if isinstance(text, list): 98 | all_text_tokens = [get_token_count(t, model_name) for t in text] 99 | text_tokens = max(all_text_tokens) 100 | else: 101 | text_tokens = get_token_count(text, model_name) 102 | max_input_tokens = max_model_tokens - text_tokens 103 | if max_tokens is None: 104 | return max_input_tokens 105 | return min(max_input_tokens, max_tokens) 106 | # return modelname_to_contextsize(model_name) - get_token_count(text, model_name) 107 | 108 | 109 | def get_chat_tokens_count( 110 | messages: List[Union[Dict[str, str], 'ChatMessage']], 111 | model_name: str, 112 | reply_padding_token_count: Optional[int] = 3, 113 | message_padding_token_count: Optional[int] = 4, 114 | **kwargs 115 | ) -> int: 116 | """ 117 | Returns the number of tokens in the chat. 118 | """ 119 | num_tokens = 0 120 | for message in messages: 121 | if message.get('name'): 122 | num_tokens -= 1 123 | num_tokens += message_padding_token_count + get_token_count(message.get('content', ''), model_name) 124 | 125 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 126 | return num_tokens 127 | 128 | def get_max_chat_tokens( 129 | messages: List[Union[Dict[str, str], 'ChatMessage']], 130 | model_name: str, 131 | max_tokens: Optional[int] = None, 132 | reply_padding_token_count: Optional[int] = 3, 133 | message_padding_token_count: Optional[int] = 4, 134 | padding_token_count: Optional[int] = 16 # tokens added to make sure we do not go over the limit 135 | ): 136 | """ 137 | Returns the maximum number of tokens that can be generated for a model.
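Roughly: modelname_to_contextsize(model) - padding_token_count - prompt tokens, optionally capped by max_tokens. Illustrative arithmetic: a single 100-token user message on "gpt-3.5-turbo" yields 4096 - 16 - (100 + 4 + 3) = 3973 generatable tokens.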
138 | """ 139 | 140 | num_tokens = 0 141 | for message in messages: 142 | if message.get('name'): 143 | num_tokens -= 1 144 | num_tokens += message_padding_token_count + get_token_count(message.get('content', ''), model_name) 145 | 146 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 147 | max_model_tokens = modelname_to_contextsize(model_name) - padding_token_count 148 | max_input_tokens = max_model_tokens - num_tokens 149 | if max_tokens is None: 150 | return max_input_tokens 151 | return min(max_input_tokens, max_tokens) 152 | 153 | 154 | def fast_tokenize(text: Any) -> int: 155 | """ 156 | Do a very fast tokenization of the text 157 | by estimating the number of tokens based on the 158 | number of characters in the string. 159 | 160 | 1 token ~= 4 characters 161 | """ 162 | return len(str(text)) // 4 -------------------------------------------------------------------------------- /async_openai/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.0.53' -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | from setuptools import setup, find_packages 4 | 5 | if sys.version_info.major != 3: 6 | raise RuntimeError("This package requires Python 3+") 7 | 8 | pkg_name = 'async_openai' 9 | gitrepo = 'GrowthEngineAI/async-openai' 10 | 11 | root = Path(__file__).parent 12 | version = root.joinpath('async_openai/version.py').read_text().split('VERSION = ', 1)[-1].strip().replace('-', '').replace("'", '') 13 | 14 | requirements = [ 15 | 'aiohttpx >= 0.0.12', 16 | # 'file-io', 17 | 'backoff', 18 | 'tiktoken', 19 | 'lazyops >= 0.2.76', # Pydantic Support 20 | 'pydantic', 21 | 'jinja2', 22 | 'pyyaml', 23 | # 'pydantic-settings', # remove to allow for v1/v2 support 24 | ] 25 | 26 | if sys.version_info.minor < 8: 27 | requirements.append('typing_extensions') 28 | 29 | extras = { 30 | 'cache': ['kvdb'], # Adds caching support 31 | 'utils': ['numpy', 'scipy'] # Adds embedding utility support 32 | } 33 | 34 | args = { 35 | 'packages': find_packages(include = [f'{pkg_name}', f'{pkg_name}.*',]), 36 | 'install_requires': requirements, 37 | 'include_package_data': True, 38 | 'long_description': root.joinpath('README.md').read_text(encoding='utf-8'), 39 | 'entry_points': { 40 | "console_scripts": [] 41 | }, 42 | 'extras_require': extras, 43 | } 44 | 45 | setup( 46 | name = pkg_name, 47 | version = version, 48 | url=f'https://github.com/{gitrepo}', 49 | license='MIT Style', 50 | description='Unofficial Async Python client library for the OpenAI API', 51 | author='Tri Songz', 52 | author_email='ts@growthengineai.com', 53 | long_description_content_type="text/markdown", 54 | classifiers=[ 55 | 'Intended Audience :: Developers', 56 | 'License :: OSI Approved :: MIT License', 57 | 'Programming Language :: Python :: 3.7', 58 | 'Topic :: Software Development :: Libraries', 59 | ], 60 | **args 61 | ) -------------------------------------------------------------------------------- /tests/chat.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from client import OpenAI 3 | from async_openai.utils import logger 4 | 5 | async def run_test(): 6 | 7 | model = "gpt-3.5-turbo-16k" 8 | 9 | result = await OpenAI.chat.async_create( 10 | model = model, 11 | messages = [ 12 | {"role": "user", "content": 
"Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"} 13 | ], 14 | ) 15 | logger.info(f'Result Model: {result}') 16 | logger.info(f'Result Type: {type(result)}') 17 | 18 | logger.info(f'Result Text: {result.text}') 19 | logger.info(f'Result Chat Message: {result.messages}') 20 | 21 | logger.info(f'Result Usage: {result.usage}') 22 | logger.info(f'Result Consumption: {result.consumption}') 23 | 24 | 25 | 26 | result = OpenAI.chat.create( 27 | messages = [ 28 | {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"} 29 | ], 30 | ) 31 | 32 | logger.info(f'Result Model: {result}') 33 | logger.info(f'Result Type: {type(result)}') 34 | 35 | logger.info(f'Result Text: {result.text}') 36 | logger.info(f'Result Chat Message: {result.messages}') 37 | 38 | logger.info(f'Result Usage: {result.usage}') 39 | 40 | 41 | 42 | asyncio.run(run_test()) 43 | 44 | -------------------------------------------------------------------------------- /tests/chat_functions.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from enum import Enum 3 | from client_rotate import OpenAI 4 | from async_openai.utils import logger 5 | from pydantic import BaseModel, Field 6 | 7 | 8 | class Unit(str, Enum): 9 | celsius = "celsius" 10 | fahrenheit = "fahrenheit" 11 | 12 | class Weather(BaseModel): 13 | location: str = Field(..., description="The city and state, e.g. 
San Francisco, CA.") 14 | unit: Unit = Field(Unit.fahrenheit) 15 | 16 | functions = [ 17 | { 18 | "name": "get_current_weather", 19 | "description": "Get the current weather in a given location", 20 | "parameters": Weather, 21 | } 22 | ] 23 | 24 | async def run_test(): 25 | 26 | model = "gpt-3.5-turbo-instruct" 27 | 28 | result = await OpenAI.chat.async_create( 29 | model = model, 30 | messages = [ 31 | {"role": "user", "content": "What's the weather like in Boston today?"} 32 | ], 33 | functions = functions, 34 | ) 35 | logger.info(f'Result Model: {result}') 36 | logger.info(f'Result Type: {type(result)}') 37 | 38 | logger.info(f'Result Text: {result.text}') 39 | logger.info(f'Result Chat Message: {result.messages}') 40 | logger.info(f'Result Chat Function: {result.function_results}') 41 | 42 | logger.info(f'Result Usage: {result.usage}') 43 | logger.info(f'Result Consumption: {result.consumption}') 44 | 45 | 46 | 47 | result = OpenAI.chat.create( 48 | model = model, 49 | messages = [ 50 | {"role": "user", "content": "What's the weather like in Boston today?"} 51 | ], 52 | functions = functions, 53 | ) 54 | 55 | logger.info(f'Result Model: {result}') 56 | logger.info(f'Result Type: {type(result)}') 57 | 58 | logger.info(f'Result Text: {result.text}') 59 | logger.info(f'Result Chat Message: {result.messages}') 60 | logger.info(f'Result Chat Function: {result.function_results}') 61 | 62 | logger.info(f'Result Usage: {result.usage}') 63 | 64 | 65 | logger.info(functions) 66 | logger.info(Weather.schema_json(indent=2)) 67 | # asyncio.run(run_test()) 68 | 69 | -------------------------------------------------------------------------------- /tests/client.py: -------------------------------------------------------------------------------- 1 | from async_openai import OpenAI 2 | 3 | org_id = 'org-...' 4 | api_key = 'sk-...' 5 | 6 | OpenAI.configure( 7 | api_key = api_key, 8 | organization = org_id, 9 | debug_enabled = True, 10 | ) -------------------------------------------------------------------------------- /tests/client_rotate.py: -------------------------------------------------------------------------------- 1 | from async_openai import OpenAI 2 | 3 | org_id = 'org-...' 4 | api_key = 'sk-...' 5 | 6 | azure_api_base = "https://....openai.azure.com/" 7 | 8 | # azure_api_version = "2023-03-15-preview" 9 | azure_api_version = "2023-07-01-preview" 10 | azure_api_key = "...." 
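# Note: with enable_rotating_clients = True (set below), the manager can rotate between the configured OpenAI and Azure clients, and prioritize = "azure" selects the Azure client first; OpenAI.rotate_client(verbose = True) at the bottom of this script advances the rotation manually.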
11 | 12 | OpenAI.configure( 13 | # OpenAI Configuration 14 | api_key = api_key, 15 | organization = org_id, 16 | debug_enabled = True, 17 | 18 | # Azure Configuration 19 | azure_api_base = azure_api_base, 20 | azure_api_version = azure_api_version, 21 | azure_api_key = azure_api_key, 22 | enable_rotating_clients = True, 23 | prioritize = "azure", 24 | ) 25 | 26 | 27 | print(OpenAI.settings.azure.dict()) 28 | OpenAI.get_current_client_info(verbose = True) 29 | OpenAI.rotate_client(verbose = True) -------------------------------------------------------------------------------- /tests/completion.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from client import OpenAI 3 | from async_openai.utils import logger 4 | 5 | async def run_test(): 6 | result = await OpenAI.completions.async_create( 7 | prompt = 'say this is a test', 8 | max_tokens = 4, 9 | stream = False 10 | ) 11 | logger.info(f'Result Model: {result}') 12 | logger.info(f'Result Type: {type(result)}') 13 | 14 | logger.info(f'Result Text: {result.text}') 15 | logger.info(f'Result Usage: {result.usage}') 16 | logger.info(f'Result Consumption: {result.consumption}') 17 | 18 | 19 | 20 | result = OpenAI.completions.create( 21 | prompt = 'say this is a test', 22 | max_tokens = 4, 23 | stream = True 24 | ) 25 | 26 | 27 | logger.info(f'Result Model: {result}') 28 | logger.info(f'Result Type: {type(result)}') 29 | 30 | logger.info(f'Result Text: {result.text}') 31 | logger.info(f'Result Usage: {result.usage}') 32 | 33 | 34 | 35 | asyncio.run(run_test()) 36 | 37 | -------------------------------------------------------------------------------- /tests/external_provider.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # os.environ['TOGETHER_API_KEY'] = 'test123' 4 | os.environ['TOGETHER_API_KEYS'] = '[test1253, test4565]' 5 | 6 | from async_openai.utils.external_config import ExternalProviderSettings 7 | 8 | def test_external_provider(): 9 | s = ExternalProviderSettings.from_preset('together') 10 | print(s) 11 | 12 | test_external_provider() --------------------------------------------------------------------------------