├── .gitattributes
├── .github
│   └── workflows
│       └── python-publish.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── async_openai
│   ├── __init__.py
│   ├── client.py
│   ├── external_client.py
│   ├── loadbalancer.py
│   ├── manager.py
│   ├── meta.py
│   ├── routes.py
│   ├── schemas
│   │   ├── __init__.py
│   │   ├── chat.py
│   │   ├── completions.py
│   │   ├── edits.py
│   │   ├── embeddings.py
│   │   ├── external
│   │   │   ├── __init__.py
│   │   │   ├── fireworks
│   │   │   │   ├── __init__.py
│   │   │   │   └── chat.py
│   │   │   └── together
│   │   │       ├── __init__.py
│   │   │       ├── chat.py
│   │   │       └── embeddings.py
│   │   ├── images.py
│   │   └── models.py
│   ├── types
│   │   ├── __init__.py
│   │   ├── context.py
│   │   ├── errors.py
│   │   ├── functions.py
│   │   ├── options.py
│   │   ├── pricing.yaml
│   │   ├── resources.py
│   │   ├── responses.py
│   │   └── routes.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── embedding.py
│   │   ├── external_config.py
│   │   ├── fixjson.py
│   │   ├── helpers.py
│   │   ├── logs.py
│   │   ├── presets
│   │   │   ├── fireworks.yaml
│   │   │   ├── together.yaml
│   │   │   └── together_proxy.yaml
│   │   ├── resolvers.py
│   │   └── tokenization.py
│   └── version.py
├── setup.py
└── tests
    ├── chat.py
    ├── chat_functions.py
    ├── client.py
    ├── client_rotate.py
    ├── completion.py
    └── external_provider.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 | 
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 | 
9 | name: Upload Python Package
10 | 
11 | on:
12 |   workflow_dispatch:
13 |   push:
14 |     paths:
15 |       # - 'setup.py'
16 |       - 'async_openai/version.py'
17 |   release:
18 |     types: [created]
19 | 
20 | jobs:
21 |   build-python-package:
22 | 
23 |     runs-on: ubuntu-latest
24 | 
25 |     steps:
26 |     - uses: actions/checkout@v2
27 |     - name: Set up Python
28 |       uses: actions/setup-python@v2
29 |       with:
30 |         python-version: '3.x'
31 |     - name: Install dependencies
32 |       run: |
33 |         python -m pip install --upgrade pip
34 |         pip install build
35 |     - name: Build package
36 |       run: python -m build
37 |     - name: Publish package
38 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
39 |       with:
40 |         user: __token__
41 |         password: ${{ secrets.pypi_api_token }}
42 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | cache_**
3 | ***_cache
4 | ***cache*
5 | **cache/**
6 | *.DS_Store
7 | tests*
8 | __pycache__*
9 | *logs
10 | *dist
11 | *build
12 | **build.sh
13 | **build_lib.sh
14 | **build_docker.sh
15 | **run_test.sh
16 | *test.py
17 | *.egg-info*
18 | *.vscode
19 | **test
20 | **.ipynb**
21 | **test.sh
22 | /.idea/
23 | async_openai/v1*
24 | tests/private_*
25 | !tests/
26 | tests/v2/private_*
27 | tests/v2/fireworks*
28 | tests/v2/together*
29 | async_openai/schemas/external/huggingface
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | #### v0.0.53 (2024-05-31)
4 | 
5 | - Added support for the new `gpt-4o` model
6 | - Added support for external providers
7 | - Updated certain validation behaviors of Functions
8 | 
9 | - TODO: Add batch create support
10 | 
11 | #### v0.0.52 (2024-02-28)
12 | 
13 | - Added support for the following parameters in `model_configurations` in `OpenAIManager`:
14 | 
15 |   - `ping_timeout` - allows for custom timeouts for each client.
16 | 
17 |   - `included_models` - allows for more flexible setting of models in Azure.
18 | 
19 |   - `weight` - allows for weighted selection of clients.
20 | 
21 | - Improved Healthcheck behavior to cache the result if successful for a period of time, and always recheck if not.
22 | 
23 | - Added `dimension` parameter for `embedding` models.
24 | 
25 | #### v0.0.51rc (2024-02-07)
26 | 
27 | - Modification of `async_openai.types.context.ModelContextHandler` to a proxied object singleton.
28 | 
29 | - Began adding support for external providers, such as `together`, to allow usage in conjunction with `OpenAI` models. WIP.
30 | 
31 | - Rework of `api_resource` and `root_name` in `Route` objects to be settable during initialization. This allows flexibility for external providers.
32 | 
33 | - Added capability to have multi-api-key support for external providers, allowing for automatic rotation between api keys.
34 | 
35 | #### v0.0.50 (2024-02-01)
36 | 
37 | **Breaking Changes**
38 | 
39 | - The `OpenAI` client has been refactored to be a singleton `ProxyObject` vs a `Type` object.
40 | 
41 |   Currently, this API is accessible with `async_openai.OpenAIManager`, which provides all the existing functionality of the `OpenAI` client, with a few additional features.
42 | 
43 | - `OpenAIManager` supports automatic proxy rotation and client selection based on available models.
44 | 
45 | - `OpenAIManager` supports automatic retrying of failed requests, as well as enabling automatic healthchecking prior to each request to ensure the endpoint is available with `auto_healthcheck_enabled`; otherwise, it will rotate to another endpoint. This is useful for ensuring high availability and reliability of the API (see the sketch at the end of this entry).
46 | 
47 |   Future versions will deprecate the `OpenAI` client in favor of the `OpenAIManager` object.
48 | 
49 | - Added a new `OpenAIFunctions` class which provides a robust interface for creating and running functions. This class is also a singleton `ProxyObject`.
50 | 
51 |   This can be accessed through the `OpenAIManager.functions` object.
52 | 
53 | 
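As a minimal sketch of the rotation and retry behavior described above (assuming `OpenAIManager` mirrors the `OpenAI.configure(...)` / `async_chat_create(...)` interface documented in the README; treating `auto_healthcheck_enabled` as a configure flag is an assumption, and all credential values are placeholders):

```python
import asyncio
from async_openai import OpenAIManager

OpenAIManager.configure(
    api_key = "sk-XXXX",                 # placeholder credentials
    azure_api_base = "https://....openai.azure.com/",
    azure_api_key = "....",
    enable_rotating_clients = True,      # rotate between configured clients on failure
    auto_healthcheck_enabled = True,     # assumption: ping each endpoint before use
)

async def main():
    # Failed requests are retried, and unhealthy endpoints are rotated away from.
    result = await OpenAIManager.async_chat_create(
        model = "gpt-3.5-turbo",
        messages = [{"role": "user", "content": "Hello!"}],
        auto_retry = True,
    )
    print(result.messages)

asyncio.run(main())
```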
54 | #### v0.0.41 (2023-11-06)
55 | 
56 | **Update to Latest OpenAI API**
57 | 
58 | This version updates the API to the latest version of OpenAI's API, which includes the following changes:
59 | 
60 | - Addition of `gpt-4-turbo` models
61 | 
62 | - Added additional supported parameters to the `chat` endpoint. We maintain v1 parameters for `azure` endpoints, but pass through the new parameters for `openai` endpoints.
63 | 
64 | - Added gradual support for `tools`
65 | 
66 | **Updates**
67 | 
68 | - Rework of validating `models`: validation is no longer performed, and the user is expected to pass the correct model name.
69 | 
70 | - No longer supporting `validate_max_tokens`, as there are now many different schemas for `max_tokens` depending on the model.
71 | 
72 | 
73 | 
74 | #### v0.0.40 (2023-10-18)
75 | 
76 | **Potentially Breaking Changes**
77 | 
78 | This version introduces full compatibility with `pydantic v1/v2`, where previous versions would only work with `pydantic v1`. Auto-detection and handling of deprecated methods of `pydantic` models are handled by `lazyops`, and require `lazyops >= 0.2.60`.
79 | 
80 | With `pydantic v2` support, there should be a slight performance increase in parsing `pydantic` objects, although the majority of the time is spent waiting for the API to respond.
81 | 
82 | Additionally, support is added for handling the response like a `dict` object, so you can access the response like `response['choices']` rather than `response.choices`.
83 | 
84 | #### v0.0.36 (2023-10-11)
85 | 
86 | **Additions**
87 | 
88 | - Added auto-parsing of `pydantic` objects in `function_call` parameters, returning the same object schema in `chat_response.function_result_objects`.
89 | 
90 | 
91 | #### v0.0.35 (2023-10-06)
92 | 
93 | **Additions**
94 | 
95 | - Added `auto_retry` option to the `OpenAI` client, which will automatically retry failed requests.
96 | - Added `RotatingClients` class which handles the rotation of multiple clients. This can be enabled by passing `rotating_clients=True` to the `OpenAI` client while configuring.
97 | - Added `OpenAI.chat_create` and `OpenAI.async_chat_create` methods which automatically handle rotating clients and retrying failed requests.
98 | - Added `azure_model_mapping`, which allows automatic mapping of Azure models to OpenAI models: when passing `openai` model names as a parameter, they are automatically converted to the Azure model. This is only done in the `chat` implementation.
99 | 
100 | **Fixes**
101 | 
102 | - Fixed `api_version` Configuration handling.
103 | - Fixed parsing of `function_call` in the streaming implementation.
104 | 
105 | 
106 | 
107 | #### v0.0.34 (2023-10-06)
108 | 
109 | **Changes**
110 | 
111 | - Updated default `api_version` to `2023-07-01-preview`
112 | - Added `__getitem__` attributes to completion and chat objects, allowing them to act like `dict` objects (see the sketch below).
113 | - Added `functions` and `function_call` to `Chat` completion routes.
114 | - `function.properties` can pass through a `pydantic` object, which will automatically be converted to a `dict` JSON schema.
115 | - Added `function_call` attribute in `ChatMessage` objects, allowing for easy access to the function call.
116 | - Streaming is not supported for `functions` at this time.
117 | 
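As a minimal sketch of the dict-style access described in v0.0.40 and enabled here by `__getitem__` (the model and prompt values are placeholders):

```python
from async_openai import OpenAI

result = OpenAI.chat.create(
    model = "gpt-3.5-turbo",
    messages = [{"role": "user", "content": "Hello!"}],
)

# Attribute access and dict-style access return the same field:
assert result.choices == result['choices']
```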
118 | #### v0.0.33 (2023-08-24)
119 | 
120 | **Changes**
121 | 
122 | - Updated the auto-configured `httpx` logger to be disabled if `debug_enabled` is set to `False`.
123 | 
124 | 
125 | #### v0.0.32 (2023-08-23)
126 | 
127 | **Changes**
128 | 
129 | - Updated `headers` behavior and parameter, allowing it to be passed to each API call.
130 | - Updated `auth` behavior, which now utilizes `httpx.Auth` rather than injecting into the header directly.
131 | - Added `custom_headers` configuration that can be passed to the `OpenAI` client during initialization.
132 | - Added customization of `connection_pool`, controlling the number of concurrent connections to the API.
133 | 
134 | - Reworked `streaming` implementations, which previously did not work properly.
135 | - Added `parse_stream` parameter (default: true) which defers parsing of the stream until it is called with `result.stream` or `result.astream`, rather than parsing the stream as it is received.
136 | 
137 | 
138 | #### v0.0.31 (2023-08-11)
139 | 
140 | 
141 | **Changes**
142 | 
143 | - Updated some behaviors of the `OpenAI` Client:
144 |   * allow for customization of retry behavior or completely disabling it.
145 | 
146 | - Routes now take the `is_azure` parameter during init rather than using `@property` to determine the route.
147 | - The `_send` method is better optimized for retry behaviors.
148 | 
149 | **Fixes**
150 | 
151 | - Resolved the `model` endpoint.
152 | - Resolved handling of `Azure` models.
153 | 
154 | 
155 | 
156 | ---
157 | 
158 | #### v0.0.30 (2023-08-10)
159 | 
160 | _Potentially breaking changes in this version_
161 | 
162 | **Changes**
163 | 
164 | - Refactored the architecture of the `OpenAI` Client to accommodate multi-client initialization, i.e. `OpenAI` can now be initialized with multiple API keys and will automatically rotate between them, as well as switch back and forth between Azure and OpenAI.
165 | 
166 | - Settings are initialized after the first call, rather than globally.
167 | 
168 | - Routes and Clients are configured after the first call, rather than during initialization.
169 | 
170 | 
171 | **Fixes**
172 | 
173 | - Resolved `embedding` endpoints.
174 | 
175 | **Updates**
176 | 
177 | - Changed default `api-version` to `2023-03-15-preview`
178 | 
179 | ---
180 | 
181 | #### v0.0.22 (2023-06-14)
182 | - Updated pricing to reflect OpenAI's new pricing model
183 |   - `gpt-3.5-turbo`
184 |   - `text-embedding-ada-002`
185 | - Bugfix for consumption and usage validation in `chat` models
186 | - Added support for `gpt-3.5-turbo-16k`
187 | - Modified handling of `gpt-3.5-turbo`'s consumption pricing to reflect `prompt` and `completion` usage
188 | - Modified default `Embedding` model to be `ada`
189 | 
190 | ---
191 | #### 0.0.17 (2023-04-12)
192 | - Added better support for chatgpt models and `gpt-4`
193 | - Better validation of `max_tokens`
194 | 
195 | ---
196 | #### 0.0.11 (2023-03-07)
197 | - Added support for GPT-3.5 Turbo through `async_openai.OpenAI.chat`
198 | - Refactored `async_openai.OpenAI` to utilize a `metaclass` rather than initializing directly
199 | 
200 | #### 0.0.7 (2023-02-02)
201 | - Refactored `async_openai.types.options.OpenAIModel` to handle more robust parsing of model names.
202 | 
203 | #### 0.0.3 (2022-12-21)
204 | - Fixed incorrect charges for `babbage` and `ada` models.
205 | 
206 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Growth Engine Inc.
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include async_openai *
2 | recursive-exclude * __pycache__
3 | recursive-exclude * *.py[co]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # async-openai
2 | Unofficial Async Python client library for the [OpenAI](https://openai.com) API, based on the [Documented Specs](https://beta.openai.com/docs/api-reference/making-requests)
3 | 
4 | **Latest Version**: [![PyPI version](https://badge.fury.io/py/async-openai.svg)](https://badge.fury.io/py/async-openai)
5 | 
6 | **[Official Client](https://github.com/openai/openai-python)**
7 | 
8 | ## Features
9 | 
10 | - [x] Asyncio-based, with Sync and Async support via `httpx`
11 | 
12 | - [ ] Supports all API endpoints
13 | 
14 |   - [x] `Completions`: [Docs](https://beta.openai.com/docs/api-reference/completions)
15 | 
16 |     - [x] Supports Streaming
17 | 
18 |   - [x] `Chat`: [Docs](https://beta.openai.com/docs/api-reference/chat)
19 | 
20 |     - [x] Supports Streaming
21 | 
22 |     - [x] Supports `Functions`
23 | 
24 |   - [x] `Edits`: [Docs](https://beta.openai.com/docs/api-reference/edits)
25 | 
26 |   - [x] `Embeddings`: [Docs](https://beta.openai.com/docs/api-reference/embeddings)
27 | 
28 |   - [x] `Models`: [Docs](https://beta.openai.com/docs/api-reference/models)
29 | 
30 | - [x] Strongly typed validation of requests and responses with `Pydantic` models, with transparent
31 | access to the raw response and object-based results.
32 | 
33 | - [x] Handles Retries automatically through `backoff` and custom retry logic.
34 | 
35 | - [x] Handles `rate_limit` errors and retries automatically (when passing `auto_retry = True`).
36 | 
37 | - [x] Supports Multiple Clients and Auto-Rotation of Clients
38 | 
39 | - [x] Supports the `Azure` API
40 | 
41 | - [x] Supports Local and Remote Cloud Object Storage File Handling Asynchronously through `file-io`
42 | 
43 |   - [x] Supports `S3`: `s3://bucket/path/to/file.txt`
44 | 
45 |   - [x] Supports `GCS`: `gs://bucket/path/to/file.txt`
46 | 
47 |   - [x] Supports `Minio`: `minio://bucket/path/to/file.txt`
48 | 
49 | - [x] Supports `limited` cost tracking for `Completions` and `Edits` requests (when stream is not enabled)
50 | 
51 | - [x] Parallelization-safe with ThreadPools or any `asyncio`-compatible event loop. Can handle 100s of requests per second (if you don't run into rate limits).
52 | 
53 | 
54 | ---
55 | 
56 | ## Installation
57 | 
58 | ```bash
59 | # Install from stable
60 | pip install async-openai
61 | 
62 | # Install from dev/latest
63 | pip install git+https://github.com/GrowthEngineAI/async-openai.git
64 | 
65 | ```
66 | 
67 | ### Quick Usage
68 | 
69 | ```python
70 | 
71 | import asyncio
72 | from async_openai import OpenAI, settings, CompletionResponse
73 | 
74 | # Environment variables should pick up the defaults;
75 | # however, you can also set them explicitly. See below for more details.
76 | 
77 | # `api_key` - Your OpenAI API key. Env: [`OPENAI_API_KEY`]
78 | # `url` - The URL of the OpenAI API. Env: [`OPENAI_URL`]
79 | # `api_type` - The OpenAI API type. Env: [`OPENAI_API_TYPE`]
80 | # `api_version` - The OpenAI API version. Env: [`OPENAI_API_VERSION`]
81 | # `organization` - The OpenAI organization. Env: [`OPENAI_ORGANIZATION`]
82 | # `proxies` - A dictionary of proxies to be used. Env: [`OPENAI_PROXIES`]
83 | # `timeout` - The timeout in seconds to be used. Env: [`OPENAI_TIMEOUT`]
84 | # `max_retries` - The number of retries to be used. Env: [`OPENAI_MAX_RETRIES`]
85 | 
86 | OpenAI.configure(
87 |     api_key = "sk-XXXX",
88 |     organization = "org-XXXX",
89 |     debug_enabled = False,
90 | )
91 | 
92 | # Alternatively, you can configure the settings through environment variables
93 | # settings.configure(
94 | #     api_key = "sk-XXXX",
95 | #     organization = "org-XXXX",
96 | # )
97 | 
98 | 
99 | # [Sync] create a completion
100 | # Results return a CompletionResponse object
101 | result: CompletionResponse = OpenAI.completions.create(
102 |     prompt = 'say this is a test',
103 |     max_tokens = 4,
104 |     stream = True
105 | )
106 | 
107 | # print the completion text,
108 | # which is concatenated together from result['choices'][n]['text']
109 | 
110 | print(result.text)
111 | 
112 | # print the number of choices returned
113 | print(len(result))
114 | 
115 | # get the cost consumption for the request
116 | print(result.consumption)
117 | 
118 | # [Async] create a completion
119 | # All async methods are generally prefixed with `async_`
120 | 
121 | result: CompletionResponse = asyncio.run(
122 |     OpenAI.completions.async_create(
123 |         prompt = 'say this is a test',
124 |         max_tokens = 4,
125 |         stream = True
126 |     )
127 | )
128 | 
129 | ```
130 | 
131 | ### Configuration and Environment Variables
132 | 
133 | The following environment variables can be used to configure the client (a usage sketch follows the table).
134 | 
135 | ```
136 | 
137 | OpenAI Configuration
138 | 
139 | url: The OpenAI API URL | Env: [`OPENAI_API_URL`]
140 | scheme: The OpenAI API Scheme | Env: [`OPENAI_API_SCHEME`]
141 | host: The OpenAI API Host | Env: [`OPENAI_API_HOST`]
142 | port: The OpenAI API Port | Env: [`OPENAI_API_PORT`]
143 | api_base: The OpenAI API Base | Env: [`OPENAI_API_BASE`]
144 | api_key: The OpenAI API Key | Env: [`OPENAI_API_KEY`]
145 | api_path: The OpenAI API Path | Env: [`OPENAI_API_PATH`]
146 | api_type: The OpenAI API Type | Env: [`OPENAI_API_TYPE`]
147 | api_version: The OpenAI API Version | Env: [`OPENAI_API_VERSION`]
148 | api_key_path: The API Key Path | Env: [`OPENAI_API_KEY_PATH`]
149 | organization: Organization | Env: [`OPENAI_ORGANIZATION`]
150 | proxies: The OpenAI Proxies | Env: [`OPENAI_PROXIES`]
151 | timeout: Timeout in Seconds | Env: [`OPENAI_TIMEOUT`]
152 | max_retries: The OpenAI Max Retries | Env: [`OPENAI_MAX_RETRIES`]
153 | ignore_errors: Ignore Errors | Env: [`OPENAI_IGNORE_ERRORS`]
154 | disable_retries: Disable Retries | Env: [`OPENAI_DISABLE_RETRIES`]
155 | max_connections: Max Connections | Env: [`OPENAI_MAX_CONNECTIONS`]
156 | max_keepalive_connections: Max Keepalive Connections | Env: [`OPENAI_MAX_KEEPALIVE_CONNECTIONS`]
157 | keepalive_expiry: Keepalive Expiry | Env: [`OPENAI_KEEPALIVE_EXPIRY`]
158 | custom_headers: Custom Headers | Env: [`OPENAI_CUSTOM_HEADERS`]
159 | 
160 | Azure Configuration
161 | 
162 | azure_url: The OpenAI API URL | Env: [`AZURE_OPENAI_API_URL`]
163 | azure_scheme: The OpenAI API Scheme | Env: [`AZURE_OPENAI_API_SCHEME`]
164 | azure_host: The OpenAI API Host | Env: [`AZURE_OPENAI_API_HOST`]
165 | azure_port: The OpenAI API Port | Env: [`AZURE_OPENAI_API_PORT`]
166 | azure_api_key: The OpenAI API Key | Env: [`AZURE_OPENAI_API_KEY`]
167 | azure_api_base: The OpenAI API Base | Env: [`AZURE_OPENAI_API_BASE`]
168 | azure_api_path: The OpenAI API Path | Env: [`AZURE_OPENAI_API_PATH`]
169 | azure_api_type: The OpenAI API Type | Env: [`AZURE_OPENAI_API_TYPE`]
170 | azure_api_version: The OpenAI API Version | Env: [`AZURE_OPENAI_API_VERSION`]
171 | azure_api_key_path: The API Key Path | Env: [`AZURE_OPENAI_API_KEY_PATH`]
172 | azure_organization: Organization | Env: [`AZURE_OPENAI_ORGANIZATION`]
173 | azure_proxies: The OpenAI Proxies | Env: [`AZURE_OPENAI_PROXIES`]
174 | azure_timeout: Timeout in Seconds | Env: [`AZURE_OPENAI_TIMEOUT`]
175 | azure_max_retries: The OpenAI Max Retries | Env: [`AZURE_OPENAI_MAX_RETRIES`]
176 | 
177 | ```
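For instance, a minimal sketch of configuring the client entirely through environment variables (the variable names come from the table above; the values are placeholders). Since settings are resolved lazily on the first call, the variables must be set before the first request:

```python
import os

os.environ['OPENAI_API_KEY'] = 'sk-XXXX'
os.environ['OPENAI_ORGANIZATION'] = 'org-XXXX'
os.environ['AZURE_OPENAI_API_BASE'] = 'https://....openai.azure.com/'
os.environ['AZURE_OPENAI_API_KEY'] = '....'

from async_openai import OpenAI

# No explicit OpenAI.configure(...) call is required; the settings
# object picks the values up from the environment on first use.
result = OpenAI.completions.create(
    prompt = 'say this is a test',
    max_tokens = 4,
)
```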
178 | 
179 | 
180 | ### Initialize Clients Manually and Work with Multiple Clients
181 | 
182 | ```python
183 | 
184 | from async_openai import OpenAI
185 | 
186 | # Configure your primary client (default)
187 | 
188 | 
189 | OpenAI.configure(
190 |     api_key = "sk-XXXX",
191 |     organization = "org-XXXX",
192 |     debug_enabled = False,
193 | 
194 |     # Azure Configuration
195 |     azure_api_base = 'https://....openai.azure.com/',
196 |     azure_api_version = '2023-07-01-preview',
197 |     azure_api_key = '....',
198 | )
199 | 
200 | # Returns the default client (openai)
201 | oai = OpenAI.init_api_client()
202 | 
203 | # Configure your secondary client (azure) and use it directly
204 | az = OpenAI.init_api_client('az', set_as_default = False, debug_enabled = True)
205 | result = az.completions.create(
206 |     prompt = 'say this is a test',
207 |     max_tokens = 4,
208 |     stream = True
209 | )
210 | 
211 | 
212 | # Use the default client (openai)
213 | result = OpenAI.completions.create(
214 |     prompt = 'say this is a test',
215 |     max_tokens = 4,
216 |     stream = True
217 | )
218 | # Or
219 | result = oai.completions.create(
220 |     prompt = 'say this is a test',
221 |     max_tokens = 4,
222 |     stream = True
223 | )
224 | 
225 | # You can select the different clients by name or index
226 | result = OpenAI['az'].completions.create(
227 |     prompt = 'say this is a test',
228 |     max_tokens = 4,
229 |     stream = True
230 | )
231 | 
232 | # Use the default client (openai)
233 | result = OpenAI['default'].completions.create(
234 |     prompt = 'say this is a test',
235 |     max_tokens = 4,
236 |     stream = True
237 | )
238 | 
239 | # Will use the `default` client since it was initialized first
240 | result = OpenAI[0].completions.create(
241 |     prompt = 'say this is a test',
242 |     max_tokens = 4,
243 |     stream = True
244 | )
245 | 
246 | ```
247 | 
248 | ### Handling Errors, Retries, and Rotations
249 | 
250 | The example below shows how to rotate between multiple clients when you hit an error.
251 | 
252 | **Important:** Auto-rotation is only supported with the `chat_create` and `async_chat_create` methods. Otherwise, you should handle the rotation manually (see the sketch after this example).
253 | 
254 | ```python
255 | 
256 | import asyncio
257 | from async_openai import OpenAI, ChatResponse
258 | from async_openai.utils import logger
259 | 
260 | OpenAI.configure(
261 |     api_key = "sk-XXXX",
262 |     organization = "org-XXXX",
263 |     debug_enabled = False,
264 | 
265 |     # Azure Configuration
266 |     azure_api_base = 'https://....openai.azure.com/',
267 |     azure_api_version = '2023-07-01-preview',
268 |     azure_api_key = '....',
269 | 
270 |     # This will allow you to auto-rotate clients when you hit an error,
271 |     # but only if you have multiple clients configured and are using `OpenAI.chat_create`
272 |     enable_rotating_clients = True,
273 | 
274 |     # This will prioritize Azure over OpenAI when using `OpenAI.chat_create`
275 |     prioritize = "azure",
276 | )
277 | 
278 | # Display the current client
279 | OpenAI.get_current_client_info(verbose = True)
280 | 
281 | # Rotate to the next client
282 | # OpenAI.rotate_client(verbose = True)
283 | 
284 | ###
285 | # [Sync] create a completion with auto-rotation and auto-retry
286 | ###
287 | 
288 | result: ChatResponse = OpenAI.chat_create(
289 |     model = "gpt-3.5-turbo-16k",
290 |     messages = [
291 |         {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"}
292 |     ],
293 |     auto_retry = True,
294 | 
295 | )
296 | 
297 | logger.info(f'Result Chat Message: {result.messages}')
298 | logger.info(f'Result Usage: {result.usage}')
299 | logger.info(f'Result Consumption: {result.consumption}')
300 | 
301 | ###
302 | # [Async] create a completion with auto-rotation and auto-retry
303 | ###
304 | 
305 | result: ChatResponse = asyncio.run(
306 |     OpenAI.async_chat_create(
307 |         model = "gpt-3.5-turbo-16k",
308 |         messages = [
309 |             {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"}
310 |         ],
311 |         auto_retry = True,
312 |     )
313 | )
314 | 
315 | ```
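For the other routes, a minimal sketch of handling rotation manually (the exception types are exported by `async_openai`; the retry loop and its bounds are illustrative assumptions, not library behavior):

```python
from async_openai import OpenAI, RateLimitError, ServiceUnavailableError
from async_openai.utils import logger

def create_completion_with_rotation(prompt: str, max_attempts: int = 3):
    """Try each configured client in turn until one succeeds."""
    for attempt in range(max_attempts):
        try:
            return OpenAI.completions.create(
                prompt = prompt,
                max_tokens = 4,
            )
        except (RateLimitError, ServiceUnavailableError) as e:
            logger.warning(f'Client failed ({e}); rotating [{attempt + 1}/{max_attempts}]')
            # Move to the next configured client before retrying.
            OpenAI.rotate_client(verbose = True)
    raise RuntimeError('All configured clients failed.')
```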
316 | 
317 | ### Function Calls
318 | 
319 | The latest version of the API allows for function calls to be made. This is currently only supported in `Chat`, and requires API version `2023-07-01-preview` for `azure`.
320 | 
321 | Function calls support using `pydantic` models to auto-generate the schemas.
322 | 
323 | ```python
324 | 
325 | import asyncio
326 | from enum import Enum
327 | from async_openai import OpenAI, ChatResponse
328 | from async_openai.utils import logger
329 | from pydantic import BaseModel, Field
330 | 
331 | class Unit(str, Enum):
332 |     celsius = "celsius"
333 |     fahrenheit = "fahrenheit"
334 | 
335 | class Weather(BaseModel):
336 |     location: str = Field(..., description="The city and state, e.g. San Francisco, CA.")
337 |     unit: Unit = Field(Unit.fahrenheit)
338 | 
339 | functions = [
340 |     {
341 |         "name": "get_current_weather",
342 |         "description": "Get the current weather in a given location",
343 |         "parameters": Weather,
344 |     }
345 | ]
346 | 
347 | result: ChatResponse = OpenAI.chat_create(
348 |     model = "gpt-3.5-turbo-16k",
349 |     messages = [
350 |         {"role": "user", "content": "What's the weather like in Boston today?"}
351 |     ],
352 |     functions = functions,
353 |     auto_retry = True,
354 | )
355 | 
356 | logger.info(f'Result Chat Message: {result.messages}')
357 | logger.info(f'Result Chat Function: {result.function_results}')
358 | logger.info(f'Result Usage: {result.usage}')
359 | logger.info(f'Result Consumption: {result.consumption}')
360 | 
361 | """
362 | Result:
363 | 
364 | > Result Chat Message: [ChatMessage(content='', role='assistant', function_call=FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'}), name=None)]
365 | > Result Chat Function: [FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'})]
366 | > Result Usage: prompt_tokens=16 completion_tokens=19 total_tokens=35
367 | > Result Consumption: 0.00012399999999999998
368 | """
369 | 
370 | ```
371 | 
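To act on the returned call, a minimal sketch that dispatches `result.function_results` to a local handler (the `get_current_weather` implementation and the `handlers` table are illustrative assumptions, not part of the library):

```python
def get_current_weather(location: str, unit: str = "fahrenheit") -> str:
    # Placeholder implementation; a real handler would query a weather API.
    return f"It is 72 degrees {unit} and sunny in {location}."

# Map function names returned by the model to local callables.
handlers = {"get_current_weather": get_current_weather}

for call in result.function_results:
    # `call.arguments` is returned as a parsed dict, as shown in the result above.
    output = handlers[call.name](**call.arguments)
    logger.info(f'{call.name} -> {output}')
```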
San Francisco, CA.") 337 | unit: Unit = Field(Unit.fahrenheit) 338 | 339 | functions = [ 340 | { 341 | "name": "get_current_weather", 342 | "description": "Get the current weather in a given location", 343 | "parameters": Weather, 344 | } 345 | ] 346 | 347 | result: ChatResponse = OpenAI.chat_create( 348 | model = "gpt-3.5-turbo-16k", 349 | messages = [ 350 | {"role": "user", "content": "What's the weather like in Boston today?"} 351 | ], 352 | functions = functions, 353 | auto_retry = True, 354 | ) 355 | 356 | logger.info(f'Result Chat Message: {result.messages}') 357 | logger.info(f'Result Chat Function: {result.function_results}') 358 | logger.info(f'Result Usage: {result.usage}') 359 | logger.info(f'Result Consumption: {result.consumption}') 360 | 361 | """ 362 | Result: 363 | 364 | > Result Chat Message: [ChatMessage(content='', role='assistant', function_call=FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'}), name=None)] 365 | > Result Chat Function: [FunctionCall(name='get_current_weather', arguments={'location': 'Boston, MA'})] 366 | > Result Usage: prompt_tokens=16 completion_tokens=19 total_tokens=35 367 | > Result Consumption: 0.00012399999999999998 368 | """ 369 | 370 | ``` 371 | 372 | ### Configure Azure Model Mapping 373 | 374 | Your azure models may be named differently than the default mapping. By configuring the mapping, you can automatically map the models to the correct azure model (when using openai model names). 375 | 376 | ```python 377 | 378 | from async_openai import OpenAI 379 | 380 | """ 381 | Default Azure Model Mapping 382 | { 383 | 'gpt-3.5-turbo': 'gpt-35-turbo', 384 | 'gpt-3.5-turbo-16k': 'gpt-35-turbo-16k', 385 | 'gpt-3.5-turbo-instruct': 'gpt-35-turbo-instruct', 386 | 'gpt-3.5-turbo-0301': 'gpt-35-turbo-0301', 387 | 'gpt-3.5-turbo-0613': 'gpt-35-turbo-0613', 388 | } 389 | """ 390 | 391 | AzureModelMapping = { 392 | 'gpt-3.5-turbo': 'azure-gpt-35-turbo', 393 | 'gpt-3.5-turbo-16k': 'azure-gpt-35-turbo-16k', 394 | 'gpt-3.5-turbo-instruct': 'azure-gpt-35-turbo-instruct', 395 | 'gpt-3.5-turbo-0301': 'azure-gpt-35-turbo-0301', 396 | 'gpt-3.5-turbo-0613': 'azure-gpt-35-turbo-0613', 397 | } 398 | 399 | OpenAI.configure( 400 | api_key = "sk-XXXX", 401 | organization = "org-XXXX", 402 | debug_enabled = False, 403 | 404 | # Azure Configuration 405 | azure_api_base = 'https://....openai.azure.com/', 406 | azure_api_version = '2023-07-01-preview', 407 | azure_api_key = '....', 408 | azure_model_mapping = AzureModelMapping, 409 | ) 410 | 411 | # This will now use the azure endpoint as the default client 412 | OpenAI.init_api_client('az', set_as_default = True, debug_enabled = True) 413 | 414 | # This will automatically map "gpt-3.5-turbo-16k" -> "azure-gpt-35-turbo-16k" 415 | result: ChatResponse = OpenAI.chat.create( 416 | model = "gpt-3.5-turbo-16k", 417 | messages = [ 418 | {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"} 419 | ], 420 | auto_retry = True, 421 | ) 422 | 423 | 424 | ``` 425 | 426 | 427 | --- 428 | 429 | ### Dependencies 430 | 431 | The aim of this library is to be as lightweight as possible. 
It is built on top of the following libraries: 432 | 433 | - [aiohttpx](https://github.com/GrowthEngineAI/aiohttpx): Unified Async / Sync HTTP Client that wraps around `httpx` 434 | 435 | - [httpx](https://www.python-httpx.org/): Async / Sync HTTP Requests 436 | 437 | - [lazyops](https://github.com/trisongz/lazyops): Provides numerous utility functions for working with Async / Sync code and data structures 438 | 439 | - [pydantic](https://pydantic-docs.helpmanual.io/): Type Support 440 | 441 | - [file-io](https://github.com/trisongz/file-io): Async Cloud-based File Storage I/O 442 | 443 | - [backoff](https://github.com/litl/backoff): Retries with Exponential Backoff 444 | 445 | 446 | -------------------------------------------------------------------------------- /async_openai/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from async_openai.types import errors 4 | from async_openai.utils.config import OpenAISettings, get_settings 5 | from async_openai.types.errors import ( 6 | OpenAIError, 7 | APIError, 8 | AuthenticationError, 9 | InvalidRequestError, 10 | RateLimitError, 11 | APIConnectionError, 12 | Timeout, 13 | TryAgain, 14 | ServiceUnavailableError, 15 | ) 16 | 17 | from async_openai.types.options import ( 18 | ApiType, 19 | CompletionModels, 20 | FilePurpose, 21 | FinetuneModels, 22 | ImageSize, 23 | ImageFormat, 24 | ) 25 | 26 | ## Base Object Models 27 | from async_openai.schemas.completions import CompletionChoice, CompletionObject, CompletionResponse 28 | from async_openai.schemas.chat import ChatMessage, ChatChoice, ChatObject, ChatResponse 29 | from async_openai.schemas.edits import EditChoice, EditObject, EditResponse 30 | from async_openai.schemas.embeddings import EmbeddingData, EmbeddingObject, EmbeddingResponse 31 | # from async_openai.schemas.files import FileChoice, FileObject, FileResponse 32 | from async_openai.schemas.images import ImageData, ImageObject, ImageResponse 33 | from async_openai.schemas.models import ModelData, ModelObject, ModelResponse 34 | 35 | 36 | ## Route Models 37 | from async_openai.schemas.completions import CompletionRoute 38 | from async_openai.schemas.chat import ChatRoute 39 | from async_openai.schemas.edits import EditRoute 40 | from async_openai.schemas.embeddings import EmbeddingRoute 41 | # from async_openai.schemas.files import FileRoute 42 | from async_openai.schemas.images import ImageRoute 43 | from async_openai.schemas.models import ModelRoute 44 | 45 | 46 | 47 | from async_openai.routes import ApiRoutes 48 | from async_openai.client import OpenAIClient, OpenAI, OpenAIManager 49 | 50 | 51 | 52 | # Completions = OpenAI.completions 53 | # Edits = OpenAI.edits 54 | # Embeddings = OpenAI.embeddings 55 | # # Files = OpenAI.files 56 | # Images = OpenAI.images 57 | # Models = OpenAI.models 58 | -------------------------------------------------------------------------------- /async_openai/client.py: -------------------------------------------------------------------------------- 1 | import aiohttpx 2 | import contextlib 3 | from typing import Optional, Callable, Dict, Union, List 4 | from lazyops.utils.helpers import timed_cache 5 | from async_openai.schemas import * 6 | from async_openai.types.options import ApiType 7 | from async_openai.utils.logs import logger 8 | from async_openai.utils.config import get_settings, OpenAISettings, AzureOpenAISettings, OpenAIAuth, ProxyObject 9 | from async_openai.routes import ApiRoutes 10 | from 
async_openai.meta import OpenAIMetaClass 11 | from async_openai.manager import OpenAIManager as OpenAISessionManager 12 | 13 | _update_params = [ 14 | 'url', 15 | 'scheme', 16 | 'host', 17 | 'port', 18 | 'api_path', 19 | 'api_base', 20 | 'api_key', 21 | 'api_type', 22 | 'api_version', 23 | 'organization', 24 | 'proxies', 25 | 'app_info', 26 | 27 | ] 28 | 29 | class OpenAIClient: 30 | """ 31 | Main Client for all the routes in the API. 32 | """ 33 | 34 | api_key: Optional[str] = None 35 | url: Optional[str] = None 36 | scheme: Optional[str] = None 37 | host: Optional[str] = None 38 | port: Optional[int] = None 39 | api_base: Optional[str] = None 40 | api_path: Optional[str] = None 41 | api_type: Optional[ApiType] = None 42 | api_version: Optional[str] = None 43 | organization: Optional[str] = None 44 | proxies: Optional[Union[str, Dict]] = None 45 | app_info: Optional[Dict[str, str]] = None 46 | 47 | headers: Optional[Dict] = None 48 | debug_enabled: Optional[bool] = None 49 | on_error: Optional[Callable] = None 50 | timeout: Optional[int] = None 51 | max_retries: Optional[int] = None 52 | ignore_errors: Optional[bool] = None 53 | disable_retries: Optional[bool] = None 54 | retry_function: Optional[Callable] = None 55 | 56 | api_url: Optional[str] = None 57 | base_url: Optional[str] = None 58 | 59 | settings: Optional[OpenAISettings] = None 60 | name: Optional[str] = None 61 | is_azure: Optional[bool] = None 62 | azure_model_mapping: Optional[Dict[str, str]] = None 63 | 64 | auth: Optional[OpenAIAuth] = None 65 | _client: Optional[aiohttpx.Client] = None 66 | _routes: Optional[ApiRoutes] = None 67 | _kwargs: Optional[Dict] = None 68 | 69 | def __init__( 70 | self, 71 | **kwargs 72 | ): 73 | """ 74 | Lazily Instantiates the OpenAI Client 75 | """ 76 | self.model_rate_limits: Dict[str, Dict[str, int]] = {} 77 | self.client_callbacks: List[Callable] = [] 78 | self.configure_params(**kwargs) 79 | 80 | def response_event_hook(self, response: aiohttpx.Response): 81 | """ 82 | Monitor the rate limits 83 | """ 84 | url = response.url 85 | headers = response.headers 86 | with contextlib.suppress(Exception): 87 | if self.is_azure: 88 | model_name = str(url).split('deployments/', 1)[-1].split('/', 1)[0].strip() 89 | else: 90 | model_name = headers.get('openai-model') 91 | model_name = model_name.lstrip("https:").strip() 92 | if not model_name: return 93 | if model_name not in self.model_rate_limits: 94 | self.model_rate_limits[model_name] = {} 95 | for key, value in { 96 | 'x-ratelimit-remaining-requests': 'remaining', 97 | 'x-ratelimit-remaining-tokens': 'remaining_tokens', 98 | 'x-ratelimit-limit-tokens': 'limit_tokens', 99 | 'x-ratelimit-limit-requests': 'limit_requests', 100 | }.items(): 101 | if key in headers: 102 | self.model_rate_limits[model_name][value] = int(headers[key]) 103 | if self.debug_enabled: 104 | logger.info(f"Rate Limits: {self.model_rate_limits}") 105 | 106 | async def aresponse_event_hook(self, response: aiohttpx.Response): 107 | """ 108 | Monitor the rate limits 109 | """ 110 | return self.response_event_hook(response) 111 | 112 | @property 113 | def client(self) -> aiohttpx.Client: 114 | """ 115 | Returns the aiohttpx client 116 | """ 117 | if self._client is None: 118 | self.configure_client() 119 | return self._client 120 | 121 | @property 122 | def routes(self) -> ApiRoutes: 123 | """ 124 | Returns the routes class 125 | """ 126 | if self._routes is None: 127 | self.configure_routes() 128 | return self._routes 129 | 130 | def configure_params( 131 | self, 132 | api_key: 
Optional[str] = None, 133 | url: Optional[str] = None, 134 | scheme: Optional[str] = None, 135 | host: Optional[str] = None, 136 | port: Optional[int] = None, 137 | api_base: Optional[str] = None, 138 | api_path: Optional[str] = None, 139 | api_type: Optional[ApiType] = None, 140 | api_version: Optional[str] = None, 141 | organization: Optional[str] = None, 142 | proxies: Optional[Union[str, Dict]] = None, 143 | app_info: Optional[Dict[str, str]] = None, 144 | 145 | headers: Optional[Dict] = None, 146 | debug_enabled: Optional[bool] = None, 147 | on_error: Optional[Callable] = None, 148 | timeout: Optional[int] = None, 149 | max_retries: Optional[int] = None, 150 | ignore_errors: Optional[bool] = None, 151 | disable_retries: Optional[bool] = None, 152 | retry_function: Optional[Callable] = None, 153 | 154 | settings: Optional[OpenAISettings] = None, 155 | name: Optional[str] = None, 156 | is_azure: Optional[bool] = None, 157 | azure_model_mapping: Optional[Dict[str, str]] = None, 158 | auth: Optional[OpenAIAuth] = None, 159 | client_callbacks: Optional[List[Callable]] = None, 160 | **kwargs 161 | ): # sourcery skip: low-code-quality 162 | """ 163 | Helper to configure the client 164 | """ 165 | if self.settings is None and settings is None: 166 | settings = get_settings() 167 | if settings is not None: 168 | self.settings = settings.azure if is_azure else settings 169 | if api_key is not None: 170 | self.api_key = api_key 171 | elif self.api_key is None: 172 | self.api_key = self.settings.api_key 173 | if api_type is not None: 174 | self.api_type = api_type 175 | elif self.api_type is None: 176 | self.api_type = self.settings.api_type 177 | if organization is not None: 178 | self.organization = organization 179 | elif self.organization is None: 180 | self.organization = self.settings.organization 181 | if proxies is not None: 182 | self.proxies = proxies 183 | elif self.proxies is None: 184 | self.proxies = self.settings.proxies 185 | if app_info is not None: 186 | self.app_info = app_info 187 | elif self.app_info is None: 188 | self.app_info = self.settings.app_info 189 | if any( 190 | [ 191 | url is not None, 192 | scheme is not None, 193 | host is not None, 194 | port is not None, 195 | api_base is not None, 196 | self.api_url is None, 197 | ] 198 | ): 199 | self.api_url = self.settings.get_api_url(host = host, port = port, scheme = scheme, url = url, api_base = api_base) 200 | if any( 201 | [ 202 | url is not None, 203 | scheme is not None, 204 | host is not None, 205 | port is not None, 206 | api_path is not None, 207 | api_base is not None, 208 | self.base_url is None, 209 | ] 210 | ): 211 | self.base_url = self.settings.get_base_api_url(host = host, port = port, scheme = scheme, url = url, api_path = api_path, api_base = api_base) 212 | 213 | if debug_enabled is not None: 214 | self.debug_enabled = debug_enabled 215 | elif self.debug_enabled is None: 216 | self.debug_enabled = self.settings.debug_enabled 217 | 218 | if timeout is not None: 219 | self.timeout = timeout 220 | elif self.timeout is None: 221 | self.timeout = self.settings.timeout 222 | 223 | if headers is not None: 224 | self.headers = headers 225 | else: 226 | self.headers = self.settings.get_headers(api_version = self.api_version, api_type = self.api_type, organization = self.organization, app_info = self.app_info) 227 | # self.headers = self.settings.get_headers(api_key = self.api_key, api_version = self.api_version, api_type = self.api_type, organization = self.organization, app_info = self.app_info) 228 | 229 | if 
on_error is not None: 230 | self.on_error = on_error 231 | if ignore_errors is not None: 232 | self.ignore_errors = ignore_errors 233 | elif self.ignore_errors is None: 234 | self.ignore_errors = self.settings.ignore_errors 235 | if max_retries is not None: 236 | self.max_retries = max_retries 237 | elif self.max_retries is None: 238 | self.max_retries = self.settings.max_retries 239 | if disable_retries is not None: 240 | self.disable_retries = disable_retries 241 | elif self.disable_retries is None: 242 | self.disable_retries = self.settings.disable_retries 243 | 244 | if retry_function is not None: 245 | self.retry_function = retry_function 246 | 247 | if is_azure is not None: 248 | self.is_azure = is_azure 249 | elif self.is_azure is None: 250 | self.is_azure = isinstance(self.settings, AzureOpenAISettings) 251 | if azure_model_mapping is not None: 252 | self.azure_model_mapping = azure_model_mapping 253 | if name is not None: 254 | self.name = name 255 | elif self.name is None: 256 | self.name = 'default' 257 | if api_version is not None: 258 | self.api_version = api_version 259 | elif self.api_version is None: 260 | self.api_version = self.settings.api_version 261 | 262 | 263 | if auth is not None: 264 | self.auth = auth 265 | elif self.auth is None: 266 | self.auth = self.settings.get_api_client_auth(api_key = self.api_key, api_type = self.api_type) 267 | 268 | if kwargs: self._kwargs = kwargs 269 | self.log_method = logger.info if self.debug_enabled else logger.debug 270 | if not self.debug_enabled: 271 | self.settings.disable_httpx_logger() 272 | 273 | if client_callbacks is not None: 274 | self.client_callbacks = client_callbacks 275 | # if self.debug_enabled: 276 | # logger.info(f"OpenAI Client Configured: {self.client.base_url}") 277 | # logger.debug(f"Debug Enabled: {self.debug_enabled}") 278 | 279 | def configure_client(self, **kwargs): 280 | """ 281 | Helper to configure the client 282 | """ 283 | if self._client is not None: return 284 | # logger.info(f"OpenAI Client Configured: {self.base_url} [{self.name}]") 285 | extra_kwargs = {} 286 | if self.settings.limit_monitor_enabled: 287 | extra_kwargs['event_hooks'] = {'response': [self.response_event_hook]} 288 | extra_kwargs['async_event_hooks'] = {'response': [self.aresponse_event_hook]} 289 | 290 | self._client = aiohttpx.Client( 291 | base_url = self.base_url, 292 | timeout = self.timeout, 293 | limits = self.settings.api_client_limits, 294 | auth = self.auth, 295 | headers = self.headers, 296 | **extra_kwargs, 297 | ) 298 | 299 | def configure_routes(self, **kwargs): 300 | """ 301 | Helper to configure the client routes 302 | """ 303 | if self._routes is not None: return 304 | kwargs = kwargs or {} 305 | if self._kwargs: kwargs.update(self._kwargs) 306 | self._routes = ApiRoutes( 307 | client = self.client, 308 | name = self.name, 309 | # headers = self.headers, 310 | debug_enabled = self.debug_enabled, 311 | on_error = self.on_error, 312 | ignore_errors = self.ignore_errors, 313 | timeout = self.timeout, 314 | max_retries = self.max_retries, 315 | settings = self.settings, 316 | is_azure = self.is_azure, 317 | azure_model_mapping = self.azure_model_mapping, 318 | disable_retries = self.disable_retries, 319 | retry_function = self.retry_function, 320 | client_callbacks = self.client_callbacks, 321 | **kwargs 322 | ) 323 | if self.debug_enabled: 324 | logger.info(f"[{self.name}] OpenAI Client Configured: {self.client.base_url} [Azure: {self.is_azure}]") 325 | logger.debug(f"Debug Enabled: {self.debug_enabled}") 326 | 327 | 
328 | def reset( 329 | self, 330 | **kwargs 331 | ): 332 | """ 333 | Resets the client to the default settings 334 | """ 335 | self._client = None 336 | self._routes = None 337 | self.configure_params(**kwargs) 338 | 339 | 340 | @property 341 | def completions(self) -> CompletionRoute: 342 | """ 343 | Returns the `CompletionRoute` class for interacting with `Completions`. 344 | 345 | Doc: `https://beta.openai.com/docs/api-reference/completions` 346 | """ 347 | return self.routes.completions 348 | 349 | @property 350 | def chat(self) -> ChatRoute: 351 | """ 352 | Returns the `ChatRoute` class for interacting with `Chat` components 353 | 354 | Doc: `https://platform.openai.com/docs/api-reference/chat` 355 | """ 356 | return self.routes.chat 357 | 358 | @property 359 | def edits(self) -> EditRoute: 360 | """ 361 | Returns the `EditRoute` class for interacting with `Edits`. 362 | 363 | Doc: `https://beta.openai.com/docs/api-reference/edits` 364 | """ 365 | return self.routes.edits 366 | 367 | @property 368 | def embeddings(self) -> EmbeddingRoute: 369 | """ 370 | Returns the `EmbeddingRoute` class for interacting with `Embeddings`. 371 | 372 | Doc: `https://beta.openai.com/docs/api-reference/embeddings` 373 | """ 374 | return self.routes.embeddings 375 | 376 | @property 377 | def images(self) -> ImageRoute: 378 | """ 379 | Returns the `ImageRoute` class for interacting with `Images`. 380 | 381 | Doc: `https://beta.openai.com/docs/api-reference/images` 382 | """ 383 | return self.routes.images 384 | 385 | @property 386 | def models(self) -> ModelRoute: 387 | """ 388 | Returns the `ModelRoute` class for interacting with `models`. 389 | 390 | Doc: `https://beta.openai.com/docs/api-reference/models` 391 | """ 392 | return self.routes.models 393 | 394 | """ 395 | Context Managers 396 | """ 397 | 398 | async def async_close(self): 399 | await self.client.aclose() 400 | 401 | def close(self): 402 | self.client.close() 403 | 404 | def __enter__(self): 405 | return self 406 | 407 | def __exit__(self, exc_type, exc_value, traceback): 408 | self.close() 409 | 410 | async def __aenter__(self): 411 | return self 412 | 413 | async def __aexit__(self, exc_type, exc_value, traceback): 414 | await self.async_close() 415 | 416 | @timed_cache(secs = 120, cache_if_result = True) 417 | def ping(self, timeout: Optional[float] = 1.0, base_url: Optional[str] = None) -> bool: 418 | """ 419 | Pings the API Endpoint to check if it's alive. 420 | """ 421 | try: 422 | # with contextlib.suppress(Exception): 423 | response = self.client.get(base_url or '/', timeout = timeout) 424 | try: 425 | data = response.json() 426 | # we should expect a 404 with a json response 427 | # if self.debug_enabled: logger.info(f"API Ping: {data}\n{response.headers}") 428 | if data.get('error'): return True 429 | except Exception as e: 430 | logger.error(f"[{self.name} - {response.status_code}] API Ping Failed: {response.text[:500]}") 431 | except Exception as e: 432 | logger.error(f"[{self.name}] API Ping Failed: {e}") 433 | return False 434 | 435 | @timed_cache(secs = 120, cache_if_result = True) 436 | async def aping(self, timeout: Optional[float] = 1.0, base_url: Optional[str] = None) -> bool: 437 | """ 438 | Pings the API Endpoint to check if it's alive. 
439 | """ 440 | try: 441 | response = await self.client.async_get(base_url or '/', timeout = timeout) 442 | try: 443 | data = response.json() 444 | # we should expect a 404 with a json response 445 | if data.get('error'): return True 446 | except Exception as e: 447 | logger.error(f"[{self.name} - {response.status_code}] API Ping Failed: {response.text[:500]}") 448 | except Exception as e: 449 | logger.error(f"[{self.name}] API Ping Failed: {e}") 450 | return False 451 | 452 | 453 | class OpenAI(metaclass = OpenAIMetaClass): 454 | """ 455 | [V1] Interface for OpenAI 456 | 457 | Deprecating this class in future versions 458 | """ 459 | pass 460 | 461 | OpenAIManager: OpenAISessionManager = ProxyObject(OpenAISessionManager) 462 | 463 | 464 | 465 | -------------------------------------------------------------------------------- /async_openai/external_client.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | OpenAI Client that supports external providers and configurations 5 | that have OpenAI-compatible endpoints. 6 | """ 7 | 8 | import abc 9 | import aiohttpx 10 | import contextlib 11 | from typing import Optional, Callable, Dict, Union, List 12 | 13 | from async_openai.schemas import * 14 | from async_openai.utils.config import get_settings, OpenAISettings 15 | from async_openai.utils.logs import logger 16 | from async_openai.utils.config import ProxyObject 17 | from async_openai.utils.external_config import ExternalProviderSettings, ExternalProviderAuth 18 | from async_openai.routes import ApiRoutes 19 | 20 | 21 | class ExternalOpenAIClient(abc.ABC): 22 | """ 23 | External Client for all the routes in the API. 24 | """ 25 | 26 | is_azure: bool = False 27 | 28 | _client: Optional[aiohttpx.Client] = None 29 | _routes: Optional[ApiRoutes] = None 30 | _kwargs: Optional[Dict] = None 31 | 32 | def __init__( 33 | self, 34 | name: str, 35 | provider: ExternalProviderSettings, 36 | is_proxied: Optional[bool] = None, 37 | **kwargs 38 | ): 39 | """ 40 | Lazily Instantiates the OpenAI Client 41 | """ 42 | self.name = name 43 | self.provider = provider 44 | self.debug_enabled: Optional[bool] = None 45 | self.on_error: Optional[Callable] = None 46 | self.timeout: Optional[int] = None 47 | self.max_retries: Optional[int] = None 48 | self.ignore_errors: Optional[bool] = None 49 | self.disable_retries: Optional[bool] = None 50 | self.retry_function: Optional[Callable] = None 51 | 52 | self.is_proxied = is_proxied if is_proxied is not None else \ 53 | (self.provider.config.has_proxy and '_noproxy' not in self.name) 54 | # logger.info(f"External Provider Configured: {self.name} [Proxied: {self.is_proxied}]") 55 | 56 | self.settings: Optional[OpenAISettings] = kwargs.pop('settings', get_settings()) 57 | self.client_callbacks: List[Callable] = [] 58 | self.auth: Optional[ExternalProviderAuth] = None 59 | self.configure_params(**kwargs) 60 | 61 | 62 | @property 63 | def client(self) -> aiohttpx.Client: 64 | """ 65 | Returns the aiohttpx client 66 | """ 67 | if self._client is None: 68 | self.configure_client() 69 | return self._client 70 | 71 | @property 72 | def routes(self) -> ApiRoutes: 73 | """ 74 | Returns the routes class 75 | """ 76 | if self._routes is None: 77 | self.configure_routes() 78 | return self._routes 79 | 80 | def configure_params( 81 | self, 82 | debug_enabled: Optional[bool] = None, 83 | on_error: Optional[Callable] = None, 84 | timeout: Optional[int] = None, 85 | max_retries: Optional[int] = None, 86 | 
ignore_errors: Optional[bool] = None, 87 | disable_retries: Optional[bool] = None, 88 | retry_function: Optional[Callable] = None, 89 | auth: Optional[ExternalProviderAuth] = None, 90 | client_callbacks: Optional[List[Callable]] = None, 91 | **kwargs 92 | ): # sourcery skip: low-code-quality 93 | """ 94 | Helper to configure the client 95 | """ 96 | 97 | if debug_enabled is not None: 98 | self.debug_enabled = debug_enabled 99 | elif self.debug_enabled is None: 100 | self.debug_enabled = self.settings.debug_enabled 101 | 102 | if timeout is not None: 103 | self.timeout = timeout 104 | elif self.timeout is None: 105 | self.timeout = self.settings.timeout 106 | 107 | if on_error is not None: 108 | self.on_error = on_error 109 | if ignore_errors is not None: 110 | self.ignore_errors = ignore_errors 111 | elif self.ignore_errors is None: 112 | self.ignore_errors = self.settings.ignore_errors 113 | if max_retries is not None: 114 | self.max_retries = max_retries 115 | elif self.max_retries is None: 116 | if self.provider.config.max_retries is not None: 117 | self.max_retries = self.provider.config.max_retries 118 | else: 119 | self.max_retries = self.settings.max_retries 120 | if disable_retries is not None: 121 | self.disable_retries = disable_retries 122 | elif self.disable_retries is None: 123 | self.disable_retries = self.settings.disable_retries 124 | if retry_function is not None: 125 | self.retry_function = retry_function 126 | 127 | if auth is not None: 128 | self.auth = auth 129 | elif self.auth is None: 130 | self.auth = ExternalProviderAuth(config = self.provider.config, is_proxied = self.is_proxied) 131 | 132 | if kwargs: self._kwargs = kwargs 133 | self.log_method = logger.info if self.debug_enabled else logger.debug 134 | if not self.debug_enabled: 135 | self.settings.disable_httpx_logger() 136 | 137 | if client_callbacks is not None: 138 | self.client_callbacks = client_callbacks 139 | 140 | def configure_client(self, **kwargs): 141 | """ 142 | Helper to configure the client 143 | """ 144 | if self._client is not None: return 145 | # logger.info(f"OpenAI Client Configured: {self.base_url} [{self.name}]") 146 | extra_kwargs = {} 147 | 148 | self._client = aiohttpx.Client( 149 | base_url = self.provider.config.proxy_url if self.is_proxied else self.provider.config.api_url, 150 | timeout = self.timeout, 151 | limits = self.settings.api_client_limits, 152 | auth = self.auth, 153 | headers = { 154 | 'content-type': 'application/json', 155 | }, 156 | **extra_kwargs, 157 | ) 158 | # logger.info(f"External Configured: {self._client.base_url} [{self.name}]") 159 | 160 | def configure_routes(self, **kwargs): 161 | """ 162 | Helper to configure the client routes 163 | """ 164 | if self._routes is not None: return 165 | kwargs = kwargs or {} 166 | if self._kwargs: kwargs.update(self._kwargs) 167 | self._routes = ApiRoutes( 168 | client = self.client, 169 | name = self.provider.name, 170 | # headers = self.headers, 171 | debug_enabled = self.debug_enabled, 172 | on_error = self.on_error, 173 | ignore_errors = self.ignore_errors, 174 | timeout = self.timeout, 175 | max_retries = self.max_retries, 176 | settings = self.settings, 177 | disable_retries = self.disable_retries, 178 | retry_function = self.retry_function, 179 | client_callbacks = self.client_callbacks, 180 | route_classes = self.provider.routes.api_route_classes, 181 | is_azure = False, 182 | **kwargs 183 | ) 184 | if self.debug_enabled: 185 | logger.info(f"[{self.name}] External Provider Configured: {self.client.base_url}") 186 | 
logger.debug(f"Debug Enabled: {self.debug_enabled}") 187 | 188 | 189 | def reset( 190 | self, 191 | **kwargs 192 | ): 193 | """ 194 | Resets the client to the default settings 195 | """ 196 | self._client = None 197 | self._routes = None 198 | self.configure_params(**kwargs) 199 | 200 | 201 | @property 202 | def completions(self) -> CompletionRoute: 203 | """ 204 | Returns the `CompletionRoute` class for interacting with `Completions`. 205 | 206 | Doc: `https://beta.openai.com/docs/api-reference/completions` 207 | """ 208 | return self.routes.completions 209 | 210 | @property 211 | def chat(self) -> ChatRoute: 212 | """ 213 | Returns the `ChatRoute` class for interacting with `Chat` components 214 | 215 | Doc: `https://platform.openai.com/docs/api-reference/chat` 216 | """ 217 | return self.routes.chat 218 | 219 | @property 220 | def edits(self) -> EditRoute: 221 | """ 222 | Returns the `EditRoute` class for interacting with `Edits`. 223 | 224 | Doc: `https://beta.openai.com/docs/api-reference/edits` 225 | """ 226 | return self.routes.edits 227 | 228 | @property 229 | def embeddings(self) -> EmbeddingRoute: 230 | """ 231 | Returns the `EmbeddingRoute` class for interacting with `Embeddings`. 232 | 233 | Doc: `https://beta.openai.com/docs/api-reference/embeddings` 234 | """ 235 | return self.routes.embeddings 236 | 237 | @property 238 | def images(self) -> ImageRoute: 239 | """ 240 | Returns the `ImageRoute` class for interacting with `Images`. 241 | 242 | Doc: `https://beta.openai.com/docs/api-reference/images` 243 | """ 244 | return self.routes.images 245 | 246 | @property 247 | def models(self) -> ModelRoute: 248 | """ 249 | Returns the `ModelRoute` class for interacting with `models`. 250 | 251 | Doc: `https://beta.openai.com/docs/api-reference/models` 252 | """ 253 | return self.routes.models 254 | 255 | """ 256 | Context Managers 257 | """ 258 | 259 | async def async_close(self): 260 | await self.client.aclose() 261 | 262 | def close(self): 263 | self.client.close() 264 | 265 | def __enter__(self): 266 | return self 267 | 268 | def __exit__(self, exc_type, exc_value, traceback): 269 | self.close() 270 | 271 | async def __aenter__(self): 272 | return self 273 | 274 | async def __aexit__(self, exc_type, exc_value, traceback): 275 | await self.async_close() 276 | 277 | 278 | def ping(self, timeout: Optional[float] = 1.0) -> bool: 279 | """ 280 | Pings the API Endpoint to check if it's alive. 281 | """ 282 | try: 283 | # with contextlib.suppress(Exception): 284 | response = self.client.get('/', timeout = timeout) 285 | data = response.json() 286 | # we should expect a 404 with a json response 287 | # if self.debug_enabled: logger.info(f"API Ping: {data}\n{response.headers}") 288 | if data.get('error'): return True 289 | except Exception as e: 290 | logger.error(f"API Ping Failed: {e}") 291 | return False 292 | 293 | async def aping(self, timeout: Optional[float] = 1.0) -> bool: 294 | """ 295 | Pings the API Endpoint to check if it's alive. 
296 | """ 297 | try: 298 | response = await self.client.async_get('/', timeout = timeout) 299 | data = response.json() 300 | # we should expect a 404 with a json response 301 | if data.get('error'): return True 302 | except Exception as e: 303 | logger.error(f"[{self.name}] API Ping Failed: {e}") 304 | return False 305 | 306 | 307 | -------------------------------------------------------------------------------- /async_openai/loadbalancer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Client LoadBalancer 3 | """ 4 | 5 | from __future__ import annotations 6 | 7 | import random 8 | from typing import Optional, List, Dict, Union, TYPE_CHECKING 9 | 10 | from async_openai.schemas import * 11 | from async_openai.utils.config import get_settings, OpenAISettings 12 | from async_openai.utils.logs import logger 13 | 14 | if TYPE_CHECKING: 15 | from async_openai.client import OpenAIClient, OpenAISessionManager 16 | from async_openai.external_client import ExternalOpenAIClient, ExternalProviderSettings 17 | 18 | 19 | class ClientLoadBalancer: 20 | """ 21 | Manages a set of clients that can be rotated. 22 | """ 23 | def __init__( 24 | self, 25 | prioritize: Optional[str] = None, 26 | settings: Optional[OpenAISettings] = None, 27 | azure_model_mapping: Optional[Dict[str, str]] = None, 28 | healthcheck: Optional[bool] = True, 29 | manager: Optional['OpenAISessionManager'] = None, 30 | ): 31 | self.settings = settings or get_settings() 32 | self.clients: Dict[str, 'OpenAIClient'] = {} 33 | self.rotate_index: int = 0 34 | self.rotate_client_names: List[str] = [] 35 | self.azure_model_mapping: Dict[str, str] = azure_model_mapping 36 | self.healthcheck: bool = healthcheck 37 | self.manager: Optional['OpenAISessionManager'] = manager 38 | 39 | assert prioritize in [None, 'azure', 'openai'], f'Invalid `prioritize` value: {prioritize}' 40 | self.prioritize: Optional[str] = prioritize 41 | 42 | @property 43 | def client_names(self) -> List[str]: 44 | """ 45 | Returns the list of client names. 46 | """ 47 | return list(self.clients.keys()) 48 | 49 | def run_client_init(self): 50 | """ 51 | Initializes the Client. 52 | 53 | Can be subclassed to provide custom initialization. 54 | """ 55 | self.init_api_client() 56 | if self.settings.has_valid_azure: 57 | self.init_api_client(client_name = 'az', is_azure = True, set_as_default = self.prioritize == 'azure', set_as_current = self.prioritize == 'azure') 58 | 59 | 60 | @property 61 | def api(self) -> 'OpenAIClient': 62 | """ 63 | Returns the inherited OpenAI client. 
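A minimal usage sketch (illustrative only; clients are lazily initialized on first access via `run_client_init`):

```python
>>> lb = ClientLoadBalancer(prioritize = 'azure')
>>> client = lb.api
```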
64 | """
65 | if not self.clients:
66 | self.run_client_init()
67 | if not self.rotate_client_names or self.rotate_index < len(self.client_names):
68 | return self.clients[self.client_names[self.rotate_index]]
69 | try:
70 | return self.clients[self.rotate_client_names[self.rotate_index]]
71 | except IndexError as e:
72 | logger.error(f'Index Error: {self.rotate_index} - {self.rotate_client_names}')
73 | raise IndexError(f'Index Error: {self.rotate_index} - {self.rotate_client_names} - {self.client_names} ({len(self.clients)})') from e
74 | 
75 | def increase_rotate_index(self):
76 | """
77 | Increases the rotate index
78 | """
79 | if self.rotate_index >= len(self.clients) - 1:
80 | self.rotate_index = 0
81 | else:
82 | self.rotate_index += 1
83 | 
84 | def rotate_client(self, index: Optional[int] = None, require_azure: Optional[bool] = None, verbose: Optional[bool] = False):
85 | """
86 | Rotates the clients
87 | """
88 | if index is not None:
89 | self.rotate_index = index
90 | return
91 | self.increase_rotate_index()
92 | if require_azure:
93 | while not self.api.is_azure:
94 | self.increase_rotate_index()
95 | if verbose:
96 | logger.info(f'Rotated Client: {self.api.name} (Azure: {self.api.is_azure} - {self.api.api_version}) [{self.rotate_index+1}/{len(self.clients)}]')
97 | 
98 | def set_client(self, client_name: Optional[str] = None, verbose: Optional[bool] = False):
99 | """
100 | Sets the client
101 | """
102 | if client_name is None:
103 | raise ValueError('`client_name` is required.')
104 | if client_name not in self.clients:
105 | raise ValueError(f'Client `{client_name}` does not exist.')
106 | self.rotate_index = self.client_names.index(client_name)
107 | if verbose:
108 | logger.info(f'Set Client: {self.api.name} (Azure: {self.api.is_azure} - {self.api.api_version}) [{self.rotate_index+1}/{len(self.clients)}]')
109 | 
110 | def current_client_info(self, verbose: Optional[bool] = False) -> Dict[str, Union[str, int]]:
111 | """
112 | Returns the current client info
113 | """
114 | data = {
115 | 'name': self.api.name,
116 | 'is_azure': self.api.is_azure,
117 | 'api_version': self.api.api_version,
118 | 'index': self.rotate_index,
119 | 'total': len(self.clients),
120 | }
121 | if verbose:
122 | logger.info(f'Current Client: {self.api.name} (Azure: {self.api.is_azure} - {self.api.api_version}) [{self.rotate_index+1}/{len(self.clients)}]')
123 | return data
124 | 
125 | 
126 | def configure_client(self, client_name: Optional[str] = None, priority: Optional[int] = None, **kwargs):
127 | """
128 | Configures an existing client
129 | """
130 | client_name = client_name or 'default'
131 | if client_name not in self.clients:
132 | raise ValueError(f'Client `{client_name}` does not exist.')
133 | self.clients[client_name].reset(**kwargs)
134 | if priority is not None:
135 | if client_name in self.rotate_client_names:
136 | self.rotate_client_names.remove(client_name)
137 | self.rotate_client_names.insert(priority, client_name)
138 | 
139 | def init_api_client(
140 | self,
141 | client_name: Optional[str] = None,
142 | set_as_default: Optional[bool] = False,
143 | is_azure: Optional[bool] = None,
144 | priority: Optional[int] = None,
145 | set_as_current: Optional[bool] = False,
146 | **kwargs
147 | ) -> 'OpenAIClient':
148 | """
149 | Creates a new OpenAI client.
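Usage (a hedged sketch; `lb` stands for a `ClientLoadBalancer` instance and the client name is illustrative):

```python
>>> client = lb.init_api_client(client_name = 'az-east', is_azure = True, priority = 0)
```

`set_as_default = True` pushes the client to the front of the rotation, while `priority` inserts it at an explicit position.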
150 | """
151 | client_name = client_name or 'default'
152 | if client_name in self.clients:
153 | return self.clients[client_name]
154 | 
155 | from async_openai.client import OpenAIClient
156 | if is_azure is None and \
157 | (
158 | 'az' in client_name and self.settings.has_valid_azure
159 | ):
160 | is_azure = True
161 | if 'client_callbacks' not in kwargs and \
162 | self.manager and \
163 | self.manager.client_callbacks:
164 | kwargs['client_callbacks'] = self.manager.client_callbacks
165 | client = OpenAIClient(
166 | name = client_name,
167 | settings = self.settings,
168 | is_azure = is_azure,
169 | azure_model_mapping = self.azure_model_mapping,
170 | **kwargs
171 | )
172 | self.clients[client_name] = client
173 | if set_as_default:
174 | self.rotate_client_names.insert(0, client_name)
175 | elif priority is not None:
176 | if client_name in self.rotate_client_names:
177 | self.rotate_client_names.remove(client_name)
178 | self.rotate_client_names.insert(priority, client_name)
179 | elif self.prioritize:
180 | if (
181 | self.prioritize == 'azure' and is_azure
182 | ) or (
183 | self.prioritize == 'openai'
184 | and not is_azure
185 | 
186 | ):
187 | self.rotate_client_names.insert(0, client_name)
188 | elif self.prioritize in ['azure', 'openai']:
189 | self.rotate_client_names.append(client_name)
190 | if set_as_current:
191 | self.rotate_index = self.rotate_client_names.index(client_name)
192 | return client
193 | 
194 | def get_api_client(self, client_name: Optional[str] = None, require_azure: Optional[bool] = None, **kwargs) -> 'OpenAIClient':
195 | """
196 | Initializes a new OpenAI client or returns an existing one.
197 | """
198 | if not client_name and not self.clients:
199 | client_name = 'default'
200 | if client_name and client_name not in self.clients:
201 | self.clients[client_name] = self.init_api_client(client_name = client_name, **kwargs)
202 | if not client_name and require_azure:
203 | while not self.api.is_azure:
204 | self.increase_rotate_index()
205 | return self.api
206 | return self.clients[client_name] if client_name else self.api
207 | 
208 | 
209 | def get_api_client_from_list(self, client_names: List[str], require_azure: Optional[bool] = None, **kwargs) -> 'OpenAIClient':
210 | """
211 | Initializes a new OpenAI client or returns an existing one from a list of client names.
212 | """
213 | if not self.healthcheck:
214 | name = self.manager.select_client_name_from_weights(client_names) if self.manager.has_client_weights else random.choice(client_names)
215 | return self.get_api_client(client_name = name, require_azure = require_azure, **kwargs)
216 | available = []
217 | for client_name in client_names:
218 | if client_name not in self.clients:
219 | self.clients[client_name] = self.init_api_client(client_name = client_name, **kwargs)
220 | if require_azure and not self.clients[client_name].is_azure:
221 | continue
222 | if not self.clients[client_name].ping(**self.manager.get_client_ping_params(client_name)):
223 | continue
224 | if not self.manager.has_client_weights:
225 | return self.clients[client_name]
226 | available.append(client_name)
227 | # return self.clients[client_name]
228 | if available:
229 | name = self.manager.select_client_name_from_weights(available)
230 | return self.clients[name]
231 | raise ValueError(f'No healthy client found from: {client_names}')
232 | 
233 | async def aget_api_client_from_list(self, client_names: List[str], require_azure: Optional[bool] = None, **kwargs) -> 'OpenAIClient':
234 | """
235 | Initializes a new OpenAI client or returns an existing one from a list of client names.
236 | """
237 | if not self.healthcheck:
238 | name = self.manager.select_client_name_from_weights(client_names) if self.manager.has_client_weights else random.choice(client_names)
239 | return self.get_api_client(client_name = name, require_azure = require_azure, **kwargs)
240 | available = []
241 | for client_name in client_names:
242 | if client_name not in self.clients:
243 | self.clients[client_name] = self.init_api_client(client_name = client_name, **kwargs)
244 | if require_azure and not self.clients[client_name].is_azure:
245 | continue
246 | if not await self.clients[client_name].aping(**self.manager.get_client_ping_params(client_name)):
247 | continue
248 | if not self.manager.has_client_weights:
249 | return self.clients[client_name]
250 | available.append(client_name)
251 | 
252 | if available:
253 | name = self.manager.select_client_name_from_weights(available)
254 | return self.clients[name]
255 | raise ValueError(f'No healthy client found from: {client_names}')
256 | 
257 | def __getitem__(self, key: Union[str, int]) -> 'OpenAIClient':
258 | """
259 | Returns a client by name.
260 | """
261 | if isinstance(key, int):
262 | key = self.rotate_client_names[key] if self.rotate_client_names else self.client_names[key]
263 | return self.clients[key] -------------------------------------------------------------------------------- /async_openai/routes.py: -------------------------------------------------------------------------------- 1 | import aiohttpx
2 | 
3 | from typing import Optional, Dict, Callable, List, Type, TYPE_CHECKING
4 | from async_openai.schemas import *
5 | from async_openai.types.routes import BaseRoute
6 | from async_openai.utils.config import get_settings, OpenAISettings, AzureOpenAISettings
7 | from async_openai.utils.logs import logger
8 | 
9 | 
10 | RouteClasses = {
11 | 'completions': CompletionRoute,
12 | 'chat': ChatRoute,
13 | 'edits': EditRoute,
14 | 'embeddings': EmbeddingRoute,
15 | # 'files': FileRoute,
16 | 'images': ImageRoute,
17 | 'models': ModelRoute,
18 | 
19 | }
20 | 
21 | class ApiRoutes:
22 | 
23 | """
24 | Container for all the routes in the API.
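Routes are attached as attributes by `init_routes`, so a configured instance exposes them directly (an illustrative sketch, assuming `client` is an `aiohttpx.Client`):

```python
>>> routes = ApiRoutes(client = client, name = 'default')
>>> routes.chat  # -> ChatRoute
```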
25 | """ 26 | 27 | completions: CompletionRoute = None 28 | chat: ChatRoute = None 29 | edits: EditRoute = None 30 | embeddings: EmbeddingRoute = None 31 | # files: FileRoute = None 32 | images: ImageRoute = None 33 | models: ModelRoute = None 34 | 35 | def __init__( 36 | self, 37 | client: aiohttpx.Client, 38 | name: str, 39 | # headers: Optional[Dict] = None, 40 | debug_enabled: Optional[bool] = False, 41 | on_error: Optional[Callable] = None, 42 | ignore_errors: Optional[bool] = False, 43 | disable_retries: Optional[bool] = None, 44 | retry_function: Optional[Callable] = None, 45 | 46 | timeout: Optional[int] = None, 47 | max_retries: Optional[int] = None, 48 | settings: Optional[OpenAISettings] = None, 49 | is_azure: Optional[bool] = None, 50 | client_callbacks: Optional[List[Callable]] = None, 51 | route_classes: Optional[Dict[str, Type[BaseRoute]]] = None, 52 | 53 | **kwargs 54 | ): 55 | self.client = client 56 | self.name = name 57 | self.settings = settings or get_settings() 58 | # self.headers = headers or self.settings.get_headers() 59 | self.debug_enabled = debug_enabled 60 | self.on_error = on_error 61 | self.ignore_errors = ignore_errors 62 | self.disable_retries = disable_retries 63 | self.retry_function = retry_function 64 | 65 | self.timeout = timeout 66 | self.max_retries = max_retries 67 | self.route_classes = route_classes or RouteClasses.copy() 68 | self.is_azure = is_azure if is_azure is not None else \ 69 | isinstance(self.settings, AzureOpenAISettings) 70 | self.kwargs = kwargs or {} 71 | if client_callbacks: 72 | self.kwargs['client_callbacks'] = client_callbacks 73 | self.init_routes() 74 | 75 | 76 | 77 | def init_routes(self): 78 | """ 79 | Initializes the routes 80 | """ 81 | for route, route_class in self.route_classes.items(): 82 | try: 83 | setattr(self, route, route_class( 84 | client = self.client, 85 | name = self.name, 86 | # headers = self.headers, 87 | debug_enabled = self.debug_enabled, 88 | on_error = self.on_error, 89 | ignore_errors = self.ignore_errors, 90 | disable_retries = self.disable_retries, 91 | retry_function = self.retry_function, 92 | timeout = self.timeout, 93 | max_retries = self.max_retries, 94 | settings = self.settings, 95 | is_azure = self.is_azure, 96 | **self.kwargs 97 | )) 98 | except Exception as e: 99 | logger.error(f"[{self.name}] Failed to initialize route {route} with error: {e}") 100 | raise e 101 | -------------------------------------------------------------------------------- /async_openai/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | ## Base Object Models 3 | from async_openai.schemas.completions import CompletionChoice, CompletionObject, CompletionResponse 4 | from async_openai.schemas.chat import ChatMessage, ChatChoice, ChatObject, ChatResponse 5 | from async_openai.schemas.edits import EditChoice, EditObject, EditResponse 6 | from async_openai.schemas.embeddings import EmbeddingData, EmbeddingObject, EmbeddingResponse 7 | # from async_openai.schemas.files import FileChoice, FileObject, FileResponse 8 | from async_openai.schemas.images import ImageData, ImageObject, ImageResponse 9 | from async_openai.schemas.models import ModelData, ModelObject, ModelResponse 10 | 11 | ## Route Models 12 | from async_openai.schemas.completions import CompletionRoute 13 | from async_openai.schemas.chat import ChatRoute 14 | from async_openai.schemas.edits import EditRoute 15 | from async_openai.schemas.embeddings import EmbeddingRoute 16 | # from 
async_openai.schemas.files import FileRoute 17 | from async_openai.schemas.images import ImageRoute 18 | 19 | from async_openai.schemas.models import ModelRoute 20 | 21 | 22 | -------------------------------------------------------------------------------- /async_openai/schemas/edits.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type, Any, Union, List, Dict 2 | from lazyops.types import validator, lazyproperty, Field 3 | 4 | from async_openai.types.options import OpenAIModel 5 | from async_openai.types.resources import BaseResource 6 | from async_openai.types.responses import BaseResponse 7 | from async_openai.types.routes import BaseRoute 8 | 9 | 10 | __all__ = [ 11 | 'EditChoice', 12 | 'EditObject', 13 | 'EditResponse', 14 | 'EditRoute', 15 | ] 16 | 17 | 18 | class EditChoice(BaseResource): 19 | text: str 20 | index: int 21 | logprobs: Optional[Any] 22 | finish_reason: Optional[str] 23 | 24 | class EditObject(BaseResource): 25 | model: Optional[Union[str, OpenAIModel, Any]] = "curie" 26 | instruction: Optional[str] 27 | input: Optional[str] = "" 28 | n: Optional[int] = 1 29 | temperature: Optional[float] = 1.0 30 | top_p: Optional[float] = 1.0 31 | user: Optional[str] = None 32 | 33 | @validator('model', pre=True, always=True) 34 | def validate_model(cls, v) -> OpenAIModel: 35 | """ 36 | Validate the model 37 | """ 38 | if isinstance(v, OpenAIModel): 39 | return v 40 | if isinstance(v, dict): 41 | return OpenAIModel(**v) 42 | return OpenAIModel(value = v, mode = 'edit') 43 | 44 | 45 | class EditResponse(BaseResponse): 46 | choices: Optional[List[EditChoice]] 47 | choice_model: Optional[Type[BaseResource]] = EditChoice 48 | 49 | 50 | @lazyproperty 51 | def text(self) -> str: 52 | """ 53 | Returns the text for the edits 54 | """ 55 | if self.choices: 56 | return ''.join([choice.text for choice in self.choices]) 57 | return self.response.text 58 | 59 | 60 | class EditRoute(BaseRoute): 61 | input_model: Optional[Type[BaseResource]] = EditObject 62 | response_model: Optional[Type[BaseResource]] = EditResponse 63 | 64 | api_resource: Optional[str] = Field(default = 'edits') 65 | 66 | # @lazyproperty 67 | # def api_resource(self): 68 | # return 'edits' 69 | 70 | def create( 71 | self, 72 | input_object: Optional[Type[BaseResource]] = None, 73 | **kwargs 74 | ) -> EditResponse: 75 | """ 76 | 77 | """ 78 | return super().create(input_object = input_object, **kwargs) 79 | 80 | async def async_create( 81 | self, 82 | input_object: Optional[Type[BaseResource]] = None, 83 | **kwargs 84 | ) -> EditResponse: 85 | """ 86 | 87 | """ 88 | return await super().async_create(input_object = input_object, **kwargs) 89 | 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /async_openai/schemas/embeddings.py: -------------------------------------------------------------------------------- 1 | import time 2 | import asyncio 3 | from typing import Optional, Type, Any, Union, List, Dict, overload 4 | from lazyops.types import validator, lazyproperty, Field 5 | 6 | from async_openai.types.context import ModelContextHandler 7 | from async_openai.types.resources import BaseResource 8 | from async_openai.types.responses import BaseResponse 9 | from async_openai.types.routes import BaseRoute 10 | from async_openai.types.errors import RateLimitError, InvalidMaxTokens, InvalidRequestError, APIError, MaxRetriesExceeded 11 | from async_openai.utils import logger 12 | 13 | __all__ = [ 14 | 
'EmbeddingData',
15 | 'EmbeddingObject',
16 | 'EmbeddingResponse',
17 | 'EmbeddingRoute',
18 | ]
19 | 
20 | 
21 | 
22 | class EmbeddingData(BaseResource):
23 | object: Optional[str] = 'embedding'
24 | embedding: Optional[List[float]] = []
25 | index: Optional[int] = 0
26 | 
27 | class EmbeddingObject(BaseResource):
28 | model: Optional[str] = "text-embedding-ada-002"
29 | input: Optional[Union[List[Any], Any]] = None
30 | user: Optional[str] = None
31 | dimensions: Optional[int] = None
32 | encoding_format: Optional[str] = None
33 | 
34 | 
35 | @validator('model', pre=True, always=True)
36 | def validate_model(cls, v, values: Dict[str, Any]) -> str:
37 | """
38 | Validate the model
39 | """
40 | if not v:
41 | if values.get('engine'):
42 | v = values.get('engine')
43 | elif values.get('deployment'):
44 | v = values.get('deployment')
45 | v = ModelContextHandler.resolve_model_name(v)
46 | # if values.get('validate_model_aliases', False):
47 | # v = ModelContextHandler[v].name
48 | return v
49 | 
50 | 
51 | def dict(self, *args, exclude: Any = None, exclude_unset: bool = True, **kwargs):
52 | """
53 | Returns the dict representation of the object
54 | """
55 | return super().dict(*args, exclude = exclude, exclude_unset = exclude_unset, **kwargs)
56 | 
57 | 
58 | 
59 | class EmbeddingResponse(BaseResponse):
60 | data: Optional[List[EmbeddingData]] = None
61 | data_model: Optional[Type[BaseResource]] = EmbeddingData
62 | input_object: Optional[EmbeddingObject] = None
63 | 
64 | @lazyproperty
65 | def embeddings(self) -> List[List[float]]:
66 | """
67 | Returns the embeddings for the response
68 | object
69 | """
70 | if self.data:
71 | return [data.embedding for data in self.data]
72 | return None
73 | 
74 | @lazyproperty
75 | def openai_model(self):
76 | """
77 | Returns the model for the completions
78 | """
79 | return self.headers.get('openai-model', self.input_object.model)
80 | 
81 | @lazyproperty
82 | def consumption(self) -> int:
83 | """
84 | Returns the consumption for the completions
85 | """
86 | return ModelContextHandler.get_consumption_cost(
87 | model_name = self.openai_model,
88 | usage = self.usage,
89 | )
90 | 
91 | 
92 | 
93 | class EmbeddingRoute(BaseRoute):
94 | input_model: Optional[Type[BaseResource]] = EmbeddingObject
95 | response_model: Optional[Type[BaseResource]] = EmbeddingResponse
96 | api_resource: Optional[str] = Field(default = 'embeddings')
97 | root_name: Optional[str] = Field(default = 'embedding')
98 | 
99 | # @lazyproperty
100 | # def api_resource(self):
101 | # return 'embeddings'
102 | 
103 | # @lazyproperty
104 | # def root_name(self):
105 | # return 'embedding'
106 | 
107 | @overload
108 | def create(
109 | self,
110 | input: Optional[Union[str, List[str], List[List]]] = None,
111 | model: Optional[str] = "text-embedding-ada-002",
112 | dimensions: Optional[int] = None,
113 | encoding_format: Optional[str] = 'float',
114 | user: Optional[str] = None,
115 | auto_retry: Optional[bool] = False,
116 | auto_retry_limit: Optional[int] = None,
117 | **kwargs
118 | ) -> EmbeddingResponse:
119 | """
120 | Creates an embedding response for the provided prompt and parameters
121 | 
122 | Usage:
123 | 
124 | ```python
125 | >>> result = OpenAI.embedding.create(
126 | >>> input = 'say this is a test',
127 | >>> )
128 | ```
129 | 
130 | **Parameters:**
131 | 
132 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays.
Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
133 | 
134 | :model (string, required): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
135 | Default: `text-embedding-ada-002`
136 | 
137 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
138 | Default: `None`
139 | 
140 | :encoding_format (optional): The format of the encoding. If not specified, the model will use the default encoding format for the model. Defaults to `float`
141 | Default: `float`
142 | 
143 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to
144 | monitor and detect abuse.
145 | Default: `None`
146 | 
147 | Returns: `EmbeddingResponse`
148 | """
149 | ...
150 | 
151 | 
152 | def create(
153 | self,
154 | input_object: Optional[EmbeddingObject] = None,
155 | auto_retry: Optional[bool] = False,
156 | auto_retry_limit: Optional[int] = None,
157 | **kwargs
158 | ) -> EmbeddingResponse:
159 | """
160 | Creates an embedding response for the provided prompt and parameters
161 | 
162 | Usage:
163 | 
164 | ```python
165 | >>> result = OpenAI.embedding.create(
166 | >>> input = 'say this is a test',
167 | >>> )
168 | ```
169 | 
170 | **Parameters:**
171 | 
172 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
173 | 
174 | :model (string, required): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
175 | Default: `text-embedding-ada-002`
176 | 
177 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
178 | Default: `None`
179 | 
180 | :encoding_format (optional): The format of the encoding. If not specified, the model will use the default encoding format for the model. Defaults to `float`
181 | Default: `float`
182 | 
183 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to
184 | monitor and detect abuse.
185 | Default: `None`
186 | 
187 | Returns: `EmbeddingResponse`
188 | """
189 | return super().create(
190 | input_object = input_object,
191 | auto_retry = auto_retry,
192 | auto_retry_limit = auto_retry_limit,
193 | **kwargs
194 | )
195 | 
196 | @overload
197 | async def async_create(
198 | self,
199 | input: Optional[Union[str, List[str], List[List]]] = None,
200 | model: Optional[str] = "text-embedding-ada-002",
201 | dimensions: Optional[int] = None,
202 | encoding_format: Optional[str] = 'float',
203 | user: Optional[str] = None,
204 | auto_retry: Optional[bool] = False,
205 | auto_retry_limit: Optional[int] = None,
206 | **kwargs
207 | ) -> EmbeddingResponse:
208 | """
209 | Creates an embedding response for the provided prompt and parameters
210 | 
211 | Usage:
212 | 
213 | ```python
214 | >>> result = await OpenAI.embedding.async_create(
215 | >>> input = 'say this is a test',
216 | >>> )
217 | ```
218 | 
219 | **Parameters:**
220 | 
221 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
222 | 
223 | :model (string, required): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
224 | Default: `text-embedding-ada-002`
225 | 
226 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
227 | Default: `None`
228 | 
229 | :encoding_format (optional): The format of the encoding. If not specified, the model will use the default encoding format for the model. Defaults to `float`
230 | Default: `float`
231 | 
232 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to
233 | monitor and detect abuse.
234 | Default: `None`
235 | 
236 | Returns: `EmbeddingResponse`
237 | """
238 | ...
239 | 
240 | 
241 | async def async_create(
242 | self,
243 | input_object: Optional[EmbeddingObject] = None,
244 | auto_retry: Optional[bool] = False,
245 | auto_retry_limit: Optional[int] = None,
246 | **kwargs
247 | ) -> EmbeddingResponse: # sourcery skip: low-code-quality
248 | """
249 | Usage:
250 | 
251 | ```python
252 | >>> result = await OpenAI.embedding.async_create(
253 | >>> input = 'say this is a test',
254 | >>> )
255 | ```
256 | 
257 | **Parameters:**
258 | 
259 | :input (string, array, required): Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. Each input must not exceed the max input tokens for the model (8191 tokens for text-embedding-ada-002). Example Python code for counting tokens.
260 | 
261 | :model (string): ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.
262 | Default: `text-embedding-ada-002`
263 | 
264 | :dimensions (optional): Number of dimensions to use for the embedding. If not specified, the model will use the default number of dimensions for the model. This is only supported in `text-embedding-3` and later models
265 | Default: `None`
266 | 
267 | :encoding_format (optional): The format of the encoding.
If not specified, the model will use the default encoding format for the model. Defaults to `float` 268 | Default: `float` 269 | 270 | :user (optional): A unique identifier representing your end-user, which can help OpenAI to 271 | monitor and detect abuse. 272 | Default: `None` 273 | 274 | Returns: `EmbeddingResponse` 275 | """ 276 | return await super().async_create( 277 | input_object = input_object, 278 | auto_retry = auto_retry, 279 | auto_retry_limit = auto_retry_limit, 280 | **kwargs 281 | ) 282 | 283 | -------------------------------------------------------------------------------- /async_openai/schemas/external/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Supporting External Providers 3 | 4 | - together (https://together.ai/) 5 | """ 6 | 7 | -------------------------------------------------------------------------------- /async_openai/schemas/external/fireworks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GrowthEngineAI/async-openai/cbe8525a6b8605729af5f14182dfdea8f656294f/async_openai/schemas/external/fireworks/__init__.py -------------------------------------------------------------------------------- /async_openai/schemas/external/fireworks/chat.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Fireworks.ai Chat Route 5 | """ 6 | import json 7 | from ...chat import ( 8 | ChatRoute as BaseChatRoute, 9 | ChatObject as BaseChatObject, 10 | ChatResponse as BaseChatResponse, 11 | ChatChoice as BaseChatChoice, 12 | ChatMessage as BaseChatMessage, 13 | Function, FunctionCall, Tool, logger 14 | ) 15 | from lazyops.types import validator, root_validator, BaseModel, lazyproperty, Field, PYD_VERSION 16 | from async_openai.types.context import ModelContextHandler 17 | from typing import Any, Dict, List, Optional, Union, Set, Type, TYPE_CHECKING 18 | 19 | 20 | class ChatObject(BaseChatObject): 21 | model: Optional[str] = "accounts/fireworks/models/firefunction-v1" 22 | response_format: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None 23 | is_json_mode: Optional[bool] = Field(None, exclude = True) 24 | is_grammar_mode: Optional[bool] = Field(None, exclude = True) 25 | 26 | @validator('model', pre=True, always=True) 27 | def validate_model(cls, v, values: Dict[str, Any]) -> str: 28 | """ 29 | Validate the model 30 | """ 31 | if not v: 32 | if values.get('engine'): 33 | v = values.get('engine') 34 | elif values.get('deployment'): 35 | v = values.get('deployment') 36 | 37 | v = ModelContextHandler.resolve_external_model_name(v) 38 | return v 39 | 40 | 41 | """ 42 | Handle Validation for JSON Mode 43 | 44 | JSON mode corrals the LLM into outputting JSON conforming to a provided schema. 45 | To activate JSON mode, provide the response_format parameter to the Chat Completions 46 | API with {"type": "json_object"}. The JSON Schema can be specified with the schema 47 | property of response_format. The schema property should be a JSON Schema object. 
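An illustrative request shape (the schema below is a stand-in, not part of this module):

```python
>>> response_format = {
>>>     'type': 'json_object',
>>>     'schema': {'type': 'object', 'properties': {'answer': {'type': 'string'}}},
>>> }
```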
48 | """ 49 | 50 | 51 | @root_validator(pre = True) 52 | def validate_obj(cls, values: Dict[str, Any]) -> Dict[str, Any]: 53 | """ 54 | Validate the object 55 | """ 56 | if values.get('functions'): 57 | if not all(isinstance(f, Function) for f in values['functions']): 58 | values['functions'] = [Function(**f) for f in values['functions']] 59 | if not values.get('function_call'): 60 | values['function_call'] = 'auto' 61 | 62 | 63 | response_format: Dict[str, Any] = values.get('response_format', {}) 64 | if response_format.get('type') == 'json_object': 65 | values['is_json_mode'] = True 66 | if not response_format.get('schema') and values.get('functions'): 67 | func = values['functions'][0] if \ 68 | len(values['functions']) == 1 or \ 69 | values.get('function_call') == 'auto' else \ 70 | next((f for f in values['functions'] if f.name == values['function_call'])) 71 | 72 | assert func, 'No function found' 73 | schema = func.model_json_schema() 74 | values['response_format']['schema'] = schema 75 | 76 | elif response_format.get('type') == 'grammar': 77 | values['is_grammar_mode'] = True 78 | 79 | 80 | # Disable tools if response format is json_object 81 | elif values.get('tools'): 82 | tools = [] 83 | for tool in values['tools']: 84 | if isinstance(tool, Tool): 85 | tools.append(tool) 86 | elif isinstance(tool, dict): 87 | # This should be the correct format 88 | if tool.get('function'): 89 | tools.append(Tool(**tool)) 90 | else: 91 | # This is previously supported format 92 | tools.append(Tool(function = Function(**tool))) 93 | else: 94 | raise ValueError(f'Invalid tool: {tool}') 95 | values['tools'] = tools 96 | if not values.get('tool_choice'): 97 | values['tool_choice'] = 'auto' 98 | return values 99 | 100 | def dict(self, **kwargs) -> Dict[str, Any]: 101 | """ 102 | Return the dict 103 | """ 104 | exclude: Set[str] = kwargs.pop('exclude', None) or set() 105 | if self.is_json_mode or self.is_grammar_mode: 106 | exclude.add('tools') 107 | exclude.add('tool_choice') 108 | exclude.add('functions') 109 | exclude.add('function_call') 110 | 111 | return super().dict(exclude = exclude, **kwargs) 112 | 113 | 114 | class ChatMessage(BaseChatMessage): 115 | 116 | """ 117 | Handle some validation here 118 | """ 119 | 120 | @root_validator(pre = True) 121 | def validate_message(cls, values: Dict[str, Any]) -> Dict[str, Any]: 122 | """ 123 | Validate the object 124 | """ 125 | if values.get('tool_calls'): 126 | for tc in values['tool_calls']: 127 | if tc.get('type') == 'function' and tc.get('function'): 128 | func = FunctionCall(**tc['function']) 129 | values['function_call'] = func 130 | break 131 | return values 132 | 133 | 134 | 135 | class ChatChoice(BaseChatChoice): 136 | message: ChatMessage 137 | 138 | 139 | class ChatResponse(BaseChatResponse): 140 | 141 | input_object: Optional[ChatObject] = None 142 | choice_model: Optional[Type[ChatChoice]] = ChatChoice 143 | 144 | 145 | class ChatRoute(BaseChatRoute): 146 | input_model: Optional[Type[ChatObject]] = ChatObject 147 | response_model: Optional[Type[ChatResponse]] = ChatResponse -------------------------------------------------------------------------------- /async_openai/schemas/external/together/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GrowthEngineAI/async-openai/cbe8525a6b8605729af5f14182dfdea8f656294f/async_openai/schemas/external/together/__init__.py -------------------------------------------------------------------------------- 
/async_openai/schemas/external/together/chat.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Together.xyz Chat Route 5 | """ 6 | import json 7 | from ...chat import ( 8 | ChatRoute as BaseChatRoute, 9 | ChatObject as BaseChatObject, 10 | ChatResponse as BaseChatResponse, 11 | ChatChoice as BaseChatChoice, 12 | ChatMessage as BaseChatMessage, 13 | Function, FunctionCall, Tool, logger 14 | ) 15 | from lazyops.types import validator, root_validator, BaseModel, lazyproperty, Field, PYD_VERSION 16 | from async_openai.types.context import ModelContextHandler 17 | from typing import Any, Dict, List, Optional, Union, Set, Type, TYPE_CHECKING 18 | 19 | # if PYD_VERSION == 2: 20 | # from pydantic import model_validator 21 | # else: 22 | # from lazyops.types.models import root_validator 23 | 24 | class ChatObject(BaseChatObject): 25 | model: Optional[str] = "mistralai/Mistral-7B-Instruct-v0.1" 26 | response_format: Optional[Dict[str, Union[str, Dict[str, Any]]]] = None 27 | is_json_mode: Optional[bool] = Field(None, exclude = True) 28 | 29 | @validator('model', pre=True, always=True) 30 | def validate_model(cls, v, values: Dict[str, Any]) -> str: 31 | """ 32 | Validate the model 33 | """ 34 | if not v: 35 | if values.get('engine'): 36 | v = values.get('engine') 37 | elif values.get('deployment'): 38 | v = values.get('deployment') 39 | 40 | v = ModelContextHandler.resolve_external_model_name(v) 41 | return v 42 | 43 | 44 | """ 45 | Handle Validation for JSON Mode 46 | 47 | JSON mode corrals the LLM into outputting JSON conforming to a provided schema. 48 | To activate JSON mode, provide the response_format parameter to the Chat Completions 49 | API with {"type": "json_object"}. The JSON Schema can be specified with the schema 50 | property of response_format. The schema property should be a JSON Schema object. 
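As a hedged sketch, a single `Function` can also drive the schema, mirroring the validator below (`my_function` is a placeholder for any `Function` instance):

```python
>>> obj = ChatObject(
>>>     messages = [...],
>>>     functions = [my_function],
>>>     response_format = {'type': 'json_object'},
>>> )
>>> # obj.response_format['schema'] is populated from my_function.model_json_schema()
```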
51 | """ 52 | 53 | # if PYD_VERSION == 2: 54 | # @model_validator(mode = 'after') 55 | # def validate_obj(self): 56 | # """ 57 | # Validate the object 58 | # """ 59 | 60 | # else: 61 | 62 | @root_validator(pre = True) 63 | def validate_obj(cls, values: Dict[str, Any]) -> Dict[str, Any]: 64 | """ 65 | Validate the object 66 | """ 67 | if values.get('functions'): 68 | if not all(isinstance(f, Function) for f in values['functions']): 69 | values['functions'] = [Function(**f) for f in values['functions']] 70 | if not values.get('function_call'): 71 | values['function_call'] = 'auto' 72 | 73 | if values.get('response_format', {}).get('type') == 'json_object': 74 | values['is_json_mode'] = True 75 | if not values['response_format'].get('schema') and values.get('functions'): 76 | func = values['functions'][0] if \ 77 | len(values['functions']) == 1 or \ 78 | values.get('function_call') == 'auto' else \ 79 | next((f for f in values['functions'] if f.name == values['function_call'])) 80 | 81 | assert func, 'No function found' 82 | schema = func.model_json_schema() 83 | # _ = schema.pop('additionalProperties', None) 84 | values['response_format']['schema'] = schema 85 | # logger.info(values["response_format"], prefix = 'JSON Mode') 86 | 87 | 88 | # Disable tools if response format is json_object 89 | elif values.get('tools'): 90 | tools = [] 91 | for tool in values['tools']: 92 | if isinstance(tool, Tool): 93 | tools.append(tool) 94 | elif isinstance(tool, dict): 95 | # This should be the correct format 96 | if tool.get('function'): 97 | tools.append(Tool(**tool)) 98 | else: 99 | # This is previously supported format 100 | tools.append(Tool(function = Function(**tool))) 101 | else: 102 | raise ValueError(f'Invalid tool: {tool}') 103 | values['tools'] = tools 104 | if not values.get('tool_choice'): 105 | values['tool_choice'] = 'auto' 106 | return values 107 | 108 | def dict(self, **kwargs) -> Dict[str, Any]: 109 | """ 110 | Return the dict 111 | """ 112 | exclude: Set[str] = kwargs.pop('exclude', None) or set() 113 | if self.is_json_mode: 114 | exclude.add('tools') 115 | exclude.add('tool_choice') 116 | exclude.add('functions') 117 | exclude.add('function_call') 118 | 119 | return super().dict(exclude = exclude, **kwargs) 120 | 121 | 122 | class ChatMessage(BaseChatMessage): 123 | 124 | """ 125 | Handle some validation here 126 | """ 127 | 128 | @root_validator(pre = True) 129 | def validate_message(cls, values: Dict[str, Any]) -> Dict[str, Any]: 130 | """ 131 | Validate the object 132 | """ 133 | # if values.get('content') and '"arguments"' in values['content']: 134 | # content = values.pop('content') 135 | # try: 136 | # data = json.loads(content) 137 | # except Exception as e: 138 | # try: 139 | # content = content.split('\n', 1)[-1].strip() 140 | # data = json.loads(content) 141 | # except Exception as e: 142 | # logger.error(f'Invalid JSON: {content}: {e}') 143 | # raise e 144 | # values['function_call'] = FunctionCall(**data) 145 | if values.get('tool_calls'): 146 | for tc in values['tool_calls']: 147 | if tc.get('type') == 'function' and tc.get('function'): 148 | func = FunctionCall(**tc['function']) 149 | values['function_call'] = func 150 | break 151 | return values 152 | 153 | 154 | 155 | class ChatChoice(BaseChatChoice): 156 | message: ChatMessage 157 | 158 | 159 | class ChatResponse(BaseChatResponse): 160 | 161 | input_object: Optional[ChatObject] = None 162 | choice_model: Optional[Type[ChatChoice]] = ChatChoice 163 | 164 | 165 | class ChatRoute(BaseChatRoute): 166 | input_model: 
Optional[Type[ChatObject]] = ChatObject 167 | response_model: Optional[Type[ChatResponse]] = ChatResponse -------------------------------------------------------------------------------- /async_openai/schemas/external/together/embeddings.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Together.xyz Embedding Route 5 | """ 6 | 7 | 8 | from ...embeddings import ( 9 | EmbeddingRoute as BaseEmbeddingRoute, 10 | EmbeddingObject as BaseEmbeddingObject, 11 | EmbeddingResponse as BaseEmbeddingResponse, 12 | logger 13 | ) 14 | from lazyops.types import validator, lazyproperty, Field 15 | from async_openai.types.context import ModelContextHandler 16 | from async_openai.types.resources import Usage 17 | from typing import Any, Dict, List, Optional, Union, Set, Type, TYPE_CHECKING 18 | 19 | 20 | class EmbeddingObject(BaseEmbeddingObject): 21 | model: Optional[str] = "togethercomputer/m2-bert-80M-32k-retrieval" 22 | 23 | @validator('model', pre=True, always=True) 24 | def validate_model(cls, v, values: Dict[str, Any]) -> str: 25 | """ 26 | Validate the model 27 | """ 28 | if not v: 29 | if values.get('engine'): 30 | v = values.get('engine') 31 | elif values.get('deployment'): 32 | v = values.get('deployment') 33 | v = ModelContextHandler.resolve_external_model_name(v) 34 | return v 35 | 36 | 37 | class EmbeddingResponse(BaseEmbeddingResponse): 38 | 39 | usage: Optional[Usage] = Field(default_factory = Usage) 40 | 41 | 42 | @lazyproperty 43 | def consumption(self) -> int: 44 | """ 45 | Returns the consumption for the completions 46 | """ 47 | try: 48 | if not self.usage.prompt_tokens: 49 | self.usage.prompt_tokens = ModelContextHandler.count_tokens(self.input_object.input, model_name=self.input_object.model) 50 | return ModelContextHandler.get_consumption_cost( 51 | model_name = self.input_object.model, 52 | usage = self.usage, 53 | ) 54 | except Exception as e: 55 | logger.error(f"Error getting consumption: {e}") 56 | return 0 57 | 58 | 59 | class EmbeddingRoute(BaseEmbeddingRoute): 60 | input_model: Optional[Type[EmbeddingObject]] = EmbeddingObject 61 | response_model: Optional[Type[EmbeddingResponse]] = EmbeddingResponse -------------------------------------------------------------------------------- /async_openai/schemas/images.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type, Any, Union, List, Dict 2 | from lazyops.types import validator, lazyproperty, BaseModel, Field 3 | 4 | from async_openai.types.options import ImageSize, ImageFormat 5 | from async_openai.types.resources import BaseResource, File, FileType, _has_fileio 6 | from async_openai.types.responses import BaseResponse 7 | from async_openai.types.routes import BaseRoute 8 | 9 | __all__ = [ 10 | 'ImageData', 11 | 'ImageObject', 12 | 'ImageResponse', 13 | 'ImageRoute', 14 | ] 15 | 16 | class ImageData(BaseModel): 17 | url: Optional[str] = None 18 | data: Optional[bytes] = None 19 | 20 | class ImageObject(BaseResource): 21 | prompt: Optional[str] 22 | mask: Optional[Union[str, FileType, Any]] 23 | image: Optional[Union[str, FileType, Any]] 24 | n: Optional[int] = 1 25 | size: Optional[Union[str, ImageSize]] = ImageSize.large 26 | response_format: Optional[Union[str, ImageFormat]] = ImageFormat.url 27 | user: Optional[str] = None 28 | 29 | @validator("size") 30 | def validate_size(cls, value): 31 | return ImageSize.from_str(value) if isinstance(value, str) else value 32 | 33 | 
@validator("response_format") 34 | def validate_response_format(cls, value): 35 | if isinstance(value, str): 36 | value = ImageFormat(value) 37 | return value 38 | 39 | def get_params(self, **kwargs) -> List: 40 | """ 41 | Transforms the data to the req params 42 | """ 43 | files = [(k, (None, v)) for k, v in self.dict(exclude_none=True, exclude={'mask', 'image'}).items()] 44 | if self.mask: 45 | mask = File(self.mask) 46 | files.append(("mask", ("mask", mask.read_bytes(), "application/octet-stream"))) 47 | if self.image: 48 | image = File(self.image) 49 | files.append(("image", ("image", image.read_bytes(), "application/octet-stream"))) 50 | 51 | return files 52 | 53 | async def async_get_params(self, **kwargs) -> List: 54 | """ 55 | Transforms the data to the req params 56 | """ 57 | files = [(k, (None, v)) for k, v in self.dict(exclude_none=True, exclude={'mask', 'image'}).items()] 58 | if self.mask: 59 | mask = File(self.mask) 60 | files.append(("mask", ("mask", (await mask.async_read_bytes() if _has_fileio else mask.read_bytes()), "application/octet-stream"))) 61 | if self.image: 62 | image = File(self.image) 63 | files.append(("image", ("image", (await image.async_read_bytes() if _has_fileio else image.read_bytes()), "application/octet-stream"))) 64 | 65 | return files 66 | 67 | 68 | class ImageResponse(BaseResponse): 69 | data: Optional[List[ImageData]] 70 | data_model: Optional[Type[ImageData]] = ImageData 71 | 72 | @lazyproperty 73 | def image_urls(self) -> List[str]: 74 | """ 75 | Returns the list of image urls 76 | """ 77 | if self.data: 78 | return [data.url for data in self.data] if self.data else [] 79 | return None 80 | 81 | 82 | 83 | class ImageRoute(BaseRoute): 84 | input_model: Optional[Type[BaseResource]] = ImageObject 85 | response_model: Optional[Type[BaseResource]] = ImageResponse 86 | 87 | api_resource: Optional[str] = Field(default = 'images') 88 | 89 | # @lazyproperty 90 | # def api_resource(self): 91 | # return 'images' 92 | 93 | def create( 94 | self, 95 | input_object: Optional[Type[BaseResource]] = None, 96 | **kwargs 97 | ) -> ImageResponse: 98 | """ 99 | 100 | """ 101 | return super().create(input_object = input_object, **kwargs) 102 | 103 | async def async_create( 104 | self, 105 | input_object: Optional[Type[BaseResource]] = None, 106 | **kwargs 107 | ) -> ImageResponse: 108 | """ 109 | 110 | """ 111 | return await super().async_create(input_object = input_object, **kwargs) 112 | 113 | -------------------------------------------------------------------------------- /async_openai/schemas/models.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import Optional, Type, List, Dict, Any 3 | from lazyops.types import lazyproperty, Field 4 | 5 | from async_openai.types.resources import BaseResource, Permission 6 | from async_openai.types.responses import BaseResponse 7 | from async_openai.types.routes import BaseRoute 8 | 9 | 10 | __all__ = [ 11 | 'ModelData', 12 | 'ModelObject', 13 | 'ModelResponse', 14 | 'ModelRoute', 15 | ] 16 | 17 | 18 | class ModelData(BaseResource): 19 | id: str 20 | status: Optional[str] = None 21 | owned_by: Optional[str] = None 22 | created: Optional[datetime.datetime] = None 23 | updated_at: Optional[datetime.datetime] = None 24 | created_at: Optional[datetime.datetime] = None 25 | permission: Optional[List[Permission]] = Field(default_factory = list) 26 | root: Optional[str] = None 27 | parent: Optional[str] = None 28 | object: Optional[str] = 'model' 29 | 
capabilities: Optional[Dict[str, Any]] = Field(default_factory = dict) 30 | lifecycle_status: Optional[str] = None 31 | deprecation: Optional[Dict[str, Any]] = Field(default_factory = dict) 32 | 33 | 34 | @lazyproperty 35 | def model_age(self) -> Optional[datetime.datetime]: 36 | """ 37 | Returns how long ago the model was created 38 | """ 39 | if self.created: 40 | return datetime.datetime.now(tz = datetime.timezone.utc) - self.created 41 | 42 | 43 | class ModelObject(BaseResource): 44 | model: Optional[str] 45 | 46 | 47 | class ModelResponse(BaseResponse): 48 | data: Optional[List[ModelData]] 49 | data_model: Optional[Type[BaseResource]] = ModelData 50 | 51 | @lazyproperty 52 | def model_list(self) -> List[str]: 53 | """ 54 | Returns a list of model IDs 55 | """ 56 | return [model.id for model in self.data] if self.data and isinstance(self.data, list) else [] 57 | 58 | 59 | class ModelRoute(BaseRoute): 60 | input_model: Optional[Type[BaseResource]] = ModelObject 61 | response_model: Optional[Type[BaseResource]] = ModelResponse 62 | 63 | api_resource: Optional[str] = Field(default = 'models') 64 | 65 | # @lazyproperty 66 | # def api_resource(self): 67 | # return 'models' 68 | 69 | @lazyproperty 70 | def create_enabled(self): 71 | """ 72 | Returns whether the Create Route is Enabled 73 | """ 74 | return True 75 | 76 | 77 | @lazyproperty 78 | def list_enabled(self): 79 | """ 80 | Returns whether the List Route is Enabled 81 | """ 82 | return False 83 | 84 | @lazyproperty 85 | def get_enabled(self): 86 | """ 87 | Returns whether the Get Route is Enabled 88 | """ 89 | return False 90 | 91 | def retrieve( 92 | self, 93 | resource_id: str, 94 | params: Optional[Dict[str, Any]] = None, 95 | **kwargs 96 | ) -> ModelResponse: 97 | """ 98 | Retrieve a Single Model by Resource ID 99 | 100 | :param resource_id: The ID of the Resource to GET 101 | :param params: Optional Query Parameters 102 | """ 103 | return super().retrieve(resource_id = resource_id, params = params, **kwargs) 104 | 105 | async def async_retrieve( 106 | self, 107 | resource_id: str, 108 | params: Optional[Dict[str, Any]] = None, 109 | **kwargs 110 | ) -> ModelResponse: 111 | """ 112 | Retrieve a Single Model by Resource ID 113 | 114 | :param resource_id: The ID of the Resource to GET 115 | :param param 116 | """ 117 | return await super().async_retrieve(resource_id = resource_id, params = params, **kwargs) 118 | 119 | 120 | def list( 121 | self, 122 | params: Optional[Dict[str, Any]] = None, 123 | **kwargs 124 | ) -> ModelResponse: 125 | """ 126 | List all available Models 127 | 128 | :param params: Optional Query Parameters 129 | """ 130 | return super().list(params = params, **kwargs) 131 | 132 | async def async_list( 133 | self, 134 | params: Optional[Dict[str, Any]] = None, 135 | **kwargs 136 | ) -> ModelResponse: 137 | """ 138 | List all available Models 139 | 140 | :param params: Optional Query Parameters 141 | """ 142 | return await super().async_list(params = params, **kwargs) 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /async_openai/types/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from async_openai.types.errors import ( 4 | OpenAIError, 5 | APIError, 6 | AuthenticationError, 7 | InvalidRequestError, 8 | RateLimitError, 9 | APIConnectionError, 10 | Timeout, 11 | TryAgain, 12 | ServiceUnavailableError, 13 | fatal_exception, 14 | error_handler, 15 | ) 16 
| 17 | from async_openai.types.options import ( 18 | ApiType, 19 | CompletionModels, 20 | FilePurpose, 21 | FinetuneModels, 22 | ImageSize, 23 | ImageFormat, 24 | ) 25 | 26 | # from async_openai.types.base import ( 27 | # Usage, 28 | # Permission, 29 | # BaseResource, 30 | # FileObject, 31 | # EventObject, 32 | # FileResource, 33 | # BaseResponse, 34 | # BaseRoute, 35 | 36 | # RESPONSE_SUCCESS_CODES 37 | # ) 38 | 39 | -------------------------------------------------------------------------------- /async_openai/types/context.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | OpenAI Cost Functions and Handler 5 | """ 6 | import abc 7 | import tiktoken 8 | from pathlib import Path 9 | from lazyops.types import BaseModel, validator, Field, lazyproperty 10 | from lazyops.libs.proxyobj import ProxyObject 11 | from typing import Optional, Union, Dict, Any, List, Tuple, Type, TYPE_CHECKING 12 | 13 | if TYPE_CHECKING: 14 | from .resources import Usage 15 | from async_openai.schemas.chat import ChatMessage 16 | from transformers import PreTrainedTokenizer 17 | from async_openai.utils.external_config import ExternalProviderSettings, ProviderModel 18 | 19 | pricing_file_path = Path(__file__).parent.joinpath('pricing.yaml') 20 | 21 | class ModelCosts(BaseModel): 22 | """ 23 | Represents a model's costs 24 | """ 25 | unit: Optional[int] = 1000 26 | input: Optional[float] = 0.0 27 | output: Optional[float] = 0.0 28 | total: Optional[float] = 0.0 29 | 30 | 31 | class ModelCostItem(BaseModel): 32 | """ 33 | Represents a model's Cost Item 34 | """ 35 | name: str 36 | aliases: Optional[List[str]] = None 37 | context_length: Optional[int] = 0 38 | costs: Optional[ModelCosts] = Field(default_factory=ModelCosts) 39 | endpoints: Optional[List[str]] = None 40 | 41 | def get_costs( 42 | self, 43 | input_tokens: Optional[int] = None, 44 | output_tokens: Optional[int] = None, 45 | total_tokens: Optional[int] = None, 46 | usage: Optional['Usage'] = None, 47 | **kwargs 48 | ) -> float: 49 | """ 50 | Gets the costs 51 | """ 52 | if usage is not None: 53 | input_tokens = usage.prompt_tokens 54 | output_tokens = usage.completion_tokens 55 | if kwargs.get('prompt_tokens'): 56 | input_tokens = kwargs['prompt_tokens'] 57 | if kwargs.get('completion_tokens'): 58 | output_tokens = kwargs['completion_tokens'] 59 | 60 | assert input_tokens is not None or output_tokens is not None or total_tokens is not None, "Must provide either input_tokens, output_tokens, or total_tokens" 61 | if self.costs is None: return 0.0 62 | cost = 0.0 63 | if self.costs.input: 64 | cost += self.costs.input * input_tokens / self.costs.unit 65 | if self.costs.output: 66 | cost += self.costs.output * output_tokens / self.costs.unit 67 | if self.costs.total and total_tokens is not None: 68 | cost += self.costs.total * total_tokens / self.costs.unit 69 | return cost 70 | 71 | 72 | class ModelCostHandlerClass(abc.ABC): 73 | """ 74 | The Model Cost Handler 75 | """ 76 | def __init__( 77 | self, 78 | **kwargs, 79 | ): 80 | """ 81 | Initializes the Model Cost Handler 82 | """ 83 | self._models: Optional[Dict[str, ModelCostItem]] = None 84 | self._model_aliases: Optional[Dict[str, str]] = None 85 | self.tokenizers: Optional[Dict[str, tiktoken.Encoding]] = {} 86 | 87 | self.external_models: Optional[Dict[str, 'ProviderModel']] = {} 88 | self.external_model_aliases: Optional[Dict[str, str]] = {} 89 | self.external_tokenizers: Optional[Dict[str, 'PreTrainedTokenizer']] = {} 
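# Worked example for ModelCostItem.get_costs above (the prices are hypothetical):
# with unit = 1000, input = 0.01 and output = 0.03, a Usage of 1200 prompt tokens
# and 400 completion tokens resolves to
#   0.01 * 1200 / 1000 + 0.03 * 400 / 1000 = 0.012 + 0.012 = 0.024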
90 | 91 | @staticmethod 92 | def load_models() -> Dict[str, ModelCostItem]: 93 | """ 94 | Loads the models 95 | """ 96 | import yaml 97 | models: Dict[str, Dict[str, Any]] = yaml.safe_load(pricing_file_path.read_text()) 98 | return {k: ModelCostItem(name = k, **v) for k, v in models.items()} 99 | 100 | @property 101 | def models(self) -> Dict[str, ModelCostItem]: 102 | """ 103 | Gets the models 104 | """ 105 | if self._models is None: self._models = self.load_models() 106 | return self._models 107 | 108 | @property 109 | def model_aliases(self) -> Dict[str, str]: 110 | """ 111 | Gets the model aliases 112 | """ 113 | if self._model_aliases is None: 114 | self._model_aliases = {alias: model for model, item in self.models.items() for alias in item.aliases or []} 115 | return self._model_aliases 116 | 117 | 118 | def get_external_model(self, name: str) -> Optional['ProviderModel']: 119 | """ 120 | Gets the model 121 | """ 122 | if name not in self.external_model_aliases and name not in self.external_models: 123 | raise KeyError(f"Model {name} not found: {self.external_model_aliases} / {list(self.external_models.keys())}") 124 | # print(f"Model {name} not found: {self.external_model_aliases} / {self.external_models}") 125 | # return None 126 | if name in self.external_model_aliases: 127 | name = self.external_model_aliases[name] 128 | return self.external_models[name] 129 | 130 | def resolve_external_model_name(self, model_name: str) -> str: 131 | """ 132 | Resolves the Model Name from the model aliases 133 | """ 134 | # Try to remove the provider name 135 | model = self.get_external_model(model_name) 136 | return model.name 137 | 138 | def resolve_model_name(self, model_name: str) -> str: 139 | """ 140 | Resolves the Model Name from the model aliases 141 | """ 142 | # Try to remove the version number 143 | if model_name in self.models: 144 | return model_name 145 | 146 | # if model_name in self.model_aliases: 147 | return self.model_aliases.get(model_name, model_name) 148 | 149 | # key = model_name.rsplit('-', 1)[0].strip() 150 | # if key in self.model_aliases: 151 | # return self.model_aliases[key] 152 | # elif key in self.models: 153 | # self.model_aliases[model_name] = key 154 | # return key 155 | # raise KeyError(f"Model {key}/{model_name} not found in {self.model_aliases} / {list(self.models.keys())}") 156 | 157 | def __getitem__(self, key: str) -> ModelCostItem: 158 | """ 159 | Gets a model by name 160 | """ 161 | if '/' in key or key in self.external_model_aliases: return self.get_external_model(key) 162 | if key not in self.model_aliases and key not in self.models: 163 | return self.models[self.resolve_model_name(key)] 164 | if key in self.model_aliases: 165 | key = self.model_aliases[key] 166 | return self.models[key] 167 | 168 | def get(self, key: str, default: Optional[str] = None) -> Optional[ModelCostItem]: 169 | """ 170 | Gets a model by name 171 | """ 172 | try: 173 | return self[key] 174 | except KeyError: 175 | if default is None: 176 | raise KeyError(f"Model {key} not found") from None 177 | return self[default] 178 | 179 | def add_provider(self, provider: 'ExternalProviderSettings'): 180 | """ 181 | Adds a provider to the handler 182 | """ 183 | for model in provider.models: 184 | model_name = f'{provider.name}/{model.name}' 185 | self.external_models[model_name] = model 186 | if model.name not in self.external_model_aliases: 187 | self.external_model_aliases[model.name] = model_name 188 | for alias in model.aliases or []: 189 | model_alias = f'{provider.name}/{alias}' 190 
| self.external_model_aliases[model_alias] = model_name 191 | if alias not in self.external_model_aliases: 192 | self.external_model_aliases[alias] = model_name 193 | 194 | def add_model(self, model: str, source_model: str): 195 | """ 196 | Add a model to the handler 197 | 198 | Args: 199 | model (str): The model name 200 | source_model (str): The source model name 201 | """ 202 | if model in self.model_aliases or model in self.models: 203 | return 204 | 205 | src_model = self[source_model] 206 | # Add to the model aliases 207 | self.model_aliases[model] = src_model.name 208 | 209 | def get_external_tokenizer(self, name: str) -> Optional['PreTrainedTokenizer']: 210 | """ 211 | Gets the tokenizer 212 | """ 213 | # Remove the provider name 214 | model = self.get_external_model(name) 215 | tokenizer_name = model.tokenizer or model.name 216 | if tokenizer_name not in self.external_tokenizers: 217 | try: 218 | from transformers.models.auto.tokenization_auto import AutoTokenizer 219 | except ImportError as e: 220 | raise ImportError("transformers is not installed, please install it to use this feature") from e 221 | tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 222 | self.external_tokenizers[tokenizer_name] = tokenizer 223 | return self.external_tokenizers[tokenizer_name] 224 | 225 | def get_tokenizer(self, name: str) -> Optional[Union[tiktoken.Encoding, 'PreTrainedTokenizer']]: 226 | """ 227 | Gets the tokenizer 228 | """ 229 | # Switch the 35 -> 3.5 230 | # OpenAI Models don't have / in the name 231 | if '/' in name or name in self.external_model_aliases \ 232 | or name in self.external_models: return self.get_external_tokenizer(name) 233 | 234 | if '35' in name: name = name.replace('35', '3.5') 235 | if name not in self.tokenizers: 236 | if name in {'text-embedding-3-small', 'text-embedding-3-large'}: 237 | enc_name = 'cl100k_base' 238 | self.tokenizers[name] = tiktoken.get_encoding(enc_name) 239 | else: 240 | self.tokenizers[name] = tiktoken.encoding_for_model(name) 241 | return self.tokenizers[name] 242 | 243 | def count_chat_tokens( 244 | self, 245 | messages: List[Union[Dict[str, str], 'ChatMessage']], 246 | model_name: str, 247 | reply_padding_token_count: Optional[int] = 3, 248 | message_padding_token_count: Optional[int] = 4, 249 | **kwargs 250 | ) -> int: 251 | """ 252 | Returns the number of tokens in the chat. 253 | """ 254 | num_tokens = 0 255 | tokenizer = self.get_tokenizer(model_name) 256 | for message in messages: 257 | if message.get('name'): 258 | num_tokens -= 1 259 | num_tokens += message_padding_token_count + len(tokenizer.encode(message.get('content', ''))) 260 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 261 | return num_tokens 262 | 263 | def count_tokens( 264 | self, 265 | text: Union[str, List[str]], 266 | model_name: str, 267 | **kwargs 268 | ) -> int: 269 | """ 270 | Returns the number of tokens in the text. 
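
Example (a minimal sketch; `ModelContextHandler` is the proxied singleton this module exports):

    from async_openai.types.context import ModelContextHandler
    ModelContextHandler.count_tokens("hello world", model_name = "gpt-3.5-turbo")       # single string
    ModelContextHandler.count_tokens(["hello", "world"], model_name = "gpt-3.5-turbo")  # sums per-item counts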
271 | """
272 | tokenizer = self.get_tokenizer(model_name)
273 | return (
274 | sum(len(tokenizer.encode(t)) for t in text)
275 | if isinstance(text, list)
276 | else len(tokenizer.encode(text))
277 | )
278 |
279 | def get_consumption_cost(self, model_name: str, usage: 'Usage', **kwargs) -> float:
280 | """
281 | Gets the consumption cost for the given token usage
282 | """
283 | # Switch the 35 -> 3.5
284 | if '35' in model_name: model_name = model_name.replace('35', '3.5')
285 | model = self[model_name]
286 | if isinstance(usage, dict):
287 | from .resources import Usage
288 | usage = Usage(**usage)
289 | return model.get_costs(usage = usage, **kwargs)
290 |
291 |
292 | def truncate_to_max_length(self, text: str, model_name: str, context_length: Optional[int] = None, **kwargs) -> str:
293 | """
294 | Truncates the text to the max context length, keeping the trailing tokens
295 | """
296 | tokenizer = self.get_tokenizer(model_name)
297 | if context_length is None:
298 | context_length = self[model_name].context_length
299 | tokens = tokenizer.encode(text)
300 | if len(tokens) > context_length:
301 | tokens = tokens[-context_length:]
302 | decoded = tokenizer.decode(tokens)
303 | text = text[-len(decoded):]
304 | return text
305 |
306 |
307 |
308 |
309 | class ModelContextHandlerMetaClass(type):
310 | """
311 | Metaclass backing the legacy `ModelContextHandlerV1` interface
312 | """
313 |
314 | _models: Optional[Dict[str, ModelCostItem]] = None
315 | _model_aliases: Optional[Dict[str, str]] = None
316 | tokenizers: Optional[Dict[str, tiktoken.Encoding]] = {}
317 |
318 | def load_models(cls) -> Dict[str, ModelCostItem]:
319 | """
320 | Loads the model cost items from the bundled pricing file
321 | """
322 | import yaml
323 | models: Dict[str, Dict[str, Any]] = yaml.safe_load(pricing_file_path.read_text())
324 | return {k: ModelCostItem(name = k, **v) for k, v in models.items()}
325 |
326 | @property
327 | def models(cls) -> Dict[str, ModelCostItem]:
328 | """
329 | Gets the models
330 | """
331 | if cls._models is None:
332 | cls._models = cls.load_models()
333 | return cls._models
334 |
335 | @property
336 | def model_aliases(cls) -> Dict[str, str]:
337 | """
338 | Gets the model aliases
339 | """
340 | if cls._model_aliases is None:
341 | cls._model_aliases = {alias: model for model, item in cls.models.items() for alias in item.aliases or []}
342 | return cls._model_aliases
343 |
357 | def __getitem__(cls, key: str) -> ModelCostItem:
358 | """
359 | Gets a model by name or alias
360 | """
361 | if key not in cls.model_aliases and key not in cls.models:
362 | return cls.models[cls.resolve_model_name(key)]
363 | if key in cls.model_aliases:
364 | key = cls.model_aliases[key]
365 | return cls.models[key]
366 |
367 | def get(cls, key: str, default: Optional[str] = None) -> Optional[ModelCostItem]:
368 | """
369 | Gets a model by name, falling back to the `default` model name; raises a KeyError if neither resolves
370 | """
371 | try:
372 | return cls[key]
373 | except KeyError:
374 | if default is None:
375 | raise KeyError(f"Model {key} not found") from None
376 | return cls[default]
377 |
378 | def add_model(cls, model: str, source_model: str):
379 | """
380 | Add a model to the handler
381 |
382 | Args:
383 | model (str): The model name
384 | source_model (str): The source
model name 385 | """ 386 | if model in cls.model_aliases or model in cls.models: 387 | return 388 | 389 | src_model = cls[source_model] 390 | # Add to the model aliases 391 | cls.model_aliases[model] = src_model.name 392 | 393 | 394 | def get_tokenizer(cls, name: str) -> Optional[tiktoken.Encoding]: 395 | """ 396 | Gets the tokenizer 397 | """ 398 | # Switch the 35 -> 3.5 399 | if '35' in name: name = name.replace('35', '3.5') 400 | if name not in cls.tokenizers: 401 | if name in {'text-embedding-3-small', 'text-embedding-3-large'}: 402 | enc_name = 'cl100k_base' 403 | cls.tokenizers[name] = tiktoken.get_encoding(enc_name) 404 | else: 405 | cls.tokenizers[name] = tiktoken.encoding_for_model(name) 406 | return cls.tokenizers[name] 407 | 408 | def count_chat_tokens( 409 | cls, 410 | messages: List[Union[Dict[str, str], 'ChatMessage']], 411 | model_name: str, 412 | reply_padding_token_count: Optional[int] = 3, 413 | message_padding_token_count: Optional[int] = 4, 414 | **kwargs 415 | ) -> int: 416 | """ 417 | Returns the number of tokens in the chat. 418 | """ 419 | num_tokens = 0 420 | tokenizer = cls.get_tokenizer(model_name) 421 | for message in messages: 422 | if message.get('name'): 423 | num_tokens -= 1 424 | num_tokens += message_padding_token_count + len(tokenizer.encode(message.get('content', ''))) 425 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 426 | return num_tokens 427 | 428 | def count_tokens( 429 | cls, 430 | text: Union[str, List[str]], 431 | model_name: str, 432 | **kwargs 433 | ) -> int: 434 | """ 435 | Returns the number of tokens in the text. 436 | """ 437 | tokenizer = cls.get_tokenizer(model_name) 438 | return ( 439 | sum(len(tokenizer.encode(t)) for t in text) 440 | if isinstance(text, list) 441 | else len(tokenizer.encode(text)) 442 | ) 443 | 444 | def get_consumption_cost(cls, model_name: str, usage: 'Usage', **kwargs) -> float: 445 | """ 446 | Gets the consumption cost 447 | """ 448 | # Switch the 35 -> 3.5 449 | if '35' in model_name: model_name = model_name.replace('35', '3.5') 450 | model = cls[model_name] 451 | if isinstance(usage, dict): 452 | from .resources import Usage 453 | usage = Usage(**usage) 454 | return model.get_costs(usage = usage, **kwargs) 455 | 456 | def resolve_model_name(cls, model_name: str) -> str: 457 | """ 458 | Resolves the Model Name from the model aliases 459 | """ 460 | return cls.model_aliases.get(model_name, model_name) 461 | 462 | def truncate_to_max_length(cls, text: str, model_name: str, context_length: Optional[int] = None, **kwargs) -> str: 463 | """ 464 | Truncates the text to the max length 465 | """ 466 | tokenizer = cls.get_tokenizer(model_name) 467 | if context_length is None: 468 | context_length = cls[model_name].context_length 469 | 470 | tokens = tokenizer.encode(text) 471 | if len(tokens) > context_length: 472 | tokens = tokens[-context_length:] 473 | decoded = tokenizer.decode(tokens) 474 | text = text[-len(decoded):] 475 | 476 | return text 477 | 478 | 479 | 480 | class ModelContextHandlerV1(metaclass = ModelContextHandlerMetaClass): 481 | """ 482 | The Model Cost Handler 483 | """ 484 | pass 485 | 486 | 487 | ModelContextHandler: ModelCostHandlerClass = ProxyObject(ModelCostHandlerClass) -------------------------------------------------------------------------------- /async_openai/types/errors.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import httpx 4 | import aiohttpx 5 | import contextlib 6 | 
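# Usage sketch (illustrative; the real call sites live in the route classes):
# `error_handler` below maps an HTTP status code to a typed exception, raising
# some (e.g. 503, 429, 524) and returning the others for the caller to decide, e.g.:
#
#   exc = error_handler(response = response, data = data)
#   if exc is not None: raise exc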
from typing import Any, Optional, Union, Dict 7 | from lazyops.types import BaseModel, lazyproperty 8 | 9 | __all__ = [ 10 | "OpenAIError", 11 | "ExceptionModel", 12 | "fatal_exception", 13 | "APIError", 14 | "TryAgain", 15 | "APIConnectionError", 16 | "Timeout", 17 | "InvalidRequestError", 18 | "AuthenticationError", 19 | "PermissionError", 20 | "RateLimitError", 21 | "ServiceUnavailableError", 22 | "InvalidAPIType", 23 | "error_handler", 24 | ] 25 | 26 | class ExceptionModel(BaseModel): 27 | response: aiohttpx.Response 28 | data: Optional[Union[Dict, Any]] 29 | message: Optional[str] = None 30 | should_retry: Optional[bool] = False 31 | 32 | @lazyproperty 33 | def headers(self): 34 | """ 35 | Returns the response headers. 36 | """ 37 | return self.response.headers 38 | 39 | @lazyproperty 40 | def stream(self) -> bool: 41 | """ 42 | Returns True if the response is a streaming response. 43 | """ 44 | return "text/event-stream" in self.headers.get("content-type", "") 45 | 46 | @lazyproperty 47 | def response_data(self): 48 | return self.data or self.response.json() 49 | 50 | @lazyproperty 51 | def http_body(self): 52 | body = self.data if self.stream else self.response.content 53 | try: 54 | if hasattr(body, "decode"): 55 | body = body.decode("utf-8") 56 | return body 57 | except (json.JSONDecodeError, UnicodeDecodeError) as e: 58 | raise ValueError( 59 | f"HTTP code {self.status_code} from API ({body})" 60 | ) from e 61 | 62 | @lazyproperty 63 | def response_json(self): 64 | try: 65 | return json.loads(self.http_body) 66 | except json.JSONDecodeError: 67 | return {} 68 | 69 | @lazyproperty 70 | def response_text(self): 71 | return self.response.text 72 | 73 | @lazyproperty 74 | def status_code(self): 75 | return self.response.status_code 76 | 77 | @lazyproperty 78 | def error_data(self) -> Dict: 79 | return self.response_json.get("error", {}) 80 | 81 | @lazyproperty 82 | def request_id(self) -> str: 83 | return self.headers.get("request-id", None) 84 | 85 | @lazyproperty 86 | def organization(self) -> str: 87 | return self.headers.get("openai-organization", None) 88 | 89 | @lazyproperty 90 | def error_message(self) -> str: 91 | msg: str = self.message or ("(Error occurred while streaming.)" if self.stream else "") 92 | if self.error_data.get("message"): 93 | msg += " " + self.error_data.get("message") 94 | if self.error_data.get("internal_message"): 95 | msg += "\n\n" + self.error_data["internal_message"] 96 | return msg.strip() or self.response_text 97 | 98 | 99 | class OpenAIError(Exception): 100 | def __init__( 101 | self, 102 | response: aiohttpx.Response, 103 | data: Optional[Union[Dict, Any]], 104 | message: Optional[str] = None, 105 | should_retry: Optional[bool] = False, 106 | **kwargs 107 | ): 108 | self.status = response.status_code 109 | self.response = response 110 | self.message = message 111 | self.exc = ExceptionModel( 112 | response=response, 113 | message=message, 114 | data=data, 115 | should_retry=should_retry, 116 | **kwargs 117 | ) 118 | self.post_init(**kwargs) 119 | 120 | def post_init(self, **kwargs): 121 | pass 122 | 123 | def __str__(self): 124 | msg = self.exc.error_message or "" 125 | if self.exc.request_id is not None: 126 | return f"Request {self.exc.request_id}: {msg}" 127 | else: 128 | return msg 129 | 130 | @property 131 | def user_message(self): 132 | return self.exc.error_message 133 | 134 | def __repr__(self): 135 | return f"[OpenAI] {self.__class__.__name__} \ 136 | (message={self.exc.error_message}, \ 137 | http_status={self.exc.status_code}, \ 138 | 
request_id={self.exc.request_id})"
139 |
140 |
141 |
142 | class MaxRetriesExhausted(Exception):
143 | """
144 | Max Retries Exhausted
145 | """
146 |
147 | def __init__(self, name: str, func_name: str, model: str, attempts: int, max_attempts: int):
148 | self.name = name
149 | self.func_name = func_name
150 | self.model = model
151 | self.attempts = attempts
152 | self.max_attempts = max_attempts
153 |
154 | def __str__(self):
155 | return f"[{self.name} - {self.model}] All retries exhausted for {self.func_name}. ({self.attempts}/{self.max_attempts})"
156 |
157 | def __repr__(self):
158 | """
159 | Returns the string representation of the error.
160 | """
161 | return f"[{self.name} - {self.model}] (func_name={self.func_name}, attempts={self.attempts}, max_attempts={self.max_attempts})"
162 |
163 |
164 | class APIError(OpenAIError):
165 | pass
166 |
167 |
168 | class TryAgain(OpenAIError):
169 | pass
170 |
171 |
172 | class Timeout(OpenAIError):
173 | pass
174 |
175 |
176 | class APIConnectionError(OpenAIError):
177 | pass
178 |
179 |
180 | class InvalidRequestError(OpenAIError):
181 | pass
182 |
183 |
184 | class AuthenticationError(OpenAIError):
185 | pass
186 |
187 |
188 | class PermissionError(OpenAIError):
189 | pass
190 |
191 |
192 | class RateLimitError(OpenAIError):
193 |
194 | def post_init(self, **kwargs):
195 | """
196 | Parses the rate limit reset time from the error message
197 | """
198 | self.retry_after_seconds: Optional[float] = None
199 | with contextlib.suppress(Exception):
200 | self.retry_after_seconds = float(self.exc.error_message.split("Please retry after", 1)[1].split("second", 1)[0].strip())
201 |
202 | class ServiceTimeoutError(OpenAIError):
203 | pass
204 |
205 |
206 | class ServiceUnavailableError(OpenAIError):
207 | pass
208 |
209 |
210 | class InvalidAPIType(OpenAIError):
211 | pass
212 |
213 |
214 | class InvalidMaxTokens(InvalidRequestError):
215 |
216 |
217 | def post_init(self, **kwargs):
218 | """
219 | Parses the maximum context length and requested max tokens from the error message
220 | """
221 | self.maximum_context_length: Optional[int] = None
222 | self.requested_max_tokens: Optional[int] = None
223 | with contextlib.suppress(Exception):
224 | self.maximum_context_length = int(self.exc.error_message.split("maximum context length is", 1)[1].split(" ", 1)[0].strip())
225 | self.requested_max_tokens = int(self.exc.error_message.split("requested", 1)[1].split(" ", 1)[0].strip())
226 |
227 |
228 | def fatal_exception(exc) -> bool:
229 | """
230 | Returns True if the exception is fatal and should not be retried.
231 | """
233 |
234 | if isinstance(exc, aiohttpx.ReadTimeout):
235 | return True
236 |
237 | if not isinstance(exc, OpenAIError):
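# Usage sketch (an assumption for illustration, not the library's verbatim call site):
# `fatal_exception` is shaped to act as a give-up predicate for a retry helper,
# e.g. with the `backoff` package:
#
#   @backoff.on_exception(backoff.expo, Exception, max_tries = 5, giveup = fatal_exception)
#   def call_api(...): ...
#
238 | # retry on all other errors (eg.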
network) 239 | return False 240 | 241 | # retry on server errors and client errors 242 | # with 429 status code (rate limited), 243 | # with 400, 404, 415 status codes (invalid request), 244 | # 400 can include invalid parameters, such as invalid `max_tokens` 245 | # don't retry on other client errors 246 | if isinstance(exc, (InvalidMaxTokens, InvalidRequestError, MaxRetriesExhausted)): 247 | return True 248 | 249 | return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415, 524] # [429, 400, 404, 415] 250 | 251 | 252 | def error_handler( 253 | response: aiohttpx.Response, 254 | data: Optional[Any] = None, # Line specific for streaming responses 255 | should_retry: Optional[bool] = False, 256 | **kwargs 257 | ): 258 | 259 | if response.status_code == 503: 260 | raise ServiceUnavailableError( 261 | response = response, 262 | message = "The server is overloaded or not ready yet.", 263 | data = data, 264 | should_retry = should_retry, 265 | **kwargs 266 | ) 267 | if response.status_code == 429: 268 | raise RateLimitError( 269 | response = response, 270 | data = data, 271 | should_retry = should_retry, 272 | **kwargs 273 | ) 274 | if response.status_code in [400, 404, 415]: 275 | if 'maximum context length' in response.text: 276 | return InvalidMaxTokens( 277 | response = response, 278 | data = data, 279 | should_retry = False, 280 | **kwargs 281 | ) 282 | return InvalidRequestError( 283 | response = response, 284 | data = data, 285 | should_retry = should_retry, 286 | **kwargs 287 | ) 288 | if response.status_code == 401: 289 | return AuthenticationError( 290 | response = response, 291 | data = data, 292 | should_retry = should_retry, 293 | **kwargs 294 | ) 295 | if response.status_code == 403: 296 | return PermissionError( 297 | response = response, 298 | data = data, 299 | should_retry = should_retry, 300 | **kwargs 301 | ) 302 | if response.status_code == 409: 303 | return TryAgain( 304 | response = response, 305 | data = data, 306 | should_retry = should_retry, 307 | **kwargs 308 | ) 309 | 310 | # Service is likely down. 311 | if response.status_code == 524: 312 | raise ServiceTimeoutError( 313 | response = response, 314 | data = data, 315 | should_retry = False, 316 | **kwargs 317 | ) 318 | 319 | raise APIError( 320 | response = response, 321 | data = data, 322 | should_retry = should_retry, 323 | **kwargs 324 | ) 325 | 326 | 327 | 328 | class MaxRetriesExceeded(Exception): 329 | def __init__( 330 | self, 331 | attempts: int, 332 | base_exception: OpenAIError, 333 | name: Optional[str] = None, 334 | ): 335 | self.name = name 336 | self.attempts = attempts 337 | self.ex = base_exception 338 | 339 | def __str__(self): 340 | return f"[{self.name}] Max {self.attempts} retries exceeded: {str(self.ex)}" 341 | 342 | 343 | @property 344 | def user_message(self): 345 | """ 346 | Returns the error message. 347 | """ 348 | return f"[{self.name}] Max {self.attempts} retries exceeded: {self.ex.user_message}" 349 | 350 | def __repr__(self): 351 | """ 352 | Returns the string representation of the error. 
353 | """ 354 | return f"[{self.name}] {repr(self.ex)} (attempts={self.attempts})" 355 | -------------------------------------------------------------------------------- /async_openai/types/options.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Optional, Union 3 | from lazyops.types import BaseModel, lazyproperty 4 | 5 | """ 6 | Pricing Options 7 | 8 | # https://openai.com/api/pricing/ 9 | """ 10 | 11 | _image_prices = { 12 | 'small': 0.016, 13 | 'medium': 0.018, 14 | 'large': 0.02, 15 | } 16 | 17 | # price per 1k/tokens 18 | _completion_prices = { 19 | 'ada': 0.0004, 20 | 'babbage': 0.0005, 21 | 'curie': 0.002, 22 | 'davinci': 0.02, 23 | } 24 | 25 | _finetune_training_prices = { 26 | 'ada': 0.0004, 27 | 'babbage': 0.0006, 28 | 'curie': 0.003, 29 | 'davinci': 0.03, 30 | } 31 | 32 | _finetune_usage_prices = { 33 | 'ada': 0.0016, 34 | 'babbage': 0.0024, 35 | 'curie': 0.012, 36 | 'davinci': 0.12, 37 | } 38 | 39 | _embedding_prices = { 40 | # 'ada': 0.004, 41 | 'ada': 0.0001, 42 | 'babbage': 0.005, 43 | 'curie': 0.02, 44 | 'davinci': 0.2, 45 | } 46 | 47 | _chat_prices = { 48 | 'gpt-3.5-turbo': 0.002, 49 | 50 | } 51 | 52 | _chat_gpt_prices = { 53 | 'gpt-4-32k': { 54 | 'prompt': 0.06, 55 | 'completion': 0.12, 56 | }, 57 | 'gpt-3.5-turbo-16k': { 58 | 'prompt': 0.003, 59 | 'completion': 0.004, 60 | }, 61 | 'gpt-35-turbo-16k': { 62 | 'prompt': 0.003, 63 | 'completion': 0.004, 64 | }, 65 | 'gpt-3-turbo-16k': { 66 | 'prompt': 0.003, 67 | 'completion': 0.004, 68 | }, 69 | 'gpt-4': { 70 | 'prompt': 0.03, 71 | 'completion': 0.06, 72 | }, 73 | 'gpt-3.5-turbo': { 74 | 'prompt': 0.0015, 75 | 'completion': 0.002, 76 | }, 77 | 'gpt-35-turbo': { 78 | 'prompt': 0.0015, 79 | 'completion': 0.002, 80 | }, 81 | 'gpt-3-turbo': { 82 | 'prompt': 0.0015, 83 | 'completion': 0.002, 84 | }, 85 | 'gpt-3.5-turbo-instruct': { 86 | 'prompt': 0.0015, 87 | 'completion': 0.002, 88 | }, 89 | 'gpt-35-turbo-instruct': { 90 | 'prompt': 0.0015, 91 | 'completion': 0.002, 92 | }, 93 | 'gpt-3-turbo-instruct': { 94 | 'prompt': 0.0015, 95 | 'completion': 0.002, 96 | }, 97 | 98 | } 99 | 100 | # TODO rework this module 101 | 102 | _cost_modes = { 103 | 'embedding': _embedding_prices, 104 | 'train': _finetune_training_prices, 105 | 'finetune': _finetune_usage_prices, 106 | 'completion': _completion_prices, 107 | } 108 | 109 | def get_arch( 110 | model_name: str, 111 | ) -> str: 112 | """ 113 | Get the arch 114 | """ 115 | for arch in { 116 | 'babbage', 117 | 'curie', 118 | 'davinci', 119 | 'ada', 120 | }: 121 | if arch in model_name: 122 | return arch 123 | 124 | def get_consumption_cost( 125 | model_name: str, 126 | total_tokens: int = 1, 127 | default_token_cost: Optional[float] = 0.00001, 128 | prompt_tokens: Optional[int] = None, 129 | completion_tokens: Optional[int] = None, 130 | mode: Optional[str] = None, 131 | ) -> float: 132 | """ 133 | Returns the total cost of the model 134 | usage 135 | """ 136 | if prompt_tokens and completion_tokens: 137 | total_tokens = prompt_tokens + completion_tokens 138 | if (not mode or mode == 'chat') and any( 139 | arch in model_name for arch in { 140 | 'gpt-3.5', 141 | 'gpt-35', 142 | 'gpt-3', 143 | 'gpt-4', 144 | }): 145 | return next( 146 | ( 147 | ( 148 | prompt_tokens * (_chat_gpt_prices[gpt_model]['prompt'] / 1000) 149 | ) 150 | + ( 151 | completion_tokens * (_chat_gpt_prices[gpt_model]['completion'] / 1000) 152 | ) 153 | if prompt_tokens and completion_tokens 154 | else total_tokens 155 | * ( 156 | ( 157 | 
(_chat_gpt_prices[gpt_model]['prompt'] + _chat_gpt_prices[gpt_model]['completion']) / 2 158 | ) 159 | / 1000 160 | ) 161 | for gpt_model in _chat_gpt_prices 162 | if gpt_model in model_name 163 | ), 164 | total_tokens * (_chat_prices['gpt-3.5-turbo'] / 1000), 165 | ) 166 | 167 | arch = get_arch(model_name) 168 | return total_tokens * (_cost_modes[mode][arch] / 1000) if mode in _cost_modes else total_tokens * default_token_cost 169 | 170 | 171 | 172 | class ApiType(str, Enum): 173 | azure = "azure" 174 | openai = "openai" 175 | open_ai = "openai" 176 | azure_ad = "azure_ad" 177 | azuread = "azure_ad" 178 | 179 | def get_version( 180 | self, 181 | version: Optional[str] = None 182 | ): 183 | if self.value in {"azure", "azure_ad", "azuread"} and not version: 184 | return "2023-07-01-preview" 185 | return version 186 | 187 | 188 | class FilePurpose(str, Enum): 189 | """ 190 | File Purpose 191 | """ 192 | 193 | finetune = "fine-tune" 194 | fine_tune = "fine-tune" 195 | train = "fine-tune-train" 196 | search = "search" 197 | batch = "batch" 198 | 199 | @classmethod 200 | def parse_str(cls, value: Union[str, 'FilePurpose'], raise_error: bool = True): 201 | if isinstance(value, cls): return value 202 | if "train" in value: 203 | return cls.train 204 | elif "finetune" in value: 205 | return cls.finetune 206 | elif "fine-tune" in value: 207 | return cls.fine_tune 208 | elif "search" in value: 209 | return cls.search 210 | elif "batch" in value: 211 | return cls.batch 212 | if not raise_error: return None 213 | raise ValueError(f"Cannot convert {value} to FilePurpose") 214 | 215 | 216 | class OpenAIModelType(str, Enum): 217 | """ 218 | OpenAI Model Types 219 | """ 220 | text = "text" 221 | audio = "audio" 222 | code = "code" 223 | chat = "chat" 224 | custom = "custom" 225 | 226 | @classmethod 227 | def parse(cls, value: Union[str, 'OpenAIModelType'], raise_error: bool = True): 228 | if isinstance(value, cls): return value 229 | if "text" in value: 230 | return cls.text 231 | elif "audio" in value: 232 | return cls.audio 233 | elif "code" in value: 234 | return cls.code 235 | elif "gpt-3.5" in value or "gpt-4" in value or "chat" in value: 236 | return cls.chat 237 | return cls.custom 238 | 239 | 240 | class OpenAIModelArch(str, Enum): 241 | """ 242 | OpenAI Model Architectures 243 | """ 244 | 245 | davinci = "davinci" 246 | curie = "curie" 247 | babbage = "babbage" 248 | ada = "ada" 249 | chat = "gpt-3.5" 250 | chat_gpt4 = "gpt-4" 251 | custom = "custom" 252 | 253 | @classmethod 254 | def parse(cls, value: Union[str, 'OpenAIModelArch'], raise_error: bool = True): 255 | if isinstance(value, cls): return value 256 | if "davinci" in value: 257 | return cls.davinci 258 | elif "curie" in value: 259 | return cls.curie 260 | elif "babbage" in value: 261 | return cls.babbage 262 | elif "ada" in value: 263 | return cls.ada 264 | elif "gpt-4" in value: 265 | return cls.chat_gpt4 266 | elif "gpt-3.5" in value or "gpt-3" in value or "chat" in value: 267 | return cls.chat 268 | return cls.custom 269 | 270 | 271 | @lazyproperty 272 | def model_version(self): 273 | return "003" if self.value == "davinci" else "001" 274 | 275 | @lazyproperty 276 | def edit_model(self): 277 | return f"text-{self.value}-edit-{self.model_version}" 278 | 279 | @lazyproperty 280 | def completion_model(self): 281 | return f"text-{self.value}-{self.model_version}" 282 | 283 | @lazyproperty 284 | def embedding_model(self): 285 | return f"text-similarity-{self.value}-{self.model_version}" 286 | 287 | @lazyproperty 288 | def chat_model(self): 
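"""
Returns the chat model name, mapping the `gpt-3.5` arch to `gpt-3.5-turbo`
"""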
289 | return 'gpt-3.5-turbo' if self.value == 'gpt-3.5' else self.value 290 | 291 | @lazyproperty 292 | def finetune_model(self): 293 | return self.value 294 | 295 | 296 | class ModelMode(str, Enum): 297 | """ 298 | Model Mode 299 | """ 300 | 301 | completion = "completion" 302 | edit = "edit" 303 | finetune = "finetune" 304 | fine_tune = "finetune" 305 | train = "train" 306 | embedding = "embedding" 307 | similiarity = "similiarity" 308 | search = "search" 309 | chat = "chat" 310 | 311 | @classmethod 312 | def parse(cls, value: Union[str, 'ModelMode'], raise_error: bool = True): 313 | if isinstance(value, cls): return value 314 | if "completion" in value: 315 | return cls.completion 316 | if "edit" in value: 317 | return cls.edit 318 | if "finetune" in value: 319 | return cls.finetune 320 | if "fine-tune" in value: 321 | return cls.fine_tune 322 | if "train" in value: 323 | return cls.train 324 | if "embedding" in value: 325 | return cls.embedding 326 | if "search" in value: 327 | return cls.search 328 | if "similiarity" in value: 329 | return cls.similiarity 330 | if "gpt-3.5" in value or "gpt-35" in value or 'gpt-4' in value or "chat" in value: 331 | return cls.chat 332 | if "text" in value: 333 | return cls.completion 334 | if not raise_error: return None 335 | raise ValueError(f"Cannot convert {value} to ModelMode") 336 | 337 | @classmethod 338 | def get_text_modes(cls): 339 | return [ 340 | cls.completion, 341 | cls.edit, 342 | cls.embedding, 343 | cls.similiarity, 344 | cls.search, 345 | # cls.chat 346 | ] 347 | 348 | class OpenAIModel(object): 349 | 350 | def __init__( 351 | self, 352 | value: str, 353 | **kwargs 354 | ): 355 | self.src_value = value 356 | self.src_splits = value.split("-") 357 | self.mode: ModelMode = kwargs.get("mode") 358 | self.model_arch: OpenAIModelArch = kwargs.get("model_arch") 359 | self.model_type: OpenAIModelType = kwargs.get("model_type") 360 | self.version: str = kwargs.get("version") 361 | self.parse_values() 362 | 363 | def parse_values(self): 364 | """ 365 | Parse the source values into the correct parts 366 | """ 367 | self.mode = ModelMode.parse((self.mode or self.src_value), raise_error = False) or ModelMode.completion 368 | self.model_arch = OpenAIModelArch.parse((self.model_arch or self.src_value), raise_error = False) 369 | self.model_type = OpenAIModelType.parse( 370 | (self.model_type or \ 371 | ("text" if self.mode in ModelMode.get_text_modes() else self.src_value) 372 | ), raise_error = False) 373 | if not self.version: 374 | ver_values = [x for x in self.src_splits if (x[0].isdigit() and x[-1].isdigit())] 375 | if ver_values: 376 | self.version = '-'.join(ver_values) 377 | if self.mode in {ModelMode.chat}: 378 | if self.version in {'35', '3.5', '4', '3', '16k', '32k'}: 379 | self.version = None 380 | else: 381 | self.version = self.version.rsplit('-', 1)[-1] 382 | 383 | elif self.mode == ModelMode.completion: 384 | self.version = "003" if self.model_arch == "davinci" else "001" 385 | elif self.mode == ModelMode.chat: 386 | pass 387 | elif self.model_type != OpenAIModelType.custom: 388 | self.version = "001" 389 | 390 | 391 | @lazyproperty 392 | def value(self) -> str: 393 | """ 394 | The value of the model 395 | """ 396 | if self.model_arch == OpenAIModelArch.custom or self.model_type == OpenAIModelType.custom: 397 | return self.src_value 398 | if self.mode == ModelMode.chat: 399 | return f'{self.src_value}-{self.version}' if self.version else self.src_value 400 | t = f'{self.model_type.value}' 401 | if self.mode != ModelMode.completion: 
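# completion models omit the mode segment (e.g. 'text-davinci-003'); other modes keep it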
402 | t += f'-{self.mode.value}' 403 | t += f'-{self.model_arch.value}' 404 | if self.version: 405 | t += f'-{self.version}' 406 | return t 407 | 408 | def dict(self, *args, **kwargs): 409 | return { 410 | "value": self.value, 411 | "mode": self.mode.value, 412 | "model_arch": self.model_arch.value, 413 | "model_type": self.model_type.value, 414 | "version": self.version, 415 | } 416 | 417 | def __str__(self): 418 | return f'OpenAIModel(value="{self.value}", mode="{self.mode}", model_arch="{self.model_arch}", model_type="{self.model_type}", version="{self.version}")' 419 | 420 | def __repr__(self) -> str: 421 | return f'OpenAIModel(value="{self.value}", mode="{self.mode}", model_arch="{self.model_arch}", model_type="{self.model_type}", version="{self.version}")' 422 | 423 | def __json__(self): 424 | return self.value 425 | 426 | # def __dict__(self): 427 | # return self.value 428 | 429 | def get_cost( 430 | self, 431 | total_tokens: int = 1, 432 | mode: Optional[str] = None, 433 | raise_error: bool = True, 434 | default_token_cost: Optional[float] = 0.00001, 435 | prompt_tokens: Optional[int] = None, 436 | completion_tokens: Optional[int] = None, 437 | ) -> float: 438 | """ 439 | Returns the total cost of the model 440 | usage 441 | """ 442 | if prompt_tokens and completion_tokens: 443 | total_tokens = prompt_tokens + completion_tokens 444 | 445 | mode = mode or self.mode.value 446 | if mode in ['completion', 'edit']: 447 | return total_tokens * (_completion_prices[self.model_arch.value] / 1000) 448 | if mode in ['chat']: 449 | return next( 450 | ( 451 | ( 452 | prompt_tokens * (_chat_gpt_prices[gpt_model]['prompt'] / 1000) 453 | ) 454 | + ( 455 | completion_tokens * (_chat_gpt_prices[gpt_model]['completion'] / 1000) 456 | ) 457 | if prompt_tokens and completion_tokens 458 | else total_tokens 459 | * ( 460 | ( 461 | (_chat_gpt_prices[gpt_model]['prompt'] + _chat_gpt_prices[gpt_model]['completion']) / 2 462 | ) 463 | / 1000 464 | ) 465 | for gpt_model in _chat_gpt_prices 466 | if gpt_model in self.src_value 467 | ), 468 | total_tokens * (_chat_prices['gpt-3.5-turbo'] / 1000), 469 | ) 470 | if 'embedding' in mode: 471 | return total_tokens * (_embedding_prices[self.model_arch.value] / 1000) 472 | if 'train' in mode: 473 | return total_tokens * (_finetune_training_prices[self.model_arch.value] / 1000) 474 | if 'finetune' in mode or 'fine-tune' in mode: 475 | return total_tokens * (_finetune_usage_prices[self.model_arch.value] / 1000) 476 | if raise_error: raise ValueError(f"Invalid mode {mode}") 477 | return total_tokens * default_token_cost 478 | 479 | 480 | class EditModels(str, Enum): 481 | """ 482 | Just the base models available 483 | """ 484 | davinci = "text-davinci-edit-003" 485 | curie = "text-curie-edit-001" 486 | babbage = "text-babbage-edit-001" 487 | ada = "text-ada-edit-001" 488 | 489 | @lazyproperty 490 | def model_type(self) -> str: 491 | return self.value.split("-")[1] 492 | 493 | class EmbeddingModels(str, Enum): 494 | """ 495 | Just the base models available 496 | """ 497 | davinci = "text-similarity-davinci-003" 498 | curie = "text-similarity-curie-001" 499 | babbage = "text-similarity-babbage-001" 500 | ada = "text-similarity-ada-001" 501 | 502 | @lazyproperty 503 | def model_type(self) -> str: 504 | return self.value.split("-")[2] 505 | 506 | class CompletionModels(str, Enum): 507 | """ 508 | Just the base models available 509 | """ 510 | davinci = "text-davinci-003" 511 | curie = "text-curie-001" 512 | babbage = "text-babbage-001" 513 | ada = "text-ada-001" 514 | 515 | 
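# e.g. CompletionModels.davinci.model_type -> 'davinci'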
@lazyproperty
516 | def model_type(self) -> str:
517 | return self.value.split("-")[1]
518 |
519 |
520 | class FinetuneModels(str, Enum):
521 | """
522 | Supported finetune models.
523 | """
524 | ada = "ada"
525 | babbage = "babbage"
526 | curie = "curie"
527 | davinci = "davinci"
528 |
529 | @lazyproperty
530 | def model_type(self):
531 | return self.value
532 |
533 | class ImageSize(str, Enum):
534 | """
535 | Size of the image
536 | """
537 |
538 | small = "256x256"
539 | medium = "512x512"
540 | large = "1024x1024"
541 |
542 | @lazyproperty
543 | def image_type(self):
544 | if self.value == "256x256":
545 | return 'small'
546 | if self.value == "512x512":
547 | return 'medium'
548 | if self.value == "1024x1024":
549 | return 'large'
550 | raise ValueError(f"Cannot convert {self.value} to an image type")
551 |
552 | @classmethod
553 | def from_str(cls, value: str) -> "ImageSize":
554 | """
555 | :param value: Size of the image
556 | :type value: str
557 | :return: ImageSize
558 | :rtype: ImageSize
559 | """
560 | if value == "256x256":
561 | return cls.small
562 | if value == "512x512":
563 | return cls.medium
564 | if value == "1024x1024":
565 | return cls.large
566 | try:
567 | return cls(value)
568 | except ValueError as e:
569 | raise ValueError(f"Cannot convert {value} to ImageSize") from e
570 |
571 | def get_cost(
572 | self,
573 | total_images: int = 1,
574 | ) -> float:
575 | """
576 | Returns the total cost of generating the given number of images
577 | """
578 | # `_image_prices` is keyed by 'small' / 'medium' / 'large', so use `image_type` rather than the raw size value
579 | return total_images * _image_prices[self.image_type]
580 |
581 | class ImageFormat(str, Enum):
582 | """
583 | Format of the image
584 | """
585 |
586 | url = "url"
587 | b64 = "b64_json"
588 | b64_json = "b64_json"
-------------------------------------------------------------------------------- /async_openai/types/pricing.yaml: --------------------------------------------------------------------------------
1 | # The OpenAI Pricing
2 | # https://openai.com/api/pricing/
3 |
4 | gpt-4-1106-preview:
5 | aliases:
6 | - gpt-4-turbo
7 | context_length: 128000 # 131072 ?
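# `unit` is the token quantity each price refers to: `input` / `output` are USD
# per `unit` tokens (entries in this file use either 1000 or 1_000_000).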
8 | costs: 9 | unit: 1000 10 | input: 0.01 11 | output: 0.03 12 | endpoints: 13 | - chat 14 | 15 | gpt-4-0125-preview: 16 | aliases: 17 | - gpt-4-turbo-preview 18 | - gpt-4-turbo-v 19 | context_length: 128000 20 | costs: 21 | unit: 1000 22 | input: 0.01 23 | output: 0.03 24 | endpoints: 25 | - chat 26 | 27 | gpt-4-turbo-2024-04-09: 28 | aliases: 29 | - gpt-4-turbo-2024 30 | - gpt-4-turbo-2024-04-09 31 | - gpt-4-2024-preview 32 | context_length: 128000 33 | costs: 34 | unit: 1_000_000 35 | input: 10.00 36 | output: 30.00 37 | endpoints: 38 | - chat 39 | 40 | gpt-4o-mini: 41 | aliases: 42 | - gpt-4-o-mini 43 | - gpt4o-mini 44 | - gpt-4o-mini-2024-07-18 45 | context_length: 128000 46 | costs: 47 | unit: 1_000_000 48 | input: 0.150 49 | output: 0.600 50 | batch_costs: 51 | unit: 1_000_000 52 | input: 0.075 53 | output: 0.300 54 | endpoints: 55 | - chat 56 | 57 | gpt-4o: 58 | aliases: 59 | - gpt-4-o 60 | - gpt4o 61 | - gpt-4o-2024-05-13 62 | context_length: 128000 63 | costs: 64 | unit: 1_000_000 65 | input: 5.00 66 | output: 15.00 67 | endpoints: 68 | - chat 69 | 70 | gpt-4: 71 | aliases: 72 | - gpt-4-0613 73 | context_length: 8192 74 | costs: 75 | unit: 1000 76 | input: 0.03 77 | output: 0.06 78 | endpoints: 79 | - chat 80 | 81 | gpt-4-32k: 82 | aliases: 83 | - gpt-4-32k-0613 84 | context_length: 32768 85 | costs: 86 | unit: 1000 87 | input: 0.06 88 | output: 0.12 89 | endpoints: 90 | - chat 91 | 92 | 93 | gpt-3.5-turbo: 94 | aliases: 95 | # - gpt-35 96 | # - gpt-35-turbo 97 | - gpt-3.5 98 | # Support previous 16k context length 99 | # - gpt-3.5-16k 100 | # - gpt-35-16k 101 | # - gpt-35-turbo-16k 102 | # - gpt-3.5-turbo-16k 103 | # - gpt-3.5-turbo-1106 104 | # - gpt-35-turbo-1106 105 | # - gpt-3.5-turbo-0301 106 | # - gpt-35-turbo-0301 107 | # - gpt-3.5-turbo-0613 108 | # - gpt-35-turbo-0613 109 | context_length: 16384 110 | costs: 111 | unit: 1000 112 | input: 0.0010 113 | output: 0.0020 114 | endpoints: 115 | - chat 116 | 117 | gpt-3.5-turbo-16k: 118 | aliases: 119 | # Support previous 16k context length 120 | - gpt-3.5-16k 121 | # - gpt-35-16k 122 | # - gpt-35-turbo-16k 123 | - gpt-3.5-turbo-16k 124 | - gpt-3.5-turbo-16k-0613 125 | context_length: 16384 126 | costs: 127 | unit: 1000 128 | input: 0.0010 129 | output: 0.0020 130 | endpoints: 131 | - chat 132 | 133 | gpt-3.5-turbo-1106: 134 | aliases: 135 | - gpt-3.5-1106 136 | # - gpt-35-1106 137 | # - gpt-35-turbo-1106 138 | context_length: 16384 139 | costs: 140 | unit: 1000 141 | input: 0.0010 142 | output: 0.0020 143 | endpoints: 144 | - chat 145 | 146 | gpt-3.5-turbo-0301: 147 | aliases: 148 | - gpt-3.5-0301 149 | # - gpt-35-0301 150 | # - gpt-35-turbo-0301 151 | context_length: 16384 152 | costs: 153 | unit: 1000 154 | input: 0.0010 155 | output: 0.0020 156 | endpoints: 157 | - chat 158 | 159 | gpt-3.5-turbo-0613: 160 | aliases: 161 | - gpt-3.5-0613 162 | # - gpt-35-0613 163 | # - gpt-35-turbo-0613 164 | context_length: 16384 165 | costs: 166 | unit: 1000 167 | input: 0.0010 168 | output: 0.0020 169 | endpoints: 170 | - chat 171 | 172 | gpt-3.5-turbo-0125: 173 | aliases: 174 | - gpt-3.5-0125 175 | context_length: 16384 176 | costs: 177 | unit: 1000 178 | input: 0.0005 179 | output: 0.0015 180 | endpoints: 181 | - chat 182 | 183 | gpt-3.5-turbo-instruct: 184 | aliases: 185 | - gpt-3.5-instruct 186 | # - gpt-35-instruct 187 | # - gpt-35-turbo-instruct 188 | context_length: 4096 189 | costs: 190 | unit: 1000 191 | input: 0.0015 192 | output: 0.0020 193 | endpoints: 194 | - completion 195 | 196 | text-embedding-ada-002: 197 | aliases: 198 
| - ada 199 | - ada-v2 200 | - text-embedding-ada 201 | context_length: 8191 202 | costs: 203 | unit: 1000 204 | input: 0.0001 205 | endpoints: 206 | - embeddings 207 | 208 | text-embedding-3-small: 209 | aliases: 210 | - t3small 211 | - t3-small 212 | context_length: 8191 213 | costs: 214 | unit: 1000 215 | input: 0.00002 216 | endpoints: 217 | - embeddings 218 | 219 | text-embedding-3-large: 220 | aliases: 221 | - t3large 222 | - t3-large 223 | context_length: 8191 224 | costs: 225 | unit: 1000 226 | input: 0.00013 227 | endpoints: 228 | - embeddings 229 | 230 | -------------------------------------------------------------------------------- /async_openai/types/resources.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import aiohttpx 4 | import datetime 5 | import tempfile 6 | import pathlib 7 | from pydantic import ConfigDict 8 | from pydantic.types import ByteSize 9 | from lazyops.types import BaseModel, validator, lazyproperty 10 | from lazyops.types.models import get_pyd_field_names, pyd_parse_obj, get_pyd_dict, _BaseModel 11 | from lazyops.utils import ObjectDecoder, ObjectEncoder 12 | from async_openai.utils.logs import logger 13 | from async_openai.utils.helpers import aparse_stream, parse_stream 14 | 15 | from async_openai.types.options import FilePurpose 16 | 17 | from typing import Dict, Optional, Any, List, Type, Union, Tuple, Iterator, AsyncIterator, TYPE_CHECKING 18 | 19 | 20 | try: 21 | from fileio import File, FileType 22 | _has_fileio = True 23 | except ImportError: 24 | from pathlib import Path as File 25 | FileType = Union[File, str, os.PathLike] 26 | _has_fileio = False 27 | 28 | 29 | __all__ = [ 30 | 'BaseResource', 31 | 'Permission', 32 | 'Usage', 33 | 'FileObject', 34 | 'EventObject', 35 | 'FileResource', 36 | ] 37 | 38 | VALID_SEND_KWARGS = [ 39 | 'method', 40 | 'url', 41 | 'content', 42 | 'data', 43 | 'files', 44 | 'json', 45 | 'params', 46 | 'headers', 47 | 'cookies', 48 | 'auth', 49 | 'follow_redirects', 50 | 'timeout', 51 | 'extensions', 52 | ] 53 | 54 | 55 | class Usage(BaseModel): 56 | prompt_tokens: Optional[int] = 0 57 | completion_tokens: Optional[int] = 0 58 | total_tokens: Optional[int] = 0 59 | 60 | # @lazyproperty 61 | @property 62 | def consumption(self) -> int: 63 | """ 64 | Gets the consumption 65 | """ 66 | return self.total_tokens 67 | 68 | def update(self, usage: Union['Usage', Dict[str, int]]): 69 | """ 70 | Updates the consumption 71 | """ 72 | if isinstance(usage, Usage): 73 | if usage.prompt_tokens: self.prompt_tokens += usage.prompt_tokens 74 | if usage.completion_tokens: self.completion_tokens += usage.completion_tokens 75 | if usage.total_tokens: self.total_tokens += usage.total_tokens 76 | return 77 | 78 | if usage.get('prompt_tokens'): self.prompt_tokens += usage.get('prompt_tokens') 79 | if usage.get('completion_tokens'): self.completion_tokens += usage.get('completion_tokens') 80 | if usage.get('total_tokens'): self.total_tokens += usage.get('total_tokens') 81 | 82 | def __iadd__(self, other: Union['Usage', Dict[str, int]]): 83 | """ 84 | Adds the usage 85 | """ 86 | self.update(other) 87 | return self.consumption 88 | 89 | 90 | class BaseResource(BaseModel): 91 | 92 | """ 93 | Base Object class for resources to 94 | inherit from 95 | """ 96 | 97 | # model_config = ConfigDict(extra = 'allow', arbitrary_types_allowed = True) 98 | # def get(self, name, default: Any = None): 99 | # """ 100 | # Get an attribute from the model 101 | # """ 102 | # return getattr(self, 
name, default) 103 | 104 | if TYPE_CHECKING: 105 | id: Optional[str] 106 | file_id: Optional[str] 107 | fine_tune_id: Optional[str] 108 | model_id: Optional[str] 109 | completion_id: Optional[str] 110 | openai_id: Optional[str] 111 | model: Optional[str] 112 | 113 | 114 | @lazyproperty 115 | def resource_id(self): 116 | """ 117 | Returns the resource id 118 | """ 119 | if hasattr(self, 'id'): 120 | return self.id 121 | if hasattr(self, 'file_id'): 122 | return self.file_id 123 | if hasattr(self, 'fine_tune_id'): 124 | return self.fine_tune_id 125 | if hasattr(self, 'model_id'): 126 | return self.model_id 127 | if hasattr(self, 'completion_id'): 128 | return self.completion_id 129 | return self.openai_id if hasattr(self, 'openai_id') else None 130 | 131 | @classmethod 132 | def parse_obj( 133 | cls, 134 | obj: Any, 135 | strict: Optional[bool] = False, 136 | from_attributes: Optional[bool] = True, 137 | **kwargs 138 | ) -> 'BaseResource': 139 | """ 140 | Parses an object into the resource 141 | """ 142 | #return cls(**obj) 143 | # logger.info(f"Obj: {cls}: {obj}") 144 | return pyd_parse_obj(cls, obj, strict = strict, from_attributes = from_attributes, **kwargs) 145 | 146 | @staticmethod 147 | def create_resource( 148 | resource: Type['BaseResource'], 149 | **kwargs 150 | ) -> Tuple['BaseResource', Dict]: 151 | """ 152 | Extracts the resource from the kwargs and returns the resource 153 | and the remaining kwargs 154 | """ 155 | resource_fields = get_pyd_field_names(resource) 156 | resource_kwargs = {k: v for k, v in kwargs.items() if k in resource_fields} 157 | return_kwargs = {k: v for k, v in kwargs.items() if k not in resource_fields} 158 | resource_obj = resource.parse_obj(resource_kwargs) 159 | return resource_obj, return_kwargs 160 | 161 | 162 | @staticmethod 163 | def create_batch_resource( 164 | resource: Type['BaseResource'], 165 | batch: List[Union[Dict[str, Any], Any]], 166 | **kwargs 167 | ) -> Tuple[List['BaseResource'], Dict]: 168 | """ 169 | Extracts the resource from the kwargs and returns the resource 170 | and the remaining kwargs 171 | """ 172 | resource_fields = get_pyd_field_names(resource) 173 | resource_kwargs = {k: v for k, v in kwargs.items() if k in resource_fields} 174 | return_kwargs = {k: v for k, v in kwargs.items() if k not in resource_fields} 175 | resource_objs = [] 176 | for item in batch: 177 | if isinstance(item, dict): 178 | item.update(resource_kwargs) 179 | resource_objs.append(resource.parse_obj(item)) 180 | else: 181 | resource_objs.append(item) 182 | return resource_objs, return_kwargs 183 | 184 | @classmethod 185 | def create_many(cls, data: List[Dict]) -> List['BaseResource']: 186 | """ 187 | Creates many resources 188 | """ 189 | return [cls.parse_obj(d) for d in data] 190 | 191 | @staticmethod 192 | def handle_json( 193 | content: Any, 194 | **kwargs 195 | ) -> Union[Dict, List]: 196 | """ 197 | Handles the json response 198 | """ 199 | return json.loads(content, cls = ObjectDecoder, **kwargs) 200 | 201 | 202 | @staticmethod 203 | def handle_stream( 204 | response: aiohttpx.Response, 205 | streaming: Optional[bool] = False, 206 | ) -> Iterator[Dict]: 207 | """ 208 | Handles the stream response 209 | """ 210 | for line in parse_stream(response): 211 | if not line.strip(): continue 212 | try: 213 | yield json.loads(line) 214 | except Exception as e: 215 | logger.error(f'Error: {line}: {e}') 216 | 217 | @staticmethod 218 | async def ahandle_stream( 219 | response: aiohttpx.Response, 220 | streaming: Optional[bool] = False, 221 | ) -> 
AsyncIterator[Dict]: 222 | """ 223 | Handles the stream response 224 | """ 225 | async for line in aparse_stream(response): 226 | if not line.strip(): continue 227 | try: 228 | yield json.loads(line) 229 | except Exception as e: 230 | logger.error(f'Error: {line}: {e}') 231 | 232 | 233 | def __getitem__(self, key: str) -> Any: 234 | """ 235 | Mimic dict 236 | """ 237 | return getattr(self, key) 238 | 239 | 240 | class Permission(BaseResource): 241 | id: str 242 | object: str 243 | created: datetime.datetime 244 | allow_create_engine: bool 245 | allow_sampling: bool 246 | allow_logprobs: bool 247 | allow_search_indices: bool 248 | allow_view: bool 249 | allow_fine_tuning: bool 250 | organization: str 251 | group: Optional[str] 252 | is_blocking: bool 253 | 254 | @property 255 | def since_seconds(self): 256 | return (datetime.datetime.now(datetime.timezone.utc) - self.created).total_seconds() 257 | 258 | 259 | class FileObject(BaseResource): 260 | id: str 261 | object: Optional[str] = 'file' 262 | bytes: Optional[ByteSize] 263 | created_at: Optional[datetime.datetime] 264 | filename: Optional[str] 265 | purpose: Optional[FilePurpose] = FilePurpose.fine_tune 266 | 267 | @validator("created_at") 268 | def validate_created_at(cls, value): 269 | return datetime.datetime.fromtimestamp(value, datetime.timezone.utc) if value else value 270 | 271 | @classmethod 272 | def create_many(cls, data: List[Dict]) -> List['FileObject']: 273 | """ 274 | Creates many resources 275 | """ 276 | return [cls.parse_obj(d) for d in data] 277 | 278 | class EventObject(BaseResource): 279 | object: Optional[str] 280 | created_at: Optional[datetime.datetime] 281 | level: Optional[str] 282 | message: Optional[str] 283 | 284 | @property 285 | def since_seconds(self) -> int: 286 | if self.created_at is None: return -1 287 | return (datetime.datetime.now(datetime.timezone.utc) - self.created_at).total_seconds() 288 | 289 | 290 | class FileResource(BaseResource): 291 | file: Optional[Union[str, FileType, Any]] 292 | file_id: Optional[str] 293 | filename: Optional[str] = None 294 | purpose: FilePurpose = FilePurpose.fine_tune 295 | model: Optional[str] = None 296 | 297 | @validator("purpose") 298 | def validate_purpose(cls, value): 299 | return FilePurpose.parse_str(value) if isinstance(value, str) else value 300 | 301 | def get_params(self, **kwargs) -> List: 302 | """ 303 | Transforms the data to the req params 304 | """ 305 | files = [("purpose", (None, self.purpose.value))] 306 | if self.purpose == FilePurpose.search and self.model: 307 | files.append(("model", (None, self.model))) 308 | if self.file: 309 | file = File(self.file) 310 | files.append( 311 | ("file", (self.filename or file.name, file.read_bytes(), "application/octet-stream")) 312 | ) 313 | return files 314 | 315 | async def async_get_params(self, **kwargs) -> List: 316 | """ 317 | Transforms the data to the req params 318 | """ 319 | files = [("purpose", (None, self.purpose.value))] 320 | if self.purpose == FilePurpose.search and self.model: 321 | files.append(("model", (None, self.model))) 322 | if self.file: 323 | file = File(self.file) 324 | files.append( 325 | ("file", (self.filename or file.name, (await file.async_read_bytes() if _has_fileio else file.read_bytes()), "application/octet-stream")) 326 | ) 327 | return files 328 | 329 | @classmethod 330 | def create_from_batch( 331 | cls, 332 | batch: List[Union[Dict[str, Any], str]], 333 | output_path: Optional[str] = None, 334 | file_id: Optional[str] = None, 335 | filename: Optional[str] = None, 336 | 
purpose: Optional[FilePurpose] = None, 337 | **kwargs, 338 | ) -> Tuple['FileObject', Dict[str, Any]]: 339 | """ 340 | Creates a file object from a batch in jsonl format 341 | """ 342 | for n, b in enumerate(batch): 343 | if isinstance(b, dict): 344 | batch[n] = json.dumps(b, cls = ObjectEncoder) 345 | if output_path: 346 | output = pathlib.Path(output_path) 347 | else: 348 | tmp = tempfile.NamedTemporaryFile(delete = False) 349 | tmp.close() 350 | output = pathlib.Path(tmp.name) 351 | 352 | with output.open('w') as f: 353 | for b in batch: 354 | f.write(f'{b}\n') 355 | resource_fields = get_pyd_field_names(cls) 356 | resource_kwargs = {k: v for k, v in kwargs.items() if k in resource_fields} 357 | return_kwargs = {k: v for k, v in kwargs.items() if k not in resource_fields} 358 | return cls( 359 | file = output, 360 | purpose = purpose, 361 | filename = filename, 362 | file_id = file_id, 363 | **resource_kwargs 364 | ), return_kwargs 365 | 366 | 367 | -------------------------------------------------------------------------------- /async_openai/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from async_openai.utils.logs import logger 4 | from async_openai.utils.helpers import ( 5 | is_naive, 6 | total_seconds, 7 | remove_trailing_slash, 8 | parse_stream, 9 | aparse_stream, 10 | 11 | ) 12 | from async_openai.utils.config import ( 13 | OpenAISettings, 14 | get_settings 15 | ) 16 | 17 | from async_openai.utils.tokenization import ( 18 | modelname_to_contextsize, 19 | get_token_count, 20 | get_max_tokens, 21 | get_chat_tokens_count, 22 | get_max_chat_tokens, 23 | fast_tokenize, 24 | ) 25 | 26 | from async_openai.utils.resolvers import fix_json -------------------------------------------------------------------------------- /async_openai/utils/embedding.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """ 4 | Embedding Utility Helpers 5 | 6 | These are borrowed from the `openai` experiments library 7 | 8 | - We specifically use lazy loading to avoid runtime errors if the user does not have the required dependencies 9 | """ 10 | 11 | from lazyops.libs import lazyload as lz 12 | from lazyops.libs.proxyobj import ProxyObject 13 | from lazyops.types.common import Literal 14 | from typing import Dict, Callable, List, Union, Optional 15 | 16 | if lz.TYPE_CHECKING: 17 | from scipy import spatial 18 | import numpy as np 19 | from numpy import ndarray 20 | else: 21 | spatial = lz.LazyLoad("scipy.spatial") 22 | np = lz.LazyLoad("numpy") 23 | 24 | def _initialize_distance_dict(*args, **kwargs) -> Dict[str, Callable[..., float]]: 25 | """ 26 | Initializes the distance dictionary 27 | """ 28 | return { 29 | "cosine": spatial.distance.cosine, 30 | "euclidean": spatial.distance.euclidean, 31 | "inner_product": lambda x, y: -np.dot(x, y), 32 | "L1": spatial.distance.cityblock, 33 | "L2": spatial.distance.euclidean, 34 | "Linf": spatial.distance.chebyshev, 35 | } 36 | 37 | 38 | distance_metrics: Dict[str, Callable[..., float]] = ProxyObject(obj_getter = _initialize_distance_dict) 39 | 40 | MetricT = Literal["cosine", "L1", "L2", "Linf"] 41 | 42 | 43 | def distances_from_embeddings( 44 | query_embedding: List[float], 45 | embeddings: List[List[float]], 46 | distance_metric: Optional[MetricT] = "cosine", 47 | ) -> List[List]: 48 | """ 49 | Return the distances between a query embedding and a list of embeddings. 
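
Example (a minimal sketch; requires `scipy`/`numpy` at call time):

    distances_from_embeddings([1.0, 0.0], [[1.0, 0.0], [0.0, 1.0]], distance_metric = "cosine")
    # -> [0.0, 1.0]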
50 | """ 51 | return [ 52 | distance_metrics[distance_metric](query_embedding, embedding) 53 | for embedding in embeddings 54 | ] 55 | 56 | 57 | def indices_of_nearest_neighbors_from_distances( 58 | distances: 'ndarray', 59 | reverse: Optional[bool] = False, 60 | ) -> 'ndarray': 61 | """ 62 | Return a list of indices of nearest neighbors from a list of distances. 63 | """ 64 | return np.argsort(distances)[::-1] if reverse else np.argsort(distances) 65 | # if reverse: distances = distances[::-1] 66 | # return np.argsort(distances) -------------------------------------------------------------------------------- /async_openai/utils/helpers.py: -------------------------------------------------------------------------------- 1 | 2 | import random 3 | import inspect 4 | import aiohttpx 5 | import bisect 6 | import itertools 7 | 8 | from datetime import datetime, timedelta 9 | 10 | from typing import Dict, Optional, Iterator, AsyncIterator, Union, List, Tuple 11 | from lazyops.utils.helpers import timed, timer, is_coro_func 12 | 13 | __all__ = [ 14 | 'is_naive', 15 | 'total_seconds', 16 | 'remove_trailing_slash', 17 | 'full_name', 18 | 'merge_dicts', 19 | 'is_coro_func', 20 | 'timed', 21 | 'timer', 22 | 'parse_stream', 23 | 'aparse_stream', 24 | ] 25 | 26 | 27 | def merge_dicts(x: Dict, y: Dict): 28 | z = x.copy() 29 | z.update(y) 30 | return z 31 | 32 | 33 | def full_name(func, follow_wrapper_chains=True): 34 | """ 35 | Return full name of `func` by adding the module and function name. 36 | 37 | If this function is decorated, attempt to unwrap it till the original function to use that 38 | function name by setting `follow_wrapper_chains` to True. 39 | """ 40 | if follow_wrapper_chains: func = inspect.unwrap(func) 41 | return f'{func.__module__}.{func.__qualname__}' 42 | 43 | def is_naive(dt: datetime): 44 | """Determines if a given datetime.datetime is naive.""" 45 | return dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None 46 | 47 | 48 | def total_seconds(delta: timedelta): 49 | """Determines total seconds with python < 2.7 compat.""" 50 | # http://stackoverflow.com/questions/3694835/python-2-6-5-divide-timedelta-with-timedelta 51 | return (delta.microseconds + (delta.seconds + delta.days * 24 * 3600) * 1e6) / 1e6 52 | 53 | 54 | def remove_trailing_slash(host: str): 55 | """ 56 | Removes trailing slash from a host if it exists. 57 | """ 58 | return host[:-1] if host.endswith("/") else host 59 | 60 | 61 | def parse_stream_line_bytes(line: bytes) -> Optional[str]: 62 | """ 63 | Parse a line from a Server-Sent Events stream. 64 | """ 65 | if line: 66 | if line.strip() == b"data: [DONE]": 67 | # return here will cause GeneratorExit exception in urllib3 68 | # and it will close http connection with TCP Reset 69 | return None 70 | if line.startswith(b"data: "): 71 | line = line[len(b"data: "):] 72 | return line.decode("utf-8") 73 | else: 74 | return None 75 | return None 76 | 77 | 78 | def parse_stream_line_string(line: str) -> Optional[str]: 79 | """ 80 | Parse a line from a Server-Sent Events stream. 81 | """ 82 | if line: 83 | if line.strip() == "data: [DONE]": 84 | # return here will cause GeneratorExit exception in urllib3 85 | # and it will close http connection with TCP Reset 86 | return None 87 | return line[len("data: "):] if line.startswith("data: ") else None 88 | return None 89 | 90 | def parse_stream_line(line: Union[str, bytes]) -> Optional[str]: 91 | """ 92 | Parse a line from a Server-Sent Events stream. 
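
Examples (illustrative):

    parse_stream_line('data: {"id": 1}')  # -> '{"id": 1}'
    parse_stream_line('data: [DONE]')     # -> None (end-of-stream sentinel)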
93 | """ 94 | if isinstance(line, bytes): 95 | return parse_stream_line_bytes(line) 96 | elif isinstance(line, str): 97 | return parse_stream_line_string(line) 98 | else: 99 | raise TypeError("line must be str or bytes") 100 | 101 | 102 | def parse_stream(response: aiohttpx.Response) -> Iterator[str]: 103 | """ 104 | Parse a Server-Sent Events stream. 105 | """ 106 | for line in response.iter_lines(): 107 | _line = parse_stream_line(line) 108 | if _line is not None: 109 | yield _line 110 | 111 | async def aparse_stream(response: aiohttpx.Response) -> AsyncIterator[str]: 112 | """ 113 | Parse a Server-Sent Events stream. 114 | """ 115 | async for line in response.aiter_lines(): 116 | _line = parse_stream_line(line) 117 | if _line is not None: 118 | yield _line 119 | 120 | 121 | def weighted_choice(choices: Union[List[Tuple[str, float]], Dict[str, float]]) -> str: 122 | """ 123 | Randomly selects a choice based on the weights provided 124 | """ 125 | if isinstance(choices, dict): 126 | choices = list(choices.items()) 127 | weights = list(zip(*choices))[1] 128 | return choices[bisect.bisect( 129 | list(itertools.accumulate(weights)), 130 | random.uniform(0, sum(weights)) 131 | )][0] 132 | 133 | -------------------------------------------------------------------------------- /async_openai/utils/logs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from lazyops.utils.logs import get_logger, change_logger_level, null_logger 3 | 4 | # to prevent recursive imports, we'll just use os environ here 5 | if os.getenv('DEBUG_ENABLED') == 'True': 6 | logger_level = 'DEV' 7 | else: 8 | logger_level: str = os.getenv('LOGGER_LEVEL', 'INFO').upper() 9 | 10 | logger = get_logger(logger_level) -------------------------------------------------------------------------------- /async_openai/utils/presets/fireworks.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for fireworks.ai External Provider 2 | name: fireworks 3 | config: 4 | api_base: https://api.fireworks.ai 5 | api_path: inference/v1 6 | api_key_header: Authorization 7 | api_key_scheme: Bearer 8 | api_keys: env/FIREWORK_AI_API_KEYS 9 | api_key: env/FIREWORK_AI_API_KEY 10 | hf_compatible: true 11 | routes: 12 | chat: 13 | object_class: async_openai.schemas.external.fireworks.chat.ChatObject 14 | response_class: async_openai.schemas.external.fireworks.chat.ChatResponse 15 | route_class: async_openai.schemas.external.fireworks.chat.ChatRoute 16 | 17 | models: 18 | - name: accounts/fireworks/models/firefunction-v1 19 | aliases: 20 | - firefunction-v1 21 | context_length: 32768 22 | costs: 23 | unit: 1_000_000 24 | input: 0.7 25 | output: 2.8 26 | endpoints: 27 | - chat 28 | - name: accounts/fireworks/models/bleat-adapter 29 | aliases: 30 | - fireworks-bleat 31 | context_length: 4096 32 | costs: 33 | unit: 1_000_000 34 | input: 0.7 35 | output: 2.8 36 | endpoints: 37 | - completions 38 | - name: accounts/fireworks/models/mixtral-8x7b-instruct 39 | aliases: 40 | - mixtral-8x7b 41 | - mixtral-8x7b-instruct 42 | context_length: 32768 43 | costs: 44 | unit: 1_000_000 45 | input: 0.4 46 | output: 1.6 47 | endpoints: 48 | - chat 49 | - completions 50 | -------------------------------------------------------------------------------- /async_openai/utils/presets/together.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for together.xyz External Provider 2 | name: together 3 | config: 4 | api_base: 
https://api.together.xyz 5 | api_path: /v1 6 | api_key_header: Authorization 7 | api_key_scheme: Bearer 8 | api_keys: env/TOGETHER_API_KEYS 9 | api_key: env/TOGETHER_API_KEY 10 | max_retries: env/TOGETHER_MAX_RETRIES 11 | hf_compatible: true 12 | routes: 13 | chat: 14 | object_class: async_openai.schemas.external.together.chat.ChatObject 15 | response_class: async_openai.schemas.external.together.chat.ChatResponse 16 | route_class: async_openai.schemas.external.together.chat.ChatRoute 17 | embeddings: 18 | object_class: async_openai.schemas.external.together.embeddings.EmbeddingObject 19 | response_class: async_openai.schemas.external.together.embeddings.EmbeddingResponse 20 | route_class: async_openai.schemas.external.together.embeddings.EmbeddingRoute 21 | 22 | # routes: 23 | # completion: 24 | # path: /complete 25 | # params: 26 | # model: str 27 | # prompt: str 28 | # max_tokens: int 29 | # ... 30 | models: 31 | - name: mistralai/Mixtral-8x7B-Instruct-v0.1 32 | aliases: 33 | - mixtral-8x7b 34 | - mixtral-8x7b-instruct 35 | context_length: 32768 36 | costs: 37 | unit: 1_000_000 38 | input: 0.6 39 | output: 0.6 40 | endpoints: 41 | - chat 42 | - completions 43 | - name: mistralai/Mistral-7B-Instruct-v0.1 44 | aliases: 45 | - mistral-7b-instruct-v1 46 | context_length: 4096 47 | costs: 48 | unit: 1_000_000 49 | input: 0.2 50 | output: 0.2 51 | endpoints: 52 | - chat 53 | - completions 54 | - name: mistralai/Mistral-7B-Instruct-v0.2 55 | aliases: 56 | - mistral-7b-instruct-v2 57 | context_length: 32768 58 | costs: 59 | unit: 1_000_000 60 | input: 0.2 61 | output: 0.2 62 | endpoints: 63 | - chat 64 | - completions 65 | 66 | # Embedding Models 67 | - name: togethercomputer/m2-bert-80M-2k-retrieval 68 | aliases: 69 | - m2-bert-80M-2k-retrieval 70 | - m2-bert-2k-retrieval 71 | - m2-bert-2k 72 | context_length: 2048 73 | costs: 74 | unit: 1_000_000 75 | input: 0.008 76 | endpoints: 77 | - embeddings 78 | 79 | - name: togethercomputer/m2-bert-80M-8k-retrieval 80 | aliases: 81 | - m2-bert-80M-8k-retrieval 82 | - m2-bert-8k-retrieval 83 | - m2-bert-8k 84 | context_length: 8192 85 | costs: 86 | unit: 1_000_000 87 | input: 0.008 88 | endpoints: 89 | - embeddings 90 | 91 | - name: togethercomputer/m2-bert-80M-32k-retrieval 92 | aliases: 93 | - m2-bert-80M-32k-retrieval 94 | - m2-bert-32k-retrieval 95 | - m2-bert-32k 96 | context_length: 32768 97 | costs: 98 | unit: 1_000_000 99 | input: 0.008 100 | endpoints: 101 | - embeddings -------------------------------------------------------------------------------- /async_openai/utils/presets/together_proxy.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for together.xyz External Provider with Helicone Proxy Support 2 | name: together 3 | config: 4 | api_base: https://api.together.xyz 5 | api_path: /v1 6 | api_key_header: Authorization 7 | api_key_scheme: Bearer 8 | api_keys: env/TOGETHER_API_KEYS 9 | api_key: env/TOGETHER_API_KEY 10 | max_retries: env/TOGETHER_MAX_RETRIES 11 | hf_compatible: true 12 | proxy_url: https://gateway.hconeai.com/v1 13 | proxy_headers: 14 | Helicone-Auth: Bearer env/HELICONE_API_KEY 15 | Helicone-Target-Provider: Together-AI 16 | Helicone-Target-Url: https://api.together.xyz 17 | routes: 18 | chat: 19 | object_class: async_openai.schemas.external.together.chat.ChatObject 20 | response_class: async_openai.schemas.external.together.chat.ChatResponse 21 | route_class: async_openai.schemas.external.together.chat.ChatRoute 22 | embeddings: 23 | object_class: 
async_openai.schemas.external.together.embeddings.EmbeddingObject 24 | response_class: async_openai.schemas.external.together.embeddings.EmbeddingResponse 25 | route_class: async_openai.schemas.external.together.embeddings.EmbeddingRoute 26 | 27 | models: 28 | - name: mistralai/Mixtral-8x7B-Instruct-v0.1 29 | aliases: 30 | - mixtral-8x7b 31 | - mixtral-8x7b-instruct 32 | context_length: 32768 33 | costs: 34 | unit: 1_000_000 35 | input: 0.6 36 | output: 0.6 37 | endpoints: 38 | - chat 39 | - completions 40 | - name: mistralai/Mistral-7B-Instruct-v0.1 41 | aliases: 42 | - mistral-7b-instruct-v1 43 | context_length: 4096 44 | costs: 45 | unit: 1_000_000 46 | input: 0.2 47 | output: 0.2 48 | endpoints: 49 | - chat 50 | - completions 51 | - name: mistralai/Mistral-7B-Instruct-v0.2 52 | aliases: 53 | - mistral-7b-instruct-v2 54 | context_length: 32768 55 | costs: 56 | unit: 1_000_000 57 | input: 0.2 58 | output: 0.2 59 | endpoints: 60 | - chat 61 | - completions 62 | 63 | # Embedding Models 64 | - name: togethercomputer/m2-bert-80M-2k-retrieval 65 | aliases: 66 | - m2-bert-80M-2k-retrieval 67 | - m2-bert-2k-retrieval 68 | - m2-bert-2k 69 | context_length: 2048 70 | costs: 71 | unit: 1_000_000 72 | input: 0.008 73 | endpoints: 74 | - embeddings 75 | 76 | - name: togethercomputer/m2-bert-80M-8k-retrieval 77 | aliases: 78 | - m2-bert-80M-8k-retrieval 79 | - m2-bert-8k-retrieval 80 | - m2-bert-8k 81 | context_length: 8192 82 | costs: 83 | unit: 1_000_000 84 | input: 0.008 85 | endpoints: 86 | - embeddings 87 | 88 | - name: togethercomputer/m2-bert-80M-32k-retrieval 89 | aliases: 90 | - m2-bert-80M-32k-retrieval 91 | - m2-bert-32k-retrieval 92 | - m2-bert-32k 93 | context_length: 32768 94 | costs: 95 | unit: 1_000_000 96 | input: 0.008 97 | endpoints: 98 | - embeddings -------------------------------------------------------------------------------- /async_openai/utils/resolvers.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | import json 4 | from typing import Optional, Callable, Dict, Union, List, Any 5 | from .logs import logger 6 | from .fixjson import fix_json 7 | 8 | 9 | _json_pattern = re.compile(r"({[^}]*$|{.*})", flags=re.DOTALL) 10 | 11 | def build_stack(json_str: str): 12 | stack = [] 13 | fixed_str = "" 14 | open_quotes = False 15 | 16 | # a flag indicating whether we've seen a comma or colon most recently 17 | # since last opening/closing a dict or list 18 | last_seen_comma_or_colon = None 19 | 20 | for i, char in enumerate(json_str): 21 | if not open_quotes: 22 | # opening a new nested structure 23 | if char in "{[": 24 | stack.append(char) 25 | last_seen_comma_or_colon = None 26 | # closing a nested structure 27 | elif char in "}]": 28 | if stack: stack.pop() # guard against unbalanced closing brackets 29 | last_seen_comma_or_colon = None 30 | if char in ",:": 31 | last_seen_comma_or_colon = char 32 | # opening or closing a string, but only if it's not escaped 33 | if char == '"' and i > 0 and json_str[i - 1] != "\\": 34 | open_quotes = not open_quotes 35 | 36 | fixed_str += char 37 | 38 | return (stack, fixed_str, open_quotes, last_seen_comma_or_colon) 39 | 40 | 41 | 42 | def is_truncated(json_str: str): 43 | """ 44 | Check if the json string is truncated by checking if the number of opening 45 | brackets is greater than the number of closing brackets.
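For example (illustrative): is_truncated('{"a": [1, 2') is True, since both the object and the array remain open on the stack, while is_truncated('{"a": [1, 2]}') is False.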
46 | """ 47 | stack, _, _, _ = build_stack(json_str) 48 | return len(stack) > 0 49 | 50 | 51 | def find_json_response(full_response: str, verbose: Optional[bool] = False): 52 | """ 53 | Takes a full response that might contain other strings and attempts to extract the JSON payload. 54 | Has support for truncated JSON where the JSON begins but the token window ends before the json 55 | is properly closed. 56 | """ 57 | # Deal with fully included responses as well as truncated responses that only have an opening "{" 58 | if full_response.startswith("{") and not full_response.endswith("}"): 59 | full_response += "}" 60 | 61 | extracted_responses = list(_json_pattern.finditer(full_response)) 62 | if not extracted_responses: 63 | logger.error( 64 | f"Unable to find a JSON payload in the response: `{full_response}`" 65 | ) 66 | return None 67 | 68 | if len(extracted_responses) > 1 and verbose: 69 | logger.error(f"Found more than one JSON candidate, continuing with the first... {extracted_responses}") 70 | 71 | extracted_response = extracted_responses[0] 72 | 73 | if is_truncated(extracted_response.group(0)): 74 | # Start at the same location and just expand to the end of the message 75 | extracted_str = full_response[extracted_response.start() :] 76 | else: 77 | extracted_str = extracted_response.group(0) 78 | 79 | return extracted_str 80 | 81 | def try_load_json( 82 | text: str, 83 | object_hook: Optional[Callable] = None, 84 | **kwargs, 85 | ): 86 | """ 87 | Attempts to load the text as JSON, falling back to `fix_json` on failure 88 | """ 89 | try: 90 | return json.loads(text, object_hook = object_hook, **kwargs) 91 | except Exception as e1: 92 | try: 93 | return json.loads(fix_json(text), object_hook = object_hook, **kwargs) 94 | except Exception as e2: 95 | logger.error(f"Unable to load JSON. Errors: {e1}, {e2}") 96 | raise e2 97 | 98 | 99 | def extract_json_response( 100 | full_response: str, 101 | verbose: Optional[bool] = False, 102 | raise_exceptions: Optional[bool] = False, 103 | object_hook: Optional[Callable] = None, 104 | ) -> Union[Dict[str, Any], List[Any], Any]: 105 | """ 106 | Returns the extracted JSON response from the full response 107 | """ 108 | extracted_str = find_json_response(full_response, verbose = verbose) 109 | if not extracted_str: 110 | return None 111 | try: 112 | return try_load_json(extracted_str, object_hook = object_hook) 113 | except Exception as e: 114 | if verbose: logger.trace(f"Unable to extract JSON response from {extracted_str}", error = e) 115 | if raise_exceptions: raise e 116 | return None -------------------------------------------------------------------------------- /async_openai/utils/tokenization.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import functools 4 | import tiktoken 5 | import contextlib 6 | from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from async_openai.schemas.chat import ChatMessage 10 | 11 | def modelname_to_contextsize(modelname: str) -> int: 12 | """ 13 | Return the maximum context size, in tokens, for a model. 14 | 15 | text-davinci-003: 4,097 tokens 16 | text-curie-001: 2,048 tokens 17 | text-babbage-001: 2,048 tokens 18 | text-ada-001: 2,048 tokens 19 | code-davinci-002: 8,000 tokens 20 | code-cushman-001: 2,048 tokens 21 | gpt-3.5-turbo: 4,096 tokens 22 | gpt-3.5-turbo-16k: 16,384 tokens 23 | gpt-4: 8,192 tokens 24 | gpt-4-32k: 32,768 tokens 25 | 26 | Args: 27 | modelname: The modelname we want to know the context size for.
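For example, "gpt-4-32k-0613" resolves to 32,768 and "gpt-3.5-turbo-16k" to 16,384; unrecognized names fall through to the davinci-style default of 4,097 below.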
28 | 29 | Returns: 30 | The maximum context size 31 | """ 32 | if modelname == "code-davinci-002": 33 | return 8000 34 | 35 | if modelname in { 36 | "text-curie-001", 37 | "text-babbage-001", 38 | "text-ada-001", 39 | "code-cushman-001", 40 | }: 41 | return 2048 42 | 43 | # Check GPT4 44 | if modelname.startswith("gpt-4") or modelname.startswith("gpt4"): 45 | if "32k" in modelname: 46 | return 32768 47 | return 16384 if "16k" in modelname else 8192 48 | 49 | # Check GPT3.5 50 | if "gpt" in modelname \ 51 | and "turbo" in modelname \ 52 | and ("3.5" in modelname or "35" in modelname): 53 | return 16384 if "16k" in modelname else 4096 54 | 55 | return 4097 56 | 57 | 58 | def get_encoder( 59 | model_name: str, 60 | ) -> tiktoken.Encoding: 61 | """ 62 | Returns the correct encoder for the model name. 63 | """ 64 | if "gpt" in model_name and "2" not in model_name: 65 | 66 | # Likely GPT4 or GPT3.5 67 | return tiktoken.get_encoding("cl100k_base") 68 | encoder = "gpt2" 69 | if model_name in {"text-davinci-003", "text-davinci-002"}: 70 | encoder = "p50k_base" 71 | if model_name.startswith("code"): 72 | encoder = "p50k_base" 73 | 74 | return tiktoken.get_encoding(encoder) 75 | 76 | @functools.lru_cache(maxsize = 2048) 77 | def get_token_count( 78 | text: str, 79 | model_name: str, 80 | ) -> int: 81 | """ 82 | Returns the number of tokens in the text. 83 | """ 84 | return len(get_encoder(model_name).encode(text)) 85 | 86 | 87 | def get_max_tokens( 88 | text: Union[str, List[str]], 89 | model_name: str, 90 | max_tokens: Optional[int] = None, 91 | padding_token_count: Optional[int] = 16 # tokens added to make sure we do not go over the limit 92 | ): 93 | """ 94 | Returns the maximum number of tokens that can be generated for a model. 95 | """ 96 | max_model_tokens = modelname_to_contextsize(model_name) - padding_token_count 97 | if isinstance(text, list): 98 | all_text_tokens = [get_token_count(t, model_name) for t in text] 99 | text_tokens = max(all_text_tokens) 100 | else: 101 | text_tokens = get_token_count(text, model_name) 102 | max_input_tokens = max_model_tokens - text_tokens 103 | if max_tokens is None: 104 | return max_input_tokens 105 | return min(max_input_tokens, max_tokens) 106 | # return modelname_to_contextsize(model_name) - get_token_count(text, model_name) 107 | 108 | 109 | def get_chat_tokens_count( 110 | messages: List[Union[Dict[str, str], 'ChatMessage']], 111 | model_name: str, 112 | reply_padding_token_count: Optional[int] = 3, 113 | message_padding_token_count: Optional[int] = 4, 114 | **kwargs 115 | ) -> int: 116 | """ 117 | Returns the number of tokens in the chat. 118 | """ 119 | num_tokens = 0 120 | for message in messages: 121 | if message.get('name'): 122 | num_tokens -= 1 123 | num_tokens += message_padding_token_count + get_token_count(message.get('content', ''), model_name) 124 | 125 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 126 | return num_tokens 127 | 128 | def get_max_chat_tokens( 129 | messages: List[Union[Dict[str, str], 'ChatMessage']], 130 | model_name: str, 131 | max_tokens: Optional[int] = None, 132 | reply_padding_token_count: Optional[int] = 3, 133 | message_padding_token_count: Optional[int] = 4, 134 | padding_token_count: Optional[int] = 16 # tokens added to make sure we do not go over the limit 135 | ): 136 | """ 137 | Returns the maximum number of tokens that can be generated for a model.
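Roughly: modelname_to_contextsize(model) - padding_token_count - prompt tokens, optionally capped by max_tokens. Illustrative arithmetic: a single 100-token user message on "gpt-3.5-turbo" yields 4096 - 16 - (100 + 4 + 3) = 3973 generatable tokens.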
138 | """ 139 | 140 | num_tokens = 0 141 | for message in messages: 142 | if message.get('name'): 143 | num_tokens -= 1 144 | num_tokens += message_padding_token_count + get_token_count(message.get('content', ''), model_name) 145 | 146 | num_tokens += reply_padding_token_count # every reply is primed with <|start|>assistant<|message|> 147 | max_model_tokens = modelname_to_contextsize(model_name) - padding_token_count 148 | max_input_tokens = max_model_tokens - num_tokens 149 | if max_tokens is None: 150 | return max_input_tokens 151 | return min(max_input_tokens, max_tokens) 152 | 153 | 154 | def fast_tokenize(text: Any) -> int: 155 | """ 156 | Do a very fast tokenization of the text 157 | by estimating the number of tokens based on the 158 | number of characters in the string. 159 | 160 | 1 token ~= 4 characters 161 | """ 162 | return len(str(text)) // 4 -------------------------------------------------------------------------------- /async_openai/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.0.53' -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | from setuptools import setup, find_packages 4 | 5 | if sys.version_info.major != 3: 6 | raise RuntimeError("This package requires Python 3+") 7 | 8 | pkg_name = 'async_openai' 9 | gitrepo = 'GrowthEngineAI/async-openai' 10 | 11 | root = Path(__file__).parent 12 | version = root.joinpath('async_openai/version.py').read_text().split('VERSION = ', 1)[-1].strip().replace('-', '').replace("'", '') 13 | 14 | requirements = [ 15 | 'aiohttpx >= 0.0.12', 16 | # 'file-io', 17 | 'backoff', 18 | 'tiktoken', 19 | 'lazyops >= 0.2.76', # Pydantic Support 20 | 'pydantic', 21 | 'jinja2', 22 | 'pyyaml', 23 | # 'pydantic-settings', # remove to allow for v1/v2 support 24 | ] 25 | 26 | if sys.version_info.minor < 8: 27 | requirements.append('typing_extensions') 28 | 29 | extras = { 30 | 'cache': ['kvdb'], # Adds caching support 31 | 'utils': ['numpy', 'scipy'] # Adds embedding utility support 32 | } 33 | 34 | args = { 35 | 'packages': find_packages(include = [f'{pkg_name}', f'{pkg_name}.*',]), 36 | 'install_requires': requirements, 37 | 'include_package_data': True, 38 | 'long_description': root.joinpath('README.md').read_text(encoding='utf-8'), 39 | 'entry_points': { 40 | "console_scripts": [] 41 | }, 42 | 'extras_require': extras, 43 | } 44 | 45 | setup( 46 | name = pkg_name, 47 | version = version, 48 | url=f'https://github.com/{gitrepo}', 49 | license='MIT Style', 50 | description='Unofficial Async Python client library for the OpenAI API', 51 | author='Tri Songz', 52 | author_email='ts@growthengineai.com', 53 | long_description_content_type="text/markdown", 54 | classifiers=[ 55 | 'Intended Audience :: Developers', 56 | 'License :: OSI Approved :: MIT License', 57 | 'Programming Language :: Python :: 3.7', 58 | 'Topic :: Software Development :: Libraries', 59 | ], 60 | **args 61 | ) -------------------------------------------------------------------------------- /tests/chat.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from client import OpenAI 3 | from async_openai.utils import logger 4 | 5 | async def run_test(): 6 | 7 | model = "gpt-3.5-turbo-16k" 8 | 9 | result = await OpenAI.chat.async_create( 10 | model = model, 11 | messages = [ 12 | {"role": "user", "content": 
"Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"} 13 | ], 14 | ) 15 | logger.info(f'Result Model: {result}') 16 | logger.info(f'Result Type: {type(result)}') 17 | 18 | logger.info(f'Result Text: {result.text}') 19 | logger.info(f'Result Chat Message: {result.messages}') 20 | 21 | logger.info(f'Result Usage: {result.usage}') 22 | logger.info(f'Result Consumption: {result.consumption}') 23 | 24 | 25 | 26 | result = OpenAI.chat.create( 27 | messages = [ 28 | {"role": "user", "content": "Translate the following English text to French: “Multiple models, each with different capabilities and price points. Prices are per 1,000 tokens. You can think of tokens as pieces of words, where 1,000 tokens is about 750 words. This paragraph is 35 tokens”"} 29 | ], 30 | ) 31 | 32 | logger.info(f'Result Model: {result}') 33 | logger.info(f'Result Type: {type(result)}') 34 | 35 | logger.info(f'Result Text: {result.text}') 36 | logger.info(f'Result Chat Message: {result.messages}') 37 | 38 | logger.info(f'Result Usage: {result.usage}') 39 | 40 | 41 | 42 | asyncio.run(run_test()) 43 | 44 | -------------------------------------------------------------------------------- /tests/chat_functions.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from enum import Enum 3 | from client_rotate import OpenAI 4 | from async_openai.utils import logger 5 | from pydantic import BaseModel, Field 6 | 7 | 8 | class Unit(str, Enum): 9 | celsius = "celsius" 10 | fahrenheit = "fahrenheit" 11 | 12 | class Weather(BaseModel): 13 | location: str = Field(..., description="The city and state, e.g. 
San Francisco, CA.") 14 | unit: Unit = Field(Unit.fahrenheit) 15 | 16 | functions = [ 17 | { 18 | "name": "get_current_weather", 19 | "description": "Get the current weather in a given location", 20 | "parameters": Weather, 21 | } 22 | ] 23 | 24 | async def run_test(): 25 | 26 | model = "gpt-3.5-turbo-instruct" 27 | 28 | result = await OpenAI.chat.async_create( 29 | model = model, 30 | messages = [ 31 | {"role": "user", "content": "What's the weather like in Boston today?"} 32 | ], 33 | functions = functions, 34 | ) 35 | logger.info(f'Result Model: {result}') 36 | logger.info(f'Result Type: {type(result)}') 37 | 38 | logger.info(f'Result Text: {result.text}') 39 | logger.info(f'Result Chat Message: {result.messages}') 40 | logger.info(f'Result Chat Function: {result.function_results}') 41 | 42 | logger.info(f'Result Usage: {result.usage}') 43 | logger.info(f'Result Consumption: {result.consumption}') 44 | 45 | 46 | 47 | result = OpenAI.chat.create( 48 | model = model, 49 | messages = [ 50 | {"role": "user", "content": "What's the weather like in Boston today?"} 51 | ], 52 | functions = functions, 53 | ) 54 | 55 | logger.info(f'Result Model: {result}') 56 | logger.info(f'Result Type: {type(result)}') 57 | 58 | logger.info(f'Result Text: {result.text}') 59 | logger.info(f'Result Chat Message: {result.messages}') 60 | logger.info(f'Result Chat Function: {result.function_results}') 61 | 62 | logger.info(f'Result Usage: {result.usage}') 63 | 64 | 65 | logger.info(functions) 66 | logger.info(Weather.schema_json(indent=2)) 67 | # asyncio.run(run_test()) 68 | 69 | -------------------------------------------------------------------------------- /tests/client.py: -------------------------------------------------------------------------------- 1 | from async_openai import OpenAI 2 | 3 | org_id = 'org-...' 4 | api_key = 'sk-...' 5 | 6 | OpenAI.configure( 7 | api_key = api_key, 8 | organization = org_id, 9 | debug_enabled = True, 10 | ) -------------------------------------------------------------------------------- /tests/client_rotate.py: -------------------------------------------------------------------------------- 1 | from async_openai import OpenAI 2 | 3 | org_id = 'org-...' 4 | api_key = 'sk-...' 5 | 6 | azure_api_base = "https://....openai.azure.com/" 7 | 8 | # azure_api_version = "2023-03-15-preview" 9 | azure_api_version = "2023-07-01-preview" 10 | azure_api_key = "...." 
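# Note: with enable_rotating_clients = True (set below), the manager can rotate between the configured OpenAI and Azure clients, and prioritize = "azure" selects the Azure client first; OpenAI.rotate_client(verbose = True) at the bottom of this script advances the rotation manually.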
11 | 12 | OpenAI.configure( 13 | # OpenAI Configuration 14 | api_key = api_key, 15 | organization = org_id, 16 | debug_enabled = True, 17 | 18 | # Azure Configuration 19 | azure_api_base = azure_api_base, 20 | azure_api_version = azure_api_version, 21 | azure_api_key = azure_api_key, 22 | enable_rotating_clients = True, 23 | prioritize = "azure", 24 | ) 25 | 26 | 27 | print(OpenAI.settings.azure.dict()) 28 | OpenAI.get_current_client_info(verbose = True) 29 | OpenAI.rotate_client(verbose = True) -------------------------------------------------------------------------------- /tests/completion.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from client import OpenAI 3 | from async_openai.utils import logger 4 | 5 | async def run_test(): 6 | result = await OpenAI.completions.async_create( 7 | prompt = 'say this is a test', 8 | max_tokens = 4, 9 | stream = False 10 | ) 11 | logger.info(f'Result Model: {result}') 12 | logger.info(f'Result Type: {type(result)}') 13 | 14 | logger.info(f'Result Text: {result.text}') 15 | logger.info(f'Result Usage: {result.usage}') 16 | logger.info(f'Result Consumption: {result.consumption}') 17 | 18 | 19 | 20 | result = OpenAI.completions.create( 21 | prompt = 'say this is a test', 22 | max_tokens = 4, 23 | stream = True 24 | ) 25 | 26 | 27 | logger.info(f'Result Model: {result}') 28 | logger.info(f'Result Type: {type(result)}') 29 | 30 | logger.info(f'Result Text: {result.text}') 31 | logger.info(f'Result Usage: {result.usage}') 32 | 33 | 34 | 35 | asyncio.run(run_test()) 36 | 37 | -------------------------------------------------------------------------------- /tests/external_provider.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # os.environ['TOGETHER_API_KEY'] = 'test123' 4 | os.environ['TOGETHER_API_KEYS'] = '[test1253, test4565]' 5 | 6 | from async_openai.utils.external_config import ExternalProviderSettings 7 | 8 | def test_external_provider(): 9 | s = ExternalProviderSettings.from_preset('together') 10 | print(s) 11 | 12 | test_external_provider() --------------------------------------------------------------------------------