├── setup.py ├── requirements.txt ├── pyproject.toml ├── LICENSE ├── mock_test.py ├── real_test.py ├── .gitignore ├── openai_multi_client └── __init__.py └── README.md /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | aioprocessing 3 | tenacity 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "openai_multi_client" 7 | authors = [ 8 | { name = "Ziyang Hu", email = "hu.ziyang@cantab.net" }, 9 | ] 10 | description = "A parallel client for OpenAI API (and more)" 11 | readme = "README.md" 12 | requires-python = ">=3.7" 13 | keywords = ["openai", "client"] 14 | license = { text = "MIT" } 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | ] 18 | dependencies = [ 19 | "openai", 20 | "tenacity", 21 | "aioprocessing", 22 | ] 23 | version = "0.1.1" 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ziyang Hu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /mock_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from openai_multi_client import OpenAIMultiClient, Payload, OpenAIMultiOrderedClient 4 | 5 | 6 | def test(ordered): 7 | async def mock(payload: Payload): 8 | import random 9 | 10 | rand_wait_time = random.random() 11 | await asyncio.sleep(rand_wait_time) 12 | rand_fail = random.random() 13 | if rand_fail < 0.3: 14 | raise Exception("Mocked exception") 15 | return {"response": f"mocked {payload.metadata['id']}"} 16 | 17 | if ordered: 18 | api = OpenAIMultiOrderedClient(custom_api=mock, max_retries=3, retry_multiplier=2) 19 | else: 20 | api = OpenAIMultiClient(custom_api=mock, max_retries=3, retry_multiplier=2) 21 | 22 | def put_data(): 23 | for pid in range(100): 24 | pid = pid + 1 25 | print(f"Requesting {pid}") 26 | api.request({"prompt": f"This is test {pid}"}, metadata={'id': pid}, 27 | endpoint="completions") 28 | 29 | api.run_request_function(put_data) 30 | 31 | print('*' * 20) 32 | i = 0 33 | failed = 0 34 | for response in api: 35 | i += 1 36 | if response.failed: 37 | failed += 1 38 | print(f"Failed {response.metadata['id']}: {i}/100") 39 | else: 40 | print(f"Got response {response.metadata['id']}: {i}/100") 41 | 42 | print('*' * 20) 43 | print(f"Total failed: {failed}/100") 44 | print('*' * 20) 45 | 46 | 47 | if __name__ == '__main__': 48 | test(ordered=False) 49 | test(ordered=True) 50 | -------------------------------------------------------------------------------- /real_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai_multi_client import OpenAIMultiClient, Payload 3 | 4 | 5 | def test(): 6 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 7 | 8 | def make_requests(): 9 | for pid in range(9): 10 | pid = pid + 1 11 | print(f"Requesting {pid}") 12 | api.request(data={ 13 | "messages": [{"role": "user", "content": f"Can you tell me what is {pid} * {pid}?"}] 14 | }, metadata={'id': pid}) 15 | 16 | api.run_request_function(make_requests) 17 | 18 | print('*' * 20) 19 | i = 0 20 | failed = 0 21 | for result in api: 22 | i += 1 23 | if result.failed: 24 | failed += 1 25 | print(f"Failed {result.metadata['id']}") 26 | else: 27 | print(f"Got response for {result.metadata['id']}:", result.response['choices'][0]['message']['content']) 28 | 29 | print('*' * 20) 30 | print(f"Total failed: {failed}") 31 | print('*' * 20) 32 | 33 | 34 | def on_success(result: Payload): 35 | pid = result.metadata['id'] 36 | if result.failed: 37 | print(f"Failed {pid}") 38 | else: 39 | print(f"Got response for {pid}:", 40 | result.response['choices'][0]['message']['content']) 41 | 42 | 43 | def test_callback(): 44 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 45 | 46 | def make_requests(): 47 | for pid in range(9): 48 | pid = pid + 1 49 | 50 | print(f"Requesting {pid}") 51 | api.request(data={ 52 | "messages": [{"role": "user", "content": f"Can you tell me what is {pid} * {pid}?"}] 53 | }, callback=on_success, metadata={'id': pid}) 54 | 55 | api.run_request_function(make_requests) 56 | api.pull_all() 57 | 58 | 59 | if __name__ == '__main__': 60 | if 'OPENAI_API_KEY' not in os.environ: 61 | raise Exception("Please set the OPENAI_API_KEY environment variable to run this test") 62 | 63 | test_callback() 64 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 161 | .idea/ -------------------------------------------------------------------------------- /openai_multi_client/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from dataclasses import dataclass 4 | from threading import Thread 5 | from typing import Any, Optional 6 | 7 | from aioprocessing import AioJoinableQueue, AioQueue 8 | from tenacity import wait_random_exponential, stop_after_attempt, AsyncRetrying, RetryError 9 | import openai 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | @dataclass 15 | class Payload: 16 | endpoint: str 17 | data: dict 18 | metadata: Optional[dict] 19 | max_retries: int 20 | retry_multiplier: float 21 | retry_max: float 22 | attempt: int = 0 23 | failed: bool = False 24 | response: Any = None 25 | callback: Any = None 26 | 27 | def call_callback(self): 28 | if self.callback: 29 | self.callback(self) 30 | 31 | 32 | class OpenAIMultiClient: 33 | def __init__(self, 34 | concurrency: int = 10, 35 | max_retries: int = 10, 36 | wait_interval: float = 0, 37 | retry_multiplier: float = 1, 38 | retry_max: float = 60, 39 | endpoint: Optional[str] = None, 40 | data_template: Optional[dict] = None, 41 | metadata_template: Optional[dict] = None, 42 | custom_api=None): 43 | self._endpoint = endpoint 44 | self._wait_interval = wait_interval 45 | self._data_template = data_template or {} 46 | self._metadata_template = metadata_template or {} 47 | self._max_retries = max_retries 48 | self._retry_multiplier = retry_multiplier 49 | self._retry_max = retry_max 50 | self._concurrency = concurrency 51 | self._loop = asyncio.new_event_loop() 52 | self._in_queue = AioJoinableQueue(maxsize=concurrency) 53 | self._out_queue = AioQueue(maxsize=concurrency) 54 | self._event_loop_thread = Thread(target=self._run_event_loop) 55 | self._event_loop_thread.start() 56 | self._mock_api = custom_api 57 | for i in range(concurrency): 58 | asyncio.run_coroutine_threadsafe(self._worker(i), self._loop) 59 | 60 | def run_request_function(self, input_function, *args, stop_at_end=True, **kwargs): 61 | if stop_at_end: 62 | def f(*args, **kwargs): 63 | input_function(*args, **kwargs) 64 | self.close() 65 | else: 66 | f = input_function 67 | input_thread = Thread(target=f, args=args, kwargs=kwargs) 68 | input_thread.start() 69 | 70 | def _run_event_loop(self): 71 | 
asyncio.set_event_loop(self._loop) 72 | self._loop.run_forever() 73 | 74 | async def _process_payload(self, payload: Payload) -> Payload: 75 | logger.debug(f"Processing {payload}") 76 | if self._mock_api: 77 | payload.response = await self._mock_api(payload) 78 | elif payload.endpoint == "completions": 79 | payload.response = await openai.Completion.acreate(**payload.data) 80 | elif payload.endpoint == "chat.completions" or payload.endpoint == "chats": 81 | payload.response = await openai.ChatCompletion.acreate(**payload.data) 82 | elif payload.endpoint == "embeddings": 83 | payload.response = await openai.Embedding.acreate(**payload.data) 84 | elif payload.endpoint == "edits": 85 | payload.response = await openai.Edit.acreate(**payload.data) 86 | elif payload.endpoint == "images": 87 | payload.response = await openai.Image.acreate(**payload.data) 88 | elif payload.endpoint == "fine-tunes": 89 | payload.response = await openai.FineTune.acreate(**payload.data) 90 | else: 91 | raise ValueError(f"Unknown endpoint {payload.endpoint}") 92 | logger.debug(f"Processed {payload}") 93 | return payload 94 | 95 | async def _worker(self, i): 96 | while True: 97 | payload = await self._in_queue.coro_get() 98 | 99 | if payload is None: 100 | logger.debug(f"Exiting worker {i}") 101 | self._in_queue.task_done() 102 | break 103 | 104 | try: 105 | async for attempt in AsyncRetrying( 106 | wait=wait_random_exponential(multiplier=payload.retry_multiplier, max=payload.retry_max), 107 | stop=stop_after_attempt(payload.max_retries)): 108 | with attempt: 109 | try: 110 | payload.attempt = attempt.retry_state.attempt_number 111 | payload = await self._process_payload(payload) 112 | await self._out_queue.coro_put(payload) 113 | self._in_queue.task_done() 114 | except Exception: 115 | logger.exception(f"Error processing {payload}") 116 | raise 117 | except RetryError: 118 | payload.failed = True 119 | logger.error(f"Failed to process {payload}") 120 | await self._out_queue.coro_put(payload) 121 | self._in_queue.task_done() 122 | await asyncio.sleep(self._wait_interval) 123 | 124 | def close(self): 125 | try: 126 | for i in range(self._concurrency): 127 | self._in_queue.put(None) 128 | self._in_queue.join() 129 | self._out_queue.put(None) 130 | self._loop.call_soon_threadsafe(self._loop.stop) 131 | self._event_loop_thread.join() 132 | except Exception as e: 133 | logger.error(f"Error closing: {e}") 134 | 135 | def __iter__(self): 136 | return self 137 | 138 | def __next__(self): 139 | out = self._out_queue.get() 140 | if out is None: 141 | raise StopIteration 142 | out.call_callback() 143 | return out 144 | 145 | def request(self, 146 | data: dict, 147 | endpoint: Optional[str] = None, 148 | metadata: Optional[dict] = None, 149 | callback: Any = None, 150 | max_retries: Optional[int] = None, 151 | retry_multiplier: Optional[float] = None, 152 | retry_max: Optional[float] = None): 153 | payload = Payload( 154 | endpoint=endpoint or self._endpoint, 155 | data={**self._data_template, **data}, 156 | metadata={**self._metadata_template, **(metadata or {})}, 157 | callback=callback, 158 | max_retries=max_retries or self._max_retries, 159 | retry_multiplier=retry_multiplier or self._retry_multiplier, 160 | retry_max=retry_max or self._retry_max 161 | ) 162 | self._in_queue.put(payload) 163 | 164 | def pull_all(self): 165 | for _ in self: 166 | pass 167 | 168 | 169 | class OrderedPayload(Payload): 170 | put_counter: int 171 | 172 | def __init__(self, *args, put_counter, **kwargs): 173 | super().__init__(*args, **kwargs) 174 | 
self.put_counter = put_counter 175 | 176 | 177 | class OpenAIMultiOrderedClient(OpenAIMultiClient): 178 | def __init__(self, *args, **kwargs): 179 | super().__init__(*args, **kwargs) 180 | self._put_counter = 0 181 | self._get_counter = 0 182 | self._get_cache = {} 183 | self._stopped = False 184 | 185 | def __iter__(self): 186 | return self 187 | 188 | def __next__(self): 189 | while True: 190 | if self._stopped: 191 | out = None 192 | else: 193 | out = self._out_queue.get() 194 | if out is None: 195 | self._stopped = True 196 | if self._get_counter == self._put_counter: 197 | raise StopIteration 198 | else: 199 | out = self._get_cache[self._get_counter] 200 | del self._get_cache[self._get_counter] 201 | self._get_counter += 1 202 | out.call_callback() 203 | return out 204 | 205 | data_counter = out.put_counter 206 | if data_counter == self._get_counter: 207 | self._get_counter += 1 208 | out.call_callback() 209 | return out 210 | self._get_cache[data_counter] = out 211 | if self._get_counter in self._get_cache: 212 | out = self._get_cache[self._get_counter] 213 | del self._get_cache[self._get_counter] 214 | self._get_counter += 1 215 | out.call_callback() 216 | return out 217 | 218 | def request(self, 219 | data: dict, 220 | endpoint: Optional[str] = None, 221 | metadata: Optional[dict] = None, 222 | callback: Any = None, 223 | max_retries: Optional[int] = None, 224 | retry_multiplier: Optional[float] = None, 225 | retry_max: Optional[float] = None): 226 | payload = OrderedPayload( 227 | endpoint=endpoint or self._endpoint, 228 | data={**self._data_template, **data}, 229 | metadata={**self._metadata_template, **(metadata or {})}, 230 | callback=callback, 231 | max_retries=max_retries or self._max_retries, 232 | retry_multiplier=retry_multiplier or self._retry_multiplier, 233 | retry_max=retry_max or self._retry_max, 234 | put_counter=self._put_counter 235 | ) 236 | self._put_counter += 1 237 | self._in_queue.put(payload) 238 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # openai-multi-client 2 | 3 | [![pypi](https://img.shields.io/pypi/v/openai_multi_client)](https://pypi.org/project/openai_multi_client/) 4 | 5 | `openai-multi-client` is a Python library that allows you to easily make multiple concurrent requests to the OpenAI API, 6 | either in order or unordered, with built-in retries for failed requests. It keeps your application code synchronous and 7 | easy to understand, without you having to reason about concurrency and deadlocks. This library is particularly useful 8 | when working with the OpenAI API for tasks that require a large number of requests. 9 | 10 | ### Table of Contents 11 | 12 | 1. [Motivation](#Motivation) 13 | 2. [Features](#Features) 14 | 3. [Installation](#Installation) 15 | 4. [Usage Example](#Usage-Example) 16 | 5. [API Reference](#API-Reference) 17 | 6. [Extended Example Adapted from Real-World Use](#Extended-Example-Adapted-from-Real-World-Use) 18 | 7. [Contributing](#Contributing) 19 | 8. [License](#License) 20 | 9. [Authorship Disclosure](#Authorship-Disclosure) 21 | 22 | ## Motivation 23 | 24 | Imagine you're sitting at your desk, eager to run a state-of-the-art language model analysis on your extensive database 25 | of collected articles. You're excited about discovering all the hidden gems and insights your data has to offer. 
26 | However, there's one tiny problem – if you send requests to the API in a serial manner, it would take a lifetime to 27 | complete. 28 | 29 | This is where the OpenAI Multi-Client comes in. This library was created to help you fully utilize OpenAI's API without 30 | having to wait too long for results. It's designed to manage concurrent API requests so you can focus on analyzing the 31 | insights provided by the language model. 32 | 33 | No longer do you have to wait for one request to finish before starting the next. With OpenAI Multi-Client, you can now 34 | send multiple requests simultaneously, while also ensuring that any failed requests will be retried automatically. 35 | Furthermore, the library can be configured to maintain the order of requests and responses. 36 | 37 | So, the next time you find yourself with a collection of articles waiting to be analyzed, remember that OpenAI 38 | Multi-Client is here to help. Happy analyzing! 39 | 40 | ## Features 41 | 42 | - Concurrently send multiple requests to the OpenAI API 43 | - Support for ordered and unordered request handling 44 | - Built-in retries for failed requests with customizable retry settings 45 | - Customizable API client for easy testing and mocking 46 | - Simple, easy-to-use interface 47 | 48 | ## Installation 49 | 50 | To install `openai-multi-client`, run: 51 | 52 | ```bash 53 | pip install openai-multi-client 54 | ``` 55 | 56 | ## Usage Example 57 | 58 | Here is an example of how to use the `openai-multi-client` library. 59 | 60 | ```python 61 | from openai_multi_client import OpenAIMultiClient 62 | 63 | # Remember to set the OPENAI_API_KEY environment variable to your API key 64 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 65 | 66 | 67 | def make_requests(): 68 | for num in range(1, 10): 69 | api.request(data={ 70 | "messages": [{ 71 | "role": "user", 72 | "content": f"Can you tell me what is {num} * {num}?" 73 | }] 74 | }, metadata={'num': num}) 75 | 76 | 77 | api.run_request_function(make_requests) 78 | 79 | for result in api: 80 | num = result.metadata['num'] 81 | response = result.response['choices'][0]['message']['content'] 82 | print(f"{num} * {num}:", response) 83 | ``` 84 | 85 | If you want the answers to be in the same order as the requests are sent, import and use `OpenAIMultiOrderedClient` 86 | instead of `OpenAIMultiClient`. 87 | 88 | If you prefer, you can use callback-style programming instead of iterating over the results: 89 | 90 | ```python 91 | from openai_multi_client import OpenAIMultiClient, Payload 92 | 93 | # Remember to set the OPENAI_API_KEY environment variable to your API key 94 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 95 | 96 | 97 | def on_result(result: Payload): 98 | num = result.metadata['num'] 99 | response = result.response['choices'][0]['message']['content'] 100 | print(f"{num} * {num}:", response) 101 | 102 | 103 | def make_requests(): 104 | for num in range(1, 10): 105 | api.request(data={ 106 | "messages": [{ 107 | "role": "user", 108 | "content": f"Can you tell me what is {num} * {num}?" 109 | }] 110 | }, metadata={'num': num}, callback=on_result) 111 | 112 | 113 | api.run_request_function(make_requests) 114 | api.pull_all() 115 | ``` 116 | 117 | You can find more complete examples [here](./real_test.py) and [here](./mock_test.py).
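Requests that still fail after all retries are not silently dropped: they are yielded (or handed to your callback) with `failed` set to `True` and `response` left as `None`, so check for that before reading the response. A minimal variant of the loop above that does this, reusing the same `'num'` metadata key:

```python
for result in api:
    if result.failed:
        # This request exhausted its retries; result.response is None here
        print(f"Request {result.metadata['num']} failed")
        continue
    num = result.metadata['num']
    print(f"{num} * {num}:", result.response['choices'][0]['message']['content'])
```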
118 | 119 | ## API Reference 120 | 121 | In the `OpenAIMultiClient` and `OpenAIMultiOrderedClient` classes, the `endpoint` and `data` parameters correspond to 122 | the endpoints and parameters expected by the official OpenAI API clients. 123 | 124 | ## Configuring API Keys and Endpoints 125 | 126 | Setting up OpenAI Multi-Client is straightforward. Since it utilizes the official OpenAI client under the hood, all you 127 | need to do is import the `openai` library and configure it as you usually would. 128 | 129 | To set up your API key, simply import the `openai` module and configure the API key using the following code: 130 | 131 | ```python 132 | import openai 133 | 134 | openai.api_key = "your_api_key_here" 135 | ``` 136 | 137 | Setting the environment variable `OPENAI_API_KEY` also works as expected. 138 | 139 | You can also configure the API endpoint if needed: 140 | 141 | ```python 142 | openai.api_base = "azure_openai_api_base_here" 143 | ``` 144 | 145 | Once you've configured the `openai` library with your API key and endpoint, OpenAI Multi-Client will automatically use 146 | these settings when sending requests to the API. This makes it easy to integrate OpenAI Multi-Client into your existing 147 | projects without having to worry about separate configurations. 148 | 149 | ### Endpoints 150 | 151 | The `endpoint` parameter in the `request()` method or during the initialization of the classes specifies which OpenAI 152 | API endpoint to use for the requests. The available endpoints are: 153 | 154 | - `"completions"`: For text completion requests using the `Completion` endpoint. 155 | - `"chats"` or `"chat.completions"`: For chat completion requests using the `ChatCompletion` endpoint. 156 | - `"embeddings"`: For embedding requests using the `Embedding` endpoint. 157 | - `"edits"`: For edit requests using the `Edit` endpoint. 158 | - `"images"`: For image requests using the `Image` endpoint. 159 | - `"fine-tunes"`: For fine-tuning requests using the `FineTune` endpoint. 160 | 161 | ### Data 162 | 163 | The `data` parameter in the `request()` method specifies the request data sent to the OpenAI API. The data should be a 164 | dictionary containing the required and optional parameters for the specified endpoint. For example: 165 | 166 | - For the `"completions"` endpoint, the `data` dictionary may include 167 | the `model`, `prompt`, `temperature`, `max_tokens`, and other parameters expected by the `Completion` endpoint. 168 | - For the `"chats"` endpoint, the `data` dictionary may include the `model`, `messages`, `temperature`, `max_tokens`, 169 | and other parameters expected by the `ChatCompletion` endpoint. 170 | 171 | When using the `OpenAIMultiClient` or `OpenAIMultiOrderedClient`, make sure to provide the appropriate `endpoint` and 172 | the corresponding `data` as required by the official OpenAI API clients. 173 | 174 | For more details, see the [official documentation](https://github.com/openai/openai-python). 175 | 176 | ### OpenAIMultiClient 177 | 178 | `OpenAIMultiClient` is the primary class for making unordered concurrent requests to the OpenAI API. 179 | 180 | #### Initialization 181 | 182 | You can initialize multiple instances of the client, and each will behave independently, with its own queue of requests 183 | and responses.
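For instance, you can run one client for chat completions and another for embeddings side by side; the extended example below mentions this as a way to gain extra concurrency. A minimal sketch, reusing the model names that appear elsewhere in this README:

```python
from openai_multi_client import OpenAIMultiClient

# Two independent clients, each with its own workers and queues
chat_api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"})
embedding_api = OpenAIMultiClient(endpoint="embeddings", data_template={"model": "text-embedding-ada-002"})
```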
The `OpenAIMultiClient` class can be initialized with the following parameters: 184 | 185 | ```python 186 | OpenAIMultiClient( 187 | concurrency=10, 188 | max_retries=10, 189 | wait_interval=0, 190 | retry_multiplier=1, 191 | retry_max=60, 192 | endpoint=None, 193 | data_template=None, 194 | metadata_template=None, 195 | custom_api=None) 196 | ``` 197 | 198 | - `concurrency`: (Optional) The number of concurrent requests. Default is 10. 199 | - `max_retries`: (Optional) The maximum number of retries for failed requests. Default is 10. 200 | - `wait_interval`: (Optional) The time each worker waits between processing requests. Default is 0. 201 | - `retry_multiplier`: (Optional) The multiplier for the waiting time between retries. Default is 1. 202 | - `retry_max`: (Optional) The maximum waiting time between retries. Default is 60. 203 | - `endpoint`: (Optional) The OpenAI API endpoint to be used, e.g., `"chats"` or `"completions"`. 204 | - `data_template`: (Optional) A template for the data sent with each request. The request data will be merged with this 205 | template. 206 | - `metadata_template`: (Optional) A template for the metadata associated with each request. The request metadata will be 207 | merged with this template. 208 | - `custom_api`: (Optional) A custom API function that can be used for testing or mocking the OpenAI API. You can also 209 | use this to connect to models other than LLMs. 210 | 211 | You should set `concurrency` to a sensible value based on your API rate limit. For paid customers using `gpt-3.5-turbo`, 212 | you have 3,500 requests per minute. Let's be generous and assume a request completes in one second. To avoid hitting the 213 | rate limit while maintaining a high throughput, you could set `concurrency` to a value like 50 or 100. Since you are not 214 | charged for failed requests and since exponential backoff is in effect, your requests will eventually complete even if 215 | you set `concurrency` to a high value, but as failed requests take up your rate limit, throughput will be lower. 216 | 217 | It is recommended to test your code with a mock API first when you are developing, because with the real API, high 218 | concurrency burns money fast. [This example](./mock_test.py) might be helpful. 219 | 220 | You can check your limits [here](https://platform.openai.com/account/rate-limits). 221 | 222 | #### Methods 223 | 224 | **Important Note**: Calling `request` may block the thread if the input queue is full. It is recommended to put the 225 | requesting logic into a function and call that function using `run_request_function` as done in the example, as it 226 | ensures that the requesting logic runs concurrently without blocking the main thread. The blocking of the input queue is 227 | a **key feature** of this library to ensure that your memory will not be flooded with a petabyte of pending requests 228 | streaming from your database. 229 | 230 | - `request(data, endpoint=None, metadata=None, callback=None, max_retries=None, retry_multiplier=None, retry_max=None)`: Adds a request 231 | to the queue. 232 | - `data`: The data (as a dict) to be sent with the request. 233 | - `endpoint`: (Optional) The API endpoint to be used for this specific request. 234 | - `metadata`: (Optional) Metadata associated with the request. 235 | - `callback`: (Optional) A callback function that will be called with the payload object. 236 | - `max_retries`: (Optional) The maximum number of retries for failed requests. Default is the value set during 237 | initialization.
238 | - `retry_multiplier`: (Optional) The multiplier for the waiting time between retries. Default is the value set 239 | during initialization. 240 | - `retry_max`: (Optional) The maximum waiting time between retries. Default is the value set during initialization. 241 | 242 | - `run_request_function(input_function, *args, stop_at_end=True, **kwargs)`: Executes the input function in a separate 243 | thread, allowing it to add requests to the queue without blocking. 244 | - `input_function`: A function that adds requests to the queue using the `request()` method. 245 | - `*args`: (Optional) Additional arguments passed to the input function. 246 | - `stop_at_end`: (Optional) Whether to close the client (stop the workers and the event loop) after the input function returns. Default is True. 247 | - `**kwargs`: (Optional) Additional keyword arguments passed to the input function. 248 | 249 | To retrieve results, use the client as an iterator. The following is also available: 250 | 251 | - `pull_all()`: Pulls all responses from the queue and discards them. Useful for driving the callbacks when you are not 252 | consuming the results yourself. 253 | 254 | ### OpenAIMultiOrderedClient 255 | 256 | `OpenAIMultiOrderedClient` is a subclass of `OpenAIMultiClient` for making ordered concurrent requests to the OpenAI 257 | API. The usage is the same as `OpenAIMultiClient`, but the responses will be returned in the order they were added to 258 | the queue. 259 | 260 | ### Logging 261 | 262 | The library uses the standard Python logging module. You can set the logging level 263 | using `logging.basicConfig(level=logging.INFO)` or `logging.basicConfig(level=logging.DEBUG)`. 264 | 265 | ## Extended Example Adapted from Real-World Use 266 | 267 | The scenario in the Motivation section is genuinely real. Although the OpenAI cookbook provides a [recipe](https://github.com/openai/openai-cookbook/blob/main/examples/api_request_parallel_processor.py) for making parallel requests, it relies on files for data persistence and can be quite challenging to adapt and use. We were surprised that a library like this didn't exist already, so we decided to create one ourselves. 268 | 269 | Here's a brief overview of the system. External and internal articles are continuously streamed from scrapers and other sources into a database. For each article, we let ChatGPT answer a few questions about it, and then we store the text and embeddings of the answers back into the database. The embeddings are indexed for nearest-neighbor search. At peak hours, we expect a high volume of articles, making the use of `OpenAIMultiClient` essential. 270 | 271 | We'll be using [CozoDB](https://github.com/cozodb/cozo), which we developed for our work, to store everything. CozoDB is a transactional hybrid relational-graph-vector database that uses Datalog for queries. In the real scenario that this example is based on, article deduplication, full-text search, and recursive, agentic behavior are also integrated using the functionalities provided by the database. However, these details have been omitted in the example for simplicity.
272 | 273 | The setup of the database and schema can be found in the code snippet below, where we create tables for articles and embeddings and index the embeddings for nearest-neighbor search: 274 | 275 | ```python 276 | from pycozo.client import Client 277 | 278 | # We use a global variable for the db for simplicity 279 | db = Client('rocksdb', 'analysis.db') # Using the RocksDB backend for persistence 280 | 281 | # Create the schema 282 | 283 | # The articles 284 | db.run(''' 285 | :create article { 286 | id: Uuid default rand_uuid_v4() # The key 287 | => 288 | title: String, # Title of the article 289 | text: String, # Text of the article 290 | analysis: Json default {} # Analyses will be stored in the JSON object 291 | } 292 | ''') 293 | 294 | # The embeddings are stored separately from the articles 295 | # Not strictly necessary, but cleaner 296 | db.run(''' 297 | :create embedding { 298 | id: Uuid, # The key consists of the article ID and the analysis key 299 | analysis_key: String # The analysis key 300 | => 301 | embedding: <F32; 1536> # The embedding of the analysis answer 302 | } 303 | ''') 304 | 305 | # Put the embeddings into an index for nearest-neighbor search 306 | db.run(''' 307 | ::hnsw create embedding:idx { 308 | dim: 1536, 309 | fields: [embedding], 310 | ef_construction: 200, 311 | m: 50, 312 | } 313 | ''') 314 | ``` 315 | 316 | The streaming service calls a function to insert articles into the database, and the database's ID is auto-generated: 317 | 318 | ```python 319 | def insert(title, text): 320 | db.run(''' 321 | ?[title, text] <- [[$title, $text]] 322 | 323 | :put article {=> title, text} 324 | ''', {'title': title, 'text': text}) 325 | ``` 326 | 327 | In another file, we set up the OpenAI multi-client and define the questions we want to ask. 328 | 329 | ```python 330 | from openai_multi_client import OpenAIMultiClient 331 | 332 | # We could use separate clients for embedding and analysis to achieve more concurrency 333 | # But we'll just use a single client for simplicity 334 | client = OpenAIMultiClient() 335 | 336 | QUESTIONS = { 337 | 'economic': ''' 338 | You are the economics advisor to company X. 339 | Read the article from the user and summarize the potential economic impact for company X implied by the article from your perspective. 340 | Write clearly and concisely, in one paragraph. If there is no economic impact, write "No economic impact." 341 | ''', 342 | 343 | 'political': ''' 344 | You are the political advisor to company X. 345 | Read the article from the user and summarize the potential political impact for company X implied by the article from your perspective. 346 | Write clearly and concisely, in one paragraph. If there is no political impact, write "No political impact." 347 | ''', 348 | 349 | 'technological': ''' 350 | You are the technology advisor to company X. 351 | Read the article from the user and summarize the potential technological impact for company X implied by the article from your perspective. 352 | Write clearly and concisely, in one paragraph. If there is no technological impact, write "No technological impact." 353 | ''' 354 | } 355 | ``` 356 | 357 | The core of the system involves using the client to ask questions and store the results back into the database. When the answers are inserted into the database, we use the client again to get the embeddings and store them in the database as well.
We use database callbacks to achieve this: 358 | 359 | 360 | ```python 361 | # Callback function to be called when the database is updated 362 | def insert_callback(op_name, new_rows, old_rows): 363 | # We only handle puts. Logic for handling deletes omitted for simplicity 364 | if op_name == 'Put': 365 | # Make a dict from the old rows replaced by the update 366 | existing = { 367 | id: analysis 368 | for [id, _title, _text, analysis] in old_rows 369 | } 370 | # Iterate over the inserted or updated rows 371 | for id, title, text, analysis in new_rows: 372 | # If the article already exists, we request embeddings for any newly added analyses 373 | if id in existing: 374 | old_analyses = existing[id] 375 | for key, value in analysis.items(): 376 | if key not in old_analyses: 377 | # Make a request for the embedding 378 | # The metadata are used for inserting the results back into the DB 379 | client.request( 380 | endpoint="embeddings", 381 | data={ 382 | 'model': "text-embedding-ada-002", 383 | 'input': f'{key} impact:\n{value}' 384 | }, 385 | metadata={ 386 | 'type': 'embedding', 387 | 'id': id, 388 | 'key': key 389 | }, 390 | callback=on_embedding_result 391 | ) 392 | else: 393 | # Otherwise, we ask for analyses for all questions 394 | context_text = f'{title}\n\n{text}' 395 | 396 | for key, question in QUESTIONS.items(): 397 | # Make a request for the analysis 398 | client.request( 399 | endpoint="chats", 400 | data={ 401 | # You can use GPT-4 if you are rich! 402 | "model": "gpt-3.5-turbo", 403 | "messages": [{ 404 | "role": "system", "content": question 405 | }, { 406 | "role": "user", "content": context_text 407 | }] 408 | 409 | }, 410 | metadata={ 411 | 'type': 'analysis', 412 | 'id': id, 413 | 'key': key 414 | }, 415 | callback=on_analysis_result 416 | ) 417 | 418 | 419 | # Register the callback; it will be called whenever an article is inserted or deleted 420 | # As the database already runs callbacks on a separate thread, 421 | # we don't need to use run_request_function 422 | callback_handle = db.register_callback('article', insert_callback) 423 | ``` 424 | 425 | Callback functions handle the results from the OpenAI client and write them back into the database: 426 | 427 | ```python 428 | def on_embedding_result(result): 429 | if result.failed: 430 | print(f'Failed to get embedding for article {result.metadata["id"]}') 431 | return 432 | 433 | embedding = result.response['data'][0]['embedding'] 434 | db.run(''' 435 | ?[id, analysis_key, embedding] <- [[$id, $analysis_key, $embedding]] 436 | :put embedding {id, analysis_key => embedding} 437 | ''', { 438 | 'id': result.metadata['id'], 439 | 'analysis_key': result.metadata['key'], 440 | 'embedding': embedding 441 | }) 442 | 443 | 444 | def on_analysis_result(result): 445 | if result.failed: 446 | print(f'Failed to get analysis for article {result.metadata["id"]}') 447 | return 448 | 449 | analysis = { 450 | result.metadata['key']: result.response['choices'][0]['message']['content'] 451 | } 452 | 453 | db.run(''' 454 | ?[id, analysis] := id = $id, *article{id, analysis: old}, analysis = old ++ $analysis 455 | :update article {id => analysis} 456 | ''', { 457 | 'id': result.metadata['id'], 458 | 'analysis': analysis 459 | }) 460 | ``` 461 | 462 | Finally, we start the system: 463 | 464 | ```python 465 | client.pull_all() 466 | ``` 467 | 468 | With this system in place, we can easily access the top news we're interested in by running nearest-neighbor vector searches: 469 | 470 | 471 | ```python 472 | for question in ['Red-alert level innovation from
competitors', 473 | 'Disruption to our supply chain', 474 | 'Dangers to our overseas personnel in the Middle East']: 475 | # the `embed` function also uses the OpenAI multi-client, but its definition is omitted. 476 | embedding = embed(question) 477 | res = db.run(''' 478 | ?[id, title, text] := ~article:idx{id, title, text | query: $q, k: 3} 479 | ''', {'q': embedding}) 480 | print(res) 481 | ``` 482 | 483 | As you can see from the example questions, it would be challenging to find relevant news articles in the database without the analysis. But with the analysis, we can get the relevant articles very quickly. This is the power of vector indexing. We can also set up more agents to monitor the analysis and notify the CEO when something important comes up, or even have a community of agents using the database to collaborate. Thanks to the OpenAI multi-client, these agents can move at the speed of thought! 484 | 485 | ## Contributing 486 | 487 | Contributions to `openai-multi-client` are welcome! Feel free to submit a pull request or open an issue on GitHub. 488 | 489 | ## License 490 | 491 | `openai-multi-client` is released under the MIT License. See the [LICENSE](LICENSE) file for details. 492 | 493 | ## Authorship Disclosure 494 | 495 | GPT-4 wrote most of this README by analyzing code, incorporating the techniques outlined before. --------------------------------------------------------------------------------