├── setup.py ├── requirements.txt ├── pyproject.toml ├── LICENSE ├── mock_test.py ├── real_test.py ├── .gitignore ├── openai_multi_client └── __init__.py └── README.md /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | aioprocessing 3 | tenacity 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "openai_multi_client" 7 | authors = [ 8 | { name = "Ziyang Hu", email = "hu.ziyang@cantab.net" }, 9 | ] 10 | description = "A parallel client for OpenAI API (and more)" 11 | readme = "README.md" 12 | requires-python = ">=3.7" 13 | keywords = ["openai", "client"] 14 | license = { text = "MIT" } 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | ] 18 | dependencies = [ 19 | "openai", 20 | "tenacity", 21 | "aioprocessing", 22 | ] 23 | version = "0.1.1" 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ziyang Hu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /mock_test.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from openai_multi_client import OpenAIMultiClient, Payload, OpenAIMultiOrderedClient 4 | 5 | 6 | def test(ordered): 7 | async def mock(payload: Payload): 8 | import random 9 | 10 | rand_wait_time = random.random() 11 | await asyncio.sleep(rand_wait_time) 12 | rand_fail = random.random() 13 | if rand_fail < 0.3: 14 | raise Exception("Mocked exception") 15 | return {"response": f"mocked {payload.metadata['id']}"} 16 | 17 | if ordered: 18 | api = OpenAIMultiOrderedClient(custom_api=mock, max_retries=3, retry_multiplier=2) 19 | else: 20 | api = OpenAIMultiClient(custom_api=mock, max_retries=3, retry_multiplier=2) 21 | 22 | def put_data(): 23 | for pid in range(100): 24 | pid = pid + 1 25 | print(f"Requesting {pid}") 26 | api.request({"prompt": f"This is test {pid}"}, metadata={'id': pid}, 27 | endpoint="completions") 28 | 29 | api.run_request_function(put_data) 30 | 31 | print('*' * 20) 32 | i = 0 33 | failed = 0 34 | for response in api: 35 | i += 1 36 | if response.failed: 37 | failed += 1 38 | print(f"Failed {response.metadata['id']}: {i}/100") 39 | else: 40 | print(f"Got response {response.metadata['id']}: {i}/100") 41 | 42 | print('*' * 20) 43 | print(f"Total failed: {failed}/100") 44 | print('*' * 20) 45 | 46 | 47 | if __name__ == '__main__': 48 | test(ordered=False) 49 | test(ordered=True) 50 | -------------------------------------------------------------------------------- /real_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from openai_multi_client import OpenAIMultiClient, Payload 3 | 4 | 5 | def test(): 6 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 7 | 8 | def make_requests(): 9 | for pid in range(9): 10 | pid = pid + 1 11 | print(f"Requesting {pid}") 12 | api.request(data={ 13 | "messages": [{"role": "user", "content": f"Can you tell me what is {pid} * {pid}?"}] 14 | }, metadata={'id': pid}) 15 | 16 | api.run_request_function(make_requests) 17 | 18 | print('*' * 20) 19 | i = 0 20 | failed = 0 21 | for result in api: 22 | i += 1 23 | if result.failed: 24 | failed += 1 25 | print(f"Failed {result.metadata['id']}") 26 | else: 27 | print(f"Got response for {result.metadata['id']}:", result.response['choices'][0]['message']['content']) 28 | 29 | print('*' * 20) 30 | print(f"Total failed: {failed}") 31 | print('*' * 20) 32 | 33 | 34 | def on_success(result: Payload): 35 | pid = result.metadata['id'] 36 | if result.failed: 37 | print(f"Failed {pid}") 38 | else: 39 | print(f"Got response for {pid}:", 40 | result.response['choices'][0]['message']['content']) 41 | 42 | 43 | def test_callback(): 44 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 45 | 46 | def make_requests(): 47 | for pid in range(9): 48 | pid = pid + 1 49 | 50 | print(f"Requesting {pid}") 51 | api.request(data={ 52 | "messages": [{"role": "user", "content": f"Can you tell me what is {pid} * {pid}?"}] 53 | }, callback=on_success, metadata={'id': pid}) 54 | 55 | api.run_request_function(make_requests) 56 | api.pull_all() 57 | 58 | 59 | if __name__ == '__main__': 60 | if 'OPENAI_API_KEY' not in os.environ: 61 | raise Exception("Please set the OPENAI_API_KEY environment variable to run this test") 62 | 63 | test_callback() 64 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 161 | .idea/ -------------------------------------------------------------------------------- /openai_multi_client/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import asyncio 3 | from dataclasses import dataclass 4 | from threading import Thread 5 | from typing import Any, Optional 6 | 7 | from aioprocessing import AioJoinableQueue, AioQueue 8 | from tenacity import wait_random_exponential, stop_after_attempt, AsyncRetrying, RetryError 9 | import openai 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | @dataclass 15 | class Payload: 16 | endpoint: str 17 | data: dict 18 | metadata: Optional[dict] 19 | max_retries: int 20 | retry_multiplier: float 21 | retry_max: float 22 | attempt: int = 0 23 | failed: bool = False 24 | response: Any = None 25 | callback: Any = None 26 | 27 | def call_callback(self): 28 | if self.callback: 29 | self.callback(self) 30 | 31 | 32 | class OpenAIMultiClient: 33 | def __init__(self, 34 | concurrency: int = 10, 35 | max_retries: int = 10, 36 | wait_interval: float = 0, 37 | retry_multiplier: float = 1, 38 | retry_max: float = 60, 39 | endpoint: Optional[str] = None, 40 | data_template: Optional[dict] = None, 41 | metadata_template: Optional[dict] = None, 42 | custom_api=None): 43 | self._endpoint = endpoint 44 | self._wait_interval = wait_interval 45 | self._data_template = data_template or {} 46 | self._metadata_template = metadata_template or {} 47 | self._max_retries = max_retries 48 | self._retry_multiplier = retry_multiplier 49 | self._retry_max = retry_max 50 | self._concurrency = concurrency 51 | self._loop = asyncio.new_event_loop() 52 | self._in_queue = AioJoinableQueue(maxsize=concurrency) 53 | self._out_queue = AioQueue(maxsize=concurrency) 54 | self._event_loop_thread = Thread(target=self._run_event_loop) 55 | self._event_loop_thread.start() 56 | self._mock_api = custom_api 57 | for i in range(concurrency): 58 | asyncio.run_coroutine_threadsafe(self._worker(i), self._loop) 59 | 60 | def run_request_function(self, input_function, *args, stop_at_end=True, **kwargs): 61 | if stop_at_end: 62 | def f(*args, **kwargs): 63 | input_function(*args, **kwargs) 64 | self.close() 65 | else: 66 | f = input_function 67 | input_thread = Thread(target=f, args=args, kwargs=kwargs) 68 | input_thread.start() 69 | 70 | def _run_event_loop(self): 71 | 
asyncio.set_event_loop(self._loop) 72 | self._loop.run_forever() 73 | 74 | async def _process_payload(self, payload: Payload) -> Payload: 75 | logger.debug(f"Processing {payload}") 76 | if self._mock_api: 77 | payload.response = await self._mock_api(payload) 78 | elif payload.endpoint == "completions": 79 | payload.response = await openai.Completion.acreate(**payload.data) 80 | elif payload.endpoint == "chat.completions" or payload.endpoint == "chats": 81 | payload.response = await openai.ChatCompletion.acreate(**payload.data) 82 | elif payload.endpoint == "embeddings": 83 | payload.response = await openai.Embedding.acreate(**payload.data) 84 | elif payload.endpoint == "edits": 85 | payload.response = await openai.Edit.acreate(**payload.data) 86 | elif payload.endpoint == "images": 87 | payload.response = await openai.Image.acreate(**payload.data) 88 | elif payload.endpoint == "fine-tunes": 89 | payload.response = await openai.FineTune.acreate(**payload.data) 90 | else: 91 | raise ValueError(f"Unknown endpoint {payload.endpoint}") 92 | logger.debug(f"Processed {payload}") 93 | return payload 94 | 95 | async def _worker(self, i): 96 | while True: 97 | payload = await self._in_queue.coro_get() 98 | 99 | if payload is None: 100 | logger.debug(f"Exiting worker {i}") 101 | self._in_queue.task_done() 102 | break 103 | 104 | try: 105 | async for attempt in AsyncRetrying( 106 | wait=wait_random_exponential(multiplier=payload.retry_multiplier, max=payload.retry_max), 107 | stop=stop_after_attempt(payload.max_retries)): 108 | with attempt: 109 | try: 110 | payload.attempt = attempt.retry_state.attempt_number 111 | payload = await self._process_payload(payload) 112 | await self._out_queue.coro_put(payload) 113 | self._in_queue.task_done() 114 | except Exception: 115 | logger.exception(f"Error processing {payload}") 116 | raise 117 | except RetryError: 118 | payload.failed = True 119 | logger.error(f"Failed to process {payload}") 120 | await self._out_queue.coro_put(payload) 121 | self._in_queue.task_done() 122 | await asyncio.sleep(self._wait_interval) 123 | 124 | def close(self): 125 | try: 126 | for i in range(self._concurrency): 127 | self._in_queue.put(None) 128 | self._in_queue.join() 129 | self._out_queue.put(None) 130 | self._loop.call_soon_threadsafe(self._loop.stop) 131 | self._event_loop_thread.join() 132 | except Exception as e: 133 | logger.error(f"Error closing: {e}") 134 | 135 | def __iter__(self): 136 | return self 137 | 138 | def __next__(self): 139 | out = self._out_queue.get() 140 | if out is None: 141 | raise StopIteration 142 | out.call_callback() 143 | return out 144 | 145 | def request(self, 146 | data: dict, 147 | endpoint: Optional[str] = None, 148 | metadata: Optional[dict] = None, 149 | callback: Any = None, 150 | max_retries: Optional[int] = None, 151 | retry_multiplier: Optional[float] = None, 152 | retry_max: Optional[float] = None): 153 | payload = Payload( 154 | endpoint=endpoint or self._endpoint, 155 | data={**self._data_template, **data}, 156 | metadata={**self._metadata_template, **(metadata or {})}, 157 | callback=callback, 158 | max_retries=max_retries or self._max_retries, 159 | retry_multiplier=retry_multiplier or self._retry_multiplier, 160 | retry_max=retry_max or self._retry_max 161 | ) 162 | self._in_queue.put(payload) 163 | 164 | def pull_all(self): 165 | for _ in self: 166 | pass 167 | 168 | 169 | class OrderedPayload(Payload): 170 | put_counter: int 171 | 172 | def __init__(self, *args, put_counter, **kwargs): 173 | super().__init__(*args, **kwargs) 174 | 
self.put_counter = put_counter 175 | 176 | 177 | class OpenAIMultiOrderedClient(OpenAIMultiClient): 178 | def __init__(self, *args, **kwargs): 179 | super().__init__(*args, **kwargs) 180 | self._put_counter = 0 181 | self._get_counter = 0 182 | self._get_cache = {} 183 | self._stopped = False 184 | 185 | def __iter__(self): 186 | return self 187 | 188 | def __next__(self): 189 | while True: 190 | if self._stopped: 191 | out = None 192 | else: 193 | out = self._out_queue.get() 194 | if out is None: 195 | self._stopped = True 196 | if self._get_counter == self._put_counter: 197 | raise StopIteration 198 | else: 199 | out = self._get_cache[self._get_counter] 200 | del self._get_cache[self._get_counter] 201 | self._get_counter += 1 202 | out.call_callback() 203 | return out 204 | 205 | data_counter = out.put_counter 206 | if data_counter == self._get_counter: 207 | self._get_counter += 1 208 | out.call_callback() 209 | return out 210 | self._get_cache[data_counter] = out 211 | if self._get_counter in self._get_cache: 212 | out = self._get_cache[self._get_counter] 213 | del self._get_cache[self._get_counter] 214 | self._get_counter += 1 215 | out.call_callback() 216 | return out 217 | 218 | def request(self, 219 | data: dict, 220 | endpoint: Optional[str] = None, 221 | metadata: Optional[dict] = None, 222 | callback: Any = None, 223 | max_retries: Optional[int] = None, 224 | retry_multiplier: Optional[float] = None, 225 | retry_max: Optional[float] = None): 226 | payload = OrderedPayload( 227 | endpoint=endpoint or self._endpoint, 228 | data={**self._data_template, **data}, 229 | metadata={**self._metadata_template, **(metadata or {})}, 230 | callback=callback, 231 | max_retries=max_retries or self._max_retries, 232 | retry_multiplier=retry_multiplier or self._retry_multiplier, 233 | retry_max=retry_max or self._retry_max, 234 | put_counter=self._put_counter 235 | ) 236 | self._put_counter += 1 237 | self._in_queue.put(payload) 238 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # openai-multi-client 2 | 3 | [![pypi](https://img.shields.io/pypi/v/openai_multi_client)](https://pypi.org/project/openai_multi_client/) 4 | 5 | `openai-multi-client` is a Python library that allows you to easily make multiple concurrent requests to the OpenAI API, 6 | either in order or unordered, with built-in retries for failed requests. It keeps your application code synchronous and 7 | easy to understand, without you having to reason about concurrency and deadlocks. This library is particularly useful 8 | when working with the OpenAI API for tasks that require a large number of requests. 9 | 10 | ### Table of Contents 11 | 12 | 1. [Motivation](#Motivation) 13 | 2. [Features](#Features) 14 | 3. [Installation](#Installation) 15 | 4. [Usage Example](#Usage-Example) 16 | 5. [API Reference](#API-Reference) 17 | 6. [Extended Example Adapted from Real-World Use](#Extended-Example-Adapted-from-Real-World-Use) 18 | 7. [Contributing](#Contributing) 19 | 8. [License](#License) 20 | 9. [Authorship Disclosure](#Authorship-Disclosure) 21 | 22 | ## Motivation 23 | 24 | Imagine you're sitting at your desk, eager to run a state-of-the-art language model analysis on your extensive database 25 | of collected articles. You're excited about discovering all the hidden gems and insights your data has to offer. 
26 | However, there's one tiny problem – if you send requests to the API in a serial manner, it would take a lifetime to 27 | complete. 28 | 29 | This is where the OpenAI Multi-Client comes in. This library was created to help you fully utilize OpenAI's API without 30 | having to wait too long for results. It's designed to manage concurrent API requests so you can focus on analyzing the 31 | insights provided by the language model. 32 | 33 | No longer do you have to wait for one request to finish before starting the next. With OpenAI Multi-Client, you can now 34 | send multiple requests simultaneously, while also ensuring that any failed requests will be retried automatically. 35 | Furthermore, the library can be configured to maintain the order of requests and responses. 36 | 37 | So, the next time you find yourself with a collection of articles waiting to be analyzed, remember that OpenAI 38 | Multi-Client is here to help. Happy analyzing! 39 | 40 | ## Features 41 | 42 | - Concurrently send multiple requests to the OpenAI API 43 | - Support for ordered and unordered request handling 44 | - Built-in retries for failed requests with customizable retry settings 45 | - Customizable API client for easy testing and mocking 46 | - Simple, easy-to-use interface 47 | 48 | ## Installation 49 | 50 | To install `openai-multi-client`, run: 51 | 52 | ```bash 53 | pip install openai-multi-client 54 | ``` 55 | 56 | ## Usage Example 57 | 58 | Here is an example of how to use the `openai-multi-client` library. 59 | 60 | ```python 61 | from openai_multi_client import OpenAIMultiClient 62 | 63 | # Remember to set the OPENAI_API_KEY environment variable to your API key 64 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 65 | 66 | 67 | def make_requests(): 68 | for num in range(1, 10): 69 | api.request(data={ 70 | "messages": [{ 71 | "role": "user", 72 | "content": f"Can you tell me what is {num} * {num}?" 73 | }] 74 | }, metadata={'num': num}) 75 | 76 | 77 | api.run_request_function(make_requests) 78 | 79 | for result in api: 80 | num = result.metadata['num'] 81 | response = result.response['choices'][0]['message']['content'] 82 | print(f"{num} * {num}:", response) 83 | ``` 84 | 85 | If you want the answers to be in the same order as the requests are sent, import and use `OpenAIMultiOrderedClient` 86 | instead of `OpenAIMultiClient`. 87 | 88 | If you prefer, you can use callback-style programming instead of iterating over the results: 89 | 90 | ```python 91 | from openai_multi_client import OpenAIMultiClient, Payload 92 | 93 | # Remember to set the OPENAI_API_KEY environment variable to your API key 94 | api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"}) 95 | 96 | 97 | def on_result(result: Payload): 98 | num = result.metadata['num'] 99 | response = result.response['choices'][0]['message']['content'] 100 | print(f"{num} * {num}:", response) 101 | 102 | 103 | def make_requests(): 104 | for num in range(1, 10): 105 | api.request(data={ 106 | "messages": [{ 107 | "role": "user", 108 | "content": f"Can you tell me what is {num} * {num}?" 109 | }] 110 | }, metadata={'num': num}, callback=on_result) 111 | 112 | 113 | api.run_request_function(make_requests) 114 | api.pull_all() 115 | ``` 116 | 117 | You can find more complete examples [here](./real_test.py) and [here](./mock_test.py).
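Requests that still fail after all retries are not silently dropped: they are yielded (or handed to your callback) with `failed` set to `True` and `response` left as `None`, so check for that before reading the response. A minimal variant of the loop above that does this, reusing the same `'num'` metadata key:

```python
for result in api:
    if result.failed:
        # This request exhausted its retries; result.response is None here
        print(f"Request {result.metadata['num']} failed")
        continue
    num = result.metadata['num']
    print(f"{num} * {num}:", result.response['choices'][0]['message']['content'])
```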
118 | 119 | ## API Reference 120 | 121 | In the `OpenAIMultiClient` and `OpenAIMultiOrderedClient` classes, the `endpoint` and `data` parameters correspond to 122 | the endpoints and parameters expected by the official OpenAI API clients. 123 | 124 | ## Configuring API Keys and Endpoints 125 | 126 | Setting up OpenAI Multi-Client is straightforward. Since it utilizes the official OpenAI client under the hood, all you 127 | need to do is import the `openai` library and configure it as you usually would. 128 | 129 | To set up your API key, simply import the `openai` module and configure the API key using the following code: 130 | 131 | ```python 132 | import openai 133 | 134 | openai.api_key = "your_api_key_here" 135 | ``` 136 | 137 | Setting the environment variable `OPENAI_API_KEY` also works as expected. 138 | 139 | You can also configure the API endpoint if needed: 140 | 141 | ```python 142 | openai.api_base = "azure_openai_api_base_here" 143 | ``` 144 | 145 | Once you've configured the `openai` library with your API key and endpoint, OpenAI Multi-Client will automatically use 146 | these settings when sending requests to the API. This makes it easy to integrate OpenAI Multi-Client into your existing 147 | projects without having to worry about separate configurations. 148 | 149 | ### Endpoints 150 | 151 | The `endpoint` parameter in the `request()` method or during the initialization of the classes specifies which OpenAI 152 | API endpoint to use for the requests. The available endpoints are: 153 | 154 | - `"completions"`: For text completion requests using the `Completion` endpoint. 155 | - `"chats"` or `"chat.completions"`: For chat completion requests using the `ChatCompletion` endpoint. 156 | - `"embeddings"`: For embedding requests using the `Embedding` endpoint. 157 | - `"edits"`: For edit requests using the `Edit` endpoint. 158 | - `"images"`: For image requests using the `Image` endpoint. 159 | - `"fine-tunes"`: For fine-tuning requests using the `FineTune` endpoint. 160 | 161 | ### Data 162 | 163 | The `data` parameter in the `request()` method specifies the request data sent to the OpenAI API. The data should be a 164 | dictionary containing the required and optional parameters for the specified endpoint. For example: 165 | 166 | - For the `"completions"` endpoint, the `data` dictionary may include 167 | the `model`, `prompt`, `temperature`, `max_tokens`, and other parameters expected by the `Completion` endpoint. 168 | - For the `"chats"` endpoint, the `data` dictionary may include the `model`, `messages`, `temperature`, `max_tokens`, 169 | and other parameters expected by the `ChatCompletion` endpoint. 170 | 171 | When using the `OpenAIMultiClient` or `OpenAIMultiOrderedClient`, make sure to provide the appropriate `endpoint` and 172 | the corresponding `data` as required by the official OpenAI API clients. 173 | 174 | For more details, see the [official documentation](https://github.com/openai/openai-python). 175 | 176 | ### OpenAIMultiClient 177 | 178 | `OpenAIMultiClient` is the primary class for making unordered concurrent requests to the OpenAI API. 179 | 180 | #### Initialization 181 | 182 | You can initialize multiple instances of the client, and each will behave independently, with its own queue of requests 183 | and responses.
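For instance, you can run one client for chat completions and another for embeddings side by side; the extended example below mentions this as a way to gain extra concurrency. A minimal sketch, reusing the model names that appear elsewhere in this README:

```python
from openai_multi_client import OpenAIMultiClient

# Two independent clients, each with its own workers and queues
chat_api = OpenAIMultiClient(endpoint="chats", data_template={"model": "gpt-3.5-turbo"})
embedding_api = OpenAIMultiClient(endpoint="embeddings", data_template={"model": "text-embedding-ada-002"})
```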
The `OpenAIMultiClient` class can be initialized with the following parameters: 184 | 185 | ```python 186 | OpenAIMultiClient( 187 | concurrency=10, 188 | max_retries=10, 189 | wait_interval=0, 190 | retry_multiplier=1, 191 | retry_max=60, 192 | endpoint=None, 193 | data_template=None, 194 | metadata_template=None, 195 | custom_api=None) 196 | ``` 197 | 198 | - `concurrency`: (Optional) The number of concurrent requests. Default is 10. 199 | - `max_retries`: (Optional) The maximum number of retries for failed requests. Default is 10. 200 | - `wait_interval`: (Optional) The time each worker waits between processing requests. Default is 0. 201 | - `retry_multiplier`: (Optional) The multiplier for the waiting time between retries. Default is 1. 202 | - `retry_max`: (Optional) The maximum waiting time between retries. Default is 60. 203 | - `endpoint`: (Optional) The OpenAI API endpoint to be used, e.g., `"chats"` or `"completions"`. 204 | - `data_template`: (Optional) A template for the data sent with each request. The request data will be merged with this 205 | template. 206 | - `metadata_template`: (Optional) A template for the metadata associated with each request. The request metadata will be 207 | merged with this template. 208 | - `custom_api`: (Optional) A custom API function that can be used for testing or mocking the OpenAI API. You can also 209 | use this to connect to models other than LLMs. 210 | 211 | You should set `concurrency` to a sensible value based on your API rate limit. For paid customers using `gpt-3.5-turbo`, 212 | you have 3,500 requests per minute. Let's be generous and assume a request completes in one second. To avoid hitting the 213 | rate limit while maintaining a high throughput, you could set `concurrency` to a value like 50 or 100. Since you are not 214 | charged for failed requests and since exponential backoff is in effect, your requests will eventually complete even if 215 | you set `concurrency` to a high value, but as failed requests take up your rate limit, throughput will be lower. 216 | 217 | It is recommended to test your code with a mock API first when you are developing, because with the real API, high 218 | concurrency burns money fast. [This example](./mock_test.py) might be helpful. 219 | 220 | You can check your limits [here](https://platform.openai.com/account/rate-limits). 221 | 222 | #### Methods 223 | 224 | **Important Note**: Calling `request` may block the thread if the input queue is full. It is recommended to put the 225 | requesting logic into a function and call that function using `run_request_function` as done in the example, as it 226 | ensures that the requesting logic runs concurrently without blocking the main thread. The blocking of the input queue is 227 | a **key feature** of this library to ensure that your memory will not be flooded with a petabyte of pending requests 228 | streaming from your database. 229 | 230 | - `request(data, endpoint=None, metadata=None, callback=None, max_retries=None, retry_multiplier=None, retry_max=None)`: Adds a request 231 | to the queue. 232 | - `data`: The data (as a dict) to be sent with the request. 233 | - `endpoint`: (Optional) The API endpoint to be used for this specific request. 234 | - `metadata`: (Optional) Metadata associated with the request. 235 | - `callback`: (Optional) A callback function that will be called with the payload object. 236 | - `max_retries`: (Optional) The maximum number of retries for failed requests. Default is the value set during 237 | initialization.
238 | - `retry_multiplier`: (Optional) The multiplier for the waiting time between retries. Default is the value set 239 | during initialization. 240 | - `retry_max`: (Optional) The maximum waiting time between retries. Default is the value set during initialization. 241 | 242 | - `run_request_function(input_function, *args, stop_at_end=True, **kwargs)`: Executes the input function in a separate 243 | thread, allowing it to add requests to the queue without blocking. 244 | - `input_function`: A function that adds requests to the queue using the `request()` method. 245 | - `*args`: (Optional) Additional arguments passed to the input function. 246 | - `stop_at_end`: (Optional) Whether to close the client (stop the workers and the event loop) after the input function returns. Default is True. 247 | - `**kwargs`: (Optional) Additional keyword arguments passed to the input function. 248 | 249 | To retrieve results, use the client as an iterator. The following is also available: 250 | 251 | - `pull_all()`: Pulls all responses from the queue and discards them. Useful for driving the callbacks when you are not 252 | consuming the results yourself. 253 | 254 | ### OpenAIMultiOrderedClient 255 | 256 | `OpenAIMultiOrderedClient` is a subclass of `OpenAIMultiClient` for making ordered concurrent requests to the OpenAI 257 | API. The usage is the same as `OpenAIMultiClient`, but the responses will be returned in the order they were added to 258 | the queue. 259 | 260 | ### Logging 261 | 262 | The library uses the standard Python logging module. You can set the logging level 263 | using `logging.basicConfig(level=logging.INFO)` or `logging.basicConfig(level=logging.DEBUG)`. 264 | 265 | ## Extended Example Adapted from Real-World Use 266 | 267 | The scenario in the Motivation section is genuinely real. Although the OpenAI cookbook provides a [recipe](https://github.com/openai/openai-cookbook/blob/main/examples/api_request_parallel_processor.py) for making parallel requests, it relies on files for data persistence and can be quite challenging to adapt and use. We were surprised that a library like this didn't exist already, so we decided to create one ourselves. 268 | 269 | Here's a brief overview of the system. External and internal articles are continuously streamed from scrapers and other sources into a database. For each article, we let ChatGPT answer a few questions about it, and then we store the text and embeddings of the answers back into the database. The embeddings are indexed for nearest-neighbor search. At peak hours, we expect a high volume of articles, making the use of `OpenAIMultiClient` essential. 270 | 271 | We'll be using [CozoDB](https://github.com/cozodb/cozo), which we developed for our work, to store everything. CozoDB is a transactional hybrid relational-graph-vector database that uses Datalog for queries. In the real scenario that this example is based on, article deduplication, full-text search, and recursive, agentic behavior are also integrated using the functionalities provided by the database. However, these details have been omitted in the example for simplicity.
272 | 273 | The setup of the database and schema can be found in the code snippet below, where we create tables for articles and embeddings and index the embeddings for nearest-neighbor search: 274 | 275 | ```python 276 | from pycozo.client import Client 277 | 278 | # We use a global variable for the db for simplicity 279 | db = Client('rocksdb', 'analysis.db') # Using the RocksDB backend for persistence 280 | 281 | # Create the schema 282 | 283 | # The articles 284 | db.run(''' 285 | :create article { 286 | id: Uuid default rand_uuid_v4() # The key 287 | => 288 | title: String, # Title of the article 289 | text: String, # Text of the article 290 | analysis: Json default {} # Analyses will be stored in the JSON object 291 | } 292 | ''') 293 | 294 | # The embeddings are stored separately from the articles 295 | # Not strictly necessary, but cleaner 296 | db.run(''' 297 | :create embedding { 298 | id: Uuid, # The key consists of the article ID and the analysis key 299 | analysis_key: String # The analysis key 300 | => 301 | embedding: <F32; 1536> # The embedding of the analysis answer 302 | } 303 | ''') 304 | 305 | # Put the embeddings into an index for nearest-neighbor search 306 | db.run(''' 307 | ::hnsw create embedding:idx { 308 | dim: 1536, 309 | fields: [embedding], 310 | ef_construction: 200, 311 | m: 50, 312 | } 313 | ''') 314 | ``` 315 | 316 | The streaming service calls a function to insert articles into the database, and the database's ID is auto-generated: 317 | 318 | ```python 319 | def insert(title, text): 320 | db.run(''' 321 | ?[title, text] <- [[$title, $text]] 322 | 323 | :put article {=> title, text} 324 | ''', {'title': title, 'text': text}) 325 | ``` 326 | 327 | In another file, we set up the OpenAI multi-client and define the questions we want to ask. 328 | 329 | ```python 330 | from openai_multi_client import OpenAIMultiClient 331 | 332 | # We could use separate clients for embedding and analysis to achieve more concurrency 333 | # But we'll just use a single client for simplicity 334 | client = OpenAIMultiClient() 335 | 336 | QUESTIONS = { 337 | 'economic': ''' 338 | You are the economics advisor to company X. 339 | Read the article from the user and summarize the potential economic impact for company X implied by the article from your perspective. 340 | Write clearly and concisely, in one paragraph. If there is no economic impact, write "No economic impact." 341 | ''', 342 | 343 | 'political': ''' 344 | You are the political advisor to company X. 345 | Read the article from the user and summarize the potential political impact for company X implied by the article from your perspective. 346 | Write clearly and concisely, in one paragraph. If there is no political impact, write "No political impact." 347 | ''', 348 | 349 | 'technological': ''' 350 | You are the technology advisor to company X. 351 | Read the article from the user and summarize the potential technological impact for company X implied by the article from your perspective. 352 | Write clearly and concisely, in one paragraph. If there is no technological impact, write "No technological impact." 353 | ''' 354 | } 355 | ``` 356 | 357 | The core of the system involves using the client to ask questions and store the results back into the database. When the answers are inserted into the database, we use the client again to get the embeddings and store them in the database as well.
We use database callbacks to achieve this: 358 | 359 | 360 | ```python 361 | # Callback function to be called when the database is updated 362 | def insert_callback(op_name, new_rows, old_rows): 363 | # We only handle puts. Logic for handling deletes omitted for simplicity 364 | if op_name == 'Put': 365 | # Make a dict from the old rows replaced by the update 366 | existing = { 367 | id: analysis 368 | for [id, _title, _text, analysis] in old_rows 369 | } 370 | # Iterate over the inserted or updated rows 371 | for id, title, text, analysis in new_rows: 372 | # If the article already exists, we request embeddings for any newly added analyses 373 | if id in existing: 374 | old_analyses = existing[id] 375 | for key, value in analysis.items(): 376 | if key not in old_analyses: 377 | # Make a request for the embedding 378 | # The metadata are used for inserting the results back into the DB 379 | client.request( 380 | endpoint="embeddings", 381 | data={ 382 | 'model': "text-embedding-ada-002", 383 | 'input': f'{key} impact:\n{value}' 384 | }, 385 | metadata={ 386 | 'type': 'embedding', 387 | 'id': id, 388 | 'key': key 389 | }, 390 | callback=on_embedding_result 391 | ) 392 | else: 393 | # Otherwise, we ask for analyses for all questions 394 | context_text = f'{title}\n\n{text}' 395 | 396 | for key, question in QUESTIONS.items(): 397 | # Make a request for the analysis 398 | client.request( 399 | endpoint="chats", 400 | data={ 401 | # You can use GPT-4 if you are rich! 402 | "model": "gpt-3.5-turbo", 403 | "messages": [{ 404 | "role": "system", "content": question 405 | }, { 406 | "role": "user", "content": context_text 407 | }] 408 | 409 | }, 410 | metadata={ 411 | 'type': 'analysis', 412 | 'id': id, 413 | 'key': key 414 | }, 415 | callback=on_analysis_result 416 | ) 417 | 418 | 419 | # Register the callback; it will be called whenever an article is inserted or deleted 420 | # As the database already runs callbacks on a separate thread, 421 | # we don't need to use run_request_function 422 | callback_handle = db.register_callback('article', insert_callback) 423 | ``` 424 | 425 | Callback functions handle the results from the OpenAI client and write them back into the database: 426 | 427 | ```python 428 | def on_embedding_result(result): 429 | if result.failed: 430 | print(f'Failed to get embedding for article {result.metadata["id"]}') 431 | return 432 | 433 | embedding = result.response['data'][0]['embedding'] 434 | db.run(''' 435 | ?[id, analysis_key, embedding] <- [[$id, $analysis_key, $embedding]] 436 | :put embedding {id, analysis_key => embedding} 437 | ''', { 438 | 'id': result.metadata['id'], 439 | 'analysis_key': result.metadata['key'], 440 | 'embedding': embedding 441 | }) 442 | 443 | 444 | def on_analysis_result(result): 445 | if result.failed: 446 | print(f'Failed to get analysis for article {result.metadata["id"]}') 447 | return 448 | 449 | analysis = { 450 | result.metadata['key']: result.response['choices'][0]['message']['content'] 451 | } 452 | 453 | db.run(''' 454 | ?[id, analysis] := id = $id, *article{id, analysis: old}, analysis = old ++ $analysis 455 | :update article {id => analysis} 456 | ''', { 457 | 'id': result.metadata['id'], 458 | 'analysis': analysis 459 | }) 460 | ``` 461 | 462 | Finally, we start the system: 463 | 464 | ```python 465 | client.pull_all() 466 | ``` 467 | 468 | With this system in place, we can easily access the top news we're interested in by running nearest-neighbor vector searches: 469 | 470 | 471 | ```python 472 | for question in ['Red-alert level innovation from
competitors', 473 | 'Disruption to our supply chain', 474 | 'Dangers to our overseas personnel in the Middle East']: 475 | # the `embed` function also uses the OpenAI multi-client, but its definition is omitted. 476 | embedding = embed(question) 477 | res = db.run(''' 478 | ?[id, title, text] := ~article:idx{id, title, text | query: $q, k: 3} 479 | ''', {'q': embedding}) 480 | print(res) 481 | ``` 482 | 483 | As you can see from the example questions, it would be challenging to find relevant news articles in the database without the analysis. But with the analysis, we can get the relevant articles very quickly. This is the power of vector indexing. We can also set up more agents to monitor the analysis and notify the CEO when something important comes up, or even have a community of agents using the database to collaborate. Thanks to the OpenAI multi-client, these agents can move at the speed of thought! 484 | 485 | ## Contributing 486 | 487 | Contributions to `openai-multi-client` are welcome! Feel free to submit a pull request or open an issue on GitHub. 488 | 489 | ## License 490 | 491 | `openai-multi-client` is released under the MIT License. See the [LICENSE](LICENSE) file for details. 492 | 493 | ## Authorship Disclosure 494 | 495 | GPT-4 wrote most of this README by analyzing code, incorporating the techniques outlined before. --------------------------------------------------------------------------------