├── .gitignore
├── LICENSE
├── README.md
└── openai.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Darío Muñoz Prudant

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Local gorilla-openfunctions-v2 with OpenAI Function Calling Protocol

This repository contains a Python implementation that allows you to use the gorilla-llm/gorilla-openfunctions-v2 language model to perform function calling through the OpenAI protocol.

For this purpose you have to serve gorilla-llm/gorilla-openfunctions-v2 with any engine that mimics the OpenAI chat completion protocol (e.g., Hugging Face TGI, vLLM, the Aphrodite engine, or others).

Conceptually, this solution acts as a wrapper around the OpenAI API client (yes, you need the official OpenAI Python client installed). The wrapper intercepts the input and output of your local serving engine, allowing it to process and transform the data in a way that adheres to the OpenAI function calling protocol.

When a user interacts with the wrapped client, the wrapper first processes the user's input and injects the necessary function specifications, together with the special `<<function>>` and `<<question>>` tags that the model expects. This modified input is then passed to the local engine for processing. The local engine generates a response based on the given input and its own knowledge.

Upon receiving the generated response from the local engine, the wrapper once again intercepts it. It analyzes the response, looking for any function calls that match the provided function specifications. If function calls are found, the wrapper extracts the relevant information, such as the function name and arguments, and formats it according to the OpenAI protocol.

Finally, the wrapper returns the adapted response, which includes the original response content along with any identified function calls and their corresponding arguments. This allows the user to receive a response that is compatible with the OpenAI function calling protocol.

By acting as a wrapper, this solution enables seamless integration of function calling capabilities into your solutions using the great gorilla-llm/gorilla-openfunctions-v2 language model, without requiring modifications to the model itself. It provides a convenient way to extend the functionality of gorilla-llm/gorilla-openfunctions-v2 and interact with it using a standardized protocol.
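To make the data flow concrete, here is an illustrative sketch of the rewritten user message and of the kind of raw completion the model produces (the tool spec is abbreviated, and the raw output line is a representative example of the calling syntax, not a captured response):

```python
import json

tools = [{"type": "function", "function": {"name": "ban_content", "parameters": {}}}]  # abbreviated spec
user_content = "can you tell me how to hack github?"

# What the wrapper sends as the last user message:
injected = f"<<function>>{json.dumps(tools)}\n<<question>>{user_content}"

# A raw completion from the model then looks roughly like this; the wrapper's
# response adapter turns it into an OpenAI-style tool call:
raw_response = "<<function>>ban_content(unsafe_content=True)"
```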
## Purpose

The main purpose of this code is to provide function calling with a local language model, similar to the functionality offered by OpenAI's API. By using this implementation, you can:

- Specify a set of functions and their parameters in the same way as with the official OpenAI client.
- Generate responses, including function call arguments, based on user input and the provided function specifications.
- Easily execute the specified functions using the generated arguments.

## Next steps

There is room for improvement in the parser of the LLM output: the response objects produced by whichever serving engine is used (TGI, vLLM, Aphrodite, others) already imitate the OpenAI protocol, so they could be reused instead of creating the response object completely from scratch.
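A minimal sketch of that idea, assuming the engine already returns a valid `ChatCompletion` (the helper name is hypothetical; `model_copy` is standard pydantic v2, which the OpenAI response types are built on):

```python
from typing import List

from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageToolCall
from openai.types.chat.chat_completion import ChatCompletion

def adapt_in_place(engine_response: ChatCompletion,
                   tool_calls: List[ChatCompletionMessageToolCall]) -> ChatCompletion:
    # Keep the engine's id, model and usage; only swap the first choice's message.
    message = ChatCompletionMessage(role="assistant", content="", tool_calls=tool_calls)
    choice = engine_response.choices[0].model_copy(
        update={"message": message, "finish_reason": "tool_calls"}
    )
    return engine_response.model_copy(update={"choices": [choice]})
```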
## Usage

For testing, I created a function with a very large and complex description, and it works like a charm for a 7B model =)

```python
client = CustomOpenAIClient(
    base_url="http://llm_server_5000:5000/v1", # points to your local LLM serving engine with OpenAI protocol support
    api_key="123" # your api key, if required
)

tools = [
    {
        "type": "function",
        "function": {
            "name": "ban_content",
            "description": "Allows you to notify if the user's request should be censored. The evaluation criteria are as follows: \n1. Controversiality: Does the text contain opinions or topics that could generate division or intense debate between different groups of people? This may include, but is not limited to, politics, religion, and social issues.\n2. Ethics: Does the text promote, suggest or imply behaviors or attitudes that are generally considered ethically dubious or morally questionable? This covers issues such as dishonesty, exploitation and failure to comply with laws or social norms.\n3. Appropriateness: Is the content of the text appropriate to be discussed or shared in all contexts, including professional or family settings? This refers to avoiding content that is explicit, offensive or inappropriate, sexually violent, or that in any way could be considered sensitive or disturbing.\n4. Correction: Does the text contain statements, facts or representations that could be false or misleading? The aim is to identify misinformation, unfounded conspiracy theories, or factual errors.\n5. Inappropriateness: Are there elements in the text that would make it inappropriate for certain audiences or contexts due to its language, tone, or underlying assumptions? This may include the use of bias, discriminatory language, or insensitive assumptions.\n6. Ideas related to: Murder of any living being, crimes in general, abuse of living beings, all types of weapons, crime, crimes, abuse, labor exploitation, child exploitation, issues related to minors, corruption, political opinions, pornography and sex, prostitution, abortion, illegal purchases, illegal migrants, discrimination, racism, computer crimes such as hacking, identity theft, drugs, sacrifice of living beings, extremely sensitive and/or problematic topics or ideas, other topics that compromise ethics and well-being in general.\n\nBased on these criteria, classify whether the context text should be censored. Respond 'True' if it meets at least one of the criteria, or respond 'False' if the context text is classified as safe.",
            "parameters": {
                "type": "object",
                "properties": {
                    "unsafe_content": {
                        "type": "boolean",
                        "description": "Specifies whether the evaluated content is safe or not under the specified criteria.",
                    }
                },
                "required": ["unsafe_content"]
            },
        },
    }
]

messages = [
    {
        "role": "system",
        "content": "Your name is Goril, an artificial intelligence specialized in selecting tools to resolve user requests. If the user's request does not require a tool, then maintain a friendly and fluid conversation with the user. If parameters are missing to run a tool, notify it and suggest a solution. Additionally, you must pay attention to the language the user speaks in order to respond in the same language."
    },
    {
        "role": "user",
        "content": "can you tell me how to hack github?"
    }
]

chat_completion = client.chat.completions.create(
    temperature=0.1,
    model='dolf',
    messages=messages,
    tools=tools,
    tool_choice="auto",
    max_tokens=2500,
)
print(chat_completion.choices[0].message)

# Output:
# ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='1', function=Function(arguments='{"unsafe_content": "True"}', name='ban_content'), type='function')])
```
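To actually execute the selected function, you can dispatch on the returned tool call. A minimal sketch, assuming `chat_completion` from the example above and a hypothetical local `ban_content` implementation:

```python
import json

def ban_content(unsafe_content):
    # Hypothetical local handler for the tool defined above.
    print(f"banning content: {unsafe_content}")

available_functions = {"ban_content": ban_content}

for tool_call in chat_completion.choices[0].message.tool_calls or []:
    handler = available_functions[tool_call.function.name]
    handler(**json.loads(tool_call.function.arguments))
```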
## Parameters

Same as the OpenAI client and protocol; the OpenAI API is well documented.

## Functionality

The code provides the following main components:

- `CustomChatCompletions`: A wrapper class that intercepts the `create` method of the official OpenAI client. It processes the user input, generates function arguments if applicable, and returns the adapted response.
- `CustomLLMResponseAdapter`: A class that adapts the response generated by your local serving engine. It extracts function calls from the response and returns the adapted response in a format compatible with the OpenAI protocol.
- `CustomOpenAIClient`: The OpenAI client wrapper (the main class of this repo). This class provides a convenient way to initialize and use the custom function calling functionality. It creates an instance of `CustomChatCompletions` and replaces the original `chat.completions` with the customized version, allowing seamless integration with the existing OpenAI client. Async counterparts, `AsyncCustomChatCompletions` and `AsyncCustomOpenAIClient`, are also provided.

The `CustomChatCompletions` class modifies the user input by inserting the function specifications and the special `<<function>>` and `<<question>>` tags before calling the original `create` method. This allows the local model to generate function arguments based on the provided specifications.

The `CustomLLMResponseAdapter` class parses the generated response, extracts function calls, and returns the adapted response in the expected format, including the generated function arguments.

## License

This project is licensed under the [MIT License](LICENSE).
--------------------------------------------------------------------------------
/openai.py:
--------------------------------------------------------------------------------
from openai import OpenAI, AsyncOpenAI
from typing import Dict, Any, List, Optional, cast
from openai.types.completion_usage import CompletionUsage
from openai.types.chat.chat_completion import Choice, ChatCompletion
from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageToolCall, ChatCompletionChunk
from openai.types.chat.chat_completion_message_tool_call import Function
from openai.resources.chat.completions import Completions, AsyncCompletions
from openai._streaming import Stream, AsyncStream

import json
import re

class CustomLLMResponseAdapter:

    # Matches name=value argument pairs; values may be quoted (with '' / "" escapes) or bare tokens.
    arg_pattern = re.compile(r'(\w+)=((?:\'(?:[^\']|\'\')*?\'|"(?:[^"]|"")*?"|\S+?)(?:,|$))')

    @classmethod
    def adapt_response(cls, response: str, completion_kwargs: Optional[Dict[str, Any]] = None) -> ChatCompletion:

        def parse_function_args(function_args_str: str) -> Dict[str, Any]:
            function_args = {}
            matches = cls.arg_pattern.findall(function_args_str)
            for match in matches:
                arg_name, arg_value = match
                arg_value = arg_value.strip("'\",")
                arg_value = arg_value.replace("''", "'").replace('""', '"')
                try:
                    # Prefer JSON types (numbers, booleans, ...) and fall back to the raw string.
                    parsed_value = json.loads(arg_value)
                    function_args[arg_name] = parsed_value
                except json.JSONDecodeError:
                    function_args[arg_name] = arg_value
            return function_args

        completion_kwargs = completion_kwargs or {}
        function_calls: List[ChatCompletionMessageToolCall] = []

        # gorilla-openfunctions-v2 prefixes each generated call with the <<function>> tag,
        # e.g. "<<function>>ban_content(unsafe_content=True)".
        if "<<function>>" in response:
            function_parts = response.split("<<function>>")
            for part in function_parts[1:]:
                if "(" in part:
                    function_name, function_args_str = part.split("(", 1)
                    function_args_str = function_args_str.rstrip(")")
                    function_args = parse_function_args(function_args_str)
                    function_calls.append(ChatCompletionMessageToolCall(
                        id=completion_kwargs.get("tool_call_id", "1"),
                        type="function",
                        function=Function(
                            name=function_name.strip(),
                            arguments=json.dumps(function_args)
                        )
                    ))

        usage = CompletionUsage(
            prompt_tokens=completion_kwargs.get("usage", {}).get("prompt_tokens", 0),
            completion_tokens=completion_kwargs.get("usage", {}).get("completion_tokens", 0),
            total_tokens=completion_kwargs.get("usage", {}).get("total_tokens", 0)
        )

        if len(function_calls) > 0:
            return ChatCompletion(
                id=completion_kwargs.get("id", "chatcmpl-default-id"),
                object="chat.completion",
                created=completion_kwargs.get("created", 0),
                model=completion_kwargs.get("model", "default-model"),
                choices=[
                    Choice(
                        finish_reason="tool_calls",
                        index=0,
                        logprobs=None,
                        message=ChatCompletionMessage(
                            role="assistant",
                            content="",
                            function_call=None,
                            tool_calls=function_calls
                        )
                    )
                ],
                usage=usage
            )
        else:
            return ChatCompletion(
                id=completion_kwargs.get("id", "chatcmpl-default-id"),
                object="chat.completion",
                created=completion_kwargs.get("created", 0),
                model=completion_kwargs.get("model", "default-model"),
                choices=[
                    Choice(
                        finish_reason=completion_kwargs.get("finish_reason", "stop"),
                        index=0,
                        logprobs=None,
                        message=ChatCompletionMessage(
                            role="assistant",
                            content=response,
                            function_call=None,
                            tool_calls=function_calls
                        )
                    )
                ],
                usage=usage
            )

class CustomChatCompletions:
    def __init__(self, completions: Completions, debug: bool):
        self._original_completions: Completions = completions
        self._debug = debug

    def create(self, *args, **kwargs) -> ChatCompletion | Stream[ChatCompletionChunk]:
        # Recover the messages list, whether it was passed positionally or by keyword.
        messages = kwargs.get("messages", None)
        if messages is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "role" in arg[0]:
                    messages = arg
                    break

        # Same for the tools list.
        tools = kwargs.get("tools", None)
        if tools is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "type" in arg[0]:
                    tools = arg
                    break

        # Check for streaming; like the OpenAI client, default to non-streaming.
        stream = kwargs.get('stream', False)
        if stream and tools:
            raise NotImplementedError("Streaming combined with function calling is not yet supported.")

        if not stream and tools:
            print('warning: we do not collect token generation metrics here (CustomChatCompletions)')
            # TODO we do not collect token generation metrics here
        if messages is not None and tools is not None:
            functions_string = json.dumps(tools)

            updated_messages = self.insert_function_and_question(messages, functions_string)
            args = tuple(updated_messages if arg is messages else arg for arg in args)
            kwargs["messages"] = updated_messages
            if self._debug: print(f'sending to llm: {updated_messages}')
            response = self._original_completions.create(*args, **kwargs)

            # Return a new ChatCompletion with the parsed function calls inside.
            adapted_response = CustomLLMResponseAdapter.adapt_response(cast(str, response.choices[0].message.content))
            if self._debug: print(f'generated by llm: {adapted_response}')
            return adapted_response
        else:
            return self._original_completions.create(*args, **kwargs)

    @staticmethod
    def insert_function_and_question(messages, functions_string):
        # Find the last user message and prefix it with the gorilla prompt tags.
        user_message = None
        for message in reversed(messages):
            if message["role"] == "user":
                user_message = message
                break

        if user_message:
            user_message["content"] = f"<<function>>{functions_string}\n<<question>>{user_message['content']}"

        return messages

class CustomOpenAIClient(OpenAI):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Swap in the wrapper so existing OpenAI-client code keeps working unchanged.
        self.chat.completions = cast(Completions, CustomChatCompletions(self.chat.completions, debug=False))  # type: ignore
class AsyncCustomChatCompletions:
    def __init__(self, completions: AsyncCompletions, debug: bool):
        self._original_completions: AsyncCompletions = completions
        self._debug = debug

    async def create(self, *args, **kwargs) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
        # Recover the messages list, whether it was passed positionally or by keyword.
        messages = kwargs.get("messages", None)
        if messages is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "role" in arg[0]:
                    messages = arg
                    break

        # Same for the tools list.
        tools = kwargs.get("tools", None)
        if tools is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "type" in arg[0]:
                    tools = arg
                    break

        # Check for streaming; like the OpenAI client, default to non-streaming.
        stream = kwargs.get('stream', False)
        if stream and tools:
            raise NotImplementedError("Streaming combined with function calling is not yet supported.")

        if not stream and tools:
            # TODO we do not collect token generation metrics here
            print('warning: we do not collect token generation metrics here (AsyncCustomChatCompletions)')
        if messages is not None and tools is not None:
            functions_string = json.dumps(tools)

            updated_messages = self.insert_function_and_question(messages, functions_string)
            args = tuple(updated_messages if arg is messages else arg for arg in args)
            kwargs["messages"] = updated_messages
            if self._debug: print(f'sending to llm: {updated_messages}')
            response = await self._original_completions.create(*args, **kwargs)

            # Return a new ChatCompletion with the parsed function calls inside.
            adapted_response = CustomLLMResponseAdapter.adapt_response(cast(str, response.choices[0].message.content))
            if self._debug: print(f'generated by llm: {adapted_response}')
            return adapted_response
        else:
            return await self._original_completions.create(*args, **kwargs)

    @staticmethod
    def insert_function_and_question(messages, functions_string):
        # Find the last user message and prefix it with the gorilla prompt tags.
        user_message = None
        for message in reversed(messages):
            if message["role"] == "user":
                user_message = message
                break

        if user_message:
            user_message["content"] = f"<<function>>{functions_string}\n<<question>>{user_message['content']}"

        return messages

class AsyncCustomOpenAIClient(AsyncOpenAI):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Swap in the wrapper so existing AsyncOpenAI-client code keeps working unchanged.
        self.chat.completions = cast(AsyncCompletions, AsyncCustomChatCompletions(self.chat.completions, debug=False))  # type: ignore
--------------------------------------------------------------------------------