├── .gitignore
├── LICENSE
├── README.md
└── openai.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Darío Muñoz Prudant

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Local gorilla-openfunctions-v2 with OpenAI Function Calling Protocol

This repository contains a Python implementation that allows you to use the gorilla-llm/gorilla-openfunctions-v2 language model to perform function calling through the OpenAI protocol.

For this purpose you have to serve gorilla-llm/gorilla-openfunctions-v2 with any engine that mimics the OpenAI chat completion protocol (e.g., Hugging Face TGI, vLLM, the Aphrodite engine, or others).

Conceptually, this solution acts as a wrapper around the OpenAI API client (yes, you need the official OpenAI Python client installed). The wrapper intercepts the input and output of your local serving engine, allowing it to process and transform the data in a way that adheres to the OpenAI function calling protocol.

When a user interacts with the wrapped client, the wrapper first processes the user's input and injects the necessary function specifications, together with the special `<<function>>` and `<<question>>` tags that the model expects. This modified input is then passed to the local engine for processing. The local engine generates a response based on the given input and its own knowledge.

Upon receiving the generated response from the local engine, the wrapper once again intercepts it. It analyzes the response, looking for any function calls that match the provided function specifications. If function calls are found, the wrapper extracts the relevant information, such as the function name and arguments, and formats it according to the OpenAI protocol.

Finally, the wrapper returns the adapted response, which includes the original response content along with any identified function calls and their corresponding arguments. This allows the user to receive a response that is compatible with the OpenAI function calling protocol.

By acting as a wrapper, this solution enables seamless integration of function calling capabilities into your solutions using the great gorilla-llm/gorilla-openfunctions-v2 language model, without requiring modifications to the model itself. It provides a convenient way to extend the functionality of gorilla-llm/gorilla-openfunctions-v2 and interact with it using a standardized protocol.
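To make the data flow concrete, here is an illustrative sketch of the rewritten user message and of the kind of raw completion the model produces (the tool spec is abbreviated, and the raw output line is a representative example of the calling syntax, not a captured response):

```python
import json

tools = [{"type": "function", "function": {"name": "ban_content", "parameters": {}}}]  # abbreviated spec
user_content = "can you tell me how to hack github?"

# What the wrapper sends as the last user message:
injected = f"<<function>>{json.dumps(tools)}\n<<question>>{user_content}"

# A raw completion from the model then looks roughly like this; the wrapper's
# response adapter turns it into an OpenAI-style tool call:
raw_response = "<<function>>ban_content(unsafe_content=True)"
```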
## Purpose

The main purpose of this code is to provide function calling with a local language model, similar to the functionality offered by OpenAI's API. By using this implementation, you can:

- Specify a set of functions and their parameters in the same way as with the official OpenAI client.
- Generate responses, including function call arguments, based on user input and the provided function specifications.
- Easily execute the specified functions using the generated arguments.

## Next steps

There is room for improvement in the parser of the LLM output: the response objects produced by whichever serving engine is used (TGI, vLLM, Aphrodite, others) already imitate the OpenAI protocol, so they could be reused instead of creating the response object completely from scratch.
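A minimal sketch of that idea, assuming the engine already returns a valid `ChatCompletion` (the helper name is hypothetical; `model_copy` is standard pydantic v2, which the OpenAI response types are built on):

```python
from typing import List

from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageToolCall
from openai.types.chat.chat_completion import ChatCompletion

def adapt_in_place(engine_response: ChatCompletion,
                   tool_calls: List[ChatCompletionMessageToolCall]) -> ChatCompletion:
    # Keep the engine's id, model and usage; only swap the first choice's message.
    message = ChatCompletionMessage(role="assistant", content="", tool_calls=tool_calls)
    choice = engine_response.choices[0].model_copy(
        update={"message": message, "finish_reason": "tool_calls"}
    )
    return engine_response.model_copy(update={"choices": [choice]})
```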
## Usage

For testing, I created a function with a very large and complex description, and it works like a charm for a 7B model =)

```python
client = CustomOpenAIClient(
    base_url="http://llm_server_5000:5000/v1", # points to your local LLM serving engine with OpenAI protocol support
    api_key="123" # your api key, if required
)

tools = [
    {
        "type": "function",
        "function": {
            "name": "ban_content",
            "description": "Allows you to notify if the user's request should be censored. The evaluation criteria are as follows: \n1. Controversiality: Does the text contain opinions or topics that could generate division or intense debate between different groups of people? This may include, but is not limited to, politics, religion, and social issues.\n2. Ethics: Does the text promote, suggest or imply behaviors or attitudes that are generally considered ethically dubious or morally questionable? This covers issues such as dishonesty, exploitation and failure to comply with laws or social norms.\n3. Appropriateness: Is the content of the text appropriate to be discussed or shared in all contexts, including professional or family settings? This refers to avoiding content that is explicit, offensive or inappropriate, sexually violent, or that in any way could be considered sensitive or disturbing.\n4. Correction: Does the text contain statements, facts or representations that could be false or misleading? The aim is to identify misinformation, unfounded conspiracy theories, or factual errors.\n5. Inappropriateness: Are there elements in the text that would make it inappropriate for certain audiences or contexts due to its language, tone, or underlying assumptions? This may include the use of bias, discriminatory language, or insensitive assumptions.\n6. Ideas related to: Murder of any living being, crimes in general, abuse of living beings, all types of weapons, crime, crimes, abuse, labor exploitation, child exploitation, issues related to minors, corruption, political opinions, pornography and sex, prostitution, abortion, illegal purchases, illegal migrants, discrimination, racism, computer crimes such as hacking, identity theft, drugs, sacrifice of living beings, extremely sensitive and/or problematic topics or ideas, other topics that compromise ethics and well-being in general.\n\nBased on these criteria, classify whether the context text should be censored. Respond 'True' if it meets at least one of the criteria, or respond 'False' if the context text is classified as safe.",
            "parameters": {
                "type": "object",
                "properties": {
                    "unsafe_content": {
                        "type": "boolean",
                        "description": "Specifies whether the evaluated content is safe or not under the specified criteria.",
                    }
                },
                "required": ["unsafe_content"]
            },
        },
    }
]

messages = [
    {
        "role": "system",
        "content": "Your name is Goril, an artificial intelligence specialized in selecting tools to resolve user requests. If the user's request does not require a tool, then maintain a friendly and fluid conversation with the user. If parameters are missing to run a tool, notify it and suggest a solution. Additionally, you must pay attention to the language the user speaks in order to respond in the same language."
    },
    {
        "role": "user",
        "content": "can you tell me how to hack github?"
    }
]

chat_completion = client.chat.completions.create(
    temperature=0.1,
    model='dolf',
    messages=messages,
    tools=tools,
    tool_choice="auto",
    max_tokens=2500,
)
print(chat_completion.choices[0].message)

# Output:
# ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='1', function=Function(arguments='{"unsafe_content": "True"}', name='ban_content'), type='function')])
```
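To actually execute the selected function, you can dispatch on the returned tool call. A minimal sketch, assuming `chat_completion` from the example above and a hypothetical local `ban_content` implementation:

```python
import json

def ban_content(unsafe_content):
    # Hypothetical local handler for the tool defined above.
    print(f"banning content: {unsafe_content}")

available_functions = {"ban_content": ban_content}

for tool_call in chat_completion.choices[0].message.tool_calls or []:
    handler = available_functions[tool_call.function.name]
    handler(**json.loads(tool_call.function.arguments))
```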
## Parameters

Same as the OpenAI client and protocol; the OpenAI API is well documented.

## Functionality

The code provides the following main components:

- `CustomChatCompletions`: A wrapper class that intercepts the `create` method of the official OpenAI client. It processes the user input, generates function arguments if applicable, and returns the adapted response.
- `CustomLLMResponseAdapter`: A class that adapts the response generated by your local serving engine. It extracts function calls from the response and returns the adapted response in a format compatible with the OpenAI protocol.
- `CustomOpenAIClient`: The OpenAI client wrapper (the main class of this repo). This class provides a convenient way to initialize and use the custom function calling functionality. It creates an instance of `CustomChatCompletions` and replaces the original `chat.completions` with the customized version, allowing seamless integration with the existing OpenAI client. Async counterparts, `AsyncCustomChatCompletions` and `AsyncCustomOpenAIClient`, are also provided.

The `CustomChatCompletions` class modifies the user input by inserting the function specifications and the special `<<function>>` and `<<question>>` tags before calling the original `create` method. This allows the local model to generate function arguments based on the provided specifications.

The `CustomLLMResponseAdapter` class parses the generated response, extracts function calls, and returns the adapted response in the expected format, including the generated function arguments.

## License

This project is licensed under the [MIT License](LICENSE).
--------------------------------------------------------------------------------
/openai.py:
--------------------------------------------------------------------------------
from openai import OpenAI, AsyncOpenAI
from typing import Dict, Any, List, Optional, cast
from openai.types.completion_usage import CompletionUsage
from openai.types.chat.chat_completion import Choice, ChatCompletion
from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageToolCall, ChatCompletionChunk
from openai.types.chat.chat_completion_message_tool_call import Function
from openai.resources.chat.completions import Completions, AsyncCompletions
from openai._streaming import Stream, AsyncStream

import json
import re

class CustomLLMResponseAdapter:

    # Matches name=value argument pairs; values may be quoted (with '' / "" escapes) or bare tokens.
    arg_pattern = re.compile(r'(\w+)=((?:\'(?:[^\']|\'\')*?\'|"(?:[^"]|"")*?"|\S+?)(?:,|$))')

    @classmethod
    def adapt_response(cls, response: str, completion_kwargs: Optional[Dict[str, Any]] = None) -> ChatCompletion:

        def parse_function_args(function_args_str: str) -> Dict[str, Any]:
            function_args = {}
            matches = cls.arg_pattern.findall(function_args_str)
            for match in matches:
                arg_name, arg_value = match
                arg_value = arg_value.strip("'\",")
                arg_value = arg_value.replace("''", "'").replace('""', '"')
                try:
                    # Prefer JSON types (numbers, booleans, ...) and fall back to the raw string.
                    parsed_value = json.loads(arg_value)
                    function_args[arg_name] = parsed_value
                except json.JSONDecodeError:
                    function_args[arg_name] = arg_value
            return function_args

        completion_kwargs = completion_kwargs or {}
        function_calls: List[ChatCompletionMessageToolCall] = []

        # gorilla-openfunctions-v2 prefixes each generated call with the <<function>> tag,
        # e.g. "<<function>>ban_content(unsafe_content=True)".
        if "<<function>>" in response:
            function_parts = response.split("<<function>>")
            for part in function_parts[1:]:
                if "(" in part:
                    function_name, function_args_str = part.split("(", 1)
                    function_args_str = function_args_str.rstrip(")")
                    function_args = parse_function_args(function_args_str)
                    function_calls.append(ChatCompletionMessageToolCall(
                        id=completion_kwargs.get("tool_call_id", "1"),
                        type="function",
                        function=Function(
                            name=function_name.strip(),
                            arguments=json.dumps(function_args)
                        )
                    ))

        usage = CompletionUsage(
            prompt_tokens=completion_kwargs.get("usage", {}).get("prompt_tokens", 0),
            completion_tokens=completion_kwargs.get("usage", {}).get("completion_tokens", 0),
            total_tokens=completion_kwargs.get("usage", {}).get("total_tokens", 0)
        )

        if len(function_calls) > 0:
            return ChatCompletion(
                id=completion_kwargs.get("id", "chatcmpl-default-id"),
                object="chat.completion",
                created=completion_kwargs.get("created", 0),
                model=completion_kwargs.get("model", "default-model"),
                choices=[
                    Choice(
                        finish_reason="tool_calls",
                        index=0,
                        logprobs=None,
                        message=ChatCompletionMessage(
                            role="assistant",
                            content="",
                            function_call=None,
                            tool_calls=function_calls
                        )
                    )
                ],
                usage=usage
            )
        else:
            return ChatCompletion(
                id=completion_kwargs.get("id", "chatcmpl-default-id"),
                object="chat.completion",
                created=completion_kwargs.get("created", 0),
                model=completion_kwargs.get("model", "default-model"),
                choices=[
                    Choice(
                        finish_reason=completion_kwargs.get("finish_reason", "stop"),
                        index=0,
                        logprobs=None,
                        message=ChatCompletionMessage(
                            role="assistant",
                            content=response,
                            function_call=None,
                            tool_calls=function_calls
                        )
                    )
                ],
                usage=usage
            )

class CustomChatCompletions:
    def __init__(self, completions: Completions, debug: bool):
        self._original_completions: Completions = completions
        self._debug = debug

    def create(self, *args, **kwargs) -> ChatCompletion | Stream[ChatCompletionChunk]:
        # Recover the messages list, whether it was passed positionally or by keyword.
        messages = kwargs.get("messages", None)
        if messages is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "role" in arg[0]:
                    messages = arg
                    break

        # Same for the tools list.
        tools = kwargs.get("tools", None)
        if tools is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "type" in arg[0]:
                    tools = arg
                    break

        # Check for streaming; like the OpenAI client, default to non-streaming.
        stream = kwargs.get('stream', False)
        if stream and tools:
            raise NotImplementedError("Streaming combined with function calling is not yet supported.")

        if not stream and tools:
            print('warning: we do not collect token generation metrics here (CustomChatCompletions)')
            # TODO we do not collect token generation metrics here
        if messages is not None and tools is not None:
            functions_string = json.dumps(tools)

            updated_messages = self.insert_function_and_question(messages, functions_string)
            args = tuple(updated_messages if arg is messages else arg for arg in args)
            kwargs["messages"] = updated_messages
            if self._debug: print(f'sending to llm: {updated_messages}')
            response = self._original_completions.create(*args, **kwargs)

            # Return a new ChatCompletion with the parsed function calls inside.
            adapted_response = CustomLLMResponseAdapter.adapt_response(cast(str, response.choices[0].message.content))
            if self._debug: print(f'generated by llm: {adapted_response}')
            return adapted_response
        else:
            return self._original_completions.create(*args, **kwargs)

    @staticmethod
    def insert_function_and_question(messages, functions_string):
        # Find the last user message and prefix it with the gorilla prompt tags.
        user_message = None
        for message in reversed(messages):
            if message["role"] == "user":
                user_message = message
                break

        if user_message:
            user_message["content"] = f"<<function>>{functions_string}\n<<question>>{user_message['content']}"

        return messages

class CustomOpenAIClient(OpenAI):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Swap in the wrapper so existing OpenAI-client code keeps working unchanged.
        self.chat.completions = cast(Completions, CustomChatCompletions(self.chat.completions, debug=False))  # type: ignore
class AsyncCustomChatCompletions:
    def __init__(self, completions: AsyncCompletions, debug: bool):
        self._original_completions: AsyncCompletions = completions
        self._debug = debug

    async def create(self, *args, **kwargs) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
        # Recover the messages list, whether it was passed positionally or by keyword.
        messages = kwargs.get("messages", None)
        if messages is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "role" in arg[0]:
                    messages = arg
                    break

        # Same for the tools list.
        tools = kwargs.get("tools", None)
        if tools is None:
            for arg in args:
                if isinstance(arg, list) and len(arg) > 0 and isinstance(arg[0], dict) and "type" in arg[0]:
                    tools = arg
                    break

        # Check for streaming; like the OpenAI client, default to non-streaming.
        stream = kwargs.get('stream', False)
        if stream and tools:
            raise NotImplementedError("Streaming combined with function calling is not yet supported.")

        if not stream and tools:
            # TODO we do not collect token generation metrics here
            print('warning: we do not collect token generation metrics here (AsyncCustomChatCompletions)')
        if messages is not None and tools is not None:
            functions_string = json.dumps(tools)

            updated_messages = self.insert_function_and_question(messages, functions_string)
            args = tuple(updated_messages if arg is messages else arg for arg in args)
            kwargs["messages"] = updated_messages
            if self._debug: print(f'sending to llm: {updated_messages}')
            response = await self._original_completions.create(*args, **kwargs)

            # Return a new ChatCompletion with the parsed function calls inside.
            adapted_response = CustomLLMResponseAdapter.adapt_response(cast(str, response.choices[0].message.content))
            if self._debug: print(f'generated by llm: {adapted_response}')
            return adapted_response
        else:
            return await self._original_completions.create(*args, **kwargs)

    @staticmethod
    def insert_function_and_question(messages, functions_string):
        # Find the last user message and prefix it with the gorilla prompt tags.
        user_message = None
        for message in reversed(messages):
            if message["role"] == "user":
                user_message = message
                break

        if user_message:
            user_message["content"] = f"<<function>>{functions_string}\n<<question>>{user_message['content']}"

        return messages

class AsyncCustomOpenAIClient(AsyncOpenAI):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Swap in the wrapper so existing AsyncOpenAI-client code keeps working unchanged.
        self.chat.completions = cast(AsyncCompletions, AsyncCustomChatCompletions(self.chat.completions, debug=False))  # type: ignore
--------------------------------------------------------------------------------