├── tools ├── kill-kit.bat ├── kit-log.bat ├── code-log.bat └── create-log.bat ├── audio-client ├── requirements.txt ├── run.bat ├── gen_protoc.py ├── avatar.bat ├── prompt │ └── jira-vs-slack.json ├── ref │ ├── minimal-chatbot.py │ ├── sine-curve.py │ ├── pytts-demo.py │ └── portal.py ├── proto │ └── audio2face.proto ├── .vscode │ └── launch.json ├── audio2face_pb2.py ├── audio2face_pb2_grpc.py ├── test_client.py └── llm.py ├── README.md ├── LICENSE └── .gitignore /tools/kill-kit.bat: -------------------------------------------------------------------------------- 1 | taskkill /IM kit.exe /F -------------------------------------------------------------------------------- /tools/kit-log.bat: -------------------------------------------------------------------------------- 1 | start "" "%userprofile%\.nvidia-omniverse\logs\Kit\kit" -------------------------------------------------------------------------------- /tools/code-log.bat: -------------------------------------------------------------------------------- 1 | start "" "%userprofile%\.nvidia-omniverse\logs\Kit\Code" -------------------------------------------------------------------------------- /tools/create-log.bat: -------------------------------------------------------------------------------- 1 | start "" "%userprofile%\.nvidia-omniverse\logs\Kit\Create.Next" -------------------------------------------------------------------------------- /audio-client/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | pydub 3 | gradio 4 | gradio_client 5 | requests 6 | litellm -------------------------------------------------------------------------------- /audio-client/run.bat: -------------------------------------------------------------------------------- 1 | @REM pip install protobuf==3.17.3 grpcio soundfile 2 | python test_client.py %1 /World/audio2face/PlayerStreaming /World/audio2gesture/PlayerStreaming -------------------------------------------------------------------------------- /audio-client/gen_protoc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | 5 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | proto_src_root = os.path.normpath(os.path.join(ROOT_DIR, "proto/")) 7 | proto_dst_root = os.path.normpath(os.path.join(ROOT_DIR, ".")) 8 | proto_fpath = os.path.normpath(os.path.join(ROOT_DIR, "proto", "audio2face.proto")) 9 | 10 | cmd = [ 11 | "python", 12 | "-m", 13 | "grpc_tools.protoc", 14 | "-I", 15 | f"{proto_src_root}", 16 | f"--python_out={proto_dst_root}", 17 | f"--grpc_python_out={proto_dst_root}", 18 | f"{proto_fpath}", 19 | ] 20 | 21 | print(cmd) 22 | subprocess.call(cmd) 23 | -------------------------------------------------------------------------------- /audio-client/avatar.bat: -------------------------------------------------------------------------------- 1 | @REM c:\p4\audio2face\run_avatar.bat ^ 2 | %localappdata%\ov\pkg\audio2face-2023.1.0-beta.4\avatar.kit.bat ^ 3 | --enable omni.services.transport.server.http ^ 4 | --enable omni.kit.tool.asset_exporter ^ 5 | --enable omni.avatar.livelink ^ 6 | --enable omni.avatar.ui.livelink ^ 7 | --/app/renderer/sleepMsOutOfFocus=0 ^ 8 | --/app/renderer/sleepMsOutOfFocus=0 ^ 9 | --/app/asyncRendering=false ^ 10 | --/rtx/reflections/enabled=false ^ 11 | --/rtx/translucency/enabled=false ^ 12 | --/rtx/post/lensFlares/enabled=false ^ 13 | --/rtx/post/dof/enabled=false ^ 14 | --/rtx/indirectDiffuse/enabled=false ^ 15 | %* 
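@REM Note: the package path above is pinned to audio2face-2023.1.0-beta.4 -- adjust it to whatever
@REM Audio2Face version Omniverse Launcher actually installed on your machine. For the README's
@REM "Launch Audio2Face headless" step, recent A2F packages also ship a headless launcher
@REM (commonly named audio2face_headless.bat; the exact script name is an assumption and may
@REM differ per release) that serves the REST API llm.py targets on port 8011 by default
@REM (see --a2f_port in llm.py).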
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM MetaHuman 2 | 3 | LLM MetaHuman is an open solution for AI-powered photorealistic digital humans. 4 | 5 | ## Preparation steps 6 | 7 | - Install [Omniverse Launcher](https://www.nvidia.com/en-us/omniverse/download/) 8 | - Inside Omniverse Launcher, Install `Audio2Face`. 9 | - Install [Epic Games Store](https://store.epicgames.com/en-US/) 10 | - Inside Epic Games Store, Install Unreal Engine 5.x. 11 | - Follow [Audio2Face to UE Live Link Plugin](https://docs.omniverse.nvidia.com/audio2face/latest/user-manual/livelink-ue-plugin.html) to connect Audi2Face to Unreal Engine. 12 | 13 | ## Launch Audio2Face headless 14 | 15 | ## Launch llm.py 16 | 17 | ## Launch Unreal Engine Metahuman 18 | 19 | -------------------------------------------------------------------------------- /audio-client/prompt/jira-vs-slack.json: -------------------------------------------------------------------------------- 1 | { 2 | "task": "Write a stand-up comedy script with 10 dialogs", 3 | "characters": [ 4 | { 5 | "title": "Software Engineer", 6 | "name": "Alloy", 7 | "preference": "Email", 8 | "description": "Enthusiastic software engineer" 9 | }, 10 | { 11 | "title": "Program Manager", 12 | "name": "Nova", 13 | "preference": "Slack", 14 | "description": "Organized program manager" 15 | } 16 | ], 17 | "topic": "Argument about email vs Slack", 18 | "format": "A: says something in one line. B: says something in one line. Remove the number and quotes." 19 | } 20 | -------------------------------------------------------------------------------- /audio-client/ref/minimal-chatbot.py: -------------------------------------------------------------------------------- 1 | import random 2 | import gradio as gr 3 | 4 | 5 | def alternatingly_agree(message, history): 6 | if len(history) % 2 == 0: 7 | return f"Yes, I do think that '{message}'" 8 | else: 9 | return "I don't think so" 10 | 11 | 12 | count = 0 13 | 14 | 15 | def textbox_update(chatui_textbox): 16 | global count 17 | count += 1 18 | if count % 10 == 0: 19 | return "z" 20 | else: 21 | return chatui_textbox 22 | 23 | 24 | if __name__ == "__main__": 25 | with gr.ChatInterface(alternatingly_agree) as chat_ui: 26 | chat_ui.textbox.change( 27 | textbox_update, 28 | chat_ui.textbox, 29 | chat_ui.textbox, 30 | every=1, 31 | trigger_mode="once", 32 | ) 33 | chat_ui.launch() 34 | -------------------------------------------------------------------------------- /audio-client/proto/audio2face.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package nvidia.audio2face; 4 | 5 | service Audio2Face { 6 | rpc PushAudio(PushAudioRequest) returns (PushAudioResponse) {} 7 | rpc PushAudioStream(stream PushAudioStreamRequest) returns (PushAudioStreamResponse) {} 8 | } 9 | 10 | message PushAudioRequest { 11 | string instance_name = 1; 12 | int32 samplerate = 2; 13 | bytes audio_data = 3; 14 | bool block_until_playback_is_finished = 4; 15 | } 16 | 17 | message PushAudioResponse { 18 | bool success = 1; 19 | string message = 2; 20 | } 21 | 22 | message PushAudioStreamRequest { 23 | oneof streaming_request { 24 | PushAudioRequestStart start_marker = 1; 25 | bytes audio_data = 2; 26 | } 27 | } 28 | 29 | message PushAudioRequestStart { 30 | string instance_name = 1; 31 | int32 samplerate = 2; 32 | bool block_until_playback_is_finished = 3; 33 | } 34 | 
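// Streaming usage note (mirrors the docstrings in test_client.py): the first
// PushAudioStreamRequest of a stream must carry start_marker with metadata only;
// every subsequent request carries a float32-encoded audio_data chunk. A single
// PushAudioStreamResponse is returned after the client closes the stream.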
35 | message PushAudioStreamResponse { 36 | bool success = 1; 37 | string message = 2; 38 | } 39 | -------------------------------------------------------------------------------- /audio-client/ref/sine-curve.py: -------------------------------------------------------------------------------- 1 | import math 2 | import gradio as gr 3 | import plotly.express as px 4 | import numpy as np 5 | 6 | 7 | plot_end = 2 * math.pi 8 | 9 | 10 | def get_plot(period=1): 11 | global plot_end 12 | x = np.arange(plot_end - 2 * math.pi, plot_end, 0.02) 13 | y = np.sin(2*math.pi*period * x) 14 | fig = px.line(x=x, y=y) 15 | plot_end += 2 * math.pi 16 | if plot_end > 1000: 17 | plot_end = 2 * math.pi 18 | return fig 19 | 20 | 21 | with gr.Blocks() as demo: 22 | with gr.Row(): 23 | with gr.Column(): 24 | gr.Markdown("Change the value of the slider to automatically update the plot") 25 | period = gr.Slider(label="Period of plot", value=1, minimum=0, maximum=10, step=1) 26 | plot = gr.Plot(label="Plot (updates every half second)") 27 | 28 | dep = demo.load(get_plot, None, plot, every=1) 29 | period.change(get_plot, period, plot, every=1, cancels=[dep]) 30 | 31 | 32 | if __name__ == "__main__": 33 | demo.queue().launch() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vinjn Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /audio-client/ref/pytts-demo.py: -------------------------------------------------------------------------------- 1 | import pyttsx3 2 | 3 | engine = pyttsx3.init() # object creation 4 | 5 | """ RATE""" 6 | rate = engine.getProperty("rate") # getting details of current speaking rate 7 | print(rate) # printing current voice rate 8 | engine.setProperty("rate", 125) # setting up new voice rate 9 | 10 | 11 | """VOLUME""" 12 | volume = engine.getProperty( 13 | "volume" 14 | ) # getting to know current volume level (min=0 and max=1) 15 | print(volume) # printing current volume level 16 | engine.setProperty("volume", 1.0) # setting up volume level between 0 and 1 17 | 18 | """VOICE""" 19 | voices = engine.getProperty("voices") # getting details of current voice 20 | print(voices) 21 | engine.setProperty("voice", voices[0].id) # changing index, changes voices. 
o for male 22 | # engine.setProperty('voice', voices[1].id) #changing index, changes voices. 1 for female 23 | 24 | engine.say("Hello World!") 25 | engine.say("说什么 current speaking rate is " + str(rate)) 26 | engine.runAndWait() 27 | engine.stop() 28 | 29 | """Saving Voice to a file""" 30 | # On linux make sure that 'espeak' and 'ffmpeg' are installed 31 | engine.save_to_file("Hello World", "test.mp3") 32 | engine.runAndWait() 33 | -------------------------------------------------------------------------------- /audio-client/ref/portal.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | 4 | def task1(input_text): 5 | return "Task 1 Result: " + input_text 6 | 7 | 8 | def task2(input_image): 9 | return "Task 2 Result" 10 | 11 | 12 | def task3(input_image): 13 | return "Task 2 Result" 14 | 15 | 16 | # interface one 17 | iface1 = gr.Interface( 18 | fn=task1, inputs="text", outputs="text", title="Multi-Page Interface" 19 | ) 20 | # interface two 21 | iface2 = gr.Interface( 22 | fn=task2, inputs="image", outputs="text", title="Multi-Page Interface" 23 | ) 24 | 25 | tts_examples = [ 26 | "I love learning machine learning", 27 | "How do you do?", 28 | ] 29 | 30 | 31 | tts_demo = gr.load( 32 | "huggingface/facebook/fastspeech2-en-ljspeech", 33 | title=None, 34 | examples=tts_examples, 35 | description="Give me something to say!", 36 | cache_examples=False, 37 | ) 38 | 39 | stt_demo = gr.load( 40 | "huggingface/facebook/wav2vec2-base-960h", 41 | title=None, 42 | inputs="mic", 43 | description="Let me try to guess what you're saying!", 44 | ) 45 | 46 | 47 | demo = gr.TabbedInterface( 48 | [iface1, iface2, tts_demo, stt_demo], 49 | ["Text-to-text", "image-to-text", "Text-to-speech", "Speech-to-text"], 50 | ) 51 | 52 | # Run the interface 53 | demo.launch(share=True) 54 | -------------------------------------------------------------------------------- /audio-client/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "default", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/llm.py", 12 | "console": "integratedTerminal", 13 | "args": [], 14 | "justMyCode": false 15 | }, 16 | { 17 | "name": "two metahumans", 18 | "type": "python", 19 | "request": "launch", 20 | "program": "${workspaceFolder}/llm.py", 21 | "console": "integratedTerminal", 22 | "args": [ 23 | "--a2f_instance_count=2", 24 | "--llm_streaming" 25 | ], 26 | "justMyCode": true 27 | }, 28 | { 29 | "name": "Python: Current File", 30 | "type": "python", 31 | "request": "launch", 32 | "program": "${file}", 33 | "console": "integratedTerminal", 34 | "justMyCode": true 35 | }, 36 | { 37 | "name": "gradio_7861 a2f_8012 livelink_12040", 38 | "type": "python", 39 | "request": "launch", 40 | "program": "${workspaceFolder}/llm.py", 41 | "console": "integratedTerminal", 42 | "args": [ 43 | "--gradio_port=7861", 44 | "--a2f_url=http://localhost:8012", 45 | "--tts_voice=alloy", 46 | "--livelink_host=localhost", 47 | "--livelink_subject=Audio2Face-1", 48 | "--livelink_port=12040", 49 | "--livelink_audio_port=12041" 50 | ], 51 | "justMyCode": false 52 | } 53 | ] 54 | } -------------------------------------------------------------------------------- /audio-client/audio2face_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: audio2face.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf.internal import builder as _builder 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10\x61udio2face.proto\x12\x11nvidia.audio2face\"{\n\x10PushAudioRequest\x12\x15\n\rinstance_name\x18\x01 \x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12\x12\n\naudio_data\x18\x03 \x01(\x0c\x12(\n block_until_playback_is_finished\x18\x04 \x01(\x08\"5\n\x11PushAudioResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"\x85\x01\n\x16PushAudioStreamRequest\x12@\n\x0cstart_marker\x18\x01 \x01(\x0b\x32(.nvidia.audio2face.PushAudioRequestStartH\x00\x12\x14\n\naudio_data\x18\x02 \x01(\x0cH\x00\x42\x13\n\x11streaming_request\"l\n\x15PushAudioRequestStart\x12\x15\n\rinstance_name\x18\x01 \x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12(\n block_until_playback_is_finished\x18\x03 \x01(\x08\";\n\x17PushAudioStreamResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t2\xd4\x01\n\nAudio2Face\x12X\n\tPushAudio\x12#.nvidia.audio2face.PushAudioRequest\x1a$.nvidia.audio2face.PushAudioResponse\"\x00\x12l\n\x0fPushAudioStream\x12).nvidia.audio2face.PushAudioStreamRequest\x1a*.nvidia.audio2face.PushAudioStreamResponse\"\x00(\x01\x62\x06proto3') 17 | 18 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 19 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'audio2face_pb2', globals()) 20 | if _descriptor._USE_C_DESCRIPTORS == False: 21 | 22 | DESCRIPTOR._options = None 23 | _PUSHAUDIOREQUEST._serialized_start=39 24 | _PUSHAUDIOREQUEST._serialized_end=162 25 | 
_PUSHAUDIORESPONSE._serialized_start=164 26 | _PUSHAUDIORESPONSE._serialized_end=217 27 | _PUSHAUDIOSTREAMREQUEST._serialized_start=220 28 | _PUSHAUDIOSTREAMREQUEST._serialized_end=353 29 | _PUSHAUDIOREQUESTSTART._serialized_start=355 30 | _PUSHAUDIOREQUESTSTART._serialized_end=463 31 | _PUSHAUDIOSTREAMRESPONSE._serialized_start=465 32 | _PUSHAUDIOSTREAMRESPONSE._serialized_end=524 33 | _AUDIO2FACE._serialized_start=527 34 | _AUDIO2FACE._serialized_end=739 35 | # @@protoc_insertion_point(module_scope) 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # intermedaite media folders 132 | _*/ 133 | *.mp3 134 | *.wav 135 | *.usd 136 | *.mp4 137 | *.download 138 | *_uuid.txt 139 | -------------------------------------------------------------------------------- /audio-client/audio2face_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | import audio2face_pb2 as audio2face__pb2 6 | 7 | 8 | class Audio2FaceStub(object): 9 | """Missing associated documentation comment in .proto file.""" 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 16 | """ 17 | self.PushAudio = channel.unary_unary( 18 | '/nvidia.audio2face.Audio2Face/PushAudio', 19 | request_serializer=audio2face__pb2.PushAudioRequest.SerializeToString, 20 | response_deserializer=audio2face__pb2.PushAudioResponse.FromString, 21 | ) 22 | self.PushAudioStream = channel.stream_unary( 23 | '/nvidia.audio2face.Audio2Face/PushAudioStream', 24 | request_serializer=audio2face__pb2.PushAudioStreamRequest.SerializeToString, 25 | response_deserializer=audio2face__pb2.PushAudioStreamResponse.FromString, 26 | ) 27 | 28 | 29 | class Audio2FaceServicer(object): 30 | """Missing associated documentation comment in .proto file.""" 31 | 32 | def PushAudio(self, request, context): 33 | """Missing associated documentation comment in .proto file.""" 34 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 35 | context.set_details('Method not implemented!') 36 | raise NotImplementedError('Method not implemented!') 37 | 38 | def PushAudioStream(self, request_iterator, context): 39 | """Missing associated documentation comment in .proto file.""" 40 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 41 | context.set_details('Method not implemented!') 42 | raise NotImplementedError('Method not implemented!') 43 | 44 | 45 | def add_Audio2FaceServicer_to_server(servicer, server): 46 | rpc_method_handlers = { 47 | 'PushAudio': grpc.unary_unary_rpc_method_handler( 48 | servicer.PushAudio, 49 | request_deserializer=audio2face__pb2.PushAudioRequest.FromString, 50 | response_serializer=audio2face__pb2.PushAudioResponse.SerializeToString, 51 | ), 52 | 'PushAudioStream': grpc.stream_unary_rpc_method_handler( 53 | servicer.PushAudioStream, 54 | request_deserializer=audio2face__pb2.PushAudioStreamRequest.FromString, 55 | response_serializer=audio2face__pb2.PushAudioStreamResponse.SerializeToString, 56 | ), 57 | } 58 | generic_handler = grpc.method_handlers_generic_handler( 59 | 'nvidia.audio2face.Audio2Face', rpc_method_handlers) 60 | server.add_generic_rpc_handlers((generic_handler,)) 61 | 62 | 63 | # This class is part of an EXPERIMENTAL API. 
64 | class Audio2Face(object): 65 | """Missing associated documentation comment in .proto file.""" 66 | 67 | @staticmethod 68 | def PushAudio(request, 69 | target, 70 | options=(), 71 | channel_credentials=None, 72 | call_credentials=None, 73 | insecure=False, 74 | compression=None, 75 | wait_for_ready=None, 76 | timeout=None, 77 | metadata=None): 78 | return grpc.experimental.unary_unary(request, target, '/nvidia.audio2face.Audio2Face/PushAudio', 79 | audio2face__pb2.PushAudioRequest.SerializeToString, 80 | audio2face__pb2.PushAudioResponse.FromString, 81 | options, channel_credentials, 82 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 83 | 84 | @staticmethod 85 | def PushAudioStream(request_iterator, 86 | target, 87 | options=(), 88 | channel_credentials=None, 89 | call_credentials=None, 90 | insecure=False, 91 | compression=None, 92 | wait_for_ready=None, 93 | timeout=None, 94 | metadata=None): 95 | return grpc.experimental.stream_unary(request_iterator, target, '/nvidia.audio2face.Audio2Face/PushAudioStream', 96 | audio2face__pb2.PushAudioStreamRequest.SerializeToString, 97 | audio2face__pb2.PushAudioStreamResponse.FromString, 98 | options, channel_credentials, 99 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 100 | -------------------------------------------------------------------------------- /audio-client/test_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | This demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests. 3 | There are two options: 4 | * Send the whole track at once using PushAudioRequest() 5 | * Send the audio chunks seuqntially in a stream using PushAudioStreamRequest() 6 | For the second option this script emulates the stream of chunks, generated by splitting an input WAV audio file. 
7 | But in a real application such stream of chunks may be aquired from some other streaming source: 8 | * streaming audio via internet, streaming Text-To-Speech, etc 9 | gRPC protocol details could be find in audio2face.proto 10 | """ 11 | 12 | import sys 13 | import time 14 | 15 | import audio2face_pb2 16 | import audio2face_pb2_grpc 17 | import grpc 18 | import numpy as np 19 | import soundfile 20 | 21 | 22 | def push_audio_track(url, audio_data, samplerate, instance_names): 23 | """ 24 | This function pushes the whole audio track at once via PushAudioRequest() 25 | PushAudioRequest parameters: 26 | * audio_data: bytes, containing audio data for the whole track, where each sample is encoded as 4 bytes (float32) 27 | * samplerate: sampling rate for the audio data 28 | * instance_names: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data 29 | * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished 30 | The request is passed to PushAudio() 31 | """ 32 | 33 | block_until_playback_is_finished = True # ADJUST 34 | for instance_name in instance_names: 35 | with grpc.insecure_channel(url) as channel: 36 | stub = audio2face_pb2_grpc.Audio2FaceStub(channel) 37 | request = audio2face_pb2.PushAudioRequest() 38 | request.audio_data = audio_data.astype(np.float32).tobytes() 39 | request.samplerate = samplerate 40 | request.instance_name = instance_name 41 | request.block_until_playback_is_finished = block_until_playback_is_finished 42 | print("Sending audio data...") 43 | response = stub.PushAudio(request) 44 | if response.success: 45 | print("SUCCESS") 46 | else: 47 | print(f"ERROR: {response.message}") 48 | print("Closed channel") 49 | 50 | 51 | def push_audio_track_stream(url, audio_data, samplerate, instance_names): 52 | """ 53 | This function pushes audio chunks sequentially via PushAudioStreamRequest() 54 | The function emulates the stream of chunks, generated by splitting input audio track. 55 | But in a real application such stream of chunks may be aquired from some other streaming source. 
56 | The first message must contain start_marker field, containing only meta information (without audio data): 57 | * samplerate: sampling rate for the audio data 58 | * instance_names: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data 59 | * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished (after the last message) 60 | Second and other messages must contain audio_data field: 61 | * audio_data: bytes, containing audio data for an audio chunk, where each sample is encoded as 4 bytes (float32) 62 | All messages are packed into a Python generator and passed to PushAudioStream() 63 | """ 64 | 65 | chunk_size = samplerate // 10 # ADJUST 66 | sleep_between_chunks = 0.04 # ADJUST 67 | block_until_playback_is_finished = True # ADJUST 68 | 69 | with grpc.insecure_channel(url) as channel: 70 | print("Channel creadted") 71 | stub = audio2face_pb2_grpc.Audio2FaceStub(channel) 72 | 73 | for instance_name in instance_names: 74 | def make_generator(): 75 | start_marker = audio2face_pb2.PushAudioRequestStart( 76 | samplerate=samplerate, 77 | instance_name=instance_name, 78 | block_until_playback_is_finished=block_until_playback_is_finished, 79 | ) 80 | # At first, we send a message with start_marker 81 | yield audio2face_pb2.PushAudioStreamRequest(start_marker=start_marker) 82 | # Then we send messages with audio_data 83 | for i in range(len(audio_data) // chunk_size + 1): 84 | time.sleep(sleep_between_chunks) 85 | chunk = audio_data[i * chunk_size : i * chunk_size + chunk_size] 86 | yield audio2face_pb2.PushAudioStreamRequest(audio_data=chunk.astype(np.float32).tobytes()) 87 | 88 | request_generator = make_generator() 89 | print("Sending audio data...") 90 | response = stub.PushAudioStream(request_generator) 91 | if response.success: 92 | print("SUCCESS") 93 | else: 94 | print(f"ERROR: {response.message}") 95 | print("Channel closed") 96 | 97 | 98 | def main(): 99 | """ 100 | This demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests. 101 | There two options: 102 | * Send the whole track at once using PushAudioRequest() 103 | * Send the audio chunks seuqntially in a stream using PushAudioStreamRequest() 104 | For the second option this script emulates the stream of chunks, generated by splitting an input WAV audio file. 
105 | But in a real application such stream of chunks may be aquired from some other streaming source: 106 | * streaming audio via internet, streaming Text-To-Speech, etc 107 | gRPC protocol details could be find in audio2face.proto 108 | """ 109 | 110 | if len(sys.argv) < 3: 111 | print("Format: python test_client.py PATH_TO_WAV INSTANCE_NAME") 112 | return 113 | 114 | # Sleep time emulates long latency of the request 115 | sleep_time = 0.0 # ADJUST 116 | 117 | # URL of the Audio2Face Streaming Audio Player server (where A2F App is running) 118 | url = "localhost:50051" # ADJUST 119 | 120 | # Local input WAV file path 121 | audio_fpath = sys.argv[1] 122 | 123 | # Prim path of the Audio2Face Streaming Audio Player on the stage (were to push the audio data) 124 | instance_names = sys.argv[2:] 125 | 126 | data, samplerate = soundfile.read(audio_fpath, dtype="float32") 127 | 128 | # Only Mono audio is supported 129 | if len(data.shape) > 1: 130 | data = np.average(data, axis=1) 131 | 132 | print(f"Sleeping for {sleep_time} seconds") 133 | time.sleep(sleep_time) 134 | 135 | if 0: # ADJUST 136 | # Push the whole audio track at once 137 | push_audio_track(url, data, samplerate, instance_names) 138 | else: 139 | # Emulate audio stream and push audio chunks sequentially 140 | push_audio_track_stream(url, data, samplerate, instance_names) 141 | 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /audio-client/llm.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from pydub import AudioSegment 3 | import gradio as gr 4 | import requests 5 | import os 6 | from litellm import completion 7 | import time 8 | import threading 9 | import queue 10 | import gradio_client as gc 11 | 12 | 13 | # XXX: increase requests speed 14 | # https://stackoverflow.com/a/72440253 15 | requests.packages.urllib3.util.connection.HAS_IPV6 = False 16 | 17 | args = None 18 | 19 | CWD = os.getcwd() 20 | print("CWD:", CWD) 21 | 22 | VOICE_ACTORS = ["nova", "alloy", "echo", "fable", "onyx", "shimmer"] 23 | 24 | 25 | def timing_decorator(func): 26 | def wrapper(*args, **kwargs): 27 | start_time = time.time() 28 | result = func(*args, **kwargs) 29 | end_time = time.time() 30 | elapsed_time = end_time - start_time 31 | print(f"{func.__name__} cost: {elapsed_time:.2f} seconds.") 32 | return result 33 | 34 | return wrapper 35 | 36 | 37 | class A2fInstance: 38 | files_to_delete = [] 39 | instaces = [] 40 | 41 | def __init__(self, index) -> None: 42 | self.SERVICE_HEALTHY = False 43 | self.LIVELINK_SERVICE_HEALTHY = False 44 | self.index = index 45 | 46 | @timing_decorator 47 | def post(self, end_point, data=None, verbose=True): 48 | if not self.SERVICE_HEALTHY: 49 | return None 50 | 51 | if verbose: 52 | print(f"++ {end_point}") 53 | api_url = f"{self.base_url}/{end_point}" 54 | try: 55 | response = requests.post(api_url, json=data) 56 | 57 | if response and response.status_code == 200: 58 | if verbose: 59 | print(response.json()) 60 | return response.json() 61 | else: 62 | if verbose: 63 | print(f"Error: {response.status_code} - {response.text}") 64 | return {"Error": response.status_code, "Reason": response.text} 65 | except Exception as e: 66 | print(e) 67 | self.SERVICE_HEALTHY = False 68 | return None 69 | 70 | @timing_decorator 71 | def get(self, end_point, data=None, verbose=True): 72 | if not self.SERVICE_HEALTHY: 73 | return None 74 | 75 | if verbose: 76 | print(f"++ 
{end_point}") 77 | api_url = f"{self.base_url}/{end_point}" 78 | 79 | try: 80 | response = requests.get(api_url, json=data) 81 | if response.status_code == 200: 82 | if verbose: 83 | print(response.json()) 84 | return response.json() 85 | else: 86 | if verbose: 87 | print(f"Error: {response.status_code} - {response.text}") 88 | return {"Error": response.status_code, "Reason": response.text} 89 | except Exception as e: 90 | print(e) 91 | self.SERVICE_HEALTHY = False 92 | return None 93 | 94 | def player_setlooping(self, flag=True): 95 | self.post( 96 | "A2F/Player/SetLooping", 97 | {"a2f_player": args.a2f_player_id, "loop_audio": flag}, 98 | ) 99 | 100 | def player_play(self): 101 | self.post("A2F/Player/Play", {"a2f_player": args.a2f_player_id}) 102 | 103 | def player_pause(self): 104 | self.post("A2F/Player/Pause", {"a2f_player": args.a2f_player_id}) 105 | 106 | def player_setrootpath(self, dir_path): 107 | self.post( 108 | "A2F/Player/SetRootPath", 109 | {"a2f_player": args.a2f_player_id, "dir_path": dir_path}, 110 | ) 111 | 112 | def player_settrack(self, file_name): 113 | self.post( 114 | "A2F/Player/SetTrack", 115 | {"a2f_player": args.a2f_player_id, "file_name": file_name}, 116 | ) 117 | 118 | def player_gettracks(self): 119 | self.post("A2F/Player/GetTracks", {"a2f_player": args.a2f_player_id}) 120 | 121 | def player_gettime(self): 122 | response = self.post( 123 | "A2F/Player/GetTime", {"a2f_player": args.a2f_player_id}, False 124 | ) 125 | if response and response["status"] == "OK": 126 | return response["result"] 127 | else: 128 | return 0 129 | 130 | def player_getrange(self): 131 | response = self.post( 132 | "A2F/Player/GetRange", {"a2f_player": args.a2f_player_id}, False 133 | ) 134 | if response and response["status"] == "OK": 135 | return response["result"]["work"] 136 | else: 137 | return (0, 0) 138 | 139 | def generatekeys(self): 140 | self.post("A2F/A2E/GenerateKeys", {"a2f_instance": args.a2f_instance_id}) 141 | 142 | def ActivateStreamLivelink(self, flag): 143 | self.post( 144 | "A2F/Exporter/ActivateStreamLivelink", 145 | {"node_path": args.a2f_livelink_id, "value": flag}, 146 | ) 147 | 148 | def IsStreamLivelinkConnected(self): 149 | response = self.post( 150 | "A2F/Exporter/IsStreamLivelinkConnected", 151 | {"node_path": args.a2f_livelink_id}, 152 | ) 153 | if response and response["status"] == "OK": 154 | return response["result"] 155 | else: 156 | return False 157 | 158 | def enable_audio_stream(self, flag): 159 | self.post( 160 | "A2F/Exporter/SetStreamLivelinkSettings", 161 | { 162 | "node_path": args.a2f_livelink_id, 163 | "values": {"enable_audio_stream": flag}, 164 | }, 165 | ) 166 | 167 | def set_livelink_ports( 168 | self, 169 | livelink_host, 170 | livelink_subject, 171 | livelink_port, 172 | livelink_audio_port, 173 | ): 174 | self.post( 175 | "A2F/Exporter/SetStreamLivelinkSettings", 176 | { 177 | "node_path": args.a2f_livelink_id, 178 | "values": { 179 | "livelink_host": livelink_host, 180 | "livelink_subject": livelink_subject, 181 | "livelink_port": livelink_port, 182 | "audio_port": livelink_audio_port, 183 | }, 184 | }, 185 | ) 186 | 187 | def get_preprocessing(self): 188 | response = self.post( 189 | "A2F/PRE/GetSettings", 190 | {"a2f_instance": args.a2f_instance_id}, 191 | ) 192 | if response and response["status"] == "OK": 193 | return response["result"] 194 | else: 195 | return {} 196 | 197 | def set_preprocessing(self, settings): 198 | settings["a2f_instance"] = args.a2f_instance_id 199 | self.post("A2F/PRE/SetSettings", settings) 200 | 201 | 
def get_postprocessing(self): 202 | response = self.post( 203 | "A2F/POST/GetSettings", 204 | {"a2f_instance": args.a2f_instance_id}, 205 | ) 206 | if response and response["status"] == "OK": 207 | return response["result"] 208 | else: 209 | return {} 210 | 211 | def set_postprocessing(self, settings): 212 | self.post( 213 | "A2F/POST/SetSettings", 214 | {"a2f_instance": args.a2f_instance_id, "settings": settings}, 215 | ) 216 | 217 | def setup(self): 218 | self.base_url = f"http://{args.a2f_host}:{args.a2f_port+self.index}" 219 | self.tts_voice = args.tts_voice 220 | if self.index > 0: 221 | # TODO: make it elegant 222 | self.tts_voice = VOICE_ACTORS[self.index % len(VOICE_ACTORS)] 223 | 224 | # always ping SERVICE_HEALTHY again in setup() 225 | self.SERVICE_HEALTHY = True 226 | 227 | self.ActivateStreamLivelink(True) 228 | if not self.SERVICE_HEALTHY: 229 | return 230 | 231 | self.player_setrootpath(CWD) 232 | self.player_setlooping(False) 233 | 234 | self.LIVELINK_SERVICE_HEALTHY = self.IsStreamLivelinkConnected() 235 | if not self.LIVELINK_SERVICE_HEALTHY: 236 | return 237 | 238 | self.enable_audio_stream(True) 239 | 240 | self.set_livelink_ports( 241 | args.livelink_host, 242 | f"{args.livelink_subject}-{self.index}", 243 | args.livelink_port + 10 * self.index, 244 | args.livelink_audio_port + 10 * self.index, 245 | ) 246 | 247 | pre_settings = self.get_preprocessing() 248 | pre_settings["prediction_delay"] = 0 249 | pre_settings["blink_interval"] = 1.5 250 | self.set_preprocessing(pre_settings) 251 | 252 | post_settings = self.get_postprocessing() 253 | post_settings["skin_strength"] = 1.3 254 | self.set_postprocessing(post_settings) 255 | 256 | 257 | A2fInstance.instaces = [] 258 | openai_client = OpenAI() 259 | gc_client: gc.Client = None 260 | chat_ui: gr.ChatInterface = None 261 | 262 | 263 | def run_single_pipeline(a2f, answer, a2f_peer=None): 264 | global stop_current_a2f_play 265 | 266 | if not a2f_peer: 267 | a2f_peer = a2f 268 | 269 | # print(answer) 270 | mp3_file = text_to_mp3(answer, a2f.tts_voice) 271 | wav_file = mp3_to_wav(mp3_file) 272 | duration = a2f_peer.player_getrange()[1] 273 | position = a2f_peer.player_gettime() 274 | while position > 0 and position < duration: 275 | print(position, duration) 276 | if stop_current_a2f_play: 277 | print("stop_current_a2f_play") 278 | stop_current_a2f_play = False 279 | return 280 | 281 | time.sleep(1) 282 | position = a2f_peer.player_gettime() 283 | print("z") 284 | time.sleep(1) 285 | a2f.player_setrootpath(CWD) 286 | a2f.player_settrack(wav_file) 287 | # a2f_generatekeys() 288 | 289 | a2f.player_play() 290 | 291 | for file in A2fInstance.files_to_delete: 292 | try: 293 | os.remove(file) 294 | except Exception: 295 | pass 296 | A2fInstance.files_to_delete.clear() 297 | 298 | A2fInstance.files_to_delete.append(mp3_file) 299 | A2fInstance.files_to_delete.append(wav_file) 300 | 301 | 302 | current_speaker = -1 303 | 304 | 305 | @timing_decorator 306 | def run_pipeline(answer): 307 | if args.a2f_instance_count == 1: 308 | run_single_pipeline(A2fInstance.instaces[0], answer) 309 | return 310 | 311 | global current_speaker 312 | if answer.startswith("("): 313 | current_speaker = -1 314 | elif answer.startswith("A:"): 315 | current_speaker = 0 316 | answer = answer[2:] 317 | elif answer.startswith("B:"): 318 | current_speaker = 1 319 | answer = answer[2:] 320 | 321 | if current_speaker < 0 or current_speaker >= args.a2f_instance_count: 322 | return 323 | 324 | a2f = A2fInstance.instaces[current_speaker] 325 | if not 
a2f.SERVICE_HEALTHY: 326 | return 327 | 328 | run_single_pipeline(a2f, answer) 329 | 330 | 331 | @timing_decorator 332 | def text_to_mp3(text, voice): 333 | response = openai_client.audio.speech.create( 334 | model=args.tts_model, 335 | voice=voice, 336 | speed=args.tts_speed, 337 | input=text, 338 | ) 339 | timestamp = time.time() 340 | mp3_filename = f"{timestamp}.mp3" 341 | response.stream_to_file(mp3_filename) 342 | 343 | return mp3_filename 344 | 345 | 346 | @timing_decorator 347 | def mp3_to_wav(mp3_filename): 348 | sound = AudioSegment.from_mp3(mp3_filename) 349 | sound = sound.set_frame_rate(22050) 350 | wav_filename = f"{mp3_filename}.wav" 351 | sound.export(wav_filename, format="wav") 352 | 353 | return wav_filename 354 | 355 | 356 | @timing_decorator 357 | def get_completion(chat_history): 358 | response = completion( 359 | model=args.llm_model, 360 | messages=chat_history, 361 | api_base=args.llm_url, 362 | stream=args.llm_streaming, 363 | ) 364 | 365 | print(response) 366 | return response 367 | 368 | 369 | q = queue.Queue() 370 | cleanup_queue = False 371 | stop_current_a2f_play = False 372 | 373 | 374 | def pipeline_worker(): 375 | while True: 376 | print("--------------------------") 377 | global cleanup_queue 378 | global stop_current_a2f_play 379 | if cleanup_queue: 380 | while not q.empty(): 381 | item = q.get() 382 | q.task_done() 383 | 384 | if item == "cleanup_queue_token": 385 | break 386 | cleanup_queue = False 387 | stop_current_a2f_play = True 388 | 389 | item = q.get() 390 | if item == "cleanup_queue_token": 391 | continue 392 | 393 | print(f"Begin: {item}") 394 | run_pipeline(item) 395 | print(f"End: {item}") 396 | q.task_done() 397 | 398 | 399 | def talk_to_peer(message): 400 | if not gc_client: 401 | return 402 | 403 | result = gc_client.predict( 404 | message, api_name="/chat" # str in 'Message' Textbox component 405 | ) 406 | print(f"from peer: {result}") 407 | 408 | # chat_ui.textbox.submit(None, [result, result]) 409 | # chat_ui.textbox.submit() 410 | 411 | 412 | def predict(message, history): 413 | print("==========================") 414 | if message == "setup": 415 | str = "" 416 | for a2f in A2fInstance.instaces: 417 | a2f.setup() 418 | str += f"A2F running: {a2f.SERVICE_HEALTHY}\n" 419 | str += f"Live Link running: {a2f.LIVELINK_SERVICE_HEALTHY}\n" 420 | yield str 421 | return 422 | 423 | if message == "ping": 424 | for a2f in A2fInstance.instaces: 425 | a2f.post("") 426 | a2f.get("") 427 | yield "A2F ping" 428 | return 429 | 430 | if message == "redo": 431 | for a2f in A2fInstance.instaces: 432 | a2f.player_play() 433 | yield "A2F redo" 434 | return 435 | 436 | if message == "stop": 437 | global cleanup_queue 438 | cleanup_queue = True 439 | q.put("cleanup_queue_token") 440 | yield "stopped" 441 | return 442 | 443 | if message.startswith("peer"): 444 | items = message.split() 445 | if len(items) >= 2: 446 | gradio_port = int(items[1]) 447 | # TODO: support non localhost 448 | args.gradio_peer_url = f"http://{args.gradio_host}:{gradio_port}/" 449 | global gc_client 450 | gc_client = gc.Client(args.gradio_peer_url) 451 | 452 | yield f"I will chat with another llm-metahuman: {args.gradio_peer_url}" 453 | return 454 | 455 | history_openai_format = [] 456 | for human, assistant in history: 457 | history_openai_format.append({"role": "user", "content": human}) 458 | history_openai_format.append({"role": "assistant", "content": assistant}) 459 | history_openai_format.append({"role": "user", "content": message}) 460 | 461 | # start_time = time.time() 462 | 
response = get_completion(history_openai_format) 463 | yield ".." 464 | 465 | # global cleanup_queue 466 | # cleanup_queue = True 467 | # q.put("cleanup_queue_token") 468 | 469 | if args.llm_streaming: 470 | # create variables to collect the stream of chunks 471 | UNUSED_collected_chunks = [] 472 | collected_messages = [] 473 | complete_sentences = "" 474 | # iterate through the stream of events 475 | for chunk in response: 476 | # chunk_time = ( 477 | # time.time() - start_time 478 | # ) # calculate the time delay of the chunk 479 | UNUSED_collected_chunks.append(chunk) # save the event response 480 | chunk_message = chunk.choices[0].delta.content # extract the message 481 | 482 | if not chunk_message: 483 | continue 484 | 485 | collected_messages.append(chunk_message) # save the message 486 | # print( 487 | # f"Message {chunk_time:.2f} s after request: {chunk_message}" 488 | # ) # print the delay and text 489 | print(chunk_message) 490 | 491 | if chunk_message in [ 492 | ".", 493 | "!", 494 | "?", 495 | "。", 496 | "!", 497 | "?", 498 | ] or chunk_message.endswith("\n"): 499 | # if not chunk_message or "\n" in chunk_message: 500 | one_sentence = "".join([m for m in collected_messages if m is not None]) 501 | if len(one_sentence) < 10: 502 | # ignore short sentences 503 | continue 504 | collected_messages = [] 505 | complete_sentences += one_sentence 506 | q.put(one_sentence) 507 | # run_pipeline(one_sentence) 508 | 509 | yield complete_sentences 510 | 511 | talk_to_peer(one_sentence) 512 | 513 | # print the time delay and text received 514 | # print(f"Full response received {chunk_time:.2f} seconds after request") 515 | # # clean None in collected_messages 516 | # collected_messages = [m for m in collected_messages if m is not None] 517 | # full_reply_content = "".join([m for m in collected_messages]) 518 | # print(f"Full conversation received: {full_reply_content}") 519 | # yield full_reply_content 520 | else: 521 | if len(response.choices[0].message.content) == 0: 522 | return 523 | 524 | answer = response.choices[0].message.content 525 | yield answer 526 | 527 | run_pipeline(answer) 528 | 529 | 530 | def main(): 531 | import argparse 532 | 533 | parser = argparse.ArgumentParser(description="llm.py arguments") 534 | 535 | # gradio settings 536 | parser.add_argument("--a2f_instance_count", type=int, default=1) 537 | parser.add_argument("--gradio_host", default="localhost") 538 | parser.add_argument("--gradio_port", type=int, default=7860) 539 | parser.add_argument( 540 | "--gradio_peer_url", 541 | default=None, 542 | help="the gradio peer that this gradio instance will chat with. 
Default value is None, which means chat with a human.", 543 | ) 544 | 545 | # llm / litellm settings 546 | parser.add_argument("--llm_engine", default="gpt", choices=["gpt", "llama2"]) 547 | parser.add_argument( 548 | "--llm_model", default=None, help="https://docs.litellm.ai/docs/providers" 549 | ) 550 | parser.add_argument("--llm_url", default=None) 551 | parser.add_argument( 552 | "--llm_streaming", default=True, action=argparse.BooleanOptionalAction 553 | ) 554 | 555 | # audio2face settings 556 | parser.add_argument("--a2f_host", default="localhost") 557 | parser.add_argument("--a2f_port", default=8011, type=int) 558 | parser.add_argument("--a2f_instance_id", default="/World/audio2face/CoreFullface") 559 | parser.add_argument("--a2f_player_id", default="/World/audio2face/Player") 560 | parser.add_argument("--a2f_livelink_id", default="/World/audio2face/StreamLivelink") 561 | 562 | # tts settings 563 | parser.add_argument("--tts_model", default="tts-1", choices=["tts-1", "tts-1-hd"]) 564 | parser.add_argument("--tts_speed", default=1.1, type=float) 565 | 566 | # livelink settings 567 | parser.add_argument("--livelink_host", default="localhost") 568 | parser.add_argument("--livelink_port", default=12030, type=int) 569 | parser.add_argument("--livelink_subject", default="Audio2Face") 570 | parser.add_argument("--livelink_audio_port", default=12031, type=int) 571 | 572 | parser.add_argument( 573 | "--tts_voice", 574 | default="nova", 575 | choices=VOICE_ACTORS, 576 | help="https://platform.openai.com/docs/guides/text-to-speech", 577 | ) 578 | 579 | global args 580 | args = parser.parse_args() 581 | 582 | if not args.llm_model: 583 | if args.llm_engine == "gpt": 584 | args.llm_model = args.llm_model or "gpt-3.5-turbo" 585 | elif args.llm_engine == "llama2": 586 | args.llm_model = args.llm_model or "ollama/llama2" 587 | args.llm_url = args.llm_url or "http://localhost:11434" 588 | 589 | threading.Thread(target=pipeline_worker, daemon=True).start() 590 | 591 | for i in range(args.a2f_instance_count): 592 | a2f = A2fInstance(i) 593 | a2f.setup() 594 | A2fInstance.instaces.append(a2f) 595 | 596 | global chat_ui 597 | chat_ui = gr.ChatInterface( 598 | predict, 599 | title=f"llm-metahuman @{args.gradio_port}", 600 | examples=["hello", "tell me 3 jokes", "what's the meaning of life?"], 601 | ) 602 | 603 | chat_ui.queue().launch(server_name=args.gradio_host, server_port=args.gradio_port) 604 | 605 | q.join() 606 | 607 | 608 | if __name__ == "__main__": 609 | main() 610 | --------------------------------------------------------------------------------
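Usage sketch (not a file in the repo): once llm.py is running with its defaults
(http://localhost:7860), the chat can be driven from another Python process through
gradio_client, the same way llm.py's own talk_to_peer() calls a peer instance. The URL and
prompt below are illustrative assumptions.

# drive_chat.py -- minimal sketch for exercising the llm.py Gradio endpoint programmatically.
# Assumes llm.py is up on its default --gradio_host/--gradio_port; "/chat" is the route llm.py
# itself uses in talk_to_peer() via gc_client.predict(..., api_name="/chat").
import gradio_client as gc

client = gc.Client("http://localhost:7860/")  # default host/port from llm.py's argparse
reply = client.predict(
    "tell me 3 jokes",  # str for the 'Message' Textbox component
    api_name="/chat",
)
print(reply)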