├── tools ├── kill-kit.bat ├── kit-log.bat ├── code-log.bat └── create-log.bat ├── audio-client ├── requirements.txt ├── run.bat ├── gen_protoc.py ├── avatar.bat ├── prompt │ └── jira-vs-slack.json ├── ref │ ├── minimal-chatbot.py │ ├── sine-curve.py │ ├── pytts-demo.py │ └── portal.py ├── proto │ └── audio2face.proto ├── .vscode │ └── launch.json ├── audio2face_pb2.py ├── audio2face_pb2_grpc.py ├── test_client.py └── llm.py ├── README.md ├── LICENSE └── .gitignore /tools/kill-kit.bat: -------------------------------------------------------------------------------- 1 | taskkill /IM kit.exe /F -------------------------------------------------------------------------------- /tools/kit-log.bat: -------------------------------------------------------------------------------- 1 | start "" "%userprofile%\.nvidia-omniverse\logs\Kit\kit" -------------------------------------------------------------------------------- /tools/code-log.bat: -------------------------------------------------------------------------------- 1 | start "" "%userprofile%\.nvidia-omniverse\logs\Kit\Code" -------------------------------------------------------------------------------- /tools/create-log.bat: -------------------------------------------------------------------------------- 1 | start "" "%userprofile%\.nvidia-omniverse\logs\Kit\Create.Next" -------------------------------------------------------------------------------- /audio-client/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | pydub 3 | gradio 4 | gradio_client 5 | requests 6 | litellm -------------------------------------------------------------------------------- /audio-client/run.bat: -------------------------------------------------------------------------------- 1 | @REM pip install protobuf==3.17.3 grpcio soundfile 2 | python test_client.py %1 /World/audio2face/PlayerStreaming /World/audio2gesture/PlayerStreaming -------------------------------------------------------------------------------- /audio-client/gen_protoc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | 5 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | proto_src_root = os.path.normpath(os.path.join(ROOT_DIR, "proto/")) 7 | proto_dst_root = os.path.normpath(os.path.join(ROOT_DIR, ".")) 8 | proto_fpath = os.path.normpath(os.path.join(ROOT_DIR, "proto", "audio2face.proto")) 9 | 10 | cmd = [ 11 | "python", 12 | "-m", 13 | "grpc_tools.protoc", 14 | "-I", 15 | f"{proto_src_root}", 16 | f"--python_out={proto_dst_root}", 17 | f"--grpc_python_out={proto_dst_root}", 18 | f"{proto_fpath}", 19 | ] 20 | 21 | print(cmd) 22 | subprocess.call(cmd) 23 | -------------------------------------------------------------------------------- /audio-client/avatar.bat: -------------------------------------------------------------------------------- 1 | @REM c:\p4\audio2face\run_avatar.bat ^ 2 | %localappdata%\ov\pkg\audio2face-2023.1.0-beta.4\avatar.kit.bat ^ 3 | --enable omni.services.transport.server.http ^ 4 | --enable omni.kit.tool.asset_exporter ^ 5 | --enable omni.avatar.livelink ^ 6 | --enable omni.avatar.ui.livelink ^ 7 | --/app/renderer/sleepMsOutOfFocus=0 ^ 8 | --/app/renderer/sleepMsOutOfFocus=0 ^ 9 | --/app/asyncRendering=false ^ 10 | --/rtx/reflections/enabled=false ^ 11 | --/rtx/translucency/enabled=false ^ 12 | --/rtx/post/lensFlares/enabled=false ^ 13 | --/rtx/post/dof/enabled=false ^ 14 | --/rtx/indirectDiffuse/enabled=false ^ 15 | %* 
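@REM Note: the package path above is pinned to audio2face-2023.1.0-beta.4 -- adjust it to whatever
@REM Audio2Face version Omniverse Launcher actually installed on your machine. For the README's
@REM "Launch Audio2Face headless" step, recent A2F packages also ship a headless launcher
@REM (commonly named audio2face_headless.bat; the exact script name is an assumption and may
@REM differ per release) that serves the REST API llm.py targets on port 8011 by default
@REM (see --a2f_port in llm.py).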
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM MetaHuman 2 | 3 | LLM MetaHuman is an open solution for AI-powered photorealistic digital humans. 4 | 5 | ## Preparation steps 6 | 7 | - Install [Omniverse Launcher](https://www.nvidia.com/en-us/omniverse/download/) 8 | - Inside Omniverse Launcher, Install `Audio2Face`. 9 | - Install [Epic Games Store](https://store.epicgames.com/en-US/) 10 | - Inside Epic Games Store, Install Unreal Engine 5.x. 11 | - Follow [Audio2Face to UE Live Link Plugin](https://docs.omniverse.nvidia.com/audio2face/latest/user-manual/livelink-ue-plugin.html) to connect Audi2Face to Unreal Engine. 12 | 13 | ## Launch Audio2Face headless 14 | 15 | ## Launch llm.py 16 | 17 | ## Launch Unreal Engine Metahuman 18 | 19 | -------------------------------------------------------------------------------- /audio-client/prompt/jira-vs-slack.json: -------------------------------------------------------------------------------- 1 | { 2 | "task": "Write a stand-up comedy script with 10 dialogs", 3 | "characters": [ 4 | { 5 | "title": "Software Engineer", 6 | "name": "Alloy", 7 | "preference": "Email", 8 | "description": "Enthusiastic software engineer" 9 | }, 10 | { 11 | "title": "Program Manager", 12 | "name": "Nova", 13 | "preference": "Slack", 14 | "description": "Organized program manager" 15 | } 16 | ], 17 | "topic": "Argument about email vs Slack", 18 | "format": "A: says something in one line. B: says something in one line. Remove the number and quotes." 19 | } 20 | -------------------------------------------------------------------------------- /audio-client/ref/minimal-chatbot.py: -------------------------------------------------------------------------------- 1 | import random 2 | import gradio as gr 3 | 4 | 5 | def alternatingly_agree(message, history): 6 | if len(history) % 2 == 0: 7 | return f"Yes, I do think that '{message}'" 8 | else: 9 | return "I don't think so" 10 | 11 | 12 | count = 0 13 | 14 | 15 | def textbox_update(chatui_textbox): 16 | global count 17 | count += 1 18 | if count % 10 == 0: 19 | return "z" 20 | else: 21 | return chatui_textbox 22 | 23 | 24 | if __name__ == "__main__": 25 | with gr.ChatInterface(alternatingly_agree) as chat_ui: 26 | chat_ui.textbox.change( 27 | textbox_update, 28 | chat_ui.textbox, 29 | chat_ui.textbox, 30 | every=1, 31 | trigger_mode="once", 32 | ) 33 | chat_ui.launch() 34 | -------------------------------------------------------------------------------- /audio-client/proto/audio2face.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package nvidia.audio2face; 4 | 5 | service Audio2Face { 6 | rpc PushAudio(PushAudioRequest) returns (PushAudioResponse) {} 7 | rpc PushAudioStream(stream PushAudioStreamRequest) returns (PushAudioStreamResponse) {} 8 | } 9 | 10 | message PushAudioRequest { 11 | string instance_name = 1; 12 | int32 samplerate = 2; 13 | bytes audio_data = 3; 14 | bool block_until_playback_is_finished = 4; 15 | } 16 | 17 | message PushAudioResponse { 18 | bool success = 1; 19 | string message = 2; 20 | } 21 | 22 | message PushAudioStreamRequest { 23 | oneof streaming_request { 24 | PushAudioRequestStart start_marker = 1; 25 | bytes audio_data = 2; 26 | } 27 | } 28 | 29 | message PushAudioRequestStart { 30 | string instance_name = 1; 31 | int32 samplerate = 2; 32 | bool block_until_playback_is_finished = 3; 33 | } 34 | 
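// Streaming usage note (mirrors the docstrings in test_client.py): the first
// PushAudioStreamRequest of a stream must carry start_marker with metadata only;
// every subsequent request carries a float32-encoded audio_data chunk. A single
// PushAudioStreamResponse is returned after the client closes the stream.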
35 | message PushAudioStreamResponse { 36 | bool success = 1; 37 | string message = 2; 38 | } 39 | -------------------------------------------------------------------------------- /audio-client/ref/sine-curve.py: -------------------------------------------------------------------------------- 1 | import math 2 | import gradio as gr 3 | import plotly.express as px 4 | import numpy as np 5 | 6 | 7 | plot_end = 2 * math.pi 8 | 9 | 10 | def get_plot(period=1): 11 | global plot_end 12 | x = np.arange(plot_end - 2 * math.pi, plot_end, 0.02) 13 | y = np.sin(2*math.pi*period * x) 14 | fig = px.line(x=x, y=y) 15 | plot_end += 2 * math.pi 16 | if plot_end > 1000: 17 | plot_end = 2 * math.pi 18 | return fig 19 | 20 | 21 | with gr.Blocks() as demo: 22 | with gr.Row(): 23 | with gr.Column(): 24 | gr.Markdown("Change the value of the slider to automatically update the plot") 25 | period = gr.Slider(label="Period of plot", value=1, minimum=0, maximum=10, step=1) 26 | plot = gr.Plot(label="Plot (updates every half second)") 27 | 28 | dep = demo.load(get_plot, None, plot, every=1) 29 | period.change(get_plot, period, plot, every=1, cancels=[dep]) 30 | 31 | 32 | if __name__ == "__main__": 33 | demo.queue().launch() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vinjn Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /audio-client/ref/pytts-demo.py: -------------------------------------------------------------------------------- 1 | import pyttsx3 2 | 3 | engine = pyttsx3.init() # object creation 4 | 5 | """ RATE""" 6 | rate = engine.getProperty("rate") # getting details of current speaking rate 7 | print(rate) # printing current voice rate 8 | engine.setProperty("rate", 125) # setting up new voice rate 9 | 10 | 11 | """VOLUME""" 12 | volume = engine.getProperty( 13 | "volume" 14 | ) # getting to know current volume level (min=0 and max=1) 15 | print(volume) # printing current volume level 16 | engine.setProperty("volume", 1.0) # setting up volume level between 0 and 1 17 | 18 | """VOICE""" 19 | voices = engine.getProperty("voices") # getting details of current voice 20 | print(voices) 21 | engine.setProperty("voice", voices[0].id) # changing index, changes voices. 
o for male 22 | # engine.setProperty('voice', voices[1].id) #changing index, changes voices. 1 for female 23 | 24 | engine.say("Hello World!") 25 | engine.say("说什么 current speaking rate is " + str(rate)) 26 | engine.runAndWait() 27 | engine.stop() 28 | 29 | """Saving Voice to a file""" 30 | # On linux make sure that 'espeak' and 'ffmpeg' are installed 31 | engine.save_to_file("Hello World", "test.mp3") 32 | engine.runAndWait() 33 | -------------------------------------------------------------------------------- /audio-client/ref/portal.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | 4 | def task1(input_text): 5 | return "Task 1 Result: " + input_text 6 | 7 | 8 | def task2(input_image): 9 | return "Task 2 Result" 10 | 11 | 12 | def task3(input_image): 13 | return "Task 2 Result" 14 | 15 | 16 | # interface one 17 | iface1 = gr.Interface( 18 | fn=task1, inputs="text", outputs="text", title="Multi-Page Interface" 19 | ) 20 | # interface two 21 | iface2 = gr.Interface( 22 | fn=task2, inputs="image", outputs="text", title="Multi-Page Interface" 23 | ) 24 | 25 | tts_examples = [ 26 | "I love learning machine learning", 27 | "How do you do?", 28 | ] 29 | 30 | 31 | tts_demo = gr.load( 32 | "huggingface/facebook/fastspeech2-en-ljspeech", 33 | title=None, 34 | examples=tts_examples, 35 | description="Give me something to say!", 36 | cache_examples=False, 37 | ) 38 | 39 | stt_demo = gr.load( 40 | "huggingface/facebook/wav2vec2-base-960h", 41 | title=None, 42 | inputs="mic", 43 | description="Let me try to guess what you're saying!", 44 | ) 45 | 46 | 47 | demo = gr.TabbedInterface( 48 | [iface1, iface2, tts_demo, stt_demo], 49 | ["Text-to-text", "image-to-text", "Text-to-speech", "Speech-to-text"], 50 | ) 51 | 52 | # Run the interface 53 | demo.launch(share=True) 54 | -------------------------------------------------------------------------------- /audio-client/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "default", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/llm.py", 12 | "console": "integratedTerminal", 13 | "args": [], 14 | "justMyCode": false 15 | }, 16 | { 17 | "name": "two metahumans", 18 | "type": "python", 19 | "request": "launch", 20 | "program": "${workspaceFolder}/llm.py", 21 | "console": "integratedTerminal", 22 | "args": [ 23 | "--a2f_instance_count=2", 24 | "--llm_streaming" 25 | ], 26 | "justMyCode": true 27 | }, 28 | { 29 | "name": "Python: Current File", 30 | "type": "python", 31 | "request": "launch", 32 | "program": "${file}", 33 | "console": "integratedTerminal", 34 | "justMyCode": true 35 | }, 36 | { 37 | "name": "gradio_7861 a2f_8012 livelink_12040", 38 | "type": "python", 39 | "request": "launch", 40 | "program": "${workspaceFolder}/llm.py", 41 | "console": "integratedTerminal", 42 | "args": [ 43 | "--gradio_port=7861", 44 | "--a2f_url=http://localhost:8012", 45 | "--tts_voice=alloy", 46 | "--livelink_host=localhost", 47 | "--livelink_subject=Audio2Face-1", 48 | "--livelink_port=12040", 49 | "--livelink_audio_port=12041" 50 | ], 51 | "justMyCode": false 52 | } 53 | ] 54 | } -------------------------------------------------------------------------------- /audio-client/audio2face_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: audio2face.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf.internal import builder as _builder 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10\x61udio2face.proto\x12\x11nvidia.audio2face\"{\n\x10PushAudioRequest\x12\x15\n\rinstance_name\x18\x01 \x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12\x12\n\naudio_data\x18\x03 \x01(\x0c\x12(\n block_until_playback_is_finished\x18\x04 \x01(\x08\"5\n\x11PushAudioResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\"\x85\x01\n\x16PushAudioStreamRequest\x12@\n\x0cstart_marker\x18\x01 \x01(\x0b\x32(.nvidia.audio2face.PushAudioRequestStartH\x00\x12\x14\n\naudio_data\x18\x02 \x01(\x0cH\x00\x42\x13\n\x11streaming_request\"l\n\x15PushAudioRequestStart\x12\x15\n\rinstance_name\x18\x01 \x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12(\n block_until_playback_is_finished\x18\x03 \x01(\x08\";\n\x17PushAudioStreamResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t2\xd4\x01\n\nAudio2Face\x12X\n\tPushAudio\x12#.nvidia.audio2face.PushAudioRequest\x1a$.nvidia.audio2face.PushAudioResponse\"\x00\x12l\n\x0fPushAudioStream\x12).nvidia.audio2face.PushAudioStreamRequest\x1a*.nvidia.audio2face.PushAudioStreamResponse\"\x00(\x01\x62\x06proto3') 17 | 18 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 19 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'audio2face_pb2', globals()) 20 | if _descriptor._USE_C_DESCRIPTORS == False: 21 | 22 | DESCRIPTOR._options = None 23 | _PUSHAUDIOREQUEST._serialized_start=39 24 | _PUSHAUDIOREQUEST._serialized_end=162 25 | 
_PUSHAUDIORESPONSE._serialized_start=164 26 | _PUSHAUDIORESPONSE._serialized_end=217 27 | _PUSHAUDIOSTREAMREQUEST._serialized_start=220 28 | _PUSHAUDIOSTREAMREQUEST._serialized_end=353 29 | _PUSHAUDIOREQUESTSTART._serialized_start=355 30 | _PUSHAUDIOREQUESTSTART._serialized_end=463 31 | _PUSHAUDIOSTREAMRESPONSE._serialized_start=465 32 | _PUSHAUDIOSTREAMRESPONSE._serialized_end=524 33 | _AUDIO2FACE._serialized_start=527 34 | _AUDIO2FACE._serialized_end=739 35 | # @@protoc_insertion_point(module_scope) 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # intermedaite media folders 132 | _*/ 133 | *.mp3 134 | *.wav 135 | *.usd 136 | *.mp4 137 | *.download 138 | *_uuid.txt 139 | -------------------------------------------------------------------------------- /audio-client/audio2face_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | import audio2face_pb2 as audio2face__pb2 6 | 7 | 8 | class Audio2FaceStub(object): 9 | """Missing associated documentation comment in .proto file.""" 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 16 | """ 17 | self.PushAudio = channel.unary_unary( 18 | '/nvidia.audio2face.Audio2Face/PushAudio', 19 | request_serializer=audio2face__pb2.PushAudioRequest.SerializeToString, 20 | response_deserializer=audio2face__pb2.PushAudioResponse.FromString, 21 | ) 22 | self.PushAudioStream = channel.stream_unary( 23 | '/nvidia.audio2face.Audio2Face/PushAudioStream', 24 | request_serializer=audio2face__pb2.PushAudioStreamRequest.SerializeToString, 25 | response_deserializer=audio2face__pb2.PushAudioStreamResponse.FromString, 26 | ) 27 | 28 | 29 | class Audio2FaceServicer(object): 30 | """Missing associated documentation comment in .proto file.""" 31 | 32 | def PushAudio(self, request, context): 33 | """Missing associated documentation comment in .proto file.""" 34 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 35 | context.set_details('Method not implemented!') 36 | raise NotImplementedError('Method not implemented!') 37 | 38 | def PushAudioStream(self, request_iterator, context): 39 | """Missing associated documentation comment in .proto file.""" 40 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 41 | context.set_details('Method not implemented!') 42 | raise NotImplementedError('Method not implemented!') 43 | 44 | 45 | def add_Audio2FaceServicer_to_server(servicer, server): 46 | rpc_method_handlers = { 47 | 'PushAudio': grpc.unary_unary_rpc_method_handler( 48 | servicer.PushAudio, 49 | request_deserializer=audio2face__pb2.PushAudioRequest.FromString, 50 | response_serializer=audio2face__pb2.PushAudioResponse.SerializeToString, 51 | ), 52 | 'PushAudioStream': grpc.stream_unary_rpc_method_handler( 53 | servicer.PushAudioStream, 54 | request_deserializer=audio2face__pb2.PushAudioStreamRequest.FromString, 55 | response_serializer=audio2face__pb2.PushAudioStreamResponse.SerializeToString, 56 | ), 57 | } 58 | generic_handler = grpc.method_handlers_generic_handler( 59 | 'nvidia.audio2face.Audio2Face', rpc_method_handlers) 60 | server.add_generic_rpc_handlers((generic_handler,)) 61 | 62 | 63 | # This class is part of an EXPERIMENTAL API. 
64 | class Audio2Face(object): 65 | """Missing associated documentation comment in .proto file.""" 66 | 67 | @staticmethod 68 | def PushAudio(request, 69 | target, 70 | options=(), 71 | channel_credentials=None, 72 | call_credentials=None, 73 | insecure=False, 74 | compression=None, 75 | wait_for_ready=None, 76 | timeout=None, 77 | metadata=None): 78 | return grpc.experimental.unary_unary(request, target, '/nvidia.audio2face.Audio2Face/PushAudio', 79 | audio2face__pb2.PushAudioRequest.SerializeToString, 80 | audio2face__pb2.PushAudioResponse.FromString, 81 | options, channel_credentials, 82 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 83 | 84 | @staticmethod 85 | def PushAudioStream(request_iterator, 86 | target, 87 | options=(), 88 | channel_credentials=None, 89 | call_credentials=None, 90 | insecure=False, 91 | compression=None, 92 | wait_for_ready=None, 93 | timeout=None, 94 | metadata=None): 95 | return grpc.experimental.stream_unary(request_iterator, target, '/nvidia.audio2face.Audio2Face/PushAudioStream', 96 | audio2face__pb2.PushAudioStreamRequest.SerializeToString, 97 | audio2face__pb2.PushAudioStreamResponse.FromString, 98 | options, channel_credentials, 99 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 100 | -------------------------------------------------------------------------------- /audio-client/test_client.py: -------------------------------------------------------------------------------- 1 | """ 2 | This demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests. 3 | There are two options: 4 | * Send the whole track at once using PushAudioRequest() 5 | * Send the audio chunks seuqntially in a stream using PushAudioStreamRequest() 6 | For the second option this script emulates the stream of chunks, generated by splitting an input WAV audio file. 
7 | But in a real application such stream of chunks may be aquired from some other streaming source: 8 | * streaming audio via internet, streaming Text-To-Speech, etc 9 | gRPC protocol details could be find in audio2face.proto 10 | """ 11 | 12 | import sys 13 | import time 14 | 15 | import audio2face_pb2 16 | import audio2face_pb2_grpc 17 | import grpc 18 | import numpy as np 19 | import soundfile 20 | 21 | 22 | def push_audio_track(url, audio_data, samplerate, instance_names): 23 | """ 24 | This function pushes the whole audio track at once via PushAudioRequest() 25 | PushAudioRequest parameters: 26 | * audio_data: bytes, containing audio data for the whole track, where each sample is encoded as 4 bytes (float32) 27 | * samplerate: sampling rate for the audio data 28 | * instance_names: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data 29 | * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished 30 | The request is passed to PushAudio() 31 | """ 32 | 33 | block_until_playback_is_finished = True # ADJUST 34 | for instance_name in instance_names: 35 | with grpc.insecure_channel(url) as channel: 36 | stub = audio2face_pb2_grpc.Audio2FaceStub(channel) 37 | request = audio2face_pb2.PushAudioRequest() 38 | request.audio_data = audio_data.astype(np.float32).tobytes() 39 | request.samplerate = samplerate 40 | request.instance_name = instance_name 41 | request.block_until_playback_is_finished = block_until_playback_is_finished 42 | print("Sending audio data...") 43 | response = stub.PushAudio(request) 44 | if response.success: 45 | print("SUCCESS") 46 | else: 47 | print(f"ERROR: {response.message}") 48 | print("Closed channel") 49 | 50 | 51 | def push_audio_track_stream(url, audio_data, samplerate, instance_names): 52 | """ 53 | This function pushes audio chunks sequentially via PushAudioStreamRequest() 54 | The function emulates the stream of chunks, generated by splitting input audio track. 55 | But in a real application such stream of chunks may be aquired from some other streaming source. 
56 | The first message must contain start_marker field, containing only meta information (without audio data): 57 | * samplerate: sampling rate for the audio data 58 | * instance_names: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data 59 | * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished (after the last message) 60 | Second and other messages must contain audio_data field: 61 | * audio_data: bytes, containing audio data for an audio chunk, where each sample is encoded as 4 bytes (float32) 62 | All messages are packed into a Python generator and passed to PushAudioStream() 63 | """ 64 | 65 | chunk_size = samplerate // 10 # ADJUST 66 | sleep_between_chunks = 0.04 # ADJUST 67 | block_until_playback_is_finished = True # ADJUST 68 | 69 | with grpc.insecure_channel(url) as channel: 70 | print("Channel creadted") 71 | stub = audio2face_pb2_grpc.Audio2FaceStub(channel) 72 | 73 | for instance_name in instance_names: 74 | def make_generator(): 75 | start_marker = audio2face_pb2.PushAudioRequestStart( 76 | samplerate=samplerate, 77 | instance_name=instance_name, 78 | block_until_playback_is_finished=block_until_playback_is_finished, 79 | ) 80 | # At first, we send a message with start_marker 81 | yield audio2face_pb2.PushAudioStreamRequest(start_marker=start_marker) 82 | # Then we send messages with audio_data 83 | for i in range(len(audio_data) // chunk_size + 1): 84 | time.sleep(sleep_between_chunks) 85 | chunk = audio_data[i * chunk_size : i * chunk_size + chunk_size] 86 | yield audio2face_pb2.PushAudioStreamRequest(audio_data=chunk.astype(np.float32).tobytes()) 87 | 88 | request_generator = make_generator() 89 | print("Sending audio data...") 90 | response = stub.PushAudioStream(request_generator) 91 | if response.success: 92 | print("SUCCESS") 93 | else: 94 | print(f"ERROR: {response.message}") 95 | print("Channel closed") 96 | 97 | 98 | def main(): 99 | """ 100 | This demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests. 101 | There two options: 102 | * Send the whole track at once using PushAudioRequest() 103 | * Send the audio chunks seuqntially in a stream using PushAudioStreamRequest() 104 | For the second option this script emulates the stream of chunks, generated by splitting an input WAV audio file. 
105 | But in a real application such stream of chunks may be aquired from some other streaming source: 106 | * streaming audio via internet, streaming Text-To-Speech, etc 107 | gRPC protocol details could be find in audio2face.proto 108 | """ 109 | 110 | if len(sys.argv) < 3: 111 | print("Format: python test_client.py PATH_TO_WAV INSTANCE_NAME") 112 | return 113 | 114 | # Sleep time emulates long latency of the request 115 | sleep_time = 0.0 # ADJUST 116 | 117 | # URL of the Audio2Face Streaming Audio Player server (where A2F App is running) 118 | url = "localhost:50051" # ADJUST 119 | 120 | # Local input WAV file path 121 | audio_fpath = sys.argv[1] 122 | 123 | # Prim path of the Audio2Face Streaming Audio Player on the stage (were to push the audio data) 124 | instance_names = sys.argv[2:] 125 | 126 | data, samplerate = soundfile.read(audio_fpath, dtype="float32") 127 | 128 | # Only Mono audio is supported 129 | if len(data.shape) > 1: 130 | data = np.average(data, axis=1) 131 | 132 | print(f"Sleeping for {sleep_time} seconds") 133 | time.sleep(sleep_time) 134 | 135 | if 0: # ADJUST 136 | # Push the whole audio track at once 137 | push_audio_track(url, data, samplerate, instance_names) 138 | else: 139 | # Emulate audio stream and push audio chunks sequentially 140 | push_audio_track_stream(url, data, samplerate, instance_names) 141 | 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /audio-client/llm.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from pydub import AudioSegment 3 | import gradio as gr 4 | import requests 5 | import os 6 | from litellm import completion 7 | import time 8 | import threading 9 | import queue 10 | import gradio_client as gc 11 | 12 | 13 | # XXX: increase requests speed 14 | # https://stackoverflow.com/a/72440253 15 | requests.packages.urllib3.util.connection.HAS_IPV6 = False 16 | 17 | args = None 18 | 19 | CWD = os.getcwd() 20 | print("CWD:", CWD) 21 | 22 | VOICE_ACTORS = ["nova", "alloy", "echo", "fable", "onyx", "shimmer"] 23 | 24 | 25 | def timing_decorator(func): 26 | def wrapper(*args, **kwargs): 27 | start_time = time.time() 28 | result = func(*args, **kwargs) 29 | end_time = time.time() 30 | elapsed_time = end_time - start_time 31 | print(f"{func.__name__} cost: {elapsed_time:.2f} seconds.") 32 | return result 33 | 34 | return wrapper 35 | 36 | 37 | class A2fInstance: 38 | files_to_delete = [] 39 | instaces = [] 40 | 41 | def __init__(self, index) -> None: 42 | self.SERVICE_HEALTHY = False 43 | self.LIVELINK_SERVICE_HEALTHY = False 44 | self.index = index 45 | 46 | @timing_decorator 47 | def post(self, end_point, data=None, verbose=True): 48 | if not self.SERVICE_HEALTHY: 49 | return None 50 | 51 | if verbose: 52 | print(f"++ {end_point}") 53 | api_url = f"{self.base_url}/{end_point}" 54 | try: 55 | response = requests.post(api_url, json=data) 56 | 57 | if response and response.status_code == 200: 58 | if verbose: 59 | print(response.json()) 60 | return response.json() 61 | else: 62 | if verbose: 63 | print(f"Error: {response.status_code} - {response.text}") 64 | return {"Error": response.status_code, "Reason": response.text} 65 | except Exception as e: 66 | print(e) 67 | self.SERVICE_HEALTHY = False 68 | return None 69 | 70 | @timing_decorator 71 | def get(self, end_point, data=None, verbose=True): 72 | if not self.SERVICE_HEALTHY: 73 | return None 74 | 75 | if verbose: 76 | print(f"++ 
{end_point}") 77 | api_url = f"{self.base_url}/{end_point}" 78 | 79 | try: 80 | response = requests.get(api_url, json=data) 81 | if response.status_code == 200: 82 | if verbose: 83 | print(response.json()) 84 | return response.json() 85 | else: 86 | if verbose: 87 | print(f"Error: {response.status_code} - {response.text}") 88 | return {"Error": response.status_code, "Reason": response.text} 89 | except Exception as e: 90 | print(e) 91 | self.SERVICE_HEALTHY = False 92 | return None 93 | 94 | def player_setlooping(self, flag=True): 95 | self.post( 96 | "A2F/Player/SetLooping", 97 | {"a2f_player": args.a2f_player_id, "loop_audio": flag}, 98 | ) 99 | 100 | def player_play(self): 101 | self.post("A2F/Player/Play", {"a2f_player": args.a2f_player_id}) 102 | 103 | def player_pause(self): 104 | self.post("A2F/Player/Pause", {"a2f_player": args.a2f_player_id}) 105 | 106 | def player_setrootpath(self, dir_path): 107 | self.post( 108 | "A2F/Player/SetRootPath", 109 | {"a2f_player": args.a2f_player_id, "dir_path": dir_path}, 110 | ) 111 | 112 | def player_settrack(self, file_name): 113 | self.post( 114 | "A2F/Player/SetTrack", 115 | {"a2f_player": args.a2f_player_id, "file_name": file_name}, 116 | ) 117 | 118 | def player_gettracks(self): 119 | self.post("A2F/Player/GetTracks", {"a2f_player": args.a2f_player_id}) 120 | 121 | def player_gettime(self): 122 | response = self.post( 123 | "A2F/Player/GetTime", {"a2f_player": args.a2f_player_id}, False 124 | ) 125 | if response and response["status"] == "OK": 126 | return response["result"] 127 | else: 128 | return 0 129 | 130 | def player_getrange(self): 131 | response = self.post( 132 | "A2F/Player/GetRange", {"a2f_player": args.a2f_player_id}, False 133 | ) 134 | if response and response["status"] == "OK": 135 | return response["result"]["work"] 136 | else: 137 | return (0, 0) 138 | 139 | def generatekeys(self): 140 | self.post("A2F/A2E/GenerateKeys", {"a2f_instance": args.a2f_instance_id}) 141 | 142 | def ActivateStreamLivelink(self, flag): 143 | self.post( 144 | "A2F/Exporter/ActivateStreamLivelink", 145 | {"node_path": args.a2f_livelink_id, "value": flag}, 146 | ) 147 | 148 | def IsStreamLivelinkConnected(self): 149 | response = self.post( 150 | "A2F/Exporter/IsStreamLivelinkConnected", 151 | {"node_path": args.a2f_livelink_id}, 152 | ) 153 | if response and response["status"] == "OK": 154 | return response["result"] 155 | else: 156 | return False 157 | 158 | def enable_audio_stream(self, flag): 159 | self.post( 160 | "A2F/Exporter/SetStreamLivelinkSettings", 161 | { 162 | "node_path": args.a2f_livelink_id, 163 | "values": {"enable_audio_stream": flag}, 164 | }, 165 | ) 166 | 167 | def set_livelink_ports( 168 | self, 169 | livelink_host, 170 | livelink_subject, 171 | livelink_port, 172 | livelink_audio_port, 173 | ): 174 | self.post( 175 | "A2F/Exporter/SetStreamLivelinkSettings", 176 | { 177 | "node_path": args.a2f_livelink_id, 178 | "values": { 179 | "livelink_host": livelink_host, 180 | "livelink_subject": livelink_subject, 181 | "livelink_port": livelink_port, 182 | "audio_port": livelink_audio_port, 183 | }, 184 | }, 185 | ) 186 | 187 | def get_preprocessing(self): 188 | response = self.post( 189 | "A2F/PRE/GetSettings", 190 | {"a2f_instance": args.a2f_instance_id}, 191 | ) 192 | if response and response["status"] == "OK": 193 | return response["result"] 194 | else: 195 | return {} 196 | 197 | def set_preprocessing(self, settings): 198 | settings["a2f_instance"] = args.a2f_instance_id 199 | self.post("A2F/PRE/SetSettings", settings) 200 | 201 | 
def get_postprocessing(self): 202 | response = self.post( 203 | "A2F/POST/GetSettings", 204 | {"a2f_instance": args.a2f_instance_id}, 205 | ) 206 | if response and response["status"] == "OK": 207 | return response["result"] 208 | else: 209 | return {} 210 | 211 | def set_postprocessing(self, settings): 212 | self.post( 213 | "A2F/POST/SetSettings", 214 | {"a2f_instance": args.a2f_instance_id, "settings": settings}, 215 | ) 216 | 217 | def setup(self): 218 | self.base_url = f"http://{args.a2f_host}:{args.a2f_port+self.index}" 219 | self.tts_voice = args.tts_voice 220 | if self.index > 0: 221 | # TODO: make it elegant 222 | self.tts_voice = VOICE_ACTORS[self.index % len(VOICE_ACTORS)] 223 | 224 | # always ping SERVICE_HEALTHY again in setup() 225 | self.SERVICE_HEALTHY = True 226 | 227 | self.ActivateStreamLivelink(True) 228 | if not self.SERVICE_HEALTHY: 229 | return 230 | 231 | self.player_setrootpath(CWD) 232 | self.player_setlooping(False) 233 | 234 | self.LIVELINK_SERVICE_HEALTHY = self.IsStreamLivelinkConnected() 235 | if not self.LIVELINK_SERVICE_HEALTHY: 236 | return 237 | 238 | self.enable_audio_stream(True) 239 | 240 | self.set_livelink_ports( 241 | args.livelink_host, 242 | f"{args.livelink_subject}-{self.index}", 243 | args.livelink_port + 10 * self.index, 244 | args.livelink_audio_port + 10 * self.index, 245 | ) 246 | 247 | pre_settings = self.get_preprocessing() 248 | pre_settings["prediction_delay"] = 0 249 | pre_settings["blink_interval"] = 1.5 250 | self.set_preprocessing(pre_settings) 251 | 252 | post_settings = self.get_postprocessing() 253 | post_settings["skin_strength"] = 1.3 254 | self.set_postprocessing(post_settings) 255 | 256 | 257 | A2fInstance.instaces = [] 258 | openai_client = OpenAI() 259 | gc_client: gc.Client = None 260 | chat_ui: gr.ChatInterface = None 261 | 262 | 263 | def run_single_pipeline(a2f, answer, a2f_peer=None): 264 | global stop_current_a2f_play 265 | 266 | if not a2f_peer: 267 | a2f_peer = a2f 268 | 269 | # print(answer) 270 | mp3_file = text_to_mp3(answer, a2f.tts_voice) 271 | wav_file = mp3_to_wav(mp3_file) 272 | duration = a2f_peer.player_getrange()[1] 273 | position = a2f_peer.player_gettime() 274 | while position > 0 and position < duration: 275 | print(position, duration) 276 | if stop_current_a2f_play: 277 | print("stop_current_a2f_play") 278 | stop_current_a2f_play = False 279 | return 280 | 281 | time.sleep(1) 282 | position = a2f_peer.player_gettime() 283 | print("z") 284 | time.sleep(1) 285 | a2f.player_setrootpath(CWD) 286 | a2f.player_settrack(wav_file) 287 | # a2f_generatekeys() 288 | 289 | a2f.player_play() 290 | 291 | for file in A2fInstance.files_to_delete: 292 | try: 293 | os.remove(file) 294 | except Exception: 295 | pass 296 | A2fInstance.files_to_delete.clear() 297 | 298 | A2fInstance.files_to_delete.append(mp3_file) 299 | A2fInstance.files_to_delete.append(wav_file) 300 | 301 | 302 | current_speaker = -1 303 | 304 | 305 | @timing_decorator 306 | def run_pipeline(answer): 307 | if args.a2f_instance_count == 1: 308 | run_single_pipeline(A2fInstance.instaces[0], answer) 309 | return 310 | 311 | global current_speaker 312 | if answer.startswith("("): 313 | current_speaker = -1 314 | elif answer.startswith("A:"): 315 | current_speaker = 0 316 | answer = answer[2:] 317 | elif answer.startswith("B:"): 318 | current_speaker = 1 319 | answer = answer[2:] 320 | 321 | if current_speaker < 0 or current_speaker >= args.a2f_instance_count: 322 | return 323 | 324 | a2f = A2fInstance.instaces[current_speaker] 325 | if not 
a2f.SERVICE_HEALTHY: 326 | return 327 | 328 | run_single_pipeline(a2f, answer) 329 | 330 | 331 | @timing_decorator 332 | def text_to_mp3(text, voice): 333 | response = openai_client.audio.speech.create( 334 | model=args.tts_model, 335 | voice=voice, 336 | speed=args.tts_speed, 337 | input=text, 338 | ) 339 | timestamp = time.time() 340 | mp3_filename = f"{timestamp}.mp3" 341 | response.stream_to_file(mp3_filename) 342 | 343 | return mp3_filename 344 | 345 | 346 | @timing_decorator 347 | def mp3_to_wav(mp3_filename): 348 | sound = AudioSegment.from_mp3(mp3_filename) 349 | sound = sound.set_frame_rate(22050) 350 | wav_filename = f"{mp3_filename}.wav" 351 | sound.export(wav_filename, format="wav") 352 | 353 | return wav_filename 354 | 355 | 356 | @timing_decorator 357 | def get_completion(chat_history): 358 | response = completion( 359 | model=args.llm_model, 360 | messages=chat_history, 361 | api_base=args.llm_url, 362 | stream=args.llm_streaming, 363 | ) 364 | 365 | print(response) 366 | return response 367 | 368 | 369 | q = queue.Queue() 370 | cleanup_queue = False 371 | stop_current_a2f_play = False 372 | 373 | 374 | def pipeline_worker(): 375 | while True: 376 | print("--------------------------") 377 | global cleanup_queue 378 | global stop_current_a2f_play 379 | if cleanup_queue: 380 | while not q.empty(): 381 | item = q.get() 382 | q.task_done() 383 | 384 | if item == "cleanup_queue_token": 385 | break 386 | cleanup_queue = False 387 | stop_current_a2f_play = True 388 | 389 | item = q.get() 390 | if item == "cleanup_queue_token": 391 | continue 392 | 393 | print(f"Begin: {item}") 394 | run_pipeline(item) 395 | print(f"End: {item}") 396 | q.task_done() 397 | 398 | 399 | def talk_to_peer(message): 400 | if not gc_client: 401 | return 402 | 403 | result = gc_client.predict( 404 | message, api_name="/chat" # str in 'Message' Textbox component 405 | ) 406 | print(f"from peer: {result}") 407 | 408 | # chat_ui.textbox.submit(None, [result, result]) 409 | # chat_ui.textbox.submit() 410 | 411 | 412 | def predict(message, history): 413 | print("==========================") 414 | if message == "setup": 415 | str = "" 416 | for a2f in A2fInstance.instaces: 417 | a2f.setup() 418 | str += f"A2F running: {a2f.SERVICE_HEALTHY}\n" 419 | str += f"Live Link running: {a2f.LIVELINK_SERVICE_HEALTHY}\n" 420 | yield str 421 | return 422 | 423 | if message == "ping": 424 | for a2f in A2fInstance.instaces: 425 | a2f.post("") 426 | a2f.get("") 427 | yield "A2F ping" 428 | return 429 | 430 | if message == "redo": 431 | for a2f in A2fInstance.instaces: 432 | a2f.player_play() 433 | yield "A2F redo" 434 | return 435 | 436 | if message == "stop": 437 | global cleanup_queue 438 | cleanup_queue = True 439 | q.put("cleanup_queue_token") 440 | yield "stopped" 441 | return 442 | 443 | if message.startswith("peer"): 444 | items = message.split() 445 | if len(items) >= 2: 446 | gradio_port = int(items[1]) 447 | # TODO: support non localhost 448 | args.gradio_peer_url = f"http://{args.gradio_host}:{gradio_port}/" 449 | global gc_client 450 | gc_client = gc.Client(args.gradio_peer_url) 451 | 452 | yield f"I will chat with another llm-metahuman: {args.gradio_peer_url}" 453 | return 454 | 455 | history_openai_format = [] 456 | for human, assistant in history: 457 | history_openai_format.append({"role": "user", "content": human}) 458 | history_openai_format.append({"role": "assistant", "content": assistant}) 459 | history_openai_format.append({"role": "user", "content": message}) 460 | 461 | # start_time = time.time() 462 | 
response = get_completion(history_openai_format) 463 | yield ".." 464 | 465 | # global cleanup_queue 466 | # cleanup_queue = True 467 | # q.put("cleanup_queue_token") 468 | 469 | if args.llm_streaming: 470 | # create variables to collect the stream of chunks 471 | UNUSED_collected_chunks = [] 472 | collected_messages = [] 473 | complete_sentences = "" 474 | # iterate through the stream of events 475 | for chunk in response: 476 | # chunk_time = ( 477 | # time.time() - start_time 478 | # ) # calculate the time delay of the chunk 479 | UNUSED_collected_chunks.append(chunk) # save the event response 480 | chunk_message = chunk.choices[0].delta.content # extract the message 481 | 482 | if not chunk_message: 483 | continue 484 | 485 | collected_messages.append(chunk_message) # save the message 486 | # print( 487 | # f"Message {chunk_time:.2f} s after request: {chunk_message}" 488 | # ) # print the delay and text 489 | print(chunk_message) 490 | 491 | if chunk_message in [ 492 | ".", 493 | "!", 494 | "?", 495 | "。", 496 | "!", 497 | "?", 498 | ] or chunk_message.endswith("\n"): 499 | # if not chunk_message or "\n" in chunk_message: 500 | one_sentence = "".join([m for m in collected_messages if m is not None]) 501 | if len(one_sentence) < 10: 502 | # ignore short sentences 503 | continue 504 | collected_messages = [] 505 | complete_sentences += one_sentence 506 | q.put(one_sentence) 507 | # run_pipeline(one_sentence) 508 | 509 | yield complete_sentences 510 | 511 | talk_to_peer(one_sentence) 512 | 513 | # print the time delay and text received 514 | # print(f"Full response received {chunk_time:.2f} seconds after request") 515 | # # clean None in collected_messages 516 | # collected_messages = [m for m in collected_messages if m is not None] 517 | # full_reply_content = "".join([m for m in collected_messages]) 518 | # print(f"Full conversation received: {full_reply_content}") 519 | # yield full_reply_content 520 | else: 521 | if len(response.choices[0].message.content) == 0: 522 | return 523 | 524 | answer = response.choices[0].message.content 525 | yield answer 526 | 527 | run_pipeline(answer) 528 | 529 | 530 | def main(): 531 | import argparse 532 | 533 | parser = argparse.ArgumentParser(description="llm.py arguments") 534 | 535 | # gradio settings 536 | parser.add_argument("--a2f_instance_count", type=int, default=1) 537 | parser.add_argument("--gradio_host", default="localhost") 538 | parser.add_argument("--gradio_port", type=int, default=7860) 539 | parser.add_argument( 540 | "--gradio_peer_url", 541 | default=None, 542 | help="the gradio peer that this gradio instance will chat with. 
Default value is None, which means chat with a human.", 543 | ) 544 | 545 | # llm / litellm settings 546 | parser.add_argument("--llm_engine", default="gpt", choices=["gpt", "llama2"]) 547 | parser.add_argument( 548 | "--llm_model", default=None, help="https://docs.litellm.ai/docs/providers" 549 | ) 550 | parser.add_argument("--llm_url", default=None) 551 | parser.add_argument( 552 | "--llm_streaming", default=True, action=argparse.BooleanOptionalAction 553 | ) 554 | 555 | # audio2face settings 556 | parser.add_argument("--a2f_host", default="localhost") 557 | parser.add_argument("--a2f_port", default=8011, type=int) 558 | parser.add_argument("--a2f_instance_id", default="/World/audio2face/CoreFullface") 559 | parser.add_argument("--a2f_player_id", default="/World/audio2face/Player") 560 | parser.add_argument("--a2f_livelink_id", default="/World/audio2face/StreamLivelink") 561 | 562 | # tts settings 563 | parser.add_argument("--tts_model", default="tts-1", choices=["tts-1", "tts-1-hd"]) 564 | parser.add_argument("--tts_speed", default=1.1, type=float) 565 | 566 | # livelink settings 567 | parser.add_argument("--livelink_host", default="localhost") 568 | parser.add_argument("--livelink_port", default=12030, type=int) 569 | parser.add_argument("--livelink_subject", default="Audio2Face") 570 | parser.add_argument("--livelink_audio_port", default=12031, type=int) 571 | 572 | parser.add_argument( 573 | "--tts_voice", 574 | default="nova", 575 | choices=VOICE_ACTORS, 576 | help="https://platform.openai.com/docs/guides/text-to-speech", 577 | ) 578 | 579 | global args 580 | args = parser.parse_args() 581 | 582 | if not args.llm_model: 583 | if args.llm_engine == "gpt": 584 | args.llm_model = args.llm_model or "gpt-3.5-turbo" 585 | elif args.llm_engine == "llama2": 586 | args.llm_model = args.llm_model or "ollama/llama2" 587 | args.llm_url = args.llm_url or "http://localhost:11434" 588 | 589 | threading.Thread(target=pipeline_worker, daemon=True).start() 590 | 591 | for i in range(args.a2f_instance_count): 592 | a2f = A2fInstance(i) 593 | a2f.setup() 594 | A2fInstance.instaces.append(a2f) 595 | 596 | global chat_ui 597 | chat_ui = gr.ChatInterface( 598 | predict, 599 | title=f"llm-metahuman @{args.gradio_port}", 600 | examples=["hello", "tell me 3 jokes", "what's the meaning of life?"], 601 | ) 602 | 603 | chat_ui.queue().launch(server_name=args.gradio_host, server_port=args.gradio_port) 604 | 605 | q.join() 606 | 607 | 608 | if __name__ == "__main__": 609 | main() 610 | --------------------------------------------------------------------------------
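Usage sketch (not a file in the repo): once llm.py is running with its defaults
(http://localhost:7860), the chat can be driven from another Python process through
gradio_client, the same way llm.py's own talk_to_peer() calls a peer instance. The URL and
prompt below are illustrative assumptions.

# drive_chat.py -- minimal sketch for exercising the llm.py Gradio endpoint programmatically.
# Assumes llm.py is up on its default --gradio_host/--gradio_port; "/chat" is the route llm.py
# itself uses in talk_to_peer() via gc_client.predict(..., api_name="/chat").
import gradio_client as gc

client = gc.Client("http://localhost:7860/")  # default host/port from llm.py's argparse
reply = client.predict(
    "tell me 3 jokes",  # str for the 'Message' Textbox component
    api_name="/chat",
)
print(reply)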