├── studio-voice ├── interfaces │ ├── __init__.py │ └── studio_voice │ │ ├── __init__.py │ │ ├── studiovoice_pb2.pyi │ │ ├── studiovoice_pb2.py │ │ └── studiovoice_pb2_grpc.py ├── requirements.txt ├── assets │ ├── studio_voice_16k_input.wav │ └── studio_voice_48k_input.wav ├── protos │ ├── compile_protos.sh │ ├── compile_protos.bat │ └── proto │ │ └── nvidia │ │ └── maxine │ │ └── studiovoice │ │ └── v1 │ │ └── studiovoice.proto ├── README.md └── scripts │ └── studio_voice.py ├── audio2face-2d ├── python │ ├── requirements.txt │ ├── interfaces │ │ ├── __init__.py │ │ ├── audio2face2d_pb2_grpc.py │ │ ├── audio2face2d_pb2.py │ │ └── audio2face2d_pb2.pyi │ └── scripts │ │ └── audio2face-2d.py ├── assets │ ├── sample_audio.wav │ ├── sample_portrait_image.png │ ├── head_translation_animation.csv │ └── head_rotation_animation.csv ├── nodejs │ ├── package.json │ ├── scripts │ │ └── index.html │ └── interfaces │ │ └── audio2face2d_grpc_pb.js ├── protos │ ├── linux │ │ ├── nodejs │ │ │ └── compile_protos.sh │ │ └── python │ │ │ └── compile_protos.sh │ ├── windows │ │ ├── nodejs │ │ │ └── compile_protos.bat │ │ └── python │ │ │ └── compile_protos.bat │ └── proto │ │ └── nvidia │ │ └── maxine │ │ └── audio2face2d │ │ └── v1 │ │ └── audio2face2d.proto └── README.md ├── eye-contact ├── requirements.txt ├── assets │ ├── sample_streamable.mp4 │ └── sample_transactional.mp4 ├── interfaces │ ├── __init__.py │ ├── eyecontact_pb2_grpc.py │ ├── eyecontact_pb2.py │ └── eyecontact_pb2.pyi ├── protos │ ├── windows │ │ └── compile_protos.bat │ ├── linux │ │ └── compile_protos.sh │ └── proto │ │ └── nvidia │ │ └── maxine │ │ └── eyecontact │ │ └── v1 │ │ └── eyecontact.proto ├── scripts │ ├── constants.py │ └── eye-contact.py └── README.md ├── bnr ├── assets │ ├── bnr_16k_input.wav │ └── bnr_48k_input.wav ├── requirements.txt ├── interfaces │ ├── __init__.py │ └── bnr │ │ ├── __init__.py │ │ ├── bnr_pb2.py │ │ └── bnr_pb2_grpc.py ├── protos │ ├── compile_protos.sh │ ├── compile_protos.bat │ └── proto │ │ └── nvidia │ │ └── maxine │ │ └── bnr │ │ └── v1 │ │ └── bnr.proto ├── README.md └── scripts │ └── bnr.py ├── LICENSE.md ├── README.md ├── SECURITY.md ├── .gitignore └── utils └── utils.py /studio-voice/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import studio_voice -------------------------------------------------------------------------------- /audio2face-2d/python/requirements.txt: -------------------------------------------------------------------------------- 1 | grpcio==1.67.1 2 | grpcio-tools==1.67.1 3 | -------------------------------------------------------------------------------- /eye-contact/requirements.txt: -------------------------------------------------------------------------------- 1 | grpcio==1.67.1 2 | grpcio-tools==1.67.1 3 | tqdm==4.67.1 4 | 5 | -------------------------------------------------------------------------------- /bnr/assets/bnr_16k_input.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/bnr/assets/bnr_16k_input.wav -------------------------------------------------------------------------------- /bnr/assets/bnr_48k_input.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/bnr/assets/bnr_48k_input.wav -------------------------------------------------------------------------------- /studio-voice/requirements.txt: -------------------------------------------------------------------------------- 1 | grpcio==1.67.1 2 | grpcio-tools==1.67.1 3 | soundfile==0.12.1 4 | numpy==1.26.4 5 | -------------------------------------------------------------------------------- /bnr/requirements.txt: -------------------------------------------------------------------------------- 1 | grpcio==1.67.1 2 | grpcio-tools==1.67.1 3 | soundfile==0.12.1 4 | numpy==1.26.4 5 | tqdm==4.67.1 6 | -------------------------------------------------------------------------------- /audio2face-2d/assets/sample_audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/audio2face-2d/assets/sample_audio.wav -------------------------------------------------------------------------------- /eye-contact/assets/sample_streamable.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/eye-contact/assets/sample_streamable.mp4 -------------------------------------------------------------------------------- /eye-contact/assets/sample_transactional.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/eye-contact/assets/sample_transactional.mp4 -------------------------------------------------------------------------------- /audio2face-2d/assets/sample_portrait_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/audio2face-2d/assets/sample_portrait_image.png -------------------------------------------------------------------------------- /studio-voice/assets/studio_voice_16k_input.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/studio-voice/assets/studio_voice_16k_input.wav -------------------------------------------------------------------------------- /studio-voice/assets/studio_voice_48k_input.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-Maxine/nim-clients/HEAD/studio-voice/assets/studio_voice_48k_input.wav -------------------------------------------------------------------------------- /audio2face-2d/python/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from . import audio2face2d_pb2 4 | pwd = os.path.join(os.path.dirname(__file__)) 5 | sys.path.insert(0, pwd) 6 | from . import audio2face2d_pb2_grpc 7 | sys.path.remove(pwd) 8 | -------------------------------------------------------------------------------- /studio-voice/interfaces/studio_voice/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from . import studiovoice_pb2 4 | pwd = os.path.join(os.path.dirname(__file__)) 5 | sys.path.insert(0, pwd) 6 | from . import studiovoice_pb2_grpc 7 | sys.path.remove(pwd) -------------------------------------------------------------------------------- /audio2face-2d/nodejs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@grpc/grpc-js": "^1.11.3", 4 | "commander": "^12.1.0", 5 | "csv-parse": "^5.5.5", 6 | "google-protobuf": "^3.21.4", 7 | "memorystream": "^0.3.1", 8 | "wav": "^1.0.2" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /studio-voice/interfaces/studio_voice/studiovoice_pb2.pyi: -------------------------------------------------------------------------------- 1 | from google.protobuf import descriptor as _descriptor 2 | from google.protobuf import message as _message 3 | from typing import ClassVar as _ClassVar, Optional as _Optional 4 | 5 | DESCRIPTOR: _descriptor.FileDescriptor 6 | 7 | class EnhanceAudioRequest(_message.Message): 8 | __slots__ = ("audio_stream_data",) 9 | AUDIO_STREAM_DATA_FIELD_NUMBER: _ClassVar[int] 10 | audio_stream_data: bytes 11 | def __init__(self, audio_stream_data: _Optional[bytes] = ...) -> None: ... 12 | 13 | class EnhanceAudioResponse(_message.Message): 14 | __slots__ = ("audio_stream_data",) 15 | AUDIO_STREAM_DATA_FIELD_NUMBER: _ClassVar[int] 16 | audio_stream_data: bytes 17 | def __init__(self, audio_stream_data: _Optional[bytes] = ...) -> None: ... 18 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /bnr/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 20 | from . import bnr -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NVIDIA Maxine NIM 2 | 3 | NVIDIA Maxine is a suite of high-performance, easy-to-use, NVIDIA Inference Microservices (NIMs) for deploying AI features that enhance audio, video, and augmented reality effects for video conferencing and tele-presence. 4 | 5 | 6 | ## NVIDIA Maxine NIM Clients 7 | 8 | This repository provides sample client applications to interact with Maxine NIMs 9 | 10 | - [`eye-contact`](eye-contact) - NVIDIA Maxine Eye Contact feature estimates the gaze angles of a person in a video and redirects the gaze in the output video to make it frontal. 11 | [[Demo](https://build.nvidia.com/nvidia/eyecontact)] , [[Docs](https://docs.nvidia.com/nim/maxine/eye-contact/latest/index.html)] 12 | 13 | - [`studio-voice`](studio-voice) - NVIDIA Maxine Studio Voice feature enhances the input speech recorded through low quality microphones in noisy and reverberant environments to studio-recorded quality speech. 14 | [[Demo](https://build.nvidia.com/nvidia/studiovoice)] , [[Docs](https://docs.nvidia.com/nim/maxine/studio-voice/latest/index.html)] 15 | 16 | - [`audio2face-2d`](audio2face-2d) - NVIDIA Maxine Audio2Face-2D feature generates facial animations from a portrait photo and audio input, synchronizing mouth movements with speech to create realistic and engaging video outputs. 
17 | [[Demo](https://build.nvidia.com/nvidia/audio2face-2d)] , [[Docs](https://docs.nvidia.com/nim/maxine/audio2face-2d/latest/index.html)] -------------------------------------------------------------------------------- /bnr/interfaces/bnr/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 20 | import os 21 | import sys 22 | from . import bnr_pb2 23 | pwd = os.path.join(os.path.dirname(__file__)) 24 | sys.path.insert(0, pwd) 25 | from . import bnr_pb2_grpc 26 | sys.path.remove(pwd) -------------------------------------------------------------------------------- /eye-contact/interfaces/__init__.py: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | 23 | import os 24 | import sys 25 | from . import eyecontact_pb2 26 | 27 | pwd = os.path.join(os.path.dirname(__file__)) 28 | sys.path.insert(0, pwd) 29 | from . 
import eyecontact_pb2_grpc 30 | 31 | sys.path.remove(pwd) 32 | -------------------------------------------------------------------------------- /audio2face-2d/nodejs/scripts/index.html: -------------------------------------------------------------------------------- [HTML markup stripped during extraction; only the page title and body heading "Maxine Audio2Face 2D" are recoverable]
28 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | ## Security 3 | 4 | NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization. 5 | 6 | If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub.** 7 | 8 | ## Reporting Potential Security Vulnerability in an NVIDIA Product 9 | 10 | To report a potential security vulnerability in any NVIDIA product: 11 | 12 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html) 13 | 14 | - E-Mail: psirt@nvidia.com 15 | - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key) 16 | - Please include the following information: 17 | - Product/Driver name and version/branch that contains the vulnerability 18 | - Type of vulnerability (code execution, denial of service, buffer overflow, etc.) 19 | - Instructions to reproduce the vulnerability 20 | - Proof-of-concept or exploit code 21 | - Potential impact of the vulnerability, including how an attacker could exploit the vulnerability 22 | 23 | While NVIDIA currently does not have a bug bounty program, we do offer acknowledgement when an externally reported security issue is addressed under our coordinated vulnerability disclosure policy. Please visit our [Product Security Incident Response Team (PSIRT)](https://www.nvidia.com/en-us/security/psirt-policies/) policies page for more information. 24 | 25 | ## NVIDIA Product Security 26 | 27 | For all security-related concerns, please visit NVIDIA's Product Security portal at https://www.nvidia.com/en-us/security 28 | -------------------------------------------------------------------------------- /bnr/protos/compile_protos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 
22 | 23 | 24 | # This script compiles Protocol Buffer (protobuf) definitions for NVIDIA 25 | # Maxine BNR on a Linux Client. 26 | # 27 | # Execute the script using `./compile_protos.sh` 28 | # 29 | # For more details, refer to README.md 30 | 31 | SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" 32 | PROTOS_DIR=$SCRIPT_DIR/proto/nvidia/maxine/bnr/v1 33 | OUT_DIR=$SCRIPT_DIR/../interfaces/bnr 34 | 35 | python3 -m grpc_tools.protoc -I=$PROTOS_DIR \ 36 | --python_out=$OUT_DIR \ 37 | --grpc_python_out=$OUT_DIR \ 38 | $PROTOS_DIR/bnr.proto 39 | -------------------------------------------------------------------------------- /studio-voice/protos/compile_protos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | # This script compiles Protocol Buffer (protobuf) definitions for NVIDIA 25 | # Maxine Studio Voice on a Linux Client. 26 | # 27 | # Execute the script using `./compile_protos.sh` 28 | # 29 | # For more details, refer to README.md 30 | 31 | SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" 32 | PROTOS_DIR=$SCRIPT_DIR/proto/nvidia/maxine/studiovoice/v1 33 | OUT_DIR=$SCRIPT_DIR/../interfaces/studio_voice 34 | 35 | python3 -m grpc_tools.protoc -I=$PROTOS_DIR \ 36 | --python_out=$OUT_DIR \ 37 | --pyi_out=$OUT_DIR \ 38 | --grpc_python_out=$OUT_DIR \ 39 | $PROTOS_DIR/studiovoice.proto 40 | -------------------------------------------------------------------------------- /audio2face-2d/protos/linux/nodejs/compile_protos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software.
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | # This script compiles Protocol Buffer (protobuf) definitions for NVIDIA Maxine Audio2Face-2D NIM on a Linux Client. 25 | # 26 | # Execute the script using `./compile_protos.sh` 27 | # 28 | # For more details, refer to README.md 29 | 30 | ROOT_DIR="$(dirname "$(readlink -f "$0")")" 31 | PROTOS_DIR=$ROOT_DIR/../../proto/nvidia/maxine/audio2face2d/v1 32 | OUT_DIR=$ROOT_DIR/../../../nodejs/interfaces 33 | 34 | # Install grpc-tools 35 | npm install -g grpc-tools 36 | 37 | # Generate the interface files 38 | grpc_tools_node_protoc --js_out=import_style=commonjs,binary:$OUT_DIR $PROTOS_DIR/audio2face2d.proto --proto_path=$PROTOS_DIR --grpc_out=grpc_js:$OUT_DIR --plugin=protoc-gen-grpc=`which grpc_tools_node_protoc_plugin` 39 | -------------------------------------------------------------------------------- /bnr/protos/compile_protos.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | :: 3 | :: Permission is hereby granted, free of charge, to any person obtaining a 4 | :: copy of this software and associated documentation files (the "Software"), 5 | :: to deal in the Software without restriction, including without limitation 6 | :: the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | :: and/or sell copies of the Software, and to permit persons to whom the 8 | :: Software is furnished to do so, subject to the following conditions: 9 | :: 10 | :: The above copyright notice and this permission notice shall be included in 11 | :: all copies or substantial portions of the Software. 12 | :: 13 | :: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | :: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | :: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | :: THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | :: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | :: FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | :: DEALINGS IN THE SOFTWARE. 20 | 21 | 22 | :: This script compiles Protocol Buffer (protobuf) definitions for NVIDIA 23 | :: Maxine BNR on a Windows Client. 24 | :: 25 | :: Execute the script using `compile_protos.bat`.
26 | :: 27 | :: For more details, refer to README.md 28 | 29 | @echo off 30 | setlocal 31 | 32 | :: Define the script directory and other variables 33 | set "SCRIPT_DIR=%~dp0" 34 | set "PROTOS_DIR=%SCRIPT_DIR%proto\nvidia\maxine\bnr\v1" 35 | set "OUT_DIR=%SCRIPT_DIR%..\interfaces\bnr" 36 | 37 | :: Run the grpc_tools.protoc command with the necessary parameters 38 | python -m grpc_tools.protoc -I=%PROTOS_DIR% ^ 39 | --python_out=%OUT_DIR% ^ 40 | --grpc_python_out=%OUT_DIR% ^ 41 | %PROTOS_DIR%\bnr.proto 42 | 43 | endlocal 44 | 45 | -------------------------------------------------------------------------------- /studio-voice/interfaces/studio_voice/studiovoice_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: studiovoice.proto 5 | # Protobuf Python Version: 5.27.2 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 27, 16 | 2, 17 | '', 18 | 'studiovoice.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | 26 | 27 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11studiovoice.proto\x12\x1cnvidia.maxine.studiovoice.v1\"B\n\x13\x45nhanceAudioRequest\x12\x1b\n\x11\x61udio_stream_data\x18\x01 \x01(\x0cH\x00\x42\x0e\n\x0cstream_input\"D\n\x14\x45nhanceAudioResponse\x12\x1b\n\x11\x61udio_stream_data\x18\x01 \x01(\x0cH\x00\x42\x0f\n\rstream_output2\x90\x01\n\x11MaxineStudioVoice\x12{\n\x0c\x45nhanceAudio\x12\x31.nvidia.maxine.studiovoice.v1.EnhanceAudioRequest\x1a\x32.nvidia.maxine.studiovoice.v1.EnhanceAudioResponse\"\x00(\x01\x30\x01\x62\x06proto3') 28 | 29 | _globals = globals() 30 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 31 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'studiovoice_pb2', _globals) 32 | if not _descriptor._USE_C_DESCRIPTORS: 33 | DESCRIPTOR._loaded_options = None 34 | _globals['_ENHANCEAUDIOREQUEST']._serialized_start=51 35 | _globals['_ENHANCEAUDIOREQUEST']._serialized_end=117 36 | _globals['_ENHANCEAUDIORESPONSE']._serialized_start=119 37 | _globals['_ENHANCEAUDIORESPONSE']._serialized_end=187 38 | _globals['_MAXINESTUDIOVOICE']._serialized_start=190 39 | _globals['_MAXINESTUDIOVOICE']._serialized_end=334 40 | # @@protoc_insertion_point(module_scope) 41 | -------------------------------------------------------------------------------- /studio-voice/protos/compile_protos.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | :: 3 | :: Permission is hereby granted, free of charge, to any person obtaining a 4 | :: copy of this software and associated documentation files (the "Software"), 5 | :: to deal in the Software without restriction, including without limitation 6 | :: the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | :: and/or sell copies of the Software, and to permit persons to whom the 8 | :: Software is furnished to do so, subject to the following conditions: 9 | :: 10 | :: The above copyright notice and this permission notice shall be included in 11 | :: all copies or substantial portions of the Software. 12 | :: 13 | :: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | :: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | :: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | :: THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | :: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | :: FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | :: DEALINGS IN THE SOFTWARE. 20 | 21 | 22 | :: This script compiles Protocol Buffer (protobuf) definitions for NVIDIA 23 | :: Maxine Studio Voice on a Windows Client. 24 | :: 25 | :: Execute the script using `compile_protos.bat`. 26 | :: 27 | :: For more details, refer to README.md 28 | 29 | @echo off 30 | setlocal 31 | 32 | :: Define the script directory and other variables 33 | set "SCRIPT_DIR=%~dp0" 34 | set "PROTOS_DIR=%SCRIPT_DIR%proto\nvidia\maxine\studiovoice\v1" 35 | set "OUT_DIR=%SCRIPT_DIR%..\interfaces\studio_voice" 36 | 37 | :: Run the grpc_tools.protoc command with the necessary parameters 38 | python -m grpc_tools.protoc -I=%PROTOS_DIR% ^ 39 | --python_out=%OUT_DIR% ^ 40 | --pyi_out=%OUT_DIR% ^ 41 | --grpc_python_out=%OUT_DIR% ^ 42 | %PROTOS_DIR%\studiovoice.proto 43 | 44 | endlocal 45 | 46 | -------------------------------------------------------------------------------- /bnr/interfaces/bnr/bnr_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: bnr.proto 5 | # Protobuf Python Version: 5.27.2 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 27, 16 | 2, 17 | '', 18 | 'bnr.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | 26 | 27 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\tbnr.proto\x12\x14nvidia.maxine.bnr.v1\"F\n\x12\x45nhanceAudioConfig\x12\x1c\n\x0fintensity_ratio\x18\x01 \x01(\x02H\x00\x88\x01\x01\x42\x12\n\x10_intensity_ratio\"~\n\x13\x45nhanceAudioRequest\x12\x1b\n\x11\x61udio_stream_data\x18\x01 \x01(\x0cH\x00\x12:\n\x06\x63onfig\x18\x02 \x01(\x0b\x32(.nvidia.maxine.bnr.v1.EnhanceAudioConfigH\x00\x42\x0e\n\x0cstream_input\"\x80\x01\n\x14\x45nhanceAudioResponse\x12\x1b\n\x11\x61udio_stream_data\x18\x01 \x01(\x0cH\x00\x12:\n\x06\x63onfig\x18\x02 \x01(\x0b\x32(.nvidia.maxine.bnr.v1.EnhanceAudioConfigH\x00\x42\x0f\n\rstream_output2x\n\tMaxineBNR\x12k\n\x0c\x45nhanceAudio\x12).nvidia.maxine.bnr.v1.EnhanceAudioRequest\x1a*.nvidia.maxine.bnr.v1.EnhanceAudioResponse\"\x00(\x01\x30\x01\x62\x06proto3') 28 | 29 | _globals = globals() 30 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 31 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'bnr_pb2', _globals) 32 | if not _descriptor._USE_C_DESCRIPTORS: 33 | DESCRIPTOR._loaded_options = None 34 | _globals['_ENHANCEAUDIOCONFIG']._serialized_start=35 35 | _globals['_ENHANCEAUDIOCONFIG']._serialized_end=105 36 | _globals['_ENHANCEAUDIOREQUEST']._serialized_start=107 37 | _globals['_ENHANCEAUDIOREQUEST']._serialized_end=233 38 | _globals['_ENHANCEAUDIORESPONSE']._serialized_start=236 39 | _globals['_ENHANCEAUDIORESPONSE']._serialized_end=364 40 | _globals['_MAXINEBNR']._serialized_start=366 41 | _globals['_MAXINEBNR']._serialized_end=486 42 | # @@protoc_insertion_point(module_scope) 43 | -------------------------------------------------------------------------------- /studio-voice/protos/proto/nvidia/maxine/studiovoice/v1/studiovoice.proto: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom the 8 | // Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | // DEALINGS IN THE SOFTWARE. 20 | 21 | syntax = "proto3"; 22 | 23 | package nvidia.maxine.studiovoice.v1; 24 | 25 | // The MaxineStudioVoice service provides APIs to run the 26 | // Maxine Studio Voice NIM. 27 | service MaxineStudioVoice { 28 | // EnhanceAudio is a bidirectional streaming RPC to run the 29 | // Maxine Studio Voice NIM on audio files. 30 | // 31 | // The client streams the input audio file in chunks in the input message and 32 | // receives the output audio file in chunks in the output message. 33 | // 34 | // The client should only pass one audio file per RPC invocation. 35 | rpc EnhanceAudio(stream EnhanceAudioRequest) returns (stream EnhanceAudioResponse) {} 36 | } 37 | 38 | // Input message for EnhanceAudio RPC. 39 | // Contains a chunk of input audio file data. 40 | message EnhanceAudioRequest { 41 | oneof stream_input { 42 | bytes audio_stream_data = 1; 43 | } 44 | } 45 | 46 | // Output message for EnhanceAudio RPC. 47 | // Contains a chunk of output audio file data. 48 | message EnhanceAudioResponse { 49 | oneof stream_output { 50 | bytes audio_stream_data = 1; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /eye-contact/protos/windows/compile_protos.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | :: 3 | :: Permission is hereby granted, free of charge, to any person obtaining a 4 | :: copy of this software and associated documentation files (the "Software"), 5 | :: to deal in the Software without restriction, including without limitation 6 | :: the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | :: and/or sell copies of the Software, and to permit persons to whom the 8 | :: Software is furnished to do so, subject to the following conditions: 9 | :: 10 | :: The above copyright notice and this permission notice shall be included in 11 | :: all copies or substantial portions of the Software. 12 | :: 13 | :: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | :: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | :: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | :: THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | :: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | :: FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | :: DEALINGS IN THE SOFTWARE. 20 | 21 | 22 | :: This script compiles Protocol Buffer (protobuf) definitions for NVIDIA Maxine Eye-Contact NIM on a Windows Client. 23 | :: 24 | :: Execute the script using `compile_protos.bat`. 25 | :: 26 | :: For more details, refer to README.txt. 
27 | 28 | 29 | @echo off 30 | setlocal 31 | 32 | :: Define the script directory 33 | set "SCRIPT_DIR=%~dp0" 34 | 35 | :: Define the protobufs and output directories 36 | set "PROTOS_DIR=%SCRIPT_DIR%..\proto\nvidia\maxine\eyecontact\v1" 37 | set "OUT_DIR=%SCRIPT_DIR%..\..\interfaces\" 38 | 39 | :: Log the paths for debugging 40 | echo "Using PROTOS_DIR: %PROTOS_DIR%" 41 | echo "Using OUT_DIR: %OUT_DIR%" 42 | 43 | :: Check if Python is installed 44 | where python >nul 2>&1 45 | if errorlevel 1 ( 46 | echo [Error] Python is not installed or not in the PATH. 47 | exit /b 1 48 | ) 49 | 50 | :: Run grpc_tools.protoc to generate Python gRPC code 51 | python -m grpc_tools.protoc -I=%PROTOS_DIR% ^ 52 | --python_out=%OUT_DIR% ^ 53 | --pyi_out=%OUT_DIR% ^ 54 | --grpc_python_out=%OUT_DIR% ^ 55 | %PROTOS_DIR%\eyecontact.proto 56 | if errorlevel 1 ( 57 | echo [Error] Failed to execute grpc_tools.protoc. Please check the paths and dependencies. 58 | exit /b 1 59 | ) 60 | 61 | echo "gRPC files generated successfully." 62 | endlocal 63 | 64 | -------------------------------------------------------------------------------- /audio2face-2d/protos/windows/nodejs/compile_protos.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | :: 3 | :: Permission is hereby granted, free of charge, to any person obtaining a 4 | :: copy of this software and associated documentation files (the "Software"), 5 | :: to deal in the Software without restriction, including without limitation 6 | :: the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | :: and/or sell copies of the Software, and to permit persons to whom the 8 | :: Software is furnished to do so, subject to the following conditions: 9 | :: 10 | :: The above copyright notice and this permission notice shall be included in 11 | :: all copies or substantial portions of the Software. 12 | :: 13 | :: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | :: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | :: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | :: THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | :: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | :: FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | :: DEALINGS IN THE SOFTWARE. 20 | 21 | 22 | :: This script compiles Protocol Buffer (protobuf) definitions for NVIDIA Maxine Audio2Face-2D NIM on a Windows Client. 23 | :: 24 | :: Execute the script using `compile_protos.bat`. 25 | :: 26 | :: For more details, refer to README.txt. 
27 | 28 | 29 | @echo off 30 | setlocal 31 | 32 | set "SCRIPT_DIR=%~dp0" 33 | set "PROTOS_DIR=%SCRIPT_DIR%../../proto/nvidia/maxine/audio2face2d/v1" 34 | set "OUT_DIR=%SCRIPT_DIR%../../../nodejs/interfaces" 35 | 36 | :: Install grpc-tools 37 | call npm install -g grpc-tools 38 | 39 | if %errorlevel% neq 0 ( 40 | echo grpc-tools installation failed 41 | exit /b %errorlevel% 42 | ) 43 | 44 | :: Check if running in PowerShell 45 | call powershell -Command "exit $PSVersionTable.PSVersion.Major -ne $null" 46 | if %errorlevel% equ 0 ( 47 | :: Running in PowerShell 48 | powershell -Command "for /f 'delims=' %%i in ('Get-Command grpc_tools_node_protoc_plugin.cmd ^| Select-Object -ExpandProperty Source') do set GRPC_PLUGIN_PATH=%%i" 49 | ) else ( 50 | :: Running in Command Prompt 51 | for /f "delims=" %%i in ('where grpc_tools_node_protoc_plugin.cmd') do set GRPC_PLUGIN_PATH=%%i 52 | ) 53 | 54 | :: Generate the interface files 55 | call grpc_tools_node_protoc --js_out=import_style=commonjs:%OUT_DIR% %PROTOS_DIR%/audio2face2d.proto --proto_path=%PROTOS_DIR% --grpc_out=grpc_js:%OUT_DIR% --plugin=protoc-gen-grpc=%GRPC_PLUGIN_PATH% 56 | endlocal -------------------------------------------------------------------------------- /audio2face-2d/protos/windows/python/compile_protos.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | :: 3 | :: Permission is hereby granted, free of charge, to any person obtaining a 4 | :: copy of this software and associated documentation files (the "Software"), 5 | :: to deal in the Software without restriction, including without limitation 6 | :: the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | :: and/or sell copies of the Software, and to permit persons to whom the 8 | :: Software is furnished to do so, subject to the following conditions: 9 | :: 10 | :: The above copyright notice and this permission notice shall be included in 11 | :: all copies or substantial portions of the Software. 12 | :: 13 | :: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | :: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | :: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | :: THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | :: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | :: FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | :: DEALINGS IN THE SOFTWARE. 20 | 21 | 22 | :: This script compiles Protocol Buffer (protobuf) definitions for NVIDIA Maxine Audio2Face-2D NIM on a Windows Client. 23 | :: 24 | :: Execute the script using `compile_protos.bat`. 25 | :: 26 | :: For more details, refer to README.txt. 27 | 28 | 29 | @echo off 30 | setlocal 31 | 32 | :: Define the script directory 33 | set "SCRIPT_DIR=%~dp0" 34 | 35 | :: Define the protobufs and output directories 36 | set "PROTOS_DIR=%SCRIPT_DIR%..\..\proto\nvidia\maxine\audio2face2d\v1" 37 | set "OUT_DIR=%SCRIPT_DIR%..\..\..\python\interfaces\" 38 | 39 | :: Log the paths for debugging 40 | echo "Using PROTOS_DIR: %PROTOS_DIR%" 41 | echo "Using OUT_DIR: %OUT_DIR%" 42 | 43 | :: Check if Python is installed 44 | where python >nul 2>&1 45 | if errorlevel 1 ( 46 | echo [Error] Python is not installed or not in the PATH. 
47 | exit /b 1 48 | ) 49 | 50 | :: Run grpc_tools.protoc to generate Python gRPC code 51 | python -m grpc_tools.protoc -I=%PROTOS_DIR% ^ 52 | --python_out=%OUT_DIR% ^ 53 | --pyi_out=%OUT_DIR% ^ 54 | --grpc_python_out=%OUT_DIR% ^ 55 | %PROTOS_DIR%\audio2face2d.proto 56 | if errorlevel 1 ( 57 | echo [Error] Failed to execute grpc_tools.protoc. Please check the paths and dependencies. 58 | exit /b 1 59 | ) 60 | 61 | echo "gRPC files generated successfully." 62 | endlocal 63 | 64 | -------------------------------------------------------------------------------- /bnr/protos/proto/nvidia/maxine/bnr/v1/bnr.proto: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom the 8 | // Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | // DEALINGS IN THE SOFTWARE. 20 | 21 | syntax = "proto3"; 22 | 23 | package nvidia.maxine.bnr.v1; 24 | 25 | // The MaxineBNR service provides APIs to run the 26 | // Maxine BNR NIM. 27 | service MaxineBNR { 28 | // EnhanceAudio is a bidirectional streaming RPC to run the 29 | // Maxine BNR NIM on audio files. 30 | // 31 | // The client streams the input audio file in chunks in the input message and 32 | // receives the output audio file in chunks in the output message. 33 | // 34 | // The client should only pass one audio file per RPC invocation. 35 | rpc EnhanceAudio(stream EnhanceAudioRequest) returns (stream EnhanceAudioResponse) {} 36 | } 37 | 38 | // Configuration for EnhanceAudio API. 39 | message EnhanceAudioConfig { 40 | // Intensity ratio between 0.0f to 1.0f. 41 | // Default: 1.0 42 | optional float intensity_ratio = 1; 43 | } 44 | 45 | // Input message for EnhanceAudio RPC. 46 | message EnhanceAudioRequest { 47 | oneof stream_input { 48 | // Contains a chunk of input audio file data. 49 | // 32 bit float audio samples 50 | bytes audio_stream_data = 1; 51 | 52 | // Configuration parameters for the request 53 | EnhanceAudioConfig config = 2; 54 | } 55 | } 56 | 57 | // Output message for EnhanceAudio RPC. 58 | message EnhanceAudioResponse { 59 | oneof stream_output { 60 | // Contains a chunk of output audio file data. 
61 | // 32 bit float audio samples 62 | bytes audio_stream_data = 1; 63 | 64 | // Configuration parameters used 65 | EnhanceAudioConfig config = 2; 66 | } 67 | } -------------------------------------------------------------------------------- /eye-contact/scripts/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 20 | 21 | # Constants for data handling 22 | DATA_CHUNK_SIZE = 64 * 1024 # bytes, we send the mp4 file in 64KB chunks 23 | DEFAULT_BITRATE = 3000000 # bps 24 | DEFAULT_IDR_INTERVAL = 8 # frames 25 | DEFAULT_STREAMABLE_VIDEO_PATH = "../assets/sample_streamable.mp4" 26 | DEFAULT_NON_STREAMABLE_VIDEO_PATH = "../assets/sample_transactional.mp4" 27 | 28 | # Default values from eyecontact.proto 29 | DEFAULT_TEMPORAL = 0xFFFFFFFF 30 | DEFAULT_DETECT_CLOSURE = 0 31 | DEFAULT_EYE_SIZE_SENSITIVITY = 3 32 | DEFAULT_ENABLE_LOOKAWAY = 0 33 | DEFAULT_LOOKAWAY_MAX_OFFSET = 5 34 | DEFAULT_LOOKAWAY_INTERVAL_MIN = 3 35 | DEFAULT_LOOKAWAY_INTERVAL_RANGE = 8 36 | DEFAULT_GAZE_PITCH_THRESHOLD_LOW = 25.0 37 | DEFAULT_GAZE_PITCH_THRESHOLD_HIGH = 30.0 38 | DEFAULT_GAZE_YAW_THRESHOLD_LOW = 20.0 39 | DEFAULT_GAZE_YAW_THRESHOLD_HIGH = 30.0 40 | DEFAULT_HEAD_PITCH_THRESHOLD_LOW = 20.0 41 | DEFAULT_HEAD_PITCH_THRESHOLD_HIGH = 25.0 42 | DEFAULT_HEAD_YAW_THRESHOLD_LOW = 25.0 43 | DEFAULT_HEAD_YAW_THRESHOLD_HIGH = 30.0 44 | 45 | # Parameter validation ranges 46 | PARAM_RANGES = { 47 | "temporal": (0, 0xFFFFFFFF), 48 | "detect_closure": (0, 1), 49 | "eye_size_sensitivity": (2, 6), 50 | "enable_lookaway": (0, 1), 51 | "lookaway_max_offset": (1, 10), 52 | "lookaway_interval_min": (1, 600), 53 | "lookaway_interval_range": (1, 600), 54 | "gaze_pitch_threshold_low": (10.0, 35.0), 55 | "gaze_pitch_threshold_high": (10.0, 35.0), 56 | "gaze_yaw_threshold_low": (10.0, 35.0), 57 | "gaze_yaw_threshold_high": (10.0, 35.0), 58 | "head_pitch_threshold_low": (10.0, 35.0), 59 | "head_pitch_threshold_high": (10.0, 35.0), 60 | "head_yaw_threshold_low": (10.0, 35.0), 61 | "head_yaw_threshold_high": (10.0, 35.0), 62 | } 63 | -------------------------------------------------------------------------------- /eye-contact/protos/linux/compile_protos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | # This script compiles Protocol Buffer (protobuf) definitions for NVIDIA Maxine Eye-Contact NIM on a Linux Client. 25 | # 26 | # Execute the script using `./compile_protos.sh` 27 | # 28 | # For more details, refer to README.md 29 | 30 | 31 | # Get the script directory's parent directory 32 | SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" 33 | echo $SCRIPT_DIR 34 | # Define paths for proto files and output directory 35 | PROTOS_DIR=$(realpath "$SCRIPT_DIR/../proto/nvidia/maxine/eyecontact/v1") 36 | OUT_DIR=$(realpath "$SCRIPT_DIR/../../interfaces/") 37 | 38 | # Check if required directories and files exist 39 | if [ ! -d "$PROTOS_DIR" ]; then 40 | echo "[Error] Protos directory does not exist: $PROTOS_DIR" 41 | exit 1 42 | fi 43 | 44 | if [ ! -f "$PROTOS_DIR/eyecontact.proto" ]; then 45 | echo "[Error] Protobuf file not found: $PROTOS_DIR/eyecontact.proto" 46 | exit 1 47 | fi 48 | 49 | # Check if Python is installed 50 | if ! command -v python3 > /dev/null; then 51 | echo "[Error] Python3 is not installed or not in the PATH." 52 | exit 1 53 | fi 54 | 55 | # Log the paths for debugging 56 | echo "Using PROTOS_DIR: $PROTOS_DIR" 57 | echo "Using OUT_DIR: $OUT_DIR" 58 | 59 | # Run grpc_tools.protoc 60 | python3 -m grpc_tools.protoc -I="$PROTOS_DIR" \ 61 | --python_out="$OUT_DIR" \ 62 | --pyi_out="$OUT_DIR" \ 63 | --grpc_python_out="$OUT_DIR" \ 64 | "$PROTOS_DIR/eyecontact.proto" 65 | 66 | # Check if the command succeeded 67 | if [ $? -ne 0 ]; then 68 | echo "[Error] Failed to execute grpc_tools.protoc." 69 | exit 1 70 | fi 71 | 72 | echo "gRPC files generated successfully." 73 | 74 | -------------------------------------------------------------------------------- /audio2face-2d/protos/linux/python/compile_protos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | # This script compiles Protocol Buffer (protobuf) definitions for NVIDIA Maxine Audio2Face-2D NIM on a Linux Client. 25 | # 26 | # Execute the script using `./compile_protos.sh` 27 | # 28 | # For more details, refer to README.md 29 | 30 | 31 | # Get the script directory's parent directory 32 | SCRIPT_DIR="$(dirname "$(dirname "$(readlink -f "$0")")")" 33 | 34 | # Define paths for proto files and output directory 35 | PROTOS_DIR=$(realpath "$SCRIPT_DIR/../proto/nvidia/maxine/audio2face2d/v1") 36 | OUT_DIR=$(realpath "$SCRIPT_DIR/../../python/interfaces/") 37 | 38 | # Check if required directories and files exist 39 | if [ ! -d "$PROTOS_DIR" ]; then 40 | echo "[Error] Protos directory does not exist: $PROTOS_DIR" 41 | exit 1 42 | fi 43 | 44 | if [ ! -f "$PROTOS_DIR/audio2face2d.proto" ]; then 45 | echo "[Error] Protobuf file not found: $PROTOS_DIR/audio2face2d.proto" 46 | exit 1 47 | fi 48 | 49 | # Check if Python is installed 50 | if ! command -v python3 > /dev/null; then 51 | echo "[Error] Python3 is not installed or not in the PATH." 52 | exit 1 53 | fi 54 | 55 | # Log the paths for debugging 56 | echo "Using PROTOS_DIR: $PROTOS_DIR" 57 | echo "Using OUT_DIR: $OUT_DIR" 58 | 59 | # Run grpc_tools.protoc 60 | python3 -m grpc_tools.protoc -I="$PROTOS_DIR" \ 61 | --python_out="$OUT_DIR" \ 62 | --pyi_out="$OUT_DIR" \ 63 | --grpc_python_out="$OUT_DIR" \ 64 | "$PROTOS_DIR/audio2face2d.proto" 65 | 66 | # Check if the command succeeded 67 | if [ $? -ne 0 ]; then 68 | echo "[Error] Failed to execute grpc_tools.protoc." 69 | exit 1 70 | fi 71 | 72 | echo "gRPC files generated successfully." 73 | 74 | -------------------------------------------------------------------------------- /audio2face-2d/nodejs/interfaces/audio2face2d_grpc_pb.js: -------------------------------------------------------------------------------- 1 | // GENERATED CODE -- DO NOT EDIT! 
2 | 3 | 'use strict'; 4 | var grpc = require('@grpc/grpc-js'); 5 | var audio2face2d_pb = require('./audio2face2d_pb.js'); 6 | var google_protobuf_empty_pb = require('google-protobuf/google/protobuf/empty_pb.js'); 7 | 8 | function serialize_nvidia_maxine_audio2face2d_v1_AnimateRequest(arg) { 9 | if (!(arg instanceof audio2face2d_pb.AnimateRequest)) { 10 | throw new Error('Expected argument of type nvidia.maxine.audio2face2d.v1.AnimateRequest'); 11 | } 12 | return Buffer.from(arg.serializeBinary()); 13 | } 14 | 15 | function deserialize_nvidia_maxine_audio2face2d_v1_AnimateRequest(buffer_arg) { 16 | return audio2face2d_pb.AnimateRequest.deserializeBinary(new Uint8Array(buffer_arg)); 17 | } 18 | 19 | function serialize_nvidia_maxine_audio2face2d_v1_AnimateResponse(arg) { 20 | if (!(arg instanceof audio2face2d_pb.AnimateResponse)) { 21 | throw new Error('Expected argument of type nvidia.maxine.audio2face2d.v1.AnimateResponse'); 22 | } 23 | return Buffer.from(arg.serializeBinary()); 24 | } 25 | 26 | function deserialize_nvidia_maxine_audio2face2d_v1_AnimateResponse(buffer_arg) { 27 | return audio2face2d_pb.AnimateResponse.deserializeBinary(new Uint8Array(buffer_arg)); 28 | } 29 | 30 | 31 | // The Audio2Face2DService provides APIs to run the 32 | // Maxine Audio to Face - 2D feature. 33 | var Audio2Face2DServiceService = exports.Audio2Face2DServiceService = { 34 | // Animate is a bidirectional streaming API to run the 35 | // Audio2Face-2D. 36 | // 37 | // The input message can contain AnimateConfig or bytes. 38 | // In the beginning of the stream, a request with AnimateConfig should 39 | // be sent to the server to set the feature's parameters. 40 | // The server will echo back a response with the config to signify that the 41 | // parameters were properly set. It is mandatory to set the portrait_image 42 | // config, other configuration parameters are optional and a default value will 43 | // be used if not set. Any AnimateConfig sent during the middle of the stream 44 | // will be ignored. 45 | // 46 | // After the configuration step, the client streams the input wav file in 47 | // chunks in the input message and receives the output mp4 file in chunks in 48 | // the output message. While the inference is running, the server will periodically 49 | // echo empty message to keep the channel alive. The client should ignore this message. 50 | // 51 | // It is recommended that the client should pass one file per API invocation. 52 | // The configurations are also set per invocation. 53 | animate: { 54 | path: '/nvidia.maxine.audio2face2d.v1.Audio2Face2DService/Animate', 55 | requestStream: true, 56 | responseStream: true, 57 | requestType: audio2face2d_pb.AnimateRequest, 58 | responseType: audio2face2d_pb.AnimateResponse, 59 | requestSerialize: serialize_nvidia_maxine_audio2face2d_v1_AnimateRequest, 60 | requestDeserialize: deserialize_nvidia_maxine_audio2face2d_v1_AnimateRequest, 61 | responseSerialize: serialize_nvidia_maxine_audio2face2d_v1_AnimateResponse, 62 | responseDeserialize: deserialize_nvidia_maxine_audio2face2d_v1_AnimateResponse, 63 | }, 64 | }; 65 | 66 | exports.Audio2Face2DServiceClient = grpc.makeGenericClientConstructor(Audio2Face2DServiceService); 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: MIT 3 | 4 | # Prerequisites 5 | *.d 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Fortran module files 23 | *.mod 24 | *.smod 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | 37 | # Models 38 | *.pt 39 | *.savedmodel 40 | install/ 41 | 42 | # Ignore backup files. 43 | *~ 44 | # Ignore Vim swap files. 45 | .*.swp 46 | # Ignore files generated by IDEs. 47 | /.classpath 48 | /.factorypath 49 | /.idea/ 50 | /.ijwb/ 51 | /.project 52 | /.settings 53 | /.vscode/ 54 | # Ignore outputs generated during Bazel bootstrapping. 55 | /output/ 56 | # Ignore jekyll build output. 57 | /production 58 | /.sass-cache 59 | 60 | # Byte-compiled / optimized / DLL files 61 | __pycache__/ 62 | *.py[cod] 63 | *$py.class 64 | 65 | # Distribution / packaging 66 | .Python 67 | build/ 68 | develop-eggs/ 69 | dist/ 70 | downloads/ 71 | eggs/ 72 | .eggs/ 73 | lib/ 74 | lib64/ 75 | parts/ 76 | sdist/ 77 | var/ 78 | wheels/ 79 | pip-wheel-metadata/ 80 | share/python-wheels/ 81 | *.egg-info/ 82 | .installed.cfg 83 | *.egg 84 | MANIFEST 85 | 86 | # PyInstaller 87 | # Usually these files are written by a python script from a template 88 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 89 | *.manifest 90 | *.spec 91 | 92 | # Installer logs 93 | pip-log.txt 94 | pip-delete-this-directory.txt 95 | 96 | # Unit test / coverage reports 97 | htmlcov/ 98 | .tox/ 99 | .nox/ 100 | .coverage 101 | .coverage.* 102 | .cache 103 | nosetests.xml 104 | coverage.xml 105 | *.cover 106 | .hypothesis/ 107 | .pytest_cache/ 108 | 109 | # Translations 110 | *.mo 111 | *.pot 112 | 113 | # Django stuff: 114 | *.log 115 | local_settings.py 116 | db.sqlite3 117 | db.sqlite3-journal 118 | 119 | # Flask stuff: 120 | instance/ 121 | .webassets-cache 122 | 123 | # Scrapy stuff: 124 | .scrapy 125 | 126 | # Sphinx documentation 127 | docs/_build/ 128 | 129 | # PyBuilder 130 | target/ 131 | 132 | # Jupyter Notebook 133 | .ipynb_checkpoints 134 | 135 | # IPython 136 | profile_default/ 137 | ipython_config.py 138 | 139 | # pyenv 140 | .python-version 141 | 142 | # pipenv 143 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 144 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 145 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 146 | # install all needed dependencies. 
147 | #Pipfile.lock 148 | 149 | # celery beat schedule file 150 | celerybeat-schedule 151 | 152 | # SageMath parsed files 153 | *.sage.py 154 | 155 | # Environments 156 | .env 157 | .venv 158 | env/ 159 | venv/ 160 | ENV/ 161 | env.bak/ 162 | venv.bak/ 163 | 164 | # Spyder project settings 165 | .spyderproject 166 | .spyproject 167 | 168 | # Rope project settings 169 | .ropeproject 170 | 171 | # mkdocs documentation 172 | /site 173 | 174 | # mypy 175 | .mypy_cache/ 176 | .dmypy.json 177 | dmypy.json 178 | 179 | # Pyre type checker 180 | .pyre/ 181 | 182 | tests/integration/asr/outputs 183 | tests/integration/nlp/outputs 184 | tests/integration/tts/outputs 185 | -------------------------------------------------------------------------------- /bnr/interfaces/bnr/bnr_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | import warnings 5 | 6 | import bnr_pb2 as bnr__pb2 7 | 8 | GRPC_GENERATED_VERSION = '1.67.1' 9 | GRPC_VERSION = grpc.__version__ 10 | _version_not_supported = False 11 | 12 | try: 13 | from grpc._utilities import first_version_is_lower 14 | _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) 15 | except ImportError: 16 | _version_not_supported = True 17 | 18 | if _version_not_supported: 19 | raise RuntimeError( 20 | f'The grpc package installed is at version {GRPC_VERSION},' 21 | + f' but the generated code in bnr_pb2_grpc.py depends on' 22 | + f' grpcio>={GRPC_GENERATED_VERSION}.' 23 | + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' 24 | + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' 25 | ) 26 | 27 | 28 | class MaxineBNRStub(object): 29 | """The MaxineBNR service provides APIs to run the 30 | Maxine BNR NIM. 31 | """ 32 | 33 | def __init__(self, channel): 34 | """Constructor. 35 | 36 | Args: 37 | channel: A grpc.Channel. 38 | """ 39 | self.EnhanceAudio = channel.stream_stream( 40 | '/nvidia.maxine.bnr.v1.MaxineBNR/EnhanceAudio', 41 | request_serializer=bnr__pb2.EnhanceAudioRequest.SerializeToString, 42 | response_deserializer=bnr__pb2.EnhanceAudioResponse.FromString, 43 | _registered_method=True) 44 | 45 | 46 | class MaxineBNRServicer(object): 47 | """The MaxineBNR service provides APIs to run the 48 | Maxine BNR NIM. 49 | """ 50 | 51 | def EnhanceAudio(self, request_iterator, context): 52 | """EnhanceAudio is a bidirectional streaming RPC to run the 53 | Maxine BNR NIM on audio files. 54 | 55 | The client streams the input audio file in chunks in the input message and 56 | receives the output audio file in chunks in the output message. 57 | 58 | The client should only pass one audio file per RPC invocation. 
59 | """ 60 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 61 | context.set_details('Method not implemented!') 62 | raise NotImplementedError('Method not implemented!') 63 | 64 | 65 | def add_MaxineBNRServicer_to_server(servicer, server): 66 | rpc_method_handlers = { 67 | 'EnhanceAudio': grpc.stream_stream_rpc_method_handler( 68 | servicer.EnhanceAudio, 69 | request_deserializer=bnr__pb2.EnhanceAudioRequest.FromString, 70 | response_serializer=bnr__pb2.EnhanceAudioResponse.SerializeToString, 71 | ), 72 | } 73 | generic_handler = grpc.method_handlers_generic_handler( 74 | 'nvidia.maxine.bnr.v1.MaxineBNR', rpc_method_handlers) 75 | server.add_generic_rpc_handlers((generic_handler,)) 76 | server.add_registered_method_handlers('nvidia.maxine.bnr.v1.MaxineBNR', rpc_method_handlers) 77 | 78 | 79 | # This class is part of an EXPERIMENTAL API. 80 | class MaxineBNR(object): 81 | """The MaxineBNR service provides APIs to run the 82 | Maxine BNR NIM. 83 | """ 84 | 85 | @staticmethod 86 | def EnhanceAudio(request_iterator, 87 | target, 88 | options=(), 89 | channel_credentials=None, 90 | call_credentials=None, 91 | insecure=False, 92 | compression=None, 93 | wait_for_ready=None, 94 | timeout=None, 95 | metadata=None): 96 | return grpc.experimental.stream_stream( 97 | request_iterator, 98 | target, 99 | '/nvidia.maxine.bnr.v1.MaxineBNR/EnhanceAudio', 100 | bnr__pb2.EnhanceAudioRequest.SerializeToString, 101 | bnr__pb2.EnhanceAudioResponse.FromString, 102 | options, 103 | channel_credentials, 104 | insecure, 105 | call_credentials, 106 | compression, 107 | wait_for_ready, 108 | timeout, 109 | metadata, 110 | _registered_method=True) 111 | -------------------------------------------------------------------------------- /studio-voice/interfaces/studio_voice/studiovoice_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | import warnings 5 | 6 | import studiovoice_pb2 as studiovoice__pb2 7 | 8 | GRPC_GENERATED_VERSION = '1.67.1' 9 | GRPC_VERSION = grpc.__version__ 10 | _version_not_supported = False 11 | 12 | try: 13 | from grpc._utilities import first_version_is_lower 14 | _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) 15 | except ImportError: 16 | _version_not_supported = True 17 | 18 | if _version_not_supported: 19 | raise RuntimeError( 20 | f'The grpc package installed is at version {GRPC_VERSION},' 21 | + f' but the generated code in studiovoice_pb2_grpc.py depends on' 22 | + f' grpcio>={GRPC_GENERATED_VERSION}.' 23 | + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' 24 | + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' 25 | ) 26 | 27 | 28 | class MaxineStudioVoiceStub(object): 29 | """The MaxineStudioVoice service provides APIs to run the 30 | Maxine Studio Voice NIM. 31 | """ 32 | 33 | def __init__(self, channel): 34 | """Constructor. 35 | 36 | Args: 37 | channel: A grpc.Channel. 
38 | """ 39 | self.EnhanceAudio = channel.stream_stream( 40 | '/nvidia.maxine.studiovoice.v1.MaxineStudioVoice/EnhanceAudio', 41 | request_serializer=studiovoice__pb2.EnhanceAudioRequest.SerializeToString, 42 | response_deserializer=studiovoice__pb2.EnhanceAudioResponse.FromString, 43 | _registered_method=True) 44 | 45 | 46 | class MaxineStudioVoiceServicer(object): 47 | """The MaxineStudioVoice service provides APIs to run the 48 | Maxine Studio Voice NIM. 49 | """ 50 | 51 | def EnhanceAudio(self, request_iterator, context): 52 | """EnhanceAudio is a bidirectional streaming RPC to run the 53 | Maxine Studio Voice NIM on audio files. 54 | 55 | The client streams the input audio file in chunks in the input message and 56 | receives the output audio file in chunks in the output message. 57 | 58 | The client should only pass one audio file per RPC invocation. 59 | """ 60 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 61 | context.set_details('Method not implemented!') 62 | raise NotImplementedError('Method not implemented!') 63 | 64 | 65 | def add_MaxineStudioVoiceServicer_to_server(servicer, server): 66 | rpc_method_handlers = { 67 | 'EnhanceAudio': grpc.stream_stream_rpc_method_handler( 68 | servicer.EnhanceAudio, 69 | request_deserializer=studiovoice__pb2.EnhanceAudioRequest.FromString, 70 | response_serializer=studiovoice__pb2.EnhanceAudioResponse.SerializeToString, 71 | ), 72 | } 73 | generic_handler = grpc.method_handlers_generic_handler( 74 | 'nvidia.maxine.studiovoice.v1.MaxineStudioVoice', rpc_method_handlers) 75 | server.add_generic_rpc_handlers((generic_handler,)) 76 | server.add_registered_method_handlers('nvidia.maxine.studiovoice.v1.MaxineStudioVoice', rpc_method_handlers) 77 | 78 | 79 | # This class is part of an EXPERIMENTAL API. 80 | class MaxineStudioVoice(object): 81 | """The MaxineStudioVoice service provides APIs to run the 82 | Maxine Studio Voice NIM. 
83 | """ 84 | 85 | @staticmethod 86 | def EnhanceAudio(request_iterator, 87 | target, 88 | options=(), 89 | channel_credentials=None, 90 | call_credentials=None, 91 | insecure=False, 92 | compression=None, 93 | wait_for_ready=None, 94 | timeout=None, 95 | metadata=None): 96 | return grpc.experimental.stream_stream( 97 | request_iterator, 98 | target, 99 | '/nvidia.maxine.studiovoice.v1.MaxineStudioVoice/EnhanceAudio', 100 | studiovoice__pb2.EnhanceAudioRequest.SerializeToString, 101 | studiovoice__pb2.EnhanceAudioResponse.FromString, 102 | options, 103 | channel_credentials, 104 | insecure, 105 | call_credentials, 106 | compression, 107 | wait_for_ready, 108 | timeout, 109 | metadata, 110 | _registered_method=True) 111 | -------------------------------------------------------------------------------- /audio2face-2d/assets/head_translation_animation.csv: -------------------------------------------------------------------------------- 1 | 0.0000, 0.0000, 1.0000 2 | 0.0030, 0.0000, 1.0000 3 | 0.0070, 0.0000, 1.0000 4 | 0.0100, 0.0000, 1.0000 5 | 0.0130, 0.0000, 1.0000 6 | 0.0170, 0.0000, 1.0000 7 | 0.0200, 0.0000, 1.0000 8 | 0.0230, 0.0000, 1.0000 9 | 0.0270, 0.0000, 1.0000 10 | 0.0300, 0.0000, 1.0000 11 | 0.0330, 0.0000, 1.0000 12 | 0.0370, 0.0000, 1.0000 13 | 0.0400, 0.0000, 1.0000 14 | 0.0430, 0.0000, 1.0000 15 | 0.0470, 0.0000, 1.0000 16 | 0.0470, 0.0000, 1.0000 17 | 0.0430, 0.0000, 1.0000 18 | 0.0400, 0.0000, 1.0000 19 | 0.0370, 0.0000, 1.0000 20 | 0.0330, 0.0000, 1.0000 21 | 0.0300, 0.0000, 1.0000 22 | 0.0270, 0.0000, 1.0000 23 | 0.0230, 0.0000, 1.0000 24 | 0.0200, 0.0000, 1.0000 25 | 0.0170, 0.0000, 1.0000 26 | 0.0130, 0.0000, 1.0000 27 | 0.0100, 0.0000, 1.0000 28 | 0.0070, 0.0000, 1.0000 29 | 0.0030, 0.0000, 1.0000 30 | 0.0000, 0.0000, 1.0000 31 | 0.0000, 0.0000, 1.0000 32 | -0.0030, 0.0000, 1.0000 33 | -0.0070, 0.0000, 1.0000 34 | -0.0100, 0.0000, 1.0000 35 | -0.0130, 0.0000, 1.0000 36 | -0.0170, 0.0000, 1.0000 37 | -0.0200, 0.0000, 1.0000 38 | -0.0230, 0.0000, 1.0000 39 | -0.0270, 0.0000, 1.0000 40 | -0.0300, 0.0000, 1.0000 41 | -0.0330, 0.0000, 1.0000 42 | -0.0370, 0.0000, 1.0000 43 | -0.0400, 0.0000, 1.0000 44 | -0.0430, 0.0000, 1.0000 45 | -0.0470, 0.0000, 1.0000 46 | -0.0470, 0.0000, 1.0000 47 | -0.0430, 0.0000, 1.0000 48 | -0.0400, 0.0000, 1.0000 49 | -0.0370, 0.0000, 1.0000 50 | -0.0330, 0.0000, 1.0000 51 | -0.0300, 0.0000, 1.0000 52 | -0.0270, 0.0000, 1.0000 53 | -0.0230, 0.0000, 1.0000 54 | -0.0200, 0.0000, 1.0000 55 | -0.0170, 0.0000, 1.0000 56 | -0.0130, 0.0000, 1.0000 57 | -0.0100, 0.0000, 1.0000 58 | -0.0070, 0.0000, 1.0000 59 | -0.0030, 0.0000, 1.0000 60 | 0.0000, 0.0000, 1.0000 61 | 0.0000, 0.0000, 1.0000 62 | 0.0000, 0.0030, 1.0000 63 | 0.0000, 0.0070, 1.0000 64 | 0.0000, 0.0100, 1.0000 65 | 0.0000, 0.0130, 1.0000 66 | 0.0000, 0.0170, 1.0000 67 | 0.0000, 0.0200, 1.0000 68 | 0.0000, 0.0230, 1.0000 69 | 0.0000, 0.0270, 1.0000 70 | 0.0000, 0.0300, 1.0000 71 | 0.0000, 0.0330, 1.0000 72 | 0.0000, 0.0370, 1.0000 73 | 0.0000, 0.0400, 1.0000 74 | 0.0000, 0.0430, 1.0000 75 | 0.0000, 0.0470, 1.0000 76 | 0.0000, 0.0470, 1.0000 77 | 0.0000, 0.0430, 1.0000 78 | 0.0000, 0.0400, 1.0000 79 | 0.0000, 0.0370, 1.0000 80 | 0.0000, 0.0330, 1.0000 81 | 0.0000, 0.0300, 1.0000 82 | 0.0000, 0.0270, 1.0000 83 | 0.0000, 0.0230, 1.0000 84 | 0.0000, 0.0200, 1.0000 85 | 0.0000, 0.0170, 1.0000 86 | 0.0000, 0.0130, 1.0000 87 | 0.0000, 0.0100, 1.0000 88 | 0.0000, 0.0070, 1.0000 89 | 0.0000, 0.0030, 1.0000 90 | 0.0000, 0.0000, 1.0000 91 | 0.0000, 0.0000, 1.0000 92 | 0.0000, -0.0030, 1.0000 93 | 
0.0000, -0.0070, 1.0000 94 | 0.0000, -0.0100, 1.0000 95 | 0.0000, -0.0130, 1.0000 96 | 0.0000, -0.0170, 1.0000 97 | 0.0000, -0.0200, 1.0000 98 | 0.0000, -0.0230, 1.0000 99 | 0.0000, -0.0270, 1.0000 100 | 0.0000, -0.0300, 1.0000 101 | 0.0000, -0.0330, 1.0000 102 | 0.0000, -0.0370, 1.0000 103 | 0.0000, -0.0400, 1.0000 104 | 0.0000, -0.0430, 1.0000 105 | 0.0000, -0.0470, 1.0000 106 | 0.0000, -0.0470, 1.0000 107 | 0.0000, -0.0430, 1.0000 108 | 0.0000, -0.0400, 1.0000 109 | 0.0000, -0.0370, 1.0000 110 | 0.0000, -0.0330, 1.0000 111 | 0.0000, -0.0300, 1.0000 112 | 0.0000, -0.0270, 1.0000 113 | 0.0000, -0.0230, 1.0000 114 | 0.0000, -0.0200, 1.0000 115 | 0.0000, -0.0170, 1.0000 116 | 0.0000, -0.0130, 1.0000 117 | 0.0000, -0.0100, 1.0000 118 | 0.0000, -0.0070, 1.0000 119 | 0.0000, -0.0030, 1.0000 120 | 0.0000, 0.0000, 1.0000 121 | 0.0000, 0.0000, 1.0000 122 | 0.0000, 0.0000, 0.9980 123 | 0.0000, 0.0000, 0.9960 124 | 0.0000, 0.0000, 0.9940 125 | 0.0000, 0.0000, 0.9920 126 | 0.0000, 0.0000, 0.9900 127 | 0.0000, 0.0000, 0.9880 128 | 0.0000, 0.0000, 0.9860 129 | 0.0000, 0.0000, 0.9840 130 | 0.0000, 0.0000, 0.9820 131 | 0.0000, 0.0000, 0.9800 132 | 0.0000, 0.0000, 0.9780 133 | 0.0000, 0.0000, 0.9760 134 | 0.0000, 0.0000, 0.9740 135 | 0.0000, 0.0000, 0.9720 136 | 0.0000, 0.0000, 0.9720 137 | 0.0000, 0.0000, 0.9740 138 | 0.0000, 0.0000, 0.9760 139 | 0.0000, 0.0000, 0.9780 140 | 0.0000, 0.0000, 0.9800 141 | 0.0000, 0.0000, 0.9820 142 | 0.0000, 0.0000, 0.9840 143 | 0.0000, 0.0000, 0.9860 144 | 0.0000, 0.0000, 0.9880 145 | 0.0000, 0.0000, 0.9900 146 | 0.0000, 0.0000, 0.9920 147 | 0.0000, 0.0000, 0.9940 148 | 0.0000, 0.0000, 0.9960 149 | 0.0000, 0.0000, 0.9980 150 | 0.0000, 0.0000, 1.0000 151 | 0.0000, 0.0000, 1.0000 152 | 0.0000, 0.0000, 1.0020 153 | 0.0000, 0.0000, 1.0040 154 | 0.0000, 0.0000, 1.0060 155 | 0.0000, 0.0000, 1.0080 156 | 0.0000, 0.0000, 1.0100 157 | 0.0000, 0.0000, 1.0120 158 | 0.0000, 0.0000, 1.0140 159 | 0.0000, 0.0000, 1.0160 160 | 0.0000, 0.0000, 1.0180 161 | 0.0000, 0.0000, 1.0200 162 | 0.0000, 0.0000, 1.0220 163 | 0.0000, 0.0000, 1.0240 164 | 0.0000, 0.0000, 1.0260 165 | 0.0000, 0.0000, 1.0280 166 | 0.0000, 0.0000, 1.0280 167 | 0.0000, 0.0000, 1.0260 168 | 0.0000, 0.0000, 1.0240 169 | 0.0000, 0.0000, 1.0220 170 | 0.0000, 0.0000, 1.0200 171 | 0.0000, 0.0000, 1.0180 172 | 0.0000, 0.0000, 1.0160 173 | 0.0000, 0.0000, 1.0140 174 | 0.0000, 0.0000, 1.0120 175 | 0.0000, 0.0000, 1.0100 176 | 0.0000, 0.0000, 1.0080 177 | 0.0000, 0.0000, 1.0060 178 | 0.0000, 0.0000, 1.0040 179 | 0.0000, 0.0000, 1.0020 180 | 0.0000, 0.0000, 1.0000 181 | -------------------------------------------------------------------------------- /bnr/README.md: -------------------------------------------------------------------------------- 1 | # NVIDIA BNR NIM Client 2 | 3 | This package has a sample client which demonstrates interaction with a BNR NIM. 4 | 5 | ## Getting Started 6 | 7 | NVIDIA Maxine NIM Client packages use gRPC APIs. Instructions below demonstrate usage of BNR NIM using Python gRPC client. 8 | Additionally, access the [Try API](https://build.nvidia.com/nvidia/bnr/api) feature to experience the NVIDIA BNR NIM API without hosting your own servers, as it leverages the NVIDIA Cloud Functions backend. 9 | 10 | ## Pre-requisites 11 | 12 | - Ensure you have Python 3.10 or above installed on your system. 13 | Please refer to the [Python documentation](https://www.python.org/downloads/) for download and installation instructions. 14 | - Access to NVIDIA BNR NIM Container / Service. 
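
## How the Client Talks to the NIM (Sketch)

The packaged client in `scripts/bnr.py` is the supported way to exercise the NIM and is what the Usage guide below describes. For orientation only, the sketch that follows shows the bare `EnhanceAudio` bidirectional-streaming exchange using the generated `bnr_pb2`/`bnr_pb2_grpc` modules. It is an illustration, not the packaged client: the payload field name (`audio_file_data`), the chunk size, the import path, and the helper names are assumptions; verify them against `bnr.proto` and the generated stubs. Configuration options, streaming mode, and the Preview API path are handled by `scripts/bnr.py` and are omitted here.

```python
# Minimal transactional EnhanceAudio sketch (illustrative only).
# Assumes the protos were compiled into ../interfaces/bnr (step 4 of the Usage guide)
# and that the request/response messages carry the audio bytes in a field named
# `audio_file_data`; check bnr.proto for the authoritative field name.
import sys

import grpc

sys.path.append("../interfaces/bnr")  # the generated module imports `bnr_pb2` directly
import bnr_pb2
import bnr_pb2_grpc

CHUNK_SIZE = 64 * 1024  # illustrative chunk size


def request_chunks(path):
    """Yield the input WAV file as a stream of EnhanceAudioRequest messages."""
    with open(path, "rb") as f:
        while True:
            chunk = f.read(CHUNK_SIZE)
            if not chunk:
                break
            yield bnr_pb2.EnhanceAudioRequest(audio_file_data=chunk)  # field name assumed


def main():
    # One audio file per RPC invocation, as required by the service description.
    with grpc.insecure_channel("127.0.0.1:8001") as channel:
        stub = bnr_pb2_grpc.MaxineBNRStub(channel)
        responses = stub.EnhanceAudio(request_chunks("../assets/bnr_48k_input.wav"))
        with open("bnr_48k_output.wav", "wb") as out:
            for response in responses:
                out.write(response.audio_file_data)  # field name assumed


if __name__ == "__main__":
    main()
```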
15 | 
16 | ## Usage guide
17 | 
18 | ### 1. Clone the repository
19 | 
20 | ```bash
21 | git clone https://github.com/nvidia-maxine/nim-clients.git
22 | 
23 | // Go to the 'bnr' folder
24 | cd nim-clients/bnr
25 | ```
26 | 
27 | ### 2. Install Dependencies
28 | 
29 | ```bash
30 | sudo apt-get install python3-pip
31 | pip install -r requirements.txt
32 | ```
33 | 
34 | ### 3. Host the NIM Server
35 | 
36 | Before running the client part of BNR, please set up a server.
37 | The simplest way to do that is to follow the [quick start guide](https://docs.nvidia.com/nim/maxine/bnr/latest/index.html).
38 | This step can be skipped when using [Try API](https://build.nvidia.com/nvidia/bnr/api).
39 | 
40 | 
41 | ### 4. Compile the Protos
42 | 
43 | Before running the Python client, you can optionally compile the protos.
44 | The grpcio version needed for compilation is listed in requirements.txt.
45 | 
46 | To compile protos on Linux, run:
47 | ```bash
48 | // Go to bnr/protos folder
49 | cd bnr/protos
50 | 
51 | chmod +x compile_protos.sh
52 | ./compile_protos.sh
53 | ```
54 | 
55 | To compile protos on Windows, run:
56 | ```bash
57 | // Go to bnr/protos folder
58 | cd bnr/protos
59 | 
60 | compile_protos.bat
61 | ```
62 | 
63 | ### 5. Run the Python Client
64 | 
65 | Go to the scripts directory.
66 | 
67 | ```bash
68 | cd scripts
69 | ```
70 | 
71 | #### Usage for Transactional NIM Request
72 | 
73 | To run the client in transactional mode, set `--sample-rate` in accordance with the server; the default is `48000`. The following example command processes the packaged sample audio file in transactional mode and generates a `bnr_48k_output.wav` file in the current folder.
74 | 
75 | ```bash
76 | python bnr.py --target 127.0.0.1:8001 --input ../assets/bnr_48k_input.wav --output bnr_48k_output.wav --sample-rate 48000
77 | ```
78 | 
79 | #### Usage for Streaming NIM Request
80 | 
81 | To run the client in streaming mode, add `--streaming`. The following example command processes the packaged sample audio file in streaming mode and generates a `bnr_48k_output.wav` file in the current folder.
82 | 
83 | ```bash
84 | python bnr.py --target 127.0.0.1:8001 --input ../assets/bnr_48k_input.wav --output bnr_48k_output.wav --streaming --sample-rate 48000
85 | ```
86 | 
87 | Only WAV files are supported.
88 | 
89 | #### Usage for Preview API Request
90 | 
91 | ```bash
92 | python bnr.py --preview-mode \
93 |     --ssl-mode TLS \
94 |     --target grpc.nvcf.nvidia.com:443 \
95 |     --function-id <function_id> \
96 |     --api-key $API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC \
97 |     --input <input_file_path> \
98 |     --output <output_file_path>
99 | ```
100 | 
101 | #### Command Line Arguments
102 | 
103 | - `--preview-mode` - Flag to send the request to the preview NVCF server at https://build.nvidia.com/nvidia/bnr/api.
104 | - `--ssl-mode` - Flag to control if SSL MTLS/TLS encryption should be used. When running in preview mode, SSL must be set to TLS. Default value is `None`.
105 | - `--ssl-key` - The path to the SSL private key. Default value is `None`.
106 | - `--ssl-cert` - The path to the SSL certificate chain. Default value is `None`.
107 | - `--ssl-root-cert` - The path to the SSL root certificate. Default value is `None`.
108 | - `--target` - `<ip:port>` of the gRPC service, when hosted locally. Use grpc.nvcf.nvidia.com:443 when hosted on NVCF.
109 | - `--api-key` - NGC API key required for authentication; utilized when using `TRY API`, ignored otherwise.
110 | - `--function-id` - NVCF function ID for the service; utilized when using `TRY API`, ignored otherwise.
111 | - `--input` - The path to the input audio file.
Default value is `../assets/bnr_48k_input.wav`. 112 | - `--output` - The path for the output audio file. Default is current directory (scripts) with name `bnr_48k_output.wav`. 113 | - `--streaming` - Flag to control if streaming mode should be used. Transactional mode will be used by default. 114 | - `--sample-rate` - Sample rate of input audio file in Hz (`16000`, `48000`), default is `48000`. 115 | - `--intensity-ratio` - Intensity ratio value between 0 and 1 to control denoising intensity. Default is 1.0 (maximum denoising). 116 | 117 | Refer the [docs](https://docs.nvidia.com/nim/maxine/bnr/latest/index.html) for more information. 118 | -------------------------------------------------------------------------------- /eye-contact/interfaces/eyecontact_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | import warnings 5 | 6 | import eyecontact_pb2 as eyecontact__pb2 7 | 8 | GRPC_GENERATED_VERSION = '1.67.1' 9 | GRPC_VERSION = grpc.__version__ 10 | _version_not_supported = False 11 | 12 | try: 13 | from grpc._utilities import first_version_is_lower 14 | _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) 15 | except ImportError: 16 | _version_not_supported = True 17 | 18 | if _version_not_supported: 19 | raise RuntimeError( 20 | f'The grpc package installed is at version {GRPC_VERSION},' 21 | + f' but the generated code in eyecontact_pb2_grpc.py depends on' 22 | + f' grpcio>={GRPC_GENERATED_VERSION}.' 23 | + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' 24 | + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' 25 | ) 26 | 27 | 28 | class MaxineEyeContactServiceStub(object): 29 | """The MaxineEyeContactService provides APIs to run the 30 | Maxine Eye Contact feature. 31 | """ 32 | 33 | def __init__(self, channel): 34 | """Constructor. 35 | 36 | Args: 37 | channel: A grpc.Channel. 38 | """ 39 | self.RedirectGaze = channel.stream_stream( 40 | '/nvidia.maxine.eyecontact.v1.MaxineEyeContactService/RedirectGaze', 41 | request_serializer=eyecontact__pb2.RedirectGazeRequest.SerializeToString, 42 | response_deserializer=eyecontact__pb2.RedirectGazeResponse.FromString, 43 | _registered_method=True) 44 | 45 | 46 | class MaxineEyeContactServiceServicer(object): 47 | """The MaxineEyeContactService provides APIs to run the 48 | Maxine Eye Contact feature. 49 | """ 50 | 51 | def RedirectGaze(self, request_iterator, context): 52 | """RedirectGaze is a bidirectional streaming API to run the 53 | Maxine Eye Contact feature on mp4 video files. 54 | 55 | The input message can contain GazeRedirectionConfig or bytes. 56 | In the beginning of the stream, a request with GazeRedirectionConfig may 57 | be sent to the server to set the feature's parameter. 58 | The server will echo back a response with the config to signify that the 59 | parameters were properly set. If not configured, default values will be 60 | used for the feature's parameters. Any GazeRedirectionConfig sent during 61 | the middle of the stream will be ignored. 62 | 63 | After the optional configuration, the client streams the input mp4 file in 64 | chunks in the input message and receives the output mp4 file in chunks in 65 | the output message. 
66 | 67 | The client should only pass one video file per API invocation and the 68 | configuration, if set, is applied to only that invocation. 69 | """ 70 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 71 | context.set_details('Method not implemented!') 72 | raise NotImplementedError('Method not implemented!') 73 | 74 | 75 | def add_MaxineEyeContactServiceServicer_to_server(servicer, server): 76 | rpc_method_handlers = { 77 | 'RedirectGaze': grpc.stream_stream_rpc_method_handler( 78 | servicer.RedirectGaze, 79 | request_deserializer=eyecontact__pb2.RedirectGazeRequest.FromString, 80 | response_serializer=eyecontact__pb2.RedirectGazeResponse.SerializeToString, 81 | ), 82 | } 83 | generic_handler = grpc.method_handlers_generic_handler( 84 | 'nvidia.maxine.eyecontact.v1.MaxineEyeContactService', rpc_method_handlers) 85 | server.add_generic_rpc_handlers((generic_handler,)) 86 | server.add_registered_method_handlers('nvidia.maxine.eyecontact.v1.MaxineEyeContactService', rpc_method_handlers) 87 | 88 | 89 | # This class is part of an EXPERIMENTAL API. 90 | class MaxineEyeContactService(object): 91 | """The MaxineEyeContactService provides APIs to run the 92 | Maxine Eye Contact feature. 93 | """ 94 | 95 | @staticmethod 96 | def RedirectGaze(request_iterator, 97 | target, 98 | options=(), 99 | channel_credentials=None, 100 | call_credentials=None, 101 | insecure=False, 102 | compression=None, 103 | wait_for_ready=None, 104 | timeout=None, 105 | metadata=None): 106 | return grpc.experimental.stream_stream( 107 | request_iterator, 108 | target, 109 | '/nvidia.maxine.eyecontact.v1.MaxineEyeContactService/RedirectGaze', 110 | eyecontact__pb2.RedirectGazeRequest.SerializeToString, 111 | eyecontact__pb2.RedirectGazeResponse.FromString, 112 | options, 113 | channel_credentials, 114 | insecure, 115 | call_credentials, 116 | compression, 117 | wait_for_ready, 118 | timeout, 119 | metadata, 120 | _registered_method=True) 121 | -------------------------------------------------------------------------------- /studio-voice/README.md: -------------------------------------------------------------------------------- 1 | # NVIDIA Studio Voice NIM Client 2 | 3 | This package has a sample client which demonstrates interaction with a Studio Voice NIM. 4 | 5 | ## Getting Started 6 | 7 | NVIDIA Maxine NIM Client packages use gRPC APIs. Instructions below demonstrate usage of Studio Voice NIM using Python gRPC client. 8 | Additionally, access the [Try API](https://build.nvidia.com/nvidia/studiovoice/api) feature to experience the NVIDIA Studio Voice NIM API without hosting your own servers, as it leverages the NVIDIA Cloud Functions backend. 9 | 10 | ## Pre-requisites 11 | 12 | - Ensure you have Python 3.10 or above installed on your system. 13 | Please refer to the [Python documentation](https://www.python.org/downloads/) for download and installation instructions. 14 | - Access to NVIDIA Studio Voice NIM Container / Service. 15 | 16 | ## Usage guide 17 | 18 | ### 1. Clone the repository 19 | 20 | ```bash 21 | git clone https://github.com/nvidia-maxine/nim-clients.git 22 | 23 | // Go to the 'studio-voice' folder 24 | cd nim-clients/studio-voice 25 | ``` 26 | 27 | ### 2. Install Dependencies 28 | 29 | ```bash 30 | sudo apt-get install python3-pip 31 | pip install -r requirements.txt 32 | ``` 33 | 34 | ### 3. Host the NIM Server 35 | 36 | Before running client part of Studio Voice, please set up a server. 
37 | The simplest way to do that is to follow the [quick start guide](https://docs.nvidia.com/nim/maxine/studio-voice/latest/index.html).
38 | This step can be skipped when using [Try API](https://build.nvidia.com/nvidia/studiovoice/api).
39 | 
40 | 
41 | ### 4. Compile the Protos
42 | 
43 | Before running the Python client, you can optionally compile the protos.
44 | The grpcio version needed for compilation is listed in requirements.txt.
45 | 
46 | To compile protos on Linux, run:
47 | ```bash
48 | // Go to studio-voice/protos folder
49 | cd studio-voice/protos
50 | 
51 | chmod +x compile_protos.sh
52 | ./compile_protos.sh
53 | ```
54 | 
55 | To compile protos on Windows, run:
56 | ```bash
57 | // Go to studio-voice/protos folder
58 | cd studio-voice/protos
59 | 
60 | compile_protos.bat
61 | ```
62 | 
63 | ### 5. Run the Python Client
64 | 
65 | Go to the scripts directory.
66 | 
67 | ```bash
68 | cd scripts
69 | ```
70 | 
71 | #### Usage for Transactional NIM Request
72 | 
73 | To run the client in transactional mode, set `--model-type` in accordance with the server; the default is `48k-hq`. The following example command processes the packaged sample audio file in transactional mode and generates a `studio_voice_48k_output.wav` file in the current folder.
74 | 
75 | ```bash
76 | python studio_voice.py --target 127.0.0.1:8001 --input ../assets/studio_voice_48k_input.wav --output studio_voice_48k_output.wav --model-type 48k-hq
77 | ```
78 | 
79 | #### Usage for Streaming NIM Request
80 | 
81 | To run the client in streaming mode, add `--streaming`. The following example command processes the packaged sample audio file in streaming mode and generates a `studio_voice_48k_output.wav` file in the current folder.
82 | 
83 | ```bash
84 | python studio_voice.py --target 127.0.0.1:8001 --input ../assets/studio_voice_48k_input.wav --output studio_voice_48k_output.wav --streaming --model-type 48k-ll
85 | ```
86 | 
87 | Only WAV files are supported.
88 | 
89 | #### Usage for Preview API Request
90 | 
91 | ```bash
92 | python studio_voice.py --preview-mode \
93 |     --ssl-mode TLS \
94 |     --target grpc.nvcf.nvidia.com:443 \
95 |     --function-id <function_id> \
96 |     --api-key $API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC \
97 |     --input <input_file_path> \
98 |     --output <output_file_path>
99 | ```
100 | 
101 | #### Command Line Arguments
102 | 
103 | - `--preview-mode` - Flag to send the request to the preview NVCF server at https://build.nvidia.com/nvidia/studiovoice/api (a channel-setup sketch follows this list).
104 | - `--ssl-mode` - Flag to control if SSL MTLS/TLS encryption should be used. When running in preview mode, SSL must be set to TLS. Default value is `None`.
105 | - `--ssl-key` - The path to the SSL private key. Default value is `None`.
106 | - `--ssl-cert` - The path to the SSL certificate chain. Default value is `None`.
107 | - `--ssl-root-cert` - The path to the SSL root certificate. Default value is `None`.
108 | - `--target` - `<ip:port>` of the gRPC service, when hosted locally. Use grpc.nvcf.nvidia.com:443 when hosted on NVCF.
109 | - `--api-key` - NGC API key required for authentication; utilized when using `TRY API`, ignored otherwise.
110 | - `--function-id` - NVCF function ID for the service; utilized when using `TRY API`, ignored otherwise.
111 | - `--input` - The path to the input audio file. Default value is `../assets/studio_voice_48k_input.wav`.
112 | - `--output` - The path for the output audio file. Default is the current directory (`scripts`) with the name `studio_voice_48k_output.wav`.
113 | - `--streaming` - Flag to control if streaming mode should be used. Transactional mode will be used by default.
114 | - `--model-type` - Studio Voice model type hosted on the server. It can be set to `48k-hq/48k-ll/16k-hq`. Default value is `48k-hq`.
115 | 
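#### Preview API Channel Setup (Sketch)

The Preview API request shown above needs a TLS channel to the NVCF endpoint, with the function ID and API key attached as call metadata. The snippet below sketches one way to set that up using the generated `studiovoice_pb2_grpc` module; it is not the packaged client. The metadata keys (`function-id`, `authorization`), the `sys.path` handling, and the helper name `make_preview_stub` are assumptions to verify against `scripts/studio_voice.py`.

```python
# Sketch: TLS channel and call metadata for the NVCF preview endpoint (illustrative only).
import sys

import grpc

sys.path.append("../interfaces/studio_voice")  # the generated module imports `studiovoice_pb2` directly
import studiovoice_pb2_grpc


def make_preview_stub(api_key: str, function_id: str):
    """Return a MaxineStudioVoiceStub for the preview endpoint plus per-call metadata."""
    channel = grpc.secure_channel(
        "grpc.nvcf.nvidia.com:443", grpc.ssl_channel_credentials()
    )
    stub = studiovoice_pb2_grpc.MaxineStudioVoiceStub(channel)
    metadata = (
        ("function-id", function_id),            # assumed NVCF routing header
        ("authorization", f"Bearer {api_key}"),  # assumed authorization header format
    )
    return stub, metadata


# The metadata is then passed on each call, for example:
#   stub, metadata = make_preview_stub(api_key, function_id)
#   responses = stub.EnhanceAudio(request_iterator, metadata=metadata)
```

For a locally hosted server, the same stub can instead be created over `grpc.insecure_channel("127.0.0.1:8001")` without any call metadata.
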
116 | Refer to the [docs](https://docs.nvidia.com/nim/maxine/studio-voice/latest/index.html) for more information.
117 | 
--------------------------------------------------------------------------------
/audio2face-2d/python/interfaces/audio2face2d_pb2_grpc.py:
--------------------------------------------------------------------------------
1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
2 | """Client and server classes corresponding to protobuf-defined services."""
3 | import grpc
4 | import warnings
5 | 
6 | import audio2face2d_pb2 as audio2face2d__pb2
7 | 
8 | GRPC_GENERATED_VERSION = '1.67.1'
9 | GRPC_VERSION = grpc.__version__
10 | _version_not_supported = False
11 | 
12 | try:
13 |     from grpc._utilities import first_version_is_lower
14 |     _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
15 | except ImportError:
16 |     _version_not_supported = True
17 | 
18 | if _version_not_supported:
19 |     raise RuntimeError(
20 |         f'The grpc package installed is at version {GRPC_VERSION},'
21 |         + f' but the generated code in audio2face2d_pb2_grpc.py depends on'
22 |         + f' grpcio>={GRPC_GENERATED_VERSION}.'
23 |         + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
24 |         + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
25 |     )
26 | 
27 | 
28 | class Audio2Face2DServiceStub(object):
29 |     """The Audio2Face2DService provides APIs to run the
30 |     Maxine Audio to Face - 2D feature.
31 |     """
32 | 
33 |     def __init__(self, channel):
34 |         """Constructor.
35 | 
36 |         Args:
37 |             channel: A grpc.Channel.
38 |         """
39 |         self.Animate = channel.stream_stream(
40 |                 '/nvidia.maxine.audio2face2d.v1.Audio2Face2DService/Animate',
41 |                 request_serializer=audio2face2d__pb2.AnimateRequest.SerializeToString,
42 |                 response_deserializer=audio2face2d__pb2.AnimateResponse.FromString,
43 |                 _registered_method=True)
44 | 
45 | 
46 | class Audio2Face2DServiceServicer(object):
47 |     """The Audio2Face2DService provides APIs to run the
48 |     Maxine Audio to Face - 2D feature.
49 |     """
50 | 
51 |     def Animate(self, request_iterator, context):
52 |         """Animate is a bidirectional streaming API to run the
53 |         Audio2Face-2D.
54 | 
55 |         The input message can contain AnimateConfig or bytes.
56 |         In the beginning of the stream, a request with AnimateConfig should
57 |         be sent to the server to set the feature's parameters.
58 |         The server will echo back a response with the config to signify that the
59 |         parameters were properly set. It is mandatory to set the portrait_image
60 |         config, other configuration parameters are optional and a default value will
61 |         be used if not set. Any AnimateConfig sent during the middle of the stream
62 |         will be ignored.
63 | 
64 |         After the configuration step, the client streams the input wav file in
65 |         chunks in the input message and receives the output mp4 file in chunks in
66 |         the output message. While the inference is running, the server will periodically
67 |         echo empty message to keep the channel alive. The client should ignore this message.
68 | 
69 |         It is recommended that the client should pass one file per API invocation.
70 |         The configurations are also set per invocation.
71 | """ 72 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 73 | context.set_details('Method not implemented!') 74 | raise NotImplementedError('Method not implemented!') 75 | 76 | 77 | def add_Audio2Face2DServiceServicer_to_server(servicer, server): 78 | rpc_method_handlers = { 79 | 'Animate': grpc.stream_stream_rpc_method_handler( 80 | servicer.Animate, 81 | request_deserializer=audio2face2d__pb2.AnimateRequest.FromString, 82 | response_serializer=audio2face2d__pb2.AnimateResponse.SerializeToString, 83 | ), 84 | } 85 | generic_handler = grpc.method_handlers_generic_handler( 86 | 'nvidia.maxine.audio2face2d.v1.Audio2Face2DService', rpc_method_handlers) 87 | server.add_generic_rpc_handlers((generic_handler,)) 88 | server.add_registered_method_handlers('nvidia.maxine.audio2face2d.v1.Audio2Face2DService', rpc_method_handlers) 89 | 90 | 91 | # This class is part of an EXPERIMENTAL API. 92 | class Audio2Face2DService(object): 93 | """The Audio2Face2DService provides APIs to run the 94 | Maxine Audio to Face - 2D feature. 95 | """ 96 | 97 | @staticmethod 98 | def Animate(request_iterator, 99 | target, 100 | options=(), 101 | channel_credentials=None, 102 | call_credentials=None, 103 | insecure=False, 104 | compression=None, 105 | wait_for_ready=None, 106 | timeout=None, 107 | metadata=None): 108 | return grpc.experimental.stream_stream( 109 | request_iterator, 110 | target, 111 | '/nvidia.maxine.audio2face2d.v1.Audio2Face2DService/Animate', 112 | audio2face2d__pb2.AnimateRequest.SerializeToString, 113 | audio2face2d__pb2.AnimateResponse.FromString, 114 | options, 115 | channel_credentials, 116 | insecure, 117 | call_credentials, 118 | compression, 119 | wait_for_ready, 120 | timeout, 121 | metadata, 122 | _registered_method=True) 123 | -------------------------------------------------------------------------------- /eye-contact/interfaces/eyecontact_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: eyecontact.proto 5 | # Protobuf Python Version: 5.27.2 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 27, 16 | 2, 17 | '', 18 | 'eyecontact.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2 26 | from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 27 | 28 | 29 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10\x65yecontact.proto\x12\x1bnvidia.maxine.eyecontact.v1\x1a\x19google/protobuf/any.proto\x1a\x1bgoogle/protobuf/empty.proto\"]\n\rLossyEncoding\x12\x14\n\x07\x62itrate\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x19\n\x0cidr_interval\x18\x02 \x01(\rH\x01\x88\x01\x01\x42\n\n\x08_bitrateB\x0f\n\r_idr_interval\"\xaa\x01\n\x14\x43ustomEncodingParams\x12M\n\x06\x63ustom\x18\x01 \x03(\x0b\x32=.nvidia.maxine.eyecontact.v1.CustomEncodingParams.CustomEntry\x1a\x43\n\x0b\x43ustomEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12#\n\x05value\x18\x02 \x01(\x0b\x32\x14.google.protobuf.Any:\x02\x38\x01\"\xc5\x01\n\x13OutputVideoEncoding\x12\x12\n\x08lossless\x18\x01 \x01(\x08H\x00\x12;\n\x05lossy\x18\x02 \x01(\x0b\x32*.nvidia.maxine.eyecontact.v1.LossyEncodingH\x00\x12L\n\x0f\x63ustom_encoding\x18\x03 \x01(\x0b\x32\x31.nvidia.maxine.eyecontact.v1.CustomEncodingParamsH\x00\x42\x0f\n\rencoding_type\"\x98\x08\n\x12RedirectGazeConfig\x12\x15\n\x08temporal\x18\x01 \x01(\rH\x00\x88\x01\x01\x12\x1b\n\x0e\x64\x65tect_closure\x18\x02 \x01(\rH\x01\x88\x01\x01\x12!\n\x14\x65ye_size_sensitivity\x18\x03 \x01(\rH\x02\x88\x01\x01\x12\x1c\n\x0f\x65nable_lookaway\x18\x04 \x01(\rH\x03\x88\x01\x01\x12 \n\x13lookaway_max_offset\x18\x05 \x01(\rH\x04\x88\x01\x01\x12\"\n\x15lookaway_interval_min\x18\x06 \x01(\rH\x05\x88\x01\x01\x12$\n\x17lookaway_interval_range\x18\x07 \x01(\rH\x06\x88\x01\x01\x12%\n\x18gaze_pitch_threshold_low\x18\x08 \x01(\x02H\x07\x88\x01\x01\x12&\n\x19gaze_pitch_threshold_high\x18\t \x01(\x02H\x08\x88\x01\x01\x12#\n\x16gaze_yaw_threshold_low\x18\n \x01(\x02H\t\x88\x01\x01\x12$\n\x17gaze_yaw_threshold_high\x18\x0b \x01(\x02H\n\x88\x01\x01\x12%\n\x18head_pitch_threshold_low\x18\x0c \x01(\x02H\x0b\x88\x01\x01\x12&\n\x19head_pitch_threshold_high\x18\r \x01(\x02H\x0c\x88\x01\x01\x12#\n\x16head_yaw_threshold_low\x18\x0e \x01(\x02H\r\x88\x01\x01\x12$\n\x17head_yaw_threshold_high\x18\x0f \x01(\x02H\x0e\x88\x01\x01\x12T\n\x15output_video_encoding\x18\x10 
\x01(\x0b\x32\x30.nvidia.maxine.eyecontact.v1.OutputVideoEncodingH\x0f\x88\x01\x01\x42\x0b\n\t_temporalB\x11\n\x0f_detect_closureB\x17\n\x15_eye_size_sensitivityB\x12\n\x10_enable_lookawayB\x16\n\x14_lookaway_max_offsetB\x18\n\x16_lookaway_interval_minB\x1a\n\x18_lookaway_interval_rangeB\x1b\n\x19_gaze_pitch_threshold_lowB\x1c\n\x1a_gaze_pitch_threshold_highB\x19\n\x17_gaze_yaw_threshold_lowB\x1a\n\x18_gaze_yaw_threshold_highB\x1b\n\x19_head_pitch_threshold_lowB\x1c\n\x1a_head_pitch_threshold_highB\x19\n\x17_head_yaw_threshold_lowB\x1a\n\x18_head_yaw_threshold_highB\x18\n\x16_output_video_encoding\"\x83\x01\n\x13RedirectGazeRequest\x12\x41\n\x06\x63onfig\x18\x01 \x01(\x0b\x32/.nvidia.maxine.eyecontact.v1.RedirectGazeConfigH\x00\x12\x19\n\x0fvideo_file_data\x18\x02 \x01(\x0cH\x00\x42\x0e\n\x0cstream_input\"\xb2\x01\n\x14RedirectGazeResponse\x12\x41\n\x06\x63onfig\x18\x01 \x01(\x0b\x32/.nvidia.maxine.eyecontact.v1.RedirectGazeConfigH\x00\x12\x19\n\x0fvideo_file_data\x18\x02 \x01(\x0cH\x00\x12+\n\tkeepalive\x18\x03 \x01(\x0b\x32\x16.google.protobuf.EmptyH\x00\x42\x0f\n\rstream_output2\x94\x01\n\x17MaxineEyeContactService\x12y\n\x0cRedirectGaze\x12\x30.nvidia.maxine.eyecontact.v1.RedirectGazeRequest\x1a\x31.nvidia.maxine.eyecontact.v1.RedirectGazeResponse\"\x00(\x01\x30\x01\x62\x06proto3') 30 | 31 | _globals = globals() 32 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 33 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'eyecontact_pb2', _globals) 34 | if not _descriptor._USE_C_DESCRIPTORS: 35 | DESCRIPTOR._loaded_options = None 36 | _globals['_CUSTOMENCODINGPARAMS_CUSTOMENTRY']._loaded_options = None 37 | _globals['_CUSTOMENCODINGPARAMS_CUSTOMENTRY']._serialized_options = b'8\001' 38 | _globals['_LOSSYENCODING']._serialized_start=105 39 | _globals['_LOSSYENCODING']._serialized_end=198 40 | _globals['_CUSTOMENCODINGPARAMS']._serialized_start=201 41 | _globals['_CUSTOMENCODINGPARAMS']._serialized_end=371 42 | _globals['_CUSTOMENCODINGPARAMS_CUSTOMENTRY']._serialized_start=304 43 | _globals['_CUSTOMENCODINGPARAMS_CUSTOMENTRY']._serialized_end=371 44 | _globals['_OUTPUTVIDEOENCODING']._serialized_start=374 45 | _globals['_OUTPUTVIDEOENCODING']._serialized_end=571 46 | _globals['_REDIRECTGAZECONFIG']._serialized_start=574 47 | _globals['_REDIRECTGAZECONFIG']._serialized_end=1622 48 | _globals['_REDIRECTGAZEREQUEST']._serialized_start=1625 49 | _globals['_REDIRECTGAZEREQUEST']._serialized_end=1756 50 | _globals['_REDIRECTGAZERESPONSE']._serialized_start=1759 51 | _globals['_REDIRECTGAZERESPONSE']._serialized_end=1937 52 | _globals['_MAXINEEYECONTACTSERVICE']._serialized_start=1940 53 | _globals['_MAXINEEYECONTACTSERVICE']._serialized_end=2088 54 | # @@protoc_insertion_point(module_scope) 55 | -------------------------------------------------------------------------------- /eye-contact/interfaces/eyecontact_pb2.pyi: -------------------------------------------------------------------------------- 1 | from google.protobuf import any_pb2 as _any_pb2 2 | from google.protobuf import empty_pb2 as _empty_pb2 3 | from google.protobuf.internal import containers as _containers 4 | from google.protobuf import descriptor as _descriptor 5 | from google.protobuf import message as _message 6 | from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Optional, Union as _Union 7 | 8 | DESCRIPTOR: _descriptor.FileDescriptor 9 | 10 | class LossyEncoding(_message.Message): 11 | __slots__ = ("bitrate", "idr_interval") 12 | BITRATE_FIELD_NUMBER: _ClassVar[int] 13 | 
IDR_INTERVAL_FIELD_NUMBER: _ClassVar[int] 14 | bitrate: int 15 | idr_interval: int 16 | def __init__(self, bitrate: _Optional[int] = ..., idr_interval: _Optional[int] = ...) -> None: ... 17 | 18 | class CustomEncodingParams(_message.Message): 19 | __slots__ = ("custom",) 20 | class CustomEntry(_message.Message): 21 | __slots__ = ("key", "value") 22 | KEY_FIELD_NUMBER: _ClassVar[int] 23 | VALUE_FIELD_NUMBER: _ClassVar[int] 24 | key: str 25 | value: _any_pb2.Any 26 | def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[_any_pb2.Any, _Mapping]] = ...) -> None: ... 27 | CUSTOM_FIELD_NUMBER: _ClassVar[int] 28 | custom: _containers.MessageMap[str, _any_pb2.Any] 29 | def __init__(self, custom: _Optional[_Mapping[str, _any_pb2.Any]] = ...) -> None: ... 30 | 31 | class OutputVideoEncoding(_message.Message): 32 | __slots__ = ("lossless", "lossy", "custom_encoding") 33 | LOSSLESS_FIELD_NUMBER: _ClassVar[int] 34 | LOSSY_FIELD_NUMBER: _ClassVar[int] 35 | CUSTOM_ENCODING_FIELD_NUMBER: _ClassVar[int] 36 | lossless: bool 37 | lossy: LossyEncoding 38 | custom_encoding: CustomEncodingParams 39 | def __init__(self, lossless: bool = ..., lossy: _Optional[_Union[LossyEncoding, _Mapping]] = ..., custom_encoding: _Optional[_Union[CustomEncodingParams, _Mapping]] = ...) -> None: ... 40 | 41 | class RedirectGazeConfig(_message.Message): 42 | __slots__ = ("temporal", "detect_closure", "eye_size_sensitivity", "enable_lookaway", "lookaway_max_offset", "lookaway_interval_min", "lookaway_interval_range", "gaze_pitch_threshold_low", "gaze_pitch_threshold_high", "gaze_yaw_threshold_low", "gaze_yaw_threshold_high", "head_pitch_threshold_low", "head_pitch_threshold_high", "head_yaw_threshold_low", "head_yaw_threshold_high", "output_video_encoding") 43 | TEMPORAL_FIELD_NUMBER: _ClassVar[int] 44 | DETECT_CLOSURE_FIELD_NUMBER: _ClassVar[int] 45 | EYE_SIZE_SENSITIVITY_FIELD_NUMBER: _ClassVar[int] 46 | ENABLE_LOOKAWAY_FIELD_NUMBER: _ClassVar[int] 47 | LOOKAWAY_MAX_OFFSET_FIELD_NUMBER: _ClassVar[int] 48 | LOOKAWAY_INTERVAL_MIN_FIELD_NUMBER: _ClassVar[int] 49 | LOOKAWAY_INTERVAL_RANGE_FIELD_NUMBER: _ClassVar[int] 50 | GAZE_PITCH_THRESHOLD_LOW_FIELD_NUMBER: _ClassVar[int] 51 | GAZE_PITCH_THRESHOLD_HIGH_FIELD_NUMBER: _ClassVar[int] 52 | GAZE_YAW_THRESHOLD_LOW_FIELD_NUMBER: _ClassVar[int] 53 | GAZE_YAW_THRESHOLD_HIGH_FIELD_NUMBER: _ClassVar[int] 54 | HEAD_PITCH_THRESHOLD_LOW_FIELD_NUMBER: _ClassVar[int] 55 | HEAD_PITCH_THRESHOLD_HIGH_FIELD_NUMBER: _ClassVar[int] 56 | HEAD_YAW_THRESHOLD_LOW_FIELD_NUMBER: _ClassVar[int] 57 | HEAD_YAW_THRESHOLD_HIGH_FIELD_NUMBER: _ClassVar[int] 58 | OUTPUT_VIDEO_ENCODING_FIELD_NUMBER: _ClassVar[int] 59 | temporal: int 60 | detect_closure: int 61 | eye_size_sensitivity: int 62 | enable_lookaway: int 63 | lookaway_max_offset: int 64 | lookaway_interval_min: int 65 | lookaway_interval_range: int 66 | gaze_pitch_threshold_low: float 67 | gaze_pitch_threshold_high: float 68 | gaze_yaw_threshold_low: float 69 | gaze_yaw_threshold_high: float 70 | head_pitch_threshold_low: float 71 | head_pitch_threshold_high: float 72 | head_yaw_threshold_low: float 73 | head_yaw_threshold_high: float 74 | output_video_encoding: OutputVideoEncoding 75 | def __init__(self, temporal: _Optional[int] = ..., detect_closure: _Optional[int] = ..., eye_size_sensitivity: _Optional[int] = ..., enable_lookaway: _Optional[int] = ..., lookaway_max_offset: _Optional[int] = ..., lookaway_interval_min: _Optional[int] = ..., lookaway_interval_range: _Optional[int] = ..., gaze_pitch_threshold_low: _Optional[float] = ..., 
gaze_pitch_threshold_high: _Optional[float] = ..., gaze_yaw_threshold_low: _Optional[float] = ..., gaze_yaw_threshold_high: _Optional[float] = ..., head_pitch_threshold_low: _Optional[float] = ..., head_pitch_threshold_high: _Optional[float] = ..., head_yaw_threshold_low: _Optional[float] = ..., head_yaw_threshold_high: _Optional[float] = ..., output_video_encoding: _Optional[_Union[OutputVideoEncoding, _Mapping]] = ...) -> None: ... 76 | 77 | class RedirectGazeRequest(_message.Message): 78 | __slots__ = ("config", "video_file_data") 79 | CONFIG_FIELD_NUMBER: _ClassVar[int] 80 | VIDEO_FILE_DATA_FIELD_NUMBER: _ClassVar[int] 81 | config: RedirectGazeConfig 82 | video_file_data: bytes 83 | def __init__(self, config: _Optional[_Union[RedirectGazeConfig, _Mapping]] = ..., video_file_data: _Optional[bytes] = ...) -> None: ... 84 | 85 | class RedirectGazeResponse(_message.Message): 86 | __slots__ = ("config", "video_file_data", "keepalive") 87 | CONFIG_FIELD_NUMBER: _ClassVar[int] 88 | VIDEO_FILE_DATA_FIELD_NUMBER: _ClassVar[int] 89 | KEEPALIVE_FIELD_NUMBER: _ClassVar[int] 90 | config: RedirectGazeConfig 91 | video_file_data: bytes 92 | keepalive: _empty_pb2.Empty 93 | def __init__(self, config: _Optional[_Union[RedirectGazeConfig, _Mapping]] = ..., video_file_data: _Optional[bytes] = ..., keepalive: _Optional[_Union[_empty_pb2.Empty, _Mapping]] = ...) -> None: ... 94 | -------------------------------------------------------------------------------- /audio2face-2d/python/interfaces/audio2face2d_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: audio2face2d.proto 5 | # Protobuf Python Version: 5.27.2 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 27, 16 | 2, 17 | '', 18 | 'audio2face2d.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 26 | 27 | 28 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12\x61udio2face2d.proto\x12\x1dnvidia.maxine.audio2face2d.v1\x1a\x1bgoogle/protobuf/empty.proto\"\x83\x08\n\rAnimateConfig\x12\x16\n\x0eportrait_image\x18\x01 \x01(\x0c\x12K\n\x0fmodel_selection\x18\x02 \x01(\x0e\x32-.nvidia.maxine.audio2face2d.v1.ModelSelectionH\x00\x88\x01\x01\x12V\n\x13\x61nimation_crop_mode\x18\x03 \x01(\x0e\x32\x34.nvidia.maxine.audio2face2d.v1.AnimationCroppingModeH\x01\x88\x01\x01\x12H\n\x0ehead_pose_mode\x18\x04 \x01(\x0e\x32+.nvidia.maxine.audio2face2d.v1.HeadPoseModeH\x02\x88\x01\x01\x12\x1c\n\x0f\x65nable_lookaway\x18\x05 \x01(\x08H\x03\x88\x01\x01\x12 \n\x13lookaway_max_offset\x18\x06 \x01(\rH\x04\x88\x01\x01\x12$\n\x17lookaway_interval_range\x18\x07 \x01(\rH\x05\x88\x01\x01\x12\"\n\x15lookaway_interval_min\x18\x08 \x01(\rH\x06\x88\x01\x01\x12\x1c\n\x0f\x62link_frequency\x18\t \x01(\rH\x07\x88\x01\x01\x12\x1b\n\x0e\x62link_duration\x18\n \x01(\rH\x08\x88\x01\x01\x12(\n\x1bmouth_expression_multiplier\x18\x0b 
\x01(\x02H\t\x88\x01\x01\x12!\n\x14head_pose_multiplier\x18\x0c \x01(\x02H\n\x88\x01\x01\x12Q\n\x13input_head_rotation\x18\r \x01(\x0b\x32/.nvidia.maxine.audio2face2d.v1.QuaternionStreamH\x0b\x88\x01\x01\x12R\n\x16input_head_translation\x18\x0e \x01(\x0b\x32-.nvidia.maxine.audio2face2d.v1.Vector3fStreamH\x0c\x88\x01\x01\x42\x12\n\x10_model_selectionB\x16\n\x14_animation_crop_modeB\x11\n\x0f_head_pose_modeB\x12\n\x10_enable_lookawayB\x16\n\x14_lookaway_max_offsetB\x1a\n\x18_lookaway_interval_rangeB\x18\n\x16_lookaway_interval_minB\x12\n\x10_blink_frequencyB\x11\n\x0f_blink_durationB\x1e\n\x1c_mouth_expression_multiplierB\x17\n\x15_head_pose_multiplierB\x16\n\x14_input_head_rotationB\x19\n\x17_input_head_translation\"+\n\x08Vector3f\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\"I\n\x0eVector3fStream\x12\x37\n\x06values\x18\x01 \x03(\x0b\x32\'.nvidia.maxine.audio2face2d.v1.Vector3f\"8\n\nQuaternion\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x12\t\n\x01w\x18\x04 \x01(\x02\"M\n\x10QuaternionStream\x12\x39\n\x06values\x18\x01 \x03(\x0b\x32).nvidia.maxine.audio2face2d.v1.Quaternion\"{\n\x0e\x41nimateRequest\x12>\n\x06\x63onfig\x18\x01 \x01(\x0b\x32,.nvidia.maxine.audio2face2d.v1.AnimateConfigH\x00\x12\x19\n\x0f\x61udio_file_data\x18\x02 \x01(\x0cH\x00\x42\x0e\n\x0cstream_input\"\xab\x01\n\x0f\x41nimateResponse\x12>\n\x06\x63onfig\x18\x01 \x01(\x0b\x32,.nvidia.maxine.audio2face2d.v1.AnimateConfigH\x00\x12\x19\n\x0fvideo_file_data\x18\x02 \x01(\x0cH\x00\x12,\n\nkeep_alive\x18\x03 \x01(\x0b\x32\x16.google.protobuf.EmptyH\x00\x42\x0f\n\rstream_output*h\n\x0eModelSelection\x12\x1f\n\x1bMODEL_SELECTION_UNSPECIFIED\x10\x00\x12\x18\n\x14MODEL_SELECTION_PERF\x10\x01\x12\x1b\n\x17MODEL_SELECTION_QUALITY\x10\x02*\xc4\x01\n\x15\x41nimationCroppingMode\x12\'\n#ANIMATION_CROPPING_MODE_UNSPECIFIED\x10\x00\x12#\n\x1f\x41NIMATION_CROPPING_MODE_FACEBOX\x10\x01\x12\x31\n-ANIMATION_CROPPING_MODE_REGISTRATION_BLENDING\x10\x02\x12*\n&ANIMATION_CROPPING_MODE_INSET_BLENDING\x10\x03*\xb2\x01\n\x0cHeadPoseMode\x12\x1e\n\x1aHEAD_POSE_MODE_UNSPECIFIED\x10\x00\x12-\n)HEAD_POSE_MODE_RETAIN_FROM_PORTRAIT_IMAGE\x10\x01\x12(\n$HEAD_POSE_MODE_PRE_DEFINED_ANIMATION\x10\x02\x12)\n%HEAD_POSE_MODE_USER_DEFINED_ANIMATION\x10\x03\x32\x85\x01\n\x13\x41udio2Face2DService\x12n\n\x07\x41nimate\x12-.nvidia.maxine.audio2face2d.v1.AnimateRequest\x1a..nvidia.maxine.audio2face2d.v1.AnimateResponse\"\x00(\x01\x30\x01\x62\x06proto3') 29 | 30 | _globals = globals() 31 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 32 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'audio2face2d_pb2', _globals) 33 | if not _descriptor._USE_C_DESCRIPTORS: 34 | DESCRIPTOR._loaded_options = None 35 | _globals['_MODELSELECTION']._serialized_start=1668 36 | _globals['_MODELSELECTION']._serialized_end=1772 37 | _globals['_ANIMATIONCROPPINGMODE']._serialized_start=1775 38 | _globals['_ANIMATIONCROPPINGMODE']._serialized_end=1971 39 | _globals['_HEADPOSEMODE']._serialized_start=1974 40 | _globals['_HEADPOSEMODE']._serialized_end=2152 41 | _globals['_ANIMATECONFIG']._serialized_start=83 42 | _globals['_ANIMATECONFIG']._serialized_end=1110 43 | _globals['_VECTOR3F']._serialized_start=1112 44 | _globals['_VECTOR3F']._serialized_end=1155 45 | _globals['_VECTOR3FSTREAM']._serialized_start=1157 46 | _globals['_VECTOR3FSTREAM']._serialized_end=1230 47 | _globals['_QUATERNION']._serialized_start=1232 48 | _globals['_QUATERNION']._serialized_end=1288 49 | 
_globals['_QUATERNIONSTREAM']._serialized_start=1290 50 | _globals['_QUATERNIONSTREAM']._serialized_end=1367 51 | _globals['_ANIMATEREQUEST']._serialized_start=1369 52 | _globals['_ANIMATEREQUEST']._serialized_end=1492 53 | _globals['_ANIMATERESPONSE']._serialized_start=1495 54 | _globals['_ANIMATERESPONSE']._serialized_end=1666 55 | _globals['_AUDIO2FACE2DSERVICE']._serialized_start=2155 56 | _globals['_AUDIO2FACE2DSERVICE']._serialized_end=2288 57 | # @@protoc_insertion_point(module_scope) 58 | -------------------------------------------------------------------------------- /audio2face-2d/assets/head_rotation_animation.csv: -------------------------------------------------------------------------------- 1 | 0.0000, 0.0000, 0.0000, 1.0000 2 | 0.0035, 0.0000, 0.0000, 1.0000 3 | 0.0070, 0.0000, 0.0000, 1.0000 4 | 0.0105, 0.0000, 0.0000, 0.9999 5 | 0.0140, 0.0000, 0.0000, 0.9999 6 | 0.0174, 0.0000, 0.0000, 0.9998 7 | 0.0209, 0.0000, 0.0000, 0.9998 8 | 0.0244, 0.0000, 0.0000, 0.9997 9 | 0.0279, 0.0000, 0.0000, 0.9996 10 | 0.0314, 0.0000, 0.0000, 0.9995 11 | 0.0349, 0.0000, 0.0000, 0.9994 12 | 0.0384, 0.0000, 0.0000, 0.9993 13 | 0.0419, 0.0000, 0.0000, 0.9991 14 | 0.0454, 0.0000, 0.0000, 0.9990 15 | 0.0488, 0.0000, 0.0000, 0.9988 16 | 0.0488, 0.0000, 0.0000, 0.9988 17 | 0.0454, 0.0000, 0.0000, 0.9990 18 | 0.0419, 0.0000, 0.0000, 0.9991 19 | 0.0384, 0.0000, 0.0000, 0.9993 20 | 0.0349, 0.0000, 0.0000, 0.9994 21 | 0.0314, 0.0000, 0.0000, 0.9995 22 | 0.0279, 0.0000, 0.0000, 0.9996 23 | 0.0244, 0.0000, 0.0000, 0.9997 24 | 0.0209, 0.0000, 0.0000, 0.9998 25 | 0.0174, 0.0000, 0.0000, 0.9998 26 | 0.0140, 0.0000, 0.0000, 0.9999 27 | 0.0105, 0.0000, 0.0000, 0.9999 28 | 0.0070, 0.0000, 0.0000, 1.0000 29 | 0.0035, 0.0000, 0.0000, 1.0000 30 | 0.0000, 0.0000, 0.0000, 1.0000 31 | 0.0000, 0.0000, 0.0000, 1.0000 32 | -0.0035, 0.0000, 0.0000, 1.0000 33 | -0.0070, 0.0000, 0.0000, 1.0000 34 | -0.0105, 0.0000, 0.0000, 0.9999 35 | -0.0140, 0.0000, 0.0000, 0.9999 36 | -0.0174, 0.0000, 0.0000, 0.9998 37 | -0.0209, 0.0000, 0.0000, 0.9998 38 | -0.0244, 0.0000, 0.0000, 0.9997 39 | -0.0279, 0.0000, 0.0000, 0.9996 40 | -0.0314, 0.0000, 0.0000, 0.9995 41 | -0.0349, 0.0000, 0.0000, 0.9994 42 | -0.0384, 0.0000, 0.0000, 0.9993 43 | -0.0419, 0.0000, 0.0000, 0.9991 44 | -0.0454, 0.0000, 0.0000, 0.9990 45 | -0.0488, 0.0000, 0.0000, 0.9988 46 | -0.0488, 0.0000, 0.0000, 0.9988 47 | -0.0454, 0.0000, 0.0000, 0.9990 48 | -0.0419, 0.0000, 0.0000, 0.9991 49 | -0.0384, 0.0000, 0.0000, 0.9993 50 | -0.0349, 0.0000, 0.0000, 0.9994 51 | -0.0314, 0.0000, 0.0000, 0.9995 52 | -0.0279, 0.0000, 0.0000, 0.9996 53 | -0.0244, 0.0000, 0.0000, 0.9997 54 | -0.0209, 0.0000, 0.0000, 0.9998 55 | -0.0174, 0.0000, 0.0000, 0.9998 56 | -0.0140, 0.0000, 0.0000, 0.9999 57 | -0.0105, 0.0000, 0.0000, 0.9999 58 | -0.0070, 0.0000, 0.0000, 1.0000 59 | -0.0035, 0.0000, 0.0000, 1.0000 60 | 0.0000, 0.0000, 0.0000, 1.0000 61 | 0.0000, 0.0000, 0.0000, 1.0000 62 | 0.0000, 0.0047, 0.0000, 1.0000 63 | 0.0000, 0.0093, 0.0000, 1.0000 64 | 0.0000, 0.0140, 0.0000, 0.9999 65 | 0.0000, 0.0186, 0.0000, 0.9998 66 | 0.0000, 0.0233, 0.0000, 0.9997 67 | 0.0000, 0.0279, 0.0000, 0.9996 68 | 0.0000, 0.0326, 0.0000, 0.9995 69 | 0.0000, 0.0372, 0.0000, 0.9993 70 | 0.0000, 0.0419, 0.0000, 0.9991 71 | 0.0000, 0.0465, 0.0000, 0.9989 72 | 0.0000, 0.0512, 0.0000, 0.9987 73 | 0.0000, 0.0558, 0.0000, 0.9984 74 | 0.0000, 0.0605, 0.0000, 0.9982 75 | 0.0000, 0.0651, 0.0000, 0.9979 76 | 0.0000, 0.0651, 0.0000, 0.9979 77 | 0.0000, 0.0605, 0.0000, 0.9982 78 | 0.0000, 0.0558, 0.0000, 0.9984 79 | 0.0000, 
0.0512, 0.0000, 0.9987 80 | 0.0000, 0.0465, 0.0000, 0.9989 81 | 0.0000, 0.0419, 0.0000, 0.9991 82 | 0.0000, 0.0372, 0.0000, 0.9993 83 | 0.0000, 0.0326, 0.0000, 0.9995 84 | 0.0000, 0.0279, 0.0000, 0.9996 85 | 0.0000, 0.0233, 0.0000, 0.9997 86 | 0.0000, 0.0186, 0.0000, 0.9998 87 | 0.0000, 0.0140, 0.0000, 0.9999 88 | 0.0000, 0.0093, 0.0000, 1.0000 89 | 0.0000, 0.0047, 0.0000, 1.0000 90 | 0.0000, 0.0000, 0.0000, 1.0000 91 | 0.0000, 0.0000, 0.0000, 1.0000 92 | 0.0000, -0.0047, 0.0000, 1.0000 93 | 0.0000, -0.0093, 0.0000, 1.0000 94 | 0.0000, -0.0140, 0.0000, 0.9999 95 | 0.0000, -0.0186, 0.0000, 0.9998 96 | 0.0000, -0.0233, 0.0000, 0.9997 97 | 0.0000, -0.0279, 0.0000, 0.9996 98 | 0.0000, -0.0326, 0.0000, 0.9995 99 | 0.0000, -0.0372, 0.0000, 0.9993 100 | 0.0000, -0.0419, 0.0000, 0.9991 101 | 0.0000, -0.0465, 0.0000, 0.9989 102 | 0.0000, -0.0512, 0.0000, 0.9987 103 | 0.0000, -0.0558, 0.0000, 0.9984 104 | 0.0000, -0.0605, 0.0000, 0.9982 105 | 0.0000, -0.0651, 0.0000, 0.9979 106 | 0.0000, -0.0651, 0.0000, 0.9979 107 | 0.0000, -0.0605, 0.0000, 0.9982 108 | 0.0000, -0.0558, 0.0000, 0.9984 109 | 0.0000, -0.0512, 0.0000, 0.9987 110 | 0.0000, -0.0465, 0.0000, 0.9989 111 | 0.0000, -0.0419, 0.0000, 0.9991 112 | 0.0000, -0.0372, 0.0000, 0.9993 113 | 0.0000, -0.0326, 0.0000, 0.9995 114 | 0.0000, -0.0279, 0.0000, 0.9996 115 | 0.0000, -0.0233, 0.0000, 0.9997 116 | 0.0000, -0.0186, 0.0000, 0.9998 117 | 0.0000, -0.0140, 0.0000, 0.9999 118 | 0.0000, -0.0093, 0.0000, 1.0000 119 | 0.0000, -0.0047, 0.0000, 1.0000 120 | 0.0000, 0.0000, 0.0000, 1.0000 121 | 0.0000, 0.0000, 0.0000, 1.0000 122 | 0.0000, 0.0000, 0.0029, 1.0000 123 | 0.0000, 0.0000, 0.0058, 1.0000 124 | 0.0000, 0.0000, 0.0087, 1.0000 125 | 0.0000, 0.0000, 0.0116, 0.9999 126 | 0.0000, 0.0000, 0.0145, 0.9999 127 | 0.0000, 0.0000, 0.0174, 0.9998 128 | 0.0000, 0.0000, 0.0204, 0.9998 129 | 0.0000, 0.0000, 0.0233, 0.9997 130 | 0.0000, 0.0000, 0.0262, 0.9997 131 | 0.0000, 0.0000, 0.0291, 0.9996 132 | 0.0000, 0.0000, 0.0320, 0.9995 133 | 0.0000, 0.0000, 0.0349, 0.9994 134 | 0.0000, 0.0000, 0.0378, 0.9993 135 | 0.0000, 0.0000, 0.0407, 0.9992 136 | 0.0000, 0.0000, 0.0407, 0.9992 137 | 0.0000, 0.0000, 0.0378, 0.9993 138 | 0.0000, 0.0000, 0.0349, 0.9994 139 | 0.0000, 0.0000, 0.0320, 0.9995 140 | 0.0000, 0.0000, 0.0291, 0.9996 141 | 0.0000, 0.0000, 0.0262, 0.9997 142 | 0.0000, 0.0000, 0.0233, 0.9997 143 | 0.0000, 0.0000, 0.0204, 0.9998 144 | 0.0000, 0.0000, 0.0174, 0.9998 145 | 0.0000, 0.0000, 0.0145, 0.9999 146 | 0.0000, 0.0000, 0.0116, 0.9999 147 | 0.0000, 0.0000, 0.0087, 1.0000 148 | 0.0000, 0.0000, 0.0058, 1.0000 149 | 0.0000, 0.0000, 0.0029, 1.0000 150 | 0.0000, 0.0000, 0.0000, 1.0000 151 | 0.0000, 0.0000, 0.0000, 1.0000 152 | 0.0000, 0.0000, -0.0029, 1.0000 153 | 0.0000, 0.0000, -0.0058, 1.0000 154 | 0.0000, 0.0000, -0.0087, 1.0000 155 | 0.0000, 0.0000, -0.0116, 0.9999 156 | 0.0000, 0.0000, -0.0145, 0.9999 157 | 0.0000, 0.0000, -0.0174, 0.9998 158 | 0.0000, 0.0000, -0.0204, 0.9998 159 | 0.0000, 0.0000, -0.0233, 0.9997 160 | 0.0000, 0.0000, -0.0262, 0.9997 161 | 0.0000, 0.0000, -0.0291, 0.9996 162 | 0.0000, 0.0000, -0.0320, 0.9995 163 | 0.0000, 0.0000, -0.0349, 0.9994 164 | 0.0000, 0.0000, -0.0378, 0.9993 165 | 0.0000, 0.0000, -0.0407, 0.9992 166 | 0.0000, 0.0000, -0.0407, 0.9992 167 | 0.0000, 0.0000, -0.0378, 0.9993 168 | 0.0000, 0.0000, -0.0349, 0.9994 169 | 0.0000, 0.0000, -0.0320, 0.9995 170 | 0.0000, 0.0000, -0.0291, 0.9996 171 | 0.0000, 0.0000, -0.0262, 0.9997 172 | 0.0000, 0.0000, -0.0233, 0.9997 173 | 0.0000, 0.0000, -0.0204, 0.9998 174 | 0.0000, 0.0000, 
-0.0174, 0.9998 175 | 0.0000, 0.0000, -0.0145, 0.9999 176 | 0.0000, 0.0000, -0.0116, 0.9999 177 | 0.0000, 0.0000, -0.0087, 1.0000 178 | 0.0000, 0.0000, -0.0058, 1.0000 179 | 0.0000, 0.0000, -0.0029, 1.0000 180 | 0.0000, 0.0000, 0.0000, 1.0000 181 | -------------------------------------------------------------------------------- /eye-contact/protos/proto/nvidia/maxine/eyecontact/v1/eyecontact.proto: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | // 3 | // NVIDIA CORPORATION and its licensors retain all intellectual property 4 | // and proprietary rights in and to this software, related documentation 5 | // and any modifications thereto. Any use, reproduction, disclosure or 6 | // distribution of this software and related documentation without an express 7 | // license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | syntax = "proto3"; 10 | 11 | package nvidia.maxine.eyecontact.v1; 12 | 13 | import "google/protobuf/any.proto"; 14 | import "google/protobuf/empty.proto"; 15 | 16 | // The MaxineEyeContactService provides APIs to run the 17 | // Maxine Eye Contact feature. 18 | service MaxineEyeContactService { 19 | 20 | // RedirectGaze is a bidirectional streaming API to run the 21 | // Maxine Eye Contact feature on mp4 video files. 22 | // 23 | // The input message can contain RedirectGazeConfig or bytes. 24 | // In the beginning of the stream, a request with RedirectGazeConfig may 25 | // be sent to the server to set the feature's parameters. 26 | // The server will echo back a response with the config to signify that the 27 | // parameters were properly set. If not configured, default values will be 28 | // used for the feature's parameters. Any RedirectGazeConfig sent during 29 | // the middle of the stream will be ignored. 30 | // 31 | // After the optional configuration, the client streams the input mp4 file in 32 | // chunks in the input message and receives the output mp4 file in chunks in 33 | // the output message. 34 | // 35 | // The client should only pass one video file per API invocation and the 36 | // configuration, if set, is applied to only that invocation. 37 | rpc RedirectGaze(stream RedirectGazeRequest) 38 | returns (stream RedirectGazeResponse) {} 39 | } 40 | 41 | // LossyEncoding specifies parameters for lossy video compression 42 | message LossyEncoding { 43 | // Target bitrate for video encoding in bits per second 44 | optional uint32 bitrate = 1; 45 | // Interval between IDR frames (keyframes) in number of frames 46 | optional uint32 idr_interval = 2; 47 | } 48 | 49 | // CustomEncodingParams allows specifying custom encoding parameters 50 | message CustomEncodingParams { 51 | // Map of string key-value pairs for custom encoding configuration 52 | // Each value can be any protobuf message type using google.protobuf.Any 53 | map<string, google.protobuf.Any> custom = 1; 54 | } 55 | 56 | // OutputVideoEncoding specifies parameters for the output video encoding 57 | message OutputVideoEncoding { 58 | // Only one of these encoding types can be specified 59 | oneof encoding_type { 60 | // If true, use lossless encoding with no compression 61 | bool lossless = 1; 62 | // Use lossy encoding with configurable bitrate and keyframe settings 63 | LossyEncoding lossy = 2; 64 | // Use custom encoding parameters specified as key-value pairs 65 | CustomEncodingParams custom_encoding = 3; 66 | } 67 | } 68 | 69 | // Configuration for Maxine Eye Contact.
70 | message RedirectGazeConfig { 71 | // Flag to control temporal filtering 72 | // Default: 0xffffffff 73 | optional uint32 temporal = 1; 74 | 75 | // Flag to toggle detection of eye closure and occlusion on/off 76 | // Default: 0 | Range: [0, 1] 77 | optional uint32 detect_closure = 2; 78 | 79 | // Eye size sensitivity parameter 80 | // Default: 3 | Range: [2, 6] 81 | optional uint32 eye_size_sensitivity = 3; 82 | 83 | // Flag to toggle look away on/off. 84 | // Default: 0 | Range: [0, 1] 85 | optional uint32 enable_lookaway = 4; 86 | 87 | // Maximum value of gaze offset angle (degrees) during a random look away 88 | // Default: 5 | Range: [1, 10] 89 | optional uint32 lookaway_max_offset = 5; 90 | 91 | // Minimum limit for the number of frames at which random look away occurs 92 | // Default: 100 | Range: [1, 600] 93 | optional uint32 lookaway_interval_min = 6; 94 | 95 | // Range for picking the number of frames at which random look away occurs 96 | // Default: 250 | Range: [1, 600] 97 | optional uint32 lookaway_interval_range = 7; 98 | 99 | // Gaze pitch threshold (degrees) at which the redirection starts 100 | // transitioning 101 | // Default: 20 | Range: [10, 35] 102 | optional float gaze_pitch_threshold_low = 8; 103 | 104 | // Gaze pitch threshold (degrees) at which the redirection is equal to 105 | // estimated gaze 106 | // Default: 30 | Range: [10, 35] 107 | optional float gaze_pitch_threshold_high = 9; 108 | 109 | // Gaze yaw threshold (degrees) at which the redirection starts 110 | // transitioning 111 | // Default: 20 | Range: [10, 35] 112 | optional float gaze_yaw_threshold_low = 10; 113 | 114 | // Gaze yaw threshold (degrees) at which the redirection 115 | // is equal to estimated gaze 116 | // Default: 30 | Range: [10, 35] 117 | optional float gaze_yaw_threshold_high = 11; 118 | 119 | // Head pose pitch threshold (degrees) at which the redirection 120 | // starts transitioning away from camera towards estimated gaze 121 | // Default: 15 | Range: [10, 35] 122 | optional float head_pitch_threshold_low = 12; 123 | 124 | // Head pose pitch threshold (degrees) at which the redirection is 125 | // equal to estimated gaze 126 | // Default: 15 | Range: [10, 35] 127 | optional float head_pitch_threshold_high = 13; 128 | 129 | // Head pose yaw threshold (degrees) at which the redirection starts 130 | // transitioning 131 | // Default: 15 | Range: [10, 35] 132 | optional float head_yaw_threshold_low = 14; 133 | 134 | // Head pose yaw threshold (degrees) at which the redirection is equal 135 | // to estimated gaze 136 | // Default: 15 | Range: [10, 35] 137 | optional float head_yaw_threshold_high = 15; 138 | 139 | // Output video encoding parameters 140 | optional OutputVideoEncoding output_video_encoding = 16; 141 | } 142 | 143 | // Input message for RedirectGaze API. 144 | // May contain feature configuration or a chunk of input mp4 file data. 145 | message RedirectGazeRequest { 146 | oneof stream_input { 147 | // Configuration parameters for the request 148 | RedirectGazeConfig config = 1; 149 | 150 | // mp4 file based video data 151 | bytes video_file_data = 2; 152 | } 153 | } 154 | 155 | // Output message for RedirectGaze API. 156 | // May contain feature configuration, a chunk of output mp4 file data 157 | // or an empty message to keep the connection alive.
158 | message RedirectGazeResponse { 159 | oneof stream_output { 160 | // Configuration parameters used 161 | RedirectGazeConfig config = 1; 162 | 163 | // Output mp4 video stream data 164 | bytes video_file_data = 2; 165 | 166 | // Keep alive signaling flag 167 | google.protobuf.Empty keepalive = 3; 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /audio2face-2d/python/interfaces/audio2face2d_pb2.pyi: -------------------------------------------------------------------------------- 1 | from google.protobuf import empty_pb2 as _empty_pb2 2 | from google.protobuf.internal import containers as _containers 3 | from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper 4 | from google.protobuf import descriptor as _descriptor 5 | from google.protobuf import message as _message 6 | from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union 7 | 8 | DESCRIPTOR: _descriptor.FileDescriptor 9 | 10 | class ModelSelection(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): 11 | __slots__ = () 12 | MODEL_SELECTION_UNSPECIFIED: _ClassVar[ModelSelection] 13 | MODEL_SELECTION_PERF: _ClassVar[ModelSelection] 14 | MODEL_SELECTION_QUALITY: _ClassVar[ModelSelection] 15 | 16 | class AnimationCroppingMode(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): 17 | __slots__ = () 18 | ANIMATION_CROPPING_MODE_UNSPECIFIED: _ClassVar[AnimationCroppingMode] 19 | ANIMATION_CROPPING_MODE_FACEBOX: _ClassVar[AnimationCroppingMode] 20 | ANIMATION_CROPPING_MODE_REGISTRATION_BLENDING: _ClassVar[AnimationCroppingMode] 21 | ANIMATION_CROPPING_MODE_INSET_BLENDING: _ClassVar[AnimationCroppingMode] 22 | 23 | class HeadPoseMode(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): 24 | __slots__ = () 25 | HEAD_POSE_MODE_UNSPECIFIED: _ClassVar[HeadPoseMode] 26 | HEAD_POSE_MODE_RETAIN_FROM_PORTRAIT_IMAGE: _ClassVar[HeadPoseMode] 27 | HEAD_POSE_MODE_PRE_DEFINED_ANIMATION: _ClassVar[HeadPoseMode] 28 | HEAD_POSE_MODE_USER_DEFINED_ANIMATION: _ClassVar[HeadPoseMode] 29 | MODEL_SELECTION_UNSPECIFIED: ModelSelection 30 | MODEL_SELECTION_PERF: ModelSelection 31 | MODEL_SELECTION_QUALITY: ModelSelection 32 | ANIMATION_CROPPING_MODE_UNSPECIFIED: AnimationCroppingMode 33 | ANIMATION_CROPPING_MODE_FACEBOX: AnimationCroppingMode 34 | ANIMATION_CROPPING_MODE_REGISTRATION_BLENDING: AnimationCroppingMode 35 | ANIMATION_CROPPING_MODE_INSET_BLENDING: AnimationCroppingMode 36 | HEAD_POSE_MODE_UNSPECIFIED: HeadPoseMode 37 | HEAD_POSE_MODE_RETAIN_FROM_PORTRAIT_IMAGE: HeadPoseMode 38 | HEAD_POSE_MODE_PRE_DEFINED_ANIMATION: HeadPoseMode 39 | HEAD_POSE_MODE_USER_DEFINED_ANIMATION: HeadPoseMode 40 | 41 | class AnimateConfig(_message.Message): 42 | __slots__ = ("portrait_image", "model_selection", "animation_crop_mode", "head_pose_mode", "enable_lookaway", "lookaway_max_offset", "lookaway_interval_range", "lookaway_interval_min", "blink_frequency", "blink_duration", "mouth_expression_multiplier", "head_pose_multiplier", "input_head_rotation", "input_head_translation") 43 | PORTRAIT_IMAGE_FIELD_NUMBER: _ClassVar[int] 44 | MODEL_SELECTION_FIELD_NUMBER: _ClassVar[int] 45 | ANIMATION_CROP_MODE_FIELD_NUMBER: _ClassVar[int] 46 | HEAD_POSE_MODE_FIELD_NUMBER: _ClassVar[int] 47 | ENABLE_LOOKAWAY_FIELD_NUMBER: _ClassVar[int] 48 | LOOKAWAY_MAX_OFFSET_FIELD_NUMBER: _ClassVar[int] 49 | LOOKAWAY_INTERVAL_RANGE_FIELD_NUMBER: _ClassVar[int] 50 | LOOKAWAY_INTERVAL_MIN_FIELD_NUMBER: _ClassVar[int] 51 | BLINK_FREQUENCY_FIELD_NUMBER: 
_ClassVar[int] 52 | BLINK_DURATION_FIELD_NUMBER: _ClassVar[int] 53 | MOUTH_EXPRESSION_MULTIPLIER_FIELD_NUMBER: _ClassVar[int] 54 | HEAD_POSE_MULTIPLIER_FIELD_NUMBER: _ClassVar[int] 55 | INPUT_HEAD_ROTATION_FIELD_NUMBER: _ClassVar[int] 56 | INPUT_HEAD_TRANSLATION_FIELD_NUMBER: _ClassVar[int] 57 | portrait_image: bytes 58 | model_selection: ModelSelection 59 | animation_crop_mode: AnimationCroppingMode 60 | head_pose_mode: HeadPoseMode 61 | enable_lookaway: bool 62 | lookaway_max_offset: int 63 | lookaway_interval_range: int 64 | lookaway_interval_min: int 65 | blink_frequency: int 66 | blink_duration: int 67 | mouth_expression_multiplier: float 68 | head_pose_multiplier: float 69 | input_head_rotation: QuaternionStream 70 | input_head_translation: Vector3fStream 71 | def __init__(self, portrait_image: _Optional[bytes] = ..., model_selection: _Optional[_Union[ModelSelection, str]] = ..., animation_crop_mode: _Optional[_Union[AnimationCroppingMode, str]] = ..., head_pose_mode: _Optional[_Union[HeadPoseMode, str]] = ..., enable_lookaway: bool = ..., lookaway_max_offset: _Optional[int] = ..., lookaway_interval_range: _Optional[int] = ..., lookaway_interval_min: _Optional[int] = ..., blink_frequency: _Optional[int] = ..., blink_duration: _Optional[int] = ..., mouth_expression_multiplier: _Optional[float] = ..., head_pose_multiplier: _Optional[float] = ..., input_head_rotation: _Optional[_Union[QuaternionStream, _Mapping]] = ..., input_head_translation: _Optional[_Union[Vector3fStream, _Mapping]] = ...) -> None: ... 72 | 73 | class Vector3f(_message.Message): 74 | __slots__ = ("x", "y", "z") 75 | X_FIELD_NUMBER: _ClassVar[int] 76 | Y_FIELD_NUMBER: _ClassVar[int] 77 | Z_FIELD_NUMBER: _ClassVar[int] 78 | x: float 79 | y: float 80 | z: float 81 | def __init__(self, x: _Optional[float] = ..., y: _Optional[float] = ..., z: _Optional[float] = ...) -> None: ... 82 | 83 | class Vector3fStream(_message.Message): 84 | __slots__ = ("values",) 85 | VALUES_FIELD_NUMBER: _ClassVar[int] 86 | values: _containers.RepeatedCompositeFieldContainer[Vector3f] 87 | def __init__(self, values: _Optional[_Iterable[_Union[Vector3f, _Mapping]]] = ...) -> None: ... 88 | 89 | class Quaternion(_message.Message): 90 | __slots__ = ("x", "y", "z", "w") 91 | X_FIELD_NUMBER: _ClassVar[int] 92 | Y_FIELD_NUMBER: _ClassVar[int] 93 | Z_FIELD_NUMBER: _ClassVar[int] 94 | W_FIELD_NUMBER: _ClassVar[int] 95 | x: float 96 | y: float 97 | z: float 98 | w: float 99 | def __init__(self, x: _Optional[float] = ..., y: _Optional[float] = ..., z: _Optional[float] = ..., w: _Optional[float] = ...) -> None: ... 100 | 101 | class QuaternionStream(_message.Message): 102 | __slots__ = ("values",) 103 | VALUES_FIELD_NUMBER: _ClassVar[int] 104 | values: _containers.RepeatedCompositeFieldContainer[Quaternion] 105 | def __init__(self, values: _Optional[_Iterable[_Union[Quaternion, _Mapping]]] = ...) -> None: ... 106 | 107 | class AnimateRequest(_message.Message): 108 | __slots__ = ("config", "audio_file_data") 109 | CONFIG_FIELD_NUMBER: _ClassVar[int] 110 | AUDIO_FILE_DATA_FIELD_NUMBER: _ClassVar[int] 111 | config: AnimateConfig 112 | audio_file_data: bytes 113 | def __init__(self, config: _Optional[_Union[AnimateConfig, _Mapping]] = ..., audio_file_data: _Optional[bytes] = ...) -> None: ... 
114 | 115 | class AnimateResponse(_message.Message): 116 | __slots__ = ("config", "video_file_data", "keep_alive") 117 | CONFIG_FIELD_NUMBER: _ClassVar[int] 118 | VIDEO_FILE_DATA_FIELD_NUMBER: _ClassVar[int] 119 | KEEP_ALIVE_FIELD_NUMBER: _ClassVar[int] 120 | config: AnimateConfig 121 | video_file_data: bytes 122 | keep_alive: _empty_pb2.Empty 123 | def __init__(self, config: _Optional[_Union[AnimateConfig, _Mapping]] = ..., video_file_data: _Optional[bytes] = ..., keep_alive: _Optional[_Union[_empty_pb2.Empty, _Mapping]] = ...) -> None: ... 124 | -------------------------------------------------------------------------------- /audio2face-2d/README.md: -------------------------------------------------------------------------------- 1 | 2 | # NVIDIA Maxine Audio2Face-2D NIM Client 3 | 4 | This package has a sample client which demonstrates interaction with a Maxine Audio2Face-2D NIM. 5 | 6 | ## Getting Started 7 | 8 | NVIDIA Maxine NIM Client packages use gRPC APIs. Instructions below demonstrate usage of Audio2Face-2D NIM using Python and NodeJS gRPC clients. 9 | 10 | ## Pre-requisites 11 | 12 | Access to NVIDIA Maxine Audio2Face-2D NIM Container / Service 13 | 14 | ### Python 15 | - Ensure you have Python 3.10 or above installed on your system. Please refer to the [Python documentation](https://www.python.org/downloads/) for download and installation instructions. 16 | 17 | ### NodeJS 18 | - Ensure you have NodeJS 18 or above installed on your system. Please refer to the [NodeJS documentation](https://nodejs.org/en/download/package-manager) for download and installation instructions. 19 | 20 | ## Usage guide 21 | 22 | ### 1. Clone the repository 23 | 24 | ```bash 25 | git clone https://github.com/nvidia-maxine/nim-clients.git 26 | 27 | # Go to the 'audio2face-2d' folder 28 | cd nim-clients/audio2face-2d/ 29 | ``` 30 | 31 | ### 2. Install dependencies 32 | #### Python 33 | ```bash 34 | # Install all the required packages using requirements.txt file in python directory 35 | pip install -r python/requirements.txt 36 | ``` 37 | 38 | #### NodeJS 39 | ```bash 40 | # Install all the required packages using package.json file in nodejs directory 41 | npm install --prefix nodejs/ 42 | ``` 43 | 44 | ### 3. Compile the Protos (optional) 45 | 46 | If you want to use the client code provided in the github Client repository, you can skip this step. 47 | The proto files are available in the audio2face-2d/protos folder. You can compile them to generate client interfaces in your preferred programming language. For more details, refer to [Supported languages](https://grpc.io/docs/languages/) in the gRPC documentation. 48 | 49 | Here is an example of how to compile the protos for Python and Node.js on Linux and Windows. 50 | 51 | #### Python 52 | 53 | The `grpcio` version needed for compilation can be referred at `requirements.txt` 54 | 55 | To compile protos on Linux, run: 56 | ```bash 57 | # Go to audio2face-2d/protos/linux/python folder 58 | cd audio2face-2d/protos/linux/python 59 | 60 | chmod +x compile_protos.sh 61 | ./compile_protos.sh 62 | ``` 63 | 64 | To compile protos on Windows, run: 65 | ```bash 66 | # Go to audio2face-2d/protos/windows/python folder 67 | cd audio2face-2d/protos/windows/python 68 | 69 | ./compile_protos.bat 70 | ``` 71 | The compiled proto files will be generated in `nim-clients/audio2face-2d/python/interfaces` directory. 72 | 73 | #### NodeJS 74 | Before running the NodeJS client, you can choose to compile the protos. 
75 | 76 | To compile protos on Linux, run: 77 | ```bash 78 | # Go to audio2face-2d/protos/linux/nodejs folder 79 | cd audio2face-2d/protos/linux/nodejs 80 | 81 | chmod +x compile_protos.sh 82 | ./compile_protos.sh 83 | ``` 84 | 85 | To compile protos on Windows, run: 86 | ```bash 87 | # Go to audio2face-2d/protos/windows/nodejs folder 88 | cd audio2face-2d/protos/windows/nodejs 89 | 90 | ./compile_protos.bat 91 | ``` 92 | The compiled proto files will be generated in the `nim-clients/audio2face-2d/nodejs/interfaces` directory. 93 | 94 | ### 4. Host the NIM Server 95 | 96 | Before running the client part of Maxine Audio2Face-2D, please set up a server. 97 | The simplest way to do that is to follow the [quick start guide](https://docs.nvidia.com/nim/maxine/audio2face-2d/latest/getting-started.html). 98 | 99 | ### 5. Run the Client 100 | #### Python 101 | - Go to the scripts directory 102 | 103 | ```bash 104 | cd scripts 105 | ``` 106 | 107 | #### Usage for Hosted NIM request 108 | 109 | ```bash 110 | python audio2face-2d.py \ 111 | --target <server_ip:port> \ 112 | --audio-input <input audio file path> \ 113 | --portrait-input <input portrait image path> \ 114 | --output <output video file path> \ 115 | --head-rotation-animation-filepath <head rotation animation csv path> \ 116 | --head-translation-animation-filepath <head translation animation csv path> \ 117 | --ssl-mode <ssl mode> \ 118 | --ssl-key <ssl key file path> \ 119 | --ssl-cert <ssl cert file path> \ 120 | --ssl-root-cert <ssl root cert file path> 121 | ``` 122 | 123 | To view details of command line arguments, run this command: 124 | ```bash 125 | python audio2face-2d.py -h 126 | ``` 127 | 128 | - Example command to process the packaged sample inputs 129 | 130 | The following command uses the sample audio and portrait files and generates an `out.mp4` file in the current folder: 131 | 132 | ```bash 133 | python audio2face-2d.py --target 127.0.0.1:8001 --audio-input ../assets/sample_audio.wav --portrait-input ../assets/sample_portrait_image.png --output out.mp4 134 | ``` 135 | 136 | #### NodeJS 137 | - Go to the scripts directory 138 | 139 | ```bash 140 | cd scripts 141 | ``` 142 | 143 | #### Usage for Hosted NIM request 144 | 145 | ```bash 146 | node audio2face-2d.js \ 147 | --target <server_ip:port> \ 148 | --audio-input <input audio file path> \ 149 | --portrait-input <input portrait image path> \ 150 | --output <output video file path> \ 151 | --format <audio format: wav or pcm> \ 152 | --head-rotation-animation-filepath <head rotation animation csv path> \ 153 | --head-translation-animation-filepath <head translation animation csv path> \ 154 | --ssl-mode <ssl mode> \ 155 | --ssl-key <ssl key file path> \ 156 | --ssl-cert <ssl cert file path> \ 157 | --ssl-root-cert <ssl root cert file path> 158 | ``` 159 | 160 | - Example command to process the packaged sample inputs 161 | 162 | The following command uses the sample audio and portrait files and generates an `out.mp4` file in the current folder: 163 | 164 | ```bash 165 | node audio2face-2d.js --target 127.0.0.1:8001 --audio-input ../assets/sample_audio.wav --portrait-input ../assets/sample_portrait_image.png --output out.mp4 --format wav 166 | ``` 167 | 168 | The NodeJS client supports both `wav` and `pcm` audio formats. The `--format` option can be used to specify the format. The default format is `wav`. 169 | 170 | The default configuration expected for PCM audio format in the NodeJS client is as follows: 171 | 172 | - Sample rate: 48kHz 173 | - Channels: Mono-channel 174 | - Bit Depth: 16 175 | 176 | If any other config is needed, please change it in the NodeJS client `audio2face-2d/nodejs/scripts/audio2face-2d.js` in the function `sendInputAudioChunks()`. 177 | 178 | #### Note 179 | - The supported audio file formats are `wav` and `pcm`; the supported image formats are `jpg`, `jpeg`, and `png`. 180 | - The supported languages are English, Spanish, Mandarin, and French. A sample file for English is provided in the assets directory.
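For orientation, the following is a minimal, hypothetical Python sketch of the gRPC flow that the packaged `audio2face-2d.py` client wraps, based on the `Audio2Face2DService.Animate` API defined in the proto: the first `AnimateRequest` carries an `AnimateConfig` with the mandatory `portrait_image`, the remaining requests stream the wav file in chunks, and `video_file_data` chunks from the responses are written to disk. The module names assume the generated interfaces in `python/interfaces` are importable; the chunk size, file names, and the insecure local channel are illustrative assumptions, not part of the shipped client.

```python
# Illustrative sketch only; audio2face-2d/python/scripts/audio2face-2d.py is the full client.
import grpc

import audio2face2d_pb2
import audio2face2d_pb2_grpc

CHUNK_SIZE = 1024 * 1024  # illustrative chunk size for streaming the wav file


def request_stream(portrait_path: str, audio_path: str):
    """Yield the mandatory config first, then the audio file in chunks."""
    with open(portrait_path, "rb") as f:
        config = audio2face2d_pb2.AnimateConfig(portrait_image=f.read())
    yield audio2face2d_pb2.AnimateRequest(config=config)
    with open(audio_path, "rb") as f:
        while chunk := f.read(CHUNK_SIZE):
            yield audio2face2d_pb2.AnimateRequest(audio_file_data=chunk)


with grpc.insecure_channel("127.0.0.1:8001") as channel:
    stub = audio2face2d_pb2_grpc.Audio2Face2DServiceStub(channel)
    with open("out.mp4", "wb") as out:
        for response in stub.Animate(request_stream("portrait.png", "audio.wav")):
            if response.HasField("video_file_data"):
                out.write(response.video_file_data)
            # The config echo and keep_alive messages are ignored here.
```

In practice, prefer the packaged client, which also handles the SSL modes, head pose animation inputs, and error reporting described in this README.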
181 | 182 | #### Command line arguments 183 | 184 | - `-h, --help` show this help message and exit 185 | - `--target` defaults to `127.0.0.1:8001` 186 | - `--portrait-input` defaults to `../../assets/sample_portrait_image.png` 187 | - `--audio-input` defaults to `../../assets/sample_audio.wav` 188 | - `--output` defaults to the current directory, where the output file will be generated with the name `output.mp4` 189 | - `--head-rotation-animation-filepath` defaults to `../../assets/head_rotation_animation.csv`. Used only if head_pose_mode is `HeadPoseMode.HEAD_POSE_MODE_USER_DEFINED_ANIMATION`. 190 | - `--head-translation-animation-filepath` defaults to `../../assets/head_translation_animation.csv`. Used only if head_pose_mode is `HeadPoseMode.HEAD_POSE_MODE_USER_DEFINED_ANIMATION`. 191 | - `--ssl-mode` defaults to DISABLED (no SSL). 192 | - `--ssl-key` defaults to `../ssl_key/ssl_key_client.pem`. Used only if ssl-mode is `MTLS`. 193 | - `--ssl-cert` defaults to `../ssl_key/ssl_cert_client.pem`. Used only if ssl-mode is `MTLS`. 194 | - `--ssl-root-cert` defaults to `../ssl_key/ssl_ca_cert.pem`. Used only if ssl-mode is `MTLS` or `TLS`. 195 | 196 | Only for NodeJS 197 | 198 | - `--format` - The audio format (wav or pcm) 199 | 200 | Refer to the [docs](https://docs.nvidia.com/nim/maxine/audio2face-2d/latest/index.html) for more information. 201 | -------------------------------------------------------------------------------- /audio2face-2d/protos/proto/nvidia/maxine/audio2face2d/v1/audio2face2d.proto: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom the 8 | // Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | // DEALINGS IN THE SOFTWARE. 20 | 21 | 22 | syntax = "proto3"; 23 | 24 | package nvidia.maxine.audio2face2d.v1; 25 | 26 | import "google/protobuf/empty.proto"; 27 | 28 | // The Audio2Face2DService provides APIs to run the 29 | // Maxine Audio to Face - 2D feature. 30 | service Audio2Face2DService { 31 | // Animate is a bidirectional streaming API to run the 32 | // Audio2Face-2D. 33 | // 34 | // The input message can contain AnimateConfig or bytes. 35 | // In the beginning of the stream, a request with AnimateConfig should 36 | // be sent to the server to set the feature's parameters. 37 | // The server will echo back a response with the config to signify that the 38 | // parameters were properly set.
It is mandatory to set the portrait_image 39 | // config, other configuration parameters are optional and a default value will 40 | // be used if not set. Any AnimateConfig sent during the middle of the stream 41 | // will be ignored. 42 | // 43 | // After the configuration step, the client streams the input wav file in 44 | // chunks in the input message and receives the output mp4 file in chunks in 45 | // the output message. While the inference is running, the server will periodically 46 | // echo empty message to keep the channel alive. The client should ignore this message. 47 | // 48 | // It is recommended that the client should pass one file per API invocation. 49 | // The configurations are also set per invocation. 50 | rpc Animate(stream AnimateRequest) 51 | returns (stream AnimateResponse) { 52 | } 53 | } 54 | 55 | // Configuration for Animate API. 56 | message AnimateConfig { 57 | // Portrait image (jpg/jpeg/png) 58 | bytes portrait_image = 1; 59 | 60 | // Model selection: 0 - performance or 1 - quality 61 | // Default: quality 62 | optional ModelSelection model_selection = 2; 63 | 64 | // Audio2Face animation cropping mode 65 | // Default: ANIMATION_CROPPING_MODE_REGISTRATION_BLENDING 66 | optional AnimationCroppingMode animation_crop_mode = 3; 67 | 68 | // Head Pose Animation mode 69 | // Default: HEAD_POSE_MODE_RETAIN_FROM_PORTRAIT_IMAGE 70 | optional HeadPoseMode head_pose_mode = 4; 71 | 72 | // Flag to enable Gaze look Away 73 | // Default: false 74 | optional bool enable_lookaway = 5; 75 | 76 | // The maximum integer value of gaze offset when lookaway is enabled 77 | // Default:20 Unit: Degrees 78 | optional uint32 lookaway_max_offset = 6; 79 | 80 | // Range for picking the number of frames at which random look away occurs 81 | // Default: 90 | Range: [1, 600] | Unit: Frames 82 | optional uint32 lookaway_interval_range = 7; 83 | 84 | // Minimum limit for the number of frames at which random look away occurs 85 | // Default: 240 | Range: [1, 600] | Unit: Frames 86 | optional uint32 lookaway_interval_min = 8; 87 | 88 | // The frequency of eye blinks per minute 89 | // Default: 6 | Range: [0, 120] | Unit: Frames 90 | // Note: 0 = disable eye blink 91 | optional uint32 blink_frequency = 9; 92 | 93 | // The duration of an eye blink 94 | // Default: 10 | Range: [2, 150] | Unit: Frames 95 | optional uint32 blink_duration = 10; 96 | 97 | // A multiplier to exaggerate the mouth expression. 98 | // Default: 1.4f (for quality mode), 1.0f (for performance mode) 99 | // Range: [1.0f, 2.0f] 100 | optional float mouth_expression_multiplier = 11; 101 | 102 | // A multiplier to dampen range of Head Pose Animation 103 | // This is applicable only for HEAD_POSE_MODE_PRE_DEFINED_ANIMATION 104 | // Default: 1.0f (quality mode), 0.4f (performance mode) | Range: [0.0f, 1.0f] 105 | optional float head_pose_multiplier = 12; 106 | 107 | // Quaternion that provides the head pose rotation to be applied. 108 | // This is valid only for HEAD_POSE_MODE_USER_DEFINED_ANIMATION 109 | optional QuaternionStream input_head_rotation = 13; 110 | 111 | // Vector3f that provides the head pose rotation to be applied. 
112 | // This is valid only for HEAD_POSE_MODE_USER_DEFINED_ANIMATION 113 | optional Vector3fStream input_head_translation = 14; 114 | } 115 | 116 | // Model selection option 117 | enum ModelSelection { 118 | MODEL_SELECTION_UNSPECIFIED = 0; 119 | // Performance model 120 | MODEL_SELECTION_PERF = 1; 121 | // Quality model 122 | MODEL_SELECTION_QUALITY = 2; 123 | } 124 | 125 | // Animation cropping mode which controls output video resolution 126 | enum AnimationCroppingMode { 127 | ANIMATION_CROPPING_MODE_UNSPECIFIED = 0; 128 | 129 | // Produces fixed resolution of 512x512 animation output 130 | // Face crop will be extracted from the portrait image provided 131 | ANIMATION_CROPPING_MODE_FACEBOX = 1; 132 | 133 | // The animated face crop will be registered and blended back into the portrait photo. 134 | // The output image includes both the animated 135 | // face crop and the surrounding area, with the same resolution as the portrait photo 136 | ANIMATION_CROPPING_MODE_REGISTRATION_BLENDING = 2; 137 | 138 | // Light weight and faster version of mode 2, without registration. 139 | // Preferred over mode 3 if quality is the primary concern 140 | ANIMATION_CROPPING_MODE_INSET_BLENDING = 3; 141 | } 142 | 143 | // Head Pose mode 144 | enum HeadPoseMode{ 145 | HEAD_POSE_MODE_UNSPECIFIED = 0; 146 | // retains the head pose from input portrait image 147 | HEAD_POSE_MODE_RETAIN_FROM_PORTRAIT_IMAGE = 1; 148 | // NIM generates a pre-defined animation for the head pose 149 | HEAD_POSE_MODE_PRE_DEFINED_ANIMATION = 2; 150 | // NIM generates headpose animation based on headpose_inputs provided by user 151 | HEAD_POSE_MODE_USER_DEFINED_ANIMATION = 3; 152 | } 153 | 154 | // Generic 3D float vector 155 | message Vector3f { 156 | // x-coordinate 157 | float x = 1; 158 | // y-coordinate 159 | float y = 2; 160 | // z-coordinate 161 | float z = 3; 162 | } 163 | 164 | // Stream of 3D-Vectors 165 | message Vector3fStream{ 166 | repeated Vector3f values = 1; 167 | } 168 | 169 | // Generic Quaternion 170 | message Quaternion { 171 | // x-coordinate 172 | float x = 1; 173 | // y-coordinate 174 | float y = 2; 175 | // z-coordinate 176 | float z = 3; 177 | // w-coordinate 178 | float w = 4; 179 | } 180 | 181 | // Stream of Quaternions 182 | message QuaternionStream{ 183 | repeated Quaternion values = 1; 184 | } 185 | 186 | // Input message for Animate API. 187 | // May contain feature configuration or a chunk of input wav file data. 188 | message AnimateRequest { 189 | oneof stream_input { 190 | // Configuration parameters for the request 191 | AnimateConfig config = 1; 192 | 193 | // .wav file based audio data 194 | bytes audio_file_data = 2; 195 | } 196 | } 197 | 198 | // Output message for Animate API. 199 | // May contain feature configuration, a chunk of output mp4 file data 200 | // or an empty message to keep the connection alive. 201 | message AnimateResponse { 202 | oneof stream_output { 203 | // Configuration parameters used 204 | AnimateConfig config = 1; 205 | 206 | // Output .mp4 video stream data 207 | bytes video_file_data = 2; 208 | 209 | // Keep alive signaling flag 210 | google.protobuf.Empty keep_alive = 3; 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /eye-contact/scripts/eye-contact.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 20 | 21 | """Main script for running Eye Contact inference with video files. 22 | 23 | This script provides functionality to: 24 | - Parse command line arguments for configuring Eye Contact 25 | - Set up gRPC communication with the Eye Contact service 26 | - Send video data to the service with streaming support 27 | - Process responses and write output video files 28 | 29 | The script supports different SSL modes for secure communication and handles 30 | various input/output formats and configurations. 31 | """ 32 | 33 | # Standard library imports 34 | import os 35 | import sys 36 | import time 37 | from typing import Iterator 38 | import pathlib 39 | 40 | # Third-party imports 41 | import grpc 42 | from tqdm import tqdm 43 | 44 | # Setup paths for local imports 45 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))) 46 | SCRIPT_PATH = str(pathlib.Path(__file__).parent.resolve()) 47 | sys.path.append(os.path.join(SCRIPT_PATH, "../interfaces")) 48 | 49 | # Local imports 50 | from config import EyeContactConfig, parse_args # noqa: E402 51 | from constants import DATA_CHUNK_SIZE # noqa: E402 52 | from utils.utils import ( # noqa: E402 53 | create_channel_credentials, 54 | validate_ssl_args, 55 | validate_preview_args, 56 | create_request_metadata, 57 | ) 58 | import eyecontact_pb2 # noqa: E402 59 | import eyecontact_pb2_grpc # noqa: E402 60 | 61 | 62 | def generate_request_for_inference( 63 | eyecontact_config: EyeContactConfig, 64 | ) -> Iterator[eyecontact_pb2.RedirectGazeRequest]: 65 | """Generate a stream of RedirectGazeRequest messages for the Eye Contact service. 
66 | 67 | Args: 68 | eyecontact_config: Configuration object containing all Eye Contact 69 | parameters 70 | 71 | Yields: 72 | RedirectGazeRequest messages containing either configuration or chunks 73 | of input data 74 | 75 | Raises: 76 | RuntimeError: If there are errors reading input files 77 | """ 78 | print("Generating request for inference") 79 | 80 | # Get configuration parameters 81 | params = eyecontact_config.get_config_params() 82 | 83 | print("Sending data for inference") 84 | 85 | # Send config first 86 | yield eyecontact_pb2.RedirectGazeRequest(config=eyecontact_pb2.RedirectGazeConfig(**params)) 87 | 88 | # Send video data in chunks 89 | video_chunk_counter = 0 90 | 91 | try: 92 | with open(eyecontact_config.video_filepath, "rb") as video_file: 93 | while True: 94 | video_buffer = video_file.read(DATA_CHUNK_SIZE) 95 | if video_buffer == b"": 96 | break 97 | video_chunk_counter += 1 98 | yield eyecontact_pb2.RedirectGazeRequest(video_file_data=video_buffer) 99 | except IOError as e: 100 | print(f"Error reading video chunk {video_chunk_counter}: {e}") 101 | raise RuntimeError(f"Failed to read video file: {e}") 102 | 103 | print("Data sending completed\n") 104 | 105 | 106 | def write_output_file_from_response( 107 | response_iter: Iterator[eyecontact_pb2.RedirectGazeResponse], 108 | output_filepath: os.PathLike = "output.mp4", 109 | ) -> None: 110 | """Function to write the output file from the incoming gRPC data stream. 111 | 112 | Args: 113 | response_iter: Responses from the server to write into output file 114 | output_filepath: Path to output file 115 | """ 116 | print(f"Writing output in {output_filepath}") 117 | sys.stdout.flush() # Ensure output is flushed before starting progress bar 118 | 119 | # Initialize progress bar for streaming data reception 120 | chunk_count = 0 121 | total_bytes = 0 122 | 123 | with open(output_filepath, "wb") as fd: 124 | # Create progress bar that shows streaming progress 125 | # Use leave=False to clean up the progress bar when done 126 | pbar = tqdm( 127 | desc="Receiving video chunks", 128 | unit="chunks", 129 | unit_scale=False, 130 | dynamic_ncols=True, 131 | leave=False, 132 | bar_format="{desc}: {n} chunks | {rate_fmt} | {postfix}", 133 | ) 134 | 135 | try: 136 | for response in response_iter: 137 | if response.HasField("video_file_data"): 138 | chunk_data = response.video_file_data 139 | fd.write(chunk_data) 140 | 141 | # Update progress tracking 142 | chunk_count += 1 143 | total_bytes += len(chunk_data) 144 | 145 | # Update progress bar 146 | pbar.update(1) 147 | pbar.set_postfix_str(f"{total_bytes / (1024*1024):.1f} MB received") 148 | finally: 149 | pbar.close() 150 | 151 | print( 152 | f"Completed: Received {chunk_count} chunks " f"({total_bytes / (1024*1024):.1f} MB total)" 153 | ) 154 | 155 | 156 | def process_request( 157 | channel: grpc.Channel, 158 | eyecontact_config: EyeContactConfig, 159 | request_metadata: tuple = None, 160 | ) -> None: 161 | """Process gRPC request and handle responses. 
162 | 163 | Args: 164 | channel: gRPC channel for server client communication 165 | eyecontact_config: Configuration for the Eye Contact service 166 | request_metadata: Credentials to process preview request 167 | 168 | Raises: 169 | Exception: If any errors occur during processing 170 | """ 171 | try: 172 | stub = eyecontact_pb2_grpc.MaxineEyeContactServiceStub(channel) 173 | start_time = time.time() 174 | 175 | responses = stub.RedirectGaze( 176 | generate_request_for_inference(eyecontact_config=eyecontact_config), 177 | metadata=request_metadata, 178 | ) 179 | 180 | # Skip the echo response if configuration was sent 181 | next(responses) 182 | 183 | write_output_file_from_response( 184 | response_iter=responses, output_filepath=eyecontact_config.output_filepath 185 | ) 186 | end_time = time.time() 187 | print(f"Function invocation completed in {end_time-start_time:.2f}s") 188 | except Exception as e: 189 | print(f"An error occurred: {e}") 190 | 191 | 192 | def main(): 193 | """Main entry point for the Eye Contact client. 194 | 195 | Handles: 196 | 1. Argument parsing 197 | 2. Configuration validation 198 | 3. Channel setup (secure/insecure) 199 | 4. Request processing 200 | """ 201 | args = parse_args() 202 | eyecontact_config = EyeContactConfig.from_args(args) 203 | 204 | try: 205 | eyecontact_config.validate_eyecontact_config() 206 | validate_ssl_args(args) 207 | validate_preview_args(args) 208 | except Exception as e: 209 | print(f"Invalid configuration: {e}") 210 | return 211 | 212 | print(eyecontact_config) 213 | 214 | # Prepare request metadata for preview mode 215 | request_metadata = create_request_metadata(args) 216 | 217 | # Check ssl-mode and create channel_credentials for that mode 218 | if args.ssl_mode != "DISABLED": 219 | channel_credentials = create_channel_credentials(args) 220 | # Establish secure channel when ssl-mode is MTLS/TLS 221 | with grpc.secure_channel(target=args.target, credentials=channel_credentials) as channel: 222 | process_request( 223 | channel=channel, 224 | eyecontact_config=eyecontact_config, 225 | request_metadata=request_metadata, 226 | ) 227 | elif args.preview_mode: 228 | # Establish secure channel when sending request to NVCF server 229 | with grpc.secure_channel( 230 | target=args.target, credentials=grpc.ssl_channel_credentials() 231 | ) as channel: 232 | process_request( 233 | channel=channel, 234 | eyecontact_config=eyecontact_config, 235 | request_metadata=request_metadata, 236 | ) 237 | else: 238 | # Establish insecure channel when ssl-mode is DISABLED 239 | print(f"Establishing insecure channel to {args.target}") 240 | with grpc.insecure_channel(target=args.target) as channel: 241 | process_request( 242 | channel=channel, 243 | eyecontact_config=eyecontact_config, 244 | request_metadata=request_metadata, 245 | ) 246 | 247 | 248 | if __name__ == "__main__": 249 | main() 250 | -------------------------------------------------------------------------------- /eye-contact/README.md: -------------------------------------------------------------------------------- 1 | 2 | # NVIDIA Maxine Eye Contact NIM Client 3 | 4 | This package has a sample client which demonstrates interaction with a Maxine Eye Contact NIM 5 | 6 | ## Getting Started 7 | 8 | NVIDIA Maxine NIM Client packages use gRPC APIs. Instructions below demonstrate usage of Eye contact NIM using Python gRPC client. 
9 | To experience the NVIDIA Maxine Eye Contact NIM API without having to host your own servers, use the [Try API](https://build.nvidia.com/nvidia/eyecontact/api) feature, which uses the NVIDIA Cloud Function backend. 10 | 11 | ## Pre-requisites 12 | 13 | - Ensure you have Python 3.10 or above installed on your system. 14 | For download and installation instructions, refer to the [Python documentation](https://www.python.org/downloads/). 15 | - Access to NVIDIA Maxine Eye Contact NIM container and service. 16 | - The input must be an MP4 file with H.264 video codec (audio optional); videos with Variable Frame Rate (VFR) are not supported. 17 | 18 | ## Usage guide 19 | 20 | ### 1. Clone the repository 21 | 22 | ```bash 23 | git clone https://github.com/nvidia-maxine/nim-clients.git 24 | 25 | # Go to the 'eye-contact' folder 26 | cd nim-clients/eye-contact 27 | ``` 28 | 29 | ### 2. Install dependencies 30 | 31 | ```bash 32 | sudo apt-get install python3-pip 33 | pip install -r requirements.txt 34 | ``` 35 | 36 | ### 3. Compile the Protos (optional) 37 | 38 | If you want to use the client code provided in the GitHub client repository, you can skip this step. 39 | The proto files are available in the eye-contact/protos folder. You can compile them to generate client interfaces in your preferred programming language. For more details, refer to [Supported languages](https://grpc.io/docs/languages/) in the gRPC documentation. 40 | 41 | Here is an example of how to compile the protos for Python on Linux and Windows. 42 | 43 | #### Python 44 | 45 | The `grpcio` version needed for compilation is listed in `requirements.txt`. 46 | 47 | To compile protos on Linux, run: 48 | ```bash 49 | # Go to eye-contact/protos/linux folder 50 | cd eye-contact/protos/linux/ 51 | 52 | chmod +x compile_protos.sh 53 | ./compile_protos.sh 54 | ``` 55 | 56 | To compile protos on Windows, run: 57 | ```bash 58 | # Go to eye-contact/protos/windows folder 59 | cd eye-contact/protos/windows/ 60 | 61 | ./compile_protos.bat 62 | ``` 63 | The compiled proto files will be generated in the `nim-clients/eye-contact/interfaces` directory. 64 | 65 | ### 4. Host the NIM Server 66 | 67 | Before running the client part of Maxine Eye Contact, please set up a server. 68 | The simplest way to do that is to follow the [quick start guide](https://docs.nvidia.com/nim/maxine/eye-contact/latest/index.html). 69 | This step can be skipped when using [Try API](https://build.nvidia.com/nvidia/eyecontact/api). 70 | 71 | ### 5. Run the Python Client 72 | 73 | - Go to the scripts directory 74 | 75 | ```bash 76 | cd scripts 77 | ``` 78 | 79 | #### Usage for Hosted NIM request 80 | 81 | ```bash 82 | python eye-contact.py \ 83 | --target <server_ip:port> \ 84 | --input <input video file path> \ 85 | --output <output video file path> \ 86 | --ssl-mode <ssl mode> \ 87 | --ssl-key <ssl key file path> \ 88 | --ssl-cert <ssl cert file path> \ 89 | --ssl-root-cert <ssl root cert file path> 90 | ``` 91 | 92 | The following command uses the sample video file and generates an `output.mp4` file in the current folder: 93 | 94 | ```bash 95 | python eye-contact.py --target 127.0.0.1:8001 --input ../assets/sample_transactional.mp4 --output output.mp4 96 | ``` 97 | 98 | The following command uses streaming mode (for streamable video files): 99 | 100 | ```bash 101 | python eye-contact.py --target 127.0.0.1:8001 --input ../assets/sample_streamable.mp4 --output output.mp4 --streaming 102 | ``` 103 | 104 | > **Note:** The supported file type is MP4.
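If you are unsure whether an input file meets the codec constraints listed under Pre-requisites (H.264 video, no variable frame rate), you can optionally inspect it with `ffprobe` (part of FFmpeg, assumed to be installed separately); this check is not part of the client:

```bash
# Print the video codec and frame-rate information of the first video stream.
# For a constant-frame-rate H.264 input, codec_name should be h264 and
# r_frame_rate will typically match avg_frame_rate.
ffprobe -v error -select_streams v:0 \
    -show_entries stream=codec_name,r_frame_rate,avg_frame_rate \
    -of default=noprint_wrappers=1 input.mp4
```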
105 | 106 | #### Usage for Preview API request 107 | 108 | ```bash 109 | python eye-contact.py --preview-mode \ 110 | --target grpc.nvcf.nvidia.com:443 \ 111 | --function-id 15c6f1a0-3843-4cde-b5bc-803a4966fbb6 \ 112 | --api-key $API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC \ 113 | --input <input video file path> \ 114 | --output <output video file path> 115 | ``` 116 | 117 | #### Command line arguments 118 | 119 | - `-h, --help` show this help message and exit 120 | - `--preview-mode` Flag to send request to preview NVCF NIM server on https://build.nvidia.com/nvidia/eyecontact/api. 121 | - `--ssl-mode` {DISABLED,MTLS,TLS} Flag to set SSL mode, default is DISABLED 122 | - `--ssl-key SSL_KEY` The path to ssl private key. 123 | - `--ssl-cert SSL_CERT` The path to ssl certificate chain. 124 | - `--ssl-root-cert` The path to ssl root certificate. 125 | - `--target` IP:port of gRPC service, when hosted locally. Use grpc.nvcf.nvidia.com:443 when hosted on NVCF. 126 | - `--input` The path to the input video file. 127 | - `--output` The path for the output video file. 128 | - `--streaming` Flag to enable gRPC streaming mode. Required for streamable video input. 129 | - `--api-key` NGC API key required for authentication, utilized when using TRY API, ignored otherwise 130 | - `--function-id` NVCF function ID for the service, utilized when using TRY API, ignored otherwise 131 | 132 | #### Advanced Configuration Parameters 133 | 134 | The Eye Contact client supports extensive parameter customization for fine-tuning behavior: 135 | 136 | **Video Encoding Parameters** 137 | 138 | - `lossless`: Enables lossless video encoding. This setting overrides any bitrate configuration to ensure maximum quality output, although it results in larger file sizes. Use this mode when quality is the top priority. 139 | ```bash 140 | python eye-contact.py --target 127.0.0.1:8001 --lossless 141 | ``` 142 | 143 | - `bitrate`: Sets the target bitrate for video encoding in bits per second (bps). Higher bitrates result in better video quality but larger file sizes. This parameter allows balancing quality and file size by controlling the video bitrate. The default is 3,000,000 bps (3 Mbps). For example, setting `--bitrate 5000000` targets 5 Mbps encoding. 144 | ```bash 145 | python eye-contact.py --target 127.0.0.1:8001 --bitrate 5000000 146 | ``` 147 | 148 | - `idr-interval`: Sets the interval between instantaneous decoding refresh (IDR) frames in the encoded video. IDR frames are special I-frames that clear all reference buffers, allowing the video to be decoded from that point without needing previous frames. Lower values improve seeking accuracy, random access, and overall encoding quality but increase file size; higher values reduce file size but may impact seeking performance and quality. The default is 8 frames. 149 | ```bash 150 | python eye-contact.py --target 127.0.0.1:8001 --idr-interval 10 151 | ``` 152 | 153 | - `custom-encoding-params`: Passes custom encoding parameters as a JSON string, which provides fine-grained control for expert users via JSON configuration. These parameters are used to configure properties of the GStreamer nvvideo4linux2 encoder plugin, allowing direct control over the underlying hardware encoder settings. 154 | ```bash 155 | python eye-contact.py --custom-encoding-params '{"idrinterval": 20, "maxbitrate": 3000000}' 156 | ``` 157 | 158 | **Note:** Custom encoding parameters are for expert users who need fine-grained control over video encoding. Incorrect values can cause encoding failures or poor-quality output.
To configure the nvenc encoder, refer to [Gst properties of the Gst-nvvideo4linux2 encoder plugin](https://docs.nvidia.com/metropolis/deepstream/dev-guide/text/DS_plugin_gst-nvvideo4linux2.html#:~:text=The%20following%20table%20summarizes%20the%20Gst%20properties%20of%20the%20Gst%2Dnvvideo4linux2%20encoder%20plugin). 159 | 160 | **Eye Contact Behavior Parameters** 161 | - `--temporal` Flag to control temporal filtering (default: 4294967295). 162 | - `--detect-closure` Flag to toggle detection of eye closure and occlusion (default: 0, choices: [0, 1]). 163 | - `--eye-size-sensitivity` Eye size sensitivity parameter (default: 3, range: [2, 6]). 164 | - `--enable-lookaway` Flag to toggle look away (default: 0, choices: [0, 1]). 165 | - `--lookaway-max-offset` Maximum value of gaze offset angle (degrees) during a random look away (default: 5, range: [1, 10]). 166 | - `--lookaway-interval-min` Minimum number of frames at which random look away occurs (default: 100, range: [1, 600]). 167 | - `--lookaway-interval-range` Range for picking the number of frames at which random look away occurs (default: 250, range: [1, 600]). 168 | 169 | **Gaze Threshold Parameters** 170 | - `--gaze-pitch-threshold-low` Gaze pitch threshold (degrees) at which the redirection starts transitioning (default: 20.0, range: [10, 35]). 171 | - `--gaze-pitch-threshold-high` Gaze pitch threshold (degrees) at which the redirection is equal to estimated gaze (default: 30.0, range: [10, 35]). 172 | - `--gaze-yaw-threshold-low` Gaze yaw threshold (degrees) at which the redirection starts transitioning (default: 20.0, range: [10, 35]). 173 | - `--gaze-yaw-threshold-high` Gaze yaw threshold (degrees) at which the redirection is equal to estimated gaze (default: 30.0, range: [10, 35]). 174 | 175 | **Head Pose Threshold Parameters** 176 | - `--head-pitch-threshold-low` Head pose pitch threshold (degrees) at which the redirection starts transitioning away from camera toward estimated gaze (default: 15.0, range: [10, 35]). 177 | - `--head-pitch-threshold-high` Head pose pitch threshold (degrees) at which the redirection is equal to estimated gaze (default: 15.0, range: [10, 35]). 178 | - `--head-yaw-threshold-low` Head pose yaw threshold (degrees) at which the redirection starts transitioning (default: 15.0, range: [10, 35]). 179 | - `--head-yaw-threshold-high` Head pose yaw threshold (degrees) at which the redirection is equal to estimated gaze (default: 15.0, range: [10, 35]). 180 | 181 | #### Important Notes about Streaming Mode 182 | 183 | Streaming mode (`--streaming`) is required when processing videos that are optimized for streaming (that is, they have the 'moov' atom at the beginning). 184 | 185 | If you encounter an error when processing non-streamable video files, you can convert your video to be streamable using the following command: 186 | ```bash 187 | ffmpeg -i input.mp4 -movflags +faststart output_streamable.mp4 188 | ``` 189 | The client automatically validates video compatibility with the selected mode and provides helpful error messages. 190 | 191 | When using SSL mode, the default path for the credentials is `../ssl_key/<filename>.pem` (for example, `../ssl_key/ssl_key_client.pem`, `../ssl_key/ssl_cert_client.pem`, and `../ssl_key/ssl_ca_cert.pem`). 192 | 193 | For more information, refer to [Basic Inference](https://docs.nvidia.com/nim/maxine/eye-contact/latest/basic-inference.html) in the Eye Contact NIM documentation.
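As a concrete illustration of the SSL flags described above, the following hypothetical invocation connects to a locally hosted NIM over mTLS, assuming the certificates have already been generated and placed at the default `../ssl_key/` paths:

```bash
python eye-contact.py --target 127.0.0.1:8001 \
    --input ../assets/sample_transactional.mp4 --output output.mp4 \
    --ssl-mode MTLS \
    --ssl-key ../ssl_key/ssl_key_client.pem \
    --ssl-cert ../ssl_key/ssl_cert_client.pem \
    --ssl-root-cert ../ssl_key/ssl_ca_cert.pem
```

For TLS (server authentication only), only `--ssl-mode TLS` and `--ssl-root-cert` are required.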
194 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 20 | 21 | import os 22 | import csv 23 | import itertools 24 | from typing import Iterator, List, Union 25 | import argparse 26 | import grpc 27 | from google.protobuf import any_pb2, wrappers_pb2 28 | 29 | 30 | def add_ssl_arguments(parser: argparse.ArgumentParser) -> None: 31 | """Add SSL-related arguments to an argument parser. 32 | 33 | Args: 34 | parser: The argument parser to add SSL arguments to 35 | """ 36 | # SSL and connection arguments 37 | parser.add_argument( 38 | "--ssl-mode", 39 | type=str, 40 | help="Flag to set SSL mode, default is DISABLED", 41 | default="DISABLED", 42 | choices=["DISABLED", "MTLS", "TLS"], 43 | ) 44 | parser.add_argument( 45 | "--ssl-key", 46 | type=str, 47 | default="../ssl_key/ssl_key_client.pem", 48 | help="The path to ssl private key.", 49 | ) 50 | parser.add_argument( 51 | "--ssl-cert", 52 | type=str, 53 | default="../ssl_key/ssl_cert_client.pem", 54 | help="The path to ssl certificate chain.", 55 | ) 56 | parser.add_argument( 57 | "--ssl-root-cert", 58 | type=str, 59 | default="../ssl_key/ssl_ca_cert.pem", 60 | help="The path to ssl root certificate.", 61 | ) 62 | parser.add_argument( 63 | "--target", 64 | type=str, 65 | default="127.0.0.1:8001", 66 | help="IP:port of gRPC service, when hosted locally. Use " 67 | "grpc.nvcf.nvidia.com:443 when hosted on NVCF.", 68 | ) 69 | 70 | 71 | def add_preview_arguments(parser: argparse.ArgumentParser) -> None: 72 | """Add preview mode related arguments to an argument parser. 73 | 74 | Args: 75 | parser: The argument parser to add preview arguments to 76 | """ 77 | # Preview mode and NVCF arguments 78 | parser.add_argument( 79 | "--preview-mode", 80 | action="store_true", 81 | help="Flag to send request to preview NVCF NIM server on " 82 | "https://build.nvidia.com/nvidia/eyecontact/api. 
", 83 | ) 84 | parser.add_argument( 85 | "--api-key", 86 | type=str, 87 | help="NGC API key required for authentication, utilized when using " 88 | "TRY API ignored otherwise", 89 | ) 90 | parser.add_argument( 91 | "--function-id", 92 | type=str, 93 | help="NVCF function ID for the service, utilized when using TRY API " "ignored otherwise", 94 | ) 95 | 96 | 97 | def validate_ssl_args(args: argparse.Namespace) -> None: 98 | """Validate SSL-related arguments. 99 | 100 | Args: 101 | args: Parsed command line arguments 102 | 103 | Raises: 104 | RuntimeError: If SSL configuration is invalid 105 | """ 106 | if args.ssl_mode == "MTLS": 107 | if not (args.ssl_key and args.ssl_cert and args.ssl_root_cert): 108 | raise RuntimeError( 109 | "If --ssl-mode is MTLS, --ssl-key, --ssl-cert and " "--ssl-root-cert are required." 110 | ) 111 | elif args.ssl_mode == "TLS": 112 | if not args.ssl_root_cert: 113 | raise RuntimeError("If --ssl-mode is TLS, --ssl-root-cert is required.") 114 | 115 | 116 | def validate_preview_args(args: argparse.Namespace) -> None: 117 | """Validate preview mode related arguments. 118 | 119 | Args: 120 | args: Parsed command line arguments 121 | 122 | Raises: 123 | RuntimeError: If preview configuration is invalid 124 | """ 125 | if args.preview_mode: 126 | if not args.api_key or not args.function_id: 127 | raise RuntimeError( 128 | "If --preview-mode is specified, both --api-key and " "--function-id are required." 129 | ) 130 | 131 | 132 | def create_request_metadata(args: argparse.Namespace) -> tuple | None: 133 | """Create request metadata for preview mode. 134 | 135 | Args: 136 | args: Parsed command line arguments 137 | 138 | Returns: 139 | Request metadata tuple or None 140 | """ 141 | if args.preview_mode: 142 | return ( 143 | ("authorization", "Bearer {}".format(args.api_key)), 144 | ("function-id", args.function_id), 145 | ) 146 | return None 147 | 148 | 149 | def is_file_available(file_path: os.PathLike, file_types: List[str]) -> bool: 150 | """Check if the file exists. 151 | 152 | Args: 153 | file_path: Path to input file 154 | """ 155 | if not os.path.isfile(file_path): 156 | raise FileNotFoundError(f"File '{file_path}' not found") 157 | for file_type in file_types: 158 | if os.path.splitext(file_path)[1].lower() == f".{file_type}": 159 | return True 160 | return False 161 | 162 | 163 | def read_file_content(file_path: os.PathLike) -> bytes: 164 | """Read file content as bytes. 165 | 166 | Args: 167 | file_path: Path to input file 168 | 169 | Returns: 170 | File contents as bytes 171 | """ 172 | with open(file_path, "rb") as file: 173 | return file.read() 174 | 175 | 176 | def roi_csv_reader(reader: csv.reader, row_count: int) -> Iterator[list]: 177 | """Read CSV data as multiple rows . 178 | 179 | Args: 180 | reader: CSV reader object to read from 181 | row_count: Number of rows to include in each batch 182 | 183 | Yields: 184 | List of CSV rows as multiple rows of the specified row count 185 | """ 186 | while True: 187 | rows = list(itertools.islice(reader, row_count)) 188 | if not rows: 189 | break 190 | yield rows 191 | 192 | 193 | def check_streamable(file_path: os.PathLike) -> bool: 194 | """ 195 | Checks if the video is streamable by checking if the moov atom follows 196 | immediately after the ftyp atom in an MP4 file. 197 | 198 | For streamable MP4s, the moov atom must come immediately after: 199 | [4 bytes: size][4 bytes: "ftyp"][... ftyp data ...][4 bytes: size] 200 | [4 bytes: "moov"][... moov data ...] 
201 | 
202 |     For non-streamable MP4s, other atoms like mdat may come between ftyp and
203 |     moov:
204 |     [4 bytes: size][4 bytes: "ftyp"][... ftyp data ...][4 bytes: size]
205 |     [4 bytes: "mdat"][... mdat data ...][moov atom]
206 | 
207 |     Args:
208 |         file_path: Path to the MP4 file. The first 40 bytes of the file are
209 |         read to perform the check.
210 | 
211 |     Returns:
212 |         True if the MP4 is streamable, False otherwise.
213 |     """
214 |     # Read first 40 bytes of the file
215 |     with open(file_path, "rb") as f:
216 |         mp4_header_data = f.read(40)
217 |     if len(mp4_header_data) < 40:
218 |         raise RuntimeError("MP4 file is too small to check if it is streamable")
219 | 
220 |     # Read the first atom size
221 |     ftyp_size = int.from_bytes(mp4_header_data[0:4], byteorder="big")
222 | 
223 |     # Check if it's a ftyp atom
224 |     if mp4_header_data[4:8] != b"ftyp":
225 |         return False
226 | 
227 |     next_atom_type = bytes(mp4_header_data[ftyp_size + 4 : ftyp_size + 8])
228 | 
229 |     # Check if the next atom is a moov atom
230 |     if next_atom_type == b"moov":
231 |         return True
232 |     else:
233 |         return False
234 | 
235 | 
236 | def create_channel_credentials(args: argparse.Namespace) -> grpc.ChannelCredentials:
237 |     """Create channel credentials based on SSL mode.
238 | 
239 |     Args:
240 |         args: Command line arguments containing SSL configuration
241 | 
242 |     Returns:
243 |         Configured channel credentials
244 | 
245 |     Raises:
246 |         RuntimeError: If required SSL files are missing
247 |     """
248 |     channel_credentials = None
249 |     if args.ssl_mode == "MTLS":
250 |         if not (args.ssl_key and args.ssl_cert and args.ssl_root_cert):
251 |             raise RuntimeError(
252 |                 "If --ssl-mode is MTLS, --ssl-key, --ssl-cert and " "--ssl-root-cert are required."
253 |             )
254 |         private_key = read_file_content(args.ssl_key)
255 |         certificate_chain = read_file_content(args.ssl_cert)
256 |         root_certificates = read_file_content(args.ssl_root_cert)
257 |         channel_credentials = grpc.ssl_channel_credentials(
258 |             root_certificates=root_certificates,
259 |             private_key=private_key,
260 |             certificate_chain=certificate_chain,
261 |         )
262 |     else:
263 |         if not (args.ssl_root_cert):
264 |             raise RuntimeError("If --ssl-mode is TLS, --ssl-root-cert is required.")
265 |         root_certificates = read_file_content(args.ssl_root_cert)
266 |         channel_credentials = grpc.ssl_channel_credentials(root_certificates=root_certificates)
267 |     return channel_credentials
268 | 
269 | 
270 | def create_protobuf_any_value(value: Union[bool, int, float, str]) -> any_pb2.Any:
271 |     """Create a google.protobuf.Any message from a Python value.
272 | 273 | Args: 274 | value: The value to convert (bool, int, float, or str) 275 | 276 | Returns: 277 | google.protobuf.Any message 278 | """ 279 | any_message = any_pb2.Any() 280 | 281 | if isinstance(value, bool): 282 | wrapper = wrappers_pb2.BoolValue(value=value) 283 | any_message.Pack(wrapper) 284 | elif isinstance(value, int): 285 | if value > 2147483647 or value < -2147483648: # int32 range 286 | wrapper = wrappers_pb2.Int64Value(value=value) 287 | else: 288 | wrapper = wrappers_pb2.Int32Value(value=value) 289 | any_message.Pack(wrapper) 290 | elif isinstance(value, float): 291 | wrapper = wrappers_pb2.FloatValue(value=value) 292 | any_message.Pack(wrapper) 293 | elif isinstance(value, str): 294 | wrapper = wrappers_pb2.StringValue(value=value) 295 | any_message.Pack(wrapper) 296 | else: 297 | raise ValueError(f"Unsupported type: {type(value)}") 298 | 299 | return any_message 300 | -------------------------------------------------------------------------------- /audio2face-2d/python/scripts/audio2face-2d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 20 | 21 | import argparse 22 | import os 23 | import sys 24 | import time 25 | import io 26 | import grpc 27 | 28 | sys.path.append(os.path.join(os.getcwd(), "../interfaces")) 29 | # Importing gRPC compiler auto-generated maxine audio2face-2d library 30 | import audio2face2d_pb2 # noqa: E402 31 | import audio2face2d_pb2_grpc # noqa: E402 32 | from audio2face2d_pb2 import ( # noqa: E402 33 | QuaternionStream, 34 | Quaternion, 35 | Vector3fStream, 36 | Vector3f, 37 | ModelSelection, 38 | AnimationCroppingMode, 39 | HeadPoseMode, 40 | ) 41 | 42 | 43 | def parse_args() -> None: 44 | """ 45 | Parse command-line arguments using argparse. 46 | """ 47 | # Set up argument parsing 48 | parser = argparse.ArgumentParser( 49 | description="Process input audio and portrait files and apply audio2face-2d effect." 
50 | ) 51 | parser.add_argument( 52 | "--ssl-mode", 53 | type=str, 54 | help="Flag to set SSL mode, default is None", 55 | default="DISABLED", 56 | choices=["DISABLED", "MTLS", "TLS"], 57 | ) 58 | parser.add_argument( 59 | "--ssl-key", 60 | type=str, 61 | default="../ssl_key/ssl_key_client.pem", 62 | help="The path to ssl private key.", 63 | ) 64 | parser.add_argument( 65 | "--ssl-cert", 66 | type=str, 67 | default="../ssl_key/ssl_cert_client.pem", 68 | help="The path to ssl certificate chain.", 69 | ) 70 | parser.add_argument( 71 | "--ssl-root-cert", 72 | type=str, 73 | default="../ssl_key/ssl_ca_cert.pem", 74 | help="The path to ssl root certificate.", 75 | ) 76 | parser.add_argument( 77 | "--target", 78 | type=str, 79 | default="127.0.0.1:8001", 80 | help="IP:port of gRPC service, when hosted locally.", 81 | ) 82 | parser.add_argument( 83 | "--audio-input", 84 | type=str, 85 | default="../../assets/sample_audio.wav", 86 | help="The path to the input audio file.", 87 | ) 88 | parser.add_argument( 89 | "--portrait-input", 90 | type=str, 91 | default="../../assets/sample_portrait_image.png", 92 | help="The path to the input portrait file.", 93 | ) 94 | parser.add_argument( 95 | "--output", 96 | type=str, 97 | default="output.mp4", 98 | help="The path for the output video file.", 99 | ) 100 | parser.add_argument( 101 | "--head-rotation-animation-filepath", 102 | type=str, 103 | default="../../assets/head_rotation_animation.csv", 104 | help="The path for the head_rotation_animation.csv file. " 105 | "Only required for HEAD_POSE_MODE_USER_DEFINED_ANIMATION", 106 | ) 107 | parser.add_argument( 108 | "--head-translation-animation-filepath", 109 | type=str, 110 | default="../../assets/head_translation_animation.csv", 111 | help="The path for the head_translation_animation.csv file. " 112 | "Only required for HEAD_POSE_MODE_USER_DEFINED_ANIMATION", 113 | ) 114 | return parser.parse_args() 115 | 116 | 117 | def read_file_content(file_path: os.PathLike) -> None: 118 | """Function to read file content as bytes. 119 | 120 | Args: 121 | file_path: Path to input file 122 | """ 123 | with open(file_path, "rb") as file: 124 | return file.read() 125 | 126 | 127 | def generate_request_for_inference(audio_filepath: str, params: dict): 128 | """Generator to produce the request data stream 129 | 130 | Args: 131 | audio_filepath: Path to input file 132 | params: Parameters for the feature 133 | """ 134 | yield audio2face2d_pb2.AnimateRequest(config=audio2face2d_pb2.AnimateConfig(**params)) 135 | file = open(audio_filepath, "rb") 136 | while True: 137 | buffer = file.read(1024 * 1024) 138 | if buffer == b"": 139 | break 140 | yield audio2face2d_pb2.AnimateRequest(audio_file_data=buffer) 141 | print("Data sending done") 142 | 143 | 144 | def process_head_pose_data(head_rotation_path, head_translation_path): 145 | """ 146 | Process head rotation and translation data. 147 | 148 | Args: 149 | head_rotation_path (str): Path to the head rotation animation file. 150 | head_translation_path (str): Path to the head translation animation file. 151 | 152 | Returns: 153 | Tuple[QuaternionStream, Vector3fStream]: Processed rotation and translation data streams. 
154 | """ 155 | # Read the head rotation data 156 | with io.StringIO(open(head_rotation_path, "rb").read().decode("utf-8")) as file: 157 | head_rotation_data = [] 158 | for line in file: 159 | values = line.strip().split(",") 160 | if len(values) == 4: 161 | head_rotation_data.append([float(val) for val in values]) 162 | 163 | # Validate the data 164 | assert len(head_rotation_data) > 0, "Head rotation data is empty" 165 | assert all(len(row) == 4 for row in head_rotation_data), "Each row must have 4 values" 166 | 167 | # Create the QuaternionStream 168 | rotation_data_stream = QuaternionStream() 169 | for x in head_rotation_data: 170 | q = Quaternion() 171 | q.x, q.y, q.z, q.w = x 172 | rotation_data_stream.values.append(q) 173 | 174 | # Read the head translation data 175 | with io.StringIO(open(head_translation_path, "rb").read().decode("utf-8")) as file: 176 | head_translation_data = [] 177 | for line in file: 178 | values = line.strip().split(",") 179 | if len(values) == 3: 180 | head_translation_data.append([float(val) for val in values]) 181 | 182 | # Validate the data 183 | assert len(head_translation_data) > 0, "Head translation data is empty" 184 | assert all(len(row) == 3 for row in head_translation_data), "Each row must have 3 values" 185 | 186 | # Create the Vector3fStream 187 | translation_data_stream = Vector3fStream() 188 | for x in head_translation_data: 189 | v = Vector3f() 190 | v.x, v.y, v.z = x 191 | translation_data_stream.values.append(v) 192 | 193 | return rotation_data_stream, translation_data_stream 194 | 195 | 196 | def process_request( 197 | channel: any, 198 | audio_filepath: os.PathLike, 199 | params: dict, 200 | output_filepath: os.PathLike, 201 | ) -> None: 202 | """Function to process gRPC request 203 | 204 | Args: 205 | channel: gRPC channel for server client communication 206 | input_filepath: Path to input file 207 | params: Parameters to control the feature 208 | output_filepath: Path to output file 209 | request_metadata: Credentials to process preview request 210 | """ 211 | try: 212 | stub = audio2face2d_pb2_grpc.Audio2Face2DServiceStub(channel) 213 | start_time = time.time() 214 | responses = stub.Animate( 215 | generate_request_for_inference(audio_filepath=audio_filepath, params=params) 216 | ) 217 | next(responses) 218 | file = open(output_filepath, "wb") 219 | print(f"Writing output in {output_filepath}") 220 | for response in responses: 221 | if response.HasField("video_file_data"): 222 | file.write(response.video_file_data) 223 | end_time = time.time() 224 | print( 225 | f"Function invocation completed in {end_time-start_time:.2f}s, " 226 | f"{output_filepath} file is generated." 227 | ) 228 | except Exception as e: 229 | print(f"An error occurred: {e}") 230 | 231 | 232 | def main(): 233 | """ 234 | Main client function 235 | """ 236 | args = parse_args() 237 | portrait_filepath = args.portrait_input 238 | audio_filepath = args.audio_input 239 | output_filepath = args.output 240 | 241 | # Check file path 242 | if os.path.isfile(portrait_filepath): 243 | print(f"The image file '{portrait_filepath}' exists. Checking for audio file.") 244 | else: 245 | raise FileNotFoundError(f"The image file '{portrait_filepath}' does not exist. Exiting.") 246 | if os.path.isfile(audio_filepath): 247 | print(f"The audio file '{audio_filepath}' exists. Proceeding with processing.") 248 | else: 249 | raise FileNotFoundError(f"The audio file '{audio_filepath}' does not exist. 
Exiting.") 250 | 251 | portrait_image_encoded = open(portrait_filepath, "rb").read() 252 | 253 | # Configure head pose mode 254 | head_pose_mode = HeadPoseMode.HEAD_POSE_MODE_RETAIN_FROM_PORTRAIT_IMAGE 255 | 256 | # Provide head pose animation values for head pose mode HEAD_POSE_MODE_USER_DEFINED_ANIMATION 257 | if head_pose_mode == HeadPoseMode.HEAD_POSE_MODE_USER_DEFINED_ANIMATION: 258 | rotation_data_stream, translation_data_stream = process_head_pose_data( 259 | args.head_rotation_animation_filepath, 260 | args.head_translation_animation_filepath, 261 | ) 262 | 263 | # Supply params as shown below, refer to the docs for more info. 264 | feature_params = { 265 | "portrait_image": portrait_image_encoded, 266 | "model_selection": ModelSelection.MODEL_SELECTION_QUALITY, 267 | "animation_crop_mode": AnimationCroppingMode.ANIMATION_CROPPING_MODE_REGISTRATION_BLENDING, 268 | "enable_lookaway": 1, # can be 0 or 1 269 | "lookaway_max_offset": 20, # value in [5, 25] 270 | "lookaway_interval_min": 240, # value in [1, 600] 271 | "lookaway_interval_range": 90, # value in [1, 600] 272 | "blink_frequency": 15, # value in [0, 120] 273 | "blink_duration": 6, # value in [2, 150] 274 | "mouth_expression_multiplier": 1.4, # value in [1.0, 2.0] 275 | "head_pose_mode": head_pose_mode, 276 | "head_pose_multiplier": 1.0, # value in [0.0, 1.0] 277 | # "input_head_rotation": rotation_data_stream, # HEAD_POSE_MODE_USER_DEFINED_ANIMATION 278 | # "input_head_translation": translation_data_stream, # HEAD_POSE_MODE_USER_DEFINED_ANIMATION 279 | } 280 | 281 | # Check ssl-mode and create channel_credentials for that mode 282 | if args.ssl_mode != "DISABLED": 283 | channel_credentials = "" 284 | if args.ssl_mode == "MTLS": 285 | if not (args.ssl_key and args.ssl_cert and args.ssl_root_cert): 286 | raise RuntimeError( 287 | "If --ssl-mode is MTLS, --ssl-key, --ssl-cert and --ssl-root-cert are required." 288 | ) 289 | private_key = read_file_content(args.ssl_key) 290 | certificate_chain = read_file_content(args.ssl_cert) 291 | root_certificates = read_file_content(args.ssl_root_cert) 292 | channel_credentials = grpc.ssl_channel_credentials( 293 | root_certificates=root_certificates, 294 | private_key=private_key, 295 | certificate_chain=certificate_chain, 296 | ) 297 | else: 298 | if not (args.ssl_root_cert): 299 | raise RuntimeError("If --ssl-mode is TLS, --ssl-root-cert is required.") 300 | root_certificates = read_file_content(args.ssl_root_cert) 301 | channel_credentials = grpc.ssl_channel_credentials(root_certificates=root_certificates) 302 | 303 | # Establish secure channel when ssl-mode is MTLS/TLS 304 | with grpc.secure_channel(target=args.target, credentials=channel_credentials) as channel: 305 | process_request( 306 | channel=channel, 307 | audio_filepath=audio_filepath, 308 | params=feature_params, 309 | output_filepath=output_filepath, 310 | ) 311 | else: 312 | # Establish insecure channel when ssl-mode is DISABLED 313 | with grpc.insecure_channel(target=args.target) as channel: 314 | process_request( 315 | channel=channel, 316 | audio_filepath=audio_filepath, 317 | params=feature_params, 318 | output_filepath=output_filepath, 319 | ) 320 | 321 | 322 | if __name__ == "__main__": 323 | main() 324 | -------------------------------------------------------------------------------- /studio-voice/scripts/studio_voice.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 20 | 21 | import argparse 22 | import os 23 | import sys 24 | import grpc 25 | import time 26 | import soundfile as sf 27 | import numpy as np 28 | from typing import Iterator 29 | 30 | sys.path.append(os.path.join(os.getcwd(), "../interfaces/studio_voice")) 31 | # Importing gRPC compiler auto-generated maxine studiovoice library 32 | import studiovoice_pb2 # noqa: E402 33 | import studiovoice_pb2_grpc # noqa: E402 34 | 35 | 36 | def read_file_content(file_path: os.PathLike) -> None: 37 | """Function to read file content as bytes. 38 | 39 | Args: 40 | file_path: Path to input file 41 | """ 42 | if not os.path.isfile(file_path): 43 | raise FileNotFoundError(f"The file '{file_path}' does not exist. 
Exiting.") 44 | 45 | with open(file_path, "rb") as file: 46 | return file.read() 47 | 48 | 49 | def generate_request_for_inference( 50 | input_filepath: os.PathLike, model_type: str, sample_rate: int, streaming: bool 51 | ) -> None: 52 | """Generator to produce the request data stream 53 | 54 | Args: 55 | input_filepath: Path to input file 56 | model_type: Studio Voice model type to infer 57 | sample_rate: Input audio sample rate 58 | streaming: Enables grpc streaming mode 59 | """ 60 | if streaming: 61 | """ 62 | Input audio chunk is generated based on model type and sample rate, 63 | 1) High quality models require 6sec input 64 | 2) Low latency models require 10ms input chunk 65 | """ 66 | input_audio, sample_rate_file = sf.read(input_filepath) 67 | input_audio = input_audio.astype(np.float32) # Convert to float32 68 | input_size_in_ms = 10 if (model_type == "48k-ll") else 6000 69 | samples_per_ms = sample_rate // 1000 70 | input_float_size = int(input_size_in_ms * samples_per_ms) 71 | 72 | pad_length = input_float_size - len(input_audio) % input_float_size 73 | input_audio = np.pad(input_audio, (0, pad_length), "constant") 74 | 75 | print( 76 | f"Len {len(input_audio)}, chunk_size {input_float_size}, audio {input_audio}, " 77 | "type {input_audio.dtype}" 78 | ) 79 | for i in range(0, len(input_audio), input_float_size): 80 | data = input_audio[i : i + input_float_size] 81 | yield studiovoice_pb2.EnhanceAudioRequest(audio_stream_data=data.tobytes()) 82 | else: 83 | DATA_CHUNKS = 64 * 1024 # bytes, we send the wav file in 64KB chunks 84 | with open(input_filepath, "rb") as fd: 85 | while True: 86 | buffer = fd.read(DATA_CHUNKS) 87 | if buffer == b"": 88 | break 89 | yield studiovoice_pb2.EnhanceAudioRequest(audio_stream_data=buffer) 90 | 91 | 92 | def write_output_file_from_response( 93 | response_iter: Iterator[studiovoice_pb2.EnhanceAudioResponse], 94 | output_filepath: os.PathLike, 95 | sample_rate: int, 96 | streaming: bool, 97 | ) -> None: 98 | """Function to write the output file from the incoming gRPC data stream. 99 | 100 | Args: 101 | response_iter: Responses from the server to write into output file 102 | output_filepath: Path to output file 103 | sample_rate: Input audio sample rate 104 | streaming: Enables grpc streaming mode 105 | """ 106 | if streaming: 107 | output_audio = [] 108 | response_count = 0 109 | for response in response_iter: 110 | response_count += 1 111 | output_audio.append(np.frombuffer(response.audio_stream_data, np.float32)) 112 | 113 | sf.write(output_filepath, np.hstack(output_audio), sample_rate) 114 | return response_count 115 | else: 116 | with open(output_filepath, "wb") as fd: 117 | for response in response_iter: 118 | if response.HasField("audio_stream_data"): 119 | fd.write(response.audio_stream_data) 120 | 121 | 122 | def parse_args() -> None: 123 | """ 124 | Parse command-line arguments using argparse. 125 | """ 126 | # Set up argument parsing 127 | parser = argparse.ArgumentParser( 128 | description="Process wav audio files using gRPC and apply studio-voice." 129 | ) 130 | parser.add_argument( 131 | "--preview-mode", 132 | action="store_true", 133 | help="Flag to send request to preview NVCF NIM server on " 134 | "https://build.nvidia.com/nvidia/studiovoice/api. 
", 135 | ) 136 | parser.add_argument( 137 | "--ssl-mode", 138 | type=str, 139 | help="Flag to set SSL mode, default is None", 140 | default=None, 141 | choices=["MTLS", "TLS"], 142 | ) 143 | parser.add_argument( 144 | "--ssl-key", 145 | type=str, 146 | default=None, 147 | help="The path to ssl private key.", 148 | ) 149 | parser.add_argument( 150 | "--ssl-cert", 151 | type=str, 152 | default=None, 153 | help="The path to ssl certificate chain.", 154 | ) 155 | parser.add_argument( 156 | "--ssl-root-cert", 157 | type=str, 158 | default=None, 159 | help="The path to ssl root certificate.", 160 | ) 161 | parser.add_argument( 162 | "--target", 163 | type=str, 164 | default="127.0.0.1:8001", 165 | help="IP:port of gRPC service, when hosted locally. " 166 | "Use grpc.nvcf.nvidia.com:443 when hosted on NVCF.", 167 | ) 168 | parser.add_argument( 169 | "--input", 170 | type=str, 171 | default="../assets/studio_voice_48k_input.wav", 172 | help="The path to the input audio file.", 173 | ) 174 | parser.add_argument( 175 | "--output", 176 | type=str, 177 | default="studio_voice_48k_output.wav", 178 | help="The path for the output audio file.", 179 | ) 180 | parser.add_argument( 181 | "--api-key", 182 | type=str, 183 | help="NGC API key required for authentication, " 184 | "utilized when using TRY API ignored otherwise", 185 | ) 186 | parser.add_argument( 187 | "--function-id", 188 | type=str, 189 | help="NVCF function ID for the service, utilized when using TRY API ignored otherwise", 190 | ) 191 | parser.add_argument( 192 | "--streaming", 193 | action="store_true", 194 | help="Flag to enable grpc streaming mode. ", 195 | ) 196 | parser.add_argument( 197 | "--model-type", 198 | type=str, 199 | help="Studio Voice model type, default is 48k-hq. ", 200 | default="48k-hq", 201 | choices=["48k-hq", "48k-ll", "16k-hq"], 202 | ) 203 | return parser.parse_args() 204 | 205 | 206 | def process_request( 207 | channel: any, 208 | input_filepath: os.PathLike, 209 | output_filepath: os.PathLike, 210 | model_type: str, 211 | sample_rate: int, 212 | streaming: bool, 213 | request_metadata: dict = None, 214 | ) -> None: 215 | """Function to process gRPC request 216 | 217 | Args: 218 | channel: gRPC channel for server client communication 219 | input_filepath: Path to input file 220 | output_filepath: Path to output file 221 | model_type: Studio Voice model type to infer 222 | sample_rate: Input audio sample rate 223 | streaming: Enables grpc streaming mode 224 | request_metadata: Credentials to process request 225 | """ 226 | try: 227 | stub = studiovoice_pb2_grpc.MaxineStudioVoiceStub(channel) 228 | start_time = time.time() 229 | 230 | responses = stub.EnhanceAudio( 231 | generate_request_for_inference( 232 | input_filepath=input_filepath, 233 | model_type=model_type, 234 | sample_rate=sample_rate, 235 | streaming=streaming, 236 | ), 237 | metadata=request_metadata, 238 | ) 239 | 240 | response_count = write_output_file_from_response( 241 | response_iter=responses, 242 | output_filepath=output_filepath, 243 | sample_rate=sample_rate, 244 | streaming=streaming, 245 | ) 246 | 247 | end_time = time.time() 248 | if streaming: 249 | avg_latency = (end_time - start_time) / response_count 250 | print(f"Average latency per request: {avg_latency*1000:.2f}ms") 251 | print(f"Processed {response_count} chunks.") 252 | 253 | print( 254 | f"Function invocation completed in {end_time-start_time:.2f}s, " 255 | "the output file is generated." 
256 | ) 257 | except BaseException as e: 258 | print(e) 259 | 260 | 261 | def main(): 262 | """ 263 | Main client function 264 | """ 265 | args = parse_args() 266 | streaming = args.streaming 267 | model_type = args.model_type 268 | print(f"Streaming mode set to {streaming}") 269 | sample_rate = 48000 270 | if model_type == "16k-hq": 271 | sample_rate = 16000 272 | print(f"Sample Rate: {sample_rate}") 273 | input_filepath = args.input 274 | output_filepath = args.output 275 | 276 | # Check if input file path exists 277 | if os.path.isfile(input_filepath): 278 | print(f"The file '{input_filepath}' exists. Proceeding with processing.") 279 | else: 280 | raise FileNotFoundError(f"The file '{input_filepath}' does not exist. Exiting.") 281 | 282 | # Check the sample rate of the input audio file 283 | input_info = sf.info(input_filepath) 284 | input_sample_rate = input_info.samplerate 285 | print(f"Input file sample rate: {input_sample_rate}") 286 | 287 | # Check if the input file's sample rate matches the expected sample rate 288 | if input_sample_rate != sample_rate: 289 | raise ValueError(f"Sample rate mismatch: expected {sample_rate}, got {input_sample_rate}.") 290 | 291 | if args.preview_mode: 292 | if args.ssl_mode != "TLS": 293 | # Preview mode only supports TLS mode 294 | args.ssl_mode = "TLS" 295 | print("--ssl-mode is set as TLS, since preview_mode is enabled.") 296 | if args.ssl_root_cert: 297 | raise RuntimeError("Preview mode does not support custom root certificate.") 298 | 299 | if args.ssl_mode is not None: 300 | request_metadata = None 301 | root_certificates = None 302 | if args.ssl_mode == "MTLS": 303 | if not (args.ssl_key and args.ssl_cert and args.ssl_root_cert): 304 | raise RuntimeError( 305 | "If --ssl-mode is MTLS, --ssl-key, --ssl-cert and --ssl-root-cert are required." 
306 | ) 307 | 308 | private_key = read_file_content(args.ssl_key) 309 | certificate_chain = read_file_content(args.ssl_cert) 310 | root_certificates = read_file_content(args.ssl_root_cert) 311 | channel_credentials = grpc.ssl_channel_credentials( 312 | root_certificates=root_certificates, 313 | private_key=private_key, 314 | certificate_chain=certificate_chain, 315 | ) 316 | else: 317 | # Running with NVCF 318 | if args.preview_mode: 319 | request_metadata = ( 320 | ("authorization", "Bearer {}".format(args.api_key)), 321 | ("function-id", args.function_id), 322 | ) 323 | channel_credentials = grpc.ssl_channel_credentials() 324 | # Running TLS mode, without NVCF 325 | else: 326 | if not (args.ssl_root_cert): 327 | raise RuntimeError("If --ssl-mode is TLS, --ssl-root-cert is required.") 328 | root_certificates = read_file_content(args.ssl_root_cert) 329 | channel_credentials = grpc.ssl_channel_credentials( 330 | root_certificates=root_certificates 331 | ) 332 | 333 | with grpc.secure_channel(target=args.target, credentials=channel_credentials) as channel: 334 | process_request( 335 | channel=channel, 336 | input_filepath=input_filepath, 337 | output_filepath=output_filepath, 338 | model_type=model_type, 339 | sample_rate=sample_rate, 340 | streaming=streaming, 341 | request_metadata=request_metadata, 342 | ) 343 | else: 344 | with grpc.insecure_channel(target=args.target) as channel: 345 | process_request( 346 | channel=channel, 347 | input_filepath=input_filepath, 348 | output_filepath=output_filepath, 349 | model_type=model_type, 350 | sample_rate=sample_rate, 351 | streaming=streaming, 352 | ) 353 | 354 | 355 | if __name__ == "__main__": 356 | main() 357 | -------------------------------------------------------------------------------- /bnr/scripts/bnr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 
20 | 21 | import argparse 22 | import os 23 | import sys 24 | import grpc 25 | import time 26 | import soundfile as sf 27 | import numpy as np 28 | from tqdm import tqdm 29 | from typing import Iterator, Optional 30 | 31 | sys.path.append(os.path.join(os.getcwd(), "../interfaces/bnr")) 32 | # Importing gRPC compiler auto-generated maxine bnr library 33 | import bnr_pb2 # noqa: E402 34 | import bnr_pb2_grpc # noqa: E402 35 | 36 | # Sample rate constants 37 | CONST_SAMPLE_48KHZ = 48000 38 | CONST_SAMPLE_16KHZ = 16000 39 | 40 | 41 | def read_file_content(file_path: os.PathLike) -> None: 42 | """Function to read file content as bytes. 43 | 44 | Args: 45 | file_path: Path to input file 46 | """ 47 | if not os.path.isfile(file_path): 48 | raise FileNotFoundError(f"The file '{file_path}' does not exist. Exiting.") 49 | 50 | with open(file_path, "rb") as file: 51 | return file.read() 52 | 53 | 54 | def generate_request_for_inference( 55 | input_filepath: os.PathLike, 56 | sample_rate: int, 57 | streaming: bool, 58 | intensity_ratio: float = None, 59 | progress_bar: Optional[tqdm] = None, 60 | ) -> None: 61 | """Generator to produce the request data stream 62 | 63 | Args: 64 | input_filepath: Path to input file 65 | sample_rate: Input audio sample rate 66 | streaming: Enables grpc streaming mode 67 | intensity_ratio: Controls denoising intensity (0.0 to 1.0), only works with v1 models 68 | progress_bar: (Optional) Progress bar instance (streaming mode only) 69 | """ 70 | # First send the config if intensity_ratio is specified for v1 models 71 | if intensity_ratio is not None: 72 | config_request = bnr_pb2.EnhanceAudioRequest( 73 | config=bnr_pb2.EnhanceAudioConfig(intensity_ratio=intensity_ratio) 74 | ) 75 | config_request.config.intensity_ratio = intensity_ratio 76 | yield config_request 77 | 78 | if streaming: 79 | """ 80 | Input audio chunk is generated based on sample rate and input size 10ms, 81 | """ 82 | input_audio, sample_rate_file = sf.read(input_filepath) 83 | input_audio = input_audio.astype(np.float32) # Convert to float32 84 | input_size_in_ms = 10 85 | samples_per_ms = sample_rate // 1000 86 | input_float_size = int(input_size_in_ms * samples_per_ms) 87 | 88 | pad_length = input_float_size - len(input_audio) % input_float_size 89 | input_audio = np.pad(input_audio, (0, pad_length), "constant") 90 | 91 | if progress_bar is not None: 92 | progress_bar.total = len(input_audio) // input_float_size 93 | 94 | print( 95 | f"Len {len(input_audio)}, chunk_size {input_float_size}, audio {input_audio}, " 96 | f"type {input_audio.dtype}" 97 | ) 98 | 99 | print( 100 | f"Will process {len(input_audio)//sample_rate} seconds of input audio in " 101 | f"{input_size_in_ms} ms chunks" 102 | ) 103 | for i in range(0, len(input_audio), input_float_size): 104 | data = input_audio[i : i + input_float_size] 105 | yield bnr_pb2.EnhanceAudioRequest(audio_stream_data=data.tobytes()) 106 | else: 107 | DATA_CHUNKS = 64 * 1024 # bytes, we send the wav file in 64KB chunks 108 | with open(input_filepath, "rb") as fd: 109 | while True: 110 | buffer = fd.read(DATA_CHUNKS) 111 | if buffer == b"": 112 | break 113 | yield bnr_pb2.EnhanceAudioRequest(audio_stream_data=buffer) 114 | 115 | 116 | def write_output_file_from_response( 117 | response_iter: Iterator[bnr_pb2.EnhanceAudioResponse], 118 | output_filepath: os.PathLike, 119 | sample_rate: int, 120 | streaming: bool, 121 | progress_bar: Optional[tqdm], 122 | ) -> None: 123 | """Function to write the output file from the incoming gRPC data stream. 
124 | 125 | Args: 126 | response_iter: Responses from the server to write into output file 127 | output_filepath: Path to output file 128 | sample_rate: Input audio sample rate 129 | streaming: Enables grpc streaming mode 130 | progress_bar: (Optional) Progress bar instance (streaming mode only) 131 | """ 132 | if streaming: 133 | output_audio = [] 134 | response_count = 0 135 | for response in response_iter: 136 | if response.HasField("audio_stream_data"): 137 | response_count += 1 138 | if progress_bar is not None: 139 | progress_bar.update(1) 140 | output_audio.append(np.frombuffer(response.audio_stream_data, np.float32)) 141 | 142 | sf.write(output_filepath, np.hstack(output_audio), sample_rate) 143 | if progress_bar: 144 | progress_bar.close() 145 | return response_count 146 | else: 147 | with open(output_filepath, "wb") as fd: 148 | for response in response_iter: 149 | if response.HasField("audio_stream_data"): 150 | fd.write(response.audio_stream_data) 151 | 152 | 153 | def parse_args() -> None: 154 | """ 155 | Parse command-line arguments using argparse. 156 | """ 157 | # Set up argument parsing 158 | parser = argparse.ArgumentParser( 159 | description="Process wav audio files using gRPC and apply bnr." 160 | ) 161 | parser.add_argument( 162 | "--preview-mode", 163 | action="store_true", 164 | help="Flag to send request to preview NVCF NIM server on " 165 | "https://build.nvidia.com/nvidia/bnr/api", 166 | ) 167 | parser.add_argument( 168 | "--ssl-mode", 169 | type=str, 170 | help="Flag to set SSL mode, default is None", 171 | default=None, 172 | choices=["MTLS", "TLS"], 173 | ) 174 | parser.add_argument( 175 | "--ssl-key", 176 | type=str, 177 | default=None, 178 | help="The path to ssl private key.", 179 | ) 180 | parser.add_argument( 181 | "--ssl-cert", 182 | type=str, 183 | default=None, 184 | help="The path to ssl certificate chain.", 185 | ) 186 | parser.add_argument( 187 | "--ssl-root-cert", 188 | type=str, 189 | default=None, 190 | help="The path to ssl root certificate.", 191 | ) 192 | parser.add_argument( 193 | "--target", 194 | type=str, 195 | default="127.0.0.1:8001", 196 | help="IP:port of gRPC service, when hosted locally. " 197 | "Use grpc.nvcf.nvidia.com:443 when hosted on NVCF.", 198 | ) 199 | parser.add_argument( 200 | "--input", 201 | type=str, 202 | default="../assets/bnr_48k_input.wav", 203 | help="The path to the input audio file.", 204 | ) 205 | parser.add_argument( 206 | "--output", 207 | type=str, 208 | default="bnr_48k_output.wav", 209 | help="The path for the output audio file.", 210 | ) 211 | parser.add_argument( 212 | "--api-key", 213 | type=str, 214 | help="NGC API key required for authentication, " 215 | "utilized when using TRY API ignored otherwise", 216 | ) 217 | parser.add_argument( 218 | "--function-id", 219 | type=str, 220 | help="NVCF function ID for the service, utilized when using TRY API ignored otherwise", 221 | ) 222 | parser.add_argument( 223 | "--streaming", 224 | action="store_true", 225 | help="Flag to enable grpc streaming mode. ", 226 | ) 227 | parser.add_argument( 228 | "--sample-rate", 229 | type=int, 230 | help="Sample rate of input audio file in Hz, default is 48000.", 231 | default=CONST_SAMPLE_48KHZ, 232 | choices=[CONST_SAMPLE_48KHZ, CONST_SAMPLE_16KHZ], 233 | ) 234 | 235 | parser.add_argument( 236 | "--intensity-ratio", 237 | type=float, 238 | help=( 239 | "Intensity ratio value between 0 and 1 to control denoising intensity. " 240 | "Default is 1.0 (maximum denoising)." 
241 | ), 242 | default=None, 243 | ) 244 | args = parser.parse_args() 245 | 246 | # Validate intensity_ratio value 247 | if args.intensity_ratio is not None and ( 248 | args.intensity_ratio < 0.0 or args.intensity_ratio > 1.0 249 | ): 250 | parser.error("Intensity ratio value must be between 0.0 and 1.0") 251 | 252 | return args 253 | 254 | 255 | def process_request( 256 | channel: any, 257 | input_filepath: os.PathLike, 258 | output_filepath: os.PathLike, 259 | sample_rate: int, 260 | streaming: bool, 261 | request_metadata: dict = None, 262 | intensity_ratio: float = None, 263 | ) -> None: 264 | """Function to process gRPC request 265 | 266 | Args: 267 | channel: gRPC channel for server client communication 268 | input_filepath: Path to input file 269 | output_filepath: Path to output file 270 | sample_rate: Input audio sample rate 271 | streaming: Enables grpc streaming mode 272 | request_metadata: Credentials to process request 273 | intensity_ratio: Controls denoising intensity (0.0 to 1.0) 274 | """ 275 | try: 276 | stub = bnr_pb2_grpc.MaxineBNRStub(channel) 277 | start_time = time.time() 278 | 279 | progress_bar = None 280 | if streaming: 281 | progress_bar = tqdm() 282 | 283 | responses = stub.EnhanceAudio( 284 | generate_request_for_inference( 285 | input_filepath=input_filepath, 286 | sample_rate=sample_rate, 287 | streaming=streaming, 288 | intensity_ratio=intensity_ratio, 289 | progress_bar=progress_bar, 290 | ), 291 | metadata=request_metadata, 292 | ) 293 | 294 | response_count = write_output_file_from_response( 295 | response_iter=responses, 296 | output_filepath=output_filepath, 297 | sample_rate=sample_rate, 298 | streaming=streaming, 299 | progress_bar=progress_bar, 300 | ) 301 | 302 | end_time = time.time() 303 | if streaming: 304 | avg_latency = (end_time - start_time) / response_count 305 | print(f"Average latency per request: {avg_latency*1000:.2f}ms") 306 | print(f"Processed {response_count} chunks.") 307 | 308 | print( 309 | f"Function invocation completed in {end_time-start_time:.2f}s, " 310 | "the output file is generated." 311 | ) 312 | except BaseException as e: 313 | print(e) 314 | 315 | 316 | def main(): 317 | """ 318 | Main client function 319 | """ 320 | args = parse_args() 321 | streaming = args.streaming 322 | print(f"Streaming mode set to {streaming}") 323 | sample_rate = CONST_SAMPLE_48KHZ 324 | if args.sample_rate == CONST_SAMPLE_16KHZ: 325 | sample_rate = CONST_SAMPLE_16KHZ 326 | print(f"Sample Rate: {sample_rate}") 327 | input_filepath = args.input 328 | output_filepath = args.output 329 | 330 | # Check if input file path exists 331 | if os.path.isfile(input_filepath): 332 | print(f"The file '{input_filepath}' exists. Proceeding with processing.") 333 | else: 334 | raise FileNotFoundError(f"The file '{input_filepath}' does not exist. 
Exiting.") 335 | 336 | # Check the sample rate of the input audio file 337 | input_info = sf.info(input_filepath) 338 | input_sample_rate = input_info.samplerate 339 | print(f"Input file sample rate: {input_sample_rate}") 340 | 341 | # Check if the input file's sample rate matches the expected sample rate 342 | if input_sample_rate != sample_rate: 343 | raise ValueError(f"Sample rate mismatch: expected {sample_rate}, got {input_sample_rate}.") 344 | 345 | if args.preview_mode: 346 | if args.ssl_mode != "TLS": 347 | # Preview mode only supports TLS mode 348 | args.ssl_mode = "TLS" 349 | print("--ssl-mode is set as TLS, since preview_mode is enabled.") 350 | if args.ssl_root_cert: 351 | raise RuntimeError("Preview mode does not support custom root certificate.") 352 | 353 | if args.ssl_mode is not None: 354 | request_metadata = None 355 | root_certificates = None 356 | if args.ssl_mode == "MTLS": 357 | if not (args.ssl_key and args.ssl_cert and args.ssl_root_cert): 358 | raise RuntimeError( 359 | "If --ssl-mode is MTLS, --ssl-key, --ssl-cert and " 360 | "--ssl-root-cert are required." 361 | ) 362 | 363 | private_key = read_file_content(args.ssl_key) 364 | certificate_chain = read_file_content(args.ssl_cert) 365 | root_certificates = read_file_content(args.ssl_root_cert) 366 | channel_credentials = grpc.ssl_channel_credentials( 367 | root_certificates=root_certificates, 368 | private_key=private_key, 369 | certificate_chain=certificate_chain, 370 | ) 371 | else: 372 | # Running with NVCF 373 | if args.preview_mode: 374 | request_metadata = ( 375 | ("authorization", "Bearer {}".format(args.api_key)), 376 | ("function-id", args.function_id), 377 | ) 378 | channel_credentials = grpc.ssl_channel_credentials() 379 | # Running TLS mode, without NVCF 380 | else: 381 | if not (args.ssl_root_cert): 382 | raise RuntimeError("If --ssl-mode is TLS, --ssl-root-cert is required.") 383 | root_certificates = read_file_content(args.ssl_root_cert) 384 | channel_credentials = grpc.ssl_channel_credentials( 385 | root_certificates=root_certificates 386 | ) 387 | 388 | with grpc.secure_channel(target=args.target, credentials=channel_credentials) as channel: 389 | process_request( 390 | channel=channel, 391 | input_filepath=input_filepath, 392 | output_filepath=output_filepath, 393 | sample_rate=sample_rate, 394 | streaming=streaming, 395 | request_metadata=request_metadata, 396 | intensity_ratio=args.intensity_ratio, 397 | ) 398 | else: 399 | with grpc.insecure_channel(target=args.target) as channel: 400 | process_request( 401 | channel=channel, 402 | input_filepath=input_filepath, 403 | output_filepath=output_filepath, 404 | sample_rate=sample_rate, 405 | streaming=streaming, 406 | intensity_ratio=args.intensity_ratio, 407 | ) 408 | 409 | 410 | if __name__ == "__main__": 411 | main() 412 | --------------------------------------------------------------------------------