├── app ├── __init__.py ├── robot_command_model_evaluation.py └── robot_command_text_classification.py ├── data ├── datasets │ ├── train │ │ ├── cmd_stop │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 1.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 8.txt │ │ │ └── 9.txt │ │ ├── cmd_backward │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 3.txt │ │ │ └── 6.txt │ │ ├── cmd_follow │ │ │ ├── 0.txt │ │ │ ├── 5.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 4.txt │ │ │ └── 2.txt │ │ ├── cmd_forward │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 6.txt │ │ │ ├── 2.txt │ │ │ ├── 8.txt │ │ │ └── 9.txt │ │ ├── cmd_left │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ └── 1.txt │ │ ├── cmd_right │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 3.txt │ │ │ └── 1.txt │ │ ├── cmd_start │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 9.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ └── 4.txt │ │ ├── cmd_vision │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 1.txt │ │ │ ├── 4.txt │ │ │ ├── 3.txt │ │ │ └── 9.txt │ │ └── cmd_self-driving │ │ │ ├── 2.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 9.txt │ │ │ └── 7.txt │ ├── test │ │ ├── cmd_stop │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 1.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ └── 9.txt │ │ ├── cmd_backward │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 3.txt │ │ │ └── 6.txt │ │ ├── cmd_forward │ │ │ ├── 0.txt │ │ │ ├── 5.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 4.txt │ │ │ └── 2.txt │ │ ├── cmd_left │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ └── 1.txt │ │ ├── cmd_right │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 3.txt │ │ │ └── 1.txt │ │ ├── cmd_start │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 9.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ └── 4.txt │ │ ├── cmd_vision │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 1.txt │ │ │ ├── 4.txt │ │ │ ├── 3.txt │ │ │ └── 9.txt │ │ ├── cmd_self-driving │ │ │ ├── 2.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 9.txt │ │ │ └── 7.txt │ │ └── cmd_follow │ │ │ ├── 0.txt │ │ │ ├── 2.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 1.txt │ │ │ ├── 9.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ └── 8.txt │ └── .gitignore └── models │ └── .gitignore ├── jetbot_riva_voice ├── jetbot_riva_voice │ ├── __init__.py │ ├── include │ │ ├── __init__.py │ │ ├── text_classifier_utility.py │ 
│ └── node_parameter_utility.py │ └── script │ │ ├── __init__.py │ │ ├── audio_list.py │ │ ├── Jetbot_TTS_Processor.py │ │ ├── Jetbot_ASR_Processor.py │ │ └── Jetbot_ASR_Agent.py ├── resource │ └── jetbot_riva_voice ├── setup.cfg ├── package.xml ├── test │ ├── test_pep257.py │ ├── test_flake8.py │ └── test_copyright.py ├── setup.py ├── param │ └── jetbot_voice_params.yaml └── LICENSE ├── docs ├── JetBot_1.jpg ├── Jetbot_2.jpg ├── JetBot_ASR_voice_tool.png └── setup.md ├── requirements.txt ├── start_ros2_shell.sh ├── Dockerfile ├── run.sh └── README.md /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/0.txt: -------------------------------------------------------------------------------- 1 | Stop now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/6.txt: -------------------------------------------------------------------------------- 1 | Halt now. -------------------------------------------------------------------------------- /data/models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/0.txt: -------------------------------------------------------------------------------- 1 | Robot stop now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/6.txt: -------------------------------------------------------------------------------- 1 | Robot halt now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/2.txt: -------------------------------------------------------------------------------- 1 | Go back. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/4.txt: -------------------------------------------------------------------------------- 1 | Retreat. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/7.txt: -------------------------------------------------------------------------------- 1 | Move back. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/8.txt: -------------------------------------------------------------------------------- 1 | Step back. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/9.txt: -------------------------------------------------------------------------------- 1 | Back up. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/0.txt: -------------------------------------------------------------------------------- 1 | Follow me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/5.txt: -------------------------------------------------------------------------------- 1 | Trail me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/5.txt: -------------------------------------------------------------------------------- 1 | Go forward. 
-------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/7.txt: -------------------------------------------------------------------------------- 1 | Move ahead. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/0.txt: -------------------------------------------------------------------------------- 1 | Turn left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/6.txt: -------------------------------------------------------------------------------- 1 | Rotate left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/0.txt: -------------------------------------------------------------------------------- 1 | Turn right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/7.txt: -------------------------------------------------------------------------------- 1 | Rotate right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/0.txt: -------------------------------------------------------------------------------- 1 | Start now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/6.txt: -------------------------------------------------------------------------------- 1 | Begin now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/7.txt: -------------------------------------------------------------------------------- 1 | Initiate now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/9.txt: -------------------------------------------------------------------------------- 1 | Kick off now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/7.txt: -------------------------------------------------------------------------------- 1 | Stop the task. -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jetbot_riva_voice/resource/jetbot_riva_voice: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/2.txt: -------------------------------------------------------------------------------- 1 | Robot go back. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/4.txt: -------------------------------------------------------------------------------- 1 | Robot retreat. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/7.txt: -------------------------------------------------------------------------------- 1 | Robot move back. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/8.txt: -------------------------------------------------------------------------------- 1 | Robot step back.
-------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/9.txt: -------------------------------------------------------------------------------- 1 | Robot back up. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/0.txt: -------------------------------------------------------------------------------- 1 | Robot follow me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/5.txt: -------------------------------------------------------------------------------- 1 | Robot trail me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/0.txt: -------------------------------------------------------------------------------- 1 | Robot turn left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/6.txt: -------------------------------------------------------------------------------- 1 | Robot rotate left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/0.txt: -------------------------------------------------------------------------------- 1 | Robot turn right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/7.txt: -------------------------------------------------------------------------------- 1 | Robot rotate right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/0.txt: -------------------------------------------------------------------------------- 1 | Robot start now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/6.txt: -------------------------------------------------------------------------------- 1 | Robot begin now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/7.txt: -------------------------------------------------------------------------------- 1 | Robot initiate now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/9.txt: -------------------------------------------------------------------------------- 1 | Robot kick off now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/8.txt: -------------------------------------------------------------------------------- 1 | Robot terminate now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/0.txt: -------------------------------------------------------------------------------- 1 | Move backward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/1.txt: -------------------------------------------------------------------------------- 1 | Please reverse. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/5.txt: -------------------------------------------------------------------------------- 1 | Go backward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/1.txt: -------------------------------------------------------------------------------- 1 | Come with me. 
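For most classes, each test utterance above is just its training counterpart with a leading "Robot" wake word and the original first letter lowercased ("Stop now." becomes "Robot stop now."), so the test set probes robustness to a wake-word prefix rather than to new phrasings. A minimal sketch that verifies this pairing, assuming the `data/datasets` layout from the tree above (the check itself is illustrative, not part of the repo):

```python
from pathlib import Path

root = Path("data/datasets")
for train_file in sorted(root.glob("train/*/*.txt")):
    test_file = root / "test" / train_file.parent.name / train_file.name
    if not test_file.exists():
        continue  # some classes (e.g. cmd_follow) use different test phrasings
    train_text = train_file.read_text().strip()
    test_text = test_file.read_text().strip()
    # Expected pattern: "Stop now." -> "Robot stop now."
    expected = "Robot " + train_text[0].lower() + train_text[1:]
    if test_text != expected:
        print(f"unpaired: {test_file}: {test_text!r}")
```

Running a check like this also surfaces the handful of entries that deviate from the convention, such as the follow-style phrases stored under `test/cmd_forward`.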
-------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/3.txt: -------------------------------------------------------------------------------- 1 | Walk behind me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/6.txt: -------------------------------------------------------------------------------- 1 | Stick with me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/7.txt: -------------------------------------------------------------------------------- 1 | Accompany me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/8.txt: -------------------------------------------------------------------------------- 1 | Move with me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/0.txt: -------------------------------------------------------------------------------- 1 | Move forward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/1.txt: -------------------------------------------------------------------------------- 1 | Please advance. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/3.txt: -------------------------------------------------------------------------------- 1 | Proceed forward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/4.txt: -------------------------------------------------------------------------------- 1 | Move straight. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/6.txt: -------------------------------------------------------------------------------- 1 | Advance ahead. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/2.txt: -------------------------------------------------------------------------------- 1 | Make a left turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/3.txt: -------------------------------------------------------------------------------- 1 | Shift to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/4.txt: -------------------------------------------------------------------------------- 1 | Move to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/5.txt: -------------------------------------------------------------------------------- 1 | Turn to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/7.txt: -------------------------------------------------------------------------------- 1 | Take a left turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/8.txt: -------------------------------------------------------------------------------- 1 | Swing to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/9.txt: -------------------------------------------------------------------------------- 1 | Veer to the left. 
-------------------------------------------------------------------------------- /data/datasets/train/cmd_right/2.txt: -------------------------------------------------------------------------------- 1 | Make a right turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/4.txt: -------------------------------------------------------------------------------- 1 | Move to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/5.txt: -------------------------------------------------------------------------------- 1 | Turn to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/6.txt: -------------------------------------------------------------------------------- 1 | Take a right turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/8.txt: -------------------------------------------------------------------------------- 1 | Take a right turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/9.txt: -------------------------------------------------------------------------------- 1 | Veer to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/1.txt: -------------------------------------------------------------------------------- 1 | Begin the task. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/5.txt: -------------------------------------------------------------------------------- 1 | Start the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/8.txt: -------------------------------------------------------------------------------- 1 | Commence the task. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/1.txt: -------------------------------------------------------------------------------- 1 | Stop immediately. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/2.txt: -------------------------------------------------------------------------------- 1 | Stop all actions. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/3.txt: -------------------------------------------------------------------------------- 1 | Stop processing. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/4.txt: -------------------------------------------------------------------------------- 1 | Stop everything. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/5.txt: -------------------------------------------------------------------------------- 1 | Stop immediately. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/8.txt: -------------------------------------------------------------------------------- 1 | Cease the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/9.txt: -------------------------------------------------------------------------------- 1 | End the action. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/0.txt: -------------------------------------------------------------------------------- 1 | Robot move backward. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/1.txt: -------------------------------------------------------------------------------- 1 | Robot please reverse. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/5.txt: -------------------------------------------------------------------------------- 1 | Robot go backward. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/1.txt: -------------------------------------------------------------------------------- 1 | Robot come with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/3.txt: -------------------------------------------------------------------------------- 1 | Robot walk behind me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/6.txt: -------------------------------------------------------------------------------- 1 | Robot stick with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/7.txt: -------------------------------------------------------------------------------- 1 | Robot accompany me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/8.txt: -------------------------------------------------------------------------------- 1 | Robot move with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/2.txt: -------------------------------------------------------------------------------- 1 | Robot make a left turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/3.txt: -------------------------------------------------------------------------------- 1 | Robot shift to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/4.txt: -------------------------------------------------------------------------------- 1 | Robot move to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/5.txt: -------------------------------------------------------------------------------- 1 | Robot turn to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/7.txt: -------------------------------------------------------------------------------- 1 | Robot take a left turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/8.txt: -------------------------------------------------------------------------------- 1 | Robot swing to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/9.txt: -------------------------------------------------------------------------------- 1 | Robot veer to the left. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_right/2.txt: -------------------------------------------------------------------------------- 1 | Robot make a right turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/4.txt: -------------------------------------------------------------------------------- 1 | Robot move to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/5.txt: -------------------------------------------------------------------------------- 1 | Robot turn to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/6.txt: -------------------------------------------------------------------------------- 1 | Robot take a right turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/8.txt: -------------------------------------------------------------------------------- 1 | Robot take a right turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/9.txt: -------------------------------------------------------------------------------- 1 | Robot veer to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/1.txt: -------------------------------------------------------------------------------- 1 | Robot begin the task. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/5.txt: -------------------------------------------------------------------------------- 1 | Robot start the action. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/8.txt: -------------------------------------------------------------------------------- 1 | Robot commence the task. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/1.txt: -------------------------------------------------------------------------------- 1 | Robot halt the action. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/2.txt: -------------------------------------------------------------------------------- 1 | Robot cease operation. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/3.txt: -------------------------------------------------------------------------------- 1 | Robot terminate the task. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/4.txt: -------------------------------------------------------------------------------- 1 | Robot end the process. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/5.txt: -------------------------------------------------------------------------------- 1 | Robot stop immediately. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/7.txt: -------------------------------------------------------------------------------- 1 | Robot cease the task. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/9.txt: -------------------------------------------------------------------------------- 1 | Robot end the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/3.txt: -------------------------------------------------------------------------------- 1 | Move in reverse. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/6.txt: -------------------------------------------------------------------------------- 1 | Reverse direction. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/9.txt: -------------------------------------------------------------------------------- 1 | Move along with me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/2.txt: -------------------------------------------------------------------------------- 1 | Go straight ahead. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/8.txt: -------------------------------------------------------------------------------- 1 | Proceed straight. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/9.txt: -------------------------------------------------------------------------------- 1 | Continue forward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/3.txt: -------------------------------------------------------------------------------- 1 | Shift to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/2.txt: -------------------------------------------------------------------------------- 1 | Initiate the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/3.txt: -------------------------------------------------------------------------------- 1 | Commence operation. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/4.txt: -------------------------------------------------------------------------------- 1 | Kick off the process. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/0.txt: -------------------------------------------------------------------------------- 1 | Describe the image. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/7.txt: -------------------------------------------------------------------------------- 1 | Analyze the photo. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/8.txt: -------------------------------------------------------------------------------- 1 | Explain what you see. 
-------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/include/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | !test/ 2 | !train/ 3 | !.gitignore -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/3.txt: -------------------------------------------------------------------------------- 1 | Robot move in reverse. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/6.txt: -------------------------------------------------------------------------------- 1 | Robot reverse direction. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/9.txt: -------------------------------------------------------------------------------- 1 | Robot move along with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/3.txt: -------------------------------------------------------------------------------- 1 | Robot shift to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/2.txt: -------------------------------------------------------------------------------- 1 | Robot initiate the action. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/3.txt: -------------------------------------------------------------------------------- 1 | Robot commence operation. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/4.txt: -------------------------------------------------------------------------------- 1 | Robot kick off the process. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/0.txt: -------------------------------------------------------------------------------- 1 | Robot describe the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/7.txt: -------------------------------------------------------------------------------- 1 | Robot analyze the photo. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/8.txt: -------------------------------------------------------------------------------- 1 | Robot explain what you see. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/4.txt: -------------------------------------------------------------------------------- 1 | Keep up with me as I move. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/1.txt: -------------------------------------------------------------------------------- 1 | Please rotate to the left.
-------------------------------------------------------------------------------- /data/datasets/train/cmd_right/1.txt: -------------------------------------------------------------------------------- 1 | Please rotate to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/2.txt: -------------------------------------------------------------------------------- 1 | Activate auto drive. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/6.txt: -------------------------------------------------------------------------------- 1 | Begin self driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/8.txt: -------------------------------------------------------------------------------- 1 | Initiate auto drive. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/2.txt: -------------------------------------------------------------------------------- 1 | Analyze the camera feed. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/5.txt: -------------------------------------------------------------------------------- 1 | Describe the camera image. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/6.txt: -------------------------------------------------------------------------------- 1 | What is in the picture? -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/4.txt: -------------------------------------------------------------------------------- 1 | Robot keep up with me as I move. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/1.txt: -------------------------------------------------------------------------------- 1 | Robot please rotate to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/1.txt: -------------------------------------------------------------------------------- 1 | Robot please rotate to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/2.txt: -------------------------------------------------------------------------------- 1 | Robot activate auto drive. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/6.txt: -------------------------------------------------------------------------------- 1 | Robot begin self driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/8.txt: -------------------------------------------------------------------------------- 1 | Robot initiate auto drive. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/2.txt: -------------------------------------------------------------------------------- 1 | Robot analyze the camera feed. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/5.txt: -------------------------------------------------------------------------------- 1 | Robot describe the camera image. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/6.txt: -------------------------------------------------------------------------------- 1 | Robot what is in the picture? -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/0.txt: -------------------------------------------------------------------------------- 1 | Start self driving mode. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/1.txt: -------------------------------------------------------------------------------- 1 | Begin autonomous driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/3.txt: -------------------------------------------------------------------------------- 1 | Initiate self driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/4.txt: -------------------------------------------------------------------------------- 1 | Engage autonomous mode. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/5.txt: -------------------------------------------------------------------------------- 1 | Start automatic driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/9.txt: -------------------------------------------------------------------------------- 1 | Engage self driving mode. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/1.txt: -------------------------------------------------------------------------------- 1 | What do you see in the picture? -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/4.txt: -------------------------------------------------------------------------------- 1 | Provide details about the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/0.txt: -------------------------------------------------------------------------------- 1 | Robot follow the person in the camera. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/0.txt: -------------------------------------------------------------------------------- 1 | Robot start self driving mode. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/1.txt: -------------------------------------------------------------------------------- 1 | Robot begin autonomous driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/3.txt: -------------------------------------------------------------------------------- 1 | Robot initiate self driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/4.txt: -------------------------------------------------------------------------------- 1 | Robot engage autonomous mode. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/5.txt: -------------------------------------------------------------------------------- 1 | Robot start automatic driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/9.txt: -------------------------------------------------------------------------------- 1 | Robot engage self driving mode. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/1.txt: -------------------------------------------------------------------------------- 1 | Robot what do you see in the picture? -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/4.txt: -------------------------------------------------------------------------------- 1 | Robot provide details about the image. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/2.txt: -------------------------------------------------------------------------------- 1 | Begin following the person you see. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/7.txt: -------------------------------------------------------------------------------- 1 | Activate autonomous driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/3.txt: -------------------------------------------------------------------------------- 1 | Explain the content of the photo. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/9.txt: -------------------------------------------------------------------------------- 1 | Provide a description of the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/2.txt: -------------------------------------------------------------------------------- 1 | Robot begin following the person you see. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/5.txt: -------------------------------------------------------------------------------- 1 | Robot follow the individual in the camera. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/6.txt: -------------------------------------------------------------------------------- 1 | Robot start following the person in view. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/7.txt: -------------------------------------------------------------------------------- 1 | Robot begin tracking the person you see. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/2.txt: -------------------------------------------------------------------------------- 1 | Robot begin following the person you see. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/7.txt: -------------------------------------------------------------------------------- 1 | Robot activate autonomous driving. 
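The listings above follow the one-folder-per-class text layout (`data/datasets/{train,test}/<label>/<n>.txt`) that `app/robot_command_text_classification.py` trains on per `docs/setup.md` below. That training script is not included in this dump, so the following is only a minimal loading sketch, assuming TensorFlow/Keras (both appear, commented out, in `requirements.txt`):

```python
import tensorflow as tf

# Each subdirectory name (cmd_backward, cmd_follow, ...) becomes a class label.
train_ds = tf.keras.utils.text_dataset_from_directory(
    "data/datasets/train", batch_size=8, seed=42)
test_ds = tf.keras.utils.text_dataset_from_directory(
    "data/datasets/test", batch_size=8, shuffle=False)

print(train_ds.class_names)  # labels are inferred in sorted folder order

# Map the short command strings to integer token sequences for a small CNN.
vectorize = tf.keras.layers.TextVectorization(
    max_tokens=1000, output_mode="int", output_sequence_length=16)
vectorize.adapt(train_ds.map(lambda text, label: text))
```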
-------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/3.txt: -------------------------------------------------------------------------------- 1 | Robot explain the content of the photo. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/9.txt: -------------------------------------------------------------------------------- 1 | Robot provide a description of the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/1.txt: -------------------------------------------------------------------------------- 1 | Robot start tracking the individual in view. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/9.txt: -------------------------------------------------------------------------------- 1 | Robot engage tracking for the person in sight. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/3.txt: -------------------------------------------------------------------------------- 1 | Robot initiate tracking of the person in the frame. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/4.txt: -------------------------------------------------------------------------------- 1 | Robot engage follow mode for the person in sight. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/8.txt: -------------------------------------------------------------------------------- 1 | Robot initiate follow mode for the person in the frame. -------------------------------------------------------------------------------- /docs/JetBot_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jen-Hung-Ho/ros2_jetbot_voice/HEAD/docs/JetBot_1.jpg -------------------------------------------------------------------------------- /docs/Jetbot_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jen-Hung-Ho/ros2_jetbot_voice/HEAD/docs/Jetbot_2.jpg -------------------------------------------------------------------------------- /docs/JetBot_ASR_voice_tool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jen-Hung-Ho/ros2_jetbot_voice/HEAD/docs/JetBot_ASR_voice_tool.png -------------------------------------------------------------------------------- /jetbot_riva_voice/setup.cfg: -------------------------------------------------------------------------------- 1 | [develop] 2 | script_dir=$base/lib/jetbot_riva_voice 3 | [install] 4 | install_scripts=$base/lib/jetbot_riva_voice 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | packaging>=20.0 2 | # nvidia-riva-client 3 | # pyyaml>=6 4 | termcolor 5 | # pyaudio 6 | # wget 7 | # keras 8 | # tensorflow 9 | # git+https://github.com/Granulate/DockerHub-API.git 10 | 11 | -------------------------------------------------------------------------------- /start_ros2_shell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if a container ID is provided 4 | if [ -z "$1" ]; 
then 5 | echo "Usage: $0 <container_id>" 6 | else 7 | CONTAINER_ID=$1 8 | # Execute the command inside the specified container 9 | docker exec -it $CONTAINER_ID /bin/bash -c "source install/setup.bash && exec /bin/bash" 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /jetbot_riva_voice/package.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?> 3 | <package format="3"> 4 | <name>jetbot_riva_voice</name> 5 | <version>0.0.0</version> 6 | <description>TODO: Package description</description> 7 | <maintainer email="jenhungho@outlook.com">jetbot</maintainer> 8 | <license>Apache-2.0</license> 9 | 10 | <exec_depend>python3-pyaudio</exec_depend> 11 | 12 | <test_depend>ament_copyright</test_depend> 13 | <test_depend>ament_flake8</test_depend> 14 | <test_depend>ament_pep257</test_depend> 15 | <test_depend>python3-pytest</test_depend> 16 | 17 | <export> 18 | <build_type>ament_python</build_type> 19 | </export> 20 | </package> 21 | -------------------------------------------------------------------------------- /jetbot_riva_voice/test/test_pep257.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_pep257.main import main 16 | import pytest 17 | 18 | 19 | @pytest.mark.linter 20 | @pytest.mark.pep257 21 | def test_pep257(): 22 | rc = main(argv=['.', 'test']) 23 | assert rc == 0, 'Found code style errors / warnings' 24 | -------------------------------------------------------------------------------- /jetbot_riva_voice/test/test_flake8.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_flake8.main import main_with_errors 16 | import pytest 17 | 18 | 19 | @pytest.mark.flake8 20 | @pytest.mark.linter 21 | def test_flake8(): 22 | rc, errors = main_with_errors(argv=[]) 23 | assert rc == 0, \ 24 | 'Found %d code style errors / warnings:\n' % len(errors) + \ 25 | '\n'.join(errors) 26 | -------------------------------------------------------------------------------- /jetbot_riva_voice/test/test_copyright.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_copyright.main import main 16 | import pytest 17 | 18 | 19 | # Remove the `skip` decorator once the source file(s) have a copyright header 20 | @pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') 21 | @pytest.mark.copyright 22 | @pytest.mark.linter 23 | def test_copyright(): 24 | rc = main(argv=['.', 'test']) 25 | assert rc == 0, 'Found errors' 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | # Use an argument for the base image 3 | # ARG BASE_IMAGE=dustynv/ros:humble-llm-r36.3.0 4 | ARG BASE_IMAGE=dustynv/nano_llm:humble-r36.3.0 5 | 6 | FROM ${BASE_IMAGE} 7 | 8 | # ARG ROS2_SETUP=/opt/ros/humble/install/setup.bash 9 | ARG ROS2_SETUP=/ros2_workspace/install/setup.bash 10 | 11 | # ENV key=value 12 | ENV ROS_DISTRO=humble 13 | ENV SHELL=/bin/bash 14 | 15 | # Set up the ROS2 workspace 16 | RUN mkdir -p /ros2_ws/src 17 | WORKDIR /ros2_ws/src 18 | 19 | # Copy the requirements.txt file 20 | COPY requirements.txt ./requirements.txt 21 | 22 | # Copy the entrypoint script into the image 23 | # COPY /ros_entrypoint.sh / 24 | 25 | # Copy your ROS2 packages into the workspace 26 | COPY ./jetbot_riva_voice . 27 | 28 | # Update the package list and install vi 29 | RUN apt-get update && apt-get install -y vim 30 | 31 | # Install any necessary dependencies 32 | RUN pip3 install --no-cache-dir --verbose -r requirements.txt 33 | 34 | # {BASE_IMAGE} will setup the ROS2 ENTRYPOINT 35 | ENTRYPOINT [ "/ros_entrypoint.sh" ] 36 | 37 | # Build the workspace 38 | WORKDIR /ros2_ws 39 | RUN source ${ROS2_SETUP} && \ 40 | colcon build 41 | 42 | CMD ["/bin/bash"] 43 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set the environment variables 4 | DISPLAY_VAR=$DISPLAY 5 | ROS_DOMAIN_ID=7 6 | 7 | # Set the volume mappings 8 | VOLUME_X11=/tmp/.X11-unix/:/tmp/.X11-unix:rw 9 | 10 | # Define Docker volumes and environment variables 11 | ROOT=$(dirname "$0") 12 | DOCKER_VOLUMES=" 13 | --volume=$VOLUME_X11 \ 14 | --volume=$ROOT/app:/app \ 15 | --volume=$ROOT/data:/data \ 16 | --volume=$ROOT/jetbot_riva_voice:/source \ 17 | " 18 | DOCKER_ENV_VARS=" 19 | --env DISPLAY=$DISPLAY_VAR \ 20 | --env QT_X11_NO_MITSHM=1 \ 21 | --env ROS_DOMAIN_ID=$ROS_DOMAIN_ID \ 22 | " 23 | 24 | # check for V4L2 devices 25 | V4L2_DEVICES="" 26 | 27 | for i in {0..9} 28 | do 29 | if [ -a "/dev/video$i" ]; then 30 | V4L2_DEVICES="$V4L2_DEVICES --device /dev/video$i " 31 | fi 32 | done 33 | 34 | # check for I2C devices 35 | I2C_DEVICES="" 36 | 37 | for i in {0..9} 38 | do 39 | if [ -a "/dev/i2c-$i" ]; then 40 | I2C_DEVICES="$I2C_DEVICES --device /dev/i2c-$i " 41 | fi 42 | done 43 | 44 | DOCKER_DEVICES=" 45 | --device /dev/snd \ 46 | --device /dev/bus/usb \ 47 | --device=/dev/input \ 48 | " 49 | DOCKER_ARGS="${DOCKER_VOLUMES} ${DOCKER_ENV_VARS} ${V4L2_DEVICES} 
${I2C_DEVICES} ${DOCKER_DEVICES}" 49 | 50 | 51 | # Set the docker image 52 | DOCKER_IMAGE=${DOCKER_IMAGE:-jetbot_riva_voice:latest} 53 | 54 | # Run the docker command 55 | docker run -it --rm --net host --ipc host \ 56 | ${DOCKER_ARGS} \ 57 | $DOCKER_IMAGE /bin/bash -c "source install/setup.bash && /bin/bash" 58 | -------------------------------------------------------------------------------- /jetbot_riva_voice/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | import os 3 | from glob import glob 4 | 5 | package_name = 'jetbot_riva_voice' 6 | 7 | setup( 8 | name=package_name, 9 | version='0.0.0', 10 | packages=find_packages(exclude=['test']), 11 | data_files=[ 12 | ('share/ament_index/resource_index/packages', 13 | ['resource/' + package_name]), 14 | ('share/' + package_name, ['package.xml']), 15 | # Include all launch files 16 | (os.path.join('share', package_name, 'launch'), glob('launch/*.launch.py')), 17 | # Include all param files 18 | (os.path.join('share', package_name, 'param'), glob('param/*params.yaml')), 19 | # Include all include files 20 | (os.path.join('share', package_name, 'include'), glob('include/*.py')), 21 | ], 22 | install_requires=['setuptools'], 23 | zip_safe=True, 24 | maintainer='jetbot', 25 | maintainer_email='jenhungho@outlook.com', 26 | description='TODO: Package description', 27 | license='Apache-2.0', 28 | tests_require=['pytest'], 29 | entry_points={ 30 | 'console_scripts': [ 31 | 'jetbot_ASR = jetbot_riva_voice.script.Jetbot_ASR_Processor:main', 32 | 'jetbot_TTS = jetbot_riva_voice.script.Jetbot_TTS_Processor:main', 33 | 'jetbot_voice_agent = jetbot_riva_voice.script.Jetbot_ASR_Agent:main', 34 | 'audio_list = jetbot_riva_voice.script.audio_list:main' 35 | ], 36 | }, 37 | ) 38 | -------------------------------------------------------------------------------- /jetbot_riva_voice/param/jetbot_voice_params.yaml: -------------------------------------------------------------------------------- 1 | Riva_ASR_processor: 2 | ros__parameters: 3 | ASR_topic: "/jetbot_voice/transcripts" 4 | url: "localhost:50051" 5 | streaming_chunk: 16000 6 | index: 26 7 | start: true 8 | Riva_TTS_processor: 9 | ros__parameters: 10 | TTS_topic: "/jetbot_TTS/transcripts" 11 | chat_topic: "/chatbot/response" 12 | ASR_node: "/Riva_ASR_processor" 13 | streaming_chunk: 16000 14 | url: "localhost:50051" 15 | index: 0 16 | start: true 17 | Jetbot_ASR_agent: 18 | ros__parameters: 19 | ASR_topic: "/jetbot_voice/transcripts" 20 | TTS_topic: "/chatbot/response" 21 | LLM_topic: "/jetbot_llm_input" 22 | VISION_topic: "/llm_vision_input" 23 | ASR_node: "/Riva_ASR_processor" 24 | start: true 25 | command_enable: false 26 | command_nodes: ["/Jetbot_tool_voice_copilot"] 27 | # jetbot_commands: syntax: ['ASR keyword', 'command nodes index:command keyword'] 28 | # jetbot_commands: example: ['away','0:self-driving'] 29 | jetbot_commands: > 30 | [ 31 | ['cmd_start','0:start'], ['cmd_stop','0:stop'], 32 | ['cmd_left','0:left'], ['cmd_right','0:right'], ['cmd_forward', '0:forward'], ['cmd_backward', '0:backward'], 33 | ['cmd_self-driving','0:self-driving'], ['cmd_follow','0:follow'] 34 | ] 35 | jetbot_chat: > 36 | [ 37 | ['hello', 'This is jetbot.
How can I help you'], 38 | ['bye', 'Take care, talk to you later'] 39 | ] 40 | jetbot_vision: > 41 | [ 42 | ['cmd_vision','Describe the image content in detail'] 43 | ] 44 | -------------------------------------------------------------------------------- /docs/setup.md: -------------------------------------------------------------------------------- 1 | ## Jetbot Voice-Activated Copilot Tools Setup 2 | 3 | 1. **Configure Docker Engine**: 4 | Follow these [setup steps](https://github.com/dusty-nv/jetson-containers/blob/master/docs/setup.md) to configure your Docker engine. 5 | 6 | 2. **Set Up ROS Development Environment**: 7 | Set up your ROS development environment by following the instructions [here](https://docs.ros.org/en/humble/Installation.html). 8 | 9 | 3. **Clone the Repository**: 10 | Open your terminal and run the following command to clone the repository: 11 | ```bash 12 | git clone https://github.com/Jen-Hung-Ho/ros2_jetbot_voice 13 | ``` 14 | 15 | 4. **Navigate to the Repository Directory**: 16 | Change to the directory of the cloned repository: 17 | ```bash 18 | cd ros2_jetbot_voice 19 | ``` 20 | 21 | 5. **Build the Docker Image**: 22 | Ensure the `build.sh` script has execute permissions. If not, add execute permissions using: 23 | ```bash 24 | chmod +x build.sh 25 | ``` 26 | 27 | Then, run the `build.sh` script to build the Docker image: 28 | ```bash 29 | ./build.sh 30 | ``` 31 | 32 | 6. **Start Docker and Build the CNN Model**: 33 | Execute the following commands to start the container and build the CNN model (this only needs to be done once). The model data will be saved under `/data/models/ASR_classify_model` for the voice-activated tools to load at ROS2 node launch: 34 | ```bash 35 | . run.sh 36 | cd .. 37 | cd app 38 | python3 robot_command_text_classification.py # build CNN model 39 | python3 robot_command_model_evaluation.py # run unit test 40 | ``` 41 | 42 | 7. **Start Docker**: 43 | Execute the following command to run the Docker container: 44 | ```bash 45 | . run.sh 46 | ``` 47 | 48 | 8. **Attach to an Existing Running Docker Container**: 49 | To attach to an existing running Docker container, use the following commands: 50 | ```bash 51 | docker ps 52 | ``` 53 | 54 | Identify the `CONTAINER ID` of the running container (e.g., `422fc05b7655`), then run: 55 | ```bash 56 | . start_ros2_shell.sh 57 | ``` 58 | 59 | For example: 60 | ```bash 61 | . start_ros2_shell.sh 422fc05b7655 62 | ``` -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/audio_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software.
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | 24 | import pyaudio 25 | 26 | from ctypes import * 27 | from contextlib import contextmanager 28 | 29 | # Define our error handler type 30 | ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p) 31 | 32 | def py_error_handler(filename, line, function, err, fmt): 33 | pass 34 | 35 | c_error_handler = ERROR_HANDLER_FUNC(py_error_handler) 36 | 37 | # Load the ALSA library 38 | asound = cdll.LoadLibrary('libasound.so') 39 | 40 | # Set our error handler 41 | asound.snd_lib_error_set_handler(c_error_handler) 42 | 43 | # Suppress the ALSA lib error message 44 | @contextmanager 45 | def noalsaerr(): 46 | asound = cdll.LoadLibrary('libasound.so') 47 | asound.snd_lib_error_set_handler(c_error_handler) 48 | yield 49 | asound.snd_lib_error_set_handler(None) 50 | 51 | def main(args=None): 52 | # Use PyAudio to play the WAV file 53 | with noalsaerr(): 54 | p = pyaudio.PyAudio() 55 | 56 | 57 | print("----------------------------------------------------") 58 | print("Audio Input Devices:") 59 | print("----------------------------------------------------") 60 | for i in range(p.get_device_count()): 61 | device_info = p.get_device_info_by_index(i) 62 | if device_info["maxInputChannels"] > 0: 63 | print('Input Device {:2d} - \'{}\' (inputs={}) (sample_rate={})'.format(i, device_info['name'], device_info['maxInputChannels'], round(device_info['defaultSampleRate']))) 64 | 65 | 66 | print("\n----------------------------------------------------") 67 | print("Audio Output Devices:") 68 | print("----------------------------------------------------") 69 | for i in range(p.get_device_count()): 70 | device_info = p.get_device_info_by_index(i) 71 | if device_info["maxOutputChannels"] > 0: 72 | print('Output Device {:2d} - \'{}\' (outpus={}) (sample_rate={})'.format(i, device_info['name'], device_info['maxOutputChannels'], round(device_info['defaultSampleRate']))) 73 | 74 | p.terminate() 75 | 76 | if __name__ == '__main__': 77 | main() -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/include/text_classifier_utility.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | # 24 | # Reference: 25 | # This code is inspired by the example from Keras: 26 | # https://keras.io/examples/nlp/text_classification_from_scratch/ 27 | # 28 | 29 | import tensorflow as tf 30 | from tensorflow.keras.layers import TextVectorization 31 | import json 32 | import string 33 | import re # Import the re module 34 | 35 | 36 | class TextClassifier: 37 | 38 | # 39 | # Initialize the class: load the model and labels 40 | # 41 | def __init__(self, model_path, labels_path): 42 | self.custom_objects = { 43 | 'TextVectorization': TextVectorization, 44 | 'custom_standardization': self.custom_standardization 45 | } 46 | model_path = model_path + '.keras' 47 | self.model = tf.keras.models.load_model(model_path) 48 | # disable custom_standardization -- failed to deserialize the model data 49 | # self.model = tf.keras.models.load_model(model_path, custom_objects=self.custom_objects) 50 | 51 | with open(labels_path, 'r') as f: 52 | self.class_labels = json.load(f) 53 | 54 | # 55 | # Having looked at our data above, we see that the raw text contains HTML break 56 | # tags of the form '
<br />'. These tags will not be removed by the default 57 | # standardizer (which doesn't strip HTML). Because of this, we will need to 58 | # create a custom standardization function. 59 | # 60 | # Register the custom standardization function 61 | @tf.keras.utils.register_keras_serializable(package='Custom', name='custom_standardization') 62 | def custom_standardization(self, input_data): 63 | lowercase = tf.strings.lower(input_data) 64 | stripped_html = tf.strings.regex_replace(lowercase, '<br />
', ' ') 65 | return tf.strings.regex_replace(stripped_html, '[%s]' % re.escape(string.punctuation), '') 66 | 67 | # 68 | # This function handles the prediction process for the text classifier model 69 | # 1D convolutional neural network (CNN) model 70 | # 71 | def predict(self, min_score, input_text): 72 | prediction = self.model.predict(tf.constant([input_text])) 73 | predicted_class_index = tf.argmax(prediction, axis=1).numpy()[0] 74 | most_fit_score = prediction[0][predicted_class_index] 75 | predicted_class_label = self.class_labels[predicted_class_index] 76 | 77 | if most_fit_score < min_score: 78 | return "other", False, most_fit_score 79 | else: 80 | return predicted_class_label, True, most_fit_score -------------------------------------------------------------------------------- /app/robot_command_model_evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | # 24 | # Reference: 25 | # This code is inspired by the example from Keras: 26 | # https://keras.io/examples/nlp/text_classification_from_scratch/ 27 | # 28 | 29 | import tensorflow as tf 30 | from tensorflow.keras.layers import TextVectorization 31 | from termcolor import colored 32 | import json 33 | import string 34 | import re # Import the re module 35 | import unittest 36 | 37 | from jetbot_riva_voice.include.text_classifier_utility import TextClassifier 38 | 39 | # unit test 40 | class TestTextClassifier(unittest.TestCase): 41 | 42 | @classmethod 43 | def setUpClass(cls): 44 | model_path = '../data/models/ASR_classify_model' 45 | labels_path = '../data/models/class_labels.json' 46 | cls.classifier = TextClassifier(model_path, labels_path) 47 | cls.examples = [ 48 | "turn to right.", 49 | "turn to left.", 50 | "What do you see in camera?", 51 | "follow me.", 52 | "go forward.", 53 | "move backward.", 54 | "stop all actions", 55 | "start the action", 56 | "Start self driving mode", 57 | "Describe what do you see in image?", 58 | "How are you today?", 59 | "Hello What is your name?", 60 | "How many states in the United States?" 
61 | ] 62 | cls.expected_results = [ 63 | "cmd_right", 64 | "cmd_left", 65 | "cmd_vision", 66 | "cmd_follow", 67 | "cmd_forward", 68 | "cmd_backward", 69 | "cmd_stop", 70 | "cmd_start", 71 | "cmd_self-driving", 72 | "cmd_vision", 73 | "other", 74 | "other", 75 | "other" 76 | ] 77 | 78 | def test_predictions(self): 79 | pass_count = 0 80 | for i, example in enumerate(self.examples): 81 | label, result, score = self.classifier.predict(0.7, example) 82 | expected_label = self.expected_results[i] 83 | if label == expected_label: 84 | pass_count += 1 85 | # print(f"PASS: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'") 86 | print(colored(f"PASS: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'", 'green')) 87 | if score < 0.7 and label == 'other': 88 | # print(f"PASS: chat topic: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'") 89 | print(colored(f"PASS: chat topic: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'", 'magenta')) 90 | 91 | else: 92 | # print(f"FAIL: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'") 93 | print(colored(f"FAIL: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'", 'red')) 94 | 95 | print("===================================================") 96 | total_tests = len(self.examples) 97 | print(f"Test Summary: Passed {pass_count}/{total_tests} tests") 98 | print("===================================================") 99 | 100 | 101 | def simple_test(): 102 | 103 | # Define data mode file loacation 104 | model_path = '../data/models/ASR_classify_model' 105 | labels_path = '../data/models/class_labels.json' 106 | classifier = TextClassifier(model_path, labels_path) 107 | 108 | 109 | # Sample test cases 110 | examples = [ 111 | "turn to right.", 112 | "turn to left.", 113 | "What do you see in camera?", 114 | "follow me.", 115 | "go forward.", 116 | "move backward.", 117 | "stop all actions", 118 | "start the action", 119 | "Start self driving mode", 120 | "Describe what do you see in image?", 121 | "How many states in the United States?" 
122 | ] 123 | 124 | print("============================================================") 125 | print(classifier.class_labels) 126 | print("============================================================") 127 | 128 | # Run predictions for each example 129 | for example in examples: 130 | 131 | label, result, score = classifier.predict(0.7, example) 132 | if result: 133 | print(f"Input: '{example}' => Predicted Label: '{label}', Result: '{score}'") 134 | else: 135 | print("Chat topic: -----------------------") 136 | print(f"Input: '{example}' => Predicted Label: '{label}', Result: '{score}'") 137 | 138 | 139 | if __name__ == "__main__": 140 | unittest.main() -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/include/node_parameter_utility.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2023, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | 24 | import rclpy # Python library for ROS 2 25 | import threading 26 | from rcl_interfaces.msg import ParameterType, Parameter, ParameterValue 27 | from ros2param.api import call_get_parameters 28 | from ros2param.api import call_set_parameters 29 | 30 | from std_msgs.msg import String 31 | 32 | class NodeParamTools(): 33 | 34 | def __init__(self, node, executor): 35 | 36 | self.node = node 37 | self.logger = self.node.get_logger() 38 | self.executor = executor 39 | self.lock = threading.Lock() 40 | node_name = self.node.get_name() 41 | 42 | self.init_ros_nodes(node_name) 43 | self.logger.info('NodeParamTools({}) initialize'.format(node_name)) 44 | 45 | # 46 | # Initialize nodes for get/set parameter service call 47 | # 48 | def init_ros_nodes(self, node_name): 49 | # To get/set parameter on another node in ROS2 using Python, use the SetParameters service. 
50 | # client = node.create_client(GetParameters, f'{node_name}/get_parameters') 51 | # client = node.create_client(SetParameters, f'{node_name}/set_parameters') 52 | 53 | self.get_param_node = rclpy.create_node(node_name + '_get_param_node') 54 | self.set_param_node = rclpy.create_node(node_name + '_set_param_node') 55 | self.executor.add_node(self.get_param_node) 56 | self.executor.add_node(self.set_param_node) 57 | 58 | # self.node_param_util = NodeParamTools(self.get_logger()) 59 | 60 | # 61 | # Remove nodes for get/set parameter service call 62 | # 63 | def cleanup(self): 64 | 65 | # clean up set_param_node, get_param_node 66 | self.executor.remove_node(self.set_param_node) 67 | self.set_param_node.destroy_node() 68 | self.executor.remove_node(self.get_param_node) 69 | self.get_param_node.destroy_node() 70 | pass 71 | 72 | # 73 | # Try catch version of get_node_parameters 74 | # 75 | def try_get_node_parameters(self, node_name, param): 76 | try: 77 | value = self.get_node_parameters(node_name, param) 78 | return (True, value) 79 | except RuntimeError as e: 80 | # try to catch node not exist with service timer out error 81 | self.logger.info("get node parameter error: {}".format(str(e))) 82 | return (False, None) 83 | 84 | # 85 | # Try catch version of set_node_parameters 86 | # 87 | def try_set_node_parameters(self, node_name, param_name, type, value): 88 | try: 89 | self.set_node_parameters(node_name, param_name, type, value) 90 | return True 91 | except RuntimeError as e: 92 | # try to catch node not exist with service timer out error 93 | self.logger.info("set node parameter error: {}".format(str(e))) 94 | return False 95 | 96 | # 97 | # To get a parameter on another node in ROS2 using Python, use the GetParameters service. 98 | # client = node.create_client(GetParameters, f'{node_name}/get_parameters') 99 | # 100 | def get_node_parameters(self, node_name, param): 101 | self.logger.info('get node parameters : {} - {}'.format(node_name, param)) 102 | 103 | # Block the next get node parameter invoke until the current finishes 104 | # ros2 param get /Jetbot_Param_Client command 105 | with self.lock: 106 | parameters = [param] 107 | response = call_get_parameters(node=self.get_param_node, 108 | #node_name='/detectnet/detectnet', 109 | node_name=node_name, 110 | parameter_names=parameters) 111 | 112 | # print(response.values) 113 | if len(response.values) >= 1: 114 | # txtract type specific value 115 | pvalue = response.values[0] 116 | if pvalue.type == ParameterType.PARAMETER_BOOL: 117 | print(pvalue.bool_value) 118 | self.logger.info('get node bool value: {}'.format(pvalue.bool_value)) 119 | elif pvalue.type == ParameterType.PARAMETER_STRING: 120 | print(pvalue.string_value) 121 | self.logger.info('get node string value: {}'.format(pvalue.string_value)) 122 | elif pvalue.type == ParameterType.PARAMETER_STRING_ARRAY: 123 | self.logger.info('get node string array value: {}'.format(pvalue.string_array_value)) 124 | 125 | return pvalue 126 | 127 | # 128 | # To set a parameter on another node in ROS2 using Python, use the SetParameters service. 
129 | # client = node.create_client(SetParameters, f'{node_name}/set_parameters') 130 | # 131 | def set_node_parameters(self, node_name, param_name, type, value): 132 | self.logger.info('set node parameters : {} - {} - {}'.format(node_name, param_name, value)) 133 | 134 | # Block the next set node parameter invoke until the current finishes 135 | with self.lock: 136 | param = Parameter() 137 | param.name = param_name 138 | if type == ParameterType.PARAMETER_STRING: 139 | param.value = ParameterValue(string_value=value, type=ParameterType.PARAMETER_STRING) 140 | elif type == ParameterType.PARAMETER_BOOL: 141 | param.value = ParameterValue(bool_value=value, type=ParameterType.PARAMETER_BOOL) 142 | elif type == ParameterType.PARAMETER_STRING_ARRAY: 143 | param.value = ParameterValue(string_array_value=value, type=ParameterType.PARAMETER_STRING_ARRAY) 144 | 145 | parameters = [param] 146 | response = call_set_parameters(node=self.set_param_node, 147 | node_name=node_name, 148 | parameters=parameters) 149 | 150 | if response is not None: 151 | # SetParametersResult 152 | for result in response.results: 153 | self.logger.debug('set node: {} parameter: {} value:{}'.format(node_name, param_name, value)) 154 | self.logger.info('Parameter set successful: {}'.format(result.successful)) 155 | self.logger.debug('Reason: {}'.format(result.reason)) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Jetbot Voice-Activated Copilot Tools with Nvidia RIVA and NanoLLM Container for ROS2 Robot - version 2.0 2 | 3 | Jetbot Voice-Activated Copilot is a set of ROS2 nodes that utilize the NVIDIA RIVA Automatic Speech Recognition (ASR) deep learning interface library and the Jetson NanoLLM Docker container for NVIDIA Jetson Orin jetbot. These tools leverage NVIDIA RIVA ASR for input and use a 1D convolutional neural network (CNN) model as a text classifier to handle the prediction process for robot task commands. This enables functionalities such as chat via LLM, vision via VLM, Lidar-assisted self-driving with object avoidance, and real-time object detection for following a person. 4 | 5 | 6 | ### Features 7 | --- 8 | - **Jetbot ASR Processor**: Enables your robot to decode human voice messages using the [Nvidia RIVA ASR service](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-overview.html) client ROS2 node. 9 | 10 | - **Jetbot TTS Processor**: Converts chat-vision NLM VLM response text into speech using [Nvidia RIVA TTS services](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html), which is then played via the robot's speaker. This feature enhances the interaction between the robot and humans, making it more engaging and user-friendly. 11 | 12 | - **Jetbot ASR Agent**: Allows you to build a simple 1D convolutional neural network (CNN) model for [text classification](https://keras.io/examples/nlp/text_classification_from_scratch/) to predict human voice intentions and pipe corresponding NLM chat, VLM vision, and actions that the robot should take. 13 | 14 | - **Jetbot Voice Tools Copilot**: Executes the actions corresponding to the voice commands posted via ROS2 topic from the Jetbot ASR Agent. It also handles tasks related to Lidar-assisted self-driving, object avoidance, and real-time object detection for person following. 
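A quick way to see the text classifier behind these tools in action is to exercise it on its own. Below is a minimal sketch (assuming the CNN model has already been built per the [setup guide](docs/setup.md), and run from the `app` directory so the relative `../data` paths resolve):

```python
# Minimal sketch: run one ASR transcript through this repo's TextClassifier.
# Assumes app/robot_command_text_classification.py has already built the model.
from jetbot_riva_voice.include.text_classifier_utility import TextClassifier

classifier = TextClassifier('../data/models/ASR_classify_model',
                            '../data/models/class_labels.json')

label, is_command, score = classifier.predict(0.7, "turn to right.")
if is_command:
    print(f"robot command: {label} (score={score:.2f})")       # e.g. cmd_right
else:
    print(f"chat/vision fallback: {label} (score={score:.2f})")
```

Transcripts scoring under the threshold come back as `other`, which is what routes them to the chat/vision path instead of a robot command.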
15 | 16 | #### Here is a brief overview of the jetbot tools design diagram/architecture 17 | 18 | 19 | ### Setup 20 | - [Jetbot Voice-Activated Copilot Tools Setup Guide](docs/setup.md#setup) 21 |

22 | 23 | ### Jetbot voice-activated copilot tools source code and video demos 24 | --- 25 | - **Jetbot ASR Processor:** 26 | - Code logic explanation: 27 | - Employs the Nvidia RIVA Speech Recognition (ASR) service client-side ROS2 node to decode human speech into text. The decoded text is then published as ROS2 text messages. 28 | - **Start Nvidia RIVA server:** [Riva Server Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html) 29 | - `cd riva_quickstart_v2.16.0` 30 | - `bash riva_init.sh` 31 | - `bash riva_start.sh` 32 | - **Running the code:** Please note that this code needs to be run within a Docker container. 33 | - `cd ros2_jetbot_voice` 34 | - `. run.sh ` 35 | - `python3 /opt/riva/python-clients/scripts/list_audio_devices.py` 36 | - `ros2 run jetbot_riva_voice jetbot_ASR --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml` 37 | - **Receiving Nvidia RIVA ASR service messages:** The Jetbot ASR Processor processes the user message and identifies it as either a Jetbot tool action or a chat-vision message. 38 | - **Handling and publishing chat-only messages to the LLM:** For chat-only topics, publish a ROS2 topic message to the [LLM](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) ROS2 node hosted in Jetbot tools and mute the mic to prevent TTS audio playback from causing an echo effect. 39 | - **Handling and publishing vision messages to the VLM:** For vision topics, publish a ROS2 topic message via the [VLM](https://huggingface.co/Efficient-Large-Model/VILA1.5-3b) ROS2 node hosted in Jetbot tools and mute the mic to prevent TTS audio playback from causing an echo effect. 40 | - **Handling Jetbot tool actions:** For accepted Jetbot tool actions, publish a ROS2 Jetbot tool command text message to the Jetbot voice tools copilot ROS2 client, triggering robot actions. 41 | - Source code: 42 | - [param file: jetbot_voice_params.yaml](jetbot_riva_voice/param/jetbot_voice_params.yaml)
43 | - [ROS2 node: jetbot_ASR_Processor.py](jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Processor.py)
44 | - Usage: 45 | - `python3 /opt/riva/python-clients/scripts/list_audio_devices.py` 46 | - Get audio/microphone device ID 47 | - `ros2 run jetbot_riva_voice jetbot_ASR --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml` 48 | - **Jetbot TTS Processor:** 49 | - Code logic explanation: 50 | - The Jetbot TTS Client is designed to convert text into audio, thereby enabling robots to communicate vocally. 51 | - It utilizes the NVIDIA RIVA TTS service to synthesize speech, which is then played through the robot’s speaker. 52 | - The client can convert chat-based greeting text into speech, enhancing the interaction between the robot and humans. This feature makes the robot more engaging and user-friendly. 53 | - Source code: 54 | - [param file: jetbot_voice_params.yaml](jetbot_riva_voice/param/jetbot_voice_params.yaml)
55 | - [ROS2 node: jetbot_TTS_Processor.py](jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_TTS_Processor.py)
56 | - Usage: 57 | - `python3 /opt/riva/python-clients/scripts/list_audio_devices.py` 58 | - Get speaker device ID 59 | - `ros2 run jetbot_riva_voice jetbot_TTS --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml -p index:=11` 60 | - **Jetbot ASR Agent:** 61 | - Code logic explanation: 62 | - **Voice to Action Mapping:** Upon initialization, the Jetbot ASR Agent lazily loads a simple pre-trained 1D convolutional neural network (CNN) model. It then uses the model's labels file to construct a Voice to Action map. This map establishes a comprehensive relationship between specific voice messages or commands and the corresponding actions the robot should execute (see the sketch after the usage commands below). 63 | - **CNN model voice message prediction:** The CNN model's prediction label and score are looked up in the Voice to Action map to determine whether the message is for chat, vision, or robot commands. The voice and vision messages are published via ROS2 topics to the LLM and VLM ROS2 nodes hosted in Jetbot tools. 64 | - **Action Execution:** The Jetbot Voice Tools Copilot executes the actions corresponding to the voice commands published via the Jetbot ASR Client ROS2 node. 65 | - **Supported Action Management:** It effectively handles the starting and stopping of Jetbot tool-supported actions, encompassing functionalities such as: 66 | - **Self-driving:** Lidar-assisted object avoidance self-driving 67 | - **Person following:** Real-time object detection for person following 68 | - **Navigation:** Move forward/backward, turn left/right 69 | - Source code: 70 | - [param file: jetbot_voice_params.yaml](jetbot_riva_voice/param/jetbot_voice_params.yaml)
71 | - [ROS2 node: Jetbot_ASR_Agent.py](jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Agent.py)
72 | - Usage: 73 | - `ros2 run jetbot_riva_voice jetbot_voice_agent --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml` 74 |
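For illustration, here is a hypothetical sketch of the Voice to Action lookup described above, following the `jetbot_commands` syntax documented in `jetbot_voice_params.yaml` (`['ASR keyword', 'command nodes index:command keyword']`). The actual parsing lives in `Jetbot_ASR_Agent.py` and may differ:

```python
# Hypothetical sketch of the documented Voice-to-Action mapping; the values
# below mirror jetbot_voice_params.yaml, not Jetbot_ASR_Agent.py internals.
import ast

command_nodes = ["/Jetbot_tool_voice_copilot"]   # command_nodes parameter
jetbot_commands = "[['cmd_start','0:start'], ['cmd_stop','0:stop']]"

# 'ASR keyword' -> 'command-node index:command keyword'
voice_to_action = dict(ast.literal_eval(jetbot_commands))

def route(label):
    # Split '0:start' into the command-node index and the command keyword
    index, _, keyword = voice_to_action[label].partition(':')
    return command_nodes[int(index)], keyword

print(route('cmd_start'))   # ('/Jetbot_tool_voice_copilot', 'start')
```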
75 | [](https://youtu.be/SqDqO-KfWUs) 77 | ### Requirements: 78 | - Jetson Orin Nano or Jetson Orin NX: 79 | - https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit#what-youll-need 80 | - ROS2 humble: https://docs.ros.org/en/humble/index.html 81 | - NanoLLM Docker container: https://github.com/dusty-nv/NanoLLM 82 | - NanoLLM Docker container for ROS2: https://github.com/NVIDIA-AI-IOT/ros2_nanollm 83 | - ROS2 Jetbot Tools Docker container V 2.0 : - https://github.com/Jen-Hung-Ho/ros2_jetbot_tools 84 | - Robot: 85 | - Jetson Orin Jetbot: http://www.yahboom.net/study/ROSMASTER-X3 86 | - https://drive.google.com/drive/folders/1QuXJcrRMs8oyTrrROKMnUNvTHImcIC78 87 | 88 | ### References 89 | - https://www.releases.ubuntu.com/22.04/ 90 | - https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit#what-youll-need 91 | - https://docs.ros.org/en/humble/index.html 92 | - https://navigation.ros.org/ 93 | - https://github.com/Jen-Hung-Ho/ros2_jetbot_tools 94 | - https://github.com/dusty-nv/jetson-containers 95 | - https://dusty-nv.github.io/NanoLLM/ 96 | - https://github.com/dusty-nv/NanoLLM 97 | - https://github.com/NVIDIA-AI-IOT/ros2_nanollm 98 | - https://www.jetson-ai-lab.com/tutorial_llamaspeak.html 99 | - https://org.ngc.nvidia.com/setup/installers/cli 100 | - https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html 101 | - https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tutorials/tts-basics-customize-ssml.html?highlight=speechsynthesisservice 102 | - https://keras.io/examples/nlp/text_classification_from_scratch/ 103 | 104 | -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_TTS_Processor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import rclpy 16 | import threading 17 | import time 18 | import asyncio 19 | 20 | # RIVA client 21 | import pyaudio 22 | import riva.client 23 | import riva.client.audio_io 24 | 25 | from rclpy.node import Node 26 | from rclpy.executors import ExternalShutdownException 27 | from rclpy.parameter import Parameter 28 | from rcl_interfaces.msg import ParameterType, SetParametersResult 29 | from std_msgs.msg import String 30 | from threading import Lock 31 | 32 | from jetbot_riva_voice.include.node_parameter_utility import NodeParamTools 33 | 34 | class riva_tts_processor(Node): 35 | 36 | def parameter_callback(self, params): 37 | for param in params: 38 | if param.name == 'start' and param.type_ == Parameter.Type.BOOL: 39 | self.start = param.value 40 | self.get_logger().info('start= {}'.format(bool(param.value))) 41 | elif param.name == 'command' and param.type_ == Parameter.Type.STRING: 42 | self.cmd = param.value 43 | self.get_logger().info('command= {}'.format(str(param.value))) 44 | 45 | return SetParametersResult(successful=True) 46 | 47 | def __init__(self, name): 48 | super().__init__(name) 49 | 50 | self.TTS_topic = self.declare_parameter('TTS_topic', "/TTS/transcripts").get_parameter_value().string_value 51 | self.chat_topic = self.declare_parameter('chat_topic', "/chatbot/response").get_parameter_value().string_value 52 | self.ASR_node = self.declare_parameter('ASR_node', '/Riva_ASR_processor').get_parameter_value().string_value 53 | self.RIVA_URL = self.declare_parameter('url', "localhost:50051").get_parameter_value().string_value 54 | self.index = self.declare_parameter('index', 26).get_parameter_value().integer_value 55 | self.streaming_chunk = self.declare_parameter('streaming_chunk', 16000).get_parameter_value().integer_value 56 | 57 | self.get_logger().info("=========================================") 58 | self.get_logger().info("Jetbot ASR processor :{} start".format(name)) 59 | self.get_logger().info("TTS_topic : {}".format(self.TTS_topic)) 60 | self.get_logger().info("response_reopic : {}".format(self.chat_topic)) 61 | self.get_logger().info('ASR_node : {}'.format(self.ASR_node)) 62 | self.get_logger().info("RIVA url : {}".format(self.RIVA_URL)) 63 | self.get_logger().info("index : {}".format(self.index)) 64 | self.get_logger().info("streaming chunk : {}".format(self.streaming_chunk)) 65 | self.get_logger().info("=========================================") 66 | 67 | self.lock = Lock() 68 | self.subscription = self.create_subscription(String, self.TTS_topic, self.TTS_callback, 10) 69 | self.chat_subscription = self.create_subscription(String, self.chat_topic, self.chat_callback, 10) 70 | 71 | self.msg = String() 72 | self.i = 0 73 | 74 | # Add parameters callback 75 | self.add_on_set_parameters_callback(self.parameter_callback) 76 | 77 | # self.init_ros_nodes() 78 | self.node_param_util = NodeParamTools(self, executor) 79 | 80 | self.riva_init() 81 | 82 | # self.thread = threading.Thread(target=self.ASR_processor) 83 | # self.thread.start() 84 | 85 | # 86 | # Remove nodes for get/set parameter service call 87 | # 88 | def cleanup(self): 89 | # clean up set_param_node, get_param_node 90 | self.node_param_util.cleanup() 91 | pass 92 | 93 | def riva_init(self): 94 | self.p = pyaudio.PyAudio() 95 | default_device_info = riva.client.audio_io.get_default_input_device_info() 96 | self.get_logger().debug("Rivai default info:{}".format(default_device_info)) 97 | # default_index = None if default_device_info is None else default_device_info['index'] 98 | if 
default_device_info is not None and int(default_device_info['maxOutputChannels']) > 0: 99 | self.audio_index = default_device_info['index'] 100 | self.get_logger().info("use default - ignore user input") 101 | else: 102 | self.audio_index = self.index 103 | default_device = self.p.get_device_info_by_index(self.audio_index) 104 | self.sample_rate = int(default_device['defaultSampleRate']) 105 | 106 | self.get_logger().info("==============================================") 107 | self.get_logger().info("Audio default index : {}".format(self.audio_index)) 108 | self.get_logger().info("Max input ouput channels: [{} - {}]".format(default_device['maxInputChannels'], default_device_info['maxOutputChannels'])) 109 | self.get_logger().info("sample rate : {}".format(self.sample_rate)) 110 | # riva.client.audio_io.list_input_devices() 111 | self.get_logger().info("==============================================") 112 | 113 | # Initialize RIVA 114 | # auth = riva.client.Auth(args.ssl_cert, args.use_ssl, args.server, args.metadata) 115 | auth = riva.client.Auth(None, False, self.RIVA_URL, None) 116 | self.tts_service = riva.client.SpeechSynthesisService(auth) 117 | 118 | self.get_logger().info("==============================================") 119 | self.get_logger().info(" RIVA speech synthesis service") 120 | self.get_logger().info("==============================================") 121 | 122 | def list_audio_devices(self): 123 | self.get_logger().info("==============================================") 124 | self.get_logger().info(" AUDIO DEVICES: ") 125 | self.get_logger().info("==============================================") 126 | for i in range(self.p.get_device_count()): 127 | dev = self.p.get_device_info_by_index(i) 128 | self.get_logger().info(f"{dev['index']:2d}: {dev['name']:50s} (inputs={dev['maxInputChannels']:<3d} outputs={dev['maxOutputChannels']:<3d} sampleRate={int(dev['defaultSampleRate'])})") 129 | self.get_logger().info("==============================================") 130 | 131 | def chat_callback(self, msg): 132 | 133 | self.TTS_callback(msg) 134 | # Turn of mute mode ASR node 135 | passfail, value = self.node_param_util.try_get_node_parameters(self.ASR_node, 'start') 136 | if passfail == True: 137 | self.get_logger().info('Jetbot chat node start:{}'.format(value.bool_value)) 138 | if not value.bool_value: 139 | # delay 3 seconds for wait ASR finish current sound decoding then turn on the ASR 140 | time.sleep(2.0) 141 | self.node_param_util.try_set_node_parameters(self.ASR_node, 'start', type=ParameterType.PARAMETER_BOOL, value=True) 142 | else: 143 | self.get_logger().info('Jetbot chat node start == true') 144 | 145 | 146 | def TTS_callback(self, msg): 147 | with self.lock: 148 | self.get_logger().info('TTS_callback: [{}]'.format(msg.data)) 149 | msg_str = msg.data 150 | nchannels = 1 151 | sampwidth = 2 152 | sound_stream, out_f = None, None 153 | start = time.time() 154 | try: 155 | sound_stream = riva.client.audio_io.SoundCallBack( 156 | self.audio_index, nchannels=nchannels, sampwidth=sampwidth, framerate=self.sample_rate 157 | ) 158 | 159 | self.get_logger().info(" Generating audio for request.. 
\n msg:{}".format(msg_str)) 160 | responses = self.tts_service.synthesize_online( 161 | msg_str, None, "en-US", sample_rate_hz=self.sample_rate, 162 | audio_prompt_file=None, quality=20 163 | ) 164 | 165 | first = True 166 | for resp in responses: 167 | stop = time.time() 168 | if first: 169 | self.get_logger().info(" Time to first audio: {:.3f}s".format(stop - start)) 170 | first = False 171 | if sound_stream is not None: 172 | sound_stream(resp.audio) 173 | except Exception as e: 174 | self.get_logger().error('An error occured: {}'.format(e)) 175 | finally: 176 | if sound_stream is not None: 177 | sound_stream.close() 178 | self.get_logger().info("Close riva service sound stream") 179 | 180 | def main(args=None): 181 | rclpy.init(args=None) 182 | 183 | global executor 184 | 185 | executor = rclpy.executors.MultiThreadedExecutor() 186 | 187 | JetbotTTS_node = riva_tts_processor('Riva_TTS_processor') 188 | executor.add_node(JetbotTTS_node) 189 | 190 | try: 191 | # rclpy.spin(JetbotTTS_node) 192 | executor.spin() 193 | except (KeyboardInterrupt, ExternalShutdownException): 194 | pass 195 | except Exception as e: 196 | JetbotTTS_node.get_logger().error('An error occured: {}'.format(e)) 197 | print(e) 198 | finally: 199 | JetbotTTS_node.cleanup() 200 | JetbotTTS_node.destroy_node() 201 | rclpy.shutdown() 202 | 203 | 204 | if __name__ == '__main__': 205 | main() -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Processor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import rclpy 16 | import threading 17 | import time 18 | import asyncio 19 | from rclpy.node import Node 20 | from rclpy.executors import ExternalShutdownException 21 | from rclpy.parameter import Parameter 22 | from rcl_interfaces.msg import ParameterType, SetParametersResult 23 | from std_msgs.msg import String 24 | 25 | import pyaudio 26 | import riva.client 27 | import riva.client.audio_io 28 | 29 | class riva_asr_processor(Node): 30 | 31 | def parameter_callback(self, params): 32 | for param in params: 33 | if param.name == 'start' and param.type_ == Parameter.Type.BOOL: 34 | self.start = param.value 35 | self.get_logger().info('start= {}'.format(bool(param.value))) 36 | elif param.name == 'command' and param.type_ == Parameter.Type.STRING: 37 | self.cmd = param.value 38 | self.get_logger().info('command= {}'.format(str(param.value))) 39 | 40 | return SetParametersResult(successful=True) 41 | 42 | def __init__(self, name): 43 | super().__init__(name) 44 | 45 | self.ASR_topic = self.declare_parameter('ASR_topic', "/voice/transcripts").get_parameter_value().string_value 46 | self.RIVA_URL = self.declare_parameter('url', "localhost:50051").get_parameter_value().string_value 47 | self.index = self.declare_parameter('index', 26).get_parameter_value().integer_value 48 | self.streaming_chunk = self.declare_parameter('streaming_chunk', 16000).get_parameter_value().integer_value 49 | self.start = self.declare_parameter('start', True).get_parameter_value().bool_value 50 | 51 | self.get_logger().info("=========================================") 52 | self.get_logger().info("Jetbot ASR processor :{} start".format(name)) 53 | self.get_logger().info("ASR_topic : {}".format(self.ASR_topic)) 54 | self.get_logger().info("RIVA url : {}".format(self.RIVA_URL)) 55 | self.get_logger().info("index : {}".format(self.index)) 56 | self.get_logger().info("streaming chunk : {}".format(self.streaming_chunk)) 57 | self.get_logger().info("start : {}".format(self.start)) 58 | self.get_logger().info("=========================================") 59 | 60 | self.publisher = self.create_publisher(String, self.ASR_topic, 10) 61 | 62 | self.msg = String() 63 | self.i = 0 64 | 65 | # Add parameters callback 66 | self.add_on_set_parameters_callback(self.parameter_callback) 67 | 68 | # self.p = pyaudio.PyAudio() 69 | # self.list_audio_devices() 70 | self.riva_init() 71 | 72 | timer_period = 0.5 # seconds 73 | # self.timer = self.create_timer(timer_period, self.timer_callback) 74 | 75 | self.thread = threading.Thread(target=self.ASR_processor) 76 | self.thread.start() 77 | 78 | def riva_init(self): 79 | self.p = pyaudio.PyAudio() 80 | default_device_info = riva.client.audio_io.get_default_input_device_info() 81 | self.get_logger().debug("Rivai default info:{}".format(default_device_info)) 82 | # default_index = None if default_device_info is None else default_device_info['index'] 83 | if default_device_info is not None and int(default_device_info['maxInputChannels']) > 0: 84 | self.audio_index = default_device_info['index'] 85 | self.get_logger().info("use default - ignore user input") 86 | else: 87 | self.audio_index = self.index 88 | default_device = self.p.get_device_info_by_index(self.audio_index) 89 | self.sample_rate = int(default_device_info['defaultSampleRate']) 90 | 91 | self.get_logger().info("==============================================") 92 | self.get_logger().info("Audio default index : {}".format(self.audio_index)) 93 | self.get_logger().info("Max input ouput channels: [{} - 
{}]".format(default_device['maxInputChannels'], default_device_info['maxOutputChannels'])) 94 | self.get_logger().info("sample rate : {}".format(self.sample_rate)) 95 | # riva.client.audio_io.list_input_devices() 96 | self.get_logger().info("==============================================") 97 | 98 | def list_audio_devices(self): 99 | self.get_logger().info("==============================================") 100 | self.get_logger().info(" AUDIO DEVICES: ") 101 | self.get_logger().info("==============================================") 102 | for i in range(self.p.get_device_count()): 103 | dev = self.p.get_device_info_by_index(i) 104 | self.get_logger().info(f"{dev['index']:2d}: {dev['name']:50s} (inputs={dev['maxInputChannels']:<3d} outputs={dev['maxOutputChannels']:<3d} sampleRate={int(dev['defaultSampleRate'])})") 105 | self.get_logger().info("==============================================") 106 | 107 | 108 | def timer_callback(self): 109 | self.msg.data = 'Hello World: %d' % self.i 110 | self.i += 1 111 | self.get_logger().info('Publishing: "%s"' % self.msg.data) 112 | self.publisher.publish(self.msg) 113 | 114 | # thread: Jetbot ASR processor 115 | def ASR_processor(self): 116 | self.get_logger().info('==============================') 117 | self.get_logger().info('Jetbot ASR processor --> START') 118 | self.get_logger().info('==============================') 119 | 120 | # Initialize RIVA 121 | # auth = riva.client.Auth(args.ssl_cert, args.use_ssl, args.server, args.metadata) 122 | auth = riva.client.Auth(None, False, self.RIVA_URL, None) 123 | asr_service = riva.client.ASRService(auth) 124 | 125 | config = riva.client.StreamingRecognitionConfig( 126 | config=riva.client.RecognitionConfig( 127 | encoding=riva.client.AudioEncoding.LINEAR_PCM, 128 | language_code="en-US", 129 | model="", 130 | max_alternatives=1, 131 | profanity_filter=False, 132 | enable_automatic_punctuation=False, 133 | verbatim_transcripts=True, 134 | sample_rate_hertz=self.sample_rate, 135 | audio_channel_count=1, 136 | ), 137 | interim_results=True, 138 | ) 139 | 140 | boosted_words = ["jetbot", "action"] 141 | boosted_score = 4.0 142 | riva.client.add_word_boosting_to_config(config, boosted_words, boosted_score) 143 | 144 | if hasattr(riva.client, 'add_endpoint_parameters_to_config'): 145 | riva.client.add_endpoint_parameters_to_config( 146 | config, 147 | -1, #start history 148 | -1.0, #start threshold 149 | -1, # stop history 150 | -1, # stop history eou 151 | -1, # stop threshold 152 | -1.0 # top threshold eou 153 | ) 154 | else: 155 | self.get_logger().info("The function add_endpoint_parameters_to_config() does not exist in this version of RIVA client API.") 156 | 157 | with riva.client.audio_io.MicrophoneStream( 158 | self.sample_rate, 159 | self.streaming_chunk, 160 | device=self.audio_index, 161 | ) as audio_chunk_iterator: 162 | responses=asr_service.streaming_response_generator( 163 | audio_chunks=audio_chunk_iterator, 164 | streaming_config=config 165 | ) 166 | 167 | for response in responses: 168 | if not response.results: 169 | continue 170 | for result in response.results: 171 | if not result.alternatives: 172 | continue 173 | transcript = result.alternatives[0].transcript 174 | if result.is_final: 175 | self.get_logger().info('ASR buffer: [ {} ]'.format(transcript)) 176 | self.get_logger().debug('ASR RAW:{}'.format(result.alternatives)) 177 | for i, alternative in enumerate(result.alternatives): 178 | asr_msg = (f'(alternative {i + 1})' if i > 0 else '') + f' {alternative.transcript}' 179 | 
self.get_logger().info( '## {}'.format(asr_msg)) 180 | 181 | if (self.start): 182 | self.msg.data = transcript 183 | self.get_logger().info('Publishing: "%s"' % self.msg.data) 184 | self.publisher.publish(self.msg) 185 | else: 186 | self.get_logger().info('ASR -off- ignore: {}'.format(transcript)) 187 | 188 | self.get_logger().info('==============================') 189 | self.get_logger().info('Jetbot ASR processor -- EXIT') 190 | self.get_logger().info('==============================') 191 | 192 | 193 | 194 | def main(args=None): 195 | rclpy.init(args=None) 196 | 197 | node = riva_asr_processor('Riva_ASR_processor') 198 | 199 | try: 200 | rclpy.spin(node) 201 | except (KeyboardInterrupt, ExternalShutdownException): 202 | pass 203 | except Exception as e: 204 | node.get_logger().error('An error occured: {}'.format(e)) 205 | print(e) 206 | 207 | node.destroy_node() 208 | rclpy.shutdown() 209 | 210 | 211 | if __name__ == '__main__': 212 | main() -------------------------------------------------------------------------------- /app/robot_command_text_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 
22 | # 23 | # 24 | # Reference: 25 | # This code is inspired by the example from Keras: 26 | # https://keras.io/examples/nlp/text_classification_from_scratch/ 27 | # 28 | 29 | import matplotlib.pyplot as plt 30 | import os 31 | import re 32 | import shutil 33 | import string 34 | import tensorflow as tf 35 | from tensorflow.keras import layers, losses 36 | import json 37 | 38 | class TextClassification: 39 | def __init__(self, train_data_path, test_data_path): 40 | # Model constants 41 | self.batch_size = 32 42 | self.seed = 42 43 | self.max_features = 20000 44 | self.sequence_length = 550 45 | self.embedding_dim = 100 46 | self.epochs = 100 47 | self.vectorize_layer = None 48 | self.model = None 49 | self.class_labels = None 50 | self.train_data_path = train_data_path 51 | self.test_data_path = test_data_path 52 | 53 | # 54 | # Load text dataset from directory 55 | # 56 | def load_data(self): 57 | raw_train_ds = tf.keras.utils.text_dataset_from_directory( 58 | self.train_data_path, 59 | batch_size=self.batch_size, 60 | validation_split=0.2, 61 | subset='training', 62 | seed=self.seed) 63 | 64 | raw_val_ds = tf.keras.utils.text_dataset_from_directory( 65 | self.train_data_path, 66 | batch_size=self.batch_size, 67 | validation_split=0.2, 68 | subset='validation', 69 | seed=self.seed) 70 | 71 | raw_test_ds = tf.keras.utils.text_dataset_from_directory( 72 | self.test_data_path, 73 | batch_size=self.batch_size) 74 | 75 | self.class_labels = raw_train_ds.class_names 76 | print("Class labels:", self.class_labels) 77 | 78 | return raw_train_ds, raw_val_ds, raw_test_ds 79 | 80 | # Having looked at our data above, we see that the raw text contains HTML break 81 | # tags of the form '
<br />'. These tags will not be removed by the default 82 | # standardizer (which doesn't strip HTML). Because of this, we will need to 83 | # create a custom standardization function. 84 | # Register the custom standardization function 85 | @tf.keras.utils.register_keras_serializable(package='Custom', name='custom_standardization') 86 | def custom_standardization(self, input_data): 87 | lowercase = tf.strings.lower(input_data) 88 | stripped_html = tf.strings.regex_replace(lowercase, '<br />
', ' ') 89 | return tf.strings.regex_replace(stripped_html, 90 | '[%s]' % re.escape(string.punctuation), 91 | '') 92 | 93 | # Now that we have our custom standardization, we can instantiate our text 94 | # vectorization layer. We are using this layer to normalize, split, and map 95 | # strings to integers, so we set our 'output_mode' to 'int'. 96 | # Note that we're using the default split function, 97 | # --- and the custom standardization defined above. --- 98 | # and the built-in standardization function. 99 | # We also set an explicit maximum sequence length, since the CNNs later in our 100 | # model won't support ragged sequences. 101 | def build_vectorize_layer(self, raw_train_ds): 102 | self.vectorize_layer = layers.TextVectorization( 103 | # standardize=self.custom_standardization, 104 | max_tokens=self.max_features, 105 | output_mode='int', 106 | output_sequence_length=self.sequence_length) 107 | 108 | # Now that the vectorize_layer has been created, call `adapt` on a text-only 109 | # dataset to create the vocabulary. You don't have to batch, but for very large 110 | # datasets this means you're not keeping spare copies of the dataset in memory. 111 | 112 | # Let's make a text-only dataset (no labels): 113 | train_text = raw_train_ds.map(lambda x, y: x) 114 | self.vectorize_layer.adapt(train_text) 115 | 116 | def vectorize_text(self, text, label): 117 | text = tf.expand_dims(text, -1) 118 | return self.vectorize_layer(text), label 119 | 120 | # 121 | # Build a simple 1D convolutional neural network (CNN) 122 | # starting with an Embedding layer. 123 | # 124 | def build_model(self): 125 | # This initializes a sequential model, which is a linear stack of layers. 126 | self.model = tf.keras.Sequential([ 127 | # It’s used for text data where each word is represented by an integer. 128 | # Turns positive integers (indexes) into dense vectors of fixed size. 129 | layers.Embedding(self.max_features, self.embedding_dim), 130 | # This layer randomly sets 50% of the input units to 0 at each update during training time, which helps prevent overfitting. 131 | layers.Dropout(0.5), 132 | # This is a 1D convolutional layer with 128 filters, a kernel size of 7, “valid” padding (no padding), ReLU activation function, and a stride of 3. 133 | # It helps in extracting features from the input sequence. 134 | layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3), # Change kernel size to 7 and strides to 3 135 | layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3), # Change kernel size to 7 and strides to 3 136 | # This layer performs global max pooling operation for temporal data. 137 | # It reduces the dimensionality of the input by taking the maximum value over the time dimension. 138 | layers.GlobalMaxPooling1D(), 139 | # This is a fully connected (dense) layer with 128 units and ReLU activation function. 140 | # It helps in learning complex representations. 141 | layers.Dense(128, activation="relu"), 142 | layers.Dropout(0.5), # Increase dropout rate to 0.5 143 | # This is the output layer with a number of units equal to the number of classes. 144 | # The softmax activation function is used to output a probability distribution over the classes. 
145 | layers.Dense(len(self.class_labels), activation='softmax') 146 | ]) 147 | 148 | self.model.compile(loss=losses.SparseCategoricalCrossentropy(from_logits=False), 149 | optimizer='adam', 150 | metrics=['accuracy']) 151 | 152 | 153 | # 154 | # Train the model 155 | # 156 | def train_model(self, train_ds, val_ds): 157 | history = self.model.fit( 158 | train_ds, 159 | validation_data=val_ds, 160 | epochs=self.epochs) 161 | return history 162 | 163 | # 164 | # Save the trained model 165 | # 166 | def save_export_model(self, lable_file, model_file_name, raw_test_ds): 167 | export_model = tf.keras.Sequential([ 168 | self.vectorize_layer, 169 | self.model 170 | ]) 171 | 172 | # Reinitialize the optimizer 173 | export_model.compile( 174 | loss=losses.SparseCategoricalCrossentropy(from_logits=False), 175 | optimizer="adam", 176 | metrics=['accuracy'] 177 | ) 178 | 179 | results = export_model.evaluate(raw_test_ds) 180 | print(results) 181 | loss, accuracy = results[:2] 182 | print(accuracy) 183 | 184 | export_model.class_labels = self.class_labels 185 | # Save the model with the appropriate file extension 186 | # Save the model with the .kears extension 187 | model_file_name = model_file_name + '.keras' 188 | export_model.save(model_file_name) 189 | 190 | with open(lable_file, 'w') as f: 191 | json.dump(self.class_labels, f) 192 | 193 | 194 | def main(): 195 | 196 | # Define datasets folder location 197 | train_data_path = '../data/datasets/train' 198 | test_data_path = '../data/datasets/test' 199 | lable_file = '../data/models/class_labels.json' 200 | model_file_name = '../data/models/ASR_classify_model' 201 | 202 | text_classification = TextClassification(train_data_path, test_data_path) 203 | raw_train_ds, raw_val_ds, raw_test_ds = text_classification.load_data() 204 | text_classification.build_vectorize_layer(raw_train_ds) 205 | 206 | # Veecorize the data 207 | train_ds = raw_train_ds.map(text_classification.vectorize_text) 208 | val_ds = raw_val_ds.map(text_classification.vectorize_text) 209 | test_ds = raw_test_ds.map(text_classification.vectorize_text) 210 | 211 | # Do async prefetching / buffering of the data for best performance on GPU. 212 | AUTOTUNE = tf.data.AUTOTUNE 213 | train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE) 214 | val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE) 215 | test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE) 216 | 217 | # Build model with simple 1D convolutional neural network (CNN) 218 | text_classification.build_model() 219 | text_classification.train_model(train_ds, val_ds) 220 | # Evulate the test data before save it 221 | text_classification.save_export_model(lable_file, model_file_name, raw_test_ds) 222 | 223 | if __name__ == "__main__": 224 | main() 225 | -------------------------------------------------------------------------------- /jetbot_riva_voice/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 
22 | # 23 | 24 | import rclpy # Python library for ROS 2 25 | import ast # Parse string into a 2D array 26 | import subprocess 27 | import time 28 | from rclpy.parameter import Parameter # rclpy Parameter type (provides Parameter.Type used in the callback below) 29 | from rclpy.node import Node # Handles the creation of nodes 30 | from threading import Lock 31 | from rcl_interfaces.msg import ParameterType, SetParametersResult 32 | from std_msgs.msg import String 33 | 34 | from jetbot_riva_voice.include.text_classifier_utility import TextClassifier 35 | from jetbot_riva_voice.include.node_parameter_utility import NodeParamTools 36 | 37 | class JetbotASRagent(Node): 38 | 39 | def parameter_callback(self, params): 40 | for param in params: 41 | if param.name == 'start' and param.type_ == Parameter.Type.BOOL: 42 | self.start = param.value 43 | self.get_logger().info('start= {}'.format(bool(param.value))) 44 | 45 | return SetParametersResult(successful=True) 46 | 47 | def __init__(self): 48 | super().__init__('Jetbot_ASR_agent') 49 | 50 | self.start = self.declare_parameter('start', True).get_parameter_value().bool_value 51 | self.ASR_topic = self.declare_parameter('ASR_topic', '/jetbot_voice/transcripts').get_parameter_value().string_value 52 | self.TTS_topic = self.declare_parameter('TTS_topic', '/chatbot/response').get_parameter_value().string_value 53 | self.LLM_topic = self.declare_parameter('LLM_topic', '/jetbot_llm_input').get_parameter_value().string_value 54 | self.LLM_vision_topic = self.declare_parameter('VISION_topic', '/llm_vision_input').get_parameter_value().string_value 55 | self.ASR_node = self.declare_parameter('ASR_node', '/Riva_ASR_processor').get_parameter_value().string_value 56 | self.command_nodes = self.declare_parameter('command_nodes', ["/Jetbot_Param_Client"]).get_parameter_value().string_array_value 57 | self.label_path = self.declare_parameter('label_path', '/data/models/class_labels.json').get_parameter_value().string_value 58 | self.model_path = self.declare_parameter('model_path', '/data/models/ASR_classify_model').get_parameter_value().string_value 59 | self.predict_threshold = self.declare_parameter('predict_threshold', 0.7).get_parameter_value().double_value 60 | self.command_enable = self.declare_parameter('command_enable', False).get_parameter_value().bool_value 61 | # Get the parameter as a string (2-dimensional string array) 62 | self.jetbot_commands = self.declare_parameter('jetbot_commands', "[['start', '0:start'], ['stop', '0:stop']]").get_parameter_value().string_value 63 | self.jetbot_chat = self.declare_parameter('jetbot_chat', "[['hello', 'hello 1'], ['bye', 'bye 2']]").get_parameter_value().string_value 64 | self.jetbot_vision = self.declare_parameter('jetbot_vision', "[['vision', 'vision 1']]").get_parameter_value().string_value 65 | 66 | 67 | # YAML debug 68 | self.get_logger().info('YAML command: {}'.format(self.jetbot_commands)) 69 | self.get_logger().info('YAML chat: {}'.format(self.jetbot_chat)) 70 | 71 | # Parse the string into a 2D array 72 | self.cmd_two_dim_array = ast.literal_eval(self.jetbot_commands) 73 | self.cmd_dict_array = {row[0]: row[1] for row in self.cmd_two_dim_array} 74 | self.chat_two_dim_array = ast.literal_eval(self.jetbot_chat) 75 | self.chat_dict_array = {row[0]: row[1] for row in self.chat_two_dim_array} 76 | self.vision_two_dim_array = ast.literal_eval(self.jetbot_vision) 77 | self.vision_dict_array = {row[0]: row[1] for row in self.vision_two_dim_array} 78 | 79 | # Collect command and chat keywords 80 | self.keywords = [] 81 | command_values = [row[0] for row in self.cmd_two_dim_array] 82 | self.chat_values =
[row[0] for row in self.chat_two_dim_array] 83 | vision_values = [row[0] for row in self.vision_two_dim_array] 84 | self.keywords = self.chat_values + command_values + vision_values 85 | 86 | self.get_logger().info('start : {}'.format(self.start)) 87 | self.get_logger().info('ASR_topic : {}'.format(self.ASR_topic)) 88 | self.get_logger().info('TTS_topic : {}'.format(self.TTS_topic)) 89 | self.get_logger().info('LLM_topic : {}'.format(self.LLM_topic)) 90 | self.get_logger().info('LLM VISION_topic : {}'.format(self.LLM_vision_topic)) 91 | self.get_logger().info('ASR_node : {}'.format(self.ASR_node)) 92 | self.get_logger().info('command_nodes : {}'.format(self.command_nodes)) 93 | self.get_logger().info('jetbot_keywords : {}'.format(self.keywords)) 94 | self.get_logger().info('predict threshold: {}'.format(self.predict_threshold)) 95 | self.get_logger().info('command enable : {}'.format(self.command_enable)) 96 | 97 | # ASR message keywords -- reference MatchboxNet classes 98 | # self.keywords = ["hello", "yes", "no", "up", "down", "left", "right", "on", "off", "unknown", "silence", "start", "stop", "come", "follow", "go"] 99 | 100 | self.lock = Lock() 101 | 102 | # Add parameters callback 103 | self.add_on_set_parameters_callback(self.parameter_callback) 104 | 105 | # self.init_ros_nodes() 106 | self.node_param_util = NodeParamTools(self, executor) # 'executor' is the global MultiThreadedExecutor created in main() 107 | 108 | # Create the subscriber. This subscriber receives ASR transcript messages 109 | self.subscription = self.create_subscription( 110 | String, 111 | self.ASR_topic, 112 | self.ASR_callback, 113 | 10) 114 | 115 | self.pub_TTS = self.create_publisher( 116 | String, 117 | self.TTS_topic, 118 | 10 119 | ) 120 | 121 | self.pub_LLM = self.create_publisher( 122 | String, 123 | self.LLM_topic, 124 | 10 125 | ) 126 | 127 | self.pub_LLM_vision = self.create_publisher( 128 | String, 129 | self.LLM_vision_topic, 130 | 10 131 | ) 132 | 133 | 134 | # Load the robot commands and the TensorFlow model data 135 | self.init_text_classifier() 136 | 137 | # 138 | # Remove nodes for get/set parameter service call 139 | # 140 | def cleanup(self): 141 | # clean up set_param_node, get_param_node 142 | self.node_param_util.cleanup() 143 | pass 144 | 145 | # 146 | # Load the robot commands and the TensorFlow model data 147 | # 148 | def init_text_classifier(self): 149 | self.classifier = TextClassifier(self.model_path, self.label_path) 150 | self.get_logger().info("===================================================") 151 | self.get_logger().info("Robot commands: {}".format(self.classifier.class_labels)) 152 | self.get_logger().info("===================================================") 153 | 154 | # 155 | # This function handles the prediction process for the text classifier model 156 | # 1D convolutional neural network (CNN) model 157 | # 158 | def handle_prediction(self, min_score, prediction): 159 | self.get_logger().info("ASR raw:[{}]".format(prediction)) 160 | label, result, score = self.classifier.predict(min_score, prediction) 161 | if result: 162 | self.get_logger().info("predict: [{}]:[{}]".format(label, score)) 163 | else: 164 | self.get_logger().info("Chat topic : ================================") 165 | self.get_logger().info("predict: [{}]:[{}]".format(label, score)) 166 | 167 | return result, label, score 168 | 169 | # 170 | # NVIDIA jetson-voice ASR ROS2 topic subscription 171 | # 172 | def ASR_callback(self, msg): 173 | self.get_logger().info('Raw ASR: [{}]:{}'.format(len((msg.data).split()), msg.data)) 174 | 175 | greeting = False 176 | if
len((msg.data).split()) == 1: 177 | # A greeting is static chat -- the response is defined in the jetbot_chat 2-dimensional array 178 | self.get_logger().info('static chat keyword list:{}'.format(self.chat_values)) 179 | greeting, keyword = self.filter_keywords(msg.data, self.chat_values) 180 | self.get_logger().info('greeting: {}:{}'.format(greeting, keyword)) 181 | 182 | # If the input is not a greeting, utilize the 1D convolutional neural network (CNN) model 183 | # for text classification to determine the user's intention for JetBot. 184 | if greeting == False: 185 | # Filter out ASR noise -- 1D convolutional neural network (CNN) model 186 | result, label, score = self.handle_prediction(self.predict_threshold, msg.data) 187 | # Only pick up ASR input that contains jetbot command keywords 188 | found, keyword = self.filter_keywords(label, self.keywords) 189 | else: 190 | found = greeting 191 | 192 | command = False 193 | vision_chat = False 194 | chat = False 195 | 196 | ASR_string = msg.data 197 | node_name = "/jetbot" 198 | 199 | if found: 200 | if greeting and keyword in self.chat_dict_array: 201 | ASR_string = self.chat_dict_array[keyword] 202 | self.get_logger().info("ASR input: {} greeting: {}".format(keyword, ASR_string)) 203 | elif keyword in self.vision_dict_array: 204 | # ASR_string = self.chat_dict_array[keyword] 205 | self.get_logger().info("ASR input: {} vision chat: {}".format(keyword, ASR_string)) 206 | vision_chat = True 207 | elif keyword in self.cmd_dict_array: 208 | command = True 209 | self.get_logger().info('jetbot command tool enable: {}'.format(command)) 210 | # Retrieve the node name and command value [index:value] 211 | parts = self.cmd_dict_array[keyword].split(':') 212 | node_index = int(parts[0]) 213 | if node_index < len(self.command_nodes): 214 | node_name = self.command_nodes[node_index] 215 | else: 216 | self.get_logger().info('Error: incorrect node name index:{}'.format(node_index)) 217 | # Retrieve the command parameter 218 | ASR_string = parts[1] 219 | 220 | # Turn on the command feature if the jetbot tools copilot node exists 221 | if self.command_enable == False: 222 | node_exist = self.check_node_exists(node_name) 223 | self.get_logger().info('check node:{} exists:{}'.format(node_name, node_exist)) 224 | if node_exist: 225 | self.get_logger().info('Turn on node:{} start parameter'.format(node_name)) 226 | self.enable_jetbot_tool_copilot(node_name) 227 | self.command_enable = True 228 | time.sleep(1.0) 229 | 230 | else: 231 | self.get_logger().info("ASR input not found in keyword list --> chatbot:" + msg.data) 232 | chat = True 233 | node_name = self.ASR_node 234 | # return 235 | 236 | # Block the next callback from executing until the current callback finishes 237 | with self.lock: 238 | # A jetbot chat action has no need to set a command on the target node 239 | if command == True: 240 | # 'Echoing' in ASR occurs 241 | # when the microphone picks up the system's own text-to-speech output, 242 | # creating a recursive voice recognition loop.
243 | self.mute_ASR_processor(self.ASR_node) 244 | if self.command_enable: 245 | passfail = self.node_param_util.try_set_node_parameters(node_name, 'command', type=ParameterType.PARAMETER_STRING, value=ASR_string) 246 | if passfail == True: 247 | ASR_string = "jetbot process: " + ASR_string 248 | else: 249 | ASR_string = "jetbot node:{} does not exist, skip command:{}".format(node_name, ASR_string) 250 | else: 251 | # Publish to the TTS node to play the audio stream and disable ASR muting 252 | ASR_string = "jetbot tool copilot command: " + ASR_string 253 | TTS_string = String() 254 | TTS_string.data = ASR_string 255 | self.pub_TTS.publish(TTS_string) 256 | elif greeting == True: 257 | self.mute_ASR_processor(self.ASR_node) 258 | TTS_string = String() 259 | TTS_string.data = ASR_string 260 | self.pub_TTS.publish(TTS_string) 261 | elif vision_chat == True: 262 | self.mute_ASR_processor(self.ASR_node) 263 | # Publish to the LLM vision node to respond as a chatbot 264 | LLM_vision_string = String() 265 | # Use the raw ASR input data as the LLM input 266 | LLM_vision_string.data = msg.data 267 | self.pub_LLM_vision.publish(LLM_vision_string) 268 | elif chat == True: 269 | self.mute_ASR_processor(self.ASR_node) 270 | # Publish to the LLM node to respond as a chatbot 271 | LLM_string = String() 272 | # Use the raw ASR input data as the LLM input 273 | LLM_string.data = msg.data 274 | self.pub_LLM.publish(LLM_string) 275 | 276 | # 277 | # Mute the ASR processor and wait until the LLM chat response to the TTS task completes 278 | # 279 | def mute_ASR_processor(self, node_name): 280 | # Turn off ASR and wait until the LLM chat response to the TTS task completes 281 | self.set_jetbot_node_bool_parameters(node_name, 'start', False) 282 | 283 | # 284 | # Enable Jetbot voice 285 | # 286 | def enable_jetbot_tool_copilot(self, node_name): 287 | # Turn on Jetbot voice 288 | self.set_jetbot_node_bool_parameters(node_name, 'start', True) 289 | 290 | # 291 | # Set a jetbot ROS2 node bool parameter 292 | # 293 | def set_jetbot_node_bool_parameters(self, node_name, parameter, bool_value): 294 | passfail, value = self.node_param_util.try_get_node_parameters(node_name, parameter) 295 | if passfail == True: 296 | self.get_logger().info('Jetbot node:{} param:{} value:{}'.format(node_name, parameter, value.bool_value)) 297 | if value.bool_value != bool_value: 298 | self.node_param_util.try_set_node_parameters(node_name, parameter, type=ParameterType.PARAMETER_BOOL, value=bool_value) 299 | else: 300 | self.get_logger().info('Jetbot chat node {} does not exist, skip the task'.format(node_name)) 301 | 302 | # 303 | # Filter out ASR noise -- TODO how to improve the filtering 304 | # Only pick up ASR input that contains keywords 305 | # 306 | def filter_keywords(self, asr_output, keywords): 307 | # Convert the ASR output and keywords to lowercase for case-insensitive matching 308 | asr_output = asr_output.lower() 309 | keywords = [keyword.lower() for keyword in keywords] 310 | 311 | # Find keywords in the ASR output 312 | for keyword in keywords: 313 | if keyword in asr_output: 314 | return True, keyword 315 | 316 | # If no keyword is found, return False and None 317 | return False, None 318 | 319 | # 320 | # Check if a node exists in ROS2 321 | # 322 | def check_node_exists(self, node_name): 323 | result = subprocess.run(['ros2', 'node', 'list'], stdout=subprocess.PIPE) 324 | nodes = result.stdout.decode().split('\n') 325 | return node_name in nodes 326 | 327 | 328 | 329 | def main(args=None): 330 | 331 | rclpy.init(args=args) 332 | 333 | global executor 334 | 335 | executor = rclpy.executors.MultiThreadedExecutor()
336 | 337 | JetbotASR_node = JetbotASRagent() 338 | executor.add_node(JetbotASR_node) 339 | 340 | try: 341 | # rclpy.spin(JetbotASR_node) 342 | executor.spin() 343 | except KeyboardInterrupt: 344 | print('\ncontrol-c: JetbotASR_node shutting down') 345 | finally: 346 | # Destroy the node explicitly - don't depend on the garbage collector 347 | JetbotASR_node.cleanup() 348 | JetbotASR_node.destroy_node() 349 | rclpy.shutdown() 350 | 351 | 352 | if __name__ == '__main__': 353 | main() 354 | --------------------------------------------------------------------------------
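Usage sketch: the export model built in robot_command_text_classification.py chains the adapted TextVectorization layer in front of the CNN, so the saved ASR_classify_model.keras accepts raw strings directly, and class_labels.json stores the label names in index order. The minimal Python sketch below shows how such a model could be loaded and queried outside ROS2. The classify() helper and the relative paths are illustrative assumptions, not an API defined in this repository; it assumes a TensorFlow version whose .keras format round-trips the adapted TextVectorization vocabulary (the custom standardization is commented out, so no custom objects are needed at load time), and the 0.7 default mirrors the node's predict_threshold, below which Jetbot_ASR_Agent.py routes input to the chatbot path instead of treating it as a robot command.

import json
import numpy as np
import tensorflow as tf

# Hypothetical paths, matching the defaults used in robot_command_text_classification.py
model = tf.keras.models.load_model('../data/models/ASR_classify_model.keras')
with open('../data/models/class_labels.json') as f:
    class_labels = json.load(f)  # label names in index order, e.g. ['cmd_backward', ...]

def classify(text, min_score=0.7):
    # The embedded TextVectorization layer standardizes and tokenizes the raw
    # string itself, so the model is fed plain text, not integer sequences.
    probs = model.predict(tf.constant([text]), verbose=0)[0]
    idx = int(np.argmax(probs))
    score = float(probs[idx])
    # Returns (label, is_command, score); below min_score the agent treats the
    # input as free chat for the LLM rather than as a robot command.
    return class_labels[idx], score >= min_score, score

print(classify('robot move forward'))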