├── app ├── __init__.py ├── robot_command_model_evaluation.py └── robot_command_text_classification.py ├── data ├── datasets │ ├── train │ │ ├── cmd_stop │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 1.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 8.txt │ │ │ └── 9.txt │ │ ├── cmd_backward │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 3.txt │ │ │ └── 6.txt │ │ ├── cmd_follow │ │ │ ├── 0.txt │ │ │ ├── 5.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 4.txt │ │ │ └── 2.txt │ │ ├── cmd_forward │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 6.txt │ │ │ ├── 2.txt │ │ │ ├── 8.txt │ │ │ └── 9.txt │ │ ├── cmd_left │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ └── 1.txt │ │ ├── cmd_right │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 3.txt │ │ │ └── 1.txt │ │ ├── cmd_start │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 9.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ └── 4.txt │ │ ├── cmd_vision │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 1.txt │ │ │ ├── 4.txt │ │ │ ├── 3.txt │ │ │ └── 9.txt │ │ └── cmd_self-driving │ │ │ ├── 2.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 9.txt │ │ │ └── 7.txt │ ├── test │ │ ├── cmd_stop │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 1.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ └── 9.txt │ │ ├── cmd_backward │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 3.txt │ │ │ └── 6.txt │ │ ├── cmd_forward │ │ │ ├── 0.txt │ │ │ ├── 5.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 4.txt │ │ │ └── 2.txt │ │ ├── cmd_left │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ └── 1.txt │ │ ├── cmd_right │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 2.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 9.txt │ │ │ ├── 3.txt │ │ │ └── 1.txt │ │ ├── cmd_start │ │ │ ├── 0.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 9.txt │ │ │ ├── 1.txt │ │ │ ├── 5.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 3.txt │ │ │ └── 4.txt │ │ ├── cmd_vision │ │ │ ├── 0.txt │ │ │ ├── 7.txt │ │ │ ├── 8.txt │ │ │ ├── 2.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 1.txt │ │ │ ├── 4.txt │ │ │ ├── 3.txt │ │ │ └── 9.txt │ │ ├── cmd_self-driving │ │ │ ├── 2.txt │ │ │ ├── 6.txt │ │ │ ├── 8.txt │ │ │ ├── 0.txt │ │ │ ├── 1.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ ├── 5.txt │ │ │ ├── 9.txt │ │ │ └── 7.txt │ │ └── cmd_follow │ │ │ ├── 0.txt │ │ │ ├── 2.txt │ │ │ ├── 5.txt │ │ │ ├── 6.txt │ │ │ ├── 7.txt │ │ │ ├── 1.txt │ │ │ ├── 9.txt │ │ │ ├── 3.txt │ │ │ ├── 4.txt │ │ │ └── 8.txt │ └── .gitignore └── models │ └── .gitignore ├── jetbot_riva_voice ├── jetbot_riva_voice │ ├── __init__.py │ ├── include │ │ ├── __init__.py │ │ ├── text_classifier_utility.py │ 
│ └── node_parameter_utility.py │ └── script │ │ ├── __init__.py │ │ ├── audio_list.py │ │ ├── Jetbot_TTS_Processor.py │ │ ├── Jetbot_ASR_Processor.py │ │ └── Jetbot_ASR_Agent.py ├── resource │ └── jetbot_riva_voice ├── setup.cfg ├── package.xml ├── test │ ├── test_pep257.py │ ├── test_flake8.py │ └── test_copyright.py ├── setup.py ├── param │ └── jetbot_voice_params.yaml └── LICENSE ├── docs ├── JetBot_1.jpg ├── Jetbot_2.jpg ├── JetBot_ASR_voice_tool.png └── setup.md ├── requirements.txt ├── start_ros2_shell.sh ├── Dockerfile ├── run.sh └── README.md /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/0.txt: -------------------------------------------------------------------------------- 1 | Stop now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/6.txt: -------------------------------------------------------------------------------- 1 | Halt now. -------------------------------------------------------------------------------- /data/models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/0.txt: -------------------------------------------------------------------------------- 1 | Robot stop now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/6.txt: -------------------------------------------------------------------------------- 1 | Robot halt now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/2.txt: -------------------------------------------------------------------------------- 1 | Go back. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/4.txt: -------------------------------------------------------------------------------- 1 | Retreat. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/7.txt: -------------------------------------------------------------------------------- 1 | Move back. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/8.txt: -------------------------------------------------------------------------------- 1 | Step back. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/9.txt: -------------------------------------------------------------------------------- 1 | Back up. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/0.txt: -------------------------------------------------------------------------------- 1 | Follow me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/5.txt: -------------------------------------------------------------------------------- 1 | Trail me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/5.txt: -------------------------------------------------------------------------------- 1 | Go forward. 
-------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/7.txt: -------------------------------------------------------------------------------- 1 | Move ahead. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/0.txt: -------------------------------------------------------------------------------- 1 | Turn left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/6.txt: -------------------------------------------------------------------------------- 1 | Rotate left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/0.txt: -------------------------------------------------------------------------------- 1 | Turn right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/7.txt: -------------------------------------------------------------------------------- 1 | Rotate right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/0.txt: -------------------------------------------------------------------------------- 1 | Start now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/6.txt: -------------------------------------------------------------------------------- 1 | Begin now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/7.txt: -------------------------------------------------------------------------------- 1 | Initiate now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/9.txt: -------------------------------------------------------------------------------- 1 | Kick off now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/7.txt: -------------------------------------------------------------------------------- 1 | Stop the task. -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jetbot_riva_voice/resource/jetbot_riva_voice: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/2.txt: -------------------------------------------------------------------------------- 1 | Robot go back. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/4.txt: -------------------------------------------------------------------------------- 1 | Robot retreat. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/7.txt: -------------------------------------------------------------------------------- 1 | Robot move back. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/8.txt: -------------------------------------------------------------------------------- 1 | Robot step back.
-------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/9.txt: -------------------------------------------------------------------------------- 1 | Robot back up. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/0.txt: -------------------------------------------------------------------------------- 1 | Robot follow me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/5.txt: -------------------------------------------------------------------------------- 1 | Robot trail me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/0.txt: -------------------------------------------------------------------------------- 1 | Robot turn left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/6.txt: -------------------------------------------------------------------------------- 1 | Robot rotate left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/0.txt: -------------------------------------------------------------------------------- 1 | Robot turn right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/7.txt: -------------------------------------------------------------------------------- 1 | Robot rotate right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/0.txt: -------------------------------------------------------------------------------- 1 | Robot start now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/6.txt: -------------------------------------------------------------------------------- 1 | Robot begin now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/7.txt: -------------------------------------------------------------------------------- 1 | Robot initiate now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/9.txt: -------------------------------------------------------------------------------- 1 | Robot kick off now. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/8.txt: -------------------------------------------------------------------------------- 1 | Robot terminate now. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/0.txt: -------------------------------------------------------------------------------- 1 | Move backward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/1.txt: -------------------------------------------------------------------------------- 1 | Please reverse. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/5.txt: -------------------------------------------------------------------------------- 1 | Go backward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/1.txt: -------------------------------------------------------------------------------- 1 | Come with me. 
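For most classes, each test utterance above is just its training counterpart with a leading "Robot" wake word and the original first letter lowercased ("Stop now." becomes "Robot stop now."), so the test set probes robustness to a wake-word prefix rather than to new phrasings. A minimal sketch that verifies this pairing, assuming the `data/datasets` layout from the tree above (the check itself is illustrative, not part of the repo):

```python
from pathlib import Path

root = Path("data/datasets")
for train_file in sorted(root.glob("train/*/*.txt")):
    test_file = root / "test" / train_file.parent.name / train_file.name
    if not test_file.exists():
        continue  # some classes (e.g. cmd_follow) use different test phrasings
    train_text = train_file.read_text().strip()
    test_text = test_file.read_text().strip()
    # Expected pattern: "Stop now." -> "Robot stop now."
    expected = "Robot " + train_text[0].lower() + train_text[1:]
    if test_text != expected:
        print(f"unpaired: {test_file}: {test_text!r}")
```

Running a check like this also surfaces the handful of entries that deviate from the convention, such as the follow-style phrases stored under `test/cmd_forward`.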
-------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/3.txt: -------------------------------------------------------------------------------- 1 | Walk behind me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/6.txt: -------------------------------------------------------------------------------- 1 | Stick with me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/7.txt: -------------------------------------------------------------------------------- 1 | Accompany me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/8.txt: -------------------------------------------------------------------------------- 1 | Move with me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/0.txt: -------------------------------------------------------------------------------- 1 | Move forward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/1.txt: -------------------------------------------------------------------------------- 1 | Please advance. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/3.txt: -------------------------------------------------------------------------------- 1 | Proceed forward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/4.txt: -------------------------------------------------------------------------------- 1 | Move straight. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/6.txt: -------------------------------------------------------------------------------- 1 | Advance ahead. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/2.txt: -------------------------------------------------------------------------------- 1 | Make a left turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/3.txt: -------------------------------------------------------------------------------- 1 | Shift to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/4.txt: -------------------------------------------------------------------------------- 1 | Move to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/5.txt: -------------------------------------------------------------------------------- 1 | Turn to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/7.txt: -------------------------------------------------------------------------------- 1 | Take a left turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/8.txt: -------------------------------------------------------------------------------- 1 | Swing to the left. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/9.txt: -------------------------------------------------------------------------------- 1 | Veer to the left. 
-------------------------------------------------------------------------------- /data/datasets/train/cmd_right/2.txt: -------------------------------------------------------------------------------- 1 | Make a right turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/4.txt: -------------------------------------------------------------------------------- 1 | Move to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/5.txt: -------------------------------------------------------------------------------- 1 | Turn to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/6.txt: -------------------------------------------------------------------------------- 1 | Take a right turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/8.txt: -------------------------------------------------------------------------------- 1 | Take a right turn. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/9.txt: -------------------------------------------------------------------------------- 1 | Veer to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/1.txt: -------------------------------------------------------------------------------- 1 | Begin the task. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/5.txt: -------------------------------------------------------------------------------- 1 | Start the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/8.txt: -------------------------------------------------------------------------------- 1 | Commence the task. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/1.txt: -------------------------------------------------------------------------------- 1 | Stop immediately. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/2.txt: -------------------------------------------------------------------------------- 1 | Stop all actions. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/3.txt: -------------------------------------------------------------------------------- 1 | Stop processing. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/4.txt: -------------------------------------------------------------------------------- 1 | Stop everything. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/5.txt: -------------------------------------------------------------------------------- 1 | Stop immediately. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/8.txt: -------------------------------------------------------------------------------- 1 | Cease the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_stop/9.txt: -------------------------------------------------------------------------------- 1 | End the action. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/0.txt: -------------------------------------------------------------------------------- 1 | Robot move backward. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/1.txt: -------------------------------------------------------------------------------- 1 | Robot please reverse. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/5.txt: -------------------------------------------------------------------------------- 1 | Robot go backward. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/1.txt: -------------------------------------------------------------------------------- 1 | Robot come with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/3.txt: -------------------------------------------------------------------------------- 1 | Robot walk behind me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/6.txt: -------------------------------------------------------------------------------- 1 | Robot stick with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/7.txt: -------------------------------------------------------------------------------- 1 | Robot accompany me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/8.txt: -------------------------------------------------------------------------------- 1 | Robot move with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/2.txt: -------------------------------------------------------------------------------- 1 | Robot make a left turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/3.txt: -------------------------------------------------------------------------------- 1 | Robot shift to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/4.txt: -------------------------------------------------------------------------------- 1 | Robot move to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/5.txt: -------------------------------------------------------------------------------- 1 | Robot turn to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/7.txt: -------------------------------------------------------------------------------- 1 | Robot take a left turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/8.txt: -------------------------------------------------------------------------------- 1 | Robot swing to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/9.txt: -------------------------------------------------------------------------------- 1 | Robot veer to the left. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_right/2.txt: -------------------------------------------------------------------------------- 1 | Robot make a right turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/4.txt: -------------------------------------------------------------------------------- 1 | Robot move to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/5.txt: -------------------------------------------------------------------------------- 1 | Robot turn to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/6.txt: -------------------------------------------------------------------------------- 1 | Robot take a right turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/8.txt: -------------------------------------------------------------------------------- 1 | Robot take a right turn. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/9.txt: -------------------------------------------------------------------------------- 1 | Robot veer to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/1.txt: -------------------------------------------------------------------------------- 1 | Robot begin the task. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/5.txt: -------------------------------------------------------------------------------- 1 | Robot start the action. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/8.txt: -------------------------------------------------------------------------------- 1 | Robot commence the task. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/1.txt: -------------------------------------------------------------------------------- 1 | Robot halt the action. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/2.txt: -------------------------------------------------------------------------------- 1 | Robot cease operation. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/3.txt: -------------------------------------------------------------------------------- 1 | Robot terminate the task. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/4.txt: -------------------------------------------------------------------------------- 1 | Robot end the process. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/5.txt: -------------------------------------------------------------------------------- 1 | Robot stop immediately. -------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/7.txt: -------------------------------------------------------------------------------- 1 | Robot cease the task. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_stop/9.txt: -------------------------------------------------------------------------------- 1 | Robot end the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/3.txt: -------------------------------------------------------------------------------- 1 | Move in reverse. -------------------------------------------------------------------------------- /data/datasets/train/cmd_backward/6.txt: -------------------------------------------------------------------------------- 1 | Reverse direction. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/9.txt: -------------------------------------------------------------------------------- 1 | Move along with me. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/2.txt: -------------------------------------------------------------------------------- 1 | Go straight ahead. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/8.txt: -------------------------------------------------------------------------------- 1 | Proceed straight. -------------------------------------------------------------------------------- /data/datasets/train/cmd_forward/9.txt: -------------------------------------------------------------------------------- 1 | Continue forward. -------------------------------------------------------------------------------- /data/datasets/train/cmd_right/3.txt: -------------------------------------------------------------------------------- 1 | Shift to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/2.txt: -------------------------------------------------------------------------------- 1 | Initiate the action. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/3.txt: -------------------------------------------------------------------------------- 1 | Commence operation. -------------------------------------------------------------------------------- /data/datasets/train/cmd_start/4.txt: -------------------------------------------------------------------------------- 1 | Kick off the process. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/0.txt: -------------------------------------------------------------------------------- 1 | Describe the image. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/7.txt: -------------------------------------------------------------------------------- 1 | Analyze the photo. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/8.txt: -------------------------------------------------------------------------------- 1 | Explain what you see. 
-------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/include/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | !test/ 2 | !train/ 3 | !.gitignore -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/3.txt: -------------------------------------------------------------------------------- 1 | Robot move in reverse. -------------------------------------------------------------------------------- /data/datasets/test/cmd_backward/6.txt: -------------------------------------------------------------------------------- 1 | Robot reverse direction. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/9.txt: -------------------------------------------------------------------------------- 1 | Robot move along with me. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/3.txt: -------------------------------------------------------------------------------- 1 | Robot shift to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/2.txt: -------------------------------------------------------------------------------- 1 | Robot initiate the action. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/3.txt: -------------------------------------------------------------------------------- 1 | Robot commence operation. -------------------------------------------------------------------------------- /data/datasets/test/cmd_start/4.txt: -------------------------------------------------------------------------------- 1 | Robot kick off the process. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/0.txt: -------------------------------------------------------------------------------- 1 | Robot describe the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/7.txt: -------------------------------------------------------------------------------- 1 | Robot analyze the photo. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/8.txt: -------------------------------------------------------------------------------- 1 | Robot explain what you see. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/4.txt: -------------------------------------------------------------------------------- 1 | Keep up with me as I move. -------------------------------------------------------------------------------- /data/datasets/train/cmd_left/1.txt: -------------------------------------------------------------------------------- 1 | Please rotate to the left.
-------------------------------------------------------------------------------- /data/datasets/train/cmd_right/1.txt: -------------------------------------------------------------------------------- 1 | Please rotate to the right. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/2.txt: -------------------------------------------------------------------------------- 1 | Activate auto drive. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/6.txt: -------------------------------------------------------------------------------- 1 | Begin self driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/8.txt: -------------------------------------------------------------------------------- 1 | Initiate auto drive. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/2.txt: -------------------------------------------------------------------------------- 1 | Analyze the camera feed. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/5.txt: -------------------------------------------------------------------------------- 1 | Describe the camera image. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/6.txt: -------------------------------------------------------------------------------- 1 | What is in the picture? -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/4.txt: -------------------------------------------------------------------------------- 1 | Robot keep up with me as I move. -------------------------------------------------------------------------------- /data/datasets/test/cmd_left/1.txt: -------------------------------------------------------------------------------- 1 | Robot please rotate to the left. -------------------------------------------------------------------------------- /data/datasets/test/cmd_right/1.txt: -------------------------------------------------------------------------------- 1 | Robot please rotate to the right. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/2.txt: -------------------------------------------------------------------------------- 1 | Robot activate auto drive. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/6.txt: -------------------------------------------------------------------------------- 1 | Robot begin self driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/8.txt: -------------------------------------------------------------------------------- 1 | Robot initiate auto drive. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/2.txt: -------------------------------------------------------------------------------- 1 | Robot analyze the camera feed. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/5.txt: -------------------------------------------------------------------------------- 1 | Robot describe the camera image. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/6.txt: -------------------------------------------------------------------------------- 1 | Robot what is in the picture? -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/0.txt: -------------------------------------------------------------------------------- 1 | Start self driving mode. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/1.txt: -------------------------------------------------------------------------------- 1 | Begin autonomous driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/3.txt: -------------------------------------------------------------------------------- 1 | Initiate self driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/4.txt: -------------------------------------------------------------------------------- 1 | Engage autonomous mode. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/5.txt: -------------------------------------------------------------------------------- 1 | Start automatic driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/9.txt: -------------------------------------------------------------------------------- 1 | Engage self driving mode. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/1.txt: -------------------------------------------------------------------------------- 1 | What do you see in the picture? -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/4.txt: -------------------------------------------------------------------------------- 1 | Provide details about the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/0.txt: -------------------------------------------------------------------------------- 1 | Robot follow the person in the camera. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/0.txt: -------------------------------------------------------------------------------- 1 | Robot start self driving mode. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/1.txt: -------------------------------------------------------------------------------- 1 | Robot begin autonomous driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/3.txt: -------------------------------------------------------------------------------- 1 | Robot initiate self driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/4.txt: -------------------------------------------------------------------------------- 1 | Robot engage autonomous mode. 
-------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/5.txt: -------------------------------------------------------------------------------- 1 | Robot start automatic driving. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/9.txt: -------------------------------------------------------------------------------- 1 | Robot engage self driving mode. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/1.txt: -------------------------------------------------------------------------------- 1 | Robot what do you see in the picture? -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/4.txt: -------------------------------------------------------------------------------- 1 | Robot provide details about the image. -------------------------------------------------------------------------------- /data/datasets/train/cmd_follow/2.txt: -------------------------------------------------------------------------------- 1 | Begin following the person you see. -------------------------------------------------------------------------------- /data/datasets/train/cmd_self-driving/7.txt: -------------------------------------------------------------------------------- 1 | Activate autonomous driving. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/3.txt: -------------------------------------------------------------------------------- 1 | Explain the content of the photo. -------------------------------------------------------------------------------- /data/datasets/train/cmd_vision/9.txt: -------------------------------------------------------------------------------- 1 | Provide a description of the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/2.txt: -------------------------------------------------------------------------------- 1 | Robot begin following the person you see. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/5.txt: -------------------------------------------------------------------------------- 1 | Robot follow the individual in the camera. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/6.txt: -------------------------------------------------------------------------------- 1 | Robot start following the person in view. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/7.txt: -------------------------------------------------------------------------------- 1 | Robot begin tracking the person you see. -------------------------------------------------------------------------------- /data/datasets/test/cmd_forward/2.txt: -------------------------------------------------------------------------------- 1 | Robot begin following the person you see. -------------------------------------------------------------------------------- /data/datasets/test/cmd_self-driving/7.txt: -------------------------------------------------------------------------------- 1 | Robot activate autonomous driving. 
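The listings above follow the one-folder-per-class text layout (`data/datasets/{train,test}/<label>/<n>.txt`) that `app/robot_command_text_classification.py` trains on per `docs/setup.md` below. That training script is not included in this dump, so the following is only a minimal loading sketch, assuming TensorFlow/Keras (both appear, commented out, in `requirements.txt`):

```python
import tensorflow as tf

# Each subdirectory name (cmd_backward, cmd_follow, ...) becomes a class label.
train_ds = tf.keras.utils.text_dataset_from_directory(
    "data/datasets/train", batch_size=8, seed=42)
test_ds = tf.keras.utils.text_dataset_from_directory(
    "data/datasets/test", batch_size=8, shuffle=False)

print(train_ds.class_names)  # labels are inferred in sorted folder order

# Map the short command strings to integer token sequences for a small CNN.
vectorize = tf.keras.layers.TextVectorization(
    max_tokens=1000, output_mode="int", output_sequence_length=16)
vectorize.adapt(train_ds.map(lambda text, label: text))
```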
-------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/3.txt: -------------------------------------------------------------------------------- 1 | Robot explain the content of the photo. -------------------------------------------------------------------------------- /data/datasets/test/cmd_vision/9.txt: -------------------------------------------------------------------------------- 1 | Robot provide a description of the image. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/1.txt: -------------------------------------------------------------------------------- 1 | Robot start tracking the individual in view. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/9.txt: -------------------------------------------------------------------------------- 1 | Robot engage tracking for the person in sight. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/3.txt: -------------------------------------------------------------------------------- 1 | Robot initiate tracking of the person in the frame. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/4.txt: -------------------------------------------------------------------------------- 1 | Robot engage follow mode for the person in sight. -------------------------------------------------------------------------------- /data/datasets/test/cmd_follow/8.txt: -------------------------------------------------------------------------------- 1 | Robot initiate follow mode for the person in the frame. -------------------------------------------------------------------------------- /docs/JetBot_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jen-Hung-Ho/ros2_jetbot_voice/HEAD/docs/JetBot_1.jpg -------------------------------------------------------------------------------- /docs/Jetbot_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jen-Hung-Ho/ros2_jetbot_voice/HEAD/docs/Jetbot_2.jpg -------------------------------------------------------------------------------- /docs/JetBot_ASR_voice_tool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jen-Hung-Ho/ros2_jetbot_voice/HEAD/docs/JetBot_ASR_voice_tool.png -------------------------------------------------------------------------------- /jetbot_riva_voice/setup.cfg: -------------------------------------------------------------------------------- 1 | [develop] 2 | script_dir=$base/lib/jetbot_riva_voice 3 | [install] 4 | install_scripts=$base/lib/jetbot_riva_voice 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | packaging>=20.0 2 | # nvidia-riva-client 3 | # pyyaml>=6 4 | termcolor 5 | # pyaudio 6 | # wget 7 | # keras 8 | # tensorflow 9 | # git+https://github.com/Granulate/DockerHub-API.git 10 | 11 | -------------------------------------------------------------------------------- /start_ros2_shell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if a container ID is provided 4 | if [ -z "$1" ]; 
then 5 | echo "Usage: $0 <container_id>" 6 | else 7 | CONTAINER_ID=$1 8 | # Execute the command inside the specified container 9 | docker exec -it $CONTAINER_ID /bin/bash -c "source install/setup.bash && exec /bin/bash" 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /jetbot_riva_voice/package.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?> 3 | <package format="3"> 4 | <name>jetbot_riva_voice</name> 5 | <version>0.0.0</version> 6 | <description>TODO: Package description</description> 7 | <maintainer email="jenhungho@outlook.com">jetbot</maintainer> 8 | <license>Apache-2.0</license> 9 | 10 | <exec_depend>python3-pyaudio</exec_depend> 11 | 12 | <test_depend>ament_copyright</test_depend> 13 | <test_depend>ament_flake8</test_depend> 14 | <test_depend>ament_pep257</test_depend> 15 | <test_depend>python3-pytest</test_depend> 16 | 17 | <export> 18 | <build_type>ament_python</build_type> 19 | </export> 20 | </package> 21 | -------------------------------------------------------------------------------- /jetbot_riva_voice/test/test_pep257.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_pep257.main import main 16 | import pytest 17 | 18 | 19 | @pytest.mark.linter 20 | @pytest.mark.pep257 21 | def test_pep257(): 22 | rc = main(argv=['.', 'test']) 23 | assert rc == 0, 'Found code style errors / warnings' 24 | -------------------------------------------------------------------------------- /jetbot_riva_voice/test/test_flake8.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_flake8.main import main_with_errors 16 | import pytest 17 | 18 | 19 | @pytest.mark.flake8 20 | @pytest.mark.linter 21 | def test_flake8(): 22 | rc, errors = main_with_errors(argv=[]) 23 | assert rc == 0, \ 24 | 'Found %d code style errors / warnings:\n' % len(errors) + \ 25 | '\n'.join(errors) 26 | -------------------------------------------------------------------------------- /jetbot_riva_voice/test/test_copyright.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from ament_copyright.main import main 16 | import pytest 17 | 18 | 19 | # Remove the `skip` decorator once the source file(s) have a copyright header 20 | @pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') 21 | @pytest.mark.copyright 22 | @pytest.mark.linter 23 | def test_copyright(): 24 | rc = main(argv=['.', 'test']) 25 | assert rc == 0, 'Found errors' 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | # Use an argument for the base image 3 | # ARG BASE_IMAGE=dustynv/ros:humble-llm-r36.3.0 4 | ARG BASE_IMAGE=dustynv/nano_llm:humble-r36.3.0 5 | 6 | FROM ${BASE_IMAGE} 7 | 8 | # ARG ROS2_SETUP=/opt/ros/humble/install/setup.bash 9 | ARG ROS2_SETUP=/ros2_workspace/install/setup.bash 10 | 11 | # ENV key=value 12 | ENV ROS_DISTRO=humble 13 | ENV SHELL=/bin/bash 14 | 15 | # Set up the ROS2 workspace 16 | RUN mkdir -p /ros2_ws/src 17 | WORKDIR /ros2_ws/src 18 | 19 | # Copy the requirements.txt file 20 | COPY requirements.txt ./requirements.txt 21 | 22 | # Copy the entrypoint script into the image 23 | # COPY /ros_entrypoint.sh / 24 | 25 | # Copy your ROS2 packages into the workspace 26 | COPY ./jetbot_riva_voice . 27 | 28 | # Update the package list and install vi 29 | RUN apt-get update && apt-get install -y vim 30 | 31 | # Install any necessary dependencies 32 | RUN pip3 install --no-cache-dir --verbose -r requirements.txt 33 | 34 | # {BASE_IMAGE} will setup the ROS2 ENTRYPOINT 35 | ENTRYPOINT [ "/ros_entrypoint.sh" ] 36 | 37 | # Build the workspace 38 | WORKDIR /ros2_ws 39 | RUN source ${ROS2_SETUP} && \ 40 | colcon build 41 | 42 | CMD ["/bin/bash"] 43 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set the environment variables 4 | DISPLAY_VAR=$DISPLAY 5 | ROS_DOMAIN_ID=7 6 | 7 | # Set the volume mappings 8 | VOLUME_X11=/tmp/.X11-unix/:/tmp/.X11-unix:rw 9 | 10 | # Define Docker volumes and environment variables 11 | ROOT=$(dirname "$0") 12 | DOCKER_VOLUMES=" 13 | --volume=$VOLUME_X11 \ 14 | --volume=$ROOT/app:/app \ 15 | --volume=$ROOT/data:/data \ 16 | --volume=$ROOT/jetbot_riva_voice:/source \ 17 | " 18 | DOCKER_ENV_VARS=" 19 | --env DISPLAY=$DISPLAY_VAR \ 20 | --env QT_X11_NO_MITSHM=1 \ 21 | --env ROS_DOMAIN_ID=$ROS_DOMAIN_ID \ 22 | " 23 | 24 | # check for V4L2 devices 25 | V4L2_DEVICES="" 26 | 27 | for i in {0..9} 28 | do 29 | if [ -a "/dev/video$i" ]; then 30 | V4L2_DEVICES="$V4L2_DEVICES --device /dev/video$i " 31 | fi 32 | done 33 | 34 | # check for I2C devices 35 | I2C_DEVICES="" 36 | 37 | for i in {0..9} 38 | do 39 | if [ -a "/dev/i2c-$i" ]; then 40 | I2C_DEVICES="$I2C_DEVICES --device /dev/i2c-$i " 41 | fi 42 | done 43 | 44 | DOCKER_DEVICES=" 45 | --device /dev/snd \ 46 | --device /dev/bus/usb \ 47 | --device=/dev/input \ 48 | " 49 | DOCKER_ARGS="${DOCKER_VOLUMES} ${DOCKER_ENV_VARS} ${V4L2_DEVICES} 
${I2C_DEVICES} ${DOCKER_DEVICES}" 49 | 50 | 51 | # Set the docker image 52 | DOCKER_IMAGE=${DOCKER_IMAGE:-jetbot_riva_voice:latest} 53 | 54 | # Run the docker command 55 | docker run -it --rm --net host --ipc host \ 56 | ${DOCKER_ARGS} \ 57 | $DOCKER_IMAGE /bin/bash -c "source install/setup.bash && /bin/bash" 58 | -------------------------------------------------------------------------------- /jetbot_riva_voice/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | import os 3 | from glob import glob 4 | 5 | package_name = 'jetbot_riva_voice' 6 | 7 | setup( 8 | name=package_name, 9 | version='0.0.0', 10 | packages=find_packages(exclude=['test']), 11 | data_files=[ 12 | ('share/ament_index/resource_index/packages', 13 | ['resource/' + package_name]), 14 | ('share/' + package_name, ['package.xml']), 15 | # Include all launch files 16 | (os.path.join('share', package_name, 'launch'), glob('launch/*.launch.py')), 17 | # Include all param files 18 | (os.path.join('share', package_name, 'param'), glob('param/*params.yaml')), 19 | # Include all include files 20 | (os.path.join('share', package_name, 'include'), glob('include/*.py')), 21 | ], 22 | install_requires=['setuptools'], 23 | zip_safe=True, 24 | maintainer='jetbot', 25 | maintainer_email='jenhungho@outlook.com', 26 | description='TODO: Package description', 27 | license='Apache-2.0', 28 | tests_require=['pytest'], 29 | entry_points={ 30 | 'console_scripts': [ 31 | 'jetbot_ASR = jetbot_riva_voice.script.Jetbot_ASR_Processor:main', 32 | 'jetbot_TTS = jetbot_riva_voice.script.Jetbot_TTS_Processor:main', 33 | 'jetbot_voice_agent = jetbot_riva_voice.script.Jetbot_ASR_Agent:main', 34 | 'audio_list = jetbot_riva_voice.script.audio_list:main' 35 | ], 36 | }, 37 | ) 38 | -------------------------------------------------------------------------------- /jetbot_riva_voice/param/jetbot_voice_params.yaml: -------------------------------------------------------------------------------- 1 | Riva_ASR_processor: 2 | ros__parameters: 3 | ASR_topic: "/jetbot_voice/transcripts" 4 | url: "localhost:50051" 5 | streaming_chunk: 16000 6 | index: 26 7 | start: true 8 | Riva_TTS_processor: 9 | ros__parameters: 10 | TTS_topic: "/jetbot_TTS/transcripts" 11 | chat_topic: "/chatbot/response" 12 | ASR_node: "/Riva_ASR_processor" 13 | streaming_chunk: 16000 14 | url: "localhost:50051" 15 | index: 0 16 | start: true 17 | Jetbot_ASR_agent: 18 | ros__parameters: 19 | ASR_topic: "/jetbot_voice/transcripts" 20 | TTS_topic: "/chatbot/response" 21 | LLM_topic: "/jetbot_llm_input" 22 | VISION_topic: "/llm_vision_input" 23 | ASR_node: "/Riva_ASR_processor" 24 | start: true 25 | command_enable: false 26 | command_nodes: ["/Jetbot_tool_voice_copilot"] 27 | # jetbot_commands: syntax: ['ASR keyword', 'command nodes index:command keyword'] 28 | # jetbot_commands: example: ['away','0:self-driving'] 29 | jetbot_commands: > 30 | [ 31 | ['cmd_start','0:start'], ['cmd_stop','0:stop'], 32 | ['cmd_left','0:left'], ['cmd_right','0:right'], ['cmd_forward', '0:forward'], ['cmd_backward', '0:backward'], 33 | ['cmd_self-driving','0:self-driving'], ['cmd_follow','0:follow'] 34 | ] 35 | jetbot_chat: > 36 | [ 37 | ['hello', 'This is jetbot.
How can I help you'], 38 | ['bye', 'Take care, talk to you later'] 39 | ] 40 | jetbot_vision: > 41 | [ 42 | ['cmd_vision','Describe the image content in detail'] 43 | ] 44 | -------------------------------------------------------------------------------- /docs/setup.md: -------------------------------------------------------------------------------- 1 | ## Jetbot Voice-Activated Copilot Tools Setup 2 | 3 | 1. **Configure Docker Engine**: 4 | Follow these [setup steps](https://github.com/dusty-nv/jetson-containers/blob/master/docs/setup.md) to configure your Docker engine. 5 | 6 | 2. **Set Up ROS Development Environment**: 7 | Set up your ROS development environment by following the instructions [here](https://docs.ros.org/en/humble/Installation.html). 8 | 9 | 3. **Clone the Repository**: 10 | Open your terminal and run the following command to clone the repository: 11 | ```bash 12 | git clone https://github.com/Jen-Hung-Ho/ros2_jetbot_voice 13 | ``` 14 | 15 | 4. **Navigate to the Repository Directory**: 16 | Change to the directory of the cloned repository: 17 | ```bash 18 | cd ros2_jetbot_voice 19 | ``` 20 | 21 | 5. **Build the Docker Image**: 22 | Ensure the `build.sh` script has execute permissions. If not, add execute permissions using: 23 | ```bash 24 | chmod +x build.sh 25 | ``` 26 | 27 | Then, run the `build.sh` script to build the Docker image: 28 | ```bash 29 | ./build.sh 30 | ``` 31 | 32 | 6. **Start Docker and Build the CNN Model**: 33 | Execute the following commands to start the container and build the CNN model (this only needs to be done once). The model data will be saved under `/data/models/ASR_classify_model` for the voice-activated tools to load at ROS2 node launch: 34 | ```bash 35 | . run.sh 36 | cd .. 37 | cd app 38 | python3 robot_command_text_classification.py # build CNN model 39 | python3 robot_command_model_evaluation.py # run unit test 40 | ``` 41 | 42 | 7. **Start Docker**: 43 | Execute the following command to run the Docker container: 44 | ```bash 45 | . run.sh 46 | ``` 47 | 48 | 8. **Attach to an Existing Running Docker Container**: 49 | To attach to an existing running Docker container, use the following commands: 50 | ```bash 51 | docker ps 52 | ``` 53 | 54 | Identify the `CONTAINER ID` of the running container (e.g., `422fc05b7655`), then run: 55 | ```bash 56 | . start_ros2_shell.sh 57 | ``` 58 | 59 | For example: 60 | ```bash 61 | . start_ros2_shell.sh 422fc05b7655 62 | ``` -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/audio_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software.
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | 24 | import pyaudio 25 | 26 | from ctypes import * 27 | from contextlib import contextmanager 28 | 29 | # Define our error handler type 30 | ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p) 31 | 32 | def py_error_handler(filename, line, function, err, fmt): 33 | pass 34 | 35 | c_error_handler = ERROR_HANDLER_FUNC(py_error_handler) 36 | 37 | # Load the ALSA library 38 | asound = cdll.LoadLibrary('libasound.so') 39 | 40 | # Set our error handler 41 | asound.snd_lib_error_set_handler(c_error_handler) 42 | 43 | # Suppress the ALSA lib error message 44 | @contextmanager 45 | def noalsaerr(): 46 | asound = cdll.LoadLibrary('libasound.so') 47 | asound.snd_lib_error_set_handler(c_error_handler) 48 | yield 49 | asound.snd_lib_error_set_handler(None) 50 | 51 | def main(args=None): 52 | # Use PyAudio to play the WAV file 53 | with noalsaerr(): 54 | p = pyaudio.PyAudio() 55 | 56 | 57 | print("----------------------------------------------------") 58 | print("Audio Input Devices:") 59 | print("----------------------------------------------------") 60 | for i in range(p.get_device_count()): 61 | device_info = p.get_device_info_by_index(i) 62 | if device_info["maxInputChannels"] > 0: 63 | print('Input Device {:2d} - \'{}\' (inputs={}) (sample_rate={})'.format(i, device_info['name'], device_info['maxInputChannels'], round(device_info['defaultSampleRate']))) 64 | 65 | 66 | print("\n----------------------------------------------------") 67 | print("Audio Output Devices:") 68 | print("----------------------------------------------------") 69 | for i in range(p.get_device_count()): 70 | device_info = p.get_device_info_by_index(i) 71 | if device_info["maxOutputChannels"] > 0: 72 | print('Output Device {:2d} - \'{}\' (outpus={}) (sample_rate={})'.format(i, device_info['name'], device_info['maxOutputChannels'], round(device_info['defaultSampleRate']))) 73 | 74 | p.terminate() 75 | 76 | if __name__ == '__main__': 77 | main() -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/include/text_classifier_utility.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | # 24 | # Reference: 25 | # This code is inspired by the example from Keras: 26 | # https://keras.io/examples/nlp/text_classification_from_scratch/ 27 | # 28 | 29 | import tensorflow as tf 30 | from tensorflow.keras.layers import TextVectorization 31 | import json 32 | import string 33 | import re # Import the re module 34 | 35 | 36 | class TextClassifier: 37 | 38 | # 39 | # Initialize the class: load the model and labels 40 | # 41 | def __init__(self, model_path, labels_path): 42 | self.custom_objects = { 43 | 'TextVectorization': TextVectorization, 44 | 'custom_standardization': self.custom_standardization 45 | } 46 | model_path = model_path + '.keras' 47 | self.model = tf.keras.models.load_model(model_path) 48 | # disable custom_standardization -- failed to deserialize the model data 49 | # self.model = tf.keras.models.load_model(model_path, custom_objects=self.custom_objects) 50 | 51 | with open(labels_path, 'r') as f: 52 | self.class_labels = json.load(f) 53 | 54 | # 55 | # Having looked at our data above, we see that the raw text contains HTML break 56 | # tags of the form '
<br />'. These tags will not be removed by the default 57 | # standardizer (which doesn't strip HTML). Because of this, we will need to 58 | # create a custom standardization function. 59 | # 60 | # Register the custom standardization function 61 | @tf.keras.utils.register_keras_serializable(package='Custom', name='custom_standardization') 62 | def custom_standardization(self, input_data): 63 | lowercase = tf.strings.lower(input_data) 64 | stripped_html = tf.strings.regex_replace(lowercase, '<br />
', ' ') 65 | return tf.strings.regex_replace(stripped_html, '[%s]' % re.escape(string.punctuation), '') 66 | 67 | # 68 | # This function handles the prediction process for the text classifier model 69 | # 1D convolutional neural network (CNN) model 70 | # 71 | def predict(self, min_score, input_text): 72 | prediction = self.model.predict(tf.constant([input_text])) 73 | predicted_class_index = tf.argmax(prediction, axis=1).numpy()[0] 74 | most_fit_score = prediction[0][predicted_class_index] 75 | predicted_class_label = self.class_labels[predicted_class_index] 76 | 77 | if most_fit_score < min_score: 78 | return "other", False, most_fit_score 79 | else: 80 | return predicted_class_label, True, most_fit_score -------------------------------------------------------------------------------- /app/robot_command_model_evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | # 24 | # Reference: 25 | # This code is inspired by the example from Keras: 26 | # https://keras.io/examples/nlp/text_classification_from_scratch/ 27 | # 28 | 29 | import tensorflow as tf 30 | from tensorflow.keras.layers import TextVectorization 31 | from termcolor import colored 32 | import json 33 | import string 34 | import re # Import the re module 35 | import unittest 36 | 37 | from jetbot_riva_voice.include.text_classifier_utility import TextClassifier 38 | 39 | # unit test 40 | class TestTextClassifier(unittest.TestCase): 41 | 42 | @classmethod 43 | def setUpClass(cls): 44 | model_path = '../data/models/ASR_classify_model' 45 | labels_path = '../data/models/class_labels.json' 46 | cls.classifier = TextClassifier(model_path, labels_path) 47 | cls.examples = [ 48 | "turn to right.", 49 | "turn to left.", 50 | "What do you see in camera?", 51 | "follow me.", 52 | "go forward.", 53 | "move backward.", 54 | "stop all actions", 55 | "start the action", 56 | "Start self driving mode", 57 | "Describe what do you see in image?", 58 | "How are you today?", 59 | "Hello What is your name?", 60 | "How many states in the United States?" 
61 | ] 62 | cls.expected_results = [ 63 | "cmd_right", 64 | "cmd_left", 65 | "cmd_vision", 66 | "cmd_follow", 67 | "cmd_forward", 68 | "cmd_backward", 69 | "cmd_stop", 70 | "cmd_start", 71 | "cmd_self-driving", 72 | "cmd_vision", 73 | "other", 74 | "other", 75 | "other" 76 | ] 77 | 78 | def test_predictions(self): 79 | pass_count = 0 80 | for i, example in enumerate(self.examples): 81 | label, result, score = self.classifier.predict(0.7, example) 82 | expected_label = self.expected_results[i] 83 | if label == expected_label: 84 | pass_count += 1 85 | # print(f"PASS: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'") 86 | print(colored(f"PASS: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'", 'green')) 87 | if score < 0.7 and label == 'other': 88 | # print(f"PASS: chat topic: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'") 89 | print(colored(f"PASS: chat topic: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'", 'magenta')) 90 | 91 | else: 92 | # print(f"FAIL: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'") 93 | print(colored(f"FAIL: Input: '{example}' => Predicted Label: '{label}', Expected Label: '{expected_label}', Result: '{score}'", 'red')) 94 | 95 | print("===================================================") 96 | total_tests = len(self.examples) 97 | print(f"Test Summary: Passed {pass_count}/{total_tests} tests") 98 | print("===================================================") 99 | 100 | 101 | def simple_test(): 102 | 103 | # Define data mode file loacation 104 | model_path = '../data/models/ASR_classify_model' 105 | labels_path = '../data/models/class_labels.json' 106 | classifier = TextClassifier(model_path, labels_path) 107 | 108 | 109 | # Sample test cases 110 | examples = [ 111 | "turn to right.", 112 | "turn to left.", 113 | "What do you see in camera?", 114 | "follow me.", 115 | "go forward.", 116 | "move backward.", 117 | "stop all actions", 118 | "start the action", 119 | "Start self driving mode", 120 | "Describe what do you see in image?", 121 | "How many states in the United States?" 
122 | ] 123 | 124 | print("============================================================") 125 | print(classifier.class_labels) 126 | print("============================================================") 127 | 128 | # Run predictions for each example 129 | for example in examples: 130 | 131 | label, result, score = classifier.predict(0.7, example) 132 | if result: 133 | print(f"Input: '{example}' => Predicted Label: '{label}', Result: '{score}'") 134 | else: 135 | print("Chat topic: -----------------------") 136 | print(f"Input: '{example}' => Predicted Label: '{label}', Result: '{score}'") 137 | 138 | 139 | if __name__ == "__main__": 140 | unittest.main() -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/include/node_parameter_utility.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2023, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 22 | # 23 | 24 | import rclpy # Python library for ROS 2 25 | import threading 26 | from rcl_interfaces.msg import ParameterType, Parameter, ParameterValue 27 | from ros2param.api import call_get_parameters 28 | from ros2param.api import call_set_parameters 29 | 30 | from std_msgs.msg import String 31 | 32 | class NodeParamTools(): 33 | 34 | def __init__(self, node, executor): 35 | 36 | self.node = node 37 | self.logger = self.node.get_logger() 38 | self.executor = executor 39 | self.lock = threading.Lock() 40 | node_name = self.node.get_name() 41 | 42 | self.init_ros_nodes(node_name) 43 | self.logger.info('NodeParamTools({}) initialize'.format(node_name)) 44 | 45 | # 46 | # Initialize nodes for get/set parameter service call 47 | # 48 | def init_ros_nodes(self, node_name): 49 | # To get/set parameter on another node in ROS2 using Python, use the SetParameters service. 
50 | # client = node.create_client(GetParameters, f'{node_name}/get_parameters') 51 | # client = node.create_client(SetParameters, f'{node_name}/set_parameters') 52 | 53 | self.get_param_node = rclpy.create_node(node_name + '_get_param_node') 54 | self.set_param_node = rclpy.create_node(node_name + '_set_param_node') 55 | self.executor.add_node(self.get_param_node) 56 | self.executor.add_node(self.set_param_node) 57 | 58 | # self.node_param_util = NodeParamTools(self.get_logger()) 59 | 60 | # 61 | # Remove nodes for get/set parameter service call 62 | # 63 | def cleanup(self): 64 | 65 | # clean up set_param_node, get_param_node 66 | self.executor.remove_node(self.set_param_node) 67 | self.set_param_node.destroy_node() 68 | self.executor.remove_node(self.get_param_node) 69 | self.get_param_node.destroy_node() 70 | pass 71 | 72 | # 73 | # Try catch version of get_node_parameters 74 | # 75 | def try_get_node_parameters(self, node_name, param): 76 | try: 77 | value = self.get_node_parameters(node_name, param) 78 | return (True, value) 79 | except RuntimeError as e: 80 | # try to catch node not exist with service timer out error 81 | self.logger.info("get node parameter error: {}".format(str(e))) 82 | return (False, None) 83 | 84 | # 85 | # Try catch version of set_node_parameters 86 | # 87 | def try_set_node_parameters(self, node_name, param_name, type, value): 88 | try: 89 | self.set_node_parameters(node_name, param_name, type, value) 90 | return True 91 | except RuntimeError as e: 92 | # try to catch node not exist with service timer out error 93 | self.logger.info("set node parameter error: {}".format(str(e))) 94 | return False 95 | 96 | # 97 | # To get a parameter on another node in ROS2 using Python, use the GetParameters service. 98 | # client = node.create_client(GetParameters, f'{node_name}/get_parameters') 99 | # 100 | def get_node_parameters(self, node_name, param): 101 | self.logger.info('get node parameters : {} - {}'.format(node_name, param)) 102 | 103 | # Block the next get node parameter invoke until the current finishes 104 | # ros2 param get /Jetbot_Param_Client command 105 | with self.lock: 106 | parameters = [param] 107 | response = call_get_parameters(node=self.get_param_node, 108 | #node_name='/detectnet/detectnet', 109 | node_name=node_name, 110 | parameter_names=parameters) 111 | 112 | # print(response.values) 113 | if len(response.values) >= 1: 114 | # txtract type specific value 115 | pvalue = response.values[0] 116 | if pvalue.type == ParameterType.PARAMETER_BOOL: 117 | print(pvalue.bool_value) 118 | self.logger.info('get node bool value: {}'.format(pvalue.bool_value)) 119 | elif pvalue.type == ParameterType.PARAMETER_STRING: 120 | print(pvalue.string_value) 121 | self.logger.info('get node string value: {}'.format(pvalue.string_value)) 122 | elif pvalue.type == ParameterType.PARAMETER_STRING_ARRAY: 123 | self.logger.info('get node string array value: {}'.format(pvalue.string_array_value)) 124 | 125 | return pvalue 126 | 127 | # 128 | # To set a parameter on another node in ROS2 using Python, use the SetParameters service. 
129 | # client = node.create_client(SetParameters, f'{node_name}/set_parameters') 130 | # 131 | def set_node_parameters(self, node_name, param_name, type, value): 132 | self.logger.info('set node parameters : {} - {} - {}'.format(node_name, param_name, value)) 133 | 134 | # Block the next set node parameter invoke until the current finishes 135 | with self.lock: 136 | param = Parameter() 137 | param.name = param_name 138 | if type == ParameterType.PARAMETER_STRING: 139 | param.value = ParameterValue(string_value=value, type=ParameterType.PARAMETER_STRING) 140 | elif type == ParameterType.PARAMETER_BOOL: 141 | param.value = ParameterValue(bool_value=value, type=ParameterType.PARAMETER_BOOL) 142 | elif type == ParameterType.PARAMETER_STRING_ARRAY: 143 | param.value = ParameterValue(string_array_value=value, type=ParameterType.PARAMETER_STRING_ARRAY) 144 | 145 | parameters = [param] 146 | response = call_set_parameters(node=self.set_param_node, 147 | node_name=node_name, 148 | parameters=parameters) 149 | 150 | if response is not None: 151 | # SetParametersResult 152 | for result in response.results: 153 | self.logger.debug('set node: {} parameter: {} value:{}'.format(node_name, param_name, value)) 154 | self.logger.info('Parameter set successful: {}'.format(result.successful)) 155 | self.logger.debug('Reason: {}'.format(result.reason)) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Jetbot Voice-Activated Copilot Tools with Nvidia RIVA and NanoLLM Container for ROS2 Robot - version 2.0 2 | 3 | Jetbot Voice-Activated Copilot is a set of ROS2 nodes that utilize the NVIDIA RIVA Automatic Speech Recognition (ASR) deep learning interface library and the Jetson NanoLLM Docker container for NVIDIA Jetson Orin jetbot. These tools leverage NVIDIA RIVA ASR for input and use a 1D convolutional neural network (CNN) model as a text classifier to handle the prediction process for robot task commands. This enables functionalities such as chat via LLM, vision via VLM, Lidar-assisted self-driving with object avoidance, and real-time object detection for following a person. 4 | 5 | 6 | ### Features 7 | --- 8 | - **Jetbot ASR Processor**: Enables your robot to decode human voice messages using the [Nvidia RIVA ASR service](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-overview.html) client ROS2 node. 9 | 10 | - **Jetbot TTS Processor**: Converts chat-vision NLM VLM response text into speech using [Nvidia RIVA TTS services](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html), which is then played via the robot's speaker. This feature enhances the interaction between the robot and humans, making it more engaging and user-friendly. 11 | 12 | - **Jetbot ASR Agent**: Allows you to build a simple 1D convolutional neural network (CNN) model for [text classification](https://keras.io/examples/nlp/text_classification_from_scratch/) to predict human voice intentions and pipe corresponding NLM chat, VLM vision, and actions that the robot should take. 13 | 14 | - **Jetbot Voice Tools Copilot**: Executes the actions corresponding to the voice commands posted via ROS2 topic from the Jetbot ASR Agent. It also handles tasks related to Lidar-assisted self-driving, object avoidance, and real-time object detection for person following. 
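A quick way to see the text classifier behind these tools in action is to exercise it on its own. Below is a minimal sketch (assuming the CNN model has already been built per the [setup guide](docs/setup.md), and run from the `app` directory so the relative `../data` paths resolve):

```python
# Minimal sketch: run one ASR transcript through this repo's TextClassifier.
# Assumes app/robot_command_text_classification.py has already built the model.
from jetbot_riva_voice.include.text_classifier_utility import TextClassifier

classifier = TextClassifier('../data/models/ASR_classify_model',
                            '../data/models/class_labels.json')

label, is_command, score = classifier.predict(0.7, "turn to right.")
if is_command:
    print(f"robot command: {label} (score={score:.2f})")       # e.g. cmd_right
else:
    print(f"chat/vision fallback: {label} (score={score:.2f})")
```

Transcripts scoring under the threshold come back as `other`, which is what routes them to the chat/vision path instead of a robot command.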
15 | 16 | #### Here is a brief overview of the jetbot tools design diagram/architecture 17 | 18 | 19 | ### Setup 20 | - [Jetbot Voice-Activated Copilot Tools Setup Guide](docs/setup.md#setup) 21 |

22 | 23 | ### Jetbot voice-activated copilot tools source code and video demos 24 | --- 25 | - **Jetbot ASR Processor:** 26 | - Code logic explanation: 27 | - Employs the Nvidia RIVA Speech Recognition (ASR) service client-side ROS2 node to decode human speech into text. The decoded text is then published as ROS2 text messages. 28 | - **Start Nvidia RIVA server:** [Riva Server Quick Start Guide](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html) 29 | - `cd riva_quickstart_v2.16.0` 30 | - `bash riva_init.sh` 31 | - `bash riva_start.sh` 32 | - **Running the code:** Please note that this code needs to be run within a Docker container. 33 | - `cd ros2_jetbot_voice` 34 | - `. run.sh ` 35 | - `python3 /opt/riva/python-clients/scripts/list_audio_devices.py` 36 | - `ros2 run jetbot_riva_voice jetbot_ASR --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml` 37 | - **Receiving Nvidia RIVA ASR service messages:** The Jetbot ASR Processor processes the user message and identifies it as either a Jetbot tool action or a chat-vision message. 38 | - **Handling and publishing chat-only messages to the LLM:** For chat-only topics, publish a ROS2 topic message to the [LLM](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) ROS2 node hosted in Jetbot tools and mute the mic to prevent TTS audio playback from causing an echo effect. 39 | - **Handling and publishing vision messages to the VLM:** For vision topics, publish a ROS2 topic message via the [VLM](https://huggingface.co/Efficient-Large-Model/VILA1.5-3b) ROS2 node hosted in Jetbot tools and mute the mic to prevent TTS audio playback from causing an echo effect. 40 | - **Handling Jetbot tool actions:** For accepted Jetbot tool actions, publish a ROS2 Jetbot tool command text message to the Jetbot voice tools copilot ROS2 client, triggering robot actions. 41 | - Source code: 42 | - [param file: jetbot_voice_params.yaml](jetbot_riva_voice/param/jetbot_voice_params.yaml)
43 | - [ROS2 node: jetbot_ASR_Processor.py](jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Processor.py)
44 | - Usage: 45 | - `python3 /opt/riva/python-clients/scripts/list_audio_devices.py` 46 | - Get audio/microphone device ID 47 | - `ros2 run jetbot_riva_voice jetbot_ASR --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml` 48 | - **Jetbot TTS Processor:** 49 | - Code logic explanation: 50 | - The Jetbot TTS Client is designed to convert text into audio, thereby enabling robots to communicate vocally. 51 | - It utilizes the NVIDIA RIVA TTS service to synthesize speech, which is then played through the robot’s speaker. 52 | - The client can convert chat-based greeting text into speech, enhancing the interaction between the robot and humans. This feature makes the robot more engaging and user-friendly. 53 | - Source code: 54 | - [param file: jetbot_voice_params.yaml](jetbot_riva_voice/param/jetbot_voice_params.yaml)
55 | - [ROS2 node: jetbot_TTS_Processor.py](jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_TTS_Processor.py)
56 | - Usage: 57 | - `python3 /opt/riva/python-clients/scripts/list_audio_devices.py` 58 | - Get speaker device ID 59 | - `ros2 run jetbot_riva_voice jetbot_TTS --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml -p index:=11` 60 | - **Jetbot ASR Agent:** 61 | - Code logic explanation: 62 | - **Voice to Action Mapping:** Upon initialization, the Jetbot ASR Agent lazily loads a simple pre-trained 1D convolutional neural network (CNN) model. It then uses the model's labels file to construct a Voice to Action map. This map establishes a comprehensive relationship between specific voice messages or commands and the corresponding actions the robot should execute (see the sketch after the usage commands below). 63 | - **CNN model voice message prediction:** The CNN model's prediction label and score are looked up in the Voice to Action map to determine whether the message is for chat, vision, or robot commands. The voice and vision messages are published via ROS2 topics to the LLM and VLM ROS2 nodes hosted in Jetbot tools. 64 | - **Action Execution:** The Jetbot Voice Tools Copilot executes the actions corresponding to the voice commands published via the Jetbot ASR Client ROS2 node. 65 | - **Supported Action Management:** It effectively handles the starting and stopping of Jetbot tool-supported actions, encompassing functionalities such as: 66 | - **Self-driving:** Lidar-assisted object avoidance self-driving 67 | - **Person following:** Real-time object detection for person following 68 | - **Navigation:** Move forward/backward, turn left/right 69 | - Source code: 70 | - [param file: jetbot_voice_params.yaml](jetbot_riva_voice/param/jetbot_voice_params.yaml)
71 | - [ROS2 node: Jetbot_ASR_Agent.py](jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Agent.py)
72 | - Usage: 73 | - `ros2 run jetbot_riva_voice jetbot_voice_agent --ros-args --params-file /ros2_ws/src/param/jetbot_voice_params.yaml` 74 |
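For illustration, here is a hypothetical sketch of the Voice to Action lookup described above, following the `jetbot_commands` syntax documented in `jetbot_voice_params.yaml` (`['ASR keyword', 'command nodes index:command keyword']`). The actual parsing lives in `Jetbot_ASR_Agent.py` and may differ:

```python
# Hypothetical sketch of the documented Voice-to-Action mapping; the values
# below mirror jetbot_voice_params.yaml, not Jetbot_ASR_Agent.py internals.
import ast

command_nodes = ["/Jetbot_tool_voice_copilot"]   # command_nodes parameter
jetbot_commands = "[['cmd_start','0:start'], ['cmd_stop','0:stop']]"

# 'ASR keyword' -> 'command-node index:command keyword'
voice_to_action = dict(ast.literal_eval(jetbot_commands))

def route(label):
    # Split '0:start' into the command-node index and the command keyword
    index, _, keyword = voice_to_action[label].partition(':')
    return command_nodes[int(index)], keyword

print(route('cmd_start'))   # ('/Jetbot_tool_voice_copilot', 'start')
```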
75 | [](https://youtu.be/SqDqO-KfWUs) 77 | ### Requirements: 78 | - Jetson Orin Nano or Jetson Orin NX: 79 | - https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit#what-youll-need 80 | - ROS2 humble: https://docs.ros.org/en/humble/index.html 81 | - NanoLLM Docker container: https://github.com/dusty-nv/NanoLLM 82 | - NanoLLM Docker container for ROS2: https://github.com/NVIDIA-AI-IOT/ros2_nanollm 83 | - ROS2 Jetbot Tools Docker container V 2.0 : - https://github.com/Jen-Hung-Ho/ros2_jetbot_tools 84 | - Robot: 85 | - Jetson Orin Jetbot: http://www.yahboom.net/study/ROSMASTER-X3 86 | - https://drive.google.com/drive/folders/1QuXJcrRMs8oyTrrROKMnUNvTHImcIC78 87 | 88 | ### References 89 | - https://www.releases.ubuntu.com/22.04/ 90 | - https://developer.nvidia.com/embedded/learn/get-started-jetson-agx-orin-devkit#what-youll-need 91 | - https://docs.ros.org/en/humble/index.html 92 | - https://navigation.ros.org/ 93 | - https://github.com/Jen-Hung-Ho/ros2_jetbot_tools 94 | - https://github.com/dusty-nv/jetson-containers 95 | - https://dusty-nv.github.io/NanoLLM/ 96 | - https://github.com/dusty-nv/NanoLLM 97 | - https://github.com/NVIDIA-AI-IOT/ros2_nanollm 98 | - https://www.jetson-ai-lab.com/tutorial_llamaspeak.html 99 | - https://org.ngc.nvidia.com/setup/installers/cli 100 | - https://docs.nvidia.com/deeplearning/riva/user-guide/docs/quick-start-guide.html 101 | - https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tutorials/tts-basics-customize-ssml.html?highlight=speechsynthesisservice 102 | - https://keras.io/examples/nlp/text_classification_from_scratch/ 103 | 104 | -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_TTS_Processor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import rclpy 16 | import threading 17 | import time 18 | import asyncio 19 | 20 | # RIVA client 21 | import pyaudio 22 | import riva.client 23 | import riva.client.audio_io 24 | 25 | from rclpy.node import Node 26 | from rclpy.executors import ExternalShutdownException 27 | from rclpy.parameter import Parameter 28 | from rcl_interfaces.msg import ParameterType, SetParametersResult 29 | from std_msgs.msg import String 30 | from threading import Lock 31 | 32 | from jetbot_riva_voice.include.node_parameter_utility import NodeParamTools 33 | 34 | class riva_tts_processor(Node): 35 | 36 | def parameter_callback(self, params): 37 | for param in params: 38 | if param.name == 'start' and param.type_ == Parameter.Type.BOOL: 39 | self.start = param.value 40 | self.get_logger().info('start= {}'.format(bool(param.value))) 41 | elif param.name == 'command' and param.type_ == Parameter.Type.STRING: 42 | self.cmd = param.value 43 | self.get_logger().info('command= {}'.format(str(param.value))) 44 | 45 | return SetParametersResult(successful=True) 46 | 47 | def __init__(self, name): 48 | super().__init__(name) 49 | 50 | self.TTS_topic = self.declare_parameter('TTS_topic', "/TTS/transcripts").get_parameter_value().string_value 51 | self.chat_topic = self.declare_parameter('chat_topic', "/chatbot/response").get_parameter_value().string_value 52 | self.ASR_node = self.declare_parameter('ASR_node', '/Riva_ASR_processor').get_parameter_value().string_value 53 | self.RIVA_URL = self.declare_parameter('url', "localhost:50051").get_parameter_value().string_value 54 | self.index = self.declare_parameter('index', 26).get_parameter_value().integer_value 55 | self.streaming_chunk = self.declare_parameter('streaming_chunk', 16000).get_parameter_value().integer_value 56 | 57 | self.get_logger().info("=========================================") 58 | self.get_logger().info("Jetbot ASR processor :{} start".format(name)) 59 | self.get_logger().info("TTS_topic : {}".format(self.TTS_topic)) 60 | self.get_logger().info("response_reopic : {}".format(self.chat_topic)) 61 | self.get_logger().info('ASR_node : {}'.format(self.ASR_node)) 62 | self.get_logger().info("RIVA url : {}".format(self.RIVA_URL)) 63 | self.get_logger().info("index : {}".format(self.index)) 64 | self.get_logger().info("streaming chunk : {}".format(self.streaming_chunk)) 65 | self.get_logger().info("=========================================") 66 | 67 | self.lock = Lock() 68 | self.subscription = self.create_subscription(String, self.TTS_topic, self.TTS_callback, 10) 69 | self.chat_subscription = self.create_subscription(String, self.chat_topic, self.chat_callback, 10) 70 | 71 | self.msg = String() 72 | self.i = 0 73 | 74 | # Add parameters callback 75 | self.add_on_set_parameters_callback(self.parameter_callback) 76 | 77 | # self.init_ros_nodes() 78 | self.node_param_util = NodeParamTools(self, executor) 79 | 80 | self.riva_init() 81 | 82 | # self.thread = threading.Thread(target=self.ASR_processor) 83 | # self.thread.start() 84 | 85 | # 86 | # Remove nodes for get/set parameter service call 87 | # 88 | def cleanup(self): 89 | # clean up set_param_node, get_param_node 90 | self.node_param_util.cleanup() 91 | pass 92 | 93 | def riva_init(self): 94 | self.p = pyaudio.PyAudio() 95 | default_device_info = riva.client.audio_io.get_default_input_device_info() 96 | self.get_logger().debug("Rivai default info:{}".format(default_device_info)) 97 | # default_index = None if default_device_info is None else default_device_info['index'] 98 | if 
default_device_info is not None and int(default_device_info['maxOutputChannels']) > 0: 99 | self.audio_index = default_device_info['index'] 100 | self.get_logger().info("use default - ignore user input") 101 | else: 102 | self.audio_index = self.index 103 | default_device = self.p.get_device_info_by_index(self.audio_index) 104 | self.sample_rate = int(default_device['defaultSampleRate']) 105 | 106 | self.get_logger().info("==============================================") 107 | self.get_logger().info("Audio default index : {}".format(self.audio_index)) 108 | self.get_logger().info("Max input ouput channels: [{} - {}]".format(default_device['maxInputChannels'], default_device_info['maxOutputChannels'])) 109 | self.get_logger().info("sample rate : {}".format(self.sample_rate)) 110 | # riva.client.audio_io.list_input_devices() 111 | self.get_logger().info("==============================================") 112 | 113 | # Initialize RIVA 114 | # auth = riva.client.Auth(args.ssl_cert, args.use_ssl, args.server, args.metadata) 115 | auth = riva.client.Auth(None, False, self.RIVA_URL, None) 116 | self.tts_service = riva.client.SpeechSynthesisService(auth) 117 | 118 | self.get_logger().info("==============================================") 119 | self.get_logger().info(" RIVA speech synthesis service") 120 | self.get_logger().info("==============================================") 121 | 122 | def list_audio_devices(self): 123 | self.get_logger().info("==============================================") 124 | self.get_logger().info(" AUDIO DEVICES: ") 125 | self.get_logger().info("==============================================") 126 | for i in range(self.p.get_device_count()): 127 | dev = self.p.get_device_info_by_index(i) 128 | self.get_logger().info(f"{dev['index']:2d}: {dev['name']:50s} (inputs={dev['maxInputChannels']:<3d} outputs={dev['maxOutputChannels']:<3d} sampleRate={int(dev['defaultSampleRate'])})") 129 | self.get_logger().info("==============================================") 130 | 131 | def chat_callback(self, msg): 132 | 133 | self.TTS_callback(msg) 134 | # Turn of mute mode ASR node 135 | passfail, value = self.node_param_util.try_get_node_parameters(self.ASR_node, 'start') 136 | if passfail == True: 137 | self.get_logger().info('Jetbot chat node start:{}'.format(value.bool_value)) 138 | if not value.bool_value: 139 | # delay 3 seconds for wait ASR finish current sound decoding then turn on the ASR 140 | time.sleep(2.0) 141 | self.node_param_util.try_set_node_parameters(self.ASR_node, 'start', type=ParameterType.PARAMETER_BOOL, value=True) 142 | else: 143 | self.get_logger().info('Jetbot chat node start == true') 144 | 145 | 146 | def TTS_callback(self, msg): 147 | with self.lock: 148 | self.get_logger().info('TTS_callback: [{}]'.format(msg.data)) 149 | msg_str = msg.data 150 | nchannels = 1 151 | sampwidth = 2 152 | sound_stream, out_f = None, None 153 | start = time.time() 154 | try: 155 | sound_stream = riva.client.audio_io.SoundCallBack( 156 | self.audio_index, nchannels=nchannels, sampwidth=sampwidth, framerate=self.sample_rate 157 | ) 158 | 159 | self.get_logger().info(" Generating audio for request.. 
\n msg:{}".format(msg_str)) 160 | responses = self.tts_service.synthesize_online( 161 | msg_str, None, "en-US", sample_rate_hz=self.sample_rate, 162 | audio_prompt_file=None, quality=20 163 | ) 164 | 165 | first = True 166 | for resp in responses: 167 | stop = time.time() 168 | if first: 169 | self.get_logger().info(" Time to first audio: {:.3f}s".format(stop - start)) 170 | first = False 171 | if sound_stream is not None: 172 | sound_stream(resp.audio) 173 | except Exception as e: 174 | self.get_logger().error('An error occured: {}'.format(e)) 175 | finally: 176 | if sound_stream is not None: 177 | sound_stream.close() 178 | self.get_logger().info("Close riva service sound stream") 179 | 180 | def main(args=None): 181 | rclpy.init(args=None) 182 | 183 | global executor 184 | 185 | executor = rclpy.executors.MultiThreadedExecutor() 186 | 187 | JetbotTTS_node = riva_tts_processor('Riva_TTS_processor') 188 | executor.add_node(JetbotTTS_node) 189 | 190 | try: 191 | # rclpy.spin(JetbotTTS_node) 192 | executor.spin() 193 | except (KeyboardInterrupt, ExternalShutdownException): 194 | pass 195 | except Exception as e: 196 | JetbotTTS_node.get_logger().error('An error occured: {}'.format(e)) 197 | print(e) 198 | finally: 199 | JetbotTTS_node.cleanup() 200 | JetbotTTS_node.destroy_node() 201 | rclpy.shutdown() 202 | 203 | 204 | if __name__ == '__main__': 205 | main() -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Processor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Open Source Robotics Foundation, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import rclpy 16 | import threading 17 | import time 18 | import asyncio 19 | from rclpy.node import Node 20 | from rclpy.executors import ExternalShutdownException 21 | from rclpy.parameter import Parameter 22 | from rcl_interfaces.msg import ParameterType, SetParametersResult 23 | from std_msgs.msg import String 24 | 25 | import pyaudio 26 | import riva.client 27 | import riva.client.audio_io 28 | 29 | class riva_asr_processor(Node): 30 | 31 | def parameter_callback(self, params): 32 | for param in params: 33 | if param.name == 'start' and param.type_ == Parameter.Type.BOOL: 34 | self.start = param.value 35 | self.get_logger().info('start= {}'.format(bool(param.value))) 36 | elif param.name == 'command' and param.type_ == Parameter.Type.STRING: 37 | self.cmd = param.value 38 | self.get_logger().info('command= {}'.format(str(param.value))) 39 | 40 | return SetParametersResult(successful=True) 41 | 42 | def __init__(self, name): 43 | super().__init__(name) 44 | 45 | self.ASR_topic = self.declare_parameter('ASR_topic', "/voice/transcripts").get_parameter_value().string_value 46 | self.RIVA_URL = self.declare_parameter('url', "localhost:50051").get_parameter_value().string_value 47 | self.index = self.declare_parameter('index', 26).get_parameter_value().integer_value 48 | self.streaming_chunk = self.declare_parameter('streaming_chunk', 16000).get_parameter_value().integer_value 49 | self.start = self.declare_parameter('start', True).get_parameter_value().bool_value 50 | 51 | self.get_logger().info("=========================================") 52 | self.get_logger().info("Jetbot ASR processor :{} start".format(name)) 53 | self.get_logger().info("ASR_topic : {}".format(self.ASR_topic)) 54 | self.get_logger().info("RIVA url : {}".format(self.RIVA_URL)) 55 | self.get_logger().info("index : {}".format(self.index)) 56 | self.get_logger().info("streaming chunk : {}".format(self.streaming_chunk)) 57 | self.get_logger().info("start : {}".format(self.start)) 58 | self.get_logger().info("=========================================") 59 | 60 | self.publisher = self.create_publisher(String, self.ASR_topic, 10) 61 | 62 | self.msg = String() 63 | self.i = 0 64 | 65 | # Add parameters callback 66 | self.add_on_set_parameters_callback(self.parameter_callback) 67 | 68 | # self.p = pyaudio.PyAudio() 69 | # self.list_audio_devices() 70 | self.riva_init() 71 | 72 | timer_period = 0.5 # seconds 73 | # self.timer = self.create_timer(timer_period, self.timer_callback) 74 | 75 | self.thread = threading.Thread(target=self.ASR_processor) 76 | self.thread.start() 77 | 78 | def riva_init(self): 79 | self.p = pyaudio.PyAudio() 80 | default_device_info = riva.client.audio_io.get_default_input_device_info() 81 | self.get_logger().debug("Rivai default info:{}".format(default_device_info)) 82 | # default_index = None if default_device_info is None else default_device_info['index'] 83 | if default_device_info is not None and int(default_device_info['maxInputChannels']) > 0: 84 | self.audio_index = default_device_info['index'] 85 | self.get_logger().info("use default - ignore user input") 86 | else: 87 | self.audio_index = self.index 88 | default_device = self.p.get_device_info_by_index(self.audio_index) 89 | self.sample_rate = int(default_device_info['defaultSampleRate']) 90 | 91 | self.get_logger().info("==============================================") 92 | self.get_logger().info("Audio default index : {}".format(self.audio_index)) 93 | self.get_logger().info("Max input ouput channels: [{} - 
{}]".format(default_device['maxInputChannels'], default_device_info['maxOutputChannels'])) 94 | self.get_logger().info("sample rate : {}".format(self.sample_rate)) 95 | # riva.client.audio_io.list_input_devices() 96 | self.get_logger().info("==============================================") 97 | 98 | def list_audio_devices(self): 99 | self.get_logger().info("==============================================") 100 | self.get_logger().info(" AUDIO DEVICES: ") 101 | self.get_logger().info("==============================================") 102 | for i in range(self.p.get_device_count()): 103 | dev = self.p.get_device_info_by_index(i) 104 | self.get_logger().info(f"{dev['index']:2d}: {dev['name']:50s} (inputs={dev['maxInputChannels']:<3d} outputs={dev['maxOutputChannels']:<3d} sampleRate={int(dev['defaultSampleRate'])})") 105 | self.get_logger().info("==============================================") 106 | 107 | 108 | def timer_callback(self): 109 | self.msg.data = 'Hello World: %d' % self.i 110 | self.i += 1 111 | self.get_logger().info('Publishing: "%s"' % self.msg.data) 112 | self.publisher.publish(self.msg) 113 | 114 | # thread: Jetbot ASR processor 115 | def ASR_processor(self): 116 | self.get_logger().info('==============================') 117 | self.get_logger().info('Jetbot ASR processor --> START') 118 | self.get_logger().info('==============================') 119 | 120 | # Initialize RIVA 121 | # auth = riva.client.Auth(args.ssl_cert, args.use_ssl, args.server, args.metadata) 122 | auth = riva.client.Auth(None, False, self.RIVA_URL, None) 123 | asr_service = riva.client.ASRService(auth) 124 | 125 | config = riva.client.StreamingRecognitionConfig( 126 | config=riva.client.RecognitionConfig( 127 | encoding=riva.client.AudioEncoding.LINEAR_PCM, 128 | language_code="en-US", 129 | model="", 130 | max_alternatives=1, 131 | profanity_filter=False, 132 | enable_automatic_punctuation=False, 133 | verbatim_transcripts=True, 134 | sample_rate_hertz=self.sample_rate, 135 | audio_channel_count=1, 136 | ), 137 | interim_results=True, 138 | ) 139 | 140 | boosted_words = ["jetbot", "action"] 141 | boosted_score = 4.0 142 | riva.client.add_word_boosting_to_config(config, boosted_words, boosted_score) 143 | 144 | if hasattr(riva.client, 'add_endpoint_parameters_to_config'): 145 | riva.client.add_endpoint_parameters_to_config( 146 | config, 147 | -1, #start history 148 | -1.0, #start threshold 149 | -1, # stop history 150 | -1, # stop history eou 151 | -1, # stop threshold 152 | -1.0 # top threshold eou 153 | ) 154 | else: 155 | self.get_logger().info("The function add_endpoint_parameters_to_config() does not exist in this version of RIVA client API.") 156 | 157 | with riva.client.audio_io.MicrophoneStream( 158 | self.sample_rate, 159 | self.streaming_chunk, 160 | device=self.audio_index, 161 | ) as audio_chunk_iterator: 162 | responses=asr_service.streaming_response_generator( 163 | audio_chunks=audio_chunk_iterator, 164 | streaming_config=config 165 | ) 166 | 167 | for response in responses: 168 | if not response.results: 169 | continue 170 | for result in response.results: 171 | if not result.alternatives: 172 | continue 173 | transcript = result.alternatives[0].transcript 174 | if result.is_final: 175 | self.get_logger().info('ASR buffer: [ {} ]'.format(transcript)) 176 | self.get_logger().debug('ASR RAW:{}'.format(result.alternatives)) 177 | for i, alternative in enumerate(result.alternatives): 178 | asr_msg = (f'(alternative {i + 1})' if i > 0 else '') + f' {alternative.transcript}' 179 | 
self.get_logger().info( '## {}'.format(asr_msg)) 180 | 181 | if (self.start): 182 | self.msg.data = transcript 183 | self.get_logger().info('Publishing: "%s"' % self.msg.data) 184 | self.publisher.publish(self.msg) 185 | else: 186 | self.get_logger().info('ASR -off- ignore: {}'.format(transcript)) 187 | 188 | self.get_logger().info('==============================') 189 | self.get_logger().info('Jetbot ASR processor -- EXIT') 190 | self.get_logger().info('==============================') 191 | 192 | 193 | 194 | def main(args=None): 195 | rclpy.init(args=None) 196 | 197 | node = riva_asr_processor('Riva_ASR_processor') 198 | 199 | try: 200 | rclpy.spin(node) 201 | except (KeyboardInterrupt, ExternalShutdownException): 202 | pass 203 | except Exception as e: 204 | node.get_logger().error('An error occured: {}'.format(e)) 205 | print(e) 206 | 207 | node.destroy_node() 208 | rclpy.shutdown() 209 | 210 | 211 | if __name__ == '__main__': 212 | main() -------------------------------------------------------------------------------- /app/robot_command_text_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 
22 | # 23 | # 24 | # Reference: 25 | # This code is inspired by the example from Keras: 26 | # https://keras.io/examples/nlp/text_classification_from_scratch/ 27 | # 28 | 29 | import matplotlib.pyplot as plt 30 | import os 31 | import re 32 | import shutil 33 | import string 34 | import tensorflow as tf 35 | from tensorflow.keras import layers, losses 36 | import json 37 | 38 | class TextClassification: 39 | def __init__(self, train_data_path, test_data_path): 40 | # Model constants 41 | self.batch_size = 32 42 | self.seed = 42 43 | self.max_features = 20000 44 | self.sequence_length = 550 45 | self.embedding_dim = 100 46 | self.epochs = 100 47 | self.vectorize_layer = None 48 | self.model = None 49 | self.class_labels = None 50 | self.train_data_path = train_data_path 51 | self.test_data_path = test_data_path 52 | 53 | # 54 | # Load text dataset from directory 55 | # 56 | def load_data(self): 57 | raw_train_ds = tf.keras.utils.text_dataset_from_directory( 58 | self.train_data_path, 59 | batch_size=self.batch_size, 60 | validation_split=0.2, 61 | subset='training', 62 | seed=self.seed) 63 | 64 | raw_val_ds = tf.keras.utils.text_dataset_from_directory( 65 | self.train_data_path, 66 | batch_size=self.batch_size, 67 | validation_split=0.2, 68 | subset='validation', 69 | seed=self.seed) 70 | 71 | raw_test_ds = tf.keras.utils.text_dataset_from_directory( 72 | self.test_data_path, 73 | batch_size=self.batch_size) 74 | 75 | self.class_labels = raw_train_ds.class_names 76 | print("Class labels:", self.class_labels) 77 | 78 | return raw_train_ds, raw_val_ds, raw_test_ds 79 | 80 | # Having looked at our data above, we see that the raw text contains HTML break 81 | # tags of the form '
<br />'. These tags will not be removed by the default 82 | # standardizer (which doesn't strip HTML). Because of this, we will need to 83 | # create a custom standardization function. 84 | # Register the custom standardization function 85 | @tf.keras.utils.register_keras_serializable(package='Custom', name='custom_standardization') 86 | def custom_standardization(self, input_data): 87 | lowercase = tf.strings.lower(input_data) 88 | stripped_html = tf.strings.regex_replace(lowercase, '<br />
', ' ') 89 | return tf.strings.regex_replace(stripped_html, 90 | '[%s]' % re.escape(string.punctuation), 91 | '') 92 | 93 | # Now that we have our custom standardization, we can instantiate our text 94 | # vectorization layer. We are using this layer to normalize, split, and map 95 | # strings to integers, so we set our 'output_mode' to 'int'. 96 | # Note that we're using the default split function, 97 | # --- and the custom standardization defined above. --- 98 | # and the built-in standardization function. 99 | # We also set an explicit maximum sequence length, since the CNNs later in our 100 | # model won't support ragged sequences. 101 | def build_vectorize_layer(self, raw_train_ds): 102 | self.vectorize_layer = layers.TextVectorization( 103 | # standardize=self.custom_standardization, 104 | max_tokens=self.max_features, 105 | output_mode='int', 106 | output_sequence_length=self.sequence_length) 107 | 108 | # Now that the vectorize_layer has been created, call `adapt` on a text-only 109 | # dataset to create the vocabulary. You don't have to batch, but for very large 110 | # datasets this means you're not keeping spare copies of the dataset in memory. 111 | 112 | # Let's make a text-only dataset (no labels): 113 | train_text = raw_train_ds.map(lambda x, y: x) 114 | self.vectorize_layer.adapt(train_text) 115 | 116 | def vectorize_text(self, text, label): 117 | text = tf.expand_dims(text, -1) 118 | return self.vectorize_layer(text), label 119 | 120 | # 121 | # Build a simple 1D convolutional neural network (CNN) 122 | # starting with an Embedding layer. 123 | # 124 | def build_model(self): 125 | # This initializes a sequential model, which is a linear stack of layers. 126 | self.model = tf.keras.Sequential([ 127 | # It’s used for text data where each word is represented by an integer. 128 | # Turns positive integers (indexes) into dense vectors of fixed size. 129 | layers.Embedding(self.max_features, self.embedding_dim), 130 | # This layer randomly sets 50% of the input units to 0 at each update during training time, which helps prevent overfitting. 131 | layers.Dropout(0.5), 132 | # This is a 1D convolutional layer with 128 filters, a kernel size of 7, “valid” padding (no padding), ReLU activation function, and a stride of 3. 133 | # It helps in extracting features from the input sequence. 134 | layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3), # Change kernel size to 7 and strides to 3 135 | layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3), # Change kernel size to 7 and strides to 3 136 | # This layer performs global max pooling operation for temporal data. 137 | # It reduces the dimensionality of the input by taking the maximum value over the time dimension. 138 | layers.GlobalMaxPooling1D(), 139 | # This is a fully connected (dense) layer with 128 units and ReLU activation function. 140 | # It helps in learning complex representations. 141 | layers.Dense(128, activation="relu"), 142 | layers.Dropout(0.5), # Increase dropout rate to 0.5 143 | # This is the output layer with a number of units equal to the number of classes. 144 | # The softmax activation function is used to output a probability distribution over the classes. 
145 | layers.Dense(len(self.class_labels), activation='softmax') 146 | ]) 147 | 148 | self.model.compile(loss=losses.SparseCategoricalCrossentropy(from_logits=False), 149 | optimizer='adam', 150 | metrics=['accuracy']) 151 | 152 | 153 | # 154 | # Train the model 155 | # 156 | def train_model(self, train_ds, val_ds): 157 | history = self.model.fit( 158 | train_ds, 159 | validation_data=val_ds, 160 | epochs=self.epochs) 161 | return history 162 | 163 | # 164 | # Save the trained model 165 | # 166 | def save_export_model(self, lable_file, model_file_name, raw_test_ds): 167 | export_model = tf.keras.Sequential([ 168 | self.vectorize_layer, 169 | self.model 170 | ]) 171 | 172 | # Reinitialize the optimizer 173 | export_model.compile( 174 | loss=losses.SparseCategoricalCrossentropy(from_logits=False), 175 | optimizer="adam", 176 | metrics=['accuracy'] 177 | ) 178 | 179 | results = export_model.evaluate(raw_test_ds) 180 | print(results) 181 | loss, accuracy = results[:2] 182 | print(accuracy) 183 | 184 | export_model.class_labels = self.class_labels 185 | # Save the model with the appropriate file extension 186 | # Save the model with the .kears extension 187 | model_file_name = model_file_name + '.keras' 188 | export_model.save(model_file_name) 189 | 190 | with open(lable_file, 'w') as f: 191 | json.dump(self.class_labels, f) 192 | 193 | 194 | def main(): 195 | 196 | # Define datasets folder location 197 | train_data_path = '../data/datasets/train' 198 | test_data_path = '../data/datasets/test' 199 | lable_file = '../data/models/class_labels.json' 200 | model_file_name = '../data/models/ASR_classify_model' 201 | 202 | text_classification = TextClassification(train_data_path, test_data_path) 203 | raw_train_ds, raw_val_ds, raw_test_ds = text_classification.load_data() 204 | text_classification.build_vectorize_layer(raw_train_ds) 205 | 206 | # Veecorize the data 207 | train_ds = raw_train_ds.map(text_classification.vectorize_text) 208 | val_ds = raw_val_ds.map(text_classification.vectorize_text) 209 | test_ds = raw_test_ds.map(text_classification.vectorize_text) 210 | 211 | # Do async prefetching / buffering of the data for best performance on GPU. 212 | AUTOTUNE = tf.data.AUTOTUNE 213 | train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE) 214 | val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE) 215 | test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE) 216 | 217 | # Build model with simple 1D convolutional neural network (CNN) 218 | text_classification.build_model() 219 | text_classification.train_model(train_ds, val_ds) 220 | # Evulate the test data before save it 221 | text_classification.save_export_model(lable_file, model_file_name, raw_test_ds) 222 | 223 | if __name__ == "__main__": 224 | main() 225 | -------------------------------------------------------------------------------- /jetbot_riva_voice/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /jetbot_riva_voice/jetbot_riva_voice/script/Jetbot_ASR_Agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024, Jen-Hung Ho 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | # DEALINGS IN THE SOFTWARE. 
22 | # 23 | 24 | import rclpy # Python library for ROS 2 25 | import ast # Parse string into a 2D array 26 | import subprocess 27 | import time 28 | from rclpy.parameter import Parameter # rclpy Parameter type (provides Parameter.Type used in the callback below) 29 | from rclpy.node import Node # Handles the creation of nodes 30 | from threading import Lock 31 | from rcl_interfaces.msg import ParameterType, SetParametersResult 32 | from std_msgs.msg import String 33 | 34 | from jetbot_riva_voice.include.text_classifier_utility import TextClassifier 35 | from jetbot_riva_voice.include.node_parameter_utility import NodeParamTools 36 | 37 | class JetbotASRagent(Node): 38 | 39 | def parameter_callback(self, params): 40 | for param in params: 41 | if param.name == 'start' and param.type_ == Parameter.Type.BOOL: 42 | self.start = param.value 43 | self.get_logger().info('start= {}'.format(bool(param.value))) 44 | 45 | return SetParametersResult(successful=True) 46 | 47 | def __init__(self): 48 | super().__init__('Jetbot_ASR_agent') 49 | 50 | self.start = self.declare_parameter('start', True).get_parameter_value().bool_value 51 | self.ASR_topic = self.declare_parameter('ASR_topic', '/jetbot_voice/transcripts').get_parameter_value().string_value 52 | self.TTS_topic = self.declare_parameter('TTS_topic', '/chatbot/response').get_parameter_value().string_value 53 | self.LLM_topic = self.declare_parameter('LLM_topic', '/jetbot_llm_input').get_parameter_value().string_value 54 | self.LLM_vision_topic = self.declare_parameter('VISION_topic', '/llm_vision_input').get_parameter_value().string_value 55 | self.ASR_node = self.declare_parameter('ASR_node', '/Riva_ASR_processor').get_parameter_value().string_value 56 | self.command_nodes = self.declare_parameter('command_nodes', ["/Jetbot_Param_Client"]).get_parameter_value().string_array_value 57 | self.label_path = self.declare_parameter('label_path', '/data/models/class_labels.json').get_parameter_value().string_value 58 | self.model_path = self.declare_parameter('model_path', '/data/models/ASR_classify_model').get_parameter_value().string_value 59 | self.predict_threshold = self.declare_parameter('predict_threshold', 0.7).get_parameter_value().double_value 60 | self.command_enable = self.declare_parameter('command_enable', False).get_parameter_value().bool_value 61 | # Get the parameter as a string (2-dimensional string array) 62 | self.jetbot_commands = self.declare_parameter('jetbot_commands', "[['start', '0:start'], ['stop', '0:stop']]").get_parameter_value().string_value 63 | self.jetbot_chat = self.declare_parameter('jetbot_chat', "[['hello', 'hello 1'], ['bye', 'bye 2']]").get_parameter_value().string_value 64 | self.jetbot_vision = self.declare_parameter('jetbot_vision', "[['vision', 'vision 1']]").get_parameter_value().string_value 65 | 66 | 67 | # YAML debug 68 | self.get_logger().info('YAML command: {}'.format(self.jetbot_commands)) 69 | self.get_logger().info('YAML chat: {}'.format(self.jetbot_chat)) 70 | 71 | # Parse the string into a 2D array 72 | self.cmd_two_dim_array = ast.literal_eval(self.jetbot_commands) 73 | self.cmd_dict_array = {row[0]: row[1] for row in self.cmd_two_dim_array} 74 | self.chat_two_dim_array = ast.literal_eval(self.jetbot_chat) 75 | self.chat_dict_array = {row[0]: row[1] for row in self.chat_two_dim_array} 76 | self.vision_two_dim_array = ast.literal_eval(self.jetbot_vision) 77 | self.vision_dict_array = {row[0]: row[1] for row in self.vision_two_dim_array} 78 | 79 | # Collect command and chat keywords 80 | self.keywords = [] 81 | command_values = [row[0] for row in self.cmd_two_dim_array] 82 | self.chat_values =
[row[0] for row in self.chat_two_dim_array] 83 | vision_values = [row[0] for row in self.vision_two_dim_array] 84 | self.keywords = self.chat_values + command_values + vision_values 85 | 86 | self.get_logger().info('start : {}'.format(self.start)) 87 | self.get_logger().info('ASR_topic : {}'.format(self.ASR_topic)) 88 | self.get_logger().info('TTS_topic : {}'.format(self.TTS_topic)) 89 | self.get_logger().info('LLM_topic : {}'.format(self.LLM_topic)) 90 | self.get_logger().info('LLM VISION_topic : {}'.format(self.LLM_vision_topic)) 91 | self.get_logger().info('ASR_node : {}'.format(self.ASR_node)) 92 | self.get_logger().info('command_nodes : {}'.format(self.command_nodes)) 93 | self.get_logger().info('jetbot_keywords : {}'.format(self.keywords)) 94 | self.get_logger().info('predict threshold: {}'.format(self.predict_threshold)) 95 | self.get_logger().info('command enable : {}'.format(self.command_enable)) 96 | 97 | # ASR message keywords -- reference MatchboxNet classes 98 | # self.keywords = ["hello", "yes", "no", "up", "down", "left", "right", "on", "off", "unknown", "silence", "start", "stop", "come", "follow", "go"] 99 | 100 | self.lock = Lock() 101 | 102 | # Add parameters callback 103 | self.add_on_set_parameters_callback(self.parameter_callback) 104 | 105 | # self.init_ros_nodes() 106 | self.node_param_util = NodeParamTools(self, executor) # 'executor' is the global MultiThreadedExecutor created in main() 107 | 108 | # Create the subscriber. This subscriber receives ASR transcript messages 109 | self.subscription = self.create_subscription( 110 | String, 111 | self.ASR_topic, 112 | self.ASR_callback, 113 | 10) 114 | 115 | self.pub_TTS = self.create_publisher( 116 | String, 117 | self.TTS_topic, 118 | 10 119 | ) 120 | 121 | self.pub_LLM = self.create_publisher( 122 | String, 123 | self.LLM_topic, 124 | 10 125 | ) 126 | 127 | self.pub_LLM_vision = self.create_publisher( 128 | String, 129 | self.LLM_vision_topic, 130 | 10 131 | ) 132 | 133 | 134 | # Load the robot commands and the TensorFlow model data 135 | self.init_text_classifier() 136 | 137 | # 138 | # Remove nodes for get/set parameter service call 139 | # 140 | def cleanup(self): 141 | # clean up set_param_node, get_param_node 142 | self.node_param_util.cleanup() 143 | pass 144 | 145 | # 146 | # Load the robot commands and the TensorFlow model data 147 | # 148 | def init_text_classifier(self): 149 | self.classifier = TextClassifier(self.model_path, self.label_path) 150 | self.get_logger().info("===================================================") 151 | self.get_logger().info("Robot commands: {}".format(self.classifier.class_labels)) 152 | self.get_logger().info("===================================================") 153 | 154 | # 155 | # This function handles the prediction process for the text classifier model 156 | # 1D convolutional neural network (CNN) model 157 | # 158 | def handle_prediction(self, min_score, prediction): 159 | self.get_logger().info("ASR raw:[{}]".format(prediction)) 160 | label, result, score = self.classifier.predict(min_score, prediction) 161 | if result: 162 | self.get_logger().info("predict: [{}]:[{}]".format(label, score)) 163 | else: 164 | self.get_logger().info("Chat topic : ================================") 165 | self.get_logger().info("predict: [{}]:[{}]".format(label, score)) 166 | 167 | return result, label, score 168 | 169 | # 170 | # NVIDIA jetson-voice ASR ROS2 topic subscription 171 | # 172 | def ASR_callback(self, msg): 173 | self.get_logger().info('Raw ASR: [{}]:{}'.format(len((msg.data).split()), msg.data)) 174 | 175 | greeting = False 176 | if
len((msg.data).split()) == 1: 177 | # A greeting is static chat -- the response is defined in the jetbot_chat 2-dimensional array 178 | self.get_logger().info('static chat keyword list:{}'.format(self.chat_values)) 179 | greeting, keyword = self.filter_keywords(msg.data, self.chat_values) 180 | self.get_logger().info('greeting: {}:{}'.format(greeting, keyword)) 181 | 182 | # If the input is not a greeting, utilize the 1D convolutional neural network (CNN) model 183 | # for text classification to determine the user's intention for JetBot. 184 | if greeting == False: 185 | # Filter out ASR noise -- 1D convolutional neural network (CNN) model 186 | result, label, score = self.handle_prediction(self.predict_threshold, msg.data) 187 | # Only pick up ASR input that contains jetbot command keywords 188 | found, keyword = self.filter_keywords(label, self.keywords) 189 | else: 190 | found = greeting 191 | 192 | command = False 193 | vision_chat = False 194 | chat = False 195 | 196 | ASR_string = msg.data 197 | node_name = "/jetbot" 198 | 199 | if found: 200 | if greeting and keyword in self.chat_dict_array: 201 | ASR_string = self.chat_dict_array[keyword] 202 | self.get_logger().info("ASR input: {} greeting: {}".format(keyword, ASR_string)) 203 | elif keyword in self.vision_dict_array: 204 | # ASR_string = self.chat_dict_array[keyword] 205 | self.get_logger().info("ASR input: {} vision chat: {}".format(keyword, ASR_string)) 206 | vision_chat = True 207 | elif keyword in self.cmd_dict_array: 208 | command = True 209 | self.get_logger().info('jetbot command tool enable: {}'.format(command)) 210 | # Retrieve the node name and command value [index:value] 211 | parts = self.cmd_dict_array[keyword].split(':') 212 | node_index = int(parts[0]) 213 | if node_index < len(self.command_nodes): 214 | node_name = self.command_nodes[node_index] 215 | else: 216 | self.get_logger().info('Error: incorrect node name index:{}'.format(node_index)) 217 | # Retrieve the command parameter 218 | ASR_string = parts[1] 219 | 220 | # Turn on the command feature if the jetbot tools copilot node exists 221 | if self.command_enable == False: 222 | node_exist = self.check_node_exists(node_name) 223 | self.get_logger().info('check node:{} exists:{}'.format(node_name, node_exist)) 224 | if node_exist: 225 | self.get_logger().info('Turn on node:{} start parameter'.format(node_name)) 226 | self.enable_jetbot_tool_copilot(node_name) 227 | self.command_enable = True 228 | time.sleep(1.0) 229 | 230 | else: 231 | self.get_logger().info("ASR input not found in keyword list --> chatbot:" + msg.data) 232 | chat = True 233 | node_name = self.ASR_node 234 | # return 235 | 236 | # Block the next callback from executing until the current callback finishes 237 | with self.lock: 238 | # A jetbot chat action has no need to set a command on the target node 239 | if command == True: 240 | # 'Echoing' in ASR occurs 241 | # when the microphone picks up the system's own text-to-speech output, 242 | # creating a recursive voice recognition loop.
243 | self.mute_ASR_processor(self.ASR_node) 244 | if self.command_enable: 245 | passfail = self.node_param_util.try_set_node_parameters(node_name, 'command', type=ParameterType.PARAMETER_STRING, value=ASR_string) 246 | if passfail == True: 247 | ASR_string = "jetbot process: " + ASR_string 248 | else: 249 | ASR_string = "jetbot node:{} does not exist, skip command:{}".format(node_name, ASR_string) 250 | else: 251 | # Publish to the TTS node to play the audio stream and disable ASR muting 252 | ASR_string = "jetbot tool copilot command: " + ASR_string 253 | TTS_string = String() 254 | TTS_string.data = ASR_string 255 | self.pub_TTS.publish(TTS_string) 256 | elif greeting == True: 257 | self.mute_ASR_processor(self.ASR_node) 258 | TTS_string = String() 259 | TTS_string.data = ASR_string 260 | self.pub_TTS.publish(TTS_string) 261 | elif vision_chat == True: 262 | self.mute_ASR_processor(self.ASR_node) 263 | # Publish to the LLM vision node to respond as a chatbot 264 | LLM_vision_string = String() 265 | # Use the raw ASR input data as the LLM input 266 | LLM_vision_string.data = msg.data 267 | self.pub_LLM_vision.publish(LLM_vision_string) 268 | elif chat == True: 269 | self.mute_ASR_processor(self.ASR_node) 270 | # Publish to the LLM node to respond as a chatbot 271 | LLM_string = String() 272 | # Use the raw ASR input data as the LLM input 273 | LLM_string.data = msg.data 274 | self.pub_LLM.publish(LLM_string) 275 | 276 | # 277 | # Mute the ASR processor and wait until the LLM chat response to the TTS task completes 278 | # 279 | def mute_ASR_processor(self, node_name): 280 | # Turn off ASR and wait until the LLM chat response to the TTS task completes 281 | self.set_jetbot_node_bool_parameters(node_name, 'start', False) 282 | 283 | # 284 | # Enable Jetbot voice 285 | # 286 | def enable_jetbot_tool_copilot(self, node_name): 287 | # Turn on Jetbot voice 288 | self.set_jetbot_node_bool_parameters(node_name, 'start', True) 289 | 290 | # 291 | # Set a jetbot ROS2 node bool parameter 292 | # 293 | def set_jetbot_node_bool_parameters(self, node_name, parameter, bool_value): 294 | passfail, value = self.node_param_util.try_get_node_parameters(node_name, parameter) 295 | if passfail == True: 296 | self.get_logger().info('Jetbot node:{} param:{} value:{}'.format(node_name, parameter, value.bool_value)) 297 | if value.bool_value != bool_value: 298 | self.node_param_util.try_set_node_parameters(node_name, parameter, type=ParameterType.PARAMETER_BOOL, value=bool_value) 299 | else: 300 | self.get_logger().info('Jetbot chat node {} does not exist, skip the task'.format(node_name)) 301 | 302 | # 303 | # Filter out ASR noise -- TODO how to improve the filtering 304 | # Only pick up ASR input that contains keywords 305 | # 306 | def filter_keywords(self, asr_output, keywords): 307 | # Convert the ASR output and keywords to lowercase for case-insensitive matching 308 | asr_output = asr_output.lower() 309 | keywords = [keyword.lower() for keyword in keywords] 310 | 311 | # Find keywords in the ASR output 312 | for keyword in keywords: 313 | if keyword in asr_output: 314 | return True, keyword 315 | 316 | # If no keyword is found, return False and None 317 | return False, None 318 | 319 | # 320 | # Check if a node exists in ROS2 321 | # 322 | def check_node_exists(self, node_name): 323 | result = subprocess.run(['ros2', 'node', 'list'], stdout=subprocess.PIPE) 324 | nodes = result.stdout.decode().split('\n') 325 | return node_name in nodes 326 | 327 | 328 | 329 | def main(args=None): 330 | 331 | rclpy.init(args=args) 332 | 333 | global executor 334 | 335 | executor = rclpy.executors.MultiThreadedExecutor()
336 | 337 | JetbotASR_node = JetbotASRagent() 338 | executor.add_node(JetbotASR_node) 339 | 340 | try: 341 | # rclpy.spin(JetbotASR_node) 342 | executor.spin() 343 | except KeyboardInterrupt: 344 | print('\ncontrol-c: JetbotASR_node shutting down') 345 | finally: 346 | # Destroy the node explicitly - don't depend on the garbage collector 347 | JetbotASR_node.cleanup() 348 | JetbotASR_node.destroy_node() 349 | rclpy.shutdown() 350 | 351 | 352 | if __name__ == '__main__': 353 | main() 354 | --------------------------------------------------------------------------------
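Usage sketch: the export model built in robot_command_text_classification.py chains the adapted TextVectorization layer in front of the CNN, so the saved ASR_classify_model.keras accepts raw strings directly, and class_labels.json stores the label names in index order. The minimal Python sketch below shows how such a model could be loaded and queried outside ROS2. The classify() helper and the relative paths are illustrative assumptions, not an API defined in this repository; it assumes a TensorFlow version whose .keras format round-trips the adapted TextVectorization vocabulary (the custom standardization is commented out, so no custom objects are needed at load time), and the 0.7 default mirrors the node's predict_threshold, below which Jetbot_ASR_Agent.py routes input to the chatbot path instead of treating it as a robot command.

import json
import numpy as np
import tensorflow as tf

# Hypothetical paths, matching the defaults used in robot_command_text_classification.py
model = tf.keras.models.load_model('../data/models/ASR_classify_model.keras')
with open('../data/models/class_labels.json') as f:
    class_labels = json.load(f)  # label names in index order, e.g. ['cmd_backward', ...]

def classify(text, min_score=0.7):
    # The embedded TextVectorization layer standardizes and tokenizes the raw
    # string itself, so the model is fed plain text, not integer sequences.
    probs = model.predict(tf.constant([text]), verbose=0)[0]
    idx = int(np.argmax(probs))
    score = float(probs[idx])
    # Returns (label, is_command, score); below min_score the agent treats the
    # input as free chat for the LLM rather than as a robot command.
    return class_labels[idx], score >= min_score, score

print(classify('robot move forward'))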