├── CMakeLists.txt
├── LICENSE
├── README.md
├── config
└── params.yaml
├── env
├── bin
│ ├── python
│ └── python3
├── lib64
└── pyvenv.cfg
├── install.sh
├── launch
├── remap.launch
├── voice.launch
└── voixmap.launch
├── mymap1.pgm
├── mymap1.yaml
├── package.xml
├── requirements.txt
├── rviz-simul.png
└── scripts
├── mic_client.py
└── nav_v1.py
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 2.8.3)
project(voice_ros)

## Find catkin macros and the catkin component packages this package
## builds against (these mirror the build dependencies in package.xml).
find_package(catkin REQUIRED COMPONENTS
  rospy
  std_msgs
  message_generation
  move_base_msgs
  actionlib
  geometry_msgs
)

## Uncomment this if the package has a setup.py. This macro ensures
## modules and global scripts declared therein get installed
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
# catkin_python_setup()

## NOTE(review): this package defines no custom .msg files, so the previous
## empty add_message_files(FILES) / generate_messages(...) calls were removed;
## invoking them with no message files breaks the catkin configure step.

###################################
## catkin specific configuration ##
###################################
## The catkin_package macro generates cmake config files for your package.
## CATKIN_DEPENDS declares the catkin packages that dependent projects
## also need at run time.
catkin_package(
  CATKIN_DEPENDS rospy std_msgs move_base_msgs actionlib geometry_msgs
)

###########
## Build ##
###########

## Only the catkin include directories are needed; this package ships
## no C++ headers of its own.
include_directories(
  ${catkin_INCLUDE_DIRS}
)

## Mark executable scripts (Python etc.) for installation.
## In contrast to setup.py, you can choose the destination.
install(PROGRAMS
  scripts/mic_client.py
  scripts/nav_v1.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)

## Mark other files for installation (the launch and config directories).
foreach(dir launch config)
  install(DIRECTORY ${dir}/
    DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/${dir})
endforeach()
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Mohammed Elmzaghi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Voix
3 | This package uses the Google Cloud Speech-to-Text API to allow for seamless human-robot interaction by sending navigation commands issued by the user to the robot. The main operating system used was ROS, while Turtlebot and Gazebo were used for simulation. Actual experiments were conducted using RosAria and the Pioneer3AT robot.
4 |
5 | # Academic Citations
6 | Please cite this IEEE publication when using this package for any academic projects: [M. Elmzaghi, M. Fahad and Y. Guo, "Implementing Robust Voice-Control for Human Robot Interaction for Autonomous Robot Guides," 2019 IEEE MIT Undergraduate Research Technology Conference (URTC), Cambridge, MA, USA, 2019, pp. 1-6, doi: 10.1109/URTC49097.2019.9660499.](https://ieeexplore.ieee.org/document/9660499)
7 |
8 | # Why use Google API?
9 | The goal is to create natural human robot interaction. Current voice processing software requires users to design speech recognition algorithms, collect datasets, as well as understand Natural Language Processing methods. The accuracy of these methods still does not exceed that of Google Cloud API given its massive datasets from the various voice services as well as the use of deep learning neural networks. By incorporating an accurate and robust voice processing software easily integrated with popular robotics software, users can include robust voice control into their robotics projects.
10 | # Installation
11 |
12 | To use this software package, ensure you are running ROS for your respective system as well as have installed Google Cloud SDK (which is detailed below.)
13 | However, we need to install PortAudio so we can use PyAudio to get mic data.
14 | ```bash
15 | sudo apt-get install portaudio19-dev
16 | ```
17 | Install all the requirements using pip by cloning the Github repo and installing all the packages in requirements.txt.
18 |
19 | ```bash
20 | cd ~/catkin_ws/src
21 | git clone https://github.com/moeelm/voice_ros.git
22 | cd voice_ros
23 | pip install -r requirements.txt
24 | ```
25 |
26 | ## Google Cloud Setup
27 | Follow the instructions [here](https://cloud.google.com/speech/docs/quickstart) for configuring your Google Cloud project and installing the SDK for authentication. You will need a google/gmail account.
28 |
29 | Usage of the Google Cloud SDK requires authentication. This means you require an API key and an activated service account to utilize the APIs.
30 | 1. Setup a [service account](https://cloud.google.com/docs/authentication/getting-started)
31 | 2. Download the service account key as a JSON.
32 | 3. Check you have GOOGLE_APPLICATION_CREDENTIALS in your environment. This should be the path to the keys.
33 | ```bash
34 | export GOOGLE_APPLICATION_CREDENTIALS='/path/to/key'
35 | ```
36 | 4. Run the authentication command:
37 | ```bash
38 | gcloud auth activate-service-account --key-file GOOGLE_APPLICATION_CREDENTIALS
39 | ```
40 |
41 |
42 | # Usage
43 | Follow the steps below to setup the package properly.
44 |
45 | ## Configure topics
46 | Go into the config directory and change the following parameters in the `params.yaml` file:
47 |
48 | * `results_topic`: (Optional) The topic where your results will be published.
49 | * `project_id`: The name of your project for the Google Speech node. This is the name of your Google Cloud project when going through the Google Cloud setup.
50 |
51 | ## Launching nodes
52 | To start the voice processing node, run the following command:
53 | ```bash
54 | roslaunch voice_ros voice.launch
55 | ```
56 | ## Autonomous Navigation Using Turtlebot
57 |
58 |
59 | To run autonomous navigation using the Turtlebot robot as well as RVIZ simulator,
60 | 1. Launch Turtlebot as well as Gazebo environment
61 | ```bash
62 | roslaunch turtlebot_gazebo turtlebot_world.launch
63 | ```
64 | 2. Run the map
65 | ```bash
66 | rosrun map_server map_server /PATH/TO/myMap.yaml
67 | ```
68 | 3. Run the navigation demo. AMCL (Adaptive Monte Carlo Localization) is used for localizing the robot
69 | ```bash
70 | roslaunch turtlebot_gazebo amcl_demo.launch
71 | ```
72 | 4. Launch ROS Visualization (RVIZ)
73 | ```bash
74 | roslaunch turtlebot_rviz_launchers view_navigation.launch
75 | ```
76 | The `2D Nav Goal` sends a navigation goal, `2D Pose Estimate` sets the pose of the robot.
77 |
78 | # ROS Nodes
79 |
80 | ## mic_client
81 | This ROS node receives text from the Google Cloud Speech API and publishes it onto `text_topic` (see config/params.yaml). This is parsed and sent to the navigation node, `nav_v1.py`. `single_utterance` is set to true to allow for single user input. This prevents the voice processing from being continuous, allowing the intent to be parsed and detected.
82 |
83 | ### Published Topics
84 | `text_topic` ([std_msgs/String](http://docs.ros.org/api/std_msgs/html/msg/String.html))
85 | Acquired text from the Google Cloud Speech API.
86 | `text2_topic`([std_msgs/String](http://docs.ros.org/api/std_msgs/html/msg/String.html))
87 | Intent detected in voice command.
88 |
89 | ## Navigation
90 | ROS node that takes text from the _mic\_client_ node, publishes the parsed intent and sends the spatial location associated with the intent to the `/move_base_simple/goal` topic. For example, `'microwave'` has the position coordinates `(2.00, -5.00, 0.000)` mapped to it.
91 |
92 |
93 |
--------------------------------------------------------------------------------
/config/params.yaml:
--------------------------------------------------------------------------------
1 | results_topic: "/results"
2 | text_topic: "/text"
3 | text2_topic: "/intent"
4 | project_id: "voice-controlled-1528217063857"
5 |
--------------------------------------------------------------------------------
/env/bin/python:
--------------------------------------------------------------------------------
1 | python3
--------------------------------------------------------------------------------
/env/bin/python3:
--------------------------------------------------------------------------------
1 | /usr/bin/python3
--------------------------------------------------------------------------------
/env/lib64:
--------------------------------------------------------------------------------
1 | lib
--------------------------------------------------------------------------------
/env/pyvenv.cfg:
--------------------------------------------------------------------------------
1 | home = /usr/bin
2 | include-system-site-packages = false
3 | version = 3.5.2
4 |
--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Installs the Google Cloud SDK and the Python dependencies for voice_ros.
# Prompts for Google service-account credentials when they are not already
# configured via GOOGLE_APPLICATION_CREDENTIALS.
#
# Fixes vs. original: the original had three syntax errors that made the
# script unrunnable — missing spaces inside [[ ]], a missing ';' before
# 'then', and a missing closing 'fi'.
if [[ -v GOOGLE_APPLICATION_CREDENTIALS ]]; then
    echo "Found credentials in: $GOOGLE_APPLICATION_CREDENTIALS"
else
    read -p "No credentials path found, please enter the path for your Google service account credentials: " gpath
    export GOOGLE_APPLICATION_CREDENTIALS=$gpath
fi
# Start by installing Google Cloud dependencies
export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)"
echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
sudo apt-get update && sudo apt-get install google-cloud-sdk
# Now install python dependencies
sudo apt-get install python-pip portaudio19-dev
pip install --user -r requirements.txt
echo "Remember to run 'gcloud init' to configure Google Cloud"
--------------------------------------------------------------------------------
/launch/remap.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/launch/voice.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/launch/voixmap.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/mymap1.pgm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moeelm/Voice-Control-ROS/56c00d1d9cbbbb2aef0ebb6c5668bb37918815f5/mymap1.pgm
--------------------------------------------------------------------------------
/mymap1.yaml:
--------------------------------------------------------------------------------
1 | image: mymap1.pgm
2 | resolution: 0.050000
3 | origin: [-24.400000, -14.800000, 0.000000]
4 | negate: 0
5 | occupied_thresh: 0.65
6 | free_thresh: 0.196
7 |
8 |
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | voice_ros
4 | 1.0.0
5 | Voice Control ROS
6 | Mohammed Elmzaghi
7 |
8 |
9 |
10 |
11 |
12 | MIT
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | catkin
23 | rospy
24 | std_msgs
25 | message_generation
26 | message_runtime
27 |
28 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | PyAudio
2 | google-cloud-speech
3 | six==1.10.0
4 | google-api-python-client
5 | dialogflow
6 | pyyaml
7 | numpy==1.13.0
8 | protobuf-to-dict
9 |
--------------------------------------------------------------------------------
/rviz-simul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/moeelm/Voice-Control-ROS/56c00d1d9cbbbb2aef0ebb6c5668bb37918815f5/rviz-simul.png
--------------------------------------------------------------------------------
/scripts/mic_client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from google.cloud import speech
4 | from google.cloud.speech import enums
5 | from google.cloud.speech import types
6 | import pyaudio
7 | import Queue
8 | import rospy
9 | from std_msgs.msg import String
10 |
11 |
class GspeechClient(object):
    """Streams microphone audio to the Google Cloud Speech API and publishes
    the transcribed text on ROS topics.

    Final transcripts are published on `text_topic`; when a transcript
    contains a known command word ('microwave' or 'bathroom'), that word is
    also published on `text2_topic` as the detected intent.

    Python 2 code (uses the `Queue` module and print statements).
    """

    def __init__(self):
        # Register the shutdown handler before opening any audio resources.
        rospy.on_shutdown(self.shutdown)

        # Audio stream input setup: 16-bit mono audio at 16 kHz, matching
        # the RecognitionConfig built in gspeech_client() below.
        FORMAT = pyaudio.paInt16
        CHANNELS = 1
        RATE = 16000
        self.CHUNK = 4096
        self.audio = pyaudio.PyAudio()
        # _get_data is invoked on PyAudio's callback thread for every chunk.
        self.stream = self.audio.open(format=FORMAT, channels=CHANNELS,
                                      rate=RATE, input=True,
                                      frames_per_buffer=self.CHUNK,
                                      stream_callback=self._get_data)
        self._buff = Queue.Queue()  # Thread-safe buffer holding raw audio data
        self.closed = False

        # ROS text publishers; topic names are read from the parameter
        # server (see config/params.yaml).
        text_topic = rospy.get_param('/text_topic','/text')
        text2_topic = rospy.get_param('/text2_topic', '/intent')
        self.text_pub = rospy.Publisher(text_topic, String, queue_size=10)
        self.text2_pub = rospy.Publisher(text2_topic, String, queue_size=10)

    def _get_data(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: put one chunk of raw audio into the
        buffer consumed by _generator(). Runs on PyAudio's callback thread.
        """
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def _generator(self):
        """Generator function that continuously yields audio chunks from the buffer.
        Used to stream data to the Google Speech API asynchronously.

        A None chunk in the buffer is the shutdown sentinel (see shutdown()).
        """
        while not self.closed:
            # Block until the first chunk of data arrives.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]

            # Drain any further chunks already queued using non-blocking get(),
            # so each yield carries as much audio as is currently available.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except Queue.Empty:
                    break

            yield b''.join(data)

    def _listen_print_loop(self, responses):
        """Iterates through server responses and prints them.
        The responses passed is a generator that will block until a response
        is provided by the server.
        Each response may contain multiple results, and each result may contain
        multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we
        print only the transcription for the top alternative of the top result.
        """
        for response in responses:
            # If not a valid response, move on to the next potential one.
            if not response.results:
                continue

            # The `results` list is consecutive. For streaming, we only care about
            # the first result being considered, since once it's `is_final`, it
            # moves on to considering the next utterance.
            result = response.results[0]
            if not result.alternatives:
                continue

            # Transcription of the top alternative.
            transcript = result.alternatives[0].transcript

            # Only act on the final utterance of the stream.
            if result.is_final:
                rospy.loginfo("Google Speech result: {}".format(result))
                # Received data is Unicode, convert it to a byte string.
                transcript = transcript.encode('utf-8')
                # Strip the initial space, if any.
                if transcript.startswith(' '):
                    transcript = transcript[1:]
                # Saying 'exit' shuts the node down.
                if transcript.lower() == 'exit':
                    self.shutdown()
                # Publish the full sentence on the text topic.
                self.text_pub.publish(transcript)
                # Publish any known command word found in the sentence as the
                # detected intent; otherwise report that no command matched.
                commands = ['microwave', 'bathroom']
                mycommands = transcript.split(' ')
                check_command = False
                for i in commands:
                    for j in mycommands:
                        if i == j:
                            print 'Sure, going to %s' %(j)
                            check_command = True
                            rospy.loginfo(j)
                            self.text2_pub.publish(j)
                if check_command == False:
                    print "Sorry, cant do that"
                # single_utterance mode: stop after the first final result.
                break

    def gspeech_client(self):
        """Creates the Google Speech API client, configures it, and sends/gets
        audio/text data for parsing.
        """
        language_code = 'en-US'
        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code=language_code)
        # single_utterance=True stops recognition after one spoken command,
        # so the intent can be parsed before listening resumes.
        streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=True,
            single_utterance=True)
        requests = (types.StreamingRecognizeRequest(audio_content=content) for content in self._generator())
        responses = client.streaming_recognize(streaming_config, requests)
        self._listen_print_loop(responses)

    def shutdown(self):
        """Shut down as cleanly as possible: stop _generator() via the None
        sentinel, close the audio stream, and exit the process."""
        rospy.loginfo("Shutting down")
        self.closed = True
        self._buff.put(None)
        self.stream.close()
        self.audio.terminate()
        exit()

    def start_client(self):
        """Entry function to start the client."""
        try:
            rospy.loginfo("Starting Google speech mic client")
            self.gspeech_client()
        except KeyboardInterrupt:
            self.shutdown()
157 |
if __name__ == '__main__':
    # Initialize the ROS node, then hand control to the speech client.
    rospy.init_node('mic_client')
    speech_client = GspeechClient()
    speech_client.start_client()
162 |
163 |
--------------------------------------------------------------------------------
/scripts/nav_v1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import rospy
3 | from std_msgs.msg import String
4 | from geometry_msgs.msg import PoseStamped
5 | from actionlib_msgs.msg import GoalStatus
6 | from move_base_msgs.msg import MoveBaseAction
7 |
class Motion(object):
    """Subscribes to the detected-intent topic and publishes the matching
    spatial location to `move_base_simple/goal` as a PoseStamped.
    """

    def __init__(self):
        # Register the shutdown handler first.
        rospy.on_shutdown(self.shutdown)

        # No goal until an intent message has been received (fixes an
        # AttributeError in go_to() when no command had arrived yet).
        self.myGoal = None

        self.pub_pos = rospy.Publisher('move_base_simple/goal', PoseStamped, queue_size=10)

        rospy.sleep(3)
        rospy.loginfo("Wait for the action server to come up")
        text2_topic = rospy.get_param('/text2_topic', '/intent')
        rospy.Subscriber(text2_topic, String, self.callback, queue_size=10)

    def callback(self, ros_data):
        """Map a recognized intent word (std_msgs/String) to its goal
        coordinates. Change the dictionary entries to match your map.
        """
        # Dictionary with various spatial locations, change according to position
        self.my_intent = {'microwave': {'position': {'x': 2.00, 'y': -5.000, 'z': 0.000}},
                          'bathroom': {'position': {'x': 0.49, 'y': -7.15, 'z': -0.002}}
                          }
        self.word_received = ros_data.data
        for key in sorted(self.my_intent.keys()):
            if self.word_received == key:
                self.myGoal = self.my_intent[key]['position']

    def start(self):
        """Log that navigation is starting."""
        rospy.loginfo("Starting Navigation")

    def go_to(self):
        """Publish the current goal (if any) and shut the node down.

        Returns:
            True when a goal was published, False when no intent has been
            received yet. (Previously this method implicitly returned None
            and crashed when self.myGoal was unset, so the caller's success
            check could never pass.)
        """
        rospy.sleep(5)
        if self.myGoal is None:
            rospy.loginfo("No navigation intent received yet")
            return False
        goal = PoseStamped()
        goal.header.frame_id = "odom"
        goal.header.stamp = rospy.Time.now()
        goal.pose.position.z = self.myGoal['z']
        goal.pose.position.x = self.myGoal['x']
        goal.pose.position.y = self.myGoal['y']
        goal.pose.orientation.w = 1.0
        self.pub_pos.publish(goal)
        rospy.loginfo("Sending goal")
        rospy.signal_shutdown("Restarting nav node")
        rospy.sleep(5)
        return True

    def shutdown(self):
        """Stop the node and exit the process."""
        rospy.loginfo("Stop")
        self.closed = True
        exit()
55 |
if __name__ == '__main__':
    # Bring up the navigation node and attempt to send a single goal.
    try:
        rospy.init_node("nav_v1")
        nav_node = Motion()
        nav_node.start()
        if nav_node.go_to():
            rospy.loginfo("Reached desired destination")
        else:
            rospy.loginfo("Sorry, I was not able to reach the destination. Try again")
    except KeyboardInterrupt:
        nav_node.shutdown()
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------