├── CMakeLists.txt
├── launch
└── pocketsphinx.launch
├── vocab
├── voice_cmd.kwlist
└── voice_cmd.dic
├── CHANGELOG.rst
├── package.xml
├── README.md
└── scripts
└── recognizer.py
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 2.8.3)
project(pocketsphinx)

find_package(catkin REQUIRED)

# No libraries, headers or system dependencies are exported by this package,
# so catkin_package() is called without arguments (the previous bare
# `DEPENDS` keyword carried no values).
catkin_package()

# Install the launch file together with the vocabulary so that
# `roslaunch pocketsphinx pocketsphinx.launch` also works from an install
# space, not only from the devel/source space.
install(DIRECTORY launch vocab
  DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION})

# PROGRAMS (rather than FILES) keeps the executable bit on the script.
install(PROGRAMS scripts/recognizer.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION})
12 |
13 |
--------------------------------------------------------------------------------
/launch/pocketsphinx.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/vocab/voice_cmd.kwlist:
--------------------------------------------------------------------------------
1 | CLOSE HAND /1e-1/
2 | CONSIDER ROTATION /1e-1/
3 | FREE DIRECTIONS /1e-1/
4 | IGNORE ROTATION /1e-1/
5 | INVERTED CONTROL MODE /1e-1/
6 | LIMIT DIRECTIONS /1e-1/
7 | MANIPULATION /1e-1/
8 | NATURAL CONTROL MODE /1e-1/
9 | NAVIGATION /1e-1/
10 | OPEN HAND /1e-1/
11 | ROBOT PLAN AND GO /1e-1/
12 | ROBOT PLAN HOME /1e-1/
13 | ROBOT PLEASE EXECUTE /1e-1/
14 | ROBOT PLEASE PLAN /1e-1/
15 | ROBOT PLEASE GO HOME /1e-1/
16 | ROBOT PLAN HOME /1e-1/
17 | TURN HANDLE CLOCKWISE /1e-1/
18 | TURN HANDLE COUNTERCLOCKWISE /1e-1/
19 | STOP STOP /1e-1/
20 | FORWARD /1e-1/
21 | BACKWARD /1e-1/
22 |
--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2 | Changelog for package pocketsphinx
3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4 |
5 | 0.4.0 (2014-06-03)
6 | ------------------
7 | * add ~source parameter, for setting things like 'alsasrc'
8 | * add depend on python-gst
9 | * Contributors: Michael Ferguson
10 |
11 | 0.3.0 (2013-11-27)
12 | ------------------
13 | * Convert print statements to rosinfo/rosdebugs
14 | * Change language model at runtime + specify audio source
15 |
16 | 0.2.2 (2013-09-29)
17 | ------------------
18 | * Add changelog
19 |
20 | 0.2.1 (2013-09-21)
21 | ------------------
22 | * Fix install rules so that python scripts are executable
23 |
24 | 0.2.0 (2013-07-12)
25 | ------------------
26 | * Added support for grammar fsg files
27 | * First catkinized release
28 |
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | pocketsphinx
4 | 0.4.0
5 |
6 | This package is a simple wrapper around the pocketsphinx speech recognizer,
7 | using gstreamer and a Python-based interface.
8 |
9 | Michael Ferguson
10 | Michael Ferguson
11 | BSD
12 | http://ros.org/wiki/pocketsphinx
13 |
14 | catkin
15 |
16 | rospy>
17 | std_msgs
18 | std_srvs
19 | geometry_msgs>
20 |
21 |
22 |
--------------------------------------------------------------------------------
/vocab/voice_cmd.dic:
--------------------------------------------------------------------------------
1 | AND AH N D
2 | CLOCKWISE K L AA K W AY Z
3 | CLOSE K L OW S
4 | CONSIDER K AH N S IH D ER
5 | CONTROL K AH N T R OW L
6 | COUNTERCLOCKWISE K AW N T ER K L AO K W AY Z
7 | DIRECTIONS D ER EH K SH AH N Z
8 | DIRECTIONS(2) D IY R EH K SH IH N Z
9 | DIRECTIONS(3) D AY R EH K SH IH N Z
10 | DIRECTIONS(4) D IH R EH K SH IH N Z
11 | EXECUTE EH K S AH K Y UW T
12 | FREE F R IY
13 | GO G OW
14 | HAND HH AE N D
15 | HANDLE HH AE N D AH L
16 | HOME HH OW M
17 | IGNORE IH G N AO R
18 | INVERTED IH N V ER T IH D
19 | LIMIT L IH M AH T
20 | MANIPULATION M AH N IH P Y AH L EY SH AH N
21 | MODE M OW D
22 | NATURAL N AE CH ER AH L
23 | NATURAL(2) N AE CH R AH L
24 | NAVIGATION N AE V AH G EY SH AH N
25 | OPEN OW P AH N
26 | PLAN P L AE N
27 | PLEASE P L IY Z
28 | ROBOT R OW B AA T
29 | ROBOT(2) R OW B AH T
30 | ROTATION R OW T EY SH AH N
31 | STOP S T AA P
32 | TURN T ER N
33 | FORWARD F AO R W ER D
34 | BACKWARD B AE K W ER D
35 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Modified ROS wrapper for pocketsphinx
2 |
3 | The following repositories were used in our wrapper, but are not required for installation:
4 |
5 | * Original repository: https://github.com/mikeferguson/pocketsphinx
6 |
7 | * Also used repo: https://github.com/gorinars/ros_voice_control
8 |
9 | It uses up-to-date pocketsphinx features and is independent of most external dependencies.
10 |
11 | Current repository is a ROS wrapper which incorporates those features.
12 |
13 | ## Installation
14 | 1)
15 | ```
16 | sudo apt-get install -y python python-dev python-pip build-essential swig libpulse-dev git
17 | sudo apt-get install python-pyaudio
18 | ```
19 |
20 | 2) pocketsphinx: You will need to have pip preinstalled for this to work
21 | ```
22 | sudo pip install pocketsphinx
23 | ```
24 | 3) Download and copy the hub4wsj_sc_8k acoustic model to /usr/share/pocketsphinx/model/hmm/en_US/. It can be found [here](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/Archive/US%20English%20HUB4WSJ%20Acoustic%20Model/)
25 |
26 | 4) Git clone our repository into your catkin workspace's src folder:
27 | ```
28 | cd ~/catkin_ws/src
29 | git clone https://github.com/UTNuclearRoboticsPublic/pocketsphinx.git
30 | ```
31 | ## Usage
32 |
33 | ### If you want to decode from a microphone stream:
34 | 1. From pocketsphinx.launch set stream to true
35 | 2.
36 | ```
37 | roslaunch pocketsphinx pocketsphinx.launch
38 | ```
39 |
40 | ### If you want to decode from a wav file:
41 | 1. From pocketsphinx.launch set stream to false
42 | 2. set wavpath to the desired wav file path
43 | 3.
44 | ```
45 | roslaunch pocketsphinx pocketsphinx.launch
46 | ```
47 |
48 | ## Using your own keywords
49 |
50 | You can run this with any set of words. To do that, you need lexicon and keyword list files (check voice_cmd.dic and voice_cmd.kwlist for details).
51 |
52 | Word pronunciations for English can be found in CMUdict
53 |
54 | You can also download pocketsphinx acoustic models for several other languages here
55 |
56 | Read more about pocketsphinx on the official website: http://cmusphinx.sourceforge.net
57 |
--------------------------------------------------------------------------------
/scripts/recognizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import argparse
4 | import rospy
5 |
6 | from pocketsphinx.pocketsphinx import *
7 | from sphinxbase.sphinxbase import *
8 | import pyaudio
9 |
10 | from std_msgs.msg import String
11 | from std_srvs.srv import *
12 | import os
13 | import commands
14 |
class recognizer(object):
    """ROS node wrapping a pocketsphinx keyword-spotting decoder.

    Configuration is read from private ROS parameters:

    * ``~lm``      -- path handed to the decoder as ``-hmm``; a default path
                      is used when the parameter is absent.
    * ``~dict``    -- pronunciation dictionary (required).
    * ``~kws``     -- keyword list file (required).
    * ``~stream``  -- true: decode from the microphone, false: decode a wav
                      file (required).
    * ``~wavpath`` -- wav file to decode when ``~stream`` is false.

    Recognized keywords are published as ``std_msgs/String`` on ``~output``.
    """

    def __init__(self):
        """Start the node, read the parameters and launch the recognizer.

        If a required parameter is missing an error is logged and the
        constructor returns without starting the decoder.
        """
        self.speed = 0.2

        # Start node
        rospy.init_node("recognizer")
        rospy.on_shutdown(self.shutdown)

        self._lm_param = "~lm"
        self._dict_param = "~dict"
        self._kws_param = "~kws"
        self._stream_param = "~stream"
        self._wavpath_param = "~wavpath"

        # you may need to change publisher destination depending on what you run
        self.pub_ = rospy.Publisher('~output', String, queue_size=1)

        if rospy.has_param(self._lm_param):
            self.lm = rospy.get_param(self._lm_param)
        else:
            rospy.loginfo("Loading the default acoustic model")
            self.lm = "/usr/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"
            rospy.loginfo("Done loading the default acoustic model")

        if rospy.has_param(self._dict_param):
            self.lexicon = rospy.get_param(self._dict_param)
        else:
            rospy.logerr('No dictionary found. Please add an appropriate dictionary argument.')
            return

        if rospy.has_param(self._kws_param):
            self.kw_list = rospy.get_param(self._kws_param)
        else:
            rospy.logerr('kws cant run. Please add an appropriate keyword list file.')
            return

        # Without ~stream there is no way to choose an audio source. Bail out
        # instead of calling get_param() on the missing key (which raises).
        if not rospy.has_param(self._stream_param):
            rospy.logerr('Audio is not set to a stream (true) or wav file (false).')
            return
        self.is_stream = rospy.get_param(self._stream_param)

        if not self.is_stream:
            # Wav mode needs a usable path; stop here rather than starting a
            # decoder that cannot read any audio.
            if not rospy.has_param(self._wavpath_param):
                rospy.logerr('No wav file is set')
                return
            self.wavpath = rospy.get_param(self._wavpath_param)
            if self.wavpath == "none":
                rospy.logerr('Please set the wav path to the correct file location')
                return

        self.start_recognizer()

    def start_recognizer(self):
        """Configure the decoder and run it on the selected audio source."""
        # initialize pocketsphinx. As mentioned in python wrapper
        rospy.loginfo("Initializing pocketsphinx")
        config = Decoder.default_config()
        rospy.loginfo("Done initializing pocketsphinx")

        # Hidden Markov model: The model which has been used
        config.set_string('-hmm', self.lm)
        # Pronunciation dictionary used
        config.set_string('-dict', self.lexicon)
        # Keyword list file for keyword searching
        config.set_string('-kws', self.kw_list)

        rospy.loginfo("Opening the audio channel")

        if self.is_stream:
            self._decode_stream(config)
        else:
            self._decode_wav(config)

    def _decode_wav(self, config):
        """Decode ``self.wavpath`` as a single utterance and print the hypothesis."""
        try:
            wav_file = open(self.wavpath, 'rb')
        except (IOError, OSError):
            rospy.logerr('Please set the wav path to the correct location from the pocketsphinx launch file')
            # signal_shutdown() requires a reason string.
            rospy.signal_shutdown('unable to open wav file')
            # Nothing to decode; do not fall through to the read loop.
            return

        self.decoder = Decoder(config)
        self.decoder.start_utt()
        try:
            while not rospy.is_shutdown():
                buf = wav_file.read(1024)
                if not buf:
                    break
                self.decoder.process_raw(buf, False, False)
        finally:
            wav_file.close()
        self.decoder.end_utt()

        hypothesis = self.decoder.hyp()
        if hypothesis is None:
            rospy.logwarn("Error, make sure your wav file is composed of keywords!!")
            rospy.logwarn("Otherwise, your speech is uninterpretable :C ")
        else:
            # Parenthesized single-argument form prints the same text whether
            # print is a statement or a function.
            print(hypothesis.hypstr)

    def _decode_stream(self, config):
        """Decode microphone audio until shutdown, publishing every detection."""
        # Pocketsphinx requires 16kHz, mono, 16-bit little-Endian audio.
        # See http://cmusphinx.sourceforge.net/wiki/tutorialtuning
        audio = pyaudio.PyAudio()
        stream = audio.open(format=pyaudio.paInt16, channels=1,
                            rate=16000, input=True, frames_per_buffer=1024)
        stream.start_stream()
        rospy.loginfo("Done opening the audio channel")

        try:
            # decoder streaming data
            rospy.loginfo("Starting the decoder")
            self.decoder = Decoder(config)
            self.decoder.start_utt()
            rospy.loginfo("Done starting the decoder")

            # Main loop
            while not rospy.is_shutdown():
                # taken as is from python wrapper
                buf = stream.read(1024)
                if not buf:
                    break
                self.decoder.process_raw(buf, False, False)
                self.publish_result()
        finally:
            # Release the audio device however the loop ends.
            stream.stop_stream()
            stream.close()
            audio.terminate()

    def publish_result(self):
        """Publish the most recent keyword if the decoder has a hypothesis.

        When ``self.decoder.hyp()`` is not None, the segment words are
        printed, the utterance is restarted (``end_utt`` then ``start_utt``)
        and the last segment's word, lower-cased, is published on
        ``self.pub_``. Nothing is published when there are no segments.
        """
        if self.decoder.hyp() is None:
            return

        # Collect the words explicitly instead of depending on the
        # comprehension variable being visible after the comprehension.
        words = [seg.word for seg in self.decoder.seg()]
        print(words)

        self.decoder.end_utt()
        self.decoder.start_utt()

        if words:
            self.pub_.publish(words[-1].lower())

    def shutdown(self):
        """
        Shutdown hook registered with rospy.on_shutdown in __init__
        (e.g. runs after Ctrl+C is pressed); logs that the node is stopping.
        """
        rospy.loginfo("Stopping PocketSphinx")
150 |
if __name__ == "__main__":
    # sys.argv always contains at least the script name, so the former
    # `len(sys.argv) > 0` guard was always true; it also referenced `sys`,
    # which this file never imports explicitly. Start the node directly.
    start = recognizer()
154 |
--------------------------------------------------------------------------------