├── CMakeLists.txt
├── launch
    └── pocketsphinx.launch
├── vocab
    ├── voice_cmd.kwlist
    └── voice_cmd.dic
├── CHANGELOG.rst
├── package.xml
├── README.md
└── scripts
    └── recognizer.py


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8.3)
 2 | project(pocketsphinx)
 3 | 
 4 | find_package(catkin REQUIRED)
 5 | catkin_package()
 6 | 
 7 | # Install the vocabulary and the launch file so that
 8 | # `roslaunch pocketsphinx pocketsphinx.launch` also works from an install space.
 9 | install(DIRECTORY vocab launch
10 |   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION})
11 | 
12 | install(PROGRAMS scripts/recognizer.py
13 |   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION})


--------------------------------------------------------------------------------
/launch/pocketsphinx.launch:
--------------------------------------------------------------------------------
 1 | <launch>
 2 |   <!-- Each arg holds a complete "_name:=value" assignment; recognizer.py reads them as ~dict, ~kws, ~stream and ~wavpath. -->
 3 |   <arg name="_dict" value="_dict:=$(find pocketsphinx)/vocab/voice_cmd.dic" />
 4 |   <arg name="_kws" value="_kws:=$(find pocketsphinx)/vocab/voice_cmd.kwlist" />
 5 |   <arg name="_stream" value="_stream:=false" />
 6 |   <arg name="_wavpath" value="_wavpath:=none" />
 7 |   <!-- The four assignments are passed to the node through its args attribute. -->
 8 |   <node name="pocketsphinx_recognizer" pkg="pocketsphinx" type="recognizer.py" output="screen" args="$(arg _dict) $(arg _kws) $(arg _stream) $(arg _wavpath)" />
 9 | 
10 | </launch>
11 | 


--------------------------------------------------------------------------------
/vocab/voice_cmd.kwlist:
--------------------------------------------------------------------------------
 1 | CLOSE HAND /1e-1/
 2 | CONSIDER ROTATION /1e-1/
 3 | FREE DIRECTIONS /1e-1/
 4 | IGNORE ROTATION /1e-1/
 5 | INVERTED CONTROL MODE /1e-1/
 6 | LIMIT DIRECTIONS /1e-1/
 7 | MANIPULATION /1e-1/
 8 | NATURAL CONTROL MODE /1e-1/
 9 | NAVIGATION /1e-1/
10 | OPEN HAND /1e-1/
11 | ROBOT PLAN AND GO /1e-1/
12 | ROBOT PLAN HOME /1e-1/
13 | ROBOT PLEASE EXECUTE /1e-1/
14 | ROBOT PLEASE PLAN /1e-1/
15 | ROBOT PLEASE GO HOME /1e-1/
16 | ROBOT PLAN HOME /1e-1/
17 | TURN HANDLE CLOCKWISE /1e-1/
18 | TURN HANDLE COUNTERCLOCKWISE /1e-1/
19 | STOP STOP /1e-1/
20 | FORWARD /1e-1/
21 | BACKWARD /1e-1/
22 | 


--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 2 | Changelog for package pocketsphinx
 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 4 | 
 5 | 0.4.0 (2014-06-03)
 6 | ------------------
 7 | * add ~source parameter, for setting things like 'alsasrc'
 8 | * add depend on python-gst
 9 | * Contributors: Michael Ferguson
10 | 
11 | 0.3.0 (2013-11-27)
12 | ------------------
13 | * Convert print statements to rosinfo/rosdebugs
14 | * Change language model at runtime + specify audio source
15 | 
16 | 0.2.2 (2013-09-29)
17 | ------------------
18 | * Add changelog
19 | 
20 | 0.2.1 (2013-09-21)
21 | ------------------
22 | * Fix install rules so that python scripts are executable
23 | 
24 | 0.2.0 (2013-07-12)
25 | ------------------
26 | * Added support for grammar fsg files
27 | * First catkinized release
28 | 


--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <package format="2">
 3 |   <name>pocketsphinx</name>
 4 |   <version>0.4.0</version>
 5 |   <description>
 6 |     This package is a simple wrapper around the pocketsphinx speech recognizer, 
 7 |     using gstreamer and a Python-based interface. 
 8 |   </description>
 9 |   <author>Michael Ferguson</author>
10 |   <maintainer email="mike@vanadiumlabs.com">Michael Ferguson</maintainer>
11 |   <license>BSD</license>
12 |   <url type="website">http://ros.org/wiki/pocketsphinx</url>
13 | 
14 |   <buildtool_depend>catkin</buildtool_depend>
15 | 
16 |   <exec_depend>rospy</exec_depend>
17 |   <exec_depend>std_msgs</exec_depend>
18 |   <exec_depend>std_srvs</exec_depend>
19 |   <exec_depend>geometry_msgs</exec_depend><!-- only needed for demos -->
20 | 
21 | </package>
22 | 


--------------------------------------------------------------------------------
/vocab/voice_cmd.dic:
--------------------------------------------------------------------------------
 1 | AND	AH N D
 2 | CLOCKWISE	K L AA K W AY Z
 3 | CLOSE	K L OW S
 4 | CONSIDER	K AH N S IH D ER
 5 | CONTROL	K AH N T R OW L
 6 | COUNTERCLOCKWISE	K AW N T ER K L AO K W AY Z
 7 | DIRECTIONS	D ER EH K SH AH N Z
 8 | DIRECTIONS(2)	D IY R EH K SH IH N Z
 9 | DIRECTIONS(3)	D AY R EH K SH IH N Z
10 | DIRECTIONS(4)	D IH R EH K SH IH N Z
11 | EXECUTE	EH K S AH K Y UW T
12 | FREE	F R IY
13 | GO	G OW
14 | HAND	HH AE N D
15 | HANDLE	HH AE N D AH L
16 | HOME	HH OW M
17 | IGNORE	IH G N AO R
18 | INVERTED	IH N V ER T IH D
19 | LIMIT	L IH M AH T
20 | MANIPULATION	M AH N IH P Y AH L EY SH AH N
21 | MODE	M OW D
22 | NATURAL	N AE CH ER AH L
23 | NATURAL(2)	N AE CH R AH L
24 | NAVIGATION	N AE V AH G EY SH AH N
25 | OPEN	OW P AH N
26 | PLAN	P L AE N
27 | PLEASE	P L IY Z
28 | ROBOT	R OW B AA T
29 | ROBOT(2)	R OW B AH T
30 | ROTATION	R OW T EY SH AH N
31 | STOP	S T AA P
32 | TURN	T ER N
33 | FORWARD	F AO R W ER D
34 | BACKWARD	B AE K W ER D
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Modified ROS wrapper for pocketsphinx  
 2 | 
 3 | The following repositories were used in our wrapper, but are not required for installation:
 4 | 
 5 | * Original repository: https://github.com/mikeferguson/pocketsphinx  
 6 |   
 7 | * Also used repo: https://github.com/gorinars/ros_voice_control  
 8 | 
 9 | It uses up-to-date pocketsphinx features and is independent of most external dependencies.  
10 |   
11 | Current repository is a ROS wrapper which incorporates those features. 
12 |   
13 | ## Installation 
14 | 1)   
15 |     ```
16 |     sudo apt-get install -y python python-dev python-pip build-essential swig libpulse-dev git
17 |     sudo apt-get install python-pyaudio
18 |     ```
19 | 
20 | 2) pocketsphinx: You will need to have pip preinstalled for this to work
21 |     ```
22 |     sudo pip install pocketsphinx
23 |     ```
24 | 3) Download the hub4wsj_sc_8k acoustic model and copy it to /usr/share/pocketsphinx/model/hmm/en_US/. It can be found [here](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/Archive/US%20English%20HUB4WSJ%20Acoustic%20Model/)
25 | 
26 | 4) Clone this repository into your catkin workspace's src folder:
27 |   ```
28 |   cd ~/catkin_ws/src
29 |   git clone https://github.com/UTNuclearRoboticsPublic/pocketsphinx.git  
30 |   ```
31 | ## Usage
32 | 
33 | ### If you want to decode from a microphone stream:
34 | 1. In pocketsphinx.launch, change the value of the `_stream` argument to `_stream:=true`.
35 | 2. 
36 | ```
37 | roslaunch pocketsphinx pocketsphinx.launch
38 | ```
39 | 
40 | ### If you want to decode from a wav file:
41 | 1. In pocketsphinx.launch, change the value of the `_stream` argument to `_stream:=false`.
42 | 2. Change the value of the `_wavpath` argument to `_wavpath:=<path to your wav file>`.
43 | 3.
44 | ``` 
45 | roslaunch pocketsphinx pocketsphinx.launch
46 | ```
47 | 
48 | ## Using your own keywords
49 | 
50 | You can run this with any set of words. To do that, you need lexicon and keyword list files (check voice_cmd.dic and voice_cmd.kwlist for details).
51 | 
52 | Word pronunciations for English can be found in [CMUdict](http://www.speech.cs.cmu.edu/cgi-bin/cmudict).
53 | 
54 | You can also download pocketsphinx acoustic models for several other languages [here](https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/).
55 | 
56 | Read more about pocketsphinx on the official website: http://cmusphinx.sourceforge.net
57 | 


--------------------------------------------------------------------------------
/scripts/recognizer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import argparse
  4 | import rospy
  5 | 
  6 | from pocketsphinx.pocketsphinx import *
  7 | from sphinxbase.sphinxbase import *
  8 | import pyaudio
  9 | 
 10 | from std_msgs.msg import String
 11 | from std_srvs.srv import *
 12 | import os
 13 | import commands
 14 | 
 15 | class recognizer(object):
 16 | 
 17 |     def __init__(self):
 18 | 
 19 |         # initialize ROS
 20 |         self.speed = 0.2
 21 | 
 22 |         # Start node
 23 |         rospy.init_node("recognizer")
 24 |         rospy.on_shutdown(self.shutdown)
 25 | 
 26 |         self._lm_param = "~lm"
 27 |         self._dict_param = "~dict"
 28 |         self._kws_param = "~kws"
 29 |         self._stream_param = "~stream"
 30 |         self._wavpath_param = "~wavpath"
 31 | 
 32 |         # you may need to change publisher destination depending on what you run
 33 |         self.pub_ = rospy.Publisher('~output', String, queue_size=1)
 34 | 
 35 |         if rospy.has_param(self._lm_param):
 36 |             self.lm = rospy.get_param(self._lm_param)
 37 |         else:
 38 |             rospy.loginfo("Loading the default acoustic model")
 39 |             self.lm = "/usr/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"
 40 |             rospy.loginfo("Done loading the default acoustic model")
 41 | 
 42 |         if rospy.has_param(self._dict_param):
 43 |             self.lexicon = rospy.get_param(self._dict_param)
 44 |         else:
 45 |             rospy.logerr('No dictionary found. Please add an appropriate dictionary argument.')
 46 |             return
 47 | 
 48 |         if rospy.has_param(self._kws_param):
 49 |             self.kw_list = rospy.get_param(self._kws_param)
 50 |         else:
 51 |             rospy.logerr('kws cant run. Please add an appropriate keyword list file.')
 52 |             return
 53 | 
 54 |         if rospy.has_param(self._stream_param):
 55 |             self.is_stream = rospy.get_param(self._stream_param)
 56 |             if not self.is_stream:
 57 |                 if rospy.has_param(self._wavpath_param):
 58 |                     self.wavpath = rospy.get_param(self._wavpath_param)
 59 |                     if self.wavpath == "none":
 60 |                         rospy.logerr('Please set the wav path to the correct file location')
 61 |                 else:
 62 |                     rospy.logerr('No wav file is set')
 63 |         else:
 64 |             rospy.logerr('Audio is not set to a stream (true) or wav file (false).')
 65 |             self.is_stream = rospy.get_param(self._stream_param)
 66 | 
 67 |         self.start_recognizer()
 68 | 
 69 |     def start_recognizer(self):
 70 |         # initialize pocketsphinx. As mentioned in python wrapper
 71 |         rospy.loginfo("Initializing pocketsphinx")
 72 |         config = Decoder.default_config()
 73 |         rospy.loginfo("Done initializing pocketsphinx")
 74 | 
 75 |         # Hidden Markov model: The model which has been used
 76 |         config.set_string('-hmm', self.lm)
 77 |         # Pronunciation dictionary used
 78 |         config.set_string('-dict', self.lexicon)
 79 |         # Keyword list file for keyword searching
 80 |         config.set_string('-kws', self.kw_list)
 81 | 
 82 |         rospy.loginfo("Opening the audio channel")
 83 | 
 84 |         if not self.is_stream:
 85 |             self.decoder = Decoder(config)
 86 |             self.decoder.start_utt()
 87 |             try:
 88 |                 wavFile = open(self.wavpath, 'rb')
 89 |             except:
 90 |                 rospy.logerr('Please set the wav path to the correct location from the pocketsphinx launch file')
 91 |                 rospy.signal_shutdown()
 92 |             # Update the file link above with relevant username and file
 93 |             # location
 94 |             in_speech_bf = False
 95 |             while not rospy.is_shutdown():
 96 |                 buf = wavFile.read(1024)
 97 |                 if buf:
 98 |                     self.decoder.process_raw(buf, False, False)
 99 |                 else:
100 |                     break
101 |             self.decoder.end_utt()
102 |             hypothesis = self.decoder.hyp()
103 |             if hypothesis == None:
104 |                 rospy.logwarn("Error, make sure your wav file is composed of keywords!!")
105 |                 rospy.logwarn("Otherwise, your speech is uninterpretable :C ")
106 |             else:
107 |                 print hypothesis.hypstr
108 | 
109 |         else:
110 | 	    # Pocketsphinx requires 16kHz, mono, 16-bit little-Endian audio.
111 | 	    # See http://cmusphinx.sourceforge.net/wiki/tutorialtuning
112 |             stream = pyaudio.PyAudio().open(format=pyaudio.paInt16, channels=1,
113 |                         rate=16000, input=True, frames_per_buffer=1024)
114 |             stream.start_stream()
115 |             rospy.loginfo("Done opening the audio channel")
116 | 
117 |             #decoder streaming data
118 |             rospy.loginfo("Starting the decoder")
119 |             self.decoder = Decoder(config)
120 |             self.decoder.start_utt()
121 |             rospy.loginfo("Done starting the decoder")
122 | 
123 |             # Main loop
124 |             while not rospy.is_shutdown():
125 |                 # taken as is from python wrapper
126 |                 buf = stream.read(1024)
127 |                 if buf:
128 |                     self.decoder.process_raw(buf, False, False)
129 |                 else:
130 |                     break
131 |                 self.publish_result()
132 | 
133 |     def publish_result(self):
134 |         """
135 |         Publish the words
136 |         """
137 |         if self.decoder.hyp() != None:
138 |             print ([(seg.word) 
139 |                 for seg in self.decoder.seg()])
140 |             seg.word = seg.word.lower()
141 |             self.decoder.end_utt()
142 |             self.decoder.start_utt()
143 |             self.pub_.publish(seg.word)
144 | 
145 |     def shutdown(self):
146 |         """
147 |         command executed after Ctrl+C is pressed
148 |         """
149 |         rospy.loginfo("Stopping PocketSphinx")
150 | 
151 | if __name__ == "__main__":
152 |     if len(sys.argv) > 0:
153 |         start = recognizer()
154 | 


--------------------------------------------------------------------------------