├── .gitignore
├── LICENSE
├── Procfile
├── README.md
├── app.py
├── creds.py
├── pyDubMod.py
├── requirements.txt
├── static
│   ├── .DS_Store
│   ├── clippy.png
│   ├── icon.png
│   ├── icon.xcf
│   ├── icon_white.png
│   ├── privacy.html
│   ├── return.html
│   ├── silence.mp3
│   ├── speedtest
│   │   └── random4000x4000.jpg
│   └── tokengenerator.html
├── timeout_dec.py
└── welcomemessage.txt

/.gitignore:
--------------------------------------------------------------------------------
venv/
*~
__pycache__/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 Sam Machin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
web: python app.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AlexaBot

Type to Alexa!

With Amazon Alexa as a Facebook contact, you can quietly message Alexa and ask her to turn off the oven you left on while you're in the middle of a meeting. AlexaBot makes use of Amazon's Alexa Voice Service (AVS) API, building on sammachin's alexaweb core code. The AVS API, however, only accepts and returns audio. AlexaBot works around this by going text -> speech -> AVS -> audio -> text, using VoiceRSS for speech synthesis and Google speech recognition (with a Wit.ai fallback) for transcription. For convenience, AlexaBot is integrated with Facebook's Messenger Platform as a chatbot that relays messages to this server.

To fork, obtain the credentials required in creds.py (stored as Heroku config vars in this deployment). Dependencies are listed in requirements.txt. Python (the code targets Python 2), Redis, and FFmpeg are required.
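
creds.py reads its configuration from environment variables (Heroku config vars in this deployment). As a convenience, here is a minimal pre-flight check you could run before `python app.py`; the variable names are exactly the ones creds.py reads, but the script itself is a hypothetical helper, not part of the repo:

```python
# check_config.py - optional sanity check for the config creds.py expects
import os

REQUIRED = [
    "SECURITY_PROFILE_DESCRIPTION", "SECURITY_PROFILE_ID",  # Alexa security profile
    "CLIENT_ID", "CLIENT_SECRET", "PRODUCT_ID",             # Login with Amazon / AVS
    "FACEBOOK_TOKEN",                                       # Messenger page access token
    "WIT_TOKEN",                                            # Wit.ai fallback recognizer
    "REDIS_URL",                                            # token cache
    "VOICERSS_TOKEN",                                       # text-to-speech
    "GOOGLE_SPEECH_TOKEN",                                  # speech-to-text
]

missing = [name for name in REQUIRED if name not in os.environ]
if missing:
    raise SystemExit("Missing config: " + ", ".join(missing))
print("All required config present.")
```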
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os
import tornado.httpserver
import tornado.ioloop
import tornado.web
import tornado.template
from creds import *
from requests import Request
import requests
import json
import re
import tempfile
import redis
import uuid
import string
from pymessenger.bot import Bot
import traceback

# PyDub has some issues with Google Speech API params - fixed in pyDubMod
from pyDubMod import *

# Timeout decorator (import its TimeoutError too, so the except clause in
# MessageHandler.post catches the class the decorator actually raises)
from timeout_dec import timeout_dec, TimeoutError

bot = Bot(Facebook_Token)

def shutil_which(pgm):
    """Python 2 backport of ``shutil.which()`` from Python 3"""
    path = os.getenv('PATH')
    for p in path.split(os.path.pathsep):
        p = os.path.join(p, pgm)
        if os.path.exists(p) and os.access(p, os.X_OK):
            return p

def gettoken(uid):
    """Return a valid AVS access token for this user, refreshing it if needed."""
    red = redis.from_url(redis_url)
    token = red.get(uid + "-access_token")
    refresh = red.get(uid + "-refresh_token")
    if token:
        return token
    elif refresh:
        # We have a refresh token; exchange it for a fresh access token
        try:
            payload = {"client_id": Client_ID, "client_secret": Client_Secret, "refresh_token": refresh, "grant_type": "refresh_token"}
            url = "https://api.amazon.com/auth/o2/token"
            r = requests.post(url, data=payload)
            resp = json.loads(r.text)
            red.set(uid + "-access_token", resp['access_token'])
            red.expire(uid + "-access_token", 3600)
            return resp['access_token']
        # Bad refresh token
        except Exception:
            return False
    else:
        return False
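
# Token lifecycle, as implemented above: access tokens are cached in Redis under
# "<mid>-access_token" with a one-hour TTL to match Amazon's expiry, while the
# long-lived refresh token lives under "<mid>-refresh_token". Illustrative only,
# with a made-up Messenger ID:
#
#   red = redis.from_url(redis_url)
#   red.ttl("1234567890-access_token")    # -> seconds until the next refresh (<= 3600)
#   red.get("1234567890-refresh_token")   # -> "Atzr|..." style long-lived Amazon token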

# Get Alexa's [text] response to a [text] query
@timeout_dec(20)
def getAlexa(text, mid):
    # Fetch user's Amazon access token with Messenger ID as the uid
    token = gettoken(mid)

    # Nonexistent or broken refresh token - was more of an issue in an older version, when the user manually entered a refresh token
    if not token:
        # Remove any broken refresh token
        red = redis.from_url(redis_url)
        red.delete(mid + "-refresh_token")
        return "Sorry, it looks like you didn't log in to Amazon correctly. Try again here https://amazonalexabot.herokuapp.com/start and come back with your code."

    # Google Translate TTS was also considered -
    # http://translate.google.com/translate_tts?ie=UTF-8&total=1&idx=0&textlen=32&client=tw-ob&q=hello&tl=En-us

    # Speech synthesis through the VoiceRSS API
    audio = requests.get('https://api.voicerss.org/', params={'key': VoiceRSS_Token, 'src': text, 'hl': 'en-us', 'c': 'WAV', 'f': '16khz_16bit_mono'})

    # Write out synthesized speech to a temporary file
    tf = tempfile.NamedTemporaryFile(suffix=".wav")
    tf.write(audio.content)
    tf.flush()  # make sure the bytes hit disk before pydub reads the file back by name

    # Create an AudioSegment object from the synthesized audio file
    _input = AudioSegment.from_wav(tf.name)
    tf.close()

    # Convert audio object - mono channel, 16 kHz, for Alexa Voice Service
    _output = _input.set_channels(1).set_frame_rate(16000)

    # Formatted synthesized audio as an in-memory file object
    audio_infile = _output.export(format="wav")

    # Parameters for the AVS request
    url = 'https://access-alexa-na.amazon.com/v1/avs/speechrecognizer/recognize'
    headers = {'Authorization': 'Bearer %s' % token}
    avs_json = {
        "messageHeader": {
            "deviceContext": [
                {
                    "name": "playbackState",
                    "namespace": "AudioPlayer",
                    "payload": {
                        "streamId": "",
                        "offsetInMilliseconds": "0",
                        "playerActivity": "IDLE"
                    }
                }
            ]
        },
        "messageBody": {
            "profile": "alexa-close-talk",
            "locale": "en-us",
            "format": "audio/L16; rate=16000; channels=1"
        }
    }
    files = [
        ('file', ('request', json.dumps(avs_json), 'application/json; charset=UTF-8')),
        ('file', ('audio', audio_infile, 'audio/L16; rate=16000; channels=1'))
    ]

    # Make the request to AVS
    r = requests.post(url, headers=headers, files=files)

    # The reply is multipart; pull the boundary token out of the Content-Type header
    for v in r.headers['content-type'].split(";"):
        if re.match('.*boundary.*', v):
            boundary = v.split("=")[1]

    # The audio part is the only chunk large enough to pass this size check
    data = r.content.split(boundary)
    for d in data:
        if len(d) >= 1024:
            audio_outfile = d.split('\r\n\r\n')[1].rstrip('--')

    # Temporary file to store Alexa's audio output
    tf = tempfile.NamedTemporaryFile(suffix=".mp3")
    tf.write(audio_outfile)
    tf.flush()

    # Create an AudioSegment object for Alexa's audio output
    _input = AudioSegment.from_mp3(tf.name)
    tf.close()

    # Export Alexa's audio output in wav format as an in-memory file object
    wav_audio_outfile = _input.export(format="wav")

    # Speech recognizer object initialization
    recognizer = Recognizer()
    with AudioFile(wav_audio_outfile) as source:
        audio = recognizer.record(source)  # read the entire audio file

    # Recognize speech using Google Speech Recognition
    try:
        transcription = recognizer.recognize_google(audio, key=Google_Speech_Token)

    # Fallback speech recognition through Wit.ai
    except (UnknownValueError, RequestError):
        print("Google Speech Recognition could not understand audio")

        WIT_AI_KEY = Wit_Token  # Wit.ai keys are 32-character uppercase alphanumeric strings
        try:
            transcription = recognizer.recognize_wit(audio, key=WIT_AI_KEY)
            print("Wit.ai thinks you said " + transcription)
        except UnknownValueError:
            print("Wit.ai could not understand audio")
            raise  # let the caller's generic handler apologize to the user
        except RequestError as e:
            print("Could not request results from Wit.ai service; {0}".format(e))
            raise

    return transcription
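
# For reference, the multipart AVS response that the parsing in getAlexa() relies
# on looks roughly like this (illustrative sketch; the boundary string varies per
# response, which is why it is pulled out of the Content-Type header above):
#
#   Content-Type: multipart/related; boundary=boundary_term
#
#   --boundary_term
#   Content-Type: application/json; charset=UTF-8
#
#   {"messageHeader": {...}, "messageBody": {...}}
#   --boundary_term
#   Content-Type: audio/mpeg
#
#   <binary MP3 bytes>        <- the only chunk >= 1024 bytes, kept by getAlexa()
#   --boundary_term--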

class BaseHandler(tornado.web.RequestHandler):
    def get_current_user(self):
        return self.get_cookie("user")


class MainHandler(BaseHandler):
    # @tornado.web.authenticated
    @tornado.web.asynchronous
    def get(self):
        self.render("static/tokengenerator.html", token=self.get_argument("refreshtoken"))


class StartAuthHandler(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        mid = self.get_argument("mid", default=None, strip=False)
        scope = "alexa_all"
        sd = json.dumps({
            "alexa:all": {
                "productID": Product_ID,
                "productInstanceAttributes": {
                    "deviceSerialNumber": "1"
                }
            }
        })
        url = "https://www.amazon.com/ap/oa"
        path = "https" + "://" + self.request.host
        if mid is not None:
            self.set_cookie("user", mid)
        callback = path + "/code"
        payload = {"client_id": Client_ID, "scope": "alexa:all", "scope_data": sd, "response_type": "code", "redirect_uri": callback}
        req = Request('GET', url, params=payload)
        p = req.prepare()
        self.redirect(p.url)


class CodeAuthHandler(tornado.web.RequestHandler):
    @tornado.web.asynchronous
    def get(self):
        code = self.get_argument("code")
        mid = self.get_cookie("user")
        path = "https" + "://" + self.request.host
        callback = path + "/code"
        payload = {"client_id": Client_ID, "client_secret": Client_Secret, "code": code, "grant_type": "authorization_code", "redirect_uri": callback}
        url = "https://api.amazon.com/auth/o2/token"
        r = requests.post(url, data=payload)
        red = redis.from_url(redis_url)
        resp = json.loads(r.text)
        if mid is not None:
            print("fetched MID: ", mid)
            red.set(mid + "-access_token", resp['access_token'])
            red.expire(mid + "-access_token", 3600)
            red.set(mid + "-refresh_token", resp['refresh_token'])
            self.render("static/return.html")
            bot.send_text_message(mid, "Great, you're logged in. Start talking to Alexa!")
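        # Two login flows converge here: Messenger users arrive with their MID saved
        # in the "user" cookie (set by /start?mid=...), so their tokens are stored in
        # Redis and they are messaged directly; without a MID we fall back to the
        # older manual flow below and just display the refresh token on the page.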
        else:
            self.redirect("/?refreshtoken=" + resp['refresh_token'])

class LogoutHandler(BaseHandler):
    @tornado.web.authenticated
    @tornado.web.asynchronous
    def get(self):
        uid = tornado.escape.xhtml_escape(self.current_user)
        red = redis.from_url(redis_url)
        red.delete(uid + "-access_token")
        red.delete(uid + "-refresh_token")
        self.clear_cookie("user")
        self.set_header('Content-Type', 'text/plain')
        self.write("Logged Out, Goodbye")
        self.finish()

# Facebook Messenger webhook
class MessageHandler(BaseHandler):

    # Verify webhook
    @tornado.web.asynchronous
    def get(self):
        if self.get_argument("hub.verify_token", default=None, strip=False) == "my_voice_is_my_password_verify_me":
            self.set_header('Content-Type', 'text/plain')
            self.write(self.get_argument("hub.challenge", default=None, strip=False))
            self.finish()
        else:
            # Reject verification attempts with the wrong token instead of leaving the request hanging
            self.set_status(403)
            self.finish()

    # Receive messages from users
    def post(self):
        fb_json = tornado.escape.json_decode(self.request.body)
        event = fb_json['entry'][0]['messaging']

        for x in event:
            # User's messenger ID (MID)
            recipient_id = x['sender']['id']

            # Get Started button - used for AVS authentication
            if "postback" in x and "payload" in x['postback']:
                payload = x['postback']['payload']

                # User authentication for AVS
                if payload == "AUTH":
                    # Generate a login link with the user's MID
                    link = "https://amazonalexabot.herokuapp.com/start?mid=" + recipient_id

                    # Send a login dialog to the user in Messenger
                    messageData = {"attachment": {"type": "template", "payload": {"template_type": "generic", "elements": [{"title": "Login to Amazon", "buttons": [{"type": "web_url", "url": link, "title": "Login"}]}]}}}
                    payload = {"recipient": {"id": recipient_id}, "message": messageData}
                    r = requests.post("https://graph.facebook.com/v2.6/me/messages?access_token=" + Facebook_Token, json=payload)

            # Received a sticker
            elif "message" in x and "sticker_id" in x["message"]:
                bot.send_text_message(recipient_id, "(y)")

            # Received a textual message
            elif "message" in x and "text" in x["message"]:
                message = x["message"]["text"]
                try:
                    # Hardcode a few greetings that are problematic for the speech synthesis
                    if message.lower() in {"hi", "hello", "hi alexa", "hello alexa", "hi there", "hey alexa", "hey", "hello there"}:
                        bot.send_text_message(recipient_id, "Hi there")

                    # Help message required by Facebook
                    elif message.lower() in {"help", "help me"}:
                        bot.send_text_message(recipient_id, "Type anything you would say to Amazon's Alexa assistant and receive her response. For more help with what you can say, check out the Things to Try section of the Alexa app.")
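
                    # For reference, a single webhook delivery decoded above looks
                    # roughly like this (illustrative; only the fields used here are shown):
                    #
                    #   {"entry": [{"messaging": [{
                    #       "sender": {"id": "<MID>"},
                    #       "message": {"text": "what time is it"},  # or "sticker_id"
                    #       "postback": {"payload": "AUTH"}          # Get Started button
                    #   }]}]}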

                    # Normal textual message
                    else:
                        red = redis.from_url(redis_url)

                        # User is not (or improperly) logged into Amazon - directly texted refresh tokens used to be handled here
                        if not red.exists(recipient_id + "-refresh_token"):
                            # Generate a login link with the user's MID
                            link = 'https://amazonalexabot.herokuapp.com/start?mid=' + recipient_id

                            # Send a login dialog to the user in Messenger
                            messageData = {"attachment": {"type": "template", "payload": {"template_type": "generic", "elements": [{"title": "You are not logged in properly.", "buttons": [{"type": "web_url", "url": link, "title": "Login"}]}]}}}
                            payload = {"recipient": {"id": recipient_id}, "message": messageData}
                            r = requests.post("https://graph.facebook.com/v2.6/me/messages?access_token=" + Facebook_Token, json=payload)

                        # User is logged into Amazon
                        else:
                            # Get a response from Alexa - convert text to speech, pass through AVS, then convert speech back to text
                            alexa_response = getAlexa(message, recipient_id)

                            # Truncate the response to Messenger's 320-character limit
                            if len(alexa_response) > 320:
                                alexa_response = alexa_response[:317] + "..."

                            # Send Alexa's textual response to the Messenger user
                            bot.send_text_message(recipient_id, alexa_response)

                except TimeoutError:
                    print(traceback.format_exc())
                    bot.send_text_message(recipient_id, "Request took too long.")

                except Exception:
                    print("Couldn't understand: ", traceback.format_exc())
                    bot.send_text_message(recipient_id, "Alexa gave an invalid response. This may occur if you gave Alexa a command such as \"Turn on the lights,\" which requires no reply from Alexa. Otherwise, something went wrong and we are trying to fix it!")
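
            # Note: regardless of branch, the handler acknowledges with HTTP 200 below;
            # Messenger keeps redelivering webhook events that are not acknowledged.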
            else:
                pass
        self.set_status(200)
        self.finish()

def main():
    settings = {
        "cookie_secret": "parisPOLANDbroadFENCEcornWOULD",
    }
    static_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
    application = tornado.web.Application([(r"/", MainHandler),
                                           (r"/start", StartAuthHandler),
                                           (r"/code", CodeAuthHandler),
                                           (r"/logout", LogoutHandler),
                                           (r"/webhook", MessageHandler),
                                           (r'/(favicon.ico)', tornado.web.StaticFileHandler, {'path': static_path}),
                                           (r'/static/(.*)', tornado.web.StaticFileHandler, {'path': static_path}),
                                           ], **settings)
    http_server = tornado.httpserver.HTTPServer(application)
    port = int(os.environ.get("PORT", 5000))
    http_server.listen(port)
    tornado.ioloop.IOLoop.instance().start()

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/creds.py:
--------------------------------------------------------------------------------
import os

# Alexa
Security_Profile_Description = os.environ['SECURITY_PROFILE_DESCRIPTION']
Security_Profile_ID = os.environ['SECURITY_PROFILE_ID']
Client_ID = os.environ['CLIENT_ID']
Client_Secret = os.environ['CLIENT_SECRET']
Product_ID = os.environ['PRODUCT_ID']

# Facebook
Facebook_Token = os.environ['FACEBOOK_TOKEN']

# wit.ai
Wit_Token = os.environ['WIT_TOKEN']

# Redis
redis_url = os.environ['REDIS_URL']

# VoiceRSS
VoiceRSS_Token = os.environ['VOICERSS_TOKEN']

# Google Speech Recognition
Google_Speech_Token = os.environ['GOOGLE_SPEECH_TOKEN']
--------------------------------------------------------------------------------
/pyDubMod.py:
--------------------------------------------------------------------------------
# pydub imports
import os
from requests import Request
import requests
import json
import uuid
import string
from pydub import AudioSegment
import io, subprocess, wave, aifc, base64
import math, audioop, collections, threading
import platform, stat, random


# define exceptions
class TimeoutError(Exception): pass
class RequestError(Exception): pass
class UnknownValueError(Exception): pass

class AudioSource(object):
    def __init__(self):
        raise NotImplementedError("this is an abstract class")

    def __enter__(self):
        raise NotImplementedError("this is an abstract class")

    def __exit__(self, exc_type, exc_value, traceback):
        raise NotImplementedError("this is an abstract class")


class AudioFile(AudioSource):
    """
    Creates a new ``AudioFile`` instance given a WAV/AIFF/FLAC audio file `filename_or_fileobject`. Subclass of ``AudioSource``.
    If ``filename_or_fileobject`` is a string, then it is interpreted as a path to an audio file on the filesystem. Otherwise, ``filename_or_fileobject`` should be a file-like object such as ``io.BytesIO`` or similar.
    Note that functions that read from the audio (such as ``recognizer_instance.record`` or ``recognizer_instance.listen``) will move ahead in the stream.
For example, if you execute ``recognizer_instance.record(audiofile_instance, duration=10)`` twice, the first time it will return the first 10 seconds of audio, and the second time it will return the 10 seconds of audio right after that. This is always reset to the beginning when entering an ``AudioFile`` context. 35 | WAV files must be in PCM/LPCM format; WAVE_FORMAT_EXTENSIBLE and compressed WAV are not supported and may result in undefined behaviour. 36 | Both AIFF and AIFF-C (compressed AIFF) formats are supported. 37 | FLAC files must be in native FLAC format; OGG-FLAC is not supported and may result in undefined behaviour. 38 | """ 39 | 40 | def __init__(self, filename_or_fileobject): 41 | if str is bytes: # Python 2 - if a file path is specified, it must either be a `str` instance or a `unicode` instance 42 | assert isinstance(filename_or_fileobject, (str, unicode)) or hasattr(filename_or_fileobject, "read"), "Given audio file must be a filename string or a file-like object" 43 | else: # Python 3 - if a file path is specified, it must be a `str` instance 44 | assert isinstance(filename_or_fileobject, str) or hasattr(filename_or_fileobject, "read"), "Given audio file must be a filename string or a file-like object" 45 | self.filename_or_fileobject = filename_or_fileobject 46 | self.stream = None 47 | self.DURATION = None 48 | 49 | def __enter__(self): 50 | assert self.stream is None, "This audio source is already inside a context manager" 51 | try: 52 | # attempt to read the file as WAV 53 | self.audio_reader = wave.open(self.filename_or_fileobject, "rb") 54 | self.little_endian = True # RIFF WAV is a little-endian format (most ``audioop`` operations assume that the frames are stored in little-endian form) 55 | except wave.Error: 56 | try: 57 | # attempt to read the file as AIFF 58 | self.audio_reader = aifc.open(self.filename_or_fileobject, "rb") 59 | self.little_endian = False # AIFF is a big-endian format 60 | except aifc.Error: 61 | # attempt to read the file as FLAC 62 | if hasattr(self.filename_or_fileobject, "read"): 63 | flac_data = self.filename_or_fileobject.read() 64 | else: 65 | with open(self.filename_or_fileobject, "rb") as f: flac_data = f.read() 66 | 67 | # run the FLAC converter with the FLAC data to get the AIFF data 68 | flac_converter = get_flac_converter() 69 | process = subprocess.Popen([ 70 | flac_converter, 71 | "--stdout", "--totally-silent", # put the resulting AIFF file in stdout, and make sure it's not mixed with any program output 72 | "--decode", "--force-aiff-format", # decode the FLAC file into an AIFF file 73 | "-", # the input FLAC file contents will be given in stdin 74 | ], stdin=subprocess.PIPE, stdout=subprocess.PIPE) 75 | aiff_data, stderr = process.communicate(flac_data) 76 | aiff_file = io.BytesIO(aiff_data) 77 | try: 78 | self.audio_reader = aifc.open(aiff_file, "rb") 79 | except aifc.Error: 80 | assert False, "Audio file could not be read as WAV, AIFF, or FLAC; check if file is corrupted" 81 | self.little_endian = False # AIFF is a big-endian format 82 | assert 1 <= self.audio_reader.getnchannels() <= 2, "Audio must be mono or stereo" 83 | self.SAMPLE_WIDTH = self.audio_reader.getsampwidth() 84 | 85 | # 24-bit audio needs some special handling for old Python versions (workaround for https://bugs.python.org/issue12866) 86 | samples_24_bit_pretending_to_be_32_bit = False 87 | if self.SAMPLE_WIDTH == 3: # 24-bit audio 88 | try: audioop.bias(b"", self.SAMPLE_WIDTH, 0) # test whether this sample width is supported (for example, ``audioop`` in 
Python 3.3 and below don't support sample width 3, while Python 3.4+ do) 89 | except audioop.error: # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less) 90 | samples_24_bit_pretending_to_be_32_bit = True # while the ``AudioFile`` instance will outwardly appear to be 32-bit, it will actually internally be 24-bit 91 | self.SAMPLE_WIDTH = 4 # the ``AudioFile`` instance should present itself as a 32-bit stream now, since we'll be converting into 32-bit on the fly when reading 92 | 93 | self.SAMPLE_RATE = self.audio_reader.getframerate() 94 | self.CHUNK = 4096 95 | self.FRAME_COUNT = self.audio_reader.getnframes() 96 | self.DURATION = self.FRAME_COUNT / float(self.SAMPLE_RATE) 97 | self.stream = AudioFile.AudioFileStream(self.audio_reader, self.little_endian, samples_24_bit_pretending_to_be_32_bit) 98 | return self 99 | 100 | def __exit__(self, exc_type, exc_value, traceback): 101 | if not hasattr(self.filename_or_fileobject, "read"): # only close the file if it was opened by this class in the first place (if the file was originally given as a path) 102 | self.audio_reader.close() 103 | self.stream = None 104 | self.DURATION = None 105 | 106 | 107 | class AudioFileStream(object): 108 | def __init__(self, audio_reader, little_endian, samples_24_bit_pretending_to_be_32_bit): 109 | self.audio_reader = audio_reader # an audio file object (e.g., a `wave.Wave_read` instance) 110 | self.little_endian = little_endian # whether the audio data is little-endian (when working with big-endian things, we'll have to convert it to little-endian before we process it) 111 | self.samples_24_bit_pretending_to_be_32_bit = samples_24_bit_pretending_to_be_32_bit # this is true if the audio is 24-bit audio, but 24-bit audio isn't supported, so we have to pretend that this is 32-bit audio and convert it on the fly 112 | 113 | def read(self, size = -1): 114 | buffer = self.audio_reader.readframes(self.audio_reader.getnframes() if size == -1 else size) 115 | if not isinstance(buffer, bytes): buffer = b"" # workaround for https://bugs.python.org/issue24608 116 | 117 | sample_width = self.audio_reader.getsampwidth() 118 | if not self.little_endian: # big endian format, convert to little endian on the fly 119 | if hasattr(audioop, "byteswap"): # ``audioop.byteswap`` was only added in Python 3.4 (incidentally, that also means that we don't need to worry about 24-bit audio being unsupported, since Python 3.4+ always has that functionality) 120 | buffer = audioop.byteswap(buffer, sample_width) 121 | else: # manually reverse the bytes of each sample, which is slower but works well enough as a fallback 122 | buffer = buffer[sample_width - 1::-1] + b"".join(buffer[i + sample_width:i:-1] for i in range(sample_width - 1, len(buffer), sample_width)) 123 | 124 | # workaround for https://bugs.python.org/issue12866 125 | if self.samples_24_bit_pretending_to_be_32_bit: # we need to convert samples from 24-bit to 32-bit before we can process them with ``audioop`` functions 126 | buffer = b"".join("\x00" + buffer[i:i + sample_width] for i in range(0, len(buffer), sample_width)) # since we're in little endian, we prepend a zero byte to each 24-bit sample to get a 32-bit sample 127 | if self.audio_reader.getnchannels() != 1: # stereo audio 128 | buffer = audioop.tomono(buffer, sample_width, 1, 1) # convert stereo audio data to mono 129 | return buffer 130 | 131 | 132 | class AudioData(object): 133 | 134 | def __init__(self, frame_data, sample_rate, sample_width): 135 | assert sample_rate > 0, "Sample rate 
must be a positive integer" 136 | assert sample_width % 1 == 0 and 1 <= sample_width <= 4, "Sample width must be between 1 and 4 inclusive" 137 | self.frame_data = frame_data 138 | self.sample_rate = sample_rate 139 | self.sample_width = int(sample_width) 140 | 141 | def get_raw_data(self, convert_rate = None, convert_width = None): 142 | """ 143 | Returns a byte string representing the raw frame data for the audio represented by the ``AudioData`` instance. 144 | If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. 145 | If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. 146 | Writing these bytes directly to a file results in a valid `RAW/PCM audio file `__. 147 | """ 148 | assert convert_rate is None or convert_rate > 0, "Sample rate to convert to must be a positive integer" 149 | assert convert_width is None or (convert_width % 1 == 0 and 1 <= convert_width <= 4), "Sample width to convert to must be between 1 and 4 inclusive" 150 | 151 | raw_data = self.frame_data 152 | 153 | # make sure unsigned 8-bit audio (which uses unsigned samples) is handled like higher sample width audio (which uses signed samples) 154 | if self.sample_width == 1: 155 | raw_data = audioop.bias(raw_data, 1, -128) # subtract 128 from every sample to make them act like signed samples 156 | 157 | # resample audio at the desired rate if specified 158 | if convert_rate is not None and self.sample_rate != convert_rate: 159 | raw_data, _ = audioop.ratecv(raw_data, self.sample_width, 1, self.sample_rate, convert_rate, None) 160 | 161 | # convert samples to desired sample width if specified 162 | if convert_width is not None and self.sample_width != convert_width: 163 | if convert_width == 3: # we're converting the audio into 24-bit (workaround for https://bugs.python.org/issue12866) 164 | raw_data = audioop.lin2lin(raw_data, self.sample_width, 4) # convert audio into 32-bit first, which is always supported 165 | try: audioop.bias(b"", 3, 0) # test whether 24-bit audio is supported (for example, ``audioop`` in Python 3.3 and below don't support sample width 3, while Python 3.4+ do) 166 | except audioop.error: # this version of audioop doesn't support 24-bit audio (probably Python 3.3 or less) 167 | raw_data = b"".join(raw_data[i + 1:i + 4] for i in range(0, len(raw_data), 4)) # since we're in little endian, we discard the first byte from each 32-bit sample to get a 24-bit sample 168 | else: # 24-bit audio fully supported, we don't need to shim anything 169 | raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width) 170 | else: 171 | raw_data = audioop.lin2lin(raw_data, self.sample_width, convert_width) 172 | 173 | # if the output is 8-bit audio with unsigned samples, convert the samples we've been treating as signed to unsigned again 174 | if convert_width == 1: 175 | raw_data = audioop.bias(raw_data, 1, 128) # add 128 to every sample to make them act like unsigned samples again 176 | 177 | return raw_data 178 | 179 | def get_wav_data(self, convert_rate = None, convert_width = None): 180 | """ 181 | Returns a byte string representing the contents of a WAV file containing the audio represented by the ``AudioData`` instance. 182 | If ``convert_width`` is specified and the audio samples are not ``convert_width`` bytes each, the resulting audio is converted to match. 
183 | If ``convert_rate`` is specified and the audio sample rate is not ``convert_rate`` Hz, the resulting audio is resampled to match. 184 | Writing these bytes directly to a file results in a valid `WAV file `__. 185 | """ 186 | raw_data = self.get_raw_data(convert_rate, convert_width) 187 | sample_rate = self.sample_rate if convert_rate is None else convert_rate 188 | sample_width = self.sample_width if convert_width is None else convert_width 189 | 190 | # generate the WAV file contents 191 | with io.BytesIO() as wav_file: 192 | wav_writer = wave.open(wav_file, "wb") 193 | try: # note that we can't use context manager, since that was only added in Python 3.4 194 | wav_writer.setframerate(sample_rate) 195 | wav_writer.setsampwidth(sample_width) 196 | wav_writer.setnchannels(1) 197 | wav_writer.writeframes(raw_data) 198 | wav_data = wav_file.getvalue() 199 | finally: # make sure resources are cleaned up 200 | wav_writer.close() 201 | return wav_data 202 | 203 | 204 | class Recognizer(AudioSource): 205 | def __init__(self): 206 | """ 207 | Creates a new ``Recognizer`` instance, which represents a collection of speech recognition functionality. 208 | """ 209 | self.energy_threshold = 300 # minimum audio energy to consider for recording 210 | self.dynamic_energy_threshold = True 211 | self.dynamic_energy_adjustment_damping = 0.15 212 | self.dynamic_energy_ratio = 1.5 213 | self.pause_threshold = 0.8 # seconds of non-speaking audio before a phrase is considered complete 214 | self.phrase_threshold = 0.3 # minimum seconds of speaking audio before we consider the speaking audio a phrase - values below this are ignored (for filtering out clicks and pops) 215 | self.non_speaking_duration = 0.5 # seconds of non-speaking audio to keep on both sides of the recording 216 | 217 | def record(self, source, duration = None, offset = None): 218 | """ 219 | Records up to ``duration`` seconds of audio from ``source`` (an ``AudioSource`` instance) starting at ``offset`` (or at the beginning if not specified) into an ``AudioData`` instance, which it returns. 220 | If ``duration`` is not specified, then it will record until there is no more audio input. 221 | """ 222 | assert isinstance(source, AudioSource), "Source must be an audio source" 223 | assert source.stream is not None, "Audio source must be entered before recording, see documentation for `AudioSource`; are you using `source` outside of a `with` statement?" 224 | 225 | frames = io.BytesIO() 226 | seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE 227 | elapsed_time = 0 228 | offset_time = 0 229 | offset_reached = False 230 | while True: # loop for the total number of chunks needed 231 | if offset and not offset_reached: 232 | offset_time += seconds_per_buffer 233 | if offset_time > offset: 234 | offset_reached = True 235 | 236 | buffer = source.stream.read(source.CHUNK) 237 | if len(buffer) == 0: break 238 | 239 | if offset_reached or not offset: 240 | elapsed_time += seconds_per_buffer 241 | if duration and elapsed_time > duration: break 242 | 243 | frames.write(buffer) 244 | 245 | frame_data = frames.getvalue() 246 | frames.close() 247 | return AudioData(frame_data, source.SAMPLE_RATE, source.SAMPLE_WIDTH) 248 | 249 | def adjust_for_ambient_noise(self, source, duration = 1): 250 | """ 251 | Adjusts the energy threshold dynamically using audio from ``source`` (an ``AudioSource`` instance) to account for ambient noise. 252 | Intended to calibrate the energy threshold with the ambient energy level. 
Should be used on periods of audio without speech - will stop early if any speech is detected. 253 | The ``duration`` parameter is the maximum number of seconds that it will dynamically adjust the threshold for before returning. This value should be at least 0.5 in order to get a representative sample of the ambient noise. 254 | """ 255 | assert isinstance(source, AudioSource), "Source must be an audio source" 256 | assert source.stream is not None, "Audio source must be entered before adjusting, see documentation for `AudioSource`; are you using `source` outside of a `with` statement?" 257 | assert self.pause_threshold >= self.non_speaking_duration >= 0 258 | 259 | seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE 260 | elapsed_time = 0 261 | 262 | # adjust energy threshold until a phrase starts 263 | while True: 264 | elapsed_time += seconds_per_buffer 265 | if elapsed_time > duration: break 266 | buffer = source.stream.read(source.CHUNK) 267 | energy = audioop.rms(buffer, source.SAMPLE_WIDTH) # energy of the audio signal 268 | 269 | # dynamically adjust the energy threshold using an asymmetric weighted average 270 | damping = self.dynamic_energy_adjustment_damping ** seconds_per_buffer # account for different chunk sizes and rates 271 | target_energy = energy * self.dynamic_energy_ratio 272 | self.energy_threshold = self.energy_threshold * damping + target_energy * (1 - damping) 273 | 274 | def listen(self, source, timeout = None): 275 | """ 276 | Records a single phrase from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance, which it returns. 277 | This is done by waiting until the audio has an energy above ``recognizer_instance.energy_threshold`` (the user has started speaking), and then recording until it encounters ``recognizer_instance.pause_threshold`` seconds of non-speaking or there is no more audio input. The ending silence is not included. 278 | The ``timeout`` parameter is the maximum number of seconds that it will wait for a phrase to start before giving up and throwing a ``speech_recognition.WaitTimeoutError`` exception. If ``timeout`` is ``None``, it will wait indefinitely. 279 | """ 280 | assert isinstance(source, AudioSource), "Source must be an audio source" 281 | assert source.stream is not None, "Audio source must be entered before listening, see documentation for `AudioSource`; are you using `source` outside of a `with` statement?"
282 | assert self.pause_threshold >= self.non_speaking_duration >= 0 283 | 284 | seconds_per_buffer = (source.CHUNK + 0.0) / source.SAMPLE_RATE 285 | pause_buffer_count = int(math.ceil(self.pause_threshold / seconds_per_buffer)) # number of buffers of non-speaking audio before the phrase is complete 286 | phrase_buffer_count = int(math.ceil(self.phrase_threshold / seconds_per_buffer)) # minimum number of buffers of speaking audio before we consider the speaking audio a phrase 287 | non_speaking_buffer_count = int(math.ceil(self.non_speaking_duration / seconds_per_buffer)) # maximum number of buffers of non-speaking audio to retain before and after 288 | 289 | # read audio input for phrases until there is a phrase that is long enough 290 | elapsed_time = 0 # number of seconds of audio read 291 | while True: 292 | frames = collections.deque() 293 | 294 | # store audio input until the phrase starts 295 | while True: 296 | elapsed_time += seconds_per_buffer 297 | if timeout and elapsed_time > timeout: # handle timeout if specified 298 | raise TimeoutError("listening timed out") 299 | 300 | buffer = source.stream.read(source.CHUNK) 301 | if len(buffer) == 0: break # reached end of the stream 302 | frames.append(buffer) 303 | if len(frames) > non_speaking_buffer_count: # ensure we only keep the needed amount of non-speaking buffers 304 | frames.popleft() 305 | 306 | # detect whether speaking has started on audio input 307 | energy = audioop.rms(buffer, source.SAMPLE_WIDTH) # energy of the audio signal 308 | if energy > self.energy_threshold: break 309 | 310 | # dynamically adjust the energy threshold using assymmetric weighted average 311 | if self.dynamic_energy_threshold: 312 | damping = self.dynamic_energy_adjustment_damping ** seconds_per_buffer # account for different chunk sizes and rates 313 | target_energy = energy * self.dynamic_energy_ratio 314 | self.energy_threshold = self.energy_threshold * damping + target_energy * (1 - damping) 315 | 316 | # read audio input until the phrase ends 317 | pause_count, phrase_count = 0, 0 318 | while True: 319 | elapsed_time += seconds_per_buffer 320 | 321 | buffer = source.stream.read(source.CHUNK) 322 | if len(buffer) == 0: break # reached end of the stream 323 | frames.append(buffer) 324 | phrase_count += 1 325 | 326 | # check if speaking has stopped for longer than the pause threshold on the audio input 327 | energy = audioop.rms(buffer, source.SAMPLE_WIDTH) # energy of the audio signal 328 | if energy > self.energy_threshold: 329 | pause_count = 0 330 | else: 331 | pause_count += 1 332 | if pause_count > pause_buffer_count: # end of the phrase 333 | break 334 | 335 | # check how long the detected phrase is, and retry listening if the phrase is too short 336 | phrase_count -= pause_count 337 | if phrase_count >= phrase_buffer_count: break # phrase is long enough, stop listening 338 | 339 | # obtain frame data 340 | for i in range(pause_count - non_speaking_buffer_count): frames.pop() # remove extra non-speaking frames at the end 341 | frame_data = b"".join(list(frames)) 342 | 343 | return AudioData(frame_data, source.SAMPLE_RATE, source.SAMPLE_WIDTH) 344 | 345 | def listen_in_background(self, source, callback): 346 | """ 347 | Spawns a thread to repeatedly record phrases from ``source`` (an ``AudioSource`` instance) into an ``AudioData`` instance and call ``callback`` with that ``AudioData`` instance as soon as each phrase are detected. 
348 | Returns a function object that, when called, requests that the background listener thread stop, and waits until it does before returning. The background thread is a daemon and will not stop the program from exiting if there are no other non-daemon threads. 349 | Phrase recognition uses the exact same mechanism as ``recognizer_instance.listen(source)``. 350 | The ``callback`` parameter is a function that should accept two parameters - the ``recognizer_instance``, and an ``AudioData`` instance representing the captured audio. Note that ``callback`` function will be called from a non-main thread. 351 | """ 352 | assert isinstance(source, AudioSource), "Source must be an audio source" 353 | running = [True] 354 | def threaded_listen(): 355 | with source as s: 356 | while running[0]: 357 | try: # listen for 1 second, then check again if the stop function has been called 358 | audio = self.listen(s, 1) 359 | except TimeoutError: # listening timed out, just try again 360 | pass 361 | else: 362 | if running[0]: callback(self, audio) 363 | def stopper(): 364 | running[0] = False 365 | listener_thread.join() # block until the background thread is done, which can be up to 1 second 366 | listener_thread = threading.Thread(target=threaded_listen) 367 | listener_thread.daemon = True 368 | listener_thread.start() 369 | return stopper 370 | 371 | def recognize_bing(self, audio_data, key, language = "en-US", show_all = False): 372 | """ 373 | Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Microsoft Bing Voice Recognition API. 374 | The Microsoft Bing Voice Recognition API key is specified by ``key``. Unfortunately, these are not available without `signing up for an account `__ with Microsoft Cognitive Services. 375 | To get the API key, go to the `Microsoft Cognitive Services subscriptions overview `__, go to the entry titled "Speech", and look for the key under the "Keys" column. Microsoft Bing Voice Recognition API keys are 32-character lowercase hexadecimal strings. 376 | The recognition language is determined by ``language``, an RFC5646 language tag like ``"en-US"`` (US English) or ``"fr-FR"`` (International French), defaulting to US English. A list of supported language values can be found in the `API documentation `__. 377 | Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the `raw API response `__ as a JSON dictionary. 378 | Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection. 
379 | """ 380 | 381 | try: # attempt to use the Python 2 modules 382 | from urllib import urlencode 383 | from urllib2 import Request, urlopen, URLError, HTTPError 384 | except ImportError: # use the Python 3 modules 385 | from urllib.parse import urlencode 386 | from urllib.request import Request, urlopen 387 | from urllib.error import URLError, HTTPError 388 | 389 | 390 | assert isinstance(audio_data, AudioData), "Data must be audio data" 391 | assert isinstance(key, str), "`key` must be a string" 392 | assert isinstance(language, str), "`language` must be a string" 393 | 394 | access_token, expire_time = getattr(self, "bing_cached_access_token", None), getattr(self, "bing_cached_access_token_expiry", None) 395 | allow_caching = True 396 | try: 397 | from time import monotonic # we need monotonic time to avoid being affected by system clock changes, but this is only available in Python 3.3+ 398 | except ImportError: 399 | try: 400 | from monotonic import monotonic # use time.monotonic backport for Python 2 if available (from https://pypi.python.org/pypi/monotonic) 401 | except (ImportError, RuntimeError): 402 | expire_time = None # monotonic time not available, don't cache access tokens 403 | allow_caching = False # don't allow caching, since monotonic time isn't available 404 | if expire_time is None or monotonic() > expire_time: # caching not enabled, first credential request, or the access token from the previous one expired 405 | # get an access token using OAuth 406 | credential_url = "https://oxford-speech.cloudapp.net/token/issueToken" 407 | credential_request = Request(credential_url, data = urlencode({ 408 | "grant_type": "client_credentials", 409 | "client_id": "python", 410 | "client_secret": key, 411 | "scope": "https://speech.platform.bing.com" 412 | }).encode("utf-8")) 413 | if allow_caching: 414 | start_time = monotonic() 415 | try: 416 | credential_response = urlopen(credential_request) 417 | except HTTPError as e: 418 | raise RequestError("recognition request failed: {0}".format(getattr(e, "reason", "status {0}".format(e.code)))) # use getattr to be compatible with Python 2.6 419 | except URLError as e: 420 | raise RequestError("recognition connection failed: {0}".format(e.reason)) 421 | credential_text = credential_response.read().decode("utf-8") 422 | credentials = json.loads(credential_text) 423 | access_token, expiry_seconds = credentials["access_token"], float(credentials["expires_in"]) 424 | 425 | if allow_caching: 426 | # save the token for the duration it is valid for 427 | self.bing_cached_access_token = access_token 428 | self.bing_cached_access_token_expiry = start_time + expiry_seconds 429 | 430 | wav_data = audio_data.get_wav_data( 431 | convert_rate = 16000, # audio samples must be 8kHz or 16 kHz 432 | convert_width = 2 # audio samples should be 16-bit 433 | ) 434 | url = "https://speech.platform.bing.com/recognize/query?{0}".format(urlencode({ 435 | "version": "3.0", 436 | "requestid": uuid.uuid4(), 437 | "appID": "D4D52672-91D7-4C74-8AD8-42B1D98141A5", 438 | "format": "json", 439 | "locale": language, 440 | "device.os": "wp7", 441 | "scenarios": "ulm", 442 | "instanceid": uuid.uuid4(), 443 | "result.profanitymarkup": "0", 444 | })) 445 | request = Request(url, data = wav_data, headers = { 446 | "Authorization": "Bearer {0}".format(access_token), 447 | "Content-Type": "audio/wav; samplerate=16000; sourcerate={0}; trustsourcerate=true".format(audio_data.sample_rate), 448 | }) 449 | try: 450 | response = urlopen(request) 451 | except HTTPError as e: 452 | 
raise RequestError("recognition request failed: {0}".format(getattr(e, "reason", "status {0}".format(e.code)))) # use getattr to be compatible with Python 2.6 453 | except URLError as e: 454 | raise RequestError("recognition connection failed: {0}".format(e.reason)) 455 | response_text = response.read().decode("utf-8") 456 | result = json.loads(response_text) 457 | 458 | # return results 459 | if show_all: return result 460 | if "header" not in result or "lexical" not in result["header"]: raise UnknownValueError() 461 | return result["header"]["lexical"] 462 | 463 | 464 | def recognize_google(self, audio_data, key = None, language = "en-US", show_all = False): 465 | """ 466 | Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Speech Recognition API. 467 | The Google Speech Recognition API key is specified by ``key``. If not specified, it uses a generic key that works out of the box. This should generally be used for personal or testing purposes only, as it **may be revoked by Google at any time**. 468 | To obtain your own API key, simply follow the steps on the `API Keys `__ page at the Chromium Developers site. In the Google Developers Console, Google Speech Recognition is listed as "Speech API". 469 | The recognition language is determined by ``language``, an RFC5646 language tag like ``"en-US"`` (US English) or ``"fr-FR"`` (International French), defaulting to US English. A list of supported language values can be found in this `StackOverflow answer `__. 470 | Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the raw API response as a JSON dictionary. 471 | Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
472 | """ 473 | 474 | try: # attempt to use the Python 2 modules 475 | from urllib import urlencode 476 | from urllib2 import Request, urlopen, URLError, HTTPError 477 | except ImportError: # use the Python 3 modules 478 | from urllib.parse import urlencode 479 | from urllib.request import Request, urlopen 480 | from urllib.error import URLError, HTTPError 481 | 482 | 483 | assert isinstance(audio_data, AudioData), "`audio_data` must be audio data" 484 | assert key is None or isinstance(key, str), "`key` must be `None` or a string" 485 | assert isinstance(language, str), "`language` must be a string" 486 | 487 | #module uses flac by default, which attempts to open a subprocess which fails on Heroku 488 | #modified this function to use a wav file instead, which Google apparently supports 489 | flac_data = audio_data.get_wav_data( 490 | convert_rate = 16000, # audio samples must be at least 8 kHz 491 | convert_width = 2 # audio samples must be 16-bit 492 | ) 493 | 494 | #we're using the Google Chromium Speech APIv2 which has been deprecated in favor of the Google Cloud Speech API 495 | #this API is meant for devs, and has a wonky process to enable which involves joining a Google Group 496 | if key is None: key = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw" 497 | url = "http://www.google.com/speech-api/v2/recognize?{0}".format(urlencode({ 498 | "client": "chromium", 499 | "lang": language, 500 | "key": key, 501 | })) 502 | 503 | #changed header parameters for wav file 504 | request = Request(url, data = flac_data, headers = {"Content-Type": "audio/l16; rate=16000"}) 505 | 506 | # obtain audio transcription results 507 | try: 508 | response = urlopen(request) 509 | except HTTPError as e: 510 | raise RequestError("recognition request failed: {0}".format(getattr(e, "reason", "status {0}".format(e.code)))) # use getattr to be compatible with Python 2.6 511 | except URLError as e: 512 | raise RequestError("recognition connection failed: {0}".format(e.reason)) 513 | response_text = response.read().decode("utf-8") 514 | #. 515 | 516 | # ignore any blank blocks 517 | actual_result = [] 518 | for line in response_text.split("\n"): 519 | if not line: continue 520 | result = json.loads(line)["result"] 521 | if len(result) != 0: 522 | actual_result = result[0] 523 | break 524 | 525 | # return results 526 | if show_all: return actual_result 527 | if "alternative" not in actual_result: raise UnknownValueError() 528 | for entry in actual_result["alternative"]: 529 | if "transcript" in entry: 530 | return entry["transcript"] 531 | raise UnknownValueError() # no transcriptions available 532 | 533 | def recognize_wit(self, audio_data, key, show_all = False): 534 | """ 535 | Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Wit.ai API. 536 | The Wit.ai API key is specified by ``key``. Unfortunately, these are not available without `signing up for an account `__ and creating an app. You will need to add at least one intent to the app before you can see the API key, though the actual intent settings don't matter. 537 | To get the API key for a Wit.ai app, go to the app's overview page, go to the section titled "Make an API request", and look for something along the lines of ``Authorization: Bearer XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX``; ``XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX`` is the API key. Wit.ai API keys are 32-character uppercase alphanumeric strings. 538 | The recognition language is configured in the Wit.ai app settings. 
539 | Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the `raw API response `__ as a JSON dictionary. 540 | Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection. 541 | """ 542 | try: # attempt to use the Python 2 modules 543 | from urllib import urlencode 544 | from urllib2 import Request, urlopen, URLError, HTTPError 545 | except ImportError: # use the Python 3 modules 546 | from urllib.parse import urlencode 547 | from urllib.request import Request, urlopen 548 | from urllib.error import URLError, HTTPError 549 | assert isinstance(audio_data, AudioData), "Data must be audio data" 550 | assert isinstance(key, str), "`key` must be a string" 551 | 552 | wav_data = audio_data.get_wav_data( 553 | convert_rate = None if audio_data.sample_rate >= 8000 else 8000, # audio samples must be at least 8 kHz 554 | convert_width = 2 # audio samples should be 16-bit 555 | ) 556 | url = "https://api.wit.ai/speech?v=20141022" 557 | request = Request(url, data = wav_data, headers = {"Authorization": "Bearer {0}".format(key), "Content-Type": "audio/wav"}) 558 | try: 559 | response = urlopen(request) 560 | except HTTPError as e: 561 | raise RequestError("recognition request failed: {0}".format(getattr(e, "reason", "status {0}".format(e.code)))) # use getattr to be compatible with Python 2.6 562 | except URLError as e: 563 | raise RequestError("recognition connection failed: {0}".format(e.reason)) 564 | response_text = response.read().decode("utf-8") 565 | result = json.loads(response_text) 566 | 567 | # return results 568 | if show_all: return result 569 | if "_text" not in result or result["_text"] is None: raise UnknownValueError() 570 | return result["_text"] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tornado==3.1.1 2 | wsgiref==0.1.2 3 | requests==2.7.0 4 | redis==2.10.3 5 | pydub==0.16.4 6 | SpeechRecognition==3.4.5 7 | pymessenger==0.0.5.0 8 | requests-toolbelt==0.6.2 9 | -------------------------------------------------------------------------------- /static/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobajit/AlexaBot/f8dd29e3fddb6e86a59f40f550607e9d5e7d6c35/static/.DS_Store -------------------------------------------------------------------------------- /static/clippy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobajit/AlexaBot/f8dd29e3fddb6e86a59f40f550607e9d5e7d6c35/static/clippy.png -------------------------------------------------------------------------------- /static/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobajit/AlexaBot/f8dd29e3fddb6e86a59f40f550607e9d5e7d6c35/static/icon.png -------------------------------------------------------------------------------- /static/icon.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobajit/AlexaBot/f8dd29e3fddb6e86a59f40f550607e9d5e7d6c35/static/icon.xcf -------------------------------------------------------------------------------- /static/icon_white.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobajit/AlexaBot/f8dd29e3fddb6e86a59f40f550607e9d5e7d6c35/static/icon_white.png
--------------------------------------------------------------------------------
/static/privacy.html:
--------------------------------------------------------------------------------
Terms and Conditions ("Terms")

Last updated: June 10, 2016

Please read these Terms and Conditions ("Terms", "Terms and Conditions") carefully before using the http://m.me/1312199065476896 bot (the "Service") operated by AlexaBot ("us", "we", or "our").

Your access to and use of the Service is conditioned on your acceptance of and compliance with these Terms. These Terms apply to all visitors, users and others who access or use the Service.

By accessing or using the Service you agree to be bound by these Terms. If you disagree with any part of the terms then you may not access the Service.

Links To Other Web Sites

Our Service may contain links to third-party web sites or services that are not owned or controlled by AlexaBot.

AlexaBot has no control over, and assumes no responsibility for, the content, privacy policies, or practices of any third party web sites or services. You further acknowledge and agree that AlexaBot shall not be responsible or liable, directly or indirectly, for any damage or loss caused or alleged to be caused by or in connection with use of or reliance on any such content, goods or services available on or through any such web sites or services.

We strongly advise you to read the terms and conditions and privacy policies of any third-party web sites or services that you visit.

Termination

We may terminate or suspend access to our Service immediately, without prior notice or liability, for any reason whatsoever, including without limitation if you breach the Terms.

All provisions of the Terms which by their nature should survive termination shall survive termination, including, without limitation, ownership provisions, warranty disclaimers, indemnity and limitations of liability.

Governing Law

These Terms shall be governed and construed in accordance with the laws of Virginia, United States, without regard to its conflict of law provisions.

Our failure to enforce any right or provision of these Terms will not be considered a waiver of those rights. If any provision of these Terms is held to be invalid or unenforceable by a court, the remaining provisions of these Terms will remain in effect. These Terms constitute the entire agreement between us regarding our Service, and supersede and replace any prior agreements we might have between us regarding the Service.

Changes

We reserve the right, at our sole discretion, to modify or replace these Terms at any time. If a revision is material we will try to provide at least 30 days notice prior to any new terms taking effect. What constitutes a material change will be determined at our sole discretion.

By continuing to access or use our Service after those revisions become effective, you agree to be bound by the revised terms. If you do not agree to the new terms, please stop using the Service.

Contact Us

If you have any questions about these Terms, please contact us.
--------------------------------------------------------------------------------
/static/return.html:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/static/silence.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobajit/AlexaBot/f8dd29e3fddb6e86a59f40f550607e9d5e7d6c35/static/silence.mp3
--------------------------------------------------------------------------------
/static/speedtest/random4000x4000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobajit/AlexaBot/f8dd29e3fddb6e86a59f40f550607e9d5e7d6c35/static/speedtest/random4000x4000.jpg
--------------------------------------------------------------------------------
/static/tokengenerator.html:
--------------------------------------------------------------------------------
AlexaBot Token Generator

AlexaBot Token Generator
--------------------------------------------------------------------------------
/timeout_dec.py:
--------------------------------------------------------------------------------
import errno
from functools import wraps
import signal
import os

class TimeoutError(Exception):
    pass

def timeout_dec(seconds=20, error_message=os.strerror(errno.ETIME)):
    """Decorator that raises TimeoutError if the wrapped call takes longer than ``seconds``.

    Implemented with SIGALRM, so it only works on Unix and in the main thread;
    see the @timeout_dec(20) usage on getAlexa() in app.py.
    """
    def decorator(func):
        def _handle_timeout(signum, frame):
            raise TimeoutError(error_message)

        def wrapper(*args, **kwargs):
            signal.signal(signal.SIGALRM, _handle_timeout)
            signal.alarm(seconds)
            try:
                result = func(*args, **kwargs)
            finally:
                signal.alarm(0)  # always cancel the pending alarm
            return result

        return wraps(func)(wrapper)

    return decorator
--------------------------------------------------------------------------------
/welcomemessage.txt:
--------------------------------------------------------------------------------
curl -X POST -H "Content-Type: application/json" -d '{
  "setting_type":"call_to_actions",
  "thread_state":"new_thread",
  "call_to_actions":[
    {
      "message":{
        "attachment":{
          "type":"template",
          "payload":{
            "template_type":"generic",
            "elements":[
              {
                "title":"Welcome to AlexaBot!",
                "buttons":[
                  {
                    "type":"postback",
                    "title":"Get Started",
                    "payload":"AUTH"
                  }
                ]
              }
            ]
          }
        }
      }
    }
  ]
}' "https://graph.facebook.com/v2.6/1312199065476896/thread_settings?access_token=[Facebook_Token]"
--------------------------------------------------------------------------------
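
Running the curl command in welcomemessage.txt once against the page's thread settings installs the "Get Started" button for new conversations; its postback payload "AUTH" is exactly what the MessageHandler postback branch in app.py turns into the Login with Amazon dialog. Replace [Facebook_Token] with the page access token before running it.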